'');

// ---------------------------------------------------------------------------
// Parser state shared between the XML callbacks below
// ---------------------------------------------------------------------------
$currentPost = null;          // post object for the item being parsed; null outside an item
$currentText = '';            // character data accumulated for the element being parsed
$currentGuidAttrs = array();  // attributes of the most recently opened <guid> element

/**
 * Converts an ISO 8601 timestamp (e.g. "2004-05-01T12:34:56+00:00") into
 * SQL (MySQL, at least)-compatible "YYYY-MM-DD HH:MM:SS" text.
 *
 * The conversion slices fixed character positions, so any timezone suffix
 * is dropped rather than applied.
 *
 * @param string $input ISO 8601 date/time text
 * @return string date and time separated by a single space
 */
function parseDateISO8601($input)
{
    // Trim first: the slicing below relies on the date starting at offset 0.
    $input = trim($input);

    return substr($input, 0, 10) . ' ' . substr($input, 11, 8);
}

/**
 * Converts an RFC 822 timestamp (as used by RSS <pubDate>) into
 * SQL (MySQL, at least)-compatible "YYYY-MM-DD HH:MM:SS" text.
 *
 * @param string $input RFC 822 date/time text; anything strtotime() accepts
 * @return string formatted timestamp in the script's default timezone
 */
function parseDateRFC822($input)
{
    // The previous strftime() format used %I (hour on a 12-hour clock) where
    // the minutes belong, so every stored time had wrong minutes. date() with
    // "i" emits minutes and avoids the deprecated strftime().
    return date('Y-m-d H:i:s', strtotime($input));
}

/**
 * XML start-tag callback: opens a new post on an item element and remembers
 * the attributes of a guid element for endElement().
 *
 * @param mixed  $parser XML parser handle (unused)
 * @param string $name   element name; "namespace-URI localname" when the
 *                       namespace-aware parser is in use
 * @param array  $attrs  attributes of the element
 */
function startElement($parser, $name, $attrs)
{
    global $currentPost, $currentText, $currentGuidAttrs;

    switch ($name) {
        case 'item':
        // Same namespaced spelling that endElement() treats as an item, so
        // both callbacks agree on where a post begins and ends.
        case 'http://www.w3.org/1999/02/22-rdf-syntax-ns# item':
            $currentPost = new post();
            $currentPost->categories = array();
            break;

        case 'guid':
            $currentGuidAttrs = $attrs;
            break;
    }

    $currentText = '';
}

/**
 * XML end-tag callback: stores the accumulated text into the matching field
 * of the current post, and hands the post to processPost() when its item
 * element closes.
 *
 * @param mixed  $parser XML parser handle (unused)
 * @param string $name   element name; "namespace-URI localname" when the
 *                       namespace-aware parser is in use
 */
function endElement($parser, $name)
{
    // $currentGuidAttrs must be imported here; without it the isPermaLink
    // test below always saw an undefined local and never set the permalink.
    global $currentPost, $currentText, $currentGuidAttrs;

    // Elements outside an item (e.g. the channel's own title) have no post
    // to attach to; ignore them instead of assigning properties on null.
    if (!is_object($currentPost)) {
        $currentText = '';
        return;
    }

    switch ($name) {
        case 'title':
        case 'http://www.w3.org/1999/02/22-rdf-syntax-ns# title':
            $currentPost->title = $currentText;
            break;

        case 'content:encoded':
        case 'http://purl.org/rss/1.0/modules/content/ encoded':
            $currentPost->content = $currentText;
            break;

        case 'description':
        case 'http://www.w3.org/1999/02/22-rdf-syntax-ns# description':
            // content:encoded trumps description, so only save the description
            // if there's no content already
            if (!isset($currentPost->content) || !strlen($currentPost->content)) {
                $currentPost->content = $currentText;
            }
            break;

        case 'pubDate':
            $currentPost->createDate = parseDateRFC822($currentText);
            break;

        case 'dc:date':
        case 'http://purl.org/dc/elements/1.1/ date':
            $currentPost->createDate = parseDateISO8601($currentText);
            break;

        case 'dcterms:modified':
        case 'http://purl.org/dc/terms/ modified':
            $currentPost->modDate = parseDateISO8601($currentText);
            break;

        case 'category':
        case 'dc:subject':
        case 'http://purl.org/dc/elements/1.1/ subject':
            $currentPost->categories[] = $currentText;
            break;

        case 'guid':
            if (isset($currentGuidAttrs['isPermaLink']) && $currentGuidAttrs['isPermaLink'] == 'true') {
                $currentPost->permalink = $currentText;
            }
            $currentPost->guid = $currentText;
            break;

        case 'item':
        case 'http://www.w3.org/1999/02/22-rdf-syntax-ns# item':
            processPost($currentPost);
            $currentPost = null;
            break;
    }

    $currentText = '';
}

/**
 * XML character-data callback: appends text to the buffer for the current
 * element (the parser may deliver one element's text in several pieces).
 *
 * @param mixed  $parser XML parser handle (unused)
 * @param string $data   text fragment
 */
function characterData($parser, $data)
{
    global $currentText;

    $currentText .= $data;
}

// ---------------------------------------------------------------------------
// WordPress-specific code
// ---------------------------------------------------------------------------
$post_author = 'admin';

require_once('../wp-config.php');
require_once(ABSPATH.WPINC.'/template-functions.php');
require_once(ABSPATH.WPINC.'/functions.php');
require_once(ABSPATH.WPINC.'/vars.php');

// Optionally add a post_modified column to the posts table and report the result.
if ($kCreateModDateField && !$kTakeNoAction) {
    require_once(ABSPATH.'/wp-admin/install-helper.php');

    $res = '';
    $tablename = $tableposts;
    $ddl = "ALTER TABLE $tableposts ADD COLUMN post_modified datetime";
    maybe_add_column($tablename, 'post_modified', $ddl);

    if (check_column($tablename, 'post_modified', 'datetime')) {
        $res .= $tablename . " - ok\n\n";
    } else {
        $res .= 'There was a problem with ' . $tablename . "\n\n";
        //++$error_count;
    }

    echo $res;
}

/**
 * Replaces the category links of a post: one post2cat row is inserted per
 * category ID.
 *
 * @param int   $postID         ID of the post row
 * @param array $categoryIDList category IDs to link to the post
 */
function rssImportLinkCategories($postID, $categoryIDList)
{
    global $wpdb, $tablepost2cat;

    foreach ($categoryIDList as $categoryID) {
        $wpdb->query("
            INSERT INTO $tablepost2cat
                (post_id, category_id)
            VALUES
                (".intval($postID).",".intval($categoryID).")
        ");
    }
}

/**
 * Imports one parsed feed item into the WordPress tables.
 *
 * Posts whose categories are all excluded are skipped. Otherwise missing
 * categories are created, and the post is inserted, updated, or skipped as
 * a duplicate (same post_date and post_title) according to the $kUpdate*
 * settings. When $kTakeNoAction is set, only the progress messages are
 * printed and no rows are written.
 *
 * @param object $post item object filled in by endElement(); the fields read
 *                     here are title, content, createDate, modDate,
 *                     permalink and categories
 */
function processPost(&$post)
{
    global $kSetModDateField, $kUpdatePostsAlways, $kUpdatePostsIfNewer, $kTakeNoAction;
    // The exclusion map used to be read without being imported (and the later
    // global statement spelled it $kExcludeCategories), so the filter below
    // never excluded anything. The configuration is not visible from here, so
    // both spellings are accepted.
    global $kExcludedCategories, $kExcludeCategories;
    global $post_author;
    global $wpdb;
    global $tableusers, $tableposts, $tablepost2cat, $tablecategories;

    //print_r($post);

    // Map keyed by category name; presence of a key means "excluded".
    $excludedCategories = array();
    if (is_array($kExcludedCategories)) {
        $excludedCategories = $kExcludedCategories;
    } elseif (is_array($kExcludeCategories)) {
        $excludedCategories = $kExcludeCategories;
    }

    $categories = (isset($post->categories) && is_array($post->categories))
        ? $post->categories
        : array();

    // Filter out (ignore) posts having categories that are all listed as "excluded".
    // If a post has no categories, or at least one non-excluded category, it is still
    // included.
    if (count($categories)) {
        $gotIncludedCategory = false;
        foreach ($categories as $categoryName) {
            if (!isset($excludedCategories[$categoryName])) {
                $gotIncludedCategory = true;
                break;
            }
        }
        if (!$gotIncludedCategory) {
            return;
        }
    }

    $post_author_ID = $wpdb->get_var(
        "SELECT ID FROM $tableusers WHERE user_login = '".addslashes($post_author)."'"
    );

    // XHTMLify <br> tags.
    // NOTE(review): the tag literals were missing from this call (its search
    // and replacement strings were identical, making it a no-op); they are
    // restored from the accompanying comment -- confirm against the original
    // script.
    $post_content = isset($post->content) ? $post->content : '';
    $post_content = str_replace('<br>', '<br />', $post_content);

    // Line breaks inside the content are left as they are. A disabled
    // (commented-out) pass used to replace \r and \n with spaces outside
    // certain tag pairs, on the grounds that break tags are added at display
    // time and RSS feeds are often already soft-wrapped.

    // Escape everything that is interpolated into SQL below.
    $post_content  = addslashes($post_content);
    $post_date     = addslashes(isset($post->createDate) ? $post->createDate : '');
    $post_title    = addslashes(isset($post->title) ? $post->title : '');
    $post_modified = ($kSetModDateField && isset($post->modDate)) ? addslashes($post->modDate) : '';

    // Derive the post slug from a /YYYY/MM/DD/slug/ style permalink, if any.
    $post_name = '';
    if (isset($post->permalink) && strlen($post->permalink)) {
        $matches = array();
        if (preg_match('|/[0-9]{4}/[0-9]{2}/[0-9]{2}/([A-Za-z0-9_-]*)/?|', $post->permalink, $matches)) {
            // addslashes() instead of mysql_escape_string(), which no longer
            // exists in PHP 7+; the pattern only admits safe characters anyway.
            $post_name = addslashes($matches[1]);
        }
    }

    // Resolve each non-excluded category name to an ID, creating it if needed.
    $categoryIDList = array();
    foreach ($categories as $categoryName) {
        if (isset($excludedCategories[$categoryName])) {
            continue;
        }

        $escapedCategoryName = addslashes($categoryName);
        $categoryID = $wpdb->get_var(
            "SELECT cat_ID FROM $tablecategories WHERE cat_name = '".$escapedCategoryName."'"
        );

        if (!$categoryID) {
            if ($kTakeNoAction) {
                echo "Would have inserted new category '$categoryName'.";
                $categoryID = 0;
            } else {
                $categoryNiceName = sanitize_title($categoryName);
                $wpdb->query(
                    "INSERT INTO $tablecategories (cat_name, category_nicename) VALUES ('"
                    .$escapedCategoryName."','".addslashes($categoryNiceName)."')"
                );
                $categoryID = $wpdb->get_var("SELECT LAST_INSERT_ID()");
            }
        }
        // else: category already exists; its nicename could be updated here
        // if it tended not to be correct already.

        $categoryIDList[] = $categoryID;
    }

    // Quick-n-dirty check for dups: same date and title. post_modified is
    // only selected when it is needed for the "newer" comparison.
    $dupColumns = $kUpdatePostsIfNewer
        ? 'ID,post_date,post_title,post_modified'
        : 'ID,post_date,post_title';
    $dupcheck = $wpdb->get_results(
        "SELECT $dupColumns FROM $tableposts WHERE post_date='$post_date' AND post_title='$post_title' LIMIT 1",
        ARRAY_A
    );
    $existingID = (is_array($dupcheck) && !empty($dupcheck[0]['ID'])) ? $dupcheck[0]['ID'] : 0;

    if ($existingID) {
        // post already exists
        $feedIsNewer = $kUpdatePostsIfNewer
            && $kSetModDateField
            && isset($dupcheck[0]['post_modified'])
            && $dupcheck[0]['post_modified'] < $post_modified;

        if ($kUpdatePostsAlways || $feedIsNewer) {
            print "\n\n\nUpdating post, ID = '" . $existingID . "'\n\n";
            print "Timestamp: " . $post_date . "\n\n";
            print "Post Title: '" . stripslashes($post_title) . "'\n\n";

            if (!$kTakeNoAction) {
                $postID = $existingID;
                $wpdb->query("
                    UPDATE $tableposts
                    SET
                        post_author = '$post_author_ID',
                        post_date = '$post_date',
                        ".($kSetModDateField ? "post_modified = '$post_modified', " : "")."
                        post_content='$post_content',
                        post_title = '$post_title',
                        post_name = '$post_name'
                    WHERE ID = ".intval($postID));

                // Rebuild the category links from scratch.
                $wpdb->query("DELETE FROM $tablepost2cat WHERE post_id = ".intval($postID));
                rssImportLinkCategories($postID, $categoryIDList);
            }
        } else {
            print "\n\n\nSkipping duplicate post, ID = '" . $existingID . "'\n\n";
            print "Timestamp: " . $post_date . "\n\n";
            print "Post Title: '" . stripslashes($post_title) . "'\n\n";
        }
    } else {
        print "\n\nInserting new post.\n\n";
        print "Timestamp: " . $post_date . "\n\n";
        print "Post Title: '" . stripslashes($post_title) . "'\n\n";

        if (!$kTakeNoAction) {
            $wpdb->query("
                INSERT INTO $tableposts
                    (post_author,post_date,post_content,post_title,post_name,post_category"
                    .($post_modified ? ",post_modified" : "").")
                VALUES
                    ('$post_author_ID','$post_date','$post_content','$post_title','$post_name','1'"
                    .($post_modified ? ",'$post_modified'" : "").")
            ");

            $postID = $wpdb->get_var("SELECT LAST_INSERT_ID();");
            if ($postID) {
                rssImportLinkCategories($postID, $categoryIDList);
            }
        }
    }
}

// ---------------------------------------------------------------------------
// XML parsing code
// ---------------------------------------------------------------------------

/**
 * Parses one RSS file and imports every item in it through the
 * startElement/endElement/characterData callbacks.
 *
 * Terminates the script with die() when the file cannot be opened or is not
 * well-formed XML.
 *
 * @param string $filePath path of the RSS/XML file to import
 */
function importRSSFile($filePath)
{
    if (function_exists('xml_parser_create_ns')) {
        $xml_parser = xml_parser_create_ns('iso-8859-1', ' '); // space sep for namespace URI
    } else {
        $xml_parser = xml_parser_create();
    }

    // make sure to turn off case-folding; XML 1.0 is case-sensitive
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");

    $fp = fopen($filePath, "r");
    if (!$fp) {
        die("could not open XML input");
    }

    // Compare strictly: a chunk consisting of just "0" is falsy and would
    // otherwise end the loop before the parser saw it.
    while (($data = fread($fp, 4096)) !== false && $data !== '') {
        if (!xml_parse($xml_parser, $data, feof($fp))) {
            die(sprintf(
                "XML error: %s at line %d",
                xml_error_string(xml_get_error_code($xml_parser)),
                xml_get_current_line_number($xml_parser)
            ));
        }
    }

    xml_parser_free($xml_parser);
    fclose($fp);
}

/**
 * Imports a directory of monthly archives laid out as <dir>/YYYY/MM.xml,
 * oldest first, for every year from 1980 through the current year.
 *
 * @param string $dirPath archive root directory
 */
function importBlogArchive($dirPath)
{
    $startYear = 1980;
    $endYear = intval(date('Y'));

    for ($testYear = $startYear; $testYear <= $endYear; $testYear++) {
        for ($testMonth = 1; $testMonth <= 12; $testMonth++) {
            $rssFilePath = $dirPath.'/'.$testYear.'/'.($testMonth < 10 ? '0' : '').$testMonth.'.xml';
            if (is_file($rssFilePath)) {
                importRSSFile($rssFilePath);
            }
        }
    }
}

// Entry point: a directory is treated as a blog archive, anything else as a
// single RSS file.
if (is_dir($path)) {
    importBlogArchive($path);
} else {
    importRSSFile($path);
}

?>