'');
// Parser state shared between the XML handler functions through `global`.
// Post object being assembled for the item currently being parsed; startElement()
// creates it and endElement() resets it to null once the item has been processed.
$currentPost = null;
// Character data accumulated by characterData() for the element currently open.
$currentText = '';
/**
 * Converts an ISO 8601 timestamp into SQL "YYYY-MM-DD HH:MM:SS" text.
 *
 * Purely positional: the first 10 characters are taken as the date and the
 * 8 characters starting at offset 11 as the time. Any timezone suffix is
 * dropped without conversion.
 *
 * @param string $input ISO 8601 date-time string, e.g. "2004-05-06T07:08:09Z".
 * @return string Date and time joined by a single space.
 */
function parseDateISO8601($input) {
    $datePart = substr($input, 0, 10);
    $timePart = substr($input, 11, 8);

    return sprintf('%s %s', $datePart, $timePart);
}
/**
 * Converts an RFC 822 date (as used by RSS <pubDate>) into SQL
 * "YYYY-MM-DD HH:MM:SS" text, expressed in PHP's current default timezone.
 *
 * @param string $input RFC 822 date string, e.g. "Mon, 15 Aug 2005 15:52:01 +0000".
 * @return string Formatted date; an unparseable input yields the Unix epoch,
 *                which is what the earlier strftime()-based code produced.
 */
function parseDateRFC822($input) {
    $timestamp = strtotime($input);
    if ($timestamp === false) {
        // Keep the historical result for unparseable input instead of
        // handing a boolean to date().
        $timestamp = 0;
    }

    // 'i' is minutes. The previous strftime() format used %I (12-hour hour)
    // in the minutes position, which corrupted every imported timestamp.
    return date('Y-m-d H:i:s', $timestamp);
}
/**
 * XML parser callback invoked at every opening tag.
 *
 * Opening an "item" starts a fresh post object with an empty category list;
 * opening a "guid" remembers its attributes for later use. In every case the
 * accumulated character data is reset.
 *
 * @param mixed  $parser XML parser handle (unused).
 * @param string $name   Element name as reported by the parser.
 * @param array  $attrs  Attributes of the element.
 */
function startElement($parser, $name, $attrs) {
    global $currentPost, $currentText, $currentGuidAttrs;

    switch ($name) {
        case 'item':
            $currentPost = new post();
            $currentPost->categories = array();
            break;

        case 'guid':
            $currentGuidAttrs = $attrs;
            break;
    }

    // Text collected so far belongs to an enclosing element; start over.
    $currentText = '';
}
/**
 * XML parser callback invoked at every closing tag.
 *
 * Copies the character data collected for the element into the matching
 * field of the post being assembled, and hands the post to processPost()
 * when its item closes. The collected text is cleared afterwards.
 *
 * @param mixed  $parser XML parser handle (unused).
 * @param string $name   Element name as reported by the parser; both plain and
 *                       "namespace-URI local-name" spellings are recognised.
 */
function endElement($parser, $name) {
    // $currentGuidAttrs must be imported too; without it the isPermaLink test
    // below always saw an undefined local and no permalink was ever recorded.
    global $currentPost, $currentText, $currentGuidAttrs;

    if ($currentPost === null) {
        // Closing tag outside any item (e.g. the channel's own title or
        // description): there is no post to populate, so only reset the text.
        $currentText = '';
        return;
    }

    switch ($name) {
        case 'title': case 'http://www.w3.org/1999/02/22-rdf-syntax-ns# title':
            $currentPost->title = $currentText;
            break;
        case 'content:encoded': case 'http://purl.org/rss/1.0/modules/content/ encoded':
            $currentPost->content = $currentText;
            break;
        case 'description': case 'http://www.w3.org/1999/02/22-rdf-syntax-ns# description':
            // content:encoded trumps description, so only save the description
            // if there's no content already
            if (!isset($currentPost->content) || !strlen($currentPost->content)) {
                $currentPost->content = $currentText;
            }
            break;
        case 'pubDate':
            $currentPost->createDate = parseDateRFC822($currentText);
            break;
        case 'dc:date': case 'http://purl.org/dc/elements/1.1/ date':
            $currentPost->createDate = parseDateISO8601($currentText);
            break;
        case 'dcterms:modified': case 'http://purl.org/dc/terms/ modified':
            $currentPost->modDate = parseDateISO8601($currentText);
            break;
        case 'category': case 'dc:subject': case 'http://purl.org/dc/elements/1.1/ subject':
            $currentPost->categories[] = $currentText;
            break;
        case 'guid':
            // The guid doubles as the permalink only when the feed says so.
            if (isset($currentGuidAttrs['isPermaLink']) && $currentGuidAttrs['isPermaLink'] === 'true') {
                $currentPost->permalink = $currentText;
            }
            $currentPost->guid = $currentText;
            break;
        case 'item': case 'http://www.w3.org/1999/02/22-rdf-syntax-ns# item':
            processPost($currentPost);
            $currentPost = null;
            break;
    }

    $currentText = '';
}
/**
 * XML parser callback for character data.
 *
 * The parser may deliver one element's text in several pieces, so each
 * piece is appended to the shared buffer rather than replacing it.
 *
 * @param mixed  $parser XML parser handle (unused).
 * @param string $data   Fragment of text from the current element.
 */
function characterData($parser, $data) {
    global $currentText;

    $currentText = $currentText . $data;
}
// WordPress-specific code
// Login name of the WordPress user recorded as the author of imported posts;
// processPost() looks up the matching user ID by user_login.
$post_author = 'admin';
// Load the WordPress configuration and core include files. The config path is
// relative ('../'); presumably this script sits one directory below the
// WordPress root, and ABSPATH / WPINC come from wp-config.php -- confirm.
require_once('../wp-config.php');
require_once(ABSPATH.WPINC.'/template-functions.php');
require_once(ABSPATH.WPINC.'/functions.php');
require_once(ABSPATH.WPINC.'/vars.php');
// Optionally add a post_modified datetime column to the posts table. Skipped
// when $kTakeNoAction is set. Both $k... flags are defined outside this section.
if ($kCreateModDateField && !$kTakeNoAction) {
// maybe_add_column() and check_column() are presumably provided by this helper.
require_once(ABSPATH.'/wp-admin/install-helper.php');
$res = '';
$tablename = $tableposts;
$ddl = "ALTER TABLE $tableposts ADD COLUMN post_modified datetime";
maybe_add_column($tablename, 'post_modified', $ddl);
// Verify the column exists with the expected type and report the outcome.
// NOTE(review): the two quoted literals below contain a raw line break; this
// may be markup lost from this copy of the file -- confirm against the original.
if (check_column($tablename, 'post_modified', 'datetime')) {
$res .= $tablename . ' - ok
'."\n";
} else {
$res .= 'There was a problem with ' . $tablename . '
'."\n";
//++$error_count;
}
echo $res;
}
/**
 * Inserts or updates one parsed feed item as a WordPress post.
 *
 * Steps, in order:
 *  1. Skip the post when it has categories and every one of them is listed
 *     in the excluded-category map.
 *  2. Resolve the author ID, escape the post fields, and derive post_name
 *     from a /YYYY/MM/DD/slug permalink when one is available.
 *  3. Look up (and, unless in no-action mode, create) each non-excluded category.
 *  4. Look for an existing post with the same date and title, then update,
 *     skip, or insert depending on the $kUpdatePosts... flags.
 *
 * With $kTakeNoAction set, only SELECT queries run and the intended actions
 * are printed.
 *
 * @param object $post Post object populated by the XML handlers (reads title,
 *                     content, createDate, modDate, permalink, categories).
 */
function processPost(&$post) {
    global $kSetModDateField, $kUpdatePostsAlways, $kUpdatePostsIfNewer, $kTakeNoAction;
    // The exclusion map was read as $kExcludedCategories but only
    // $kExcludeCategories was declared global (and after its first use), so
    // the filter could never match. Accept either spelling.
    global $kExcludedCategories, $kExcludeCategories;
    global $post_author;
    global $wpdb;
    global $tableusers, $tableposts, $tablepost2cat, $tablecategories;

    //print_r($post);

    $excludedCategories = array();
    if (is_array($kExcludedCategories)) {
        $excludedCategories = $kExcludedCategories;
    } elseif (is_array($kExcludeCategories)) {
        $excludedCategories = $kExcludeCategories;
    }

    $categories = (isset($post->categories) && is_array($post->categories)) ? $post->categories : array();

    // Filter out (ignore) posts having categories that are all listed as "excluded".
    // If a post has no categories, or at least one non-excluded category, it is
    // still included.
    if (count($categories)) {
        $gotIncludedCategory = false;
        foreach ($categories as $categoryName) {
            if (!isset($excludedCategories[$categoryName])) {
                $gotIncludedCategory = true;
                break;
            }
        }
        if (!$gotIncludedCategory) {
            return;
        }
    }

    $post_author_ID = $wpdb->get_var("SELECT ID FROM $tableusers WHERE user_login = '" . addslashes($post_author) . "'");

    $post_content = isset($post->content) ? $post->content : '';
    // XHTMLify <br> tags.
    // NOTE(review): in the reviewed copy both literals of this call were
    // identical (markup apparently stripped), making it a no-op; restored as
    // <br> -> <br /> based on the accompanying comment -- confirm.
    $post_content = str_replace('<br>', '<br />', $post_content);
    // An un-word-wrap pass (replacing \r and \n with spaces outside certain tag
    // pairs) used to follow here but was already disabled; line breaks in the
    // content are left as they are.
    $post_content = addslashes($post_content);

    $post_date = addslashes(isset($post->createDate) ? $post->createDate : '');
    $post_title = addslashes(isset($post->title) ? $post->title : '');
    // modDate is only present when the feed carried dcterms:modified.
    $post_modified = ($kSetModDateField && isset($post->modDate)) ? addslashes($post->modDate) : '';

    // Derive the slug from a /YYYY/MM/DD/slug style permalink.
    $post_name = '';
    if (isset($post->permalink) && strlen($post->permalink)) {
        $matches = array();
        if (preg_match('|/[0-9]{4}/[0-9]{2}/[0-9]{2}/([A-Za-z0-9_-]*)/?|', $post->permalink, $matches)) {
            $post_name = mysql_escape_string($matches[1]);
        }
    }

    // Resolve every non-excluded category name to an ID, creating missing ones.
    $categoryIDList = array();
    foreach ($categories as $categoryName) {
        if (isset($excludedCategories[$categoryName])) {
            continue;
        }
        $categoryID = $wpdb->get_var("SELECT cat_ID FROM $tablecategories WHERE cat_name = '" . mysql_escape_string($categoryName) . "'");
        if (!$categoryID) {
            if ($kTakeNoAction) {
                echo "Would have inserted new category '$categoryName'.";
                $categoryID = 0;
            } else {
                $categoryNiceName = sanitize_title($categoryName);
                $wpdb->query("INSERT INTO $tablecategories (cat_name, category_nicename) VALUES ('" . mysql_escape_string($categoryName) . "','" . mysql_escape_string($categoryNiceName) . "')");
                $categoryID = $wpdb->get_var("SELECT LAST_INSERT_ID()");
            }
        }
        // else: category already exists; could update its nicename here if it
        // tended not to be correct already.
        //$wpdb->query("UPDATE $tablecategories SET category_nicename = '".mysql_escape_string(sanitize_title($categoryName))."' WHERE cat_ID = ".intval($categoryID));
        $categoryIDList[] = $categoryID;
    }

    // Quick-n-dirty check for dups: same date and same title.
    $dupColumns = $kUpdatePostsIfNewer ? 'ID,post_date,post_title,post_modified' : 'ID,post_date,post_title';
    $dupcheck = $wpdb->get_results("SELECT $dupColumns FROM $tableposts WHERE post_date='$post_date' AND post_title='$post_title' LIMIT 1", ARRAY_A);
    // get_results() may yield no rows; avoid indexing into an empty result.
    $existingID = (is_array($dupcheck) && !empty($dupcheck[0]['ID'])) ? $dupcheck[0]['ID'] : 0;

    if ($existingID) {
        // post already exists
        $isNewer = $kUpdatePostsIfNewer && $kSetModDateField
            && isset($dupcheck[0]['post_modified'])
            && $dupcheck[0]['post_modified'] < $post_modified;

        if ($kUpdatePostsAlways || $isNewer) {
            print "\n\n\nUpdating post, ID = '" . $existingID . "'\n\n";
            print "Timestamp: " . $post_date . "\n\n";
            print "Post Title: '" . stripslashes($post_title) . "'\n\n";
            if (!$kTakeNoAction) {
                $postID = $existingID;
                $result = $wpdb->query("
                    UPDATE $tableposts SET
                    post_author = '$post_author_ID',
                    post_date = '$post_date',
                    " . ($kSetModDateField ? "post_modified = '$post_modified', " : "") . "
                    post_content='$post_content',
                    post_title = '$post_title',
                    post_name = '$post_name'
                    WHERE ID = " . intval($postID));
                // Replace the post's category links wholesale.
                $result = $wpdb->query("DELETE FROM $tablepost2cat WHERE post_id = " . intval($postID));
                foreach ($categoryIDList as $categoryID) {
                    $result = $wpdb->query("
                        INSERT INTO $tablepost2cat (post_id, category_id)
                        VALUES (" . intval($postID) . "," . intval($categoryID) . ")
                    ");
                }
            }
        } else {
            print "\n\n\nSkipping duplicate post, ID = '" . $existingID . "'\n\n";
            print "Timestamp: " . $post_date . "\n\n";
            print "Post Title: '" . stripslashes($post_title) . "'\n\n";
        }
    } else {
        print "\n\nInserting new post.\n\n";
        print "Timestamp: " . $post_date . "\n\n";
        print "Post Title: '" . stripslashes($post_title) . "'\n\n";
        if (!$kTakeNoAction) {
            // post_modified is only written when a modification date is known.
            $result = $wpdb->query("
                INSERT INTO $tableposts
                (post_author,post_date,post_content,post_title,post_name,post_category" . ($post_modified ? ",post_modified" : "") . ")
                VALUES
                ('$post_author_ID','$post_date','$post_content','$post_title','$post_name','1'" . ($post_modified ? ",'$post_modified'" : "") . ")
            ");
            $postID = $wpdb->get_var("SELECT LAST_INSERT_ID();");
            if ($postID) {
                foreach ($categoryIDList as $categoryID) {
                    $result = $wpdb->query("
                        INSERT INTO $tablepost2cat (post_id, category_id)
                        VALUES (" . intval($postID) . "," . intval($categoryID) . ")
                    ");
                }
            }
        }
    }
}

// XML parsing code

/**
 * Parses one RSS/RDF file, feeding it through startElement(), endElement()
 * and characterData(). Terminates the script (die) when the file cannot be
 * opened or is not well-formed XML.
 *
 * @param string $filePath Path of the XML file to import.
 */
function importRSSFile($filePath) {
    if (function_exists('xml_parser_create_ns')) {
        $xml_parser = xml_parser_create_ns('iso-8859-1', ' '); // space sep for namespace URI
    } else {
        $xml_parser = xml_parser_create();
    }
    // make sure to turn off case-folding; XML 1.0 is case-sensitive
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
    xml_set_element_handler($xml_parser, "startElement", "endElement");
    xml_set_character_data_handler($xml_parser, "characterData");

    if (!($fp = fopen($filePath, "r"))) {
        die("could not open XML input");
    }

    // Compare explicitly: a chunk consisting of the single character "0" is
    // falsy and would otherwise end the loop early.
    while (($data = fread($fp, 4096)) !== false && $data !== '') {
        if (!xml_parse($xml_parser, $data, feof($fp))) {
            die(sprintf("XML error: %s at line %d",
                xml_error_string(xml_get_error_code($xml_parser)),
                xml_get_current_line_number($xml_parser)));
        }
    }

    xml_parser_free($xml_parser);
    fclose($fp);
}

/**
 * Imports every monthly archive file found under a directory laid out as
 * <dirPath>/<YYYY>/<MM>.xml, for the years 1980 through the current year,
 * in chronological order.
 *
 * @param string $dirPath Root directory of the archive.
 */
function importBlogArchive($dirPath) {
    $startYear = 1980;
    $endYear = intval(date('Y'));

    for ($testYear = $startYear; $testYear <= $endYear; $testYear++) {
        for ($testMonth = 1; $testMonth <= 12; $testMonth++) {
            $rssFilePath = sprintf('%s/%d/%02d.xml', $dirPath, $testYear, $testMonth);
            if (is_file($rssFilePath)) {
                importRSSFile($rssFilePath);
            }
        }
    }
}

// Entry point: $path (set outside this section) is either an archive
// directory or a single feed file.
if (is_dir($path)) {
    importBlogArchive($path);
} else {
    importRSSFile($path);
}

/*echo ''; print_r($EZSQL_ERROR); echo ''; */
?>