// WP2Gopher v1.0 by Michael Proctor [michael@4d2.org]
// WordPress -> Gopher converter
// Creates a set of text files and gophermap files based on the contents of a
// WordPress database, allowing the blog to be served out via pygopherd or
// bucktooth (untested).
// This tool *will remove all files and directories* from the target directory.
// It will create a series of subdirectories, each containing a gophermap pointing
// to 50 posts, to keep listing sizes reasonable. The intent is for you to
// have a single directory which is created and re-created by this script and
// not manually modified.
// All post files are physically located in a single directory so that the location
// of posts does not change as more posts are added and they shift from page to page.
// *********************
// CONFIGURATION SECTION
// Change the variables below to the appropriate values for your server setup.
// Console chatter switch. TRUE prints conversion progress while the script
// runs; FALSE keeps it silent unless something goes wrong, which is the
// better choice for unattended cron runs.
$verbose = true;

// MySQL connection details. Use the same credentials WordPress itself uses.
$dbserver = "localhost";
$dbuser = "USERNAME";
$dbpass = "PASSWORD";
$dbname = "DBNAME";

// WordPress table prefix. Only change this if your install does not use the
// default prefix.
$dbtableprefix = "wp_";

// Blog title, public web address, and the contact address for comments.
// These are woven into the header and footer messages defined below.
$blogname = 'My Phantabulous Phlog';
$httpurl = 'http://www.myblog.com';
$email = 'comments@myblog.com';

// Filesystem directory in which the gopher files are generated.
// The trailing slash is required.
$gopherpath = '/var/gopher/phlog/';

// Gopher selector (absolute resource path) for that same directory.
// Example: with the root menu at /var/gopher and the blog files generated in
// /var/gopher/phlog, this is /phlog/. Leading and trailing slashes are both
// required.
$virtualgopherpath = '/phlog/';

// Column at which post and comment text is word-wrapped.
$maxlinelength = 75;

// Banner written at the top of every generated menu. It is built from the
// blog name and URL above, so it normally needs no editing.
$headermsg = "{$blogname}\nMost recent posts first ~ Also available at {$httpurl}";

// Text appended to the end of every post, e.g. an invitation to comment. It is
// built from the email address and URL above. If you also run WPTelnet, this
// is a good place to mention that comments can be left via telnet.
$footermsg = sprintf(
    "\n\nTo submit a comment on this post, email %s or visit us on the web [ %s ].",
    $email,
    $httpurl
);

// How many posts are listed on each menu page.
$postsperpage = 50;
// END CONFIGURATION SECTION
// Change code beyond this point at your own peril
// *************************
// Recursively delete a directory: every file and subdirectory inside
// $dirname is removed, and then $dirname itself.
//
// $dirname - path of the directory to remove.
//
// Returns TRUE only if everything, including $dirname itself, was removed.
// Returns FALSE if $dirname is not a directory, cannot be opened, or if any
// entry could not be deleted. Deletion is best-effort: a failure on one entry
// does not stop the remaining entries from being processed.
function delete_directory($dirname) {
    if (!is_dir($dirname))
        return false;

    $dir_handle = opendir($dirname);
    if (!$dir_handle)
        return false;

    $ok = true;

    // Compare against FALSE explicitly: an entry named "0" is a valid file
    // name but is falsy, and would otherwise end the loop early.
    while (($file = readdir($dir_handle)) !== false) {
        if ($file === "." || $file === "..")
            continue;

        $path = $dirname . "/" . $file;

        // A symlink that points at a directory is removed as a link; recursing
        // into it would delete the contents of the link's target instead.
        if (is_dir($path) && !is_link($path)) {
            if (!delete_directory($path))
                $ok = false;
        } else {
            if (!unlink($path))
                $ok = false;
        }
    }
    closedir($dir_handle);

    if (!rmdir($dirname))
        $ok = false;

    return $ok;
}
// Look up one attribute inside the attribute portion of an HTML tag.
//
// $attributes - the text between the tag name and the closing ">".
// $name       - attribute name to find (matched case-insensitively).
//
// Returns the attribute value (double-quoted, single-quoted or unquoted
// forms are accepted), or FALSE when the attribute is absent.
function _textize_attribute($attributes, $name) {
    $pattern = '/(?:^|\s)' . preg_quote($name, '/')
             . '\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i';
    if (!preg_match($pattern, $attributes, $m))
        return FALSE;
    // Exactly one of the three alternatives captured something (or the value
    // was an empty quoted string, in which case all groups are empty).
    for ($i = 1; $i <= 3; $i++) {
        if (isset($m[$i]) && $m[$i] !== '')
            return $m[$i];
    }
    return '';
}

// preg_replace_callback handler for a complete <a ...>text</a> element.
// Yields "text [ href ]", or just "text" when there is no usable href.
function _textize_anchor($m) {
    $href = _textize_attribute($m[1], 'href');
    if ($href === FALSE || $href === '')
        return $m[2];
    return $m[2] . " [ " . $href . " ]";
}

// preg_replace_callback handler for an <img ...> tag.
// Yields "Image: alt [ src ]" when alt text exists, otherwise
// "[ see image at src ]"; an image without a src contributes only its alt text.
function _textize_image($m) {
    $src = _textize_attribute($m[1], 'src');
    $alt = _textize_attribute($m[1], 'alt');
    $hasalt = ($alt !== FALSE && $alt !== '');
    if ($src === FALSE || $src === '')
        return $hasalt ? "Image: " . $alt : "";
    if ($hasalt)
        return "Image: " . $alt . " [ " . $src . " ]";
    return "[ see image at " . $src . " ]";
}

// Convert a fragment of WordPress HTML (post body, title, or comment) into
// plain text suitable for a gopher text file.
//
// $instring - the HTML fragment.
//
// Returns the plain-text rendering: common formatting tags become ASCII
// markers, a fixed list of character entities becomes ASCII equivalents,
// links become "text [ url ]", images become "Image: alt [ url ]" or
// "[ see image at url ]", and all other tags are stripped.
function textize($instring) {
    // String replacement rules for post contents, titles, and comments.
    // This is not a comprehensive list; add further tags or character codes
    // here if your database contains them. Tags not listed here (for example
    // paragraph and list containers) are removed by strip_tags() below.
    //
    // NOTE(review): the markup literals in this table were reconstructed from
    // the replacement values and surrounding comments -- confirm the exact
    // keys against the upstream copy of this script.
    $replaces = array(
        '<em>' => '_', '</em>' => '_',
        '<strong>' => '*', '</strong>' => '*',
        '<b>' => '*', '</b>' => '*',
        '<i>' => '_', '</i>' => '_',
        '<u>' => '_', '</u>' => '_',
        '<li>' => '* ', '</li>' => "\n",
        '<br />' => "\n", '<br/>' => "\n", '<br>' => "\n",
        '<blockquote>' => "-=-=-=-=-=-=-=-=-=-=\n",
        '</blockquote>' => "\n-=-=-=-=-=-=-=-=-=-=",
        '&#8220;' => '"', '&#8221;' => '"', '&#147;' => '"', '&#148;' => '"',
        '&ldquo;' => '"', '&rdquo;' => '"',
        '&#8216;' => "'", '&#8217;' => "'", '&#145;' => "'", '&#146;' => "'",
        '&lsquo;' => "'", '&rsquo;' => "'",
        '&#133;' => '...', '&#8230;' => '...', '&hellip;' => '...',
        '&#8211;' => '-', '&#150;' => '-', '&ndash;' => '-',
        '&#8212;' => '--', '&#151;' => '--', '&mdash;' => '--',
        '&#8482;' => '(tm)', '&#153;' => '(tm)', '&trade;' => '(tm)',
        '&copy;' => '(C)', '&reg;' => '(R)',
        '&acirc;' => 'a', '&#259;' => 'a', '&#351;' => 's',
        '&deg;' => ' degrees ', '&nbsp;' => ' ', '&ntilde;' => 'ñ',
        // Ampersands are decoded last so that text such as "&amp;nbsp;" is
        // decoded once (to "&nbsp;") instead of twice.
        '&amp;' => '&', '&#038;' => '&'
    );

    // The following HTML tags will not be stripped by strip_tags() because
    // they are converted separately below.
    $specialtags = "<a><img>";

    // Do string replacements on the text for better plain-text display.
    // With array arguments str_replace applies the pairs in table order.
    $instring = str_replace(array_keys($replaces), array_values($replaces), $instring);

    // Strip all HTML tags that still exist and won't be specially dealt with.
    $instring = strip_tags($instring, $specialtags);

    // Deal with <a> tags: keep the link text and append the target URL.
    // preg_* returns NULL on an internal error (e.g. backtrack limit); in that
    // case keep the text from the previous step rather than losing the post.
    $converted = preg_replace_callback('/<a\b([^>]*)>(.*?)<\/a\s*>/is', '_textize_anchor', $instring);
    if ($converted !== NULL)
        $instring = $converted;

    // Drop any unbalanced opening or closing anchor tags left behind.
    $converted = preg_replace('/<\/?a\b[^>]*>/i', '', $instring);
    if ($converted !== NULL)
        $instring = $converted;

    // Deal with <img> tags.
    $converted = preg_replace_callback('/<img\b([^>]*)>/i', '_textize_image', $instring);
    if ($converted !== NULL)
        $instring = $converted;

    return($instring);
}
// Create the directory for one menu page, open its gophermap file, and write
// the page header plus navigation links.
//
// $gopherpath        - filesystem directory the blog is generated into.
// $virtualgopherpath - gopher selector prefix for that directory (with
//                      leading and trailing slash).
// $headermsg         - banner text written at the top of the menu.
// $currentpage       - 1-based number of the page being started.
// $totalposts        - total number of posts being exported.
// $postsperdir       - number of posts listed per page.
//
// Returns the open file handle for the new gophermap; the caller appends the
// post links and closes it. Terminates the script via die() if the directory
// or the file cannot be created.
function start_gophermap($gopherpath, $virtualgopherpath, $headermsg, $currentpage, $totalposts, $postsperdir) {
    // Build the page directory once so mkdir() and fopen() always agree on
    // the path, whether or not $gopherpath ends in a slash.
    $base = ($gopherpath === "") ? "" : rtrim($gopherpath, "/") . "/";
    $pagedir = $base . "page" . $currentpage;

    if(!mkdir($pagedir))
        die("Could not create directory.");
    if(!($gophermap = fopen($pagedir . "/gophermap", "w")))
        die("Could not open gophermap file for write access.");

    // There is always at least one page, even for an empty blog, and a
    // non-positive page size must not cause a division by zero.
    $totalpages = ($postsperdir > 0) ? (int) ceil($totalposts / $postsperdir) : 1;
    if($totalpages < 1)
        $totalpages = 1;

    fwrite($gophermap, $headermsg . "\n");
    fwrite($gophermap, "Page " . $currentpage . " of " . $totalpages . "\n\n");
    if($currentpage != 1)
        fwrite($gophermap, "1Previous Page\t" . $virtualgopherpath . "page" . ($currentpage - 1) . "\n");
    if($currentpage < $totalpages)
        fwrite($gophermap, "1Next Page\t" . $virtualgopherpath . "page" . ($currentpage + 1) . "\n");
    fwrite($gophermap, "1View All Posts\t" . $virtualgopherpath . "posts\n");
    fwrite($gophermap, "\n");
    return($gophermap);
}
// Execution begins here
$stime = microtime(true);

// The page size drives both the page count and the page-break test below, so
// reject values that would divide by zero or never advance.
if((int) $postsperpage < 1)
    die("Invalid configuration: \$postsperpage must be at least 1.");

// Establish DB connection. The mysqli extension is used because the old
// mysql_* functions were removed in PHP 7. Turn off exception-style error
// reporting so that failures come back as FALSE and are handled by the
// explicit checks below on every PHP version.
mysqli_report(MYSQLI_REPORT_OFF);
$db = mysqli_connect($dbserver, $dbuser, $dbpass);
if(!$db)
    die("Could not connect to DB server.");
if(!mysqli_select_db($db, $dbname))
    die("Could not select WordPress DB.");

// Get posts, newest first, together with each author's display name (a LEFT
// JOIN keeps posts whose author row is missing). This runs BEFORE the target
// directory is wiped so that a database problem cannot leave an empty site.
$query = "SELECT p.ID, p.post_date, p.post_content, p.post_title, p.comment_count, u.display_name"
       . " FROM " . $dbtableprefix . "posts p"
       . " LEFT JOIN " . $dbtableprefix . "users u ON u.ID = p.post_author"
       . " WHERE p.post_type = 'post' AND p.post_status = 'publish'"
       . " ORDER BY p.post_date DESC";
$postsresult = mysqli_query($db, $query);
if(!$postsresult)
    die("Could not read posts from the WordPress DB: " . mysqli_error($db));
$totalposts = mysqli_num_rows($postsresult);

// Clear out the directory. On a first run the directory does not exist yet,
// which is fine -- it is created just below.
if(file_exists($gopherpath) && !delete_directory($gopherpath))
    die("Could not empty directory " . $gopherpath);
if(!mkdir($gopherpath))
    die("Could not recreate directory " . $gopherpath);
if(!mkdir($gopherpath . "posts"))
    die("Could not create directory " . $gopherpath . "posts");

// Start the "all posts" gopher map
if(!($allgophermap = fopen($gopherpath . "posts/gophermap", "w")))
    die("Could not open 'all posts' gophermap file for write access.");
fwrite($allgophermap, $headermsg . "\n");
fwrite($allgophermap, "All Posts\n\n");
fwrite($allgophermap, "1Return to Page 1\t" . $virtualgopherpath . "page1\n\n");

$currentpage = 1;
$currentpost = 1;
$gophermap = start_gophermap($gopherpath, $virtualgopherpath, $headermsg, $currentpage, $totalposts, $postsperpage);

while($row = mysqli_fetch_assoc($postsresult)) {
    $postID = (int) $row['ID'];
    $post_date = $row['post_date'];
    $comment_count = (int) $row['comment_count'];
    $display_name = (string) $row['display_name'];
    if($verbose) echo "Processing post " . $postID . "\n";

    $post_content = textize($row['post_content']);
    $post_title = textize($row['post_title']);

    // Create header lines
    $postheader = strtoupper($post_title) . "\n(Posted " . $post_date . " by " . $display_name . ")\n\n";
    if($comment_count > 1)
        $commentblockheader = "\n\n--------\n\nThere are " . $comment_count . " comments on this post:\n";
    elseif($comment_count == 1)
        $commentblockheader = "\n\n--------\n\nThere is 1 comment on this post:\n";
    else
        $commentblockheader = "\n\n--------\n\nThere are no comments on this post.\n";

    // Write post data out to a file
    $postfilename = $gopherpath . "posts/post" . $postID . ".txt";
    $postfile = fopen($postfilename, "w");
    if(!$postfile)
        die("Could not open file " . $postfilename . " for write access.");
    fwrite($postfile, $postheader);
    fwrite($postfile, wordwrap($post_content, $maxlinelength));
    fwrite($postfile, $commentblockheader);

    // Get comments for this post, oldest first.
    // The OR in this query covers comment_approved being stored either as a
    // string or as a number (the original author attributes this to Akismet).
    $query = "SELECT comment_author, comment_author_email, comment_date, comment_content"
           . " FROM " . $dbtableprefix . "comments"
           . " WHERE comment_post_ID = " . $postID
           . " AND (comment_approved = '1' OR comment_approved = 1)"
           . " ORDER BY comment_date ASC";
    $commentsresult = mysqli_query($db, $query);
    if(!$commentsresult)
        die("Could not read comments for post " . $postID . ": " . mysqli_error($db));
    $commentcounter = 1;
    while($commentsdata = mysqli_fetch_assoc($commentsresult)) {
        $commentheader = "\nComment #" . $commentcounter . " by " . $commentsdata['comment_author'] . " ( " .
            $commentsdata['comment_author_email'] . " ) on " . $commentsdata['comment_date'] . "\n";
        fwrite($postfile, wordwrap($commentheader, $maxlinelength));
        fwrite($postfile, wordwrap(textize($commentsdata['comment_content']), $maxlinelength));
        fwrite($postfile, "\n");
        $commentcounter++;
    }
    mysqli_free_result($commentsresult);

    // Done writing the file for this post; close file and add a link to the gophermap
    fwrite($postfile, wordwrap($footermsg, $maxlinelength));
    fclose($postfile);

    // A gophermap entry is a single tab-separated line, so tabs or line
    // breaks inside the title would corrupt the menu; flatten them to spaces.
    $menutitle = str_replace(array("\t", "\r", "\n"), " ", $post_title);
    $gophermapline = "0" . $menutitle . "\t" . $virtualgopherpath . "posts/post" . $postID . ".txt\n";
    fwrite($gophermap, $gophermapline);
    fwrite($allgophermap, $gophermapline);

    // Start a new page once the configured number of posts has been listed,
    // unless this was the final post.
    if($currentpost % $postsperpage == 0 && $currentpost < $totalposts) {
        fclose($gophermap);
        $currentpage++;
        $gophermap = start_gophermap($gopherpath, $virtualgopherpath, $headermsg, $currentpage, $totalposts, $postsperpage);
    }
    $currentpost++;
}

// Done processing posts; close the gophermap files and the DB connection
fclose($gophermap);
fclose($allgophermap);
mysqli_free_result($postsresult);
mysqli_close($db);

if ($verbose) echo "Successfully processed all posts!\n";
$etime = microtime(true);
if ($verbose) echo "Finished processing in " . ($etime-$stime) . " seconds.\n";
?>