User:Pearle/pearle.pl

Please copy the original wikitext, not the viewable text, when downloading. Also be sure to remove the "source" tags at the top and bottom and everything outside of them. Thanks.
### IMPORTANT ###

# This code is released into the public domain.  CONTRIBUTIONS are
# welcome, but will also hereby be RELEASED TO THE PUBLIC DOMAIN.

# See the documentation distributed with this code for important
# warnings and caveats.

# Publication date: 12 Nov 2005 (UTC)

### CLONING NOTES ###


# Clone operators: You may wish to undo certain items marked "TEMPORARY".
#  -- Beland 21 Aug 2005

# Clone operators: You will need to change $historyFile at the top of 
# opentaskUpdate().  You may also wish to chage $target there.
# -- Beland 10 Sep 2005

### RECENT CHANGES ###

# Fixes made before the first publication:
# -  Now retains sort keys
# -  Now properly retains sort keys
# -  Support for (hopefully all) non-ASCII characters in titles
# -  Category moves are now done in one edit, not two
# -  Slow down if Wikipedia is slow
# -  Automatically retry if HTTP 500 or 503 (but wait 1, 10, or 60
#    minutes first)
# -  Follow popular conventions for category/interwiki block style
# -  Automatically TRANSFER_TEXT_ACTUALLY before doing a category move
#    and flag for manual review if needed.
# -  Don't wholesale delete interwiki links
# -  Don't add the category manually after doing a null edit

# 30 Apr 2005: Publish initial code
# 15 May 2005: Add HTTP error 502 handling.
# 22 May 2005: Add {{msg:foo}} -> {{foo}} conversion.
# 10 Aug 2005: Fix bug surrounding category moves that require null edits
# 18 Aug 2005: Add CLEANUP_DATE capabilities
# *** 21 Aug 2005: Publish update ***
# 22 Aug 2005: Canonicalize dk to da
# 22 Aug 2005: Carry "wpStarttime", which prevents problems when
#              editing undeleted articles.
# 22 Aug 2005: Null-edit fallback for CLEANUP_DATE
# 23 Aug 2005: CLEANUP_DATE enhancements for weird cases
# 25 Aug 2005: Fix some regexps with \Q and \E 
# 04 Sep 2005: Add logic to handle {{cfm}}
# 04 Sep 2005: Mark changeCategory() edits as minor, by request
# 04 Sep 2005: Fix editing bug in transferText()
# 10 Sep 2005: Add OPENTASK_UPDATE functionality
# *** 10 Sep 2005: Publish update ***
# 12 Sep 2005: add getCategoryImages() and add it to depopulateCat()
# 14 Sep 2005: urlEncode() improvements
# 17 Sep 2005: Add "cleanup" to OPENTASK_UPDATE
# 18 Sep 2005: Add "authority" feature to DEPOPULATE_CAT
# 18 Sep 2005: Prevent infinite loop in interpretCommands()
# 19 Sep 2005: moveCategoryContents() always retains sortkeys; remove
#              extraneous arguments.
# 19 Sep 2005: Preserve whitespace in sortkeys.
# 12 Oct 2005: Print a helpful report from OPENTASK_UPDATE 
# 17 Oct 2005: Fix history-losing bug for OPENTASK_UPDATE
# 22 Oct 2005: Increase OPENTASK_UPDATE character limit to 130
# 27 Oct 2005: Update editbox scrape regexp
# 27 Oct 2005: Add ability to get more than 200 articles from a category
# 27 Oct 2005: Add 3 more major categories to OPENTASK_UPDATE
# 28 Oct 2005: Allow 3 cleanup month categories to be featured at once 
#              in OPENTASK_UPDATE
# 29 Oct 2005: Add "Category:Wikipedia articles needing priority
#              cleanup" to CLEANUP in OPENTASK_UPDATE
# 04 Nov 2005: Add "rough mode" to make batching of null edits less
#              painful
# *** 12 Nov 2005: Publish update ***

#################

use strict;
use Time::HiRes;

# The following may be helpful in debugging character encoding
# problems.

use utf8;
#use encoding 'utf8';

# Initialization
use LWP::UserAgent;
use HTTP::Cookies;
use HTTP::Request::Common qw(POST);
use HTML::Entities;
print "\n";

# LWP:UserAgent is a library which allows us to create a "user agent"
# object that handles the low-level details of making HTTP requests.

$::ua = LWP::UserAgent->new(timeout => 300);
$::ua->agent("Pearle Wisebot/0.1");
$::ua->cookie_jar(HTTP::Cookies->new(file => "/home/beland/wikipedia/pearle-wisebot/cookies.pearle.txt",
				     autosave => 1));
$::ua->cookie_jar->load();

# Hot pipes
$| = 1;


# ---
# test();
#sub test
#{
#    my ($target, $text, $editTime, $startTime, $token);
#
#    $target = "Wikipedia:Sandbox";
#    ($text, $editTime, $startTime, $token) = getPage($target);
#    print $text;
#    $text .= "\Eat my electrons! -- Pearle\n";
#    print "---\n";
#    postPage ($target, $editTime, $startTime, $token, $text, "Test 008"); 
#    die ("Test complete.");
#}
# ---


interpretCommand(@ARGV);

sub interpretCommand
{

    my ($command, @arguments, $i, $line, $argument, @newArguments,
	$from, $to, $page, $pageCopy);

    ($command, @arguments) = @_;

    $command =~ s/\*\s*//;

    myLog(`date`);
    myLog ($command.": ".join(" ", @arguments)."\n");
    print `date`;
    print $command.": ".join(" ", @arguments)."\n";

    if ($command eq "POST_STDIN")
    {
	if ($arguments[2] ne "")
	{
	    myLog ("Too many arguments to POST_STDIN.\n");
	    die ("Too many arguments to POST_STDIN.\n");
	}
	postSTDIN($arguments[0],$arguments[1]);
    }
    elsif ($command eq "POST_STDIN_NULLOK")
    {
	if ($arguments[2] ne "")
	{
	    myLog ("Too many arguments to POST_STDIN.\n");
	    die ("Too many arguments to POST_STDIN.\n");
	}
	$::nullOK = "yes";
	postSTDIN($arguments[0],$arguments[1]);
	$::nullOK = "no";
    }
    elsif ($command eq "MOVE_CONTENTS")
    {
	if ($arguments[2] ne "")
	{
	    if (($arguments[3] eq "")
		and ($arguments[1] eq "->"))
	    {
		moveCategoryContents($arguments[0],$arguments[2]);
		return();
	    }
	    else
	    {
		myLog ("Too many arguments to MOVE_CONTENTS.\n");
		die ("Too many arguments to MOVE_CONTENTS.\n");
	    }
	}
	moveCategoryContents($arguments[0],$arguments[1],"no");
    }
    elsif ($command eq "MOVE_CONTENTS_INCL_CATS")
    {
	if ($arguments[2] ne "")
	{
	    if (($arguments[3] eq "")
		and ($arguments[1] eq "->"))
	    {
		moveCategoryContents($arguments[0],$arguments[2],"yes");
		return();
	    }
	    else
	    {
		myLog ("Too many arguments to MOVE_CONTENTS_INCL_CATS.\n");
		die ("Too many arguments to MOVE_CONTENTS_INCL_CATS.\n");
	    }
	}
	moveCategoryContents($arguments[0],$arguments[1],"yes");
    }
    elsif ($command eq "REMOVE_X_FROM_CAT")
    {
	if ($arguments[2] ne "")
	{
	    myLog ("Too many arguments to REMOVE_X_FROM_CAT.\n");
	    die ("Too many arguments to REMOVE_X_FROM_CAT.\n");
	}
	removeXFromCat($arguments[0],$arguments[1],"");
    }
    elsif ($command eq "DEPOPULATE_CAT")
    {
	if ($arguments[1] eq "per")
	{
	    if ($arguments[3] ne "")
	    {
		myLog ("Too many arguments to DEPOPULATE_CAT.\n");
		die ("Too many arguments to DEPOPULATE_CAT.\n");
	    }
	    depopulateCat($arguments[0], $arguments[2]);
	}
	elsif ($arguments[1] ne "")
	{
	    myLog ("Too many arguments to DEPOPULATE_CAT.\n");
	    die ("Too many arguments to DEPOPULATE_CAT.\n");
	}
	depopulateCat($arguments[0]);
    }
    elsif ($command eq "PRINT_WIKITEXT")
    {
	if ($arguments[1] ne "")
	{
	    myLog ("Too many arguments to PRINT_WIKITEXT.\n");
	    die ("Too many arguments to PRINT_WIKITEXT.\n");
	}
	printWikitext($arguments[0]);
    }
    elsif ($command eq "ADD_CFD_TAG")
    {
	if ($arguments[1] ne "")
	{
	    myLog ("Too many arguments to ADD_CFD_TAG.\n");
	    die ("Too many arguments to ADD_CFD_TAG.\n");
	}
	addCFDTag($arguments[0]);
    }
    elsif ($command eq "REMOVE_CFD_TAG")
    {
	if ($arguments[1] ne "")
	{
	    myLog ("Too many arguments to REMOVE_CFD_TAG.\n");
	    die ("Too many arguments to REMOVE_CFD_TAG.\n");
	}
	removeCFDTag($arguments[0]);
    }
    elsif ($command eq "ADD_TO_CAT")
    {
	if ($arguments[2] ne "")
	{
	    myLog ("Too many arguments to ADD_TO_CAT.\n");
	    die ("Too many arguments to ADD_TO_CAT.\n");
	}
	addToCat($arguments[0],$arguments[1],"");
    }
    elsif ($command eq "ADD_TO_CAT_NULL_OK")
    {
	if ($arguments[2] ne "")
	{
	    myLog ("Too many arguments to ADD_TO_CAT_NULL_OK.\n");
	    die ("Too many arguments to ADD_TO_CAT_NULL_OK.\n");
	}
	$::nullOK = "yes";
	addToCat($arguments[0],$arguments[1],"");
	$::nullOK = "no";
    }
    elsif ($command eq "TRANSFER_TEXT")
    {
	if ($arguments[2] ne "")
	{
	    myLog ("Too many arguments to TRANSFER_TEXT.\n");
	    die ("Too many arguments to TRANSFER_TEXT.\n");
	}
	transferText($arguments[0], $arguments[1]);
    }
    # DON'T USE THE BELOW COMMAND; IT'S NOT IMPLEMENTED PROPERLY YET.
#    elsif ($command eq "LIST_TO_CAT_CHECK")
#    {
#	if ($arguments[2] ne "")
#	{
#	    myLog ("Too many arguments to LIST_TO_CAT_CHECK.\n");
#	    die ("Too many arguments to LIST_TO_CAT_CHECK.\n");
#	}
#	listToCat($arguments[0], $arguments[1], "no");
#    }
    elsif ($command eq "CHANGE_CATEGORY")
    {
	if ($arguments[3] ne "")
	{
	    myLog ("Too many arguments to CHANGE_CATEGORY.\n");
	    die ("Too many arguments to CHANGE_CATEGORY.\n");
	}
	changeCategory($arguments[0], $arguments[1], $arguments[2]);
    }
    elsif ($command eq "CLEANUP_DATE")
    {
	if ($arguments[0] ne "")
	{
	    myLog ("Too many arguments to CLEANUP_DATE.\n");
	    die ("Too many arguments to CLEANUP_DATE.\n");
	}
	cleanupDate();
    }
    elsif ($command eq "OPENTASK_UPDATE")
    {
	if ($arguments[0] ne "")
	{
	    myLog ("Too many arguments to OPENTASK_UPDATE.\n");
	    die ("Too many arguments to OPENTASK_UPDATE.\n");
	}
	opentaskUpdate();
    }
    elsif ($command eq "NULL_EDIT")
    {
	if ($arguments[1] ne "")
	{
	    myLog ("Too many arguments to NULL_EDIT.\n");
	    die ("Too many arguments to NULL_EDIT.\n");
	}
	nullEdit($arguments[0]);
    }
    # DON'T USE THE BELOW COMMAND; IT'S NOT IMPLEMENTED PROPERLY YET.
    #elsif ($command eq "ENFORCE_CATEGORY_REDIRECTS_CHECK")
    #{
    #	enforceCategoryRedirects("no");
    #}

    # This command is for remedial cleanup only.
    #elsif ($command eq "INTERWIKI_LOOP")
    #{
    #	interwikiLoop();
    #}

    elsif ($command eq "ENFORCE_CATEGORY_INTERWIKI")
    {
	if ($arguments[1] ne "")
	{
	    myLog ("Too many arguments to ENFORCE_CATEGORY_INTERWIKI.\n");
	    die ("Too many arguments to ENFORCE_CATEGORY_INTERWIKI.\n");
	}
	enforceCategoryInterwiki($arguments[0]);
    }

## Broken due to recent changes on WP:CFD    
#    elsif ($command eq "ENFORCE_CFD")
#    {
#	enforceCFD();
#    }
    elsif ($command eq "STOP")
    {
	myLog ("Stopped.");
	die ("Stopped.");
    }
    elsif (($command eq "READ_COMMANDS")
	   or ($command eq ""))
    {
	while (<STDIN>)
	{
	    $line = $_;

	    if ($line =~ m/READ_COMMANDS/)
	    {
		myLog ("interpretCommands(): Infinite loop!");
		die ("interpretCommands(): Infinite loop!");
	    }

	    if ($line =~ m/^\s*$/)
	    {
		next;
	    }
	    
	    $line =~ s/\s+$//s;
	    $line =~ s/\*\s*//;

	    if ($line =~ m/\[\[:?(.*?)\]\] -> \[\[:?(.*?)\]\]/)
	    {
		
		$line =~ s/\[\[:?(.*?)\]\] -> \[\[:?(.*?)\]\]//;
		$from = $1;
		$to = $2;
		$line =~ s/\s*$//;
		$from =~ s/ /_/g;
		$to =~ s/ /_/g;
		
		interpretCommand($line, $from, $to);
	    }
	    else
	    {
		while ($line =~ m/\[\[:?(.*?)\]\]/)
		{
		    $line =~ m/\[\[:?(.*?)\]\]/;
		    $page = $1;
		    $pageCopy = $page;
		    $page =~ s/ /_/g;
		    $line =~ s/\[\[:?\Q$pageCopy\E\]\]/$page/;
		    
		    if ($i++ > 100)
		    {
			die ("Possible infinite loop in interpretCommands() #2");
		    }
		}
		interpretCommand(split (" ", $line));
	    }

#	    unless (($line =~ m/TRANSFER_TEXT_CHECK/) or
# 	            ($line =~ m/ENFORCE_CATEGORY_INTERWIKI/))
	    unless ($line =~ m/TRANSFER_TEXT_CHECK/)		
	    {
		limit();
	    }
	}
	myLog ("Execution complete.\n");
	print ("Execution complete.\n");
    }
    else
    {
	myLog ("Unrecognized command '".$command."': ".join(" ", @arguments)."\n");
	die ("Unrecognized command '".$command."': ".join(" ", @arguments));
    }
}

sub limit
{
    my ($i);
    
    # Rate-limiting to avoid hosing the wiki server
    # Min 30 sec unmarked
    # Min 10 sec marked
    # May be raised by retry() if load is heavy

    ### ATTENTION ###
    # Increasing the speed of the bot to faster than 1 edit every 10
    # seconds violates English Wikipedia rules as of April, 2005, and
    # will cause your bot to be banned.  So don't change $normalDelay
    # unless you know what you are doing.  Other sites may have
    # similar policies, and you are advised to check before using your
    # bot at the default speed.
    #################

    if ($::speedLimit < 10)
    {
	$::speedLimit = 10;
    }
    $i = $::speedLimit;
    while ($i >= 0)
    {
	sleep (1);
	print STDERR "Sleeping $i seconds...\r";
	$i--;
    }
    print STDERR "                                   \r";
}

# perl pearle.pl POST_STDIN User:Pearle/categories-alpha "Update from 13 Oct 2004 database dump"
sub postSTDIN
{
    my ($text, $articleName, $comment, $editTime, $startTime, $junk, $token);

    $articleName = $_[0];
    $comment = $_[1];

    while (<STDIN>)
    {
	$text .= $_;
    }

    if ($text =~ m/^\s*$/)
    {
	myLog ("postSTDIN(): Null input.\n");
	die ("postSTDIN(): Null input.\n");
    }
    
    $::nullOK = "yes";
    ($junk, $editTime, $startTime, $token) = getPage($articleName);
    $::nullOK = "no";

    if ($comment eq "")
    {
	$comment = "Automated post";
    }
    postPage ($articleName, $editTime, $startTime, $token, $text, $comment);
}


# perl pearle.pl ADD_TO_CAT Page_name Category:Category_name sortkey
sub addToCat
{
    my ($text, $articleName, $category, $editTime, $startTime, $comment, $status,
	@junk, $sortkey, $token);

    $articleName = $_[0];
    $category = $_[1];
    $sortkey = $_[2];

    ($text, $editTime, $startTime, $token) = getPage($articleName);

    $comment = "Added to ${category}";

    ($status, $text, @junk) = addCatToText($category, $text, $sortkey, $articleName);
    if ($status ne "success")
    {
	return();
    }

    postPage ($articleName, $editTime, $startTime, $token, $text, $comment);
}


sub myLog
{
    open (LOG, ">>/home/beland/wikipedia/pearle-wisebot/log.txt") 
	|| die "Could not append to log!";
    print LOG $_[0];
    close (LOG);
}


sub getPage
{
    my ($target, $request, $response, $reply, $text, $text2,
	$editTime, $startTime, $attemptStartTime, $attemptFinishTime,
	$token, $targetSafe);

    $target = $_[0];

    if ($target =~ m/^\s*$/)
    {
	myLog("getPage: Null target.");
	die("getPage: Null target.");
    }

    $targetSafe = $target;
    $targetSafe =~ s/\&/%26/g;

    # Monitor wiki server responsiveness
    $attemptStartTime = Time::HiRes::time();



    # Create a request-object
    print "GET http://en.wikipedia.org/w/wiki.phtml?title=${targetSafe}&action=edit\n";
    myLog("GET http://en.wikipedia.org/w/wiki.phtml?title=${targetSafe}&action=edit\n");
    $request = HTTP::Request->new(GET => "http://en.wikipedia.org/w/wiki.phtml?title=${targetSafe}&action=edit");
    $response = $::ua->request($request);

    if ($response->is_success)
    {
	$reply = $response->content;

	# Monitor wiki server responsiveness
	$attemptFinishTime = Time::HiRes::time();
	retry ("success", "getPage", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));
	
	# This detects whether or not we're logged in.
	unless ($reply =~ m%<a href="/wiki/User_talk:Pearle">My talk</a>%)
	{
	    # We've lost our identity.
	    myLog ("Wiki server is not recognizing me (1).\n---\n${reply}\n---\n");
	    die ("Wiki server is not recognizing me (1).\n");
	}

	$reply =~ m%<textarea\s+tabindex='1'\s+accesskey=","\s+name="wpTextbox1"\s+id="wpTextbox1"\s+rows='25'\s+cols='80'\s+>(.*?)</textarea>%s;
	$text = $1;

	$reply =~ m/value="(\d+)" name="wpEdittime"/;
	$editTime = $1;

	# Added 22 Aug 2005 to correctly handle articles that have
	# been undeleted
	$reply =~ m/value="(\d+)" name="wpStarttime"/;
	$startTime = $1;

	# Added 9 Mar 2005 after recent software change.
	$reply =~ m/value="(\w+)" name="wpEditToken"/;
	$token = $1;
	###

	if (($text =~ m/^\s*$/)
	    and ($::nullOK ne "yes"))
	{
	    myLog ("getPage($target): Null text!\n");
	    myLog "\n---\n$reply\n---\n";
	    if ($::roughMode eq "yes")
	    {
		return;
	    }
	    else
	    {
		die ("getPage($target): Null text!\n");
	    }
	}

	if (($editTime =~ m/^\s*$/)
	    and ($::nullOK ne "yes"))
	{
	    myLog ("getPage($target): Null time!\n");
	    myLog "\n---\n$reply\n---\n";
	    die ("getPage($target): Null time!\n");
	}

	if (($text =~ m/>/) or
	    ($text =~ m/</))
	{
	    print $text;
	    myLog "\n---\n$text\n---\n";
	    myLog ("getPage($target): Bad text suck!\n");
	    die ("getPage($target): Bad text suck!\n");
	}
	
	# Change ( &quot; -> " ) etc
	# This function is from HTML::Entities.
	decode_entities($text);

	# This may or may not actually work
	$::ua->cookie_jar->save();

	return ($text, $editTime, $startTime, $token);
    } 
    else 
    {
	myLog ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${targetSafe}&action=edit\n".$response->content."\n");
	print ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${targetSafe}&action=edit\n".$response->content."\n");
	# 50X HTTP errors mean there is a problem connecting to the wiki server
	if (($response->status_line =~ m/^500/)
	    or ($response->status_line =~ m/^502/)
	    or ($response->status_line =~ m/^503/))
	{
	    return(retry("getPage", @_));
	}
	else
	{
	    # Unhandled HTTP response
	    die ("getPage($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/w/wiki.phtml?title=${targetSafe}&action=edit\n");
	}
    }
}

sub postPage
{
    my ($request, $response, $pageName, $textToPost, $summaryEntry,
	$editTime, $startTime, $actual, $expected, $attemptStartTime,
	$attemptFinishTime, $date, $editToken, $minor, $pageNameSafe);

    $pageName = $_[0];
    $editTime = $_[1];
    $startTime = $_[2];
    $editToken = $_[3];
    $textToPost = $_[4];
    $summaryEntry = $_[5]; # Max 200 chars!
    $minor = $_[6];

    $summaryEntry = substr($summaryEntry, 0, 200);

    if ($pageName eq "")
    {
	myLog ("postPage(): Empty pageName.\n"); 
	die ("postPage(): Empty pageName.\n"); 
    }

    if ($summaryEntry eq "")
    {
	$summaryEntry = "Automated editing.";
    }
    
    # Monitor server responsiveness
    $attemptStartTime = Time::HiRes::time();

    $pageNameSafe = $pageName;
    $pageNameSafe =~ s/\&/%26/g;

    if ($minor eq "yes")
    {
	$request = POST "http://en.wikipedia.org/w/wiki.phtml?title=${pageNameSafe}&action=submit",
	[wpTextbox1 => $textToPost,
	 wpSummary => $summaryEntry,
	 wpSave => "Save page",
	 wpMinoredit => "on",
	 wpEditToken => $editToken,
	 wpStarttime => $startTime,
	 wpEdittime => $editTime];
	# Optional: wpWatchthis
    }
    else
    {
	$request = POST "http://en.wikipedia.org/w/wiki.phtml?title=${pageNameSafe}&action=submit",
	[wpTextbox1 => $textToPost,
	 wpSummary => $summaryEntry,
	 wpSave => "Save page",
	 wpEditToken => $editToken,
	 wpStarttime => $startTime,
	 wpEdittime => $editTime];
	# Optional: wpWatchthis, wpMinoredit
    }

    # ---
    ## If posts are failing, you can uncomment the below to see what
    ## HTTP request is being made.
    # myLog($request->as_string());
    # print $request->as_string();	$::speedLimit = 60 * 10;
    # print $::ua->request($request)->as_string;
    # ---

    myLog("POSTing...");
    print "POSTing...";
    # Pass request to the user agent and get a response back
    $response = $::ua->request($request);
    myLog("POSTed.\n");
    print "POSTed.\n";


    if ($response->content =~ m/Please confirm that really want to recreate this article./)
    {
	myLog ($response->content."\n");
	die ("Deleted article conflict! See log!");
    }


    # Check the outcome of the response
    if (($response->is_success) or ($response->is_redirect))
    {
	# Monitor server responsiveness
	$attemptFinishTime = Time::HiRes::time();
	retry ("success", "postPage", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));


	$expected = "302 Moved Temporarily";
	$actual = $response->status_line;
	if (($expected ne $actual)
	    and ($actual ne "200 OK"))
	{
	    myLog ("postPage(${pageName}, $editTime)#1 - expected =! actual\n");
	    myLog ($request->as_string());
	    myLog ("EXPECTED: '${expected}'\n");
	    myLog ("  ACTUAL: '${actual}'\n");

	    if ($::roughMode eq "yes")
	    {
		return();
	    }
	    else
	    {
		die ("postPage(${pageName}, $editTime)#1 - expected =! actual - see log\n");
	    }
	}

	$expected = "http://en.wikipedia.org/wiki/${pageName}";
	#$expected =~ s/\'/%27/g;
	#$expected =~ s/\(/%28/g;
	#$expected =~ s/\)/%29/g;
	#$expected =~ s/,/%2C/g;
	$expected = urlEncode($expected);

	$actual = $response->headers->header("Location");


	if (($expected ne $actual)
	    and ($::roughMode ne "yes")
 	    and !(($actual eq "") 
		  and ($response->status_line eq "200 OK")))
 	{
 	    myLog ("postPage(${pageName}, $editTime)#2 - expected =! actual\n");
	    myLog ("EXPECTED: '${expected}'\n");
	    myLog ("  ACTUAL: '${actual}'\n");
	    die ("postPage(${pageName}, $editTime)#2 - expected =! actual - see log\n");
	}


	if ($response->content =~ m/<h1 class="firstHeading">Edit conflict/)
	{
	    myLog ("Edit conflict on '$pageName' at '$editTime'!\n");
	    die ("Edit conflict on '$pageName' at '$editTime'!\n");
	}

	$::ua->cookie_jar->save();
	return ($response->content);
    }
    else
    {
	$date = `date`;
	$date =~ s/\n//g;
	myLog ("Bad response to POST to $pageNameSafe at $date.\n".$response->status_line."\n".$response->content."\n");

	# 50X HTTP errors mean there is a problem connecting to the wiki server
	if (($response->status_line =~ m/^500/)
	    or ($response->status_line =~ m/^502/)
	    or ($response->status_line =~ m/^503/))
	{
	    print "Bad response to POST to $pageNameSafe at $date.\n".$response->status_line."\n".$response->content."\n";
	    return(retry("postPage", @_));
	}
	else
	{
	    # Unhandled HTTP response
	    die ("Bad response to POST to $pageNameSafe at $date.\n".$response->status_line."\n");
	}
    }
}

sub urlSafe
{
    # This function is no longer called because the LWP::UserAgent and
    # HTTP::Request libraries handle character escaping.
}

# perl pearle.pl MOVE_CONTENTS_INCL_CATS Category:From_here Category:To_here
sub moveCategoryContents 
{
    my (@articles, $categoryFrom, $categoryTo, $article, $status,
	@subcats, $includeCategories, $subcat, @junk);


    # -- INITIALIZATION -- 

    $categoryFrom = $_[0];
    $categoryTo = $_[1];
    $includeCategories = $_[2];

    if ($categoryFrom =~ m/^\[\[:(Category:.*?)\]\]/)
    {
	$categoryFrom =~ s/^\[\[:(Category:.*?)\]\]/$1/;
	$categoryFrom =~ s/\s+/_/g;
    }

    if ($categoryTo =~ m/^\[\[:(Category:.*?)\]\]/)
    {
	$categoryTo =~ s/^\[\[:(Category:.*?)\]\]/$1/;
	$categoryTo =~ s/\s+/_/g;
    }

    $categoryFrom =~ s/^\[\[://;
    $categoryTo =~ s/^\[\[://;
    $categoryFrom =~ s/\]\]$//;
    $categoryTo =~ s/\]\]$//;

    unless (($categoryFrom =~ m/^Category:/) and
	    ($categoryTo =~ m/^Category:/))
    {
	myLog ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");
	die ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");
    }


    transferText ($categoryFrom, $categoryTo);


    # Subcategory transfer
    if ($includeCategories eq "yes")
    {
	@subcats = getSubcategories($categoryFrom);
	
	foreach $subcat (@subcats)
	{
	    if ($subcat =~ m/^\s*$/)
	    {
		next;
	    }

	    $subcat = urlDecode($subcat);
	    
	    print "changeCategory($subcat, $categoryFrom, $categoryTo) c\n";
	    myLog "changeCategory($subcat, $categoryFrom, $categoryTo) c\n";
	    changeCategory($subcat, $categoryFrom, $categoryTo);
	    limit();
	}
    }

    # Article transfer
    @articles = getCategoryArticles($categoryFrom);    

#    foreach $article (reverse(@articles))
    foreach $article (@articles)
    {
	if ($article =~ m/^\s*$/)
	{
	    next;
	}

	$article = urlDecode($article);
	print "changeCategory($article, $categoryFrom, $categoryTo) a\n";
	myLog "changeCategory($article, $categoryFrom, $categoryTo) a\n";
	changeCategory($article, $categoryFrom, $categoryTo);
	limit();
    }
}

# perl pearle.pl DEPOPULATE_CAT Category:To_be_depopulated
sub depopulateCat #($category);
{
    my (@articles, $category, $article, $status, @subcats, $subcat,
	@junk, $authority);

    $category = $_[0];
    $authority = $_[1];


    if ($category =~ m/^\[\[:(Category:.*?)\]\]/)
    {
	$category =~ s/^\[\[:(Category:.*?)\]\]/$1/;
	$category =~ s/\s+/_/g;
    }

    unless ($category =~ m/^Category:/)

    {
	myLog ("depopulateCat(): Are you sure '$category' is a category?\n");
	die ("depopulateCat(): Are you sure '$category' is a category?\n");
    }

    # Remove all subcategories
    @subcats = getSubcategories($category);
    foreach $subcat (@subcats)
    {
	$subcat = urlDecode($subcat);

	print "removeXFromCat($subcat, $category) c\n";
	myLog "removeXFromCat($subcat, $category) c\n";
	($status, @junk) = removeXFromCat($subcat, $category, $authority);
	unless ($status == 0)
	{
	    myLog ("Status: $status\n");
	    print "Status: $status\n";
	}
	limit();
    }

    # Remove all articles
    @articles = getCategoryArticles($category);    
    #foreach $article (reverse(@articles))
    foreach $article (@articles)
    {
	$article = urlDecode($article);

	print "removeXFromCat($article, $category) a\n";
	myLog "removeXFromCat($article, $category) a\n";
	($status, @junk) = removeXFromCat($article, $category, $authority);
	unless ($status == 0)
	{
	    myLog ("Status: $status\n");
	    print "Status: $status\n";
	}
	limit();
    }

    # Remove all images
    @articles = getCategoryImages($category);
    #@articles = reverse(getCategoryImages($category));
    foreach $article (@articles)
    {
	$article = urlDecode($article);

	print "removeXFromCat($article, $category) i\n";
	myLog "removeXFromCat($article, $category) i\n";
	($status, @junk) = removeXFromCat($article, $category, $authority);
	unless ($status == 0)
	{
	    myLog ("Status: $status\n");
	    print "Status: $status\n";
	}
	limit();
    }
}

# perl pearle.pl REMOVE_X_FROM_CAT Article_name Category:Where_the_article_is
sub removeXFromCat
{

    my ($text, $articleName, $category, $editTime, $startTime,
	$comment, $catTmp, $sortkey, @junk, $token, $categoryUnd,
	$categoryHuman, $authority);

    
    $articleName = $_[0];
    $category = $_[1];
    $authority = $_[2];

    #urlSafe($articleName);
    #urlSafe($category);

    unless ($category =~ m/^Category:\w+/)
    {
	myLog ("addToCat(): Bad format on category.\n");
	die ("addToCat(): Bad format on category.\n");
    }

    ($text, $editTime, $startTime, $token) = getPage($articleName);
    
    $comment = "Removed from ${category}";

    if ($authority ne "")
    {
	$authority =~ s/_/ /g;
	$comment = "Removed from ${category} (per [[${authority}]])";

    }

    # Convert underscore to spaces; this is human-readable.
    $category =~ s/_/ /g;

    $categoryHuman = $category;

    # Insert possible whitespace
    $category =~ s/^Category://;
#    $category = "Category:\\s*\\Q".$category."\\E"; # THIS DOES NOT WORK
    $category = "Category:\\s*".$category;
    $category =~ s%\(%\\(%g;
    $category =~ s%\)%\\)%g;
    $category =~ s%\'%\\\'%g;
    $categoryUnd = $category;
    $categoryUnd =~ s/ /_/g;

    unless (($text =~ m/\[\[\s*${category}\s*\]\]/is)
	    or ($text =~ m/\[\[\s*${category}\s*\|.*?\]\]/is)
	    or ($text =~ m/\[\[\s*${categoryUnd}\s*\]\]/is)
	    or ($text =~ m/\[\[\s*${categoryUnd}\s*\|.*?\]\]/is))
    {
	print "removeXFromCat(): $articleName is not in '$category'.\n";
        myLog ("removeXFromCat(): $articleName is not in '$category'.\n");

	### TEMPORARY ###
	### Uncomment these lines if you want category remove attempts
	### to trigger null edits.  This is useful if you have have
	### changed the category on a template, but due to a bug this
	### does not actually move member articles until they are
	### edited.
	($text, @junk) = fixCategoryInterwiki($text);
	postPage ($articleName, $editTime, $startTime, $token, $text, "Mostly null edit to actually remove from ${categoryHuman}", "yes");
	limit();
	### TEMPORARY ###
	return(1);
    }

    if ($text =~ m/^\s*\#REDIRECT/is)
    {
	print "addToCat(): $articleName is a redirect!\n";
	myLog ("addToCat(): $articleName is a redirect!\n");
	return(2);
    }

    # Remember to PRESERVE WHITESPACE for sortkeys!
    $text =~ m/\[\[\s*${category}\s*\|(.*?)\]\]/is;
    $sortkey = $1;
    if ($sortkey eq "")
    {
	$text =~ m/\[\[\s*${categoryUnd}\s*\|(.*?)\]\]/is;
    }

    # Remove the page from the category and any trailing newline.
    $text =~ s/\[\[\s*${category}\s*\|?(.*?)\]\]\n?//isg;
    $text =~ s/\[\[\s*${categoryUnd}\s*\|?(.*?)\]\]\n?//isg;

    ($text, @junk) = fixCategoryInterwiki($text);

    postPage ($articleName, $editTime, $startTime, $token, $text, $comment);
    return(0, $sortkey);
}

# perl pearle.pl PRINT_WIKITEXT Article_you_want_to_get
## Warning: Saves to a file in the current directory with the same name
## as the article, plus another file with the .html extention.
sub printWikitext
{
    my ($editTime, $startTime, $text, $target, $token);

    $target = $_[0];

    $target =~ s/^\[\[://;
    $target =~ s/\]\]$//;

    ($text, $editTime, $startTime, $token) = getPage($target);

    # Save the wikicode version to a file.
    open (WIKITEXT, ">./${target}");
    print WIKITEXT $text;
    close (WIKITEXT);

    # Save the HTML version to a file.
    print `wget http://en.wikipedia.org/wiki/${target} -O ./${target}.html`;
}


# Get a list of the names of articles in a given category.
sub getCategoryArticles
{

    my ($target, $request, $response, $reply, $articles, $article,
	@articles, $attemptStartTime, $attemptFinishTime,
	$targetSpace, $offset, $numberOfArticles, $url,
	@moreArticles);


    $target = $_[0];
    $offset = $_[1];

    # Need both _ and spaces for precise matching later
    $target =~ s/ /_/g;
    $targetSpace = $target;
    $targetSpace =~ s/_/ /g;


    #urlSafe ($target);


    unless ($target =~ m/^Category:/)

    {
	myLog ("getCategoryArticles(): Are you sure '$target' is a category?\n");
	die ("getCategoryArticles(): Are you sure '$target' is a category?\n");
    }

    # Monitor wiki server responsiveness
    $attemptStartTime = Time::HiRes::time();

    if ($offset eq "")
    {
	$url = "http://en.wikipedia.org/wiki/${target}";
    }
    else
    {
	$url = "http://en.wikipedia.org/w/index.php?title=${target}&from=${offset}";
    }

    # Create a request-object
    if ($offset eq "")
    {
	print "GET ${url}\n";
    }
    myLog("GET ${url}\n");
    $request = HTTP::Request->new(GET => "${url}");
    $response = $::ua->request($request);

    if ($response->is_success)
    {
	# Monitor wiki server responsiveness
	$attemptFinishTime = Time::HiRes::time();
	retry ("success", "getCategoryArticles", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));

	$reply = $response->content;

	# This detects whether or not we're logged in.
	unless ($reply =~ m%<a href="/wiki/User_talk:Pearle">My talk</a>%)
	{
	    # We've lost our identity.
	    myLog ("Wiki server is not recognizing me (2).\n---\n${reply}\n---\n");
	    die ("Wiki server is not recognizing me (2).\n");
	}

	$articles = $reply;
	$articles =~ s%^.*?<h2>Articles in category.*?</h2>%%s;
	$articles =~ s%<div class="printfooter">.*?$%%s;
	@articles = $articles =~ m%<li><a href="/wiki/(.*?)" title=%sg;

	if ($reply =~ m%<a\s+href=\"/w/index.php\?title=${target}\&amp;from=(.*?)\"\s+title=\"${targetSpace}\">next 200</a>%s)
	{
	    sleep (1); # Throttle GETs
	    @moreArticles = getCategoryArticles($target, $1);
	    @articles = (@articles, @moreArticles);
	}

	$::ua->cookie_jar->save();

	$numberOfArticles = @articles;

	if ($offset eq "")
	{
	    print "Got $numberOfArticles articles.\n";
	    myLog ("Got $numberOfArticles articles.\n");
	}
	return decodeArray(@articles);
    } 
    else 
    {
	myLog ("getCategoryArticles($target): HTTP ERR (".$response->status_line.") ${url}\n".$response->content."\n");

	# 50X HTTP errors mean there is a problem connecting to the wiki server
	if (($response->status_line =~ m/^500/)
	    or ($response->status_line =~ m/^502/)
	    or ($response->status_line =~ m/^503/))
	{
	    print "getCategoryArticles($target): HTTP ERR (".$response->status_line.") ${url}\n".$response->content."\n";
	    return(retry("getCategoryArticles", @_));
	}
	else
	{
	    # Unhandled HTTP response
	    die ("getCategoryArticles($target): HTTP ERR (".$response->status_line.") ${url}\n");
	}
    }
}

sub decodeArray
{
    my($title, @newTitles);

    foreach $title (@_)
    {
	$title = urlDecode ($title);
	@newTitles = (@newTitles, $title);
    }

    return @newTitles;
}

# Get a list of the names of subcategories of a given category.
sub getSubcategories
{
    my ($target, $request, $response, $reply, $subcats, $subcat,
	@subcats, $attemptStartTime, $attemptFinishTime);

    $target = $_[0];

    #urlSafe ($target);


    unless ($target =~ m/^Category:/)

    {
	myLog ("getSubcategories(): Are you sure '$target' is a category?\n");
	die ("getSubcategories(): Are you sure '$target' is a category?\n");
    }

    # Monitor wiki server responsiveness
    $attemptStartTime = Time::HiRes::time();

    # Create a request-object
    print "GET http://en.wikipedia.org/wiki/${target}\n";
    myLog("GET http://en.wikipedia.org/wiki/${target}\n");
    $request = HTTP::Request->new(GET => "http://en.wikipedia.org/wiki/${target}");
    $response = $::ua->request($request);

    if ($response->is_success)
    {
	# Monitor wiki server responsiveness
	$attemptFinishTime = Time::HiRes::time();
	retry ("success", "getSubcategories", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));

	$reply = $response->content;

	# This detects whether or not we're logged in.
	unless ($reply =~ m%<a href="/wiki/User_talk:Pearle">My talk</a>%)
	{
	    # We've lost our identity.
	    myLog ("Wikipedia is not recognizing me (3).\n---\n${reply}\n---\n");
	    die ("Wikipedia is not recognizing me (3).\n");
	}

	$subcats = $reply;


	if ($subcats =~ m%^.*?<h2>Subcategories</h2>(.*?)<h2>Articles in category.*?</h2>.*?$%s)
	{
	    $subcats =~ s%^.*?<h2>Subcategories</h2>(.*?)<h2>Articles in category.*?</h2>.*?$%$1%s;
	}
	else
	{
	    return ();
	}

	@subcats = $subcats =~ m%<li><a href="/wiki/(.*?)" title=%sg;

	$::ua->cookie_jar->save();
	return decodeArray(@subcats);
    } 
    else 
    {
	myLog ("getSubcategories($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n");
	
	# 50X HTTP errors mean there is a problem connecting to the wiki server
	if (($response->status_line =~ m/^500/)
	    or ($response->status_line =~ m/^502/)
	    or ($response->status_line =~ m/^503/))
	{
	    print "getSubcategories($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n";
	    return(decodeArray(retry("getCategoryArticles", @_)));
	}
	else
	{
	    # Unhandled HTTP response
	    die ("getSubcategories($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n");
	}
    }
}


# perl pearle.pl ADD_CFD_TAG Category:Category_name
sub addCFDTag
{
    my ($text, $category, $editTime, $startTime, $comment, $catTmp, @junk, $token);

    $category = $_[0];

    #urlSafe($category);

    unless ($category =~ m/^Category:\w+/)
    {
	myLog ("addCFDTag(): Bad format on category.\n");
	die ("addCFDTag(): Bad format on category.\n");
    }

    $::nullOK = "yes";
    ($text, $editTime, $startTime, $token) = getPage($category);
    $::nullOK = "no";

    $comment = "Nominated for deletion or renaming";

    if (($text =~ m/\{\{cfd\}\}/is) or
	($text =~ m/\{\{cfm/is) or
	($text =~ m/\{\{cfr/is))
    {
	print "addCFDTag(): $category is already tagged.\n";
	myLog ("addCFDTag(): $category is already tagged.\n");
	return();
    }

    if ($text =~ m/^\s*\#REDIRECT/is)
    {
	print "addCFDTag(): $category is a redirect!\n";
	myLog ("addCFDTag(): $category is a redirect!\n");
	return();
    }


    # Add the CFD tag to the beginning of the page.
    $text = "{{cfd}}\n".$text;

    ($text, @junk) = fixCategoryInterwiki($text);

    postPage ($category, $editTime, $startTime, $token, $text, $comment);
}


# perl pearle.pl REMOVE_CFD_TAG Category:Category_name
sub removeCFDTag
{
    my ($text, $category, $editTime, $startTime, $comment, $catTmp, @junk, $token);

    $category = $_[0];

    #urlSafe($category);

    unless ($category =~ m/^Category:\w+/)
    {
	myLog ("removeCFDTag(): Bad format on category.\n");
	die ("removeCFDTag(): Bad format on category.\n");
    }

    $::nullOK = "yes";
    ($text, $editTime, $startTime, $token) = getPage($category);
    $::nullOK = "no";

    $comment = "De-listed from [[Wikipedia:Categories for deletion]]";

    unless (($text =~ m/\{\{cfd\}\}/is) or
	    ($text =~ m/\{\{cfm/is) or
	    ($text =~ m/\{\{cfr/is))
    {
	print "removeCFDTag(): $category is not tagged.\n";
	myLog ("removeCFDTag(): $category is not tagged.\n");
	return();
    }

    if ($text =~ m/^\s*\#REDIRECT/is)
    {
	print "removeCFDTag(): $category is a redirect!\n";
	myLog ("removeCFDTag(): $category is a redirect!\n");
	return();
    }


    # Remove the CFD tag.
    $text =~ s/{{cfd}}\s*//gis;
    $text =~ s/\{\{cfr.*?\}\}\s*//is;
    $text =~ s/\{\{cfm.*?\}\}\s*//is;
    $text =~ s/\{\{cfru.*?\}\}\s*//is;

    ($text, @junk) = fixCategoryInterwiki($text);

    postPage ($category, $editTime, $startTime, $token, $text, $comment);
}



# perl pearle.pl TRANSFER_TEXT Category:From_here Category:To_there

## Note that this code is called automatically whenever moving a
## category, so you probably don't need to call it yourself from the
## command line.

sub transferText
{

    my ($source, $destination, $sourceText, $destinationText,
	$sourceTime, $destinationTime, @sourceCategories,
	@destinationCategories, $category, $lastCategory,
	$sourceTextOrig, $destinationTextOrig, $comment, $sourceHuman,
	$destinationHuman, $noMergeFlag, $sourceToken,
	$destinationToken, $junk, $sourceStartTime,
	$destinationStartTime);


    $source = $_[0];
    $destination = $_[1];
    $comment = "Cleanup per [[WP:CFD]] (moving $source to $destination)";
    
    # Make human-readable versions of these variables for use in edit summaries
    $sourceHuman = $source;
    $sourceHuman =~ s/_/ /g;
    $destinationHuman = $destination;
    $destinationHuman =~ s/_/ /g;

    unless (($source =~ m/^Category:/) and
	    ($destination =~ m/^Category:/))
    {
	myLog ("transferText(): Are you sure these are categories? ".$source."/".$destination."\n");
	die ("transferText(): Are you sure these are categories? ".$source."/".$destination."\n");
    }    
    

    ($sourceText, $sourceTime, $sourceStartTime, $sourceToken) = getPage($source);

    # Avoid double runs!

    # This text must be the same as that which is implanted below, and
    # it should be an HTML comment, so that it's invisible.
    if ($sourceText =~ m/<\!--PEARLE-MOVE-CATEGORY-CONTENTS-SRC-FLAG-->/)
    {
	return;
    }

    $sourceTextOrig = $sourceText;
    $sourceText =~ s/{{cfd}}//is;
    $sourceText =~ s/\{\{cfr.*?\}\}\s*//is;
    $sourceText =~ s/\{\{cfm.*?\}\}\s*//is;
    $sourceText =~ s/\{\{cfru.*?\}\}\s*//is;
    $sourceText =~ s/^\s+//s;
    $sourceText =~ s/\s+$//s;

    $::nullOK = "yes";
    ($destinationText, $destinationTime, $destinationStartTime, $destinationToken)
	= getPage($destination);
    $::nullOK = "no";

    $destinationTextOrig = $destinationText;
    $destinationText =~ s/{{cfd}}//is;
    $destinationText =~ s/\{\{cfm.*?\}\}\s*//is;
    $destinationText =~ s/\{\{cfr.*?\}\}\s*//is;
    $destinationText =~ s/\{\{cfru.*?\}\}\s*//is;
    $destinationText =~ s/^\s+//s;
    $destinationText =~ s/\s+$//s;

    # To help keep things straight when we're in a loop.
    print STDOUT "\n----\n";

    if (($sourceText eq "") and
	($destinationText ne ""))
    {
	# The HTML comment must be the same as that above.
	$sourceText = "{{cfd}}\nThis category has been moved to [[:$destinationHuman]].  Any remaining articles and subcategories will soon be moved by a bot unless otherwise noted on [[WP:CFD]].\n<!--PEARLE-MOVE-CATEGORY-CONTENTS-SRC-FLAG-->\n";
    }	
    elsif (($sourceText ne "") and
	($destinationText eq ""))
    {
	$destinationText = $sourceText;
	# The HTML comment must be the same as that above.
	$sourceText = "{{cfd}}\nThis category has been moved to [[:$destinationHuman]].  Any remaining articles and subcategories will soon be moved by a bot unless otherwise noted on [[WP:CFD]].\n<!--PEARLE-MOVE-CATEGORY-CONTENTS-SRC-FLAG-->\n";
    }
    elsif (($sourceText ne "") and
	   ($destinationText ne ""))
    {
	@sourceCategories = $sourceText =~ m/\[\[\s*(Category:.*?)\s*\]\]/gs;
	@destinationCategories = $destinationText =~ m/\[\[\s*(Category:.*?)\s*\]\]/gs;

	$sourceText =~ s/\[\[\s*(Category:.*?)\s*\]\]\s*//gs;
	$sourceText =~ s/^\s+//s;
	$sourceText =~ s/\s+$//s;
	$destinationText =~ s/\[\[\s*(Category:.*?)\s*\]\]\s*//gs;
	$destinationText =~ s/^\s+//s;
	$destinationText =~ s/\s+$//s;

	$destinationText = $sourceText."\n".$destinationText;
	$destinationText =~ s/^\s+//s;
	$destinationText =~ s/\s+$//s;
	
	foreach $category (sort (@sourceCategories, @destinationCategories))
	{
	    if ($category eq $lastCategory)
	    {
		next;
	    }
	    $destinationText .= "\n[[${category}]]";
	    $lastCategory = $category;
	}
	# The HTML comment must be the same as that above.
	$sourceText = "{{cfd}}\nThis category has been moved to [[:$destinationHuman]].  Any remaining articles and subcategories will soon be moved by a bot unless otherwise noted on [[WP:CFD]].\n<!--PEARLE-MOVE-CATEGORY-CONTENTS-SRC-FLAG-->\n";
    }

    $sourceText =~ s/\n\s+\n/\n\n/sg;
    $destinationText =~ s/\n\s+\n/\n\n/sg;

    # You may need to futz with this, depending on the templates
    # currently in use.
    unless (($sourceTextOrig =~ m/\{\{cfd\}\}/i)
	    or ($sourceTextOrig =~ m/\{\{cfr/i)
	    or ($sourceTextOrig =~ m/\{\{cfm/i))
    {
	print STDOUT "FATAL ERROR: $source was not tagged {{cfd}}, {{cfm}}, {{cfr}}, or {{cfru}}!\n";
	myLog("FATAL ERROR: $source was not tagged {{cfd}}, {{cfr}}, {{cfm}}, or {{cfru}}!\n");
	die("FATAL ERROR: $source was not tagged {{cfd}}, {{cfr}}, {{cfm}}, or {{cfru}}!\n");
    }

    if (($sourceText eq $sourceTextOrig) and
	($destinationText eq $destinationTextOrig))
    {
	print STDOUT "No changes for $source and $destination.\n";
	return();
    }

    if ($destinationTextOrig =~ m/^\s*$/)
    {
	print "No merging was required from $source into $destination.\n";
	$noMergeFlag = "yes";
    }

    unless ($noMergeFlag eq "yes")
    {
	$destinationText .= "{{pearle-manual-cleanup}}\n";
    }
    

    # Make sure category and interwiki links conform to style
    # guidelines.
    ($destinationText, $junk) = fixCategoryInterwiki($destinationText);


    # If we did have to change things around, print the changes and post them to the wiki.

    if ($sourceText ne $sourceTextOrig)
    {
	unless ($noMergeFlag eq "yes")
	{
	    print STDOUT "SOURCE FROM:\n%%%${sourceTextOrig}%%%\nSOURCE TO:\n%%%${sourceText}%%%\n";
	}
	postPage ($source, $sourceTime, $sourceStartTime, $sourceToken, $sourceText, $comment);
    }

    if ($destinationText ne $destinationTextOrig)
    {
	unless ($noMergeFlag eq "yes")
	{
	    print STDOUT "DESTINATION FROM:\n%%%${destinationTextOrig}%%%\nDESTINATION TO:\n%%%${destinationText}%%%\n";
	}
	postPage ($destination, $destinationTime, $destinationStartTime, $destinationToken, $destinationText, $comment);
    }
}


# Translate from HTTP URL encoding to the native character set.
sub urlDecode
{
    my ($input);
    
    $input = $_[0];

    $input =~ s/\%([a-f|A-F|0-9][a-f|A-F|0-9])/chr(hex($1))/eg;

    return ($input);
}

# Translate from the native character set to HTTP URL encoding.
sub urlEncode
{
    my ($char, $input, $output);

    $input = $_[0];

    foreach $char (split("",$input))
    {
#	if ($char =~ m/[a-z|A-Z|0-9|\-_\.\!\~\*\'\(\)]/)

	# The below exclusions should conform to Wikipedia practice
	# (possibly non-standard)
	if ($char =~ m/[a-z|A-Z|0-9|\-_\.\/:]/)
	{
	    $output .= $char;
	}
	elsif ($char eq " ")
	{
	    $output .= "+";
	}
	else
	{
	    $output .= uc(sprintf("%%%x", ord($char)));
	    # %HH where HH is the (Unicode?) hex code of $char
	}
    }

    return ($output);
}


# perl pearle.pl CHANGE_CATEGORY Article_name Category:From Category:To
sub changeCategory
{

    my ($articleName, $categoryFrom, $categoryTo, $editTime, $startTime, $text,
	$comment, $catTmp, $sortkey, $token, $junk, $categoryFromUnd);

    $articleName = $_[0];
    $categoryFrom = $_[1];
    $categoryTo = $_[2];
    
    #urlSafe($articleName);
    #urlSafe($categoryFrom);
    #urlSafe($categoryTo);

    unless (($categoryFrom =~ m/^Category:/) and
	    ($categoryTo =~ m/^Category:/))
    {
	myLog ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");
	die ("moveCategoryContents(): Are you sure these are categories? ".$categoryFrom."/".$categoryTo."\n");
    }

    if ($articleName =~ m/^\s*$/)
    {
	myLog("changeCategory(): Null target.");
	die("changeCategory(): Null target.");
    }

    ($text, $editTime, $startTime, $token) = getPage($articleName);
    
    $comment = "Moving from ${categoryFrom} to ${categoryTo}";

    # --- Start the removing part ---

    # Convert underscore to spaces; this is human-readable.
    $categoryFrom =~ s/_/ /g;

    # Insert possible whitespace
    $categoryFrom =~ s/^Category://;
    $categoryFrom = "Category:\\s*".$categoryFrom;
    
    # Escape special characters
    $categoryFrom =~ s%\(%\\(%g;
    $categoryFrom =~ s%\)%\\)%g;
    $categoryFrom =~ s%\'%\\\'%g;

    $categoryFromUnd = $categoryFrom;
    $categoryFromUnd =~ s/ /_/g;

    unless (($text =~ m/\[\[\s*${categoryFrom}\s*\]\]/is)
	    or ($text =~ m/\[\[\s*${categoryFrom}\s*\|.*?\]\]/is)
	    or ($text =~ m/\[\[\s*${categoryFromUnd}\s*\]\]/is)
	    or ($text =~ m/\[\[\s*${categoryFromUnd}\s*\|.*?\]\]/is))
    {
        myLog ("changeCategory.r(): $articleName is not in '$categoryFrom'.\n");
	my ($nullEditFlag);

	# Set this to "yes" if you want mass category change attempts
	# to trigger null edits automatically.  You should check the
	# category later to see if everything worked or not, to see if
	# any templates should be changed.  The below will add a small
	# amount of unnecessary server load to try the null edits if
	# template changes haven't already been made.
	$nullEditFlag = "yes";

	if ($nullEditFlag eq "yes")
	{
	    myLog ("changeCategory(): Attempting null edit on $articleName.\n");
	    print "changeCategory(): Attempting null edit on $articleName.\n";
	    nullEdit($articleName);
	    return();
	}
	else
	{
	    print "###${text}###\n";
	    die ("changeCategory.r(): $articleName is not in '$categoryFrom'.\n");
	}
    }

    if ($text =~ m/^\s*\#REDIRECT/is)
    {
	myLog ("changeCategory.r(): $articleName is a redirect!\n");
	die ("changeCategory.r(): $articleName is a redirect!\n");
    }

    # We're lazy and don't fully parse the document to properly check
    # for escaped category tags, so there may be some unnecssary
    # aborts from the following, but they are rare and easily
    # overridden by manually editing the page in question.
    if ($text =~ m/<nowiki>.*?category.*?<\/nowiki>/is)
    {
	myLog ("changeCategory.r(): $articleName has a dangerous nowiki tag!\n");
	die ("changeCategory.r(): $articleName has a dangerous nowiki tag!\n");
    }

    $text =~ m/\[\[\s*${categoryFrom}\s*\|(.*?)\]\]/is;
    $sortkey = $1;
    if ($sortkey eq "")
    {
        $text =~ m/\[\[\s*${categoryFromUnd}\s*\|(.*?)\]\]/is;
    }

    # Remove the page from the category and any trailing newline.
    $text =~ s/\[\[\s*${categoryFrom}\s*\|?(.*?)\]\]\n?//isg;
    $text =~ s/\[\[\s*${categoryFromUnd}\s*\|?(.*?)\]\]\n?//isg;


    # --- Start the adding part ---

    # Remove any newlines at the end of the document.
    $text =~ s/\n*$//s;

    $catTmp = $categoryTo;
    # _ and spaces are equivalent and may be intermingled in wikicode.
    $catTmp =~ s/Category:\s*/Category:\\s*/g;
    $catTmp =~ s/_/[_ ]/g;
    $catTmp =~ s%\(%\\\(%g;
    $catTmp =~ s%\)%\\\)%g;
    $catTmp =~ s%\.%\\\.%g;
    
    if (($text =~ m/(\[\[\s*${catTmp}\s*\|.*?\]\])/is)
	or ($text =~ m/(\[\[\s*${catTmp}\s*\]\])/is))
    {
	myLog ("changeCategory.a(): $articleName is already in '$categoryTo'.\n");
	print "\n1: '${1}'\n";
	print "\ncattmp: '${catTmp}'\n";
   	print "changeCategory.a(): $articleName is already in '$categoryTo'.\n";

	## It's generally OK to merge it in, so don't do this:
	# die "changeCategory.a(): $articleName is already in '$categoryTo'.\n";
	# return();
    }
    elsif ($text =~ m/^\s*\#REDIRECT/is)
    {
	print "changeCategory.a(): $articleName is a redirect!\n";
	myLog ("changeCategory.a(): $articleName is a redirect!\n");
	return();
    }
    else
    {
	# Convert underscore to spaces; this is human-readable.
	$categoryTo =~ s/_/ /g;
	
	# Add the category on a new line.
	if ($sortkey eq "")
	{
	    $text .= "\n[[${categoryTo}]]";
	}
	else
	{
	    $text .= "\n[[${categoryTo}|${sortkey}]]";
	}
    }	
    # --- Post-processing ---
    
    ($text, $junk) = fixCategoryInterwiki($text);
    postPage ($articleName, $editTime, $startTime, $token, $text, $comment, "yes");
}

# This function is not yet finished.  Right now it simply compares the
# membership of a given list and a given category.  Eventually, it is
# intended to be used to convert lists into categories. This is not
# yet authorized behavior.
sub listToCat
{
    my ($lists, $cats, $list, $cat, $listText, @junk, @articlesInList,
	@articlesInCat, %articlesInCat, $article, $implement);

    $lists = $_[0];
    $cats = $_[1];
    $implement = $_[2];

    if ($implement ne "yes")
    {
	print "Diffing membership of '$lists' and '$cats'\n";
    }

    foreach $list (split(";", $lists))
    {
	$list =~ s/^\[\[:?//;
	$list =~ s/\]\]$//;	

	($listText, @junk) = getPage($list);    
	
	$listText =~ s%<nowiki>.*?</nowiki>%%gis;
	$listText =~ s%<pre>.*?</pre>%%gis;
	# <pre>
	@articlesInList = (@articlesInList, $listText =~ m%\[\[(.*?)\]\]%sg);
	sleep 1;
    }

    foreach $cat (split(";", $cats))
    {
	$cat =~ s/^\[\[:?//;
	$cat =~ s/\]\]$//;
	$cat =~ s/^:Category/Category/;

	@articlesInCat = (@articlesInCat, getCategoryArticles($cat));
	sleep 1;
    }

    foreach $article (@articlesInCat)
    {
	$article = urlDecode ($article);
	$articlesInCat{$article} = 1;
	# print "In cat: $article\n";
    }

    foreach $article (@articlesInList)
    {
	$article =~ s/\s+/_/gs;
	$article =~ s/\|.*$//;
	if (exists $articlesInCat{$article})
	{
	    # print "OK: $article\n";
	    delete $articlesInCat{$article};
	}
	else
	{
	    print "Only in list(s): $article\n";
	}
    }

    foreach $article (sort(keys(%articlesInCat)))
    {
	print "Only in cat(s): $article\n";
    }
}

# A little paranoia never hurt anyone.
sub shellfix
{
    my ($string, $stringTmp);

    $string = $_[0];
    $string =~ s/([\*\?\!\(\)\&\>\<])\"\'/\\$1/g;

    $stringTmp = $string;

    $stringTmp =~ s/[Å\p{IsWord}[:alpha:][:digit:]\*,:_.\'\"\)\(\?\-\/\&\>\<\!]//g;

    if ($stringTmp ne "")
    {
        die ("\nUnsafe character(s) in '${string}': '$stringTmp'\n");
    }

    return $string;
}


# You will not be able to use this function; it requires a dataset
# processed by scripts which have not been included.  (It's not
# finished, anyway.)
sub enforceCategoryRedirects
{
    my ($implementActually, $line, $lineTmp, $articlesToMove,
	$article, $flatResults, $entry, $contents, $catTo, $lineTmp2);

    $implementActually = $_[0];
    
    $flatResults = `cat data/reverse-category-links-sorted.txt | grep ^Category:Wikipedia_category_redirects`;
    foreach $line (split("\n", $flatResults))
    {
	$line =~ s/^Category:Wikipedia_category_redirects <\- //;

	$lineTmp = shellfix($line);
	$lineTmp2 = $lineTmp;
	$lineTmp2 =~ s/^Category://;

	if ($line =~ m/^Category/)
	{
	    $articlesToMove = `cat data/reverse-category-links-sorted.txt | grep ^${lineTmp}`;

	    if ($articlesToMove eq "")
	    {
		next;
	    }
	 
	    print "ATM: $articlesToMove\n";
   
	    $entry = `egrep \"^\\([0-9]+,14,'$lineTmp2'\" data/entries-categoryredirect.txt `;
	    $entry =~ m/^\([0-9]+,14,'$lineTmp2','(.*?)',/;
	    $contents = $1;

	    $contents =~ m/\{\{categoryredirect\|(.*?)\}\}/;
	    $catTo = $1;
	    $catTo = ":Category:".$catTo;
	    $catTo =~ s/_/ /g;

	    $lineTmp = $line;
	    $lineTmp =~ s/^Category/:Category/i;
	    $lineTmp =~ s/_/ /g;

	    foreach $article (split("\n", $articlesToMove))
	    {
		print "ARTICLE: $article\n";
		print "LINE: $line\n";
		    
		$article =~ s/^$line <\- //;
		print "* Move [[$article]] from [[$lineTmp]] to [[$catTo]]\n";
	    }
	}
    }
}


# A call to this recursive function handles any retries necessary to
# wait out network or server problems.  It's a bit of a hack.
sub retry
{

    my ($callType, @args, $i, $normalDelay, $firstRetry,
	$secondRetry, $thirdRetry);

    ($callType, @args) = @_;

    ### ATTENTION ###
    # Increasing the speed of the bot to faster than 1 edit every 10
    # seconds violates English Wikipedia rules as of April, 2005, and
    # will cause your bot to be banned.  So don't change $normalDelay
    # unless you know what you are doing.  Other sites may have
    # similar policies, and you are advised to check before using your
    # bot at the default speed.
    #################

    # HTTP failures are usually an indication of high server load.
    # The retry settings here are designed to give human editors
    # priority use of the server, by allowing it ample recovering time
    # when load is high.

    # Time to wait before retry on failure, in seconds
    $normalDelay = 10;       # Normal interval between edits is 10 seconds
    $firstRetry = 60;        # First delay on fail is 1 minute
    $secondRetry = 60 * 10;  # Second delay on fail is 10 minutes
    $thirdRetry = 60 * 60;   # Third delay on fail is 1 hour
    
    # SUCCESS CASE
    # e.g. retry ("success", "getPage", "0.23");
    if ($callType eq "success")
    {
	myLog("Response time for ".$args[0]." (sec): ".$args[1]."\n");
	$::retryDelay = $normalDelay;

	if ($args[0] eq "postPage")
	{
	    # If the response time is greater than 20 seconds...
	    if ($args[1] > 20)
	    {
		print "Wikipedia is very slow.  Increasing minimum wait to 10 min...\n";
		myLog("Wikipedia is very slow.  Increasing minimum wait to 10 min...\n");
		
		$::speedLimit = 60 * 10;
	    }

	    # If the response time is between 10 and 20 seconds...
	    elsif ($args[1] > 10)
	    {
		print "Wikipedia is somewhat slow.  Setting minimum wait to 60 sec...\n";
		myLog("Wikipedia is somewhat slow.  Setting minimum wait to 60 sec...\n");
		
		$::speedLimit = 60;
	    }

	    # If the response time is less than 10 seconds...
	    else
	    {
		if ($::speedLimit > 10)
		{
		    print "Returning to normal minimum wait time.\n";
		    myLog("Returning to normal minimum wait time.\n");
		    $::speedLimit = 10;
		}
	    }
	}
	return();
    }

    # e.g. retry ("getPage", "George_Washington")
    # FAILURE CASES
    elsif (($::retryDelay == $normalDelay)
	   or ($::retryDelay == 0))
    {
	print "First retry for ".$args[0]."\n";
	myLog("First retry for ".$args[0]."\n");
	$::retryDelay = $firstRetry;
	$::speedLimit = 60 * 10;
    }
    elsif ($::retryDelay == $firstRetry)
    {
	print "Second retry for ".$args[0]."\n";
	myLog("Second retry for ".$args[0]."\n");
	$::retryDelay = $secondRetry;
	$::speedLimit = 60 * 10;
    }
    elsif ($::retryDelay == $secondRetry)
    {
	print "Third retry for ".$args[0]."\n";
	myLog("Third retry for ".$args[0]."\n");
	$::retryDelay = $thirdRetry;
	$::speedLimit = 60 * 10;
    }
    elsif ($::retryDelay == $thirdRetry)
    {
	print "Nth retry for ".$args[0]."\n";
	myLog("Nth retry for ".$args[0]."\n");
	$::retryDelay = $thirdRetry;
	$::speedLimit = 60 * 10;
    }
    else
    {
	die ("retry(): Internal error - unknown delay factor '".$::retryDelay."'\n");
    }

    # DEFAULT TO FAILURE CASE HANDLING
    
    $i = $::retryDelay;
    while ($i >= 0)
    {
	sleep (1);
	print STDERR "Waiting $i seconds for retry...\r";
	$i--;
    }
    print "                                     \r";

    # DO THE ACTUAL RETRY
    if ($callType eq "getPage")
    {
	return(getPage(@args));
    }
    elsif ($callType eq "postPage")
    {
	return(postPage(@args));
    }
    elsif ($callType eq "getCategoryArticles")
    {
	return(getCategoryArticles(@args));
    }
    elsif ($callType eq "getSubcategories")
    {
	return(getSubcategories(@args));
    }
    elsif ($callType eq "getURL")
    {
	return(getURL(@args));
    }
    else
    {
	myLog ("retry(): Unknown callType: $callType\n");
	die ("retry(): Unknown callType: $callType\n");
    }
}


# perl pearle ENFORCE_CFD
## This just compares the contents of Category:Categories_for_deletion
## with WP:CFD and /resolved and /unresolved.  It is broken now due to
## recent changes which list all nominations on subpages.  It also
## does not check above the first 200 members of the category, due to
## recent changes which paginates in 200-page blocks.
sub enforceCFD
{
    my (@subcats, $subcat, $cfd, $editTime, $startTime, $token, $cfdU, $cfdR);
    
    @subcats = getSubcategories("Category:Categories_for_deletion");

    ($cfd, $editTime, $startTime, $token) = getPage("Wikipedia:Categories_for_deletion");
    ($cfdU, $editTime, $startTime, $token) = getPage("Wikipedia:Categories_for_deletion/unresolved");
    ($cfdR, $editTime, $startTime, $token) = getPage("Wikipedia:Categories_for_deletion/resolved");

    $cfd =~ s/[\r\n_]/ /g;
    $cfd =~ s/\s+/ /g;
    $cfdU =~ s/[\r\n_]/ /g;
    $cfdU =~ s/\s+/ /g;
    $cfdR =~ s/[\r\n_]/ /g;
    $cfdR =~ s/\s+/ /g;

    foreach $subcat (@subcats)
    {
	$subcat =~ s/[\r\n_]/ /g;
	$subcat =~ s/\s+/ /g;
	$subcat = urlDecode ($subcat);

	unless ($cfd =~ m/$subcat/)
	{
	    print "$subcat is not in WP:CFD";
	    if ($cfdR =~ m/$subcat/)
	    {
		print " (listed on /resolved)";
	    }
	    if ($cfdU =~ m/$subcat/)
	    {
		print " (listed on /unresolved)";
	    }
	    print "\n";
	}
    }
}

# An internal function that handles the complexity of adding a
# category tag to the wikicode of a page.
sub addCatToText
{
    my ($category, $text, $catTmp, $sortkey, $articleName, $junk);

    $category = $_[0];
    $text = $_[1];
    $sortkey = $_[2];
    $articleName = $_[3];

    unless ($category =~ m/^Category:\w+/)
    {
	myLog ("addCatToText(): Bad format on category.\n");
	die ("addCatToText(): Bad format on category.\n");
    }

    $catTmp = $category;
    # _ and spaces are equivalent and may be intermingled.
    $catTmp =~ s/Category:\s*/Category:\\s*/g;
    $catTmp =~ s/_/[_ ]/g;
    $catTmp =~ s%\(%\\\(%g;
    $catTmp =~ s%\)%\\\)%g;
    $catTmp =~ s%\.%\\\.%g;
    
    if (($text =~ m/(\[\[\s*${catTmp}\s*\|.*?\]\])/is)
	or ($text =~ m/(\[\[\s*${catTmp}\s*\]\])/is))
    {
	print "addCatToText(): $articleName is already in '$category'.\n";
	myLog ("addCatToText(): $articleName is already in '$category'.\n");
	print "\n1: '${1}'\n";
	print "\ncattmp: '${catTmp}'\n";
	return("fail", $text);
    }

    if ($text =~ m/^\s*\#REDIRECT/is)
    {
	print "addCatToText(): $articleName is a redirect!\n";
	myLog ("addCatToText(): $articleName is a redirect!\n");
	return("fail", $text);
    }


    # Convert underscore to spaces; this is human-readable.
    $category =~ s/_/ /g;

    # Add the category
    $text .= "\n[[$category]]";
    # Move the category to the right place
    ($text, $junk) = fixCategoryInterwiki($text);
    
    return ("success", $text);
}


### THIS ROUTINE IS CURRENTLY UNUSED ###

# It will probably not be useful to you, anyway, since it requires
# pre-processed database dumps which are not included in Pearle.

sub getPageOffline
{
    my ($target, $result, $targetTmp);

    $target = $_[0];
    # Must run the following before using this function, from 200YMMDD/data:
    # cat entries.txt | perl ../../scripts/rewrite-entries.pl > entries-simple.txt
    # Even after this pre-processing, this routine is incredibly slow.
    # Set up and use MySQL instead if you care about speed.

    $target =~ s/\s/_/g;

    # Double escape the tab, once for Perl, once for the shell
    # -P means "treat as Perl regexp" (yay!)
#    $result = `grep -P '^${target}\\t' /home/beland/wikipedia/20050107/data/entries-simple.txt`;
    
    $targetTmp = shellfix($target);
    $result = `grep -P '^${targetTmp}\\t' /home/beland/wikipedia/20050107/data/matches2.txt`;
    $result =~ s/^${target}\t//;

    $result =~ s/\\n/\n/g;

    return ($result, "junk");
}


# --- CATEGORY AND INTERWIKI STYLE CLEANUP ROUTINES ---

# perl pearle.pl INTERWIKI_LOOP
#
## This command is for remedial cleanup only, and so is probably not
## useful anymore. This loop takes input of the form:
## "ArticleName\tBodyText\n{repeat...}" on STDIN.
#
sub interwikiLoop
{
    my ($article, $text, @junk,	$enforceCategoryInterwikiCalls);
    

    while (<STDIN>)
    {
	if ($_ =~ m/^\s*$/)
	{
	    next;
	}
	
	($article, $text, @junk) = split ("\t", $_);
	$text =~ s/\\n/\n/g;
	enforceCategoryInterwiki($article, $text);

	$enforceCategoryInterwikiCalls++;
	print STDOUT "\r interwikiLoop iteration ".$enforceCategoryInterwikiCalls;

    }
}


# perl pearle.pl ENFORCE_CATEGORY_INTERWIKI Article_name
#
## This function is for both external use.  From the command line, use
## it to tidy up a live page's category and interwiki tags, specifying
## only the name of the page.  It can also be used by interwikiLoop(),
## which supplies the full text on its own.  It will post any changes
## to the live wiki that involve anything more than whitespace
## changes.
##
## This function also does {{msg:foo}} -> {{foo}} conversion, so that
## the article parsing algorithm can be recycled.

#
sub enforceCategoryInterwiki
{

    my ($articleName, $text, $editTime, $startTime, $textOrig, @newLines, $line,
	$textCopy, $textOrigCopy, $message, @junk, $diff, $token,
	$online);

    $articleName = $_[0];
    myLog("enforceCategoryInterwiki($articleName)\n");
    
    $text = $_[1];

    $online = 0;

    if ($text eq "")
    {
	$online = 1;
	($text, $editTime, $startTime, $token) = getPage($articleName);    
    }

    $textOrig = $text;

    ($text, $message) = fixCategoryInterwiki($text);

    if (substantiallyDifferent($text, $textOrig))
    {
	@newLines = split ("\n", $text);
	
	$textCopy = $text;
	$textOrigCopy = $textOrig;

	open (ONE, ">/tmp/article1.$$");
	print ONE $textOrig;
	close (ONE);

	open (TWO, ">/tmp/article2.$$");
	print TWO $text;
	close (TWO);

	$diff = `diff  /tmp/article1.$$ /tmp/article2.$$`;
	unlink("/tmp/article1.$$");
	unlink("/tmp/article2.$$");


	myLog("*** $articleName - $message\n");
	myLog("*** DIFF FOR $articleName\n");
	myLog($diff);
	
	if ($online == 0)
	{
	    # Isolate changed files for later runs
	    open (FIXME, ">>./fixme.interwiki.txt.$$");
	    $text =~ s/\t/\\t/g;
	    $text =~ s/\n/\\n/g;
	    print FIXME $articleName."\t".$text."\n";
	    close (FIXME);
	}

	myLog($articleName." changed by fixCategoryInterwiki(): $message\n");
	print STDOUT $articleName." changed by fixCategoryInterwiki(): $message\n";

	if ($online == 1)
	{
	    postPage ($articleName, $editTime, $startTime, $token, $text, $message, "yes");
	}
    }
    else
    {
	print STDOUT "--- No change for ${articleName}.\n";
	myLog ("--- No change for ${articleName}.\n");
	### TEMPORARY ###
	### Uncomment this line if you want category changes to
	### trigger null edits.  This is useful if you have have
	### changed the category on a template, but due to a bug this
	### does not actually move member articles until they are
	### edited.
	postPage ($articleName, $editTime, $startTime, $token, $textOrig, "null edit", "yes");
	### TEMPORARY ###
    }
}

sub substantiallyDifferent
{
    my($a, $b);

    $a = $_[0];
    $b = $_[1];

    $a =~ s/\s//g;
    $b =~ s/\s//g;
    
    return ($a ne $b);
}


# Given some wikicode as input, this function will tidy up the
# category and interwiki links and return the result and a comment
# suitable for edit summaries.
sub fixCategoryInterwiki
{

    my ($input, @segmentNames, @segmentContents, $langlist, $i,
	$message, $output, $flagForReview, $interwikiBlock,
	$categoryBlock, $flagError, $bodyBlock, $contents, $name,
	@interwikiNames, @interwikiContents, @categoryNames,
	@categoryContents, @bodyNames, @bodyContents, $bodyFlag,
	@bottomNames, @bottomContents, @segmentNamesNew,
	@segmentContentsNew, $lastContents, @stubContents,
	@stubNames, $stubBlock, $msgFlag);

    $input = $_[0];


    # The algorithm here is complex.  The general idea is to split the
    # page in to segments, each of which is assigned a type, and then
    # to rearrange, consolidate, and frob the segments as needed.


    # Start with one segment that includes the whole page.
    @::segmentNames = ("bodyText");
    @::segmentContents = ($input);

    # Recognize and tag certain types of segments.  The order of
    # processing is very important.

    metaTagInterwiki("nowiki", "^(.*?)(\s*<nowiki>.*?</nowiki>\s*)");
    metaTagInterwiki("comment", "^(.*?)(<!.*?>\\n?)");
    metaTagInterwiki("html", "^(.*?)(<.*?>\\n?)");
    metaTagInterwiki("category", "^(.*?)(\\[\\[\\s*Category\\s*:\\s*.*?\\]\\]\\n?)");
    
    $langlist = `cat /home/beland/wikipedia/pearle-wisebot/langlist`;
    $langlist =~ s/^\s*//s;
    $langlist =~ s/\s*$//s;
    $langlist =~ s/\n/\|/gs;
    $langlist .= "|minnan|zh\-cn|zh\-tw|nb";
    metaTagInterwiki("interwiki", "^(.*?)(\\[\\[\\s*($langlist)\\s*:\\s*.*?\\]\\]\\n?)");
    metaTagInterwiki("tag", "^(.*?)(\{\{.*?\}\})");

    # Allow category and interwiki segments to be followed by HTML
    # comments only (plus any intervening whitespace).

    $i = 0;
    while ($i < @::segmentNames)
    {
	$name = $::segmentNames[$i];
	$contents = $::segmentContents[$i];
	
	# {{msg:foo}} -> {{foo}} conversion
	if (($name eq "tag") and
	    ($contents =~ m/^{{msg:(.*?)}}/))
	{
	    $msgFlag = 1;
	    $contents =~ s/^{{msg:(.*?)}}/{{$1}}/;
	    
	}

	if (($name eq "category") or ($name eq "interwiki"))
	{
	    if (!($contents =~ m/\n/) and ($::segmentNames[$i+1] eq "comment"))
	    {
		push (@segmentNamesNew, $name);
		push (@segmentContentsNew, $contents.$::segmentContents[$i+1]);
		$i += 2;
# DEBUG		print "AAA - ".$contents.$::segmentContents[$i+1]);
		next;
	    }
	    
	    if (!($contents =~ m/\n/) 
		and ($::segmentNames[$i+1] eq "bodyText")
		and ($::segmentContents[$i+1] =~ m/^\s*$/)
		and !($::segmentContents[$i+1] =~ m/^\n$/)
		and ($::segmentNames[$i+2] eq "comment")
		)
	    {
		push (@segmentNamesNew, $name);
		push (@segmentContentsNew, 
		      $contents.$::segmentContents[$i+1].$::segmentContents[$i+2]);
		$i += 3;
# DEBUG		print "BBB".$contents.$::segmentContents[$i+1].$::segmentContents[$i+2]);
		next;
	    }

	    # Consolidate with any following whitespace
	    if (($::segmentNames[$i+1] eq "bodyText")
		and ($::segmentContents[$i+1] =~ m/^\s*$/)
		)
	    {
		push (@segmentNamesNew, $name);
		push (@segmentContentsNew, 
		      $contents.$::segmentContents[$i+1]);
		$i += 2;
		next;
	    }
	}
	
	push (@segmentNamesNew, $name);
	push (@segmentContentsNew, $contents);
	
	$i++;
    }
    
    # Clean up results
    @::segmentNames = @segmentNamesNew;
    @::segmentContents = @segmentContentsNew;
    @segmentContentsNew = ();
    @segmentNamesNew = ();


    # Move category and interwiki tags that precede the body text (at
    # the top of the page) to the bottom of the page.

    $bodyFlag = 0;
    foreach $i (0 ... $#::segmentNames)
    {
	$name = $::segmentNames[$i];
	$contents = $::segmentContents[$i];
	
	if ($bodyFlag == 1)
	{
	    push (@segmentNamesNew, $name);
	    push (@segmentContentsNew, $contents);
	}
	elsif (($name eq "category") or ($name eq "interwiki"))
	{
	    push (@bottomNames, $name);
	    push (@bottomContents, $contents);
	}
	else
	{
	    push (@segmentNamesNew, $name);
	    push (@segmentContentsNew, $contents);
	    $bodyFlag = 1;
	}
    }
    
    # Clean up results
    @::segmentNames = (@segmentNamesNew, @bottomNames);
    @::segmentContents = (@segmentContentsNew, @bottomContents);
    @segmentContentsNew = ();
    @segmentNamesNew = ();
    @bottomNames = ();
    @bottomContents = ();


    # Starting at the bottom of the page, isolate category, interwiki,
    # and body text.  If categories or interwiki links are mixed with
    # body text, flag for human review.

    ### DEBUG ###
    # foreach $i (0 ... $#::segmentNames)
    # {
    #  print "---$i ".$::segmentNames[$i]."---\n";
    #  print "%%%".$::segmentContents[$i]."%%%\n";
    # }
    ### DEBUG ###


    ### DEBUG ###
    #my ($first);
    #$first = 1;
    ### DEBUG ###

    $bodyFlag = 0;
    $flagForReview = 0;
    foreach $i (reverse(0 ... $#::segmentNames))
    {
	$name = $::segmentNames[$i];
	$contents = $::segmentContents[$i];
	
	
	if (($name eq "category") and ($bodyFlag == 0))
	{
	    # Push in reverse
	    @categoryNames = ($name, @categoryNames);
	    @categoryContents = ($contents, @categoryContents);
	    next;
	}
	elsif (($name eq "interwiki") and ($bodyFlag == 0))
	{
	    # Push in reverse
	    @interwikiNames = ($name, @interwikiNames);
	    @interwikiContents = ($contents, @interwikiContents);
	    next;
	}
	elsif (($bodyFlag == 0)
	       and ($name eq "tag") 
	       and (($contents =~ m/\{\{[ \w\-]+[\- ]?stub\}\}/) or
		    ($contents =~ m/\{\{[ \w\-]+[\- ]?stub\|.*?\}\}/)))
	{
	    ### IF THIS IS A STUB TAG AND WE ARE STILL $bodyFlag == 0,
	    ### THEN ADD THIS TO $stubBlock!

	    # Canonicalize by making {{msg:Foo}} into {{Foo}}	    
	    s/^\{\{\s*msg:(.*?)\}\}/\{\{$1\}\}/i;
	    
	    # Push in reverse
	    @stubNames = ($name, @stubNames);
	    @stubContents = ($contents, @stubContents);
	    next;
	}
	elsif (($name eq "category") or ($name eq "interwiki"))
	    # bodyFlag implicitly == 1
	{
	    if ($flagForReview == 0)
	    {
		$flagForReview = 1;
		$lastContents =~ s/^\s*//s;
		$lastContents =~ s/\s*$//s;
		$flagError = substr ($lastContents, 0, 30);
	    }
	    # Drop down to push onto main body stack.
	}

	# Handle this below instead.
	## Skip whitespace
	#if (($contents =~ m/^\s*$/s) and ($bodyFlag == 0))
	#{
	#    next;
	#}

	# Delete these comments
	if (($bodyFlag == 0) and ($name == "comment"))
	{
	    if (
		($contents =~ m/<!--\s*interwiki links\s*-->/i) or
		($contents =~ m/<!--\s*interwiki\s*-->/i) or
		($contents =~ m/<!--\s*interlanguage links\s*-->/i) or
		($contents =~ m/<!--\s*categories\s*-->/i) or
		($contents =~ m/<!--\s*other languages\s*-->/i) or
		($contents =~ m/<!--\s*The below are interlanguage links.\s*-->/i)
		)
	    {
		### DEBUG ###
		#print STDOUT ("YELP!\n");
		#
		#foreach $i (0 ...$#bodyNames)
		#{
		#    print "---$i ".$bodyNames[$i]."---\n";
		#    print "%%%".$bodyContents[$i]."%%%\n";
		#}
		#
		#print STDOUT ("END-YELP!");
		### DEBUG ###

		next;
	    }
	}

	# Push onto main body stack (in reverse).
	@bodyNames = ($name, @bodyNames);
	@bodyContents = ($contents, @bodyContents);    
	
	### DEBUG ###
	#if (($flagForReview == 1) and ($first == 1))
	#{
	#    $first = 0;
	#    print "\@\@\@${lastContents}\#\#\#\n";
	#}
	### DEBUG ###

	# This should let tags mixed in with the category and
	# interwiki links (not comingled with body text) bubble up.
	unless (($contents =~ m/^\s*$/s) or ($name eq "tag"))
	{
	    $bodyFlag = 1;
	}

	$lastContents = $contents;
    }
    
    ### DEBUG ###
#    foreach $i (0 ... $#bodyNames)
#    {
#        print "---$i ".$bodyNames[$i]."---\n";
#	print "%%%".$bodyContents[$i]."%%%\n";
#    }
#    foreach $i (0 ... $#categoryNames)
#    {
#        print "---$i ".$categoryNames[$i]."---\n";
#	print "^^^".$categoryContents[$i]."^^^\n";
#    }
#    foreach $i (0 ... $#interwikiNames)
#    {
#        print "---$i ".$interwikiNames[$i]."---\n";
#	print "&&&".$interwikiContents[$i]."&&&\n";
#    }
    ### DEBUG ###

    # Assemble body text, category, interwiki, and stub arrays into strings
    
    foreach $i (0 ... $#bodyNames)
    {
	$name = $bodyNames[$i];
	$contents = $bodyContents[$i];
	
	$bodyBlock .= $contents;
    }
    foreach $i (0 ... $#categoryNames)
    {
	$name = $categoryNames[$i];
	$contents = $categoryContents[$i];
	
	# Enforce style conventions
	$contents =~ s/\[\[category\s*:\s*/\[\[Category:/i;
	
	# Enforce a single newline at the end of each category line.
	$contents =~ s/\s*$//;
	$categoryBlock .= $contents."\n";
    }
    foreach $i (0 ... $#interwikiNames)
    {
	$name = $interwikiNames[$i];
	$contents = $interwikiContents[$i];
	
	# Canonicalize minnan to zh-min-nan, since that's what's in
	# the officially distributed langlist.
	$contents =~ s/^\[\[minnan:/\[\[zh-min-nan:/;

	# Canonicalize zh-ch, Chinese (simplified) and zn-tw, Chinese
	# (traditional) to "zh"; the distinction is being managed
	# implicitly by software now, not explicitly in wikicode.
	$contents =~ s/^\[\[zh-cn:/\[\[zh:/g;
	$contents =~ s/^\[\[zh-tw:/\[\[zh:/g;

	# Canonicalize nb to no
	$contents =~ s/^\[\[nb:/\[\[no:/g;

	# Canonicalize dk to da
	$contents =~ s/^\[\[dk:/\[\[da:/g;

	# Enforce a single newline at the end of each interwiki line.
	$contents =~ s/\s*$//;
	$interwikiBlock .= $contents."\n";
    }
    foreach $i (0 ... $#stubNames)
    {
	$name = $stubNames[$i];
	$contents = $stubContents[$i];
	
	# Enforce a single newline at the end of each stub line.
	$contents =~ s/\s*$//;
	$contents =~ s/^\s*//;
	$stubBlock .= $contents."\n";
    }     

    # Minimize interblock whitespace
    $bodyBlock =~ s/^\s*//s;
    $bodyBlock =~ s/\s*$//s;
    $categoryBlock =~ s/^\s*//s;
    $categoryBlock =~ s/\s*$//s;
    $interwikiBlock =~ s/^\s*//s;
    $interwikiBlock =~ s/\s*$//s;
    $stubBlock =~ s/^\s*//s;
    $stubBlock =~ s/\s*$//s;

    # Assemble the three blocks into a single string, flagging for
    # human review if necessary.
    
    $output = "";
    
    if ($bodyBlock ne "")
    {
	$output .= $bodyBlock."\n\n";
    }
    
    if (($flagForReview == 1) 
	and !($input =~ m/\{\{interwiki-category-check/)
	and !($input =~ m/\{\{split/)
	and !($input =~ m/\[\[Category:Pages for deletion\]\]/))
    {

	$output .= "{{interwiki-category-check|<nowiki>${flagError}</nowiki>}}\n\n";
    }
    
    if ($categoryBlock ne "")
    {
	$output .= $categoryBlock."\n";
    }
    
    if ($interwikiBlock ne "")
    {
#	$output .= "<!-- The below are interlanguage links. -->\n".$interwikiBlock."\n";
	$output .= $interwikiBlock."\n";
    }
    if ($stubBlock ne "")
    {
	$output .= $stubBlock."\n";
    }    

    if ($input ne $output)
    {
	$message = "Minor category, interwiki, or template style cleanup";
	if ($flagForReview == 1) 
	{
	    $message = "Flagged for manual review of category/interwiki style";
	}
	if ($msgFlag == 1)
	{
	    $message .= "; {{msg:foo}} -> {{foo}} conversion for MediaWiki 1.5+ compatibility";
	}
    }
    else
    {
	$message = "No change";
    }
    
    return($output, $message);
}


#sub displayInterwiki
#{
#    my ($i);
#    ## THIS FUNCTION CANNOT BE CALLED DUE TO SCOPING; YOU MUST MANUALLY
#    ## COPY THIS TEXT INTO fixCategoryInterwiki().  IT IS ONLY USEFUL
#    ## FOR DIAGNOSTIC PURPOSES.
#
#    foreach $i (0 ... $#::segmentNames)
#    {
#	print "---$i ".$::segmentNames[$i]."---\n";
#	print "%%%".$::segmentContents[$i]."%%%\n";
#    }
#}


# A subroutine of fixCategoryInterwiki(), this function isolates
# certain parts of existing segments based on a regular expression
# pattern, and tags them with the supplied name (which indicates their
# type).  Sorry for the global variables.
sub metaTagInterwiki
{

    my ($tag, $pattern, $i, $meta, $body, @segmentNamesNew,
	@segmentContentsNew, $name, $contents, $bodyText, );


    $tag = $_[0];
    $pattern = $_[1];

    foreach $i (0 ... $#::segmentNames)
    {
	$name = $::segmentNames[$i];
	$contents = $::segmentContents[$i];
	
	unless ($name eq "bodyText") 
	{
	    push (@segmentNamesNew, $name);
	    push (@segmentContentsNew, $contents);
	    next;
	}
	
	while (1)
	{
	    if ($contents =~ m%$pattern%is)
	    {
		$bodyText = $1;
		$meta = $2;
		
		if ($bodyText ne "")
		{
		    push (@segmentNamesNew, "bodyText");
		    push (@segmentContentsNew, $bodyText);
		}
		
		push (@segmentNamesNew, $tag);
		push (@segmentContentsNew, $meta);
		
		$contents =~ s/\Q${bodyText}${meta}\E//s;
	    }
	    else
	    {
		if ($contents ne "")
		{
		    push (@segmentNamesNew, $name);
		    push (@segmentContentsNew, $contents);
		}
		last;
	    }
	}
    }
	
    @::segmentNames = @segmentNamesNew;
    @::segmentContents = @segmentContentsNew;
    @segmentContentsNew = ();
    @segmentNamesNew = ();
}

sub nullEdit
{
    my ($text, $articleName, $comment, $editTime, $startTime, $token);

    $articleName = $_[0];

    # Only set this to "yes" if you are doing a bunch of null edits
    # and don't care about failures.
    $::roughMode = "no";


    print "nullEdit($articleName)\n";
    myLog ("nullEdit($articleName)\n");

    ($text, $editTime, $startTime, $token) = getPage($articleName);
    unless ($text eq "")
    {
	postPage ($articleName, $editTime, $startTime, $token, $text, "null edit");
    }
}


sub cleanupDate
{
    my ($article, @articles);


    # Get all articles from Category:Wikipedia cleanup
    @articles = getCategoryArticles ("Category:Wikipedia cleanup");

#    @articles = sort({$b cmp $a} @articles);
    @articles = sort(@articles);

    foreach $article (@articles)
    {
	if (($article =~ m/^Wikipedia:/)
	    or ($article =~ m/^Template:/)
	    or ($article =~ m/^User:/)
	    or ($article =~ m/talk:/i)
	    )
	{
	    next;
	}

	cleanupDateArticle($article);
	limit();
    }
}

sub cleanupDateArticle #($target)
{
    my (@result, $link, $currentMonth, $currentYear, $junk, $line,
	$month, $year, $found, $lineCounter, $target);
    
    $target = $_[0];
    print "cleanupDateArticle($target)\n";
    
    @result = parseHistory($target);
    
    ($currentMonth, $currentYear, $junk) = split(" ", $result[0]);
    
    $found = "";
    foreach $line (@result)
    {
	$lineCounter++;
	($month, $year, $link) = split(" ", $line);
	
	if (($month eq $currentMonth)
	    and ($year eq $currentYear))
	{
#	    print "$month $year - SKIP\n";
	    next;
	}

# Skip this, because it produces false positives on articles that were
# protected at the end of last month, but no longer are.  The correct
# thing to do is to check if an article is CURRENTLY protected by
# fetching the current version, but this seems like a waste of network
# resources.

#	if (checkForTag("protected", $link) eq "yes")
#	{
#	    print "$target is {{protected}}; skipping\n";
#	    myLog("$target is {{protected}}; skipping\n");
#	    return();
#	}

	if (checkForTag("sectionclean", $link) eq "yes")
	{
	    print "$target has {{sectionclean}}\n";
	    myLog("$target has {{sectionclean}}\n");
	    nullEdit($target);
	    return();
	}

	if (checkForTag("Sect-Cleanup", $link) eq "yes")
	{
	    print "$target has {{Sect-Cleanup}}\n";
	    myLog("$target has {{Sect-Cleanup}}\n");
	    nullEdit($target);
	    return();
	}

	if (checkForTag("section cleanup", $link) eq "yes")
	{
	    print "$target has {{section cleanup}}\n";
	    myLog("$target has {{section cleanup}}\n");
	    nullEdit($target);
	    return();
	}

	if (checkForTag("sectcleanup", $link) eq "yes")
	{
	    print "$target has {{sectcleanup}}\n";
	    myLog("$target has {{sectcleanup}}\n");
	    nullEdit($target);
	    return();
	}

	if (checkForTag("cleanup-section", $link) eq "yes")
	{
	    print "$target has {{cleanup-section}}\n";
	    myLog("$target has {{cleanup-section}}\n");
	    nullEdit($target);
	    return();
	}


	if (checkForTag("cleanup-list", $link) eq "yes")
	{
	    print "$target has {{cleanup-list}}\n";
	    myLog("$target has {{cleanup-list}}\n");
	    nullEdit($target);
	    return();
	}

	if (checkForTag("cleanup-nonsense", $link) eq "yes")
	{
	    print "$target has {{cleanup-nonsense}}\n";
	    myLog("$target has {{cleanup-nonsense}}\n");
	    nullEdit($target);
	    return();
	}

	if ((checkForTag("cleanup", $link) eq "yes") or
	    (checkForTag("clean", $link) eq "yes") or
	    (checkForTag("CU", $link) eq "yes") or
	    (checkForTag("cu", $link) eq "yes") or
	    (checkForTag("cleanup-quality", $link) eq "yes") or
	    (checkForTag("tidy", $link) eq "yes"))
	{
	    $currentMonth = $month;
	    $currentYear = $year;
#	    print "$month $year - YES\n";
	    next;
	}
	else
	{
#	    print "$month $year - NO\n";
#	    print "Tag added $currentMonth $currentYear\n";
	    $found = "Tag added $currentMonth $currentYear\n";
	    last;
	}
    }
    if ($found eq "")
    {
#	print "HISTORY EXHAUSTED\n";

	if ($lineCounter < 498)
	{
	    $found = "Tag added $currentMonth $currentYear\n";
	}
	else
	{
#	    print "Unable to determine when tag was added to $target.\n";
	    myLog("Unable to determine when tag was added to $target.\n");
	    die("Unable to determine when tag was added to $target.\n");
	}
    }

    if ($found ne "")
    {
	changeTag("cleanup", "cleanup-date\|${currentMonth} ${currentYear}", $target)
	    || changeTag("tidy", "cleanup-date\|${currentMonth} ${currentYear}", $target)
	    || changeTag("CU", "cleanup-date\|${currentMonth} ${currentYear}", $target)
	    || changeTag("cu", "cleanup-date\|${currentMonth} ${currentYear}", $target)
	    || changeTag("cleanup-quality", "cleanup-date\|${currentMonth} ${currentYear}", $target)
	    || changeTag("clean", "cleanup-date\|${currentMonth} ${currentYear}", $target)
	    || nullEdit($target);
    }
}

sub changeTag
{
    my ($tagFrom, $tagFromUpper, $tagTo, $tagToUpper, $articleName,
	$editTime, $startTime, $text, $token, $comment, $junk);

    $tagFrom = $_[0];      # "cleanup"
    $tagTo = $_[1];        # "cleanup-date|August 2005"
    $articleName = $_[2];  # Article name

    print "changeTag (${tagFrom}, ${tagTo}, ${articleName})\n";
    myLog("changeTag (${tagFrom}, ${tagTo}, ${articleName})\n");

    
    $tagFromUpper = ucfirst($tagFrom);
    $tagToUpper = ucfirst($tagTo);

    if ($articleName =~ m/^\s*$/)
    {
	myLog("changeTag(): Null target.");
	die("changeTag(): Null target.");
    }
    
    ($text, $editTime, $startTime, $token) = getPage($articleName);
    
    unless (($text =~ m/\{\{\s*\Q$tagFrom\E\s*\}\}/)
	    or ($text =~ m/\{\{\s*\Q$tagFromUpper\E\s*\}\}/) 
	    or ($text =~ m/\{\{\s*\Qmsg:$tagFrom\E\s*\}\}/)
	    or ($text =~ m/\{\{\s*\Qmsg:$tagFromUpper\E\s*\}\}/)
	    or ($text =~ m/\{\{\s*\QTemplate:$tagFrom\E\s*\}\}/)
	    or ($text =~ m/\{\{\s*\QTemplate:$tagFromUpper\E\s*\}\}/)
	    or ($text =~ m/\{\{\s*\Q$tagFrom\E\|.*?\s*\}\}/)
	    or ($text =~ m/\{\{\s*\Q$tagFromUpper\E\|.*?\s*\}\}/)
	    )
    {
	myLog("changeTag(): {{$tagFrom}} is not in $articleName.\n");
	print "changeTag(): {{$tagFrom}} is not in $articleName.\n";

	# die("changeTag(): {{$tagFrom}} is not in $articleName.\n");
	### TEMPORARY ###
	# <nowiki> Just skip articles with {{tidy}}, {{clean}} {{sectionclean}}, {{advert}}, etc.

	sleep(1); # READ THROTTLE!
	return(0);

    }
    
    if (($text =~ m/\{\{\s*\Q$tagTo\E\s*\}\}/)
	or ($text =~ m/\{\{\s*\Q$tagToUpper\E\s*\}\}/))
    {
	myLog("changeTag(): $articleName already contains {{$tagTo}}.");
	die("changeTag(): $articleName already contains {{$tagTo}}.");
    }

    if ($text =~ m/^\s*\#REDIRECT/is)
    {
	myLog ("changeTag.a(): $articleName is a redirect!\n");
	die ("changeTag.a(): $articleName is a redirect!\n");
	sleep(1); # READ THROTTLE!
	return(0);
    }
    
    # Escape special characters
    $tagFrom =~ s%\(%\\(%g;
    $tagFrom =~ s%\)%\\)%g;
    $tagFrom =~ s%\'%\\\'%g;
	
	
    # We're lazy and don't fully parse the document to properly check
    # for escaped tags, so there may be some unnecssary aborts from
    # the following, but they are rare and easily overridden by
    # manually editing the page in question.
    if (($text =~ m/<nowiki>.*?\Q$tagFrom\E.*?<\/nowiki>/is) or
	($text =~ m/<pre>.*?\Q$tagFrom\E.*?<\/pre>/is))
    # <pre>	
    {
	myLog ("changeTag.r(): $articleName has a dangerous nowiki or pre tag!\n");
	die ("changeTag.r(): $articleName has a dangerous nowiki or pre tag!\n");
    }
    
    # Make the swap!
    $text =~ s/\{\{\s*\Q$tagFrom\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\Q$tagFromUpper\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\Qmsg:$tagFrom\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\Qmsg:$tagFromUpper\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\QTemplate:$tagFrom\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\QTemplate:$tagFromUpper\E\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\Q$tagFrom\E\|(.*?)\s*\}\}/{{${tagTo}}}/g;
    $text =~ s/\{\{\s*\Q$tagFromUpper\E\|(.*?)\s*\}\}/{{${tagTo}}}/g;

    # Tidy up the article in general
    ($text, $junk) = fixCategoryInterwiki($text);

    # Post the changes
    $comment = "Changing \{\{${tagFrom}\}\} to \{\{${tagTo}\}\}";
    postPage ($articleName, $editTime, $startTime, $token, $text, $comment, "yes");
    return (1);
}

sub parseHistory
{
    my ($pageName, $html, @lines, $line, $date, $month, $year,
	$htmlCopy, $link, @result, $pageNameSafe);

    $pageName = $_[0];

    $pageNameSafe = $pageName;
    $pageNameSafe =~ s/&/%26/g;

    $html = getURL("http://en.wikipedia.org/w/index.php?title=${pageNameSafe}&action=history&limit=500");

    $htmlCopy = $html;

    $html =~ s%^.*?<ul id="pagehistory">%%s;
    $html =~ s%(.*?)</ul>.*$%$1%s;
    $html =~ s%</li>\s*%%s;

    @lines = split ("</li>", $html);
    foreach $line (@lines)
    {
	$line =~ s/\n/ /g;

	if ($line =~ m/^\s*$/)
	{
	    next;
	}
	$line =~ s%<span class='user'>.*?$%%;
	$line =~ s%^.*?Select a newer version for comparison%%;
	$line =~ s%^.*?Select a older version for comparison%%;
        $line =~ s%^.*?name="diff" />%%;
#	print "LINE: ".$line."\n";

	$line =~ m%<a href="(.*?)" title="(.*?)">(.*?)</a>%;
	$link = $1;
	$date = $3;

#	print $link." / $date\n";

	if ($date =~ m/Jan/)
	{
	    $month = "January";
	}
	elsif ($date =~ m/Feb/)
	{
	    $month = "February";
	}
	elsif ($date =~ m/Mar/)
	{
	    $month = "March";
	}
	elsif ($date =~ m/Apr/)
	{
	    $month = "April";
	}
	elsif ($date =~ m/May/)
	{
	    $month = "May";
	}
	elsif ($date =~ m/Jun/)
	{
	    $month = "June";
	}
	elsif ($date =~ m/Jul/)
	{
	    $month = "July";
	}
	elsif ($date =~ m/Aug/)
	{
	    $month = "August";
	}
	elsif ($date =~ m/Sep/)
	{
	    $month = "September";
	}
	elsif ($date =~ m/Oct/)
	{
	    $month = "October";
	}
	elsif ($date =~ m/Nov/)
	{
	    $month = "November";
	}
	elsif ($date =~ m/Dec/)
	{
	    $month = "December";
	}
	else
	{
	    $month = "Unknown month";
	    myLog ("Unknown month - parse failure! $line\nHTML:\n$html\n");
	    die ("Unknown month - parse failure! (see log) LINE: $line\n");
	}
	    
	$date =~ m/(\d\d\d\d)/;
	$year = $1;

	@result = (@result, "$month $year $link");
    }
    
    return (@result);
}

sub checkForTag #($targetURLWithOldIDAttached)
{
    my ($tag, $target, $text);

    $tag = $_[0];
    $target = $_[1];


    # Must be absolute; assuming English Wikipedia here.
    $target =~ s%^/w/index.php%http://en.wikipedia.org/w/index.php%;

    # Decode HTML entities in links
    $target =~ s/\&amp;/\&/g;

    if ($target eq $::cachedTarget)
    {
	$text = $::cachedText;
    }
    else
    {
	$text = getURL ($target."&action=edit");
	$::cachedTarget = $target;
	$::cachedText = $text;
    }

    if ($text =~ m/\{\{\s*\Q$tag\E\s*\}\}/)
    {
#	print $text; die "Cough!";
	return "yes";
    }

    $tag = ucfirst($tag);
    
    if ($text =~ m/\{\{\s*\Q$tag\E\s*\}\}/)
    {
#	print "\n\nSneeze!\n\n"; print $text."\n\n"; 
	return "yes";
    }

    return "no";
}


sub getURL #($target)
{
    # Read throttle!
    sleep (1);

    my ($attemptStartTime, $attemptFinishTime, $request, $response, $reply, $url);
    
    $url = $_[0];

    # Monitor wiki server responsiveness
    $attemptStartTime = Time::HiRes::time();
    
    # Create a request-object
    print "GET ${url}\n";
    myLog("GET ${url}\n");
    $request = HTTP::Request->new(GET => "${url}");
    $response = $::ua->request($request);

    if ($response->is_success)
    {
	$reply = $response->content;

	# Monitor wiki server responsiveness
	$attemptFinishTime = Time::HiRes::time();
	retry ("success", "getURL", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));
	
	# This may or may not actually work
	$::ua->cookie_jar->save();

	return ($reply);
    } 
    else 
    {
	myLog ("getURL(): HTTP ERR (".$response->status_line.") ${url}\n".$response->content."\n");
	print ("getURL(): HTTP ERR (".$response->status_line.") ${url}\n".$response->content."\n");
	# 50X HTTP errors mean there is a problem connecting to the wiki server
	if (($response->status_line =~ m/^500/)
	    or ($response->status_line =~ m/^502/)
	    or ($response->status_line =~ m/^503/))
	{
	    return(retry("getURL", @_));
	}
	else
	{
	    # Unhandled HTTP response
	    die ("getURL(): HTTP ERR (".$response->status_line.") ${url}\n");
	}
    }
}

sub opentaskUpdate
{

    my ($target, $historyFile, $opentaskText, $editTime, $startTime,
	$token, $key, $historyDump);

    $target = "Template:Opentask";
    $historyFile = "/home/beland/wikipedia/pearle-wisebot/opentask-history.pl";

    ($opentaskText, $editTime, $startTime, $token) = getPage($target);

    eval(`cat $historyFile`);


    $opentaskText = doOpentaskUpdate("CLEANUP",
				     "CLEANUP",
				     $opentaskText);

    $opentaskText = doOpentaskUpdate("STYLE",
				     "Category:Wikipedia articles needing style editing",
				     $opentaskText);

    $opentaskText = doOpentaskUpdate("UPDATE",
				     "Category:Wikipedia articles in need of updating",
				     $opentaskText);

    $opentaskText = doOpentaskUpdate("VERIFY",
				     "Category:Wikipedia articles needing factual verification",
				     $opentaskText);

    $opentaskText = doOpentaskUpdate("COPYEDIT",
				     "Category:Wikipedia articles needing copy edit",
				     $opentaskText);

    $opentaskText = doOpentaskUpdate("WIKIFY",
				     "Category:Articles that need to be wikified",
				     $opentaskText);

    $opentaskText = doOpentaskUpdate("MERGE",
				     "Category:Articles to be merged",
				     $opentaskText);

    $opentaskText = doOpentaskUpdate("NPOV",
				     "Category:NPOV disputes",
				     $opentaskText);



    # Dump history



    $historyDump = "\%::history = (\n";
    foreach $key (sort(keys(%::history)))
    {
	$key =~ s/\"/\\\"/g; # Escape!
	$historyDump .= "\"${key}\" => \"".$::history{$key}."\",\n";
    }
    $historyDump =~ s/,\n$//s;
    $historyDump .= "\n)\n";

    open (HISTORY, ">".$historyFile);
    print HISTORY $historyDump;
    close (HISTORY);
    
    
    postPage ($target, $editTime, $startTime, $token, $opentaskText, "Automatic rotation of NPOV, copyedit, wikify, merge, update, style, and verify", "yes");
}

sub doOpentaskUpdate
{

    my ($categoryID, $sourceCategory, $opentaskText, @articles,
	$article, %rank, $featuredString, $characterLimit,
	$featuredStringTmp, $key, $printedFlag, $tmpKey, $l, $nl, %l,
	%nl, $total, $articleUnderscore, $neverListed, @articlesTmp);

    $categoryID = $_[0];
    $sourceCategory = $_[1];
    $opentaskText = $_[2];

    
    $characterLimit = 130;

    if ($sourceCategory eq "CLEANUP")
    {
	@articlesTmp = (getCategoryArticles ("Category:Wikipedia articles needing priority cleanup"),
			getCategoryArticles ("Category:Cleanup from October 2004"),
			getCategoryArticles ("Category:Cleanup from November 2004"),
			getCategoryArticles ("Category:Cleanup from December 2004"));
    }
    else
    {
	@articlesTmp = getCategoryArticles ($sourceCategory);
    }
			


    # Shuffle and clean up article names; and exclude unwanted entries
    foreach $article (@articlesTmp)
    {
	if (($article =~ m/^Wikipedia:/)
	    or ($article =~ m/^Template:/)
	    or ($article =~ m/^User:/)
	    or ($article =~ m/talk:/i)
	    )
	{
	    next;
	}
	@articles = (@articles, $article);
    }

    foreach $article (@articles)
    {
	$article = urlDecode($article);
	$article =~ s/_/ /g;

	$articleUnderscore = $article;
	$articleUnderscore =~ s/ /_/g;
	
	$rank{$article} = rand() + ($::history{"${articleUnderscore}-${categoryID}"} * .5);

	# print " $article: ".$rank{$article}." / ".$::history{"${articleUnderscore}-${categoryID}"}."\n";
    }
    
    # Pick as many articles as will fit in the space allowed
    foreach $article (sort {$rank{$a} <=> $rank {$b}} (keys(%rank)))
    {
	if (length($article)+1 < $characterLimit - length($featuredString))
	{
	    $featuredString .= "[[${article}]],\n";

	    $article =~ s/ /_/g;
	    # Record how many times each article is featured.
	    $::history{"${article}-${categoryID}"}++;
	}
    }

    $featuredStringTmp = $featuredString;
    $featuredStringTmp =~ s/\n/ /g;
    print "Featuring: $featuredStringTmp\n";
    myLog("Featuring: $featuredStringTmp\n");

    foreach $key (sort {$::history{$a} <=> $::history{$b}} (sort(keys (%::history))))
    {
	if ($key =~ m/${categoryID}$/)   
	{
	    if ($::history{$key} > 7)
	    {
		print $::history{$key}." ";
	    }

	    $printedFlag = 0;
	    $tmpKey = $key;
	    $tmpKey =~ s/\-$categoryID$//;
	    # print " '$tmpKey' ";
	    foreach $article (keys(%rank))
	    {
		$article =~ s/ /_/g;
		if ($article eq $tmpKey)
		{
		    if ($::history{$key} > 7)
		    {		 
			print "L ${key}\n"; # Still listed.
		    }
		    $printedFlag = 1;
		    $l++;
		    $l{$::history{$key}}++;
		}
	    }
	    if ($printedFlag == 0)
	    {
#		if ($::history{$key} > 7)
#		{
#		    print "NL ${key}\n"; # Not listed anymore; must be fixed!
#		}
		$nl++;
		$nl{$::history{$key}}++;
	    }
	}
    }

    $total = $l + $nl;
    print "Effectiveness ratio for ${categoryID}: $l L, $nl NL (";
    print sprintf("%.2f", $nl/$total)*100;
    print "%)\n";

    foreach $article (@articles)
    {
	$articleUnderscore = $article;
	$articleUnderscore =~ s/ /_/g;
	if ($::history{"${articleUnderscore}-${categoryID}"} < 1)
	{
	    $neverListed++;
	}
    }

    print "0 L: $neverListed\n";

    foreach $key (sort(keys(%l)))
    {
	print $key." L: ".$l{$key}."\n";
    }

    foreach $key (sort(keys(%nl)))
    {
	print $key." NL: ".$nl{$key}."\n";
    }



    # Insert into actual page text and finish
    $opentaskText =~ s/(<!--START-PEARLE-INSERT-$categoryID-->).*?(<!--END-PEARLE-INSERT-$categoryID-->)/${1}\n$featuredString${2}/gs;
    return ($opentaskText);
}

# Get a list of the names of articles in a given category.
sub getCategoryImages
{
    my ($target, $request, $response, $reply, $images, @images,
	$attemptStartTime, $attemptFinishTime, $image, %imagesHash);


    $target = $_[0];

    #urlSafe ($target);


    unless ($target =~ m/^Category:/)

    {
	myLog ("getCategoryImages(): Are you sure '$target' is a category?\n");
	die ("getCategoryImages(): Are you sure '$target' is a category?\n");
    }

    # Monitor wiki server responsiveness
    $attemptStartTime = Time::HiRes::time();

    # Create a request-object
    print "GET http://en.wikipedia.org/wiki/${target}\n";
    myLog("GET http://en.wikipedia.org/wiki/${target}\n");
    $request = HTTP::Request->new(GET => "http://en.wikipedia.org/wiki/${target}");
    $response = $::ua->request($request);

    if ($response->is_success)
    {
	# Monitor wiki server responsiveness
	$attemptFinishTime = Time::HiRes::time();
	retry ("success", "getCategoryImages", sprintf("%.3f", $attemptFinishTime-$attemptStartTime));

	$reply = $response->content;

	# This detects whether or not we're logged in.
	unless ($reply =~ m%<a href="/wiki/User_talk:Pearle">My talk</a>%)
	{
	    # We've lost our identity.
	    myLog ("Wiki server is not recognizing me (2).\n---\n${reply}\n---\n");
	    die ("Wiki server is not recognizing me (2).\n");
	}

	$images = $reply;
	$images =~ s%^.*?<table class="gallery"%%s;
	$images =~ s%<div class="printfooter">.*?$%%s;
	@images = $images =~ m%<a\s+href="/wiki/(.*?)"\s+title=\"Image:%sg;


	# Uniqify to prevent duplicates
	foreach $image (@images)
	{
	    $imagesHash{$image} = 1;
	}
	@images = ();
	foreach $image (sort(keys(%imagesHash)))
	{
	    @images = (@images, $image);
	}

	$::ua->cookie_jar->save();
	return decodeArray(@images);
    } 
    else 
    {
	myLog ("getCategoryImages($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n");

	# 50X HTTP errors mean there is a problem connecting to the wiki server
	if (($response->status_line =~ m/^500/)
	    or ($response->status_line =~ m/^502/)
	    or ($response->status_line =~ m/^503/))
	{
	    print "getCategoryImages($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n".$response->content."\n";
	    return(decodeArray(retry("getCategoryImages", @_)));
	}
	else
	{
	    # Unhandled HTTP response
	    die ("getCategoryImages($target): HTTP ERR (".$response->status_line.") http://en.wikipedia.org/wiki/${target}\n");
	}
    }
}