# polbot - generate stub Wikipedia biographies for United States federal
# judges from the Federal Judicial Center (FJC) biographical directory.
#
# Usage: perl polbot.pl FIRSTLETTER [STARTAT]
#
#   FIRSTLETTER  passed to the FJC search as the "lname" query parameter.
#   STARTAT      optional; judges whose listed name sorts (string-wise)
#                before this value are skipped, so an interrupted run can
#                be resumed.
#
# For every judge found the script scrapes the FJC page, assembles wiki
# text, and either creates the article outright (when no page of that name
# has any text yet) or writes it to a subpage under User:Polbot/fjc/ and
# leaves a note on the existing article's talk page.

use strict;
use warnings;
use Perlwikipedia;
use LWP::UserAgent;

my $firstletter = shift;
my $startat     = shift;

# Missing arguments behave as empty strings (as they did implicitly before).
$firstletter = '' unless defined $firstletter;
$startat     = '' unless defined $startat;

# When true, the first generated article is written to judges.txt and the
# script stops instead of editing the wiki.
my $test = 0;

# Earliest epoch time at which the next batch of wiki edits may start.
my $soonest_next_op = time;

# Progress messages are written without trailing newlines; do not buffer.
$| = 1;

# Two-letter postal abbreviations recognised by Expand_states().
my %STATE_NAME_FOR = (
    AL => 'Alabama',        AK => 'Alaska',         AZ => 'Arizona',
    AR => 'Arkansas',       CA => 'California',     CO => 'Colorado',
    CT => 'Connecticut',    DE => 'Delaware',       DC => 'District of Columbia',
    FL => 'Florida',        GA => 'Georgia',        HI => 'Hawaii',
    ID => 'Idaho',          IL => 'Illinois',       IN => 'Indiana',
    IA => 'Iowa',           KS => 'Kansas',         KY => 'Kentucky',
    LA => 'Louisiana',      ME => 'Maine',          MD => 'Maryland',
    MA => 'Massachusetts',  MI => 'Michigan',       MN => 'Minnesota',
    MS => 'Mississippi',    MO => 'Missouri',       MT => 'Montana',
    NE => 'Nebraska',       NV => 'Nevada',         NH => 'New Hampshire',
    NJ => 'New Jersey',     NM => 'New Mexico',     NY => 'New York',
    NC => 'North Carolina', ND => 'North Dakota',   OH => 'Ohio',
    OK => 'Oklahoma',       OR => 'Oregon',         PA => 'Pennsylvania',
    PR => 'Puerto Rico',    RI => 'Rhode Island',   SC => 'South Carolina',
    SD => 'South Dakota',   TN => 'Tennessee',      TX => 'Texas',
    UT => 'Utah',           VT => 'Vermont',        VA => 'Virginia',
    WA => 'Washington',     WV => 'West Virginia',  WI => 'Wisconsin',
    WY => 'Wyoming',
);

# Presidents' names exactly as they are matched in the generated text.
# The first "from <name>" / "by <name>" mention of each one is wikilinked.
my @PRESIDENTS = (
    'George W. Bush',     'William J. Clinton',    'George H.W. Bush',
    'Ronald Reagan',      'Jimmy Carter',          'Gerald Ford',
    'Richard M. Nixon',   'Lyndon B. Johnson',     'John F. Kennedy',
    'Dwight D. Eisenhower', 'Harry S Truman',      'Franklin D. Roosevelt',
    'Herbert Hoover',     'Calvin Coolidge',       'Warren G. Harding',
    'Woodrow Wilson',     'William H. Taft',       'Theodore Roosevelt',
    'William McKinley',   'Benjamin Harrison',     'Grover Cleveland',
    'Chester A. Arthur',  'James A. Garfield',     'Rutherford B. Hayes',
    'Ulysses Grant',      'Andrew Johnson',        'Abraham Lincoln',
    'James Buchanan',     'Franklin Pierce',       'Millard Fillmore',
    'Zachary Taylor',     'James K. Polk',         'John Tyler',
    'Martin Van Buren',   'Andrew Jackson',        'John Quincy Adams',
    'James Monroe',       'James Madison',         'Thomas Jefferson',
    'John Adams',         'George Washington',
);

print "\nStarting polbot\n";
my $pw = Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');
print "Logging in\n";
my $login_status = $pw->login('Polbot', '(bot password)');
die "I can't log in." unless (defined $login_status && $login_status eq 0);

# Get exceptions (to skip)
my %is_finished = ();
print "Getting list of completed judges to skip.\n";
my $todo_list = $pw->get_text('Wikipedia:WikiProject Law/United States federal judges - finished');
$todo_list = '' unless defined $todo_list;
foreach my $line (split(/\n/, $todo_list)) {
    # Only bulleted lines that start with a wikilink name a finished judge.
    if ($line =~ /^\*\s*\[\[([^]]*)\]\]/) {
        $is_finished{$1} = 1;
    }
}

# Pull from FJC
print "Getting list of all judges starting with $firstletter\n";
my @judge_ids = ();
my $url = 'http://www.fjc.gov/servlet/tAsearch?lname=' . $firstletter;
print " $url\n";
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/6.0");
my $res = $ua->get($url);
die "could not connect" unless ($res->is_success);
my $html = $res->content;
while ($html =~ m{<A HREF="/servlet/tGetInfo\?jid=(\d+)">([^<]*)<}g) {
    my ($thisjid, $thisname) = ($1, $2);
    next if $thisname lt $startat;
    next if $is_finished{$thisname};
    push @judge_ids, $thisjid;
}
print scalar(@judge_ids) . " judges found.\n\n";

JUDGE:
foreach my $jid (@judge_ids) {
    my $url = "http://www.fjc.gov/servlet/tGetInfo?jid=$jid";
    print "\n$jid: ";
    $res = $ua->get($url);
    die "could not connect" unless ($res->is_success);
    $html = $res->content;
    $html =~ tr/`/'/;

    my @eds     = ();    # education sentences
    my @jus     = ();    # judgeship paragraphs
    my @pcs     = ();    # professional-career sentences
    my @jcats   = ();    # court-specific categories
    my $pronoun = "He";
    my $active  = 0;

    # initial change: the first open-ended year range ("1990-") becomes
    # "1990-the present" so the range patterns below can match it.
    $html =~ s/(\d)\-\<BR\>/$1-the present<BR>/;

    # extract name ("Last, First [suffix]" as printed in the page heading)
    unless ($html =~ m/\<FONT SIZE\=\+1 COLOR\=BLACK\>\<B\>([^\n]*?) *\<\/B\>\<\/FONT\>/m) {
        # Without this check a stale $1 from an earlier match would be used.
        warn "No name heading found for jid $jid; skipping.\n";
        next JUDGE;
    }
    my $rev_name = $1;
    $rev_name =~ s/ +/ /g;
    $rev_name =~ tr/[]//d;
    unless ($rev_name =~ m/^(.*?)\, (.*?)( Jr\.| II| III| IV)?$/) {
        warn "Cannot split name '$rev_name' for jid $jid; skipping.\n";
        next JUDGE;
    }
    my ($last_name, $given_names, $suffix) = ($1, $2, $3);
    $suffix = '' unless defined $suffix;
    my $name = "$given_names $last_name$suffix";

    # If a page of that name already has text, work in a user subpage.
    my $existing = $pw->get_text($name);
    my $art_name = (defined $existing && $existing =~ /\w/)
                 ? "User:Polbot/fjc/" . $name
                 : $name;
    print "==$rev_name at [[$art_name]]==\n";

    # extract gender
    if ($html =~ m/<BR><B>Gender:<\/B> Female/) {
        $pronoun = "She";
    }

    # extract birth and death info
    my ($birth_date, $birth_year, $birth_loc) = _parse_life_event($html, '<DD>Born');
    my ($death_date, $death_year, $death_loc) = _parse_life_event($html, '<BR>Died');
    $birth_loc = Expand_states($birth_loc);
    $death_loc = Expand_states($death_loc);

    # Extract education
    if ($html =~ m/<BR>\s*<BR><B>Education:<\/B><BR>(.*?)<BR>\s*<BR><B>/i) {
        my $ed_string = $1;
        @eds = split(/<[Bb][Rr]><[Bb][Rr]>/, $ed_string);
        # $ed aliases the array element, so assigning rewrites @eds in place.
        foreach my $ed (@eds) {
            if ($ed =~ m/^(.*), (.*), (\d+)$/) {
                $ed = "$pronoun received a [[$2]] from [[$1]] in $3";
            }
            elsif ($ed =~ m/^Read law, (\d+)$/) {
                $ed = "$pronoun [[read law]] in $1";
            }
            elsif ($ed =~ m/^(.*), (\d+)$/) {
                $ed = "$pronoun graduated from [[$1]] in $2";
            }
        }
    }

    # Extract Professional Career
    if ($html =~ m/<B>Professional Career:<\/B><BR>(.*?)<BR>\s*<BR><B>/i) {
        my $pc_string = $1;
        @pcs = split(/ *<[Bb][Rr]> */, $pc_string);
        foreach my $pc (@pcs) {
            if ($pc =~ m/^Private practice, (.*?), (\d+)\-(\d+|the present)$/) {
                $pc = "$pronoun was in private practice of law in [[$1]] from $2 to $3";
            }
            elsif ($pc =~ m/^Private practice, (.*?), (\d+)$/) {
                $pc = "$pronoun was in private practice of law in [[$1]] in $2";
            }
            elsif ($pc =~ m/^Judge, (.*?), (\d+)\-(\d+|the present)$/) {
                $pc = "$pronoun was a judge to the $1 from $2 to $3";
            }
            elsif ($pc =~ m/^Judge, (.*?), (\d+)$/) {
                $pc = "$pronoun was a judge to the $1 in $2";
            }
            elsif ($pc =~ m/^U\.S\. (Army|Navy)(.*?), (\d+)\-(\d+|the present)$/) {
                $pc = "$pronoun was in the [[United States $1]]$2 from $3 to $4";
            }
            elsif ($pc =~ m/^U\.S\. (Army|Navy)(.*?), (\d+)$/) {
                $pc = "$pronoun was in the [[United States $1]]$2 in $3";
            }
            elsif ($pc =~ m/^(.*), (\d+)\-(\d+|the present)$/) {
                $pc = "$pronoun was a $1 from $2 to $3";
            }
            elsif ($pc =~ m/^(.*), (\d+)$/) {
                $pc = "$pronoun was a $1 in $2";
            }
        }
    }

    # Extract judgeships
    if ($html =~ m/<B>Federal Judicial Service:<\/B><BR>(.*?)<BR>\s*<BR>\s*<B>/si) {
        my $ju_string = $1;
        @jus = split(/ *<[Bb][Rr]><[Bb][Rr]> */, $ju_string);
        foreach my $ju (@jus) {
            if ($ju =~ s/Judge, U\. S\. District Court, ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the [[United States District Court for the $1]]. /) {
                push @jcats, "Judges of the United States District Court for the $1";
            }
            if ($ju =~ s/Justice, U\. S\. District Court for the District of Columbia \[Supreme Court of the District of Columbia\]\s*<[Bb][Rr]>/$last_name was a federal judge to the [[United States District Court for the District of Columbia]]. /) {
                push @jcats, "Judges of the United States District Court for the District of Columbia";
            }
            if ($ju =~ s/Judge, U\. S\. Circuit Courts ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the [[United States circuit court]] $1. /) {
                push @jcats, "Judges of the United States circuit courts";
            }
            # The D.C. Circuit must be handled before the generic Court of
            # Appeals pattern, which would otherwise claim it.
            if ($ju =~ s/Judge, U\. S\. Court of Appeals for District of Columbia Circuit<[Bb][Rr]>/$last_name was a federal judge to the [[United States Court of Appeals for the D.C. Circuit]]. /) {
                push @jcats, "Judges of the United States Court of Appeals for the D.C. Circuit";
            }
            if ($ju =~ s/Judge, U\. S\. Court of Appeals ([^<]*)<[Bb][Rr]>/$last_name was a federal judge to the [[United States Court of Appeals $1]]. /) {
                push @jcats, "Judges of the United States Court of Appeals $1";
            }
            # Any judgeship without a termination notice marks the judge active.
            if ($ju !~ m/Service terminated/i) {
                $active = 1;
            }
        }
    }

    # Mash together.
    my $born = $birth_date ? "$birth_date, $birth_year" : $birth_year;
    my $died = $death_date ? "$death_date, $death_year" : $death_year;

    my $wiki_out = "{{Cleanup FJC Bio}}\n'''${name}''' ";
    $wiki_out .= _format_lifespan($birth_date, $birth_year, $death_date, $death_year);
    if ($death_year) {
        $wiki_out .= "was a ";
    }
    elsif ($active) {
        $wiki_out .= "is a ";
    }
    else {
        $wiki_out .= "is a former ";
    }
    $wiki_out .= "[[United States federal judge]].\n\n";

    if ($birth_loc) {
        $wiki_out .= "$last_name was born in [[$birth_loc]]. ";
    }
    $wiki_out .= "$_. " for (@eds, @pcs);
    $wiki_out .= "\n\n";
    $wiki_out .= "$_\n\n" for @jus;
    if ($death_loc) {
        $wiki_out .= "$pronoun died in [[$death_loc]].\n\n";
    }

    # Persondata wants the name without apostrophes, each word capitalised.
    my $persondata_name = $rev_name;
    $persondata_name =~ s/\'//g;
    $persondata_name =~ s/\b(\w+)\b/\u\L$1/g;

    $wiki_out .= "==External links==\n* {{FJC Bio|$jid}}\n\n";
    $wiki_out .= '<!-- Metadata: see [[Wikipedia:Persondata]] -->' . "\n{{Persondata\n|NAME=$persondata_name\n";
    $wiki_out .= "|ALTERNATIVE NAMES=\n|SHORT DESCRIPTION=[[United States federal judge]]\n|DATE OF BIRTH=";
    $wiki_out .= "$born\n";
    $wiki_out .= "|PLACE OF BIRTH=";
    $wiki_out .= "[[$birth_loc]]" if $birth_loc;
    $wiki_out .= "\n|DATE OF DEATH=";
    $wiki_out .= "$died\n";
    $wiki_out .= "|PLACE OF DEATH=";
    $wiki_out .= "[[$death_loc]]" if $death_loc;
    $wiki_out .= "\n}}\n{{DEFAULTSORT:$rev_name}}\n";

    if ($birth_year) {
        $wiki_out .= "[[Category:$birth_year births]]\n";
    }
    # NOTE(review): a judge with no recorded death year is categorised as
    # living, which may be wrong for old entries -- confirm this is intended.
    if ($death_year) {
        $wiki_out .= "[[Category:$death_year deaths]]\n";
    }
    else {
        $wiki_out .= "[[Category:Living people]]\n";
    }
    $wiki_out .= "[[Category:$_]]\n" for @jcats;

    # Final substitutions - multiple
    $wiki_out =~ s/Nominated by /$last_name was nominated by /g;
    $wiki_out =~ s/Received a recess appointment from /$last_name received a recess appointment from /g;
    $wiki_out =~ s/Confirmed by the Senate/$pronoun was confirmed by the United States Senate/g;
    $wiki_out =~ s/vacated by (.*?);/vacated by [[$1]]./g;
    $wiki_out =~ s/Reassigned /$pronoun was reassigned on /g;
    $wiki_out =~ s/Service terminated on /$last_name<!-- -->'s service was terminated on /g;
    # Match whichever pronoun was used above; the pattern used to be
    # hard-coded to "He" and so never applied to female judges.
    $wiki_out =~ s/\Q$pronoun\E was a State attorney general, ([^\.\;]*?) from/$pronoun was the state attorney general of $1 from/g;
    $wiki_out =~ s/ was a Member of the faculty, / was a member of the faculty of /g;
    $wiki_out =~ s/ was a Faculty, / was a member of the faculty of /g;
    $wiki_out =~ s/on (\w+ \d+, \d+), and received commission on \1/on $1, and received commission the same day/g;
    $wiki_out =~ s/(attorney|general|treasurer|secretary|senator), /$1 of /g;
    $wiki_out =~ s/ a ([AEIO])/ an $1/g;
    $wiki_out =~ s/, (\d+)\-(\d+) from / from $1 to $2 and from /g;
    $wiki_out =~ s/, (\d+) from / in $1 and from /g;

    # Final substitutions - single (only the first mention is wikilinked)
    $wiki_out =~ s/recess appointment/[[recess appointment]]/;
    $wiki_out =~ s/senior status/[[senior status]]/;
    $wiki_out =~ s/U\.S\. Attorney(,| from)/[[United States Attorney]]$1/;
    $wiki_out =~ s/United States Senate/[[United States Senate]]/;
    $wiki_out =~ s/\[\[J\.D\.\]\]/[[Juris Doctor|J.D.]]/;
    $wiki_out =~ s/Law clerk/[[law clerk]]/;

    # Presidents
    foreach my $president (@PRESIDENTS) {
        $wiki_out =~ s/(from|by) (\Q$president\E)/$1 President [[$2]]/;
    }

    # Write
    if ($test) {
        print "Output to file\n";
        open(my $out_fh, '>', 'judges.txt')
            or die "Cannot open judges.txt for writing: $!";
        print {$out_fh} $wiki_out;
        close($out_fh) or die "Cannot close judges.txt: $!";
        die;    # test mode stops after the first judge
    }

    # Throttle: sleep until the next edit slot instead of spinning the CPU.
    my $wait = $soonest_next_op - time;
    $wait = 0 if $wait < 0;
    print "Waiting $wait secs... ";
    sleep $wait if $wait > 0;
    $soonest_next_op = time + 9;

    # Decide before $name is possibly rewritten by redirect resolution.
    my $is_new_article = ($name eq $art_name);

    if ($is_new_article) {
        $pw->edit($art_name, $wiki_out, "Auto-generating new article based on $url");

        my $talkmessage = "{{WPBiography\n|living=";
        $talkmessage .= $death_year ? 'no' : 'yes';
        $talkmessage .= "\n|class=start\n|priority=low\n|needs-infobox=yes\n|politician-work-group=yes\n}}\n\nThis article was automatically created by a perl script. It could use a human's loving touch. ~~~~";
        $pw->edit("Talk:$art_name", $talkmessage, "Auto-adding WPbiography template");
    }
    else {
        # Link to the categories rather than adding the subpage to them.
        $wiki_out =~ s/\[\[Category/[[:Category/g;
        $pw->edit($art_name, $wiki_out, "Auto-generating subpage based on $url");

        # Follow a redirect so the note lands on the real article's talk page.
        my $otherpage = $pw->get_text($name);
        if (defined $otherpage
            && $otherpage =~ m/\#\s*Redirect\s*\[\[\s*(.*?)\s*\]\]/is) {
            $name = $1;
        }

        my $talksofar = $pw->get_text("Talk:$name");
        $talksofar .= "\n==Bot-created subpage==\n\nA temporary subpage at [[$art_name]] was automatically created by a perl script, based on [$url this article] at the [[Biographical Directory of Federal Judges]]. The subpage should either be merged into this article, or moved and disambiguated. ~~~~\n";
        $pw->edit("Talk:$name", $talksofar, "Auto-adding link to subpage at [[$art_name]]");
    }

    # Record the result in the bot's index table.
    my $listsofar = $pw->get_text("User:Polbot/fjc");
    $listsofar .= "|-\n| $rev_name || " . ($is_new_article ? 'yes' : 'no') . " || [[$art_name]]\n";
    $pw->edit("User:Polbot/fjc", $listsofar, "Adding [[$art_name]]");

    print "Article created.\n";
}

# Parse a "Born ..." or "Died ..." line out of an FJC page.
#   $html   - page source
#   $prefix - literal text that introduces the line ('<DD>Born', '<BR>Died')
# Returns (date, year, location): date is "Month Day" or '' when only a year
# is given; location is '' when absent; all three are '' when nothing matched.
sub _parse_life_event {
    my ($html, $prefix) = @_;

    if ($html =~ m/\Q$prefix\E +(\w+) +(\d+), +(\d+)(, +in +[^<]*)?<BR>/) {
        my ($date, $year, $loc) = ("$1 $2", $3, $4);
        $loc = '' unless defined $loc;
        $loc =~ s/^, +in +//;
        return ($date, $year, $loc);
    }
    if ($html =~ m/\Q$prefix\E +(\d+)( +in +[^<]*)?<BR>/) {
        my ($year, $loc) = ($1, $2);
        $loc = '' unless defined $loc;
        $loc =~ s/^ +in +//;
        return ('', $year, $loc);
    }
    return ('', '', '');
}

# Build the parenthesised lifespan that follows the bolded name, including
# its trailing space, or '' when neither birth nor death is known.
# The en dash is spaced when either end carries a full date, unspaced for
# a plain year range.
sub _format_lifespan {
    my ($birth_date, $birth_year, $death_date, $death_year) = @_;

    my $born = $birth_date ? "$birth_date, $birth_year" : $birth_year;
    my $died = $death_date ? "$death_date, $death_year" : $death_year;

    if ($born && $died) {
        my $dash = ($birth_date || $death_date) ? ' &ndash; ' : '&ndash;';
        return "($born$dash$died) ";
    }
    return "(born $born) " if $born;
    return "(died $died) " if $died;
    return '';
}

# Replace two-letter U.S. postal abbreviations in a place name with the full
# state (or DC / Puerto Rico) name, e.g. "Albany, NY" -> "Albany, New York".
# Only stand-alone upper-case pairs are touched, so letters inside a longer
# word are left alone. An undefined argument is returned unchanged.
sub Expand_states {
    my $place = shift;
    return $place unless defined $place;

    $place =~ s/\b([A-Z]{2})\b/exists $STATE_NAME_FOR{$1} ? $STATE_NAME_FOR{$1} : $1/ge;
    return $place;
}