# User:AnomieBOT/source/tasks/ReplaceExternalLinks3.pm
# Approved 2011-01-28. Wikipedia:Bots/Requests for approval/AnomieBOT 50
package tasks::ReplaceExternalLinks3;
=pod
=begin metadata
Bot: AnomieBOT
Task: ReplaceExternalLinks3
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT 50
Status: Completed 2011-12-28
Created: 2011-01-06
Process pages linking to <nowiki>http://www.nr.nps.gov/</nowiki>:
* Replace links beginning with "<nowiki>http://www.nr.nps.gov/multiples/</nowiki>" with the corresponding link starting "<nowiki>http://pdfhost.focus.nps.gov/docs/NRHP/Text/</nowiki>".
* Replace {{tl|cite web}} templates with url <nowiki>http://www.nr.nps.gov/</nowiki> with {{tl|NRISref}}.
* Tag other {{tl|cite web}} templates and non-{{tl|cite web}} links with {{tl|NRIS dead link}}.
=end metadata
=cut
use utf8;
use strict;
use Data::Dumper;
use POSIX;
use Date::Parse;
use AnomieBOT::Task qw/:time/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
# Constructor. Delegates object creation to the parent class, then resets
# the 'iter' slot so that run() builds a fresh page iterator on first use.
sub new {
    my ($class)=@_;

    my $self=$class->SUPER::new();
    $self->{'iter'}=undef;

    # Re-bless into the requested class and hand the object back.
    return bless($self, $class);
}
=pod
=for info
Approved 2011-01-28.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 50]]
=cut
# Approval status reported to the bot framework; always -1.
# NOTE(review): presumably a negative value marks the task as no longer
# to be run (the metadata says "Status: Completed") - confirm against
# AnomieBOT::Task.
sub approved {
    my $approval=-1;
    return $approval;
}
# Main task body.
#
# Parameters:
#   $self - this task object; $self->{'iter'} holds the page iterator
#           between calls so that a later call resumes where this one
#           stopped.
#   $api  - the bot API object used for all wiki access and logging.
#
# For each article-namespace page returned by list=exturlusage for
# www.nr.nps.gov, the page text is rewritten as follows:
#   * links starting "http://www.nr.nps.gov/multiples/" are pointed at
#     "http://pdfhost.focus.nps.gov/docs/NRHP/Text/";
#   * {{cite web}} (or a redirect to it) whose url is the bare NRIS site
#     is replaced by {{NRISref|<date>}};
#   * any other {{cite web}} with an NRIS url, and any bracketed or bare
#     NRIS link, gets {{NRIS dead link}} appended;
#   * "{{convert|0.9|acre}}" inside {{Infobox NRHP}} (or a redirect to
#     it) becomes "less than one acre".
#
# Returns:
#   60    - a template lookup or the page list query failed
#   300   - the edit token request reported code 'shutoff'
#   0     - the bot is halting, or the 5 minute time budget is used up
#   undef - the iterator is exhausted
# NOTE(review): the meaning of these values is defined by the framework
# (presumably seconds until the next call, undef = nothing left to do) -
# confirm against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;

    $api->task('ReplaceExternalLinks3', 0, 10, qw/d::Redirects d::Templates d::Nowiki/);

    my $screwup='Errors? [[User:'.$api->user.'/shutoff/ReplaceExternalLinks3]]';

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    # Get list of citation templates. An entry under the empty key carries
    # the error when the lookup failed.
    my %templates=$api->redirects_to_resolved(
        'Template:Cite web',
    );
    if(exists($templates{''})){
        $api->warn("Failed to get citation template redirects: ".$templates{''}{'error'}."\n");
        return 60;
    }

    # Get list of infobox templates
    my %infoboxes=$api->redirects_to_resolved(
        'Template:Infobox NRHP',
    );
    if(exists($infoboxes{''})){
        $api->warn("Failed to get infobox template redirects: ".$infoboxes{''}{'error'}."\n");
        return 60;
    }

    # Get target template
    my %t=$api->resolve_redirects('Template:NRISref');
    if(exists($t{''})){
        $api->warn("Failed to get NRISref template redirect: ".$t{''}{'error'}."\n");
        return 60;
    }
    my $NRISref=$t{'Template:NRISref'};
    $NRISref=~s/Template://;

    # Create the page iterator only once; it is kept on the object so that
    # an early return below does not lose our position in the list.
    if(!defined($self->{'iter'})){
        $self->{'iter'}=$api->iterator(
            list => 'exturlusage',
            eunamespace => 0,
            euprop => 'title',
            euquery => 'www.nr.nps.gov',
            eulimit => '1000', # exturlusage has issues with big lists
        );
    }

    while(my $pg=$self->{'iter'}->next){
        if(!$pg->{'_ok_'}){
            $api->warn("Failed to retrieve page list for ".$self->{'iter'}->iterval.": ".$pg->{'error'}."\n");
            return 60;
        }

        return 0 if $api->halting;

        my $page=$pg->{'title'};
        my $tok=$api->edittoken($page, EditRedir => 1);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $page: ".$tok->{'error'}."\n");
            next;
        }
        if(exists($tok->{'missing'})){
            $api->warn("WTF? $page does not exist?\n");
            next;
        }

        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
        my $outtxt=$intxt;

        # Counters feeding the edit summary:
        #   $fix  - cite web templates replaced with NRISref
        #   $fix2 - moved "multiples" links updated
        #   $fix9 - "{{convert|0.9|acre}}" occurrences repaired
        #   $mark - links/templates tagged with {{NRIS dead link}}
        my ($fix,$fix2,$fix9,$mark)=(0,0,0,0);

        # Replace simple moved links
        $fix2+=($outtxt=~s!http://www\.nr\.nps\.gov/multiples/!http://pdfhost.focus.nps.gov/docs/NRHP/Text/!g);

        # Replace the citation templates
        my $nowiki;
        $outtxt=$api->process_templates($outtxt, sub {
            my ($name, $params, $wikitext)=@_;

            if(exists($infoboxes{"Template:$name"})){
                $fix9+=($wikitext=~s/\Q{{convert|0.9|acre}}\E/less than one acre/g);
                return $wikitext;
            }

            return undef unless exists($templates{"Template:$name"});

            # Pull the url and date parameters out of the cite web call.
            my ($url,$date,$dt)=('','no date specified','');
            foreach my $param ($api->process_paramlist(@$params)){
                $param->{'name'}=~s/^\s+|\s+$//g;
                $param->{'value'}=~s/^\s+|\s+$//g;
                if($param->{'name'} eq 'url'){
                    $url=$param->{'value'};
                } elsif($param->{'name'} eq 'date'){
                    $dt=$param->{'value'};
                }
            }

            # A citation of the bare site becomes {{NRISref}}.
            if($url=~m!^http://www\.nr\.nps\.gov/?$!){
                my $d=str2time($dt);
                if(defined($d)){
                    $d=strftime('%F', gmtime $d);
                    # Specific dates are translated to the identifier that
                    # is passed as the first parameter of NRISref.
                    $date='2010a' if $d eq '2010-07-09';
                    $date='2009a' if $d eq '2009-03-13';
                    $date='2008b' if $d eq '2008-04-24';
                    $date='2008a' if $d eq '2008-04-15';
                    $date='2007b' if $d eq '2007-06-30';
                    $date='2007a' if $d eq '2007-01-23';
                    $date='2006a' if $d eq '2006-03-15';
                }
                # Unparseable date: fall back to the raw parameter text.
                $d//=$dt;
                #$api->warn("Unknown date $d in $page\n") if $date eq 'no date specified';
                $date=$d if($date eq 'no date specified' && $d ne '');
                $fix++;
                return "{{$NRISref|$date}}";
            }

            # Any other NRIS url inside cite web is just tagged.
            if($url=~m!^http://www\.nr\.nps\.gov/!){
                $mark++;
                return $wikitext."{{NRIS dead link}}";
            }

            return undef;
        });

        # Hide cite web templates, we already processed them
        ($outtxt,$nowiki)=$api->strip_templates($outtxt, sub {
            my $name=shift;
            return exists($templates{"Template:$name"});
        }, {}, $nowiki);

        # Mark any bracketed external link.
        $mark+=($outtxt=~s!(\[http://www\.nr\.nps\.gov(?:[/:][^][<>\x22\x00-\x20\x7F]*)?(?: *[^\]\x00-\x08\x0a-\x1F]*?)\])!$1\{{NRIS dead link}}!g);

        # Hide all bracketed external links.
        ($outtxt,$nowiki)=$api->strip_regex(qr{\[http://[^][<>\x22\x00-\x20\x7F]+ *[^\]\x00-\x08\x0a-\x1F]*?\]}, $outtxt, $nowiki);

        # Mark any bare external link.
        $mark+=($outtxt=~s!\b(http://www\.nr\.nps\.gov(?:[/:][^][<>\x22\x00-\x20\x7F]*)?)! fixExtLink($1) !ge);

        # Unstrip
        $outtxt=$api->replace_stripped($outtxt,$nowiki);

        # Avoid doubling up on the template. Repeat until stable, since
        # collapsing one pair can create a new adjacent pair; every
        # collapsed duplicate is taken back off the count.
        my $ct=0;
        do {
            $ct=($outtxt=~s/\{\{NRIS dead link\}\}\s*\{\{NRIS dead link\}\}/{{NRIS dead link}}/g);
            $mark-=$ct;
        } while($ct>0);

        if($outtxt ne $intxt){
            my @summary=();
            push @summary, "replacing $fix NRIS {{cite web}} template".($fix==1?'':'s')." with {{$NRISref}}" if $fix;
            push @summary, "updating $fix2 moved NRIS link".($fix2==1?'':'s') if $fix2;
            # Pluralize on $mark, the count this fragment actually reports.
            push @summary, "marking $mark NRIS link".($mark==1?'':'s')." with {{NRIS dead link}}" if $mark;
            push @summary, "repairing $fix9 [[User talk:Elkman#NRHP places having area of .9 acres, etc.|incorrect data entry code".($fix9==1?'':'s')."]]" if $fix9;
            unless(@summary){
                $api->warn("Changes made with no summary for $page, not editing");
                next;
            }

            # Join as "a and b" for two items, "a, b, and c" for more.
            $summary[$#summary]='and '.$summary[$#summary] if @summary>1;
            my $summary=ucfirst(join((@summary>2)?', ':' ', @summary));

            $api->log("$summary in $page");
            my $r=$api->edit($tok, $outtxt, "$summary. $screwup", 1, 1);
            if($r->{'code'} ne 'success'){
                $api->warn("Write failed on $page: ".$r->{'error'}."\n");
                next;
            }
        }

        # If we've been at it long enough, let another task have a go.
        return 0 if time()>=$endtime;
    }

    $api->log("May be DONE!");
    $self->{'iter'}=undef;
    return undef;
}
# Duplicate Mediawiki post-processing of bare external links.
#
# Takes the raw text matched as a bare URL and splits off whatever should
# not count as part of the link. Returns the URL wrapped as a bracketed
# link (the URL doubles as its label), followed by {{NRIS dead link}} and
# then the split-off text in its original order.
sub fixExtLink {
    my ($link)=@_;
    my $tail='';

    # Cut at the first angle bracket, literal or written as &lt; / &gt;.
    if($link=~s/((?:[<>]|&[lg]t;).*$)//){
        $tail=$1.$tail;
    }

    # Trailing punctuation is not part of the link. A closing parenthesis
    # only counts as punctuation when the link has no opening one.
    my $punct=',;\.:!?';
    if($link!~/\(/){
        $punct.=')';
    }
    if($link=~s/([$punct]+$)//){
        $tail=$1.$tail;
    }

    # There shouldn't be a template inside the url
    if($link=~s/(\{\{.*$)//){
        $tail=$1.$tail;
    }

    return "[$link $link]{{NRIS dead link}}$tail";
}
1;