User:AnomieBOT/source/tasks/TemplateUnsubstifier.pm
Approved 2013-11-17 Wikipedia:Bots/Requests for approval/AnomieBOT II 2 |
package tasks::TemplateUnsubstifier;
=pod
=begin metadata
Bot: AnomieBOT II
Task: TemplateUnsubstifier
BRFA: Wikipedia:Bots/Requests for approval/AnomieBOT II 2
Status: Approved 2013-11-17
Created: 2013-10-31
Apply [[Module:Unsubst]] to maintenance templates.
=end metadata
=cut
use utf8;
use strict;
use POSIX;
use Data::Dumper;
use AnomieBOT::API;
use AnomieBOT::Task qw/bunchlist ISO2timestamp/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;
# Constructor. Builds the object via the parent class and then makes sure the
# three pieces of per-process state used by run() start out undefined:
#   'templates'     - cached list of template titles still to be processed
#   'templates rev' - revision id of the list page that cache was built from
#   'nextrun'       - epoch time before which run() does nothing
sub new {
    my ($class) = @_;

    my $self = $class->SUPER::new();
    @{$self}{ 'templates', 'templates rev', 'nextrun' } = (undef) x 3;

    return bless $self, $class;
}
=pod
=for info
Approved 2013-11-17<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT II 2]]
=cut
# Approval status of this task; always the constant 200.
# NOTE(review): the meaning of the number is defined by the bot framework
# (presumably a positive value marks the task as approved to run) -- confirm
# against AnomieBOT::Task.
sub approved {
    200;
}
# run( $api )
#
# Main entry point for the task. Works through the templates listed on
# [[Wikipedia:AutoWikiBrowser/Dated templates]] and rewrites each eligible one
# so that its body is wrapped in an invocation of Module:Unsubst (the actual
# text transformation is done by upgrade_unsubst).
#
# State used:
#   $self->{'nextrun'} / $api->store->{'nextrun'}
#       Epoch time before which this method does nothing.
#   $self->{'templates'}
#       Cached list of template titles still to be processed.
#   $self->{'templates rev'}
#       Revision id of the list page the cache was built from; the cache is
#       dropped when the page's lastrevid no longer matches.
#   $api->store->{"lastrev $pageid"}
#       Revision id recorded for each template already handled, so unchanged
#       templates are left out when the list is rebuilt.
#
# Returns a number: 0 when halting or when the 300-second work budget is used
# up, 60 after a failed API request, 300 when the task is shut off or retries
# are pending, and otherwise the number of seconds until 'nextrun'.
# NOTE(review): presumably the framework waits that many seconds before
# calling run() again -- confirm against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;
    my ($res, $iter);

    # NOTE(review): the d::* names presumably load the API decorators that
    # provide helpers such as resolve_redirects/process_templates -- confirm.
    $api->task('TemplateUnsubstifier',0,0,qw/d::Talk d::Redirects d::Templates/);

    # Time to run?
    my $nextrun = $self->{'nextrun'} // $api->store->{'nextrun'} // 0;
    my $t = $nextrun - time();
    return $t if $t > 0;

    # If we hold a cached template list, make sure the list page has not been
    # edited since the cache was built; if it has, throw the cache away.
    if ( defined( $self->{'templates rev'} ) ) {
        $res = $api->query(
            titles => 'Wikipedia:AutoWikiBrowser/Dated templates',
            prop => 'info',
        );
        if ( $res->{'code'} ne 'success' ) {
            $api->warn( "Failed to load info for Wikipedia:AutoWikiBrowser/Dated templates: " . $res->{'error'} . "\n" );
            return 60;
        }
        $res = (values %{$res->{'query'}{'pages'}})[0];
        if ( $res->{'lastrevid'} ne $self->{'templates rev'} ) {
            $self->{'templates'} = undef;
            $self->{'templates rev'} = undef;
        }
    }

    # Get the list of templates to check
    my @templates;
    if ( defined( $self->{'templates'} ) ) {
        @templates = @{$self->{'templates'}};
    } else {
        $api->log( "Loading templates list from Wikipedia:AutoWikiBrowser/Dated templates" );
        $res = $api->query(
            titles => 'Wikipedia:AutoWikiBrowser/Dated templates',
            prop => 'revisions',
            rvprop => 'ids|timestamp|content',
            rvslots => 'main',
            rvlimit => 1,
        );
        if ( $res->{'code'} ne 'success' ) {
            $api->warn( "Failed to load Wikipedia:AutoWikiBrowser/Dated templates: " . $res->{'error'} . "\n" );
            return 60;
        }
        # From here on $res is the latest revision of the list page.
        $res = (values %{$res->{'query'}{'pages'}})[0]{'revisions'}[0];

        # Only trust a revision that is at least a day (86400 s) old.
        $t = ISO2timestamp( $res->{'timestamp'} ) + 86400;
        if ( $t > time() ) {
            # Wait, so some vandal can't so easily add a bogus template to the
            # list and get the bot to edit it.
            # Round $t down to a multiple of 86400 and add one day plus 120
            # seconds, i.e. two minutes past the following day boundary.
            $nextrun = $t - ($t % 86400) + 86520;
            $api->store->{'nextrun'} = $self->{'nextrun'} = $nextrun;
            $t = $nextrun - time();
            return $t if $t > 0;
        }

        # Extract the template names from {{tl|...}} / {{tlx|...}} links in
        # the page text, after normalising underscores and dropping any
        # explicit "Template:" prefix.
        my $txt = $res->{'slots'}{'main'}{'*'};
        $txt = $api->strip_nowiki($txt);
        $txt =~ s/_/ /g;
        $txt =~ s/\{\{\s*Template\s*:/\{\{/gi;
        @templates = ($txt=~/\{\{\s*[tT]lx?\s*\|\s*([^|]+?)\s*(?:\||\}\})/g);

        # Replace every listed name with its redirect target. An entry keyed
        # by the empty string signals that the lookup failed.
        my %templates = $api->resolve_redirects( map "Template:$_", @templates );
        if ( exists( $templates{''} ) ) {
            $api->warn( "Failed to resolve redirects in target template list: " . $templates{''}{'error'} . "\n" );
            return 60;
        }
        @templates = (values %templates);

        # Fetch page info for the targets and keep only existing, non-redirect
        # pages in namespace 10 whose latest revision differs from the one
        # recorded in the store.
        $iter = $api->iterator(
            titles => bunchlist( 500, @templates ),
            prop => 'info',
        );
        @templates = ();
        while ( my $p = $iter->next ) {
            return 0 if $api->halting;

            if ( !$p->{'_ok_'} ) {
                $api->warn( "Failed to retrieve templates for WP:AWB/DT templates: " . $p->{'error'} . "\n" );
                return 60;
            }

            next unless $p->{'ns'} == 10; # Sanity check
            next unless exists( $p->{'pageid'} ) && exists( $p->{'lastrevid'} ); # Page missing or invalid?
            if ( exists( $p->{'redirect'} ) ) { # Redirect?
                $api->warn( "How did we manage to get a redirect ($p->{title}) in here? Skipping it." );
                next;
            }
            next if $p->{'lastrevid'} eq ( $api->store->{'lastrev ' . $p->{'pageid'}} // 0 );
            push @templates, $p->{'title'};
        }

        $self->{'templates'} = [@templates];
        $self->{'templates rev'} = $res->{'revid'};
    }

    # Check each template
    my $endtime = time() + 300;   # work for at most 300 seconds per call
    my @retry = ();
    my $re = $api->redirect_regex();
    while ( @templates ) {
        return 0 if $api->halting;

        my $title = shift @templates;
        $res = $api->query(
            titles => $title,
            prop => 'revisions',
            rvprop => 'ids|content',
            rvslots => 'main',
            rvlimit => 1,
        );
        if ( $res->{'code'} ne 'success' ) {
            $api->warn( "Failed to load $title: " . $res->{'error'} . "\n" );
            return 60;
        }
        $res = (values %{$res->{'query'}{'pages'}})[0];
        my $pageid = $res->{'pageid'};
        my $revid = $res->{'revisions'}[0]{'revid'};
        my $intxt = $res->{'revisions'}[0]{'slots'}{'main'}{'*'};
        (my $name = $title) =~ s/^Template://;
        my $outtxt = undef;

        # The template may have disappeared since the list was built, in
        # which case the result carries no page id. There is nothing to edit
        # and nothing meaningful to record, so just move on.
        unless ( defined( $pageid ) ) {
            $api->log( "$title no longer exists, skipping" );
            goto skip;
        }

        # Sanity check
        if ( $intxt =~ /$re/ ) {
            $api->warn( "HELP: $title looks like a redirect, refusing to edit\n" );
            goto skip;
        }

        # Split into template and noinclude parts, then process.
        # $1 is the template code (leading whitespace dropped); $2 is any run
        # of trailing <noinclude> sections, each ending at </noinclude> or at
        # the end of the text. Trailing whitespace is discarded.
        if ( $intxt =~ /^\s*+(.+?)((?><noinclude>(?>[^<]+|<(?!\/?noinclude))*(?:<\/noinclude>|$))*)\s*$/s ) {
            my $trail = $2;
            my ($txt, $params) = $self->upgrade_unsubst( $api, $title, $1 );
            # upgrade_unsubst returns undef when the template should be left
            # alone; $outtxt then stays undefined and no edit is made.
            $outtxt = "{{ {{{|safesubst:}}}#invoke:Unsubst|$params|\$B=\n$txt\n}}$trail" if defined( $txt );
        } else {
            $api->warn( "HELP: $title doesn't match the basic regular expression, refusing to edit\n" );
            goto skip;
        }

        if ( defined( $outtxt ) ) {
            my $tok = $api->edittoken( $title, EditRedir => 1 );
            if ( $tok->{'code'} eq 'shutoff' ) {
                $api->warn( "Task disabled: " . $tok->{'content'} . "\n" );
                return 300;
            } elsif ( $tok->{'code'} eq 'pageprotected' || $tok->{'code'} eq 'botexcluded' ) {
                # Skip protected and excluded pages
                $api->warn( "HELP: Cannot edit $title: " . $tok->{'error'} . "\n" );
            } elsif ( $tok->{'code'} ne 'success' ) {
                $api->warn( "Failed to get edit token for $title: " . $tok->{'error'} . "\n" );
                push @retry, $title;
            } elsif ( exists( $tok->{'missing'} ) ) {
                # Was deleted, ignore
            } elsif ( $tok->{'lastrevid'} ne $revid ) {
                # Edited since it was loaded! Retry it.
                push @retry, $title;
            } else {
                $api->log( "Unsubstifying $title" );
                # NOTE(review): the two trailing 1s are presumably the
                # minor-edit and bot-edit flags -- confirm against
                # AnomieBOT::API::edit.
                my $r = $api->edit( $tok, $outtxt, "[[Module:Unsubst|Unsubstifying]] template, so {{subst:$name}} results in {{$name|date=...}}", 1, 1 );
                if ( $r->{'code'} ne 'success' ) {
                    $api->warn( "Write failed on $title: " . $r->{'error'} . "\n" );
                    push @retry, $title;
                } else {
                    # Record our own edit as the revision that was handled.
                    $revid = $r->{'edit'}{'newrevid'};
                }
            }
        }

        skip:
        # Remember which revision was examined (only possible when the page
        # actually exists) and keep the cached work list in sync so that a
        # later call resumes where this one stopped.
        $api->store->{"lastrev $pageid"} = $revid if defined( $pageid );
        $self->{'templates'} = [@templates, @retry];

        return 0 if time() > $endtime;
    }

    return 300 if @retry;

    # No more pages to check for now
    $self->{'templates'} = undef;
    # Schedule the next pass for 120 seconds after the next multiple of 86400
    # in epoch time.
    $t = time();
    $nextrun = $t - ($t % 86400) + 86520;
    $api->store->{'nextrun'} = $self->{'nextrun'} = $nextrun;
    return $nextrun - time();
}
# upgrade_unsubst( $api, $title, $txt )
#
# Decide whether the template code $txt (the page text of $title without its
# trailing <noinclude> sections) can be wrapped in Module:Unsubst, and if so
# work out what to wrap.
#
# Returns ($body, $params) on success, where $body is the wikitext to place in
# the module's $B parameter and $params is the parameter string for the
# invocation (always beginning with '|date=__DATE__'). Returns undef when the
# template must not be edited: it already invokes Module:Unsubst, or its
# content could not be handled (in which case a "HELP:" warning is issued
# through $api->warn).
#
# Three situations are distinguished:
#   1. No mention of "unsubst" at all: wrap $txt unchanged, provided it
#      survives being passed as a single template parameter.
#   2. Already uses #invoke:Unsubst: nothing to do.
#   3. Uses the older {{ifsubst|{{unsubst|...}}|body}} construction: pull out
#      the body and carry over the key/value pairs given to {{unsubst}}.
sub upgrade_unsubst {
    my ( $self, $api, $title, $txt ) = @_;

    my $params = '|date=__DATE__';

    # If it doesn't have "unsubst", it's fine to wrap.
    unless ( $txt =~ /unsubst/i ) {
        # sanity-check that the existing template code doesn't break template
        # syntax, see https://en.wikipedia.org/w/index.php?diff=prev&oldid=582081088
        # The code is embedded as the sole parameter of a dummy template whose
        # name cannot collide with real wikitext; if the parser then sees
        # anything other than exactly one parameter, the code contains
        # something (e.g. a stray pipe or brace) that would break the wrapper.
        my $tmp = $api->process_templates( "{{\x02foo\x03|1=$txt}}", sub {
            my $name = shift;
            my $args = shift;
            return undef unless $name eq "\x02foo\x03";
            return 'bad' unless @$args == 1;
            return 'ok';
        } );
        return ($txt, $params) if $tmp eq 'ok';
        $api->warn( "HELP: $title contains unwrappable content" );
        return undef;
    }

    # If it already uses Module:Unsubst, then we don't need to do anything to
    # it.
    return undef if $txt =~ /#invoke\s*:\s*[uU]nsubst\s*\|/;

    # Sanity check: if it contains anything other than a top-level
    # {{ifsubst}}, fail.
    my $module = 0;
    my $unsubst = undef;   # first parameter of {{ifsubst}}: the substituted case
    my $body = undef;      # second parameter of {{ifsubst}}: the transcluded case
    my $tmp = $api->process_templates( $txt, sub {
        my $name = shift;
        my $args = shift;
        # Ignore the usual "safesubst:" decorations in front of the name.
        $name =~ s!<includeonly>safesubst:</includeonly>!!;
        $name =~ s!\{\{\{\|safesubst:\}\}\}!!;
        return undef unless $name =~ /^\s*[iI]fsubst\s*$/;
        foreach ($api->process_paramlist(@$args)) {
            $unsubst = $_->{'value'} if $_->{'name'} eq '1';
            ($body = $_->{'value'}) =~ s/^\s+|\s+$//g if $_->{'name'} eq '2';
        }
        # Replace the {{ifsubst}} call with nothing, so that whatever text is
        # left over afterwards is by definition "something else".
        return '';
    } );
    unless ( $tmp =~ /^\s*$/ ) {
        $api->warn( "HELP: $title contains text other than {{ifsubst}}, cannot edit\n" );
        return undef;
    }
    unless ( $unsubst ) {
        $api->warn( "HELP: $title doesn't have anything in the 'unsubst' case of {{ifsubst}}, cannot edit\n" );
        return undef;
    }
    unless ( $body ) {
        $api->warn( "HELP: $title doesn't have anything in the 'body' case of {{ifsubst}}, cannot edit\n" );
        return undef;
    }

    # Extract parameters from the existing invocation of {{unsubst}}
    my $found = 0;
    $api->process_templates( $unsubst, sub {
        my $name=shift;
        my $uparams=shift;
        # Strip any (safe)subst: prefix, in each of the forms it may take.
        $name =~ s!^<includeonly>(?:safe)?subst:</includeonly>!!i;
        $name =~ s!^\{\{\{\|(?:safe)?subst:\}\}\}!!i;
        $name =~ s!^(?:safe)?subst:!!i;
        return undef unless $name =~ /^\s*[uU]nsubst\s*$/;
        $found = 1;

        my %params = ();
        foreach ($api->process_paramlist(@$uparams)) {
            $params{$_->{'name'}} = $_->{'value'};
        }

        # Parameters 2/3, 4/5, ... 18/19 are read as key/value pairs
        # (parameter 1 is not used here).
        for ( my $i = 1; $i < 10; $i++ ) {
            my ( $k, $v ) = ( $params{ $i*2 } // '', $params{ $i*2+1 } // '' );
            $k =~ s/^\s+|\s+$//g;
            $v =~ s/^\s+|\s+$//g;
            # 'date' is already supplied by the default $params.
            next if $k eq '' || $k eq 'date';
            # A value of exactly {{{key|¬}}} is dropped rather than carried
            # over. The literal braces are escaped because newer perls
            # deprecate or reject an unescaped "{" inside a pattern.
            next if $v =~ /^\{\{\{\Q$k\E\|¬\}\}\}$/;
            $params .= " |$k=$v";
        }
        # Leave the text untouched; only the side effects above matter.
        return undef;
    } );
    if ( !$found ) {
        $api->warn( "HELP: $title doesn't contain {{unsubst}} in the 'unsubst' case of {{ifsubst}}, cannot edit\n" );
        return undef;
    }

    return ($body, $params);
}
1;