User:AnomieBOT/source/tasks/RedirectBypasser.pm

package tasks::RedirectBypasser;

=pod

=begin metadata

Bot:      AnomieBOT
Task:     RedirectBypasser
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 63
Status:   Approved 2012-06-06
Created:  2012-05-11
OnDemand: true

Bypass redirects in certain limited cases, currently:
* To prevent confusion when a username is being usurped.

=end metadata

=cut

use utf8;
use strict;

use POSIX;
use Data::Dumper;
use AnomieBOT::API;
use AnomieBOT::Task qw/bunchlist/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Page to notify of errors
my $notify='User talk:Nard the Bard';

# Edit summary used for every redirect-bypassing edit
my $summary='Bypassing redirects to prevent confusion after usurpation of [[User:N]], per [[User:AnomieBOT/req/Redirect bypass 1|request]]. Errors? [[User:AnomieBOT/shutoff/RedirectBypasser]]';

# List redirects to bypass
my @redirects=(
    'User:N',
    'User:N/Editcounter',
    'User:N/POTUSgallery',
    'User:N/admincoaching',
    'User:N/getqualified',
    'User:N/modern Jesus',
    'User:N/monobook.js',
    'User:N/mountainofskulls',
    'User talk:N',
    'User talk:N/Archive 1',
    'User talk:N/Archive 2',
    'User talk:N/Archive 3',
);

# List of pages to ignore. In run(), plain strings are matched against the
# whole page title exactly, while qr{} entries are used as regular expressions.
my @skip=(
    qr{^Wikipedia:Articles for deletion/Log/},
    'Wikipedia:Changing username/Usurpations',
    'Wikipedia:Changing username/Usurpations/Completed/33',
    'User talk:Nard the Bard',
    'User talk:N',
    'User talk:Anomie',
    'User talk:AnomieBOT',
    'Wikipedia:Bots/Requests for approval/AnomieBOT 63',
);

# Constructor. Obtains the object from the parent class constructor, then
# initialises the scan state that run() keeps between calls:
#   iter  - backlinks iterator of the scan in progress (undef = none yet)
#   err   - hashref of page title => reason the page could not be fixed
#   noerr - flag; when true, run() withholds the error report for this pass
sub new {
    my ($class)=@_;

    my $self=$class->SUPER::new();
    @{$self}{qw/iter err noerr/}=(undef, {}, 0);

    return bless $self, $class;
}

=pod

=for info
Approved 2012-06-06<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 63]]

=cut

# Approval status of this task; always the constant -1.
# NOTE(review): a negative value presumably tells the framework the task is
# not to be run automatically (metadata says "OnDemand: true") -- confirm
# against AnomieBOT::Task.
sub approved {
    my $status=-1;
    return $status;
}

# Main task entry point.
#
# Parameters:
#   $self - this task object; $self->{'iter'}, $self->{'err'} and
#           $self->{'noerr'} carry scan state from one call to the next.
#   $api  - the API object used for all wiki access and logging.
#
# Each call:
#   1. Resolves @redirects and builds, for every title that resolved to a
#      different title, a regex matching the ways that title can be written
#      inside a wikilink.
#   2. Starts (or resumes) an iteration over the backlinks of those redirects
#      and rewrites the links in each page to point at the resolved target.
#   3. Once the iterator is exhausted, reports the pages that could not be
#      fixed to $notify and resets the scan state.
#
# Returns a number; judging by the comments at the return sites this is the
# delay in seconds before run() should be called again (0 = right away).
# NOTE(review): confirm against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('RedirectBypasser',0,0,qw/d::Talk d::Redirects d::Nowiki d::IWNS/);

    my %r=$api->resolve_redirects(@redirects);
    if(exists($r{''})){
        $api->warn("Failed to get redirects: ".$r{''}{'error'}."\n");
        return 60;
    }
    my %redirects=();
    my %re=();
    my $nsre=$api->namespace_re();
    my %nsmap=$api->namespace_map();
    while(my ($k,$v)=each %r){
        # A title that resolves to itself needs no bypassing.
        next if $k eq $v;
        $redirects{$k}=$v;

        # Build a regex for the link target as it may appear in wikitext:
        #  - the namespace prefix is replaced by the API's regex for that
        #    namespace, with optional spaces/underscores around the colon;
        #  - the first character of the page name is case-insensitive;
        #  - each literal space matches any run of spaces/underscores;
        #  - leading and trailing spaces/underscores are tolerated.
        my $re=$k;
        my $prefix='';
        $prefix.=$api->namespace_re($nsmap{$1})."[ _]*:[ _]*" if $re=~s/^($nsre)://;
        $prefix.="(?i:\Q$1\E)" if $re=~s/^(.)//;
        $re=quotemeta($re);
        $re=~s/\\ /[ _]+/g;
        $re='[ _]*'.$prefix.$re.'[ _]*';
        $re{$k}=qr/$re/;
    }
    unless(%redirects){
        $api->warn("No redirects to process!");
        return 3600;
    }

    # Combine @skip into a single regex. Plain strings become exact-title
    # matches; entries that are already regexes are used unchanged. A separate
    # list is built so the file-level @skip is not rewritten through the loop
    # alias.
    my @skipre=();
    for my $entry (@skip){
        push @skipre, (ref($entry) eq 'Regexp' ? $entry : qr/^\Q$entry\E$/);
    }
    # An empty alternation would match every title and cause every page to be
    # skipped, so use a never-matching pattern when there is nothing to skip.
    my $skip=@skipre ? join('|',@skipre) : '(?!)';
    $skip=qr/$skip/;

    # Resume the scan in progress, or start a new one with fresh error state.
    my $iter=$self->{'iter'};
    if(!$iter){
        $iter=$api->iterator(
            generator    => 'backlinks',
            gbltitle     => [keys %redirects],
            gbllimit     => 'max',
        );
        $self->{'iter'}=$iter;
        $self->{'err'}={};
        $self->{'noerr'}=0;
    }
    my $endtime=time()+300;
    while(my $p=$iter->next){
        return 0 if $api->halting;

        if(!$p->{'_ok_'}){
            $api->warn("Failed to retrieve backlinks for ".$iter->iterval.": ".$p->{'error'}."\n");
            return 60;
        }

        next if $p->{'title'}=~/$skip/;
        # Forget any error recorded for this page earlier in the scan; it is
        # re-evaluated below.
        delete $self->{'err'}{$p->{'title'}};

        my $tok=$api->edittoken($p->{'title'}, EditRedir=>1);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} eq 'pageprotected'){
            $api->warn("Cannot edit $p->{title}: Page is protected\n");
            $self->{'err'}{$p->{'title'}}="page is protected";
            next;
        }
        if($tok->{'code'} eq 'botexcluded'){
            $api->warn("Cannot edit $p->{title}: Bot exclusion applies\n");
            $self->{'err'}{$p->{'title'}}="bot exclusion ({{tl|bots}} or {{tl|nobots}}) applies";
            next;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $p->{title}: ".$tok->{'error'}."\n");
            $self->{'noerr'}=1;
            return 0;
        }

        # Work on the text with strip_nowiki's placeholders in place, and put
        # the stripped content back after the substitutions.
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
        my ($outtxt,$nowiki)=$api->strip_nowiki($intxt);

        while(my ($k,$v)=each(%redirects)){
            my $re=$re{$k};
            # Piped link: swap only the target, keep the existing label.
            $outtxt=~s/\[\[(?::[ _]*){0,2}$re\|/[[:$v|/g;
            # Unpiped link: point at the target and keep the old text as label.
            $outtxt=~s/\[\[(?::[ _]*)?((?::[ _]*)?$re)\]\]/[[:$v|$1]]/g;
        }

        $outtxt=$api->replace_nowiki($outtxt, $nowiki);

        if($intxt ne $outtxt){
            $api->log("Bypassing redirects in $p->{title}");
            $res=$api->edit($tok, $outtxt, $summary, 1, 1);
            if($res->{'code'} ne 'success'){
                $api->warn("Write failed on $p->{title}: ".$res->{'error'}."\n");
            }
            # Set on failure and on success alike. On success this covers the
            # case where one of the "cannot find redirects" pages transcluded
            # this one. With noerr set, the error report for this pass is
            # withheld and, if errors were recorded, the scan restarts at once
            # (see the end of this sub).
            $self->{'noerr'}=1;
        } else {
            # Purge, thanks to bug 5382
            $res=$api->query(action=>'purge', titles=>$p->{'title'}, forcelinkupdate=>1);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to purge $p->{title}: ".$res->{'error'}."\n");
                $self->{'noerr'}=1;
                next;
            }
            # Now check links again
            $res=$api->query(titles=>$p->{'title'}, prop=>'links', pllimit=>'max', pltitles=>$iter->iterval);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to recheck links for $p->{title}: ".$res->{'error'}."\n");
                $self->{'noerr'}=1;
                next;
            }
            $res=(values %{$res->{'query'}{'pages'}})[0]{'links'} // [];
            if(grep $_->{'title'} eq $iter->iterval, @$res){
                # The page still links to the redirect but no link matched.
                $api->warn("Cannot find redirects in $p->{title}\n");
                $self->{'err'}{$p->{'title'}}="cannot find redirects; they are probably template-generated or oddly formatted";
            } else {
                # Problem resolved!
            }
        }
    } continue {
        # If we've been at it long enough, let another task have a go.
        # The check sits in a continue block so it also runs for iterations
        # that end in "next"; unchanged pages still cost a purge and a links
        # query. It runs before the next page is pulled from the iterator, so
        # returning here does not drop a page.
        return 0 if time()>$endtime;
    }

    # The iterator is exhausted. $ne is true when errors were recorded but
    # something happened during this pass (noerr) that makes them unreliable.
    my $ne=($self->{'noerr'} && %{$self->{'err'}});
    if(!$ne && %{$self->{'err'}}){
        my $msg="The following issues were encountered while bypassing redirects:\n";
        for my $p (sort keys %{$self->{'err'}}) {
            $msg.="* [[:$p]]: ".$self->{'err'}{$p}."\n";
        }
        $msg.="Please bypass the redirects in these pages manually. Thanks.";
        $api->whine("Redirect bypass issues", $msg, Pagename => $notify);
    }

    # Reset so the next call starts a fresh scan.
    $self->{'iter'}=undef;
    $self->{'err'}={};
    $self->{'noerr'}=0;

    # Restart the scan immediately to find out if there are any real errors.
    return 0 if $ne;

    $api->log("Task may be complete!");

    # No more pages to check for now
    return 600;
}

1;