User:AnomieBOT/source/tasks/ACNClerk.pm

package tasks::ACNClerk;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    ACNClerk
BRFA:    Wikipedia:Bots/Requests for approval/AnomieBOT 51
Status:  Approved 2011-01-24
Created: 2011-01-13

Update crosslinks on [[WP:ACN]] and [[WT:ACN]] when content is archived.

=end metadata

=cut

use utf8;
use strict;

# Apparently this is experimental in the version of Perl on toolforge
no warnings qw( experimental::lexical_subs );
use feature qw( lexical_subs );

use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

my $version=1;

use Data::Dumper;

sub new {
    my $class=shift;
    my $self=$class->SUPER::new;
    bless $self, $class;
    return $self;
}

=pod

=for info
Approved 2011-01-24<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 51]]

=cut

sub approved {
    return 2;
}

sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('ACNClerk', 0, 10, qw(d::Timestamp d::Redirects d::Talk d::Templates));

    if(($api->store->{'version'}//0) != $version){
        foreach my $k (keys %{$api->store}) {
            delete $api->store->{$k} if $k=~/^revid /;
        }
        $api->store->{'version'}=$version;
    }

    my $starttime=time();

    my %slink = $api->redirects_to_resolved( 'Template:Slink' );
    if ( exists( $slink{''} ) ) {
        $api->warn( "Failed to get redirects to Template:Slink: " . $slink{''}{'error'} . "\n" );
        return 60;
    }
    my %acn = $api->redirects_to_resolved( 'Wikipedia:Arbitration Committee/Noticeboard', 'Wikipedia talk:Arbitration Committee/Noticeboard' );
    if ( exists( $acn{''} ) ) {
        $api->warn( "Failed to get redirects to WP:ACN: " . $acn{''}{'error'} . "\n" );
        return 60;
    }

    # Figure out which pages need re-scanning
    my %scan=();
    my @WPpages=();
    my @WTpages=();
    my $iter=$api->iterator(
        generator    => 'allpages',
        gapnamespace => [4,5],
        gapprefix    => 'Arbitration Committee/Noticeboard',
        gaplimit     => 'max',
        prop         => 'info',
    );
    while(my $p=$iter->next){
        if(!$p->{'_ok_'}){
            $api->warn("Could not retrieve page from iterator: ".$p->{'error'}."\n");
            return 60;
        }
        my $title=$p->{'title'};
        my $t=$title; $t=~s/^(?:Wikipedia|Wikipedia talk)://;
        next unless $title=~/^Wikipedia(?: talk)?:Arbitration Committee\/Noticeboard(?:\/Archive (\d+))?$/;
        push @WPpages, $title if $iter->iterval==4;
        push @WTpages, $title if $iter->iterval==5;
        $scan{$title}=$p->{'lastrevid'} unless exists($api->store->{"toc $title"}) && $p->{'lastrevid'}==($api->store->{"revid $title"}//0);
    }
    return 3600 unless %scan;

    # Load the headers for the needed pages
    foreach my $title (keys %scan) {
        return 0 if $api->halting;
        $api->log("Scanning section headings in $title");
        $res=$api->query(
            action => 'parse',
            title  => $title,
            text   => "__TOC__\n{{:$title}}",
            prop   => 'sections',
        );
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to retrieve section list for $title: ".$res->{'error'});
            return 60;
        }
        my @s=();
        foreach my $s (@{$res->{'parse'}{'sections'}}) {
            push @s, { line => $s->{'line'}, anchor => $s->{'anchor'} };
            if($s->{'anchor'}=~/_(\d+)$/){
                my $n=$1;
                unless($s->{'line'}=~/[ _]$n$/){
                    $s->{'anchor'}=~s/_\d+$//;
                    push @s, { line => $s->{'line'}, anchor => $s->{'anchor'} } unless $a;
                }
            }
        }
        $api->store->{"toc $title"}=\@s;
        $api->store->{"revid $title"}=$scan{$title};
    }

    # Construct the mappings
    my %WPmap=();
    foreach my $title (@WPpages) {
        foreach my $s (@{$api->store->{"toc $title"}}) {
            my $tt=$title;
            if(exists($WPmap{$s->{'anchor'}})){
                my $t=$WPmap{$s->{'anchor'}};
                my $n1=($title=~/\/Archive (\d+)$/)?$1:1e100;
                my $n2=($t=~/\/Archive (\d+)$/)?$1:1e100;
                $tt=($n1>=$n2)?$title:$t;
                #$api->warn("Duplicate section heading $s->{anchor} in $title and $t, using $tt");
            }
            $WPmap{$s->{'anchor'}}=$tt;
        }
    }
    my %WTmap=();
    foreach my $title (@WTpages) {
        foreach my $s (@{$api->store->{"toc $title"}}) {
            my $tt=$title;
            if(exists($WTmap{$s->{'anchor'}})){
                my $t=$WTmap{$s->{'anchor'}};
                my $n1=($title=~/\/Archive (\d+)$/)?$1:1e100;
                my $n2=($t=~/\/Archive (\d+)$/)?$1:1e100;
                $tt=($n1>=$n2)?$title:$t;
                #$api->warn("Duplicate section heading $s->{anchor} in $title and $t, using $tt");
            }
            $WTmap{$s->{'anchor'}}=$tt;
        }
    }

    # Load the list of pages to scan for links
    my %scanlinks=();
    $iter=$api->iterator(
        generator    => 'backlinks',
        gblnamespace => '4|5',
        gbltitle     => [keys %scan],
        gbllimit     => 'max',
        gblredirect  => 1,
    );
    while(my $p=$iter->next){
        if(!$p->{'_ok_'}){
            $api->warn("Could not retrieve backlinks from iterator: ".$p->{'error'}."\n");
            return 60;
        }
        my $title=$p->{'title'};
        next unless $title=~/^Wikipedia(?: talk)?:Arbitration Committee\/Noticeboard(?:\/Archive (\d+))?$/;
        $scanlinks{$title}=1;
    }

    # Find all links to relevant pages, with their anchors.
    foreach my $title (keys %scanlinks) {
        return 0 if $api->halting;
        $api->log("Scanning links in $title");

        my $tok=$api->edittoken($title);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $title: ".$tok->{'error'});
            return 60;
        }
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

        # Find all relevant anchors
        my %anchors = ();
        while ( $intxt=~/\[\[([^#|\]]++)#([^|\]]++)(?=\||\]\])/g ) {
            my ($p,$a) = ($1,$2);
            next unless exists($acn{fixpage($p)});
            $anchors{$a} = undef;
        }
        my $fail = 0;
        $api->process_templates( $intxt, sub {
            return undef if $fail;

            my $name = shift;
            my $params = shift;

            return undef unless exists($slink{$name}) || exists($slink{"Template:$name"});
            my ($p,$a);
            ($p,$a,$fail) = procparams( $api, $params );
            return undef if $fail;

            return undef unless exists($acn{fixpage($p)});

            $anchors{$a} = undef;
            return undef;
        } );
        if ( $fail ) {
            $api->warn( "{{slink}} with multiple sections is not supported in $title\n" );
            next;
        }
        if ( !%anchors ) {
            $api->log("No links to check in $title");
            next;
        }

        # Map all the anchors to encoded versions for $WPmap/$WTmap
        my @anchors = sort keys %anchors;
        my $txt = '';
        for ( my $i = 0; $i < @anchors; $i++ ) {
            $txt .= "$i:{{anchorencode:$anchors[$i]}}\n";
        }
        $res=$api->query(action=>'expandtemplates',title=>$title,text=>$txt,prop=>'wikitext');
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to retrieve anchor mapping for $title: ".$res->{'error'});
            return 60;
        }
        foreach my $l (split /\n/, $res->{'expandtemplates'}{'wikitext'}) {
            unless($l=~/^(\d+):(.+)$/){
                $api->warn("Invalid response checking anchor mapping in $title");
                return 60;
            }
            $anchors{$anchors[$1]} = $2;
        }

        # Replace all relevant links. Flag them with ENQ (U+0005)
        my $outtxt = $intxt;
        my sub repl {
            my ( $z, $p, $a ) = @_;

            $p = fixpage($p);
            return $z unless exists($acn{$p});

            my $new = !exists($anchors{$a}) ? '' : ($p=~/^Wikipedia:/i) ? ($WPmap{$anchors{$a}}//'') : ($WTmap{$anchors{$a}}//'');
            if ( $new eq '' ) {
                $api->warn("No mapping for \"$a\" in $title");
                return $z;
            }
            return $z unless $new =~ m{/Archive \d+$};
            return "\x05[[$new#$a";
        };
        $outtxt =~ s/\[\[([^#|\]]++)#([^|\]]++)(?=\||\]\])/repl($&,$1,$2)/ge;
        $outtxt = $api->process_templates( $outtxt, sub {
            my $name = shift;
            my $params = shift;
            shift;
            shift;
            my $oname = shift;

            return undef unless exists($slink{$name}) || exists($slink{"Template:$name"});
            my ($p,$a,$fail,@p) = procparams( $api, $params );
            $p = fixpage($p);
            return undef unless exists($acn{$p});

            my $new = !exists($anchors{$a}) ? '' : ($p=~/^Wikipedia:/i) ? ($WPmap{$anchors{$a}}//'') : ($WTmap{$anchors{$a}}//'');
            if ( $new eq '' ) {
                $api->warn("No mapping for \"$a\" in $title");
                return undef;
            }
            return undef unless $new =~ m{/Archive \d+$};

            unshift @p, $a=~/=/ ? "2=$a" : $a;
            unshift @p, $new;
            return "\x05{{$oname|" . join( '|', @p ) . '}}';
        } );

        # Adjust some text, using the ENQ placeholders to flag it. Then remove the ENQs.
        $outtxt=~s/Discuss this at: (?=.*\x05)/Archived discussion at: /g;
        $outtxt=~s/(\x05.*)(?:Discussion|Discuss|Discuss (?:this|announcement|report))/${1}Archived discussion/g;
        $outtxt=~s/\x05//g;

        if($intxt ne $outtxt){
            $api->log("Adjusting links to archived content in $title");
            $res=$api->edit($tok, $outtxt, "Adjusting links to archived content", 1, 1);
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to edit $title: ".$res->{'error'}."\n");
            }
        }
    }

    return 3600;
}

sub fixpage {
    my $p = shift;
    $p=~s/[ _]+/ /g;
    $p=~s/^(?:WP|Project):/Wikipedia:/i;
    $p=~s/^(?:WT|Project talk):/Wikipedia talk:/i;
    return $p;
}

sub procparams {
    my ($api, $params) = @_;
    my ($p,$a);
    my @p = ();
    my $fail = 0;

    foreach ( $api->process_paramlist( @$params ) ) {
        if ( $_->{'name'} eq '1' ) {
            $p = $_->{'value'};
        } elsif ( $_->{'name'} eq '2' ) {
            $a = $_->{'value'};
        } else {
            $fail = 1 if $_->{'name'} =~ /^\d+$/;
            push @p, $_->{'text'};
        }
    }

    if ( $p =~ s/#(.*)$// ) {
        $fail = 1 if defined( $a );
        $a = $1;
    }

    return ($p,$a,$fail,@p);
}

1;