User:AnomieBOT/source/tasks/PeerReviewArchiver.pm

package tasks::PeerReviewArchiver;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    PeerReviewArchiver
BRFA:    Wikipedia:Bots/Requests for approval/AnomieBOT 76
Status:  Approved 2017-02-18
Created: 2017-01-22

Archive old requests for Peer Review:
* Collect titles and archive indexes from [[:Category:Current peer reviews]]:
** After three months if listed at [[Template:FAC peer review sidebar]] or
  [[:Template:Peer review/Unanswered peer reviews sidebar]] or have only one contributor.
** After one month otherwise.
* For each page where both the talk page and the PR archive page exist, and the
  conditions above are satisfied, replace <code><nowiki>{{Peer review|archive=$index}}</nowiki></code> with
  <code><nowiki>{{subst:Close peer review|archive=$index}}</nowiki></code> on the talk
  page and <code><nowiki>{{Peer review page}}</nowiki></code> with
  <code><nowiki>{{Closed peer review page}}</nowiki></code> on the PR.

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

use POSIX qw/strftime/;
use Data::Dumper;

# English month names in calendar order.
my @months = qw(
    January February March April May June
    July August September October November December
);

# Precompiled alternation that matches any single month name.
my $monthre = do {
    my $alternation = join '|', @months;
    qr/$alternation/;
};

# Month name => 1-based month number (January => 1 ... December => 12).
my %monthnum = map { $months[$_] => $_ + 1 } 0 .. $#months;

# Constructor: build the object through the parent class constructor (called
# with no extra arguments) and rebless it into the class new() was invoked on.
sub new {
    my ($class) = @_;
    my $task = $class->SUPER::new;
    return bless $task, $class;
}

=pod

=for info
BRFA approved 2017-02-18<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 76]]

=cut

# Approval indicator for this task; always the constant 2.
# NOTE(review): the meaning of the specific value is defined by the
# AnomieBOT framework, not by this file - confirm there before changing it.
sub approved {
    my $approval = 2;
    return $approval;
}

# Main entry point of the task: close peer reviews that have expired.
#
# Parameters:
#   $self - this task object (not otherwise used here).
#   $api  - API object; every wiki read, edit, log line and warning goes
#           through it.
#
# Outline:
#   1. Resolve the redirects of Template:Peer review and
#      Template:Peer review page.
#   2. Collect "Wikipedia:Peer review/<title>/archive<n>" pages from
#      Category:Current peer reviews, noting whether each was edited within
#      the last month.
#   3. Flag the collected pages that are linked from either sidebar template.
#   4. For every page that is not new, where both the talk page and the PR
#      page exist and are not redirects, and whose expiry condition is met,
#      rewrite the templates on the talk page and then on the PR page.
#   5. (Disabled) prune stale entries from the sidebar templates.
#
# Returns 0 when the API reports it is halting, 60 when redirect resolution
# fails, 300 after any other failure or when the task is shut off, and 43200
# after a clean pass.
# NOTE(review): these look like "seconds until the next run" for the
# framework - confirm against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;

    my $screwup=' Errors? [[User:'.$api->user.'/shutoff/PeerReviewArchiver]]';

    $api->task('PeerReviewArchiver', 0, 10, qw(d::Templates d::Timestamp d::Redirects));

    my %r = $api->redirects_to_resolved( 'Template:Peer review', 'Template:Peer review page' );
    if(exists($r{''})){
        $api->warn("Failed to get redirects: ".$r{''}{'error'}."\n");
        return 60;
    }
    my $templatePeerReview = $r{'Template:Peer review'};
    my $templatePeerReviewPage = $r{'Template:Peer review page'};

    # Keyed by "<title>#<archive index>".
    my %pages = ();

    my @now = gmtime;
    my $curMonth = $now[4] + 1;
    # Take the current year BEFORE shifting the month back. The sidebar
    # clean-up below compares month names against $curMonth (the real current
    # month), so $y has to be the real current year as well; reading it after
    # the decrement yields the previous year throughout January.
    my $y = strftime( '%Y', @now );
    # Cut-off timestamps one and three months ago. POSIX::strftime normalises
    # the out-of-range month values produced by the subtraction.
    $now[4]--;
    my $expiry1 = strftime( '%FT%TZ', @now );
    $now[4]-=2;
    my $expiry3 = strftime( '%FT%TZ', @now );

    # This category holds peer review pages. Filter by expiry.
    my $iter = $api->iterator(
        generator => 'categorymembers',
        gcmtitle => 'Category:Current peer reviews',
        gcmnamespace => 4,
        gcmtype => 'page',
        gcmlimit => 'max',
        prop => 'revisions',
        rvprop => 'timestamp',
        formatversion => 2,
    );
    while ( my $p = $iter->next ) {
        return 0 if $api->halting;

        if(!$p->{'_ok_'}){
            $api->warn("Failed to retrieve pages in [[Category:Current peer reviews]]: ".$p->{'error'}."\n");
            return 300;
        }

        if ( $p->{'title'} =~ /^Wikipedia:Peer review\/(.+)\/archive(\d+)$/ ) {
            $pages{$1 . '#' . $2} = {
                title => $1,
                index => $2,
                isNew => $p->{'revisions'}[0]{'timestamp'} ge $expiry1,
                facSidebar => 0,
                unansweredSidebar => 0,
                closed => 0,
            };
        }
    }

    # Requested to not process PRs listed at [[Template:FAC peer review sidebar]]
    # at [[User talk:Anomie#AnomieBot closing peer reviews - request for update]].
    # Requested to not process PRs listed at [[Template:Peer review/Unanswered peer reviews sidebar]]
    # at [[User talk:AnomieBOT#Further requests for change at Wikipedia peer review]].
    # Both later updated at [[WT:Peer review#About a change to Wikipedia:Peer review/Guidelines]].
    #
    # Each entry is [ sidebar template title, flag to set in %pages ]; the
    # sidebars are scanned in this order.
    my @sidebars = (
        [ 'Template:FAC peer review sidebar', 'facSidebar' ],
        [ 'Template:Peer review/Unanswered peer reviews sidebar', 'unansweredSidebar' ],
    );
    for my $sidebar ( @sidebars ) {
        my ( $sidebarTitle, $flag ) = @$sidebar;

        my $links = $api->iterator(
            generator => 'links',
            titles => $sidebarTitle,
            gplnamespace => 4,
            gpllimit => 'max',
            formatversion => 2,
        );
        while ( my $p = $links->next ) {
            return 0 if $api->halting;

            if(!$p->{'_ok_'}){
                $api->warn("Failed to retrieve pages linked from [[$sidebarTitle]]: ".$p->{'error'}."\n");
                return 300;
            }

            if ( $p->{'title'} =~ /^Wikipedia:Peer review\/(.+)\/archive(\d+)$/ ) {
                my $key = $1 . '#' . $2;
                # Only flag pages already collected from the category; never
                # create new entries here.
                if ( exists( $pages{$key} ) ) {
                    $pages{$key}{$flag} = 1;
                }
            }
        }
    }

    my $err = 0;
    foreach my $p (values %pages) {
        return 0 if $api->halting;

        next if $p->{'isNew'};

        $api->log("Checking $p->{title} archive $p->{index}");

        my $talk = 'Talk:' . $p->{'title'};
        my $pr = 'Wikipedia:Peer review/' . $p->{'title'} . '/archive' . $p->{'index'};

        my $tok1 = $api->edittoken( $talk, EditRedir => 1 );
        if($tok1->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: " . $tok1->{'content'} . "\n");
            return 300;
        }
        if($tok1->{'code'} ne 'success'){
            $api->warn("Failed to retrieve edit token for $talk: " . $tok1->{'error'});
            $err = 1;
            next;
        }

        my $tok2 = $api->edittoken( $pr, EditRedir => 1 );
        if($tok2->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: " . $tok2->{'content'} . "\n");
            return 300;
        }
        if($tok2->{'code'} ne 'success'){
            $api->warn("Failed to retrieve edit token for $pr: " . $tok2->{'error'});
            $err = 1;
            next;
        }

        if ( exists( $tok1->{'missing'} ) ) {
            $api->warn( "Skipping $p->{title} archive $p->{index}, $talk does not exist" );
            next;
        }
        if ( exists( $tok2->{'missing'} ) ) {
            $api->warn( "Skipping $p->{title} archive $p->{index}, $pr does not exist" );
            next;
        }

        if ( exists( $tok1->{'redirect'} ) ) {
            $api->warn( "Skipping $p->{title} archive $p->{index}, $talk is a redirect" );
            next;
        }
        if ( exists( $tok2->{'redirect'} ) ) {
            $api->warn( "Skipping $p->{title} archive $p->{index}, $pr is a redirect" );
            next;
        }

        # Timestamp of the PR page revision returned with its edit token;
        # every expiry test below compares against it.
        my $lastEdit = $tok2->{'revisions'}[0]{'timestamp'};

        my $why = '';

        # Close conditions:
        #  * On FAC sidebar and over 3 months old
        #  * On unanswered sidebar and over 3 months old
        #  * Only one contributor and over 3 months old
        #  * None of the cases above and over 1 month old.
        # See [[WT:Peer review#About a change to Wikipedia:Peer review/Guidelines]]
        if ( $p->{'facSidebar'} ) {
            if ( $lastEdit ge $expiry3 ) {
                $api->warn( "Skipping $p->{title} archive $p->{index}, not expired yet (on FAC sidebar, $lastEdit >= $expiry3)" );
                next;
            }
            $why = 'FAC sidebar but over 3 months old';
        } elsif ( $p->{'unansweredSidebar'} ) {
            if ( $lastEdit ge $expiry3 ) {
                $api->warn( "Skipping $p->{title} archive $p->{index}, not expired yet (on unanswered sidebar, $lastEdit >= $expiry3)" );
                next;
            }
            $why = 'unanswered sidebar but over 3 months old';
        } else {
            # Only "one or fewer" versus "more than one" matters, so two
            # contributors are all that is requested.
            my $res = $api->query(
                [ 'contributors' ],
                titles => $pr,
                prop => 'contributors',
                pclimit => 2,
                formatversion => 2,
            );
            if($res->{'code'} ne 'success'){
                $api->warn("Failed to retrieve contributor count for $pr: " . $res->{'error'});
                $err = 1;
                next;
            }
            # Anonymous contributors are reported as a count, named ones as a list.
            my $ct = ( $res->{'query'}{'pages'}[0]{'anoncontributors'} // 0 ) + @{$res->{'query'}{'pages'}[0]{'contributors'} // []};

            if ( $ct <= 1 ) {
                if ( $lastEdit ge $expiry3 ) {
                    $api->warn( "Skipping $p->{title} archive $p->{index}, only one contributor to $pr and $lastEdit >= $expiry3" );
                    next;
                }
                $why = 'unanswered but over 3 months old';
            } else {
                if ( $lastEdit ge $expiry1 ) {
                    $api->warn( "Skipping $p->{title} archive $p->{index}, not expired yet ($lastEdit >= $expiry1)" );
                    next;
                }
                $why = 'answered, not on sidebars, and over 1 month old';
            }
        }

        # Save the talk page first, then the PR, because editing the PR resets
        # the month timer so both pages would disappear from both cats.

        # Talk page: swap {{Peer review|...}} (or a redirect to it) for
        # {{subst:Close peer review|...}}, keeping the parameters.
        my $intxt1 = $tok1->{'revisions'}[0]{'slots'}{'main'}{'*'};
        my $outtxt1 = $api->process_templates( $intxt1, sub {
            my $name=shift;
            my $params=shift;
            return undef unless ($r{"Template:$name"} // '') eq $templatePeerReview;
            return '{{subst:Close peer review' . (@$params ? '|' . join( '|', @$params ) : '' ) . '}}';
        } );

        # PR page: swap {{Peer review page}} (or a redirect to it) for
        # {{Closed peer review page}}.
        my $intxt2 = $tok2->{'revisions'}[0]{'slots'}{'main'}{'*'};
        my $outtxt2 = $api->process_templates( $intxt2, sub {
            my $name=shift;
            return undef unless ($r{"Template:$name"} // '') eq $templatePeerReviewPage;
            return '{{Closed peer review page}}';
        } );

        if ( $intxt1 ne $outtxt1 ) {
            my $res = $api->edit($tok1, $outtxt1, "Archive expired peer review ($why). $screwup", 0, 0);
            if($res->{'code'} ne 'success'){
                $api->warn("Write for $talk failed: ".$res->{'error'});
                $err = 1;
                next;
            } else {
                $api->log("Archived peer review on $talk ($why)");
            }
        }

        if ( $intxt2 ne $outtxt2 ) {
            my $res = $api->edit($tok2, $outtxt2, "Archive expired peer review ($why). $screwup", 0, 0);
            if($res->{'code'} ne 'success'){
                $api->warn("Write for $pr failed: ".$res->{'error'});
                $err = 1;
                next;
            } else {
                $api->log("Archived peer review on $pr ($why)");
            }
        }

        $p->{'closed'} = 1;
    }

    if ( 0 ) { # Not consensus to enable yet
        # Clean up sidebars
        for my $title ( 'Template:Peer review/Unanswered peer reviews sidebar', 'Template:FAC peer review sidebar' ) {
            my $tok = $api->edittoken( $title );
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: " . $tok->{'content'} . "\n");
                return 300;
            }
            if($tok->{'code'} ne 'success'){
                $api->warn("Failed to retrieve edit token for $title: " . $tok->{'error'});
                $err = 1;
                next;
            }

            my $intxt = $tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
            my ($text,$nowiki)=$api->strip_nowiki($intxt);
            my $outtxt = '';
            for my $line ( split( /^/m, $text ) ) {
                # We only process lines like
                #  * '''Month dd''' [[Wikipedia:Peer review/<title>/archive<n>]]
                #  * '''Month dd''' [[Wikipedia:Peer review/<title>/archive<n>|<text>]]
                unless ( $line =~ /^\*\s*'''($monthre)\s+([0123]?[0-9])'''\s+\[\[\s*(?i:Wikipedia|WP)\s*:\s*Peer[\s_]+review\/(.+)\/archive(\d+)\s*(?:\|[^]]*)?\]\]\s*$/ ) {
                    $outtxt .= $line;
                    next;
                }
                my ( $m, $d, $t, $i ) = ( $1, $2, $3, $4 );
                $t=~s/[\s_]+/ /g;
                if (
                    # Skip if it was in the category and wasn't just closed.
                    exists( $pages{"$t#$i"} ) && ! $pages{"$t#$i"}->{'closed'} ||
                    # Skip if we didn't see it in the category but it's dated in the past month. This will also skip last year etc, but better safe IMO.
                    # Entries carry no year: a month later than the current one is taken to be last year's.
                    ! exists( $pages{"$t#$i"} ) && sprintf( "%04d-%02d-%02dT00:00:00Z", $monthnum{$m} > $curMonth ? $y - 1 : $y, $monthnum{$m}, $d ) ge $expiry1
                ) {
                    $outtxt .= $line;
                    next;
                }
                # Any other matching line is dropped by not copying it to $outtxt.
            }
            $outtxt = $api->replace_nowiki($outtxt,$nowiki);

            if ( $intxt ne $outtxt ) {
                my $res = $api->edit($tok, $outtxt, "Remove missing/expired peer reviews. $screwup", 0, 0);
                if($res->{'code'} ne 'success'){
                    $api->warn("Write for $title failed: ".$res->{'error'});
                    $err = 1;
                } else {
                    $api->log("Cleaned up $title");
                }
            }
        }
    }

    return $err ? 300 : 43200;
}

1;