User:AnomieBOT/source/tasks/CategoryCleaner.pm

package tasks::CategoryCleaner;

=pod

=begin metadata

Bot:      AnomieBOT
Task:     CategoryCleaner
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 36
Status:   Approved 2009-12-13
Created:  2009-11-09

Remove pages from categories where the page does not meet the category's
inclusion criteria. For example, [[:Category:Wikipedia redirects]] explicitly
states that it shouldn't contain any actual redirects.

=end metadata

=cut

use utf8;
use strict;

use Data::Dumper;
use AnomieBOT::Task;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Cleanup definitions, one hashref per category to be cleaned. The keys are
# consumed by run():
#   iter     - query parameters expanded into $api->iterator() to list the
#              candidate pages
#   validate - sub($api, $page); a true result means the page must be edited
#   tokopt   - extra options expanded into $api->edittoken()
#   edit     - sub($api, $tok); returns (new wikitext, edit summary)
my @defn=(
    {
        iter => {
            generator   => 'categorymembers',
            gcmtitle    => 'Category:Wikipedia redirects',
            gcmlimit    => '100',
            prop        => 'info',
        },
        validate => sub {
            my ($api, $page)=@_;
            # Only pages carrying a 'redirect' key are out of place here.
            # NOTE(review): presumably set by prop=info for redirects - confirm.
            return exists($page->{'redirect'});
        },
        # NOTE(review): presumably makes edittoken() operate on the redirect
        # page itself rather than its target - confirm against the framework.
        tokopt => { EditRedir => 1 },
        edit => sub {
            my ($api, $tok)=@_;
            my $txt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
            # Strip the category tag together with any sort key and trailing
            # whitespace. Underscores and runs of spaces are interchangeable
            # in MediaWiki titles, so accept both between the two words, and
            # use /g so a duplicated tag does not leave the page categorized.
            $txt=~s/\[\[\s*(?i:Category)\s*:\s*[wW]ikipedia[ _]+redirects\s*(?:\|.*?)?\]\]\s*//g;
            return ($txt,"Removing [[Category:Wikipedia redirects]] as that category is not intended for actual redirects");
        },
    },
);

# Constructor: builds the object through the parent class, then attaches this
# task's state:
#   defn - private copy of the definition list, consumed as run() progresses
#   def  - definition currently being processed (none yet)
#   iter - page iterator for that definition (none yet)
sub new {
    my ($class)=@_;
    my $self=$class->SUPER::new();
    @{$self}{qw/defn def iter/}=([@defn], undef, undef);
    return bless $self, $class;
}

=pod

=for info
Approved 2009-12-13<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 36]]

=cut

# Reports this task's approval value to the caller. The meaning of the number
# is defined by the bot framework and is not visible in this file.
sub approved {
    my $approval=3;
    return $approval;
}

# Work through the cleanup definitions, editing every listed page that the
# definition's 'validate' callback flags.
#
# Arguments:
#   $self - task object; 'defn' holds the definitions still to process,
#           'def'/'iter' hold the definition in progress and its page
#           iterator so a later call can resume where this one stopped
#   $api  - framework API object
#
# Returns 60 after an iterator error, 300 when the edit token reports
# 'shutoff', 0 when the 300-second budget is used up after a successful edit,
# and 21600 once all definitions are done (state is then reset).
# NOTE(review): these are presumably delays in seconds before the next call -
# confirm against the framework.
sub run {
    my $self=shift;
    my $api=shift;

    $api->task('CategoryCleaner', 0, 10, qw//);

    my $deadline=time()+300;

    DEFINITION:
    while(1){
        # Pick up a definition left unfinished by an earlier call, if any.
        my ($rule, $members)=@{$self}{qw/def iter/};
        unless(defined $members){
            $rule=shift @{$self->{'defn'}};
            last DEFINITION unless $rule;
            $members=$api->iterator(%{$rule->{'iter'}});
            @{$self}{qw/iter def/}=($members, $rule);
        }

        PAGE:
        while(my $member=$members->next()){
            unless($member->{'_ok_'}){
                $api->warn("Could not retrieve page from iterator: ".$member->{'error'}."\n");
                return 60;
            }

            next PAGE unless $rule->{'validate'}->($api, $member);

            my $name=$member->{'title'};
            my $token=$api->edittoken($name, %{$rule->{'tokopt'}});
            my $status=$token->{'code'};
            if($status eq 'shutoff'){
                $api->warn("Task disabled: ".$token->{'content'}."\n");
                return 300;
            }
            unless($status eq 'success'){
                $api->warn("Failed to get edit token for $name: ".$token->{'error'}."\n");
                next PAGE;
            }

            my ($new_text, $summary)=$rule->{'edit'}->($api, $token);

            # Nothing to save when the callback declined or changed nothing.
            next PAGE unless defined($new_text)
                && $new_text ne $token->{'revisions'}[0]{'slots'}{'main'}{'*'};

            $api->log("Editing $name: $summary");
            my $outcome=$api->edit($token, $new_text, $summary, 1, 0);
            unless($outcome->{'code'} eq 'success'){
                $api->warn("Failed to edit $name: ".$outcome->{'error'}."\n");
                next PAGE;
            }

            # The time budget is only checked after a successful edit.
            return 0 if time()>=$deadline;
        }

        # This definition is exhausted; forget it so the next one is loaded.
        $self->{$_}=undef for qw/def iter/;
    }

    # All definitions processed: restore the full list for the next cycle.
    @{$self}{qw/def iter/}=(undef, undef);
    $self->{'defn'}=[@defn];
    return 21600;
}

1;