# User:AnomieBOT/source/tasks/DatedCategoryCreator.pm

package tasks::DatedCategoryCreator;

=pod

=begin metadata

Bot:      AnomieBOT
Task:     DatedCategoryCreator
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 64
Status:   Approved 2012-05-30
+BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 65
+Status:   Approved 2012-06-04
Created:  2012-05-07

Create needed categories under [[:Category:Wikipedia maintenance categories sorted by month]] and [[:Category:Wikipedia categories sorted by month]].

=end metadata

=cut

use utf8;
use strict;

use POSIX;
use Data::Dumper;
use AnomieBOT::API;
use AnomieBOT::Task qw/:time bunchlist/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Parent categories whose monthly children are NOT named
# "<parent> from <month>". Maps parent name => the prefix that goes in front
# of the month.
my %catmap = (
    'Uncategorized pages'
        => 'Uncategorized from ',
    'Articles needing expert attention by month'
        => 'Articles needing expert attention from ',
    'Wikipedia articles with style issues by month'
        => 'Wikipedia articles with style issues from ',
);

# Inverse lookup: monthly prefix => parent name.
my %rcatmap = reverse %catmap;

# Parent categories that are never processed.
my %skipcat = (
    'Articles with invalid date parameter in template' => 1,
);

# Parent categories that may have monthly children dated before 2010.
my %oldcats = (
    'Wikipedia articles in need of updating' => 1,
);

# Parent categories that may have monthly children in the future, with the
# permitted look-ahead in seconds. Any parent not listed here gets a
# look-ahead of 86400 seconds.
my %futurecats = (
    'Articles containing potentially dated statements' => 31622400,
    'Articles that include images for deletion'        => 8*86400,
    'Clean-up categories'                              => 31622400,
);

# Derived, non-configuration globals.

# English month names, in calendar order.
my @months = qw/
    January February March April May June
    July August September October November December
/;

# Compiled pattern matching any single month name.
my $monthre = do {
    my $alternation = join('|', @months);
    qr/$alternation/;
};

# Month name => month number (1..12).
my %monthnum = ();
@monthnum{@months} = (1 .. @months);

# Constructor: obtains the object from the parent class constructor,
# re-blesses it into the requested class, and starts it off with an
# undefined 'iter' slot.
sub new {
    my ($class)=@_;

    my $self=bless $class->SUPER::new(), $class;
    $self->{'iter'}=undef;

    return $self;
}

=pod

=for info
Approved 2012-05-30.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 64]]

=for info
Supplemental BRFA approved 2012-06-04.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 65]]

=cut

# Approval indicator for this task; always the constant 2.
sub approved { 2 }

# Main entry point of the task. Creates missing monthly categories in three
# passes:
#   1. the current month's category for every subcategory of the two
#      "sorted by month" meta categories;
#   2. any dated category found on pages in "Category:Articles with invalid
#      date parameter in template";
#   3. any non-empty, non-hidden "Clean-up categories from <month> <year>"
#      category.
#
# Parameters:
#   $self - the task object
#   $api  - the API object used for all queries, logging and edits
#
# Returns 0 when the bot is halting, 60 after a failed listing query, any
# true value handed back by make_dated_cat(), or 7200 once all passes are
# complete.
# NOTE(review): the numeric results are presumably "seconds until the next
# run" as interpreted by the task framework - confirm against AnomieBOT::Task.
sub run {
    my ($self, $api)=@_;
    my ($res, $iter);

    $api->task('DatedCategoryCreator',0,0,qw/d::Talk d::Redirects d::Templates/);

    # Titles already handed to make_dated_cat() during this run, so that no
    # category is attempted twice.
    my %did=();

    # "Current month" is computed one day ahead of now, so the next month's
    # categories are created on the last day of the running month.
    my @t=gmtime(time()+86400);
    my $curmonth=strftime('%B %Y', @t);

    # Part 1: Create needed categories for the current month
    for my $cat ('Wikipedia categories sorted by month', 'Wikipedia maintenance categories sorted by month'){
        $res=$api->query(
            generator    => 'categorymembers',
            gcmtitle     => "Category:$cat",
            gcmnamespace => 14,
            gcmtype      => 'subcat',
            gcmlimit     => 'max',
        );
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to get members of $cat: ".$res->{'error'}."\n");
            return 60;
        }
        # Turn every parent into its expected monthly title (skipped parents
        # yield nothing) and look those titles up in bunches of 500.
        $iter=$api->iterator(
            titles => bunchlist(500, map monthly_for_cat($_->{'title'}, $curmonth), values %{$res->{'query'}{'pages'} // {}}),
        );
        # A lexical is used here rather than the global $_, which the calls
        # made inside the loop would be free to clobber.
        while(my $p=$iter->next){
            return 0 if $api->halting;

            if(!$p->{'_ok_'}){
                $api->warn("Failed to retrieve members in ".$iter->iterval.": ".$p->{'error'}."\n");
                return 60;
            }

            # Only pages flagged as missing need creating
            next unless exists($p->{'missing'});

            $did{$p->{'title'}}=1;
            $res=$self->make_dated_cat($api, $p->{'title'}, 'current month');
            return $res if $res;
        }
    }

    # Part 2: Look through pages in [[:Category:Articles with invalid date
    # parameter in template]] for more categories to create
    $iter=$api->iterator(
        generator => 'categorymembers',
        gcmtitle  => 'Category:Articles with invalid date parameter in template',
        gcmlimit  => '500',
        prop      => 'categories',
        cllimit   => 'max',
    );
    while(my $p=$iter->next){
        if(!$p->{'_ok_'}){
            $api->warn("Could not retrieve category members from iterator: ".$p->{'error'}."\n");
            return 60;
        }
        for my $c (@{$p->{'categories'} // []}) {
            # Minimal sanity check; make_dated_cat() will do more
            next unless $c->{'title'}=~/ $monthre \d{4}$/;
            next if exists($did{$c->{'title'}});
            $did{$c->{'title'}}=1;
            $res=$self->make_dated_cat($api, $c->{'title'}, 'non-empty month');
            return $res if $res;
        }
    }

    # Part 3: Look for missing dated categories beginning with "Clean-up categories from".
    $iter = $api->iterator(
        list     => 'allcategories',
        acprefix => 'Clean-up categories from ',
        acprop   => 'size|hidden',
        aclimit  => 'max',
        formatversion => 2,
    );
    while ( my $c = $iter->next ) {
        if ( ! $c->{'_ok_'} ) {
            $api->warn( "Could not retrieve category list from iterator: " . $c->{'error'} . "\n" );
            return 60;
        }
        next unless $c->{'category'} =~ / $monthre \d{4}$/;
        next if $c->{'hidden'} || $c->{'size'} <= 0;

        # allcategories entries carry the bare name under 'category' and have
        # no 'title' field, so the full title is built here and used as the
        # de-duplication key, matching the keys stored by Parts 1 and 2.
        my $title = 'Category:' . $c->{'category'};
        next if exists( $did{$title} );
        $did{$title}=1;
        $res = $self->make_dated_cat( $api, $title, 'non-empty month' );
        return $res if $res;
    }

    # No more pages to check for now
    return 7200;
}

# Figure out the name for the monthly cat based on the parent and the month
# Build the title of the monthly category that belongs to a parent category.
#
#   $title - parent category title, with or without a leading "Category:"
#   $month - text to append after the prefix (e.g. "May 2012")
#
# Returns the empty list when the parent is listed in %skipcat. Otherwise
# returns "Category:" followed by the parent's prefix from %catmap (or
# "<parent> from " when it has none) and $month.
sub monthly_for_cat {
    my ($title, $month)=@_;

    (my $name=$title)=~s/^Category://;
    if(exists($skipcat{$name})){
        return ();
    }

    my $prefix=$catmap{$name};
    $prefix="$name from " unless defined($prefix);

    return 'Category:'.$prefix.$month;
}

# Create the dated category, if it seems to be sane
# Create the dated category, if it seems to be sane.
#
# Parameters:
#   $self  - the task object
#   $api   - the API object used for the query, logging and the edit
#   $title - full title of the monthly category, including "Category:"
#   $for   - short phrase inserted into the log line and the edit summary
#
# Returns a false value when the caller may simply carry on: 0 when the
# category was skipped, the edit token could not be obtained or the write
# failed, and undef after a successful creation. Returns 300 when the
# parent-category query failed or the edit token reported 'shutoff'; run()
# passes any true result straight back to its own caller.
sub make_dated_cat {
    my ($self,$api,$title,$for)=@_;

    # Figure out the parent cat, and calculate the parameters for {{Monthly
    # clean-up category}}.
    my $parent=$title;
    my $tmplparams='';
    $parent=~s/^Category://;
    # Without a "<month> <year>" suffix there is nothing sane to create, and
    # $1/$2 would otherwise hold captures left over from an earlier match.
    return 0 unless $parent=~s/($monthre) (\d{4})$//;
    my ($m,$y)=($1,$2);
    if(exists($rcatmap{$parent})){
        # Irregularly named category: map the prefix back to its parent and
        # pass the parent name on to the template.
        $parent=$rcatmap{$parent};
        $tmplparams.="|cat=$parent";
    } else {
        return 0 unless $parent=~s/ from $//;
        return 0 if exists($skipcat{$parent});
    }

    # Reject months that lie further ahead than the look-ahead permitted for
    # this parent (86400 seconds unless %futurecats says otherwise), and
    # months before 2010 unless the parent is listed in %oldcats.
    my $dt = $futurecats{$parent} // 86400;
    my @t=gmtime(time()+$dt);
    return 0 if $y>$t[5]+1900;
    return 0 if($y==$t[5]+1900 && $monthnum{$m}>$t[4]+1);
    return 0 if(!exists($oldcats{$parent}) && $y<2010);

    # Check whether the parent cat actually exists and is a subcat of the meta
    # cat.
    my $res=$api->query(
        titles       => "Category:$parent",
        prop         => 'categories|templates',
        clcategories => 'Category:Wikipedia maintenance categories sorted by month|Category:Wikipedia categories sorted by month',
        tltemplates  => 'Template:Parent monthly maintenance category',
    );
    if($res->{'code'} ne 'success'){
        # Report the 'error' field, as the query-failure warning in run() does
        $api->warn("Failed to check: ".$res->{'error'}."\n");
        return 300;
    }
    $res=(values %{$res->{'query'}{'pages'}})[0];
    if(exists($res->{'missing'})){
        $api->warn("Did not create category $title, because parent category Category:$parent does not exist");
        return 0;
    }

    # Either list may be absent from the result, so default both to empty.
    my @parentcats=@{$res->{'categories'} // []};
    my @parenttmpls=@{$res->{'templates'} // []};

    # Pick the template to put on the new page from the meta category (and
    # template) found on the parent.
    my ($type, $txt);
    if(grep $_->{'title'} eq 'Category:Wikipedia maintenance categories sorted by month', @parentcats){
        if ( grep $_->{'title'} eq 'Template:Parent monthly maintenance category', @parenttmpls ) {
            $type='maintenance category';
            $txt="{{Monthly maintenance category$tmplparams}}";
        } else {
            $type='clean-up category';
            $txt="{{Monthly clean-up category$tmplparams}}";
        }
    } elsif(grep $_->{'title'} eq 'Category:Wikipedia categories sorted by month', @parentcats){
        $type='category';
        $txt="{{Monthly maintenance category$tmplparams}}";
    } else {
        $api->warn("Did not create category $title, because parent category $parent is not in Category:Wikipedia maintenance categories sorted by month or Category:Wikipedia categories sorted by month");
        return 0;
    }

    # Ok, create it!
    my $tok=$api->edittoken($title, EditRedir=>1);
    if($tok->{'code'} eq 'shutoff'){
        $api->warn("Task disabled: ".$tok->{'content'}."\n");
        return 300;
    }
    if($tok->{'code'} eq 'pageprotected' || $tok->{'code'} eq 'botexcluded'){
        # Skip protected and excluded pages
        return 0;
    }
    if($tok->{'code'} ne 'success'){
        $api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
        return 0;
    }
    # Only ever create; a token result without 'missing' is left alone
    return 0 unless exists($tok->{'missing'});

    $api->log("Creating monthly dated $type for $for in $title");
    my $r=$api->edit($tok, $txt, "Creating monthly dated $type for $for", 1, 1);
    if($r->{'code'} ne 'success'){
        $api->warn("Write failed on $title: ".$r->{'error'}."\n");
        return 0;
    }

    return undef;
}

1;