#!/usr/bin/perl
use Carp qw(cluck confess);
use Memoize;
use warnings FATAL => qw(uninitialized);
use warnings;
use strict qw(subs refs);
# does not do "most articles first day"!
if (defined($ARGV[0]) && ($ARGV[0] ne '-')) {
(open(INPUT,$ARGV[0]) || (die "Can't open $ARGV[0] for input: $!; stopped"));
} else {
(open(INPUT,"-") || (die "Can't dup STDIN for input: $!; stopped"));
}
if (defined($ARGV[1]) && ($ARGV[1] ne '-')) {
(open(OUTPUT,">>" . $ARGV[1]) || (die "Can't open $ARGV[1] for output: $!; stopped"));
} else {
(open(OUTPUT,">-") || (die "Can't dup STDOUT for output: $!; stopped"));
}
$largest_article_size = 0;
$largest_article_user = "";
$largest_article_name = "";
%size_poss_ties = ();
%user_header = (); # without =
%user_first_letter = ();
%user_article_size = ();
%user_total_articles = ();
%user_total_words = ();
%user_total_5k = ();
%user_10k_articles = ();
%user_rollover_words = ();
%user_alias = ();
%user_0k_articles = ();
#^=====???=====
#^{{GOCE Article list
# |articles =
# # {{[Cc]ompleted}} [[???]] (###)
# # {{[Ww]orking}} [[???]] (###) <- optional
# |total-articles = ##
# |total-words = ###
# |rollover-words = ###
# }}
#^----
#^====?====
$curr_first_letter = "";
$user = "";
$user_num = 0;
$expecting = "dashes";
$in_comment = 0;
LINE: while (defined($line = <INPUT>)) {
chomp($line);
if ($line =~ m/^====([0-9a-zA-Z])====$/) {
$curr_first_letter = uc($1);
} elsif (($line =~ m/^\s*$/) || ($line =~ m/^===Totals===\s*$/) ||
($line =~ m/^\s*\{\{CompactTOC8\b/i)) {
next LINE;
} elsif ($line =~ m/^\s*<!--/) {
unless ($line =~ m/-->/) {
$in_comment = 1;
}
next LINE;
} elsif ($in_comment) {
if ($line =~ m/-->/) {
$in_comment = 0;
}
next LINE;
} elsif ($line =~ m/^\s*\#\s*\{\{\s*[Ww]orking\b/) {
next LINE;
} elsif ($line =~ m/^=====((?:[^=]+|.)+?)=====\s*$/) {
my $header = $1;
$header =~ s/^\s+//;
$header =~ s/\s+$//;
unless (($expecting eq "header") || ($expecting eq "dashes")) {
warn "Unexpected header '$header' (expected $expecting)\n";
}
$user = $header;
my $alias = $user;
my $found = 0;
if ($header =~ m/\[\[\s*User:([^\]\|]+)/i) {
$user = $1;
$found = 1;
$alias = $user;
my $qm_user = quotemeta($user);
if ($header =~ m/\[\[\s*User:$qm_user\|\s*([\s\w\.]+)\]\]/) {
$alias = $1;
$alias =~ s/\s+$//;
}
} elsif ($header =~ m/\[\[\s*User[\s_][Tt]alk:([^\]\|]+)/i) {
$user = $1;
$found = 1;
$alias = $user;
} elsif ($header =~ m/^([0-9A-Za-z])$/) {
$curr_first_letter = uc($1);
next LINE;
} else {
warn "Can't distinguish user in '$header'\n";
}
if (exists($user_header{$user})) {
die "Duplicate user '$user' (line '$line'); stopped";
}
$user_alias{$user} = $alias;
if ($found) {
$user_header{$user} = "[[User:$user|$alias]] ("
. "[[User talk:$user|talk]])";
if ($user =~ m/^[^0-9A-Za-z]*([0-9A-Za-z])/) {
$curr_first_letter = uc($1);
}
} else {
$user_header{$user} = $header;
}
$user_first_letter{$user} = $curr_first_letter;
$user_total_articles{$user} = 0;
$user_total_words{$user} = 0;
$user_total_5k{$user} = 0;
$user_0k_articles{$user} = 0;
$user_rollover_words{$user} = 0;
$expecting = "GOCE";
$user_num{$user} = $user_num;
$user_num++;
} elsif ($line =~ m/^----\s*$/) {
unless (($expecting eq "dashes") || ($expecting eq "rollover")) {
warn "$user; Unexpected dashes '$line' (expecting $expecting)\n";
}
$expecting = "header";
} elsif (! length($user)) {
warn "Unexpected line '$line' - no current user\n";
} elsif ($line =~ m/^\{\{\s*GOCE\s+Article\s+list\s*$/) {
unless ($expecting eq "GOCE") {
warn "$user: Unexpected GOCE: '$line' (expecting $expecting)\n";
}
$expecting = "articles";
} elsif ($line =~ m/^\s*\|\s*articles\s*=\s*$/) {
unless ($expecting eq "articles") {
warn "$user: Unexpected articles: '$line'\n";
}
$expecting = "Completed";
} elsif ($line =~
m/^\s*\#\s*(?:<[Ss]>\s*)?\{\{\s*[Cc]ompleted\s*\}\}\s*
\'*\[\[((?:[^]]+|.)*?)\]\]\'*\s*
(?:\(section\)\s*)?(\([^)]*\)|[\d,]+)\s*(.*)$/x) {
my $article = $1;
my $words = $2;
my $rest = $3;
unless ($expecting eq "Completed") {
warn "$user: Unexpected Completed: '$line' (expecting $expecting)\n";
}
if (defined($rest) && length($rest) &&
($rest =~ m/(?:\{\{rejected\}\}|decline|denied|disagree|invalid|
\bnot\b|redflag|\bX\b|Cross|\bN[ao]?\b|
nay|negative)/xi)) {
warn "$user: Skipping '$line' and subtracting 1200 due to '$rest'\n";
$user_total_words{$user} -= 1200;
next LINE;
}
unless (defined($article) && length($article) && defined($words) &&
length($words)) {
$expecting = "Completed";
next LINE;
}
$words =~ s/^\s*[\\(]+\s*//;
$words =~ s/\s*[\\)]+\s*$//;
$words =~ s/,+//g;
if ($words =~ m/^\s*(?:app\w*?x\.?\s*)?(\d+)(?:\s*words)?\s*$/i) {
$words = ($1+0);
} elsif (! length($words)) {
$words = 0;
} elsif ($words =~ m/^\s*zero\s*$/i) {
$words = 0;
} else {
warn "$user: Can't interpret words '$words' from '" . $line
. "'; treating as 0\n";
$words = 0;
}
if ($words > $largest_article_size) {
$largest_article_size = $words;
$largest_article_user = $user;
$largest_article_name = $article;
} elsif ($words && ($words == $largest_article_size)) {
$size_poss_ties{$words} = 1;
}
$user_article_size{$user}{$article} = $words;
$user_total_articles{$user}++;
$user_total_words{$user} += $words;
if ($words == 0) {
$user_0k_articles{$user}++;
}
if ($words >= 5000) {
$user_total_5k{$user} += int($words/5000);
}
if ($words >= 10000) {
$user_10k_articles{$user}{$article} = $words;
}
$expecting = "Completed";
} elsif ($line =~ m/^\s*\#\s*\{\{\s*[Cc]ompleted\s*\}\}\s*<!--/) {
unless ($expecting eq "Completed") {
warn "$user: Unexpected Completed: '$line' (expecting $expecting)\n";
}
$expecting = "Completed";
} elsif ($line =~ m/^\s*\#\s*\{\{\s*[Cc]ompleted\s*\}\}\s*\[\[\]\]/) {
unless ($expecting eq "Completed") {
warn "$user: Unexpected Completed: '$line' (expecting $expecting)\n";
}
$expecting = "Completed";
} elsif ($line =~ m/^\s*\|\s*total-(?:articles|words)\b/i) {
unless (($expecting eq "Completed") || ($expecting eq "rollover")) {
warn "$user: Unexpected total-articles/words '$line' (expecting "
. $expecting . ")\n";
}
$expecting = "rollover";
} elsif ($line =~ m/^\s*\|\s*rollo?ver-words\s*=\s*([\d,]+)\s*$/i) {
my $words = $1;
unless ($expecting eq "rollover") {
warn "$user: Unexpected rollover-words '$line' (expecting $expecting)\n";
}
# note for future improvement: Read old file to figure out
$words =~ s/,+//g;
if ($words =~ m/^(\d+)$/) {
my $rollover = ($1+0);
if ($user_total_articles{$user} > 0) {
$user_rollover_words{$user} = $rollover;
} elsif ($rollover > 0) {
warn "$user: Not doing rollover ($rollover): no articles\n";
}
} else {
warn "$user: Can't interpret rollover-words '$words' (from '" . $line
. "'); treating as 0\n";
}
$expecting = "close";
} elsif ($line =~ m/^\s*\|\s*rollo?ver-words\s*=\s*([\d,]+)\s*\}\}\s*$/i) {
my $words = $1;
unless ($expecting eq "rollover") {
warn "$user: Unexpected rollover-words '$line' (expecting $expecting)\n";
}
# note for future improvement: Read old file to figure out
$words =~ s/,+//g;
if ($words =~ m/^(\d+)$/) {
my $rollover = ($1+0);
if ($user_total_articles{$user} > 0) {
$user_rollover_words{$user} = $rollover;
} elsif ($rollover > 0) {
warn "$user: Not doing rollover: no articles\n";
}
} else {
warn "$user: Can't interpret rollover-words '$words' (from '" . $line
. "'); treating as 0\n";
}
$expecting = "dashes";
} elsif ($line =~ m/^\s*\}\}\s*$/) {
unless ($expecting eq "close") {
warn "$user: Unexpected close '$line' (expecting $expecting)\n";
}
$expecting = "dashes";
} else {
warn "$user: Can't interpret line '$line' (expecting $expecting)\n";
}
}
close(INPUT);
warn "Finished reading input; have " . scalar(keys %user_header)
. " users; largest article was '$largest_article_name' ("
. "$largest_article_size) by $largest_article_user; "
. scalar(keys %user_10k_articles) . " users had 10k+ articles\n";
%awards_total_count =
(4000 => "Modest",
8000 => "Working Wikipedian",
12000 => "Cleanup",
20000 => "Tireless Contributor",
30000 => "(old school) League of Copy Editors",
40000 => "(modern) GOCE",
60000 => "Diligence",
80000 => "Order of the Superior Scribe",
100000 => "Most Excellent Order of the Caretaker");
#$largest_article_size = 0;
#$largest_article_user = "";
#$largest_article_name = "";
#%user_header = (); # without =
#%user_article_size = ();
#%user_total_articles = ();
#%user_total_words = ();
#%user_total_5k = ();
#%user_10k_articles = ();
#%user_rollover_words = ();
#=====$user_header=====
#* total-articles = $user_total_articles
#* total-words = $user_total_words
#* rollover-words = $user_rollover_words
#* grand total = $user_total_words+$user_rollover_words
#* new rollover words = ###
#* barnstars =
#**[for total words]
#**[for total article rank?]
#**[for total words rank?]
#**[for total 5k+ rank?]
#**[for largest article?]
#**[for articles >= 10k?]
if (exists($size_poss_ties{$largest_article_size})) {
warn "Have tie for largest article; check manually!\n";
}
# figure out ranks here
@users_by_num_articles =
sort {$user_total_articles{$b} <=> $user_total_articles{$a}}
(keys %user_total_articles);
@users_by_num_articles = grep {$user_total_articles{$_} > 0}
(@users_by_num_articles);
%user_rank_articles = ();
%rank_articles_users = ();
$curr_num = $user_total_articles{$users_by_num_articles[0]};
$curr_rank = 1;
$user_rank_articles{$users_by_num_articles[0]} = $curr_rank;
$rank_articles_users{$curr_rank}{$users_by_num_articles[0]} = 1;
RANK_ARTICLES: for (my $i = 1; $i <= $#users_by_num_articles; $i++) {
if ($user_total_articles{$users_by_num_articles[$i]} < $curr_num) {
$curr_rank++;
if ($curr_rank > 5) {
last RANK_ARTICLES;
}
}
$user_rank_articles{$users_by_num_articles[$i]} = $curr_rank;
$curr_num = $user_total_articles{$users_by_num_articles[$i]};
$rank_articles_users{$curr_rank}{$users_by_num_articles[$i]} = 1;
}
@users_by_num_words =
sort {$user_total_words{$b} <=> $user_total_words{$a}}
(keys %user_total_words);
@users_by_num_words = grep {$user_total_words{$_} > 0}
(@users_by_num_words);
%user_rank_words = ();
%rank_words_users = ();
$curr_num = $user_total_words{$users_by_num_words[0]};
$curr_rank = 1;
$user_rank_words{$users_by_num_words[0]} = $curr_rank;
$rank_words_users{$curr_rank}{$users_by_num_words[0]} = 1;
RANK_WORDS: for (my $i = 1; $i <= $#users_by_num_words; $i++) {
if ($user_total_words{$users_by_num_words[$i]} < $curr_num) {
$curr_rank++;
if ($curr_rank > 5) {
last RANK_WORDS;
}
}
$user_rank_words{$users_by_num_words[$i]} = $curr_rank;
$curr_num = $user_total_words{$users_by_num_words[$i]};
$rank_words_users{$curr_rank}{$users_by_num_words[$i]} = 1;
}
@users_by_num_5k =
sort {$user_total_5k{$b} <=> $user_total_5k{$a}}
(keys %user_total_5k);
@users_by_num_5k = grep {$user_total_5k{$_} > 0}
(@users_by_num_5k);
%user_rank_5k = ();
%rank_5k_users = ();
$curr_num = $user_total_5k{$users_by_num_5k[0]};
$curr_rank = 1;
$user_rank_5k{$users_by_num_5k[0]} = $curr_rank;
$rank_5k_users{$curr_rank}{$users_by_num_5k[0]} = 1;
RANK_5K: for (my $i = 1; $i <= $#users_by_num_5k; $i++) {
if ($user_total_5k{$users_by_num_5k[$i]} < $curr_num) {
$curr_rank++;
if ($curr_rank > 5) {
last RANK_5K;
}
}
$user_rank_5k{$users_by_num_5k[$i]} = $curr_rank;
$curr_num = $user_total_5k{$users_by_num_5k[$i]};
$rank_5k_users{$curr_rank}{$users_by_num_5k[$i]} = 1;
}
sub num_format { # for Number-Format-1.73
my $num = $_[0];
unless (defined($num)) {
confess "Undefined input to num_format; stopped";
}
unless ($num == int($num)) {
confess "$num is not an integer; stopped";
}
$num = '0'x(3 - (length($num) % 3)) . $num;
$num = join(",", grep {$_ ne ''} (split(/(...)/, $num)));
$num =~ s/^0*,?//;
if ($num eq '') {
$num = 0;
}
return $num;
}
memoize('num_format');
# print out overall chart here
print OUTPUT "\{| class=\"wikitable\"\n";
print OUTPUT "|+ '''Gold Star Award Leaderboard'''\n";
print OUTPUT "!\n";
print OUTPUT "! Articles\n";
print OUTPUT "! Words\n";
print OUTPUT "! 5k+ Articles\n";
for $rank (1..5) {
print OUTPUT "|-\n";
print OUTPUT "| $rank.\n";
print OUTPUT "| "
. join(", ",map {$user_alias{$_}
. " (" . num_format($user_total_articles{$_}) . ")"}
(sort(keys %{ $rank_articles_users{$rank} }))) . "\n";
print OUTPUT "| "
. join(", ",map {$user_alias{$_}
. " (" . num_format($user_total_words{$_}) . ")"}
(sort(keys %{ $rank_words_users{$rank} }))) . "\n";
print OUTPUT "| "
. join(", ",map {$user_alias{$_} . " (" . $user_total_5k{$_} . ")"}
(sort(keys %{ $rank_5k_users{$rank} }))) . "\n";
}
#{| class="wikitable"
#|+ '''Gold Star Award Leaderboard'''
#!
#! Articles
#! Words
#! 5k+ Articles
#|-
#| 1.
#| ??? (###)
#| ??? (###)
#| ??? (###)
#|}
print OUTPUT "|}\n\n";
print OUTPUT "* Most articles, first day: FILL THIS IN!!\n";
print OUTPUT "* Largest single article: \[\[$largest_article_name\]\] ("
. num_format($largest_article_size) . ") - "
. $user_alias{$largest_article_user} . "\n";
if (scalar(keys %user_10k_articles)) {
$to_print_10k = "* 10K article(s):";
foreach $user (sort(keys %user_10k_articles)) {
$to_print_10k .= ", " . $user_alias{$user};
if (scalar(keys %{ $user_10k_articles{$user} }) > 1) {
$to_print_10k .= " (" . scalar(keys %{ $user_10k_articles{$user} })
. ")";
}
}
$to_print_10k =~ s/:,/:/;
print OUTPUT $to_print_10k . ".\n";
}
print OUTPUT "----\n";
print OUTPUT "\n";
print OUTPUT "{{CompactTOC8|num=yes|side=yes}}\n";
print OUTPUT "\n";
$num_users_with_barnstars = 0;
$num_users_with_multiple_barnstars = 0;
$curr_first_letter = "";
%num_to_th = (1 => '1st',
2 => '2nd',
3 => '3rd',
4 => '4th',
5 => '5th');
@users = sort {($user_first_letter{$a} cmp $user_first_letter{$b}) ||
($user_num{$a} <=> $user_num{$b})} (keys %user_num);
foreach $user (@users) {
unless ($user_total_articles{$user} > 0) {
next;
}
if ($user_first_letter{$user} ne $curr_first_letter) {
print OUTPUT "====" . $user_first_letter{$user} . "====\n";
$curr_first_letter = $user_first_letter{$user};
}
print OUTPUT "=====" . $user_header{$user} . "=====\n";
if ($user_0k_articles{$user} > 0) {
print OUTPUT "* total-articles = "
. num_format($user_total_articles{$user}) . " ("
. num_format($user_0k_articles{$user}) . " 0-words)\n";
} else {
print OUTPUT "* total-articles = "
. num_format($user_total_articles{$user}) . "\n";
}
print OUTPUT "* total-words = "
. num_format($user_total_words{$user}) . "\n";
print OUTPUT "* rollover-words = "
. num_format($user_rollover_words{$user}) . "\n";
my $grand_total = $user_total_words{$user} + $user_rollover_words{$user};
print OUTPUT "* grand total = " . num_format($grand_total) . "\n";
my $curr_barnstar_total = "";
my $curr_barnstar_rollover = $grand_total;
foreach $barnstar_level (keys %awards_total_count) {
if ($barnstar_level < $grand_total) {
my $diff = $grand_total - $barnstar_level;
if ($diff < $curr_barnstar_rollover) {
$curr_barnstar_total = $awards_total_count{$barnstar_level};
$curr_barnstar_rollover = $diff;
}
}
}
print OUTPUT "* new rollover words = "
. num_format($curr_barnstar_rollover) . "\n";
my $num_barnstars = ((length($curr_barnstar_total) > 0) +
($user eq $largest_article_user) +
exists($user_10k_articles{$user}) +
exists($user_rank_articles{$user}) +
exists($user_rank_words{$user}) +
exists($user_rank_5k{$user}));
if ($num_barnstars > 0) {
#my(@barnstars_to_print) = ();
$num_users_with_barnstars++;
my $list_stars = "**";
if ($num_barnstars > 1) {
print OUTPUT "* barnstars =\n";
$num_users_with_multiple_barnstars++;
} else {
print OUTPUT "* barnstar =";
$list_stars = "";
}
if (length($curr_barnstar_total)) {
print OUTPUT "$list_stars $curr_barnstar_total\n";
# add to $barnstars_to_print
}
# do rank barnstars
if (exists($user_rank_articles{$user})) {
my $rank = $user_rank_articles{$user};
if (scalar(keys %{ $rank_articles_users{$rank} }) > 1) {
print OUTPUT "$list_stars equal " . $num_to_th{$rank}
. " place, number of articles ("
. num_format($user_total_articles{$user}) . ")\n";
} else {
print OUTPUT "$list_stars " . $num_to_th{$rank}
. " place, number of articles ("
. num_format($user_total_articles{$user}) . ")\n";
}
# add to $barnstars_to_print
}
if (exists($user_rank_words{$user})) {
my $rank = $user_rank_words{$user};
if (scalar(keys %{ $rank_words_users{$rank} }) > 1) {
print OUTPUT "$list_stars equal " . $num_to_th{$rank}
. " place, word count ("
. num_format($user_total_words{$user}) . ")\n";
} else {
print OUTPUT "$list_stars " . $num_to_th{$rank}
. " place, word count ("
. num_format($user_total_words{$user}) . ")\n";
}
# add to $barnstars_to_print
}
if (exists($user_rank_5k{$user})) {
my $rank = $user_rank_5k{$user};
if (scalar(keys %{ $rank_5k_users{$rank} }) > 1) {
print OUTPUT "$list_stars equal " . $num_to_th{$rank}
. " place, 5k+ (" . $user_total_5k{$user} . ")\n";
} else {
print OUTPUT "$list_stars " . $num_to_th{$rank} . " place, 5k+ ("
. $user_total_5k{$user} . ")\n";
}
# add to $barnstars_to_print
}
if ($user eq $largest_article_user) {
print OUTPUT "$list_stars Largest single article: [["
. $largest_article_name
. "]] (" . num_format($largest_article_size) . ")\n";
# add to $barnstars_to_print
}
if (exists($user_10k_articles{$user})) {
print OUTPUT "$list_stars article >= 10k ("
. scalar(keys %{ $user_10k_articles{$user} }) . ")\n";
# add to $barnstars_to_print
}
# print <nowiki>\n$barnstars_to_print\n</nowiki>
} else {
print OUTPUT "* barnstar = (none)\n";
}
print OUTPUT "\n";
}
close(OUTPUT);
warn "Finished; had $num_users_with_barnstars users with barnstars, "
. $num_users_with_multiple_barnstars . " with multiple barnstars\n";