User:Rick Bot/scripts/getadminactivity

#!/bin/bash

# Command used to fetch a URL and write the response body to stdout.
# It is expanded unquoted at its call sites, so a value that carries
# options (like the wget form below) splits into separate words.
WGET="/usr/bin/curl"  # on a mac OS X
# WGET="wget -q -O -"   # on a linux box with wget

# Working files; every one of them is created in the current directory.
# ACTIVE / SEMIACTIVE / INACTIVE first collect one "# {{user3|...}}" line
# per admin and are later rewritten in place as the formatted wiki lists.
ACTIVE="admins.active"
SEMIACTIVE="Semi-active"
INACTIVE="Inactive"
# Prefix of the per-admin contribution cache: "$CONTRIBS.<urlname>".
CONTRIBS=".contrib.times"
# Accumulates the raw wikitext of the current list pages; "$WPLA.notes"
# is derived from it.
WPLA="wpla"

function prevmonth () {
  # Print the English name of the month that precedes month name $1,
  # wrapping from January back to December.  Nothing is printed when $1
  # is not one of the twelve capitalized English month names.
  # December appears at both ends so that every name has a predecessor.
  local -a ring=(December January February March April May June July
                 August September October November December)
  local idx
  for (( idx = 1; idx < ${#ring[@]}; idx++ )); do
    if [[ "$1" == "${ring[idx]}" ]]; then
      echo "${ring[idx - 1]}"
      return 0
    fi
  done
  return 0
}

# normalizedate TIMESTAMP
#
# Print TIMESTAMP as "D Month YYYY" (day without a leading zero, English
# month name).  Two input forms are understood:
#   2009-10-02T02:20:26Z        (ends in "Z")
#   15:59, 13 September 2009    (anything else: the first 7 characters,
#                                i.e. the "HH:MM, " prefix, are dropped)
# An empty TIMESTAMP yields a single empty line.  An unrecognized month
# number in the "Z" form yields an empty month name.
# All working variables are local, so the caller's MONTH/YEAR/DAY are
# neither read nor modified.
normalizedate() {
  local IFS=$' \t\n'
  local stamp year monthnum day month
  local -a words

  if [ "$1" = "" ]; then
    echo
    return 0
  fi

  # Collapse runs of whitespace the way an unquoted expansion would, but
  # without exposing the value to pathname expansion.
  read -r -a words <<<"$1"
  stamp="${words[*]}"

  # No trailing "Z": old "HH:MM, D Month YYYY" form.
  if [ "$1" = "${1%Z}" ]; then
    printf '%s\n' "${stamp:7}"
    return 0
  fi

  year=${stamp:0:4}
  monthnum=${stamp:5:2}
  day=${stamp:8:2}
  case "$monthnum" in
    01) month="January" ;;
    02) month="February" ;;
    03) month="March" ;;
    04) month="April" ;;
    05) month="May" ;;
    06) month="June" ;;
    07) month="July" ;;
    08) month="August" ;;
    09) month="September" ;;
    10) month="October" ;;
    11) month="November" ;;
    12) month="December" ;;
    *) month="" ;;
  esac
  printf '%s\n' "${day#0} $month $year"
}

# inactive DAY MONTH YEAR TODAY_DAY TODAY_MONTH TODAY_YEAR
#
#   $1 - $3  day number, English month name and year of the latest contrib
#   $4 - $6  day number, English month name and year for today
#
# Returns 0 (inactive) when the latest contrib is three months old or
# older, or when fewer than six arguments are given (no contrib date is
# known).  Returns 1 (not inactive) when the contrib falls in the current
# month, in either of the two preceding months, or in the month three back
# on a day of the month later than today's.
# Relies on prevmonth for the month arithmetic; touches no globals.
function inactive () {
  local month year back

  # if fewer than 6 args, let's say inactive
  [ $# -lt 6 ] && return 0

  # if latest contrib is this month, not inactive
  if [ "$2" = "$5" ] && [ "$3" = "$6" ]; then
    return 1
  fi

  month=$5
  year=$6
  for back in 1 2 3; do
    month=$(prevmonth "$month")
    # stepping back from January lands in December of the previous year
    [ "$month" = "December" ] && year=$((year - 1))
    if [ "$2" = "$month" ] && [ "$3" = "$year" ]; then
      # One and two months back always count as recent.  Three months
      # back counts only while the contrib day is later than today's.
      # ([ -gt ] is used because it reads "08" as decimal.)
      if [ "$back" -lt 3 ] || [ "$1" -gt "$4" ]; then
        return 1
      fi
    fi
  done

  return 0
}

# semiactive DAY MONTH YEAR TODAY_DAY TODAY_MONTH TODAY_YEAR
#
#   $1 - $3  day number, English month name and year of the 30th most
#            recent contrib
#   $4 - $6  day number, English month name and year for today
#
# Returns 0 (semi-active) when that contrib is two months old or older, or
# when fewer than six arguments are given (no such contrib date is known;
# same convention as inactive).  Returns 1 (not semi-active) when it falls
# in the current month, in the preceding month, or in the month two back
# on a day of the month later than today's.
# Relies on prevmonth for the month arithmetic; touches no globals.
function semiactive () {
  local month year back

  # With fewer than 6 args the comparisons below would be malformed;
  # the outcome has always been "semi-active", so say so directly.
  [ $# -lt 6 ] && return 0

  # if 30th most recent contrib is this month, not semi-active
  if [ "$2" = "$5" ] && [ "$3" = "$6" ]; then
    return 1
  fi

  month=$5
  year=$6
  for back in 1 2; do
    month=$(prevmonth "$month")
    # stepping back from January lands in December of the previous year
    [ "$month" = "December" ] && year=$((year - 1))
    if [ "$2" = "$month" ] && [ "$3" = "$year" ]; then
      # Last month always counts as recent.  Two months back counts only
      # while the contrib day is later than today's.
      # ([ -gt ] is used because it reads "08" as decimal.)
      if [ "$back" -lt 2 ] || [ "$1" -gt "$4" ]; then
        return 1
      fi
    fi
  done

  return 0
}

# Today's date as "D Month YYYY".  It is expanded unquoted later so that it
# splits into the day / month / year arguments of inactive and semiactive
# (which also drops the pad space %e produces for days 1-9).
# LC_ALL=C forces English month names, the only ones prevmonth recognizes.
TODAY=$(LC_ALL=C date +"%e %B %Y")

# start every run with empty result lists; the per-admin loop appends to them
rm -f "$ACTIVE" "$INACTIVE" "$SEMIACTIVE"

# fetch counter used by the rate limiter in the per-admin loop
n=1

# Fetch the sysop list, retrying once when the result looks truncated
# (under 5000 bytes).
# legacy alternative:
# $WGET 'http://en.wikipedia.org/w/index.php?title=Special%3aListUsers&group=sysop&limit=3000'  >sysops
SYSOPSIZE=0
for attempt in 1 2; do
  ./listusers sysop >sysops
  # arithmetic expansion strips the padding some wc implementations emit
  SYSOPSIZE=$(( $(wc -c <sysops) ))
  [ "$SYSOPSIZE" -ge 5000 ] && break
done

# Without a plausible sysop list nothing downstream can be trusted, so
# report on stderr and fail with a non-zero status.
if [ "$SYSOPSIZE" -lt 5000 ]; then
  echo "Can't fetch sysop list!" >&2
  exit 1
fi

# <li><a href="/wiki/User:A_Man_In_Black" title="User:A Man In Black">A Man In Black</a>
#<li><a href="/w/index.php?title=User:Woggly&action=edit&redlink=1" class="new" title="User:Woggly (page does not exist)">Woggly</a>
# cat sysops | sed 's;</a>;</a>\
# ;g' | grep -F "<li>" | sed 's/^.*<li>[^U]*User://' | sed 's/">.*//' | sed 's/.amp;action=edit.amp;redlink=1. class=.new//' | sed 's/ .page does not exist.$//'  | while read line; do
# Classify every admin named in ./sysops (one user name per line) as
# active, semi-active or inactive and append a "# {{user3|name}}" line to
# the matching list file.
#
# Each admin has a contribution cache "$CONTRIBS.<urlname>" whose first
# line is used as the latest contrib timestamp and whose last line as the
# 30th most recent one (presumably ./usercontribs prints up to 30
# timestamps, newest first -- confirm against that helper).
#
# Passing "-" as the script's first argument turns off both the re-fetch
# of existing caches and the throttling sleep.
#
# $LATEST, $THIRTIETH and $TODAY are passed UNQUOTED on purpose: each
# holds "D Month YYYY" and must split into three arguments, and an empty
# value must contribute no arguments at all.
while read -r line; do

  # line=`echo $line | sed 's/class="mw-redirect" //'`
  # urlname=${line%%\" title=\"User:*}
  # realname=${line##*\" title=\"User:}
  realname="$line"
  urlname=$(./urlencode "$realname")

  # skip Jeffrey O. Gustafson - by request, and Example
  if [ "$urlname" = "Jeffrey_O._Gustafson" ] || [ "$urlname" = "Example" ]; then
    continue
  fi

  contribfile="$CONTRIBS.$urlname"

  # Figure out if the user is active based on contribs we already know
  # about; an admin who is active per the cache cannot have become less
  # active since, so no fetch is needed.
  if [ -s "$contribfile" ]; then
    echo "$contribfile"
    LATEST=$(normalizedate "$(head -n 1 "$contribfile")")
    THIRTIETH=$(normalizedate "$(tail -n 1 "$contribfile")")
    # shellcheck disable=SC2086  # intentional word splitting, see header
    if ! inactive $LATEST $TODAY && ! semiactive $THIRTIETH $TODAY; then
      echo "# {{user3|$realname}}" >>"$ACTIVE"
      continue
    fi
  fi

  # get latest contribs; pause 10 seconds once the counter reaches 10
  n=$((n + 1))
  if [ "$n" -ge 10 ] && [ "$1" != "-" ]; then
    echo "$realname"
    sleep 10
    n=1
  fi

  if [ "$1" != "-" ] || [ ! -s "$contribfile" ]; then
    echo ./usercontribs "$urlname" timestamp
    ./usercontribs "$urlname" timestamp >"$contribfile"
    # echo $WGET "http://en.wikipedia.org/w/index.php?title=Special:Contributions&target=$urlname&limit=30"
    # $WGET "http://en.wikipedia.org/w/index.php?title=Special:Contributions&target=$urlname&limit=30" | grep "<li cl[^>]*><a href" | sed -e 's/^<li[^>]*><a href[^>]*>//' -e 's/<.*//' >$CONTRIBS.$urlname
  fi

  LATEST=$(normalizedate "$(head -n 1 "$contribfile")")
  THIRTIETH=$(normalizedate "$(tail -n 1 "$contribfile")")

  # inactive if LATEST contrib not within last three months
  # semi-active if 30th most recent contrib is more than two months ago
  # shellcheck disable=SC2086  # intentional word splitting, see header
  if inactive $LATEST $TODAY; then
    case "$urlname" in
      Jasonr) echo "# {{user3|$realname}} - has administrator access for technical rather than administrative reasons.  Works on hardware upgrades." >>"$INACTIVE" ;;
      *) echo "# {{user3|$realname}} - $LATEST" >>"$INACTIVE" ;;
    esac
    continue
  fi
  # shellcheck disable=SC2086  # intentional word splitting, see header
  if semiactive $THIRTIETH $TODAY; then
    echo "# {{user3|$realname}}" >>"$SEMIACTIVE"
  else
    echo "# {{user3|$realname}}" >>"$ACTIVE"
  fi
done <sysops

# Get the current contents of WP:LA: the raw wikitext of the A-F, G-O, P-Z
# and Semi-active subpages is concatenated into $WPLA.
#
# The URLs use https: plain-http requests to Wikipedia are answered with a
# redirect, which the configured curl does not follow on its own.
# $WGET stays unquoted so that a value carrying options splits into words.
LA_BASE='https://en.wikipedia.org/w/index.php?title=Wikipedia:List_of_administrators'

$WGET "$LA_BASE/A-F&action=raw" >"$WPLA"
echo >>"$WPLA"

# Everything on the A-F page above its "===#===" heading is the shared page
# header; keep it in "A-F" as the seed for the regenerated section pages.
sed -e '/^===#/,$d' "$WPLA" >"A-F"

$WGET "$LA_BASE/G-O&action=raw" >>"$WPLA"
echo >>"$WPLA"
$WGET "$LA_BASE/P-Z&action=raw" >>"$WPLA"
echo >>"$WPLA"
$WGET "$LA_BASE/Semi-active&action=raw" >>"$WPLA"

# Collect the entries that carry a hand-written <noinclude> note so the
# notes can be re-attached to the regenerated lists.  Plain grep is used
# instead of the obsolescent egrep; in a basic regex "{" is literal.
grep "^# {{user3.*<noinclude>" "$WPLA" >"$WPLA.notes"

# Fix the format and sort order of the active list.
#
# The collected entries are first sorted with case folding (-f) in
# dictionary order (-d) into tmp.$ACTIVE.
sort -fd $ACTIVE >tmp.$ACTIVE
# The awk program then receives, in this order:
#   1. the annotated entries saved in $WPLA.notes,
#   2. the sentinel line "END OF NOTES",
#   3. the sorted entries whose name starts with a non-letter,
#   4. the sorted entries whose name starts with a letter.
# Before the sentinel it only records each entry's trailing <noinclude>
# text, keyed by the entry with that text stripped.  After the sentinel it
# prints every entry followed by its recorded note (if any), emitting a
# "===X===" heading wrapped in <noinclude> tags whenever the first letter
# of the name (character 11 of the line) reaches a letter not yet
# announced; headings for letters skipped along the way are emitted too.
# The "===#===" heading for the non-letter names comes from BEGIN.
# Entries containing "=" get "1=" inserted after "user3|" before the note
# lookup and the print.
# The result overwrites $ACTIVE.
( cat $WPLA.notes ; echo "END OF NOTES"; grep "[|][^a-zA-Z]" tmp.$ACTIVE ; grep "[|][a-zA-Z]" tmp.$ACTIVE) | awk >$ACTIVE '
BEGIN {
  print "===#==="
  print "</noinclude>"
  nextletter = "A"
  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ["
}

/END OF NOTES/ {
  noting=1
  next
}

{
  if (noting == 0) {
    admin = $0
    sub("}} *<noinclude.*","}}",admin)
    notetext = $0
    sub("^.*}} *<noinclude","<noinclude",notetext)
    note[admin] = " " notetext
  } else {
     if ( $0 ~ "^# {{user3.[" nextletter "-[]") {
       print "<noinclude>"
       print ""
       print "===" nextletter "==="
       print "</noinclude>"
       thisletter = substr($0,11,1)
       while (thisletter != nextletter) {
         nextletter = substr(alphabet,index(alphabet,nextletter)+1,1)
         print "<noinclude>"
         print ""
         print "===" nextletter "==="
         print "</noinclude>"
       }
       nextletter = substr(alphabet,index(alphabet,nextletter)+1,1)
     }
     # add "1=" to template invocation for User:Until(1 == 2)
     if ( $0 ~ "=" ) {
        sub("user3.","user3|1=",$0)
     }
     print $0 note[$0]
  }
}'

# Carve the formatted active list into the three section pages.
# "A-F" already holds the shared page header; the other two pages start
# out as copies of it.
for section_page in "G-O" "P-Z"; do
  cp "A-F" "$section_page"
done

# A-F: everything ahead of the "===G" heading, minus the last line.
sed -e '/^===G/,$d' <"$ACTIVE" \
  | sed -e '$d' >>"A-F"

# G-O: from the "===G" heading up to (not including) the "===P" heading,
# minus the last line.
sed -n -e '/^===G/,$p' <"$ACTIVE" \
  | sed -e '/^===P/,$d' \
  | sed -e '$d' >>"G-O"

# P-Z: from the "===P" heading through the end of the list.
sed -n -e '/^===P/,$p' <"$ACTIVE" >>"P-Z"

# Sort the collected semi-active entries (case folded, dictionary order)
# into tmp.$SEMIACTIVE so that $SEMIACTIVE itself can be rebuilt.
sort -fd $SEMIACTIVE >tmp.$SEMIACTIVE
# Start the rebuilt page with its fixed header: the cross-page table of
# contents and the blurb explaining what "semi-active" means.  The
# here-document ends at the line holding only "%".
cat <<% >$SEMIACTIVE
{| id="toc" class="toc" summary="Contents"
! {{MediaWiki:Toc}}:
|<center>[[Wikipedia:List of administrators/A-F##|#]] [[Wikipedia:List of administrators/A-F#A|A]] [[Wikipedia:List of administrators/A-F#B|B]]
[[Wikipedia:List of administrators/A-F#C|C]] [[Wikipedia:List of administrators/A-F#D|D]] [[Wikipedia:List of administrators/A-F#E|E]] [[Wikipedia:List of administrators/A-F#F|F]] [[Wikipedia:List of administrators/G-O#G|G]] [[Wikipedia:List of administrators/G-O#H|H]] [[Wikipedia:List of administrators/G-O#I|I]] [[Wikipedia:List of administrators/G-O#J|J]] [[Wikipedia:List of administrators/G-O#K|K]] [[Wikipedia:List of administrators/G-O#L|L]] [[Wikipedia:List of administrators/G-O#M|M]] [[Wikipedia:List of administrators/G-O#N|N]] [[Wikipedia:List of administrators/G-O#O|O]] [[Wikipedia:List of administrators/P-Z#P|P]] [[Wikipedia:List of administrators/P-Z#Q|Q]] [[Wikipedia:List of administrators/P-Z#R|R]] [[Wikipedia:List of administrators/P-Z#S|S]] [[Wikipedia:List of administrators/P-Z#T|T]] [[Wikipedia:List of administrators/P-Z#U|U]] [[Wikipedia:List of administrators/P-Z#V|V]] [[Wikipedia:List of administrators/P-Z#W|W]] [[Wikipedia:List of administrators/P-Z#X|X]] [[Wikipedia:List of administrators/P-Z#Y|Y]] [[Wikipedia:List of administrators/P-Z#Z|Z]] • [[Wikipedia:List of administrators/Semi-active|Semi-active]] • [[Wikipedia:List of administrators/Inactive|Inactive]]</center>
|}
{{Clear}}

''These administrators have made fewer than 30 edits in the last 2 months but at least one edit in the last 3 months.''

%
# Append the semi-active entries to the page, re-attaching saved notes.
#
# The awk program receives the annotated entries from $WPLA.notes, then the
# sentinel "END OF NOTES", then the sorted entries (names starting with a
# non-letter first).  Before the sentinel it records each entry's trailing
# <noinclude> text keyed by the bare entry; after it, each entry is printed
# followed by its note, if one was recorded.
#
# Names containing "=" are rewritten to "{{user3|1=...}}", the same form
# the active and inactive lists use.  Keeping the user3 template here
# matters: the notes are keyed by user3 entries, and the semi-active count
# taken later only recognizes lines matching "^# ..user3".
( cat "$WPLA.notes" ; echo "END OF NOTES"; grep "[|][^a-zA-Z]" "tmp.$SEMIACTIVE" ; grep "[|][a-zA-Z]" "tmp.$SEMIACTIVE" ) | awk >>"$SEMIACTIVE" '

/END OF NOTES/ {
  noting=1
  next
}

{
  if (noting == 0) {
    admin = $0
    sub("}} *<noinclude.*","}}",admin)
    notetext = $0
    sub("^.*}} *<noinclude","<noinclude",notetext)
    note[admin] = " " notetext
  } else {
     # add "1=" to template invocation for User:Until(1 == 2)
     if ( $0 ~ "=" ) {
        sub("user3.","user3|1=",$0)
     }
     print $0 note[$0]
  }
}'


# Start the rebuilt inactive page in tmp.$INACTIVE with its fixed header:
# the cross-page table of contents and the blurb describing the list.
# $INACTIVE itself still holds the raw collected entries at this point.
# The here-document ends at the line holding only "%".
cat <<% >tmp.$INACTIVE
{| id="toc" class="toc" summary="Contents"
! {{MediaWiki:Toc}}:
|<center>[[Wikipedia:List of administrators/A-F##|#]] [[Wikipedia:List of administrators/A-F#A|A]] [[Wikipedia:List of administrators/A-F#B|B]]
[[Wikipedia:List of administrators/A-F#C|C]] [[Wikipedia:List of administrators/A-F#D|D]] [[Wikipedia:List of administrators/A-F#E|E]] [[Wikipedia:List of administrators/A-F#F|F]] [[Wikipedia:List of administrators/G-O#G|G]] [[Wikipedia:List of administrators/G-O#H|H]] [[Wikipedia:List of administrators/G-O#I|I]] [[Wikipedia:List of administrators/G-O#J|J]] [[Wikipedia:List of administrators/G-O#K|K]] [[Wikipedia:List of administrators/G-O#L|L]] [[Wikipedia:List of administrators/G-O#M|M]] [[Wikipedia:List of administrators/G-O#N|N]] [[Wikipedia:List of administrators/G-O#O|O]] [[Wikipedia:List of administrators/P-Z#P|P]] [[Wikipedia:List of administrators/P-Z#Q|Q]] [[Wikipedia:List of administrators/P-Z#R|R]] [[Wikipedia:List of administrators/P-Z#S|S]] [[Wikipedia:List of administrators/P-Z#T|T]] [[Wikipedia:List of administrators/P-Z#U|U]] [[Wikipedia:List of administrators/P-Z#V|V]] [[Wikipedia:List of administrators/P-Z#W|W]] [[Wikipedia:List of administrators/P-Z#X|X]] [[Wikipedia:List of administrators/P-Z#Y|Y]] [[Wikipedia:List of administrators/P-Z#Z|Z]] • [[Wikipedia:List of administrators/Semi-active|Semi-active]] • [[Wikipedia:List of administrators/Inactive|Inactive]]</center>
|}

''Administrators who have not edited in at least 3 months, sorted by length of inactivity:''

%
# Body of the inactive page, oldest last-edit first.
#
# The specially worded Jasonr entry carries no date, so it is copied up
# front and, when present, followed by a "#: " separator line.
if grep "Jasonr}}" "$INACTIVE" >>"tmp.$INACTIVE"; then
  echo "#: " >>"tmp.$INACTIVE"
fi

# Then one group per calendar year, 2002 through the current year.  Within
# a year the awk program buckets the entries by "<Month><day>" (the second
# and third fields from the end of the line) and prints the buckets in
# calendar order; entries containing "=" get "1=" inserted after "user3|".
# A "#: " separator line follows every year except the current one.
# NOTE(review): the first year processed is 2002, so an entry whose date
# ends in 2001 would never be printed -- confirm that is intended.
THISYEAR=$(date +%Y)
year=2001
while [ "$year" -lt "$THISYEAR" ]; do
  year=$((year + 1))
  # entries whose line ends in this year (plain grep: egrep is obsolescent)
  grep "${year}\$" "$INACTIVE" | awk >>"tmp.$INACTIVE" '
{
  monthday = $(NF-1) $(NF-2)
  if (inactive[monthday] == "") {
    inactive[monthday] = $0
  } else {
    inactive[monthday] = inactive[monthday] "\n" $0
  }
}

END {
  months[1] = "January"
  months[2] = "February"
  months[3] = "March"
  months[4] = "April"
  months[5] = "May"
  months[6] = "June"
  months[7] = "July"
  months[8] = "August"
  months[9] = "September"
  months[10] = "October"
  months[11] = "November"
  months[12] = "December"
  for (month=1; month<=12; month=month+1) {
    for (day=1; day<=31; day=day+1) {
      if (inactive[months[month] day] != "") {
        if (inactive[months[month] day] ~ "=") {
          gsub("user3.","user3|1=",inactive[months[month] day])
        }
        print inactive[months[month] day]
      }
    }
  }
}
'
  if [ "$year" -lt "$THISYEAR" ]; then
    echo "#: " >>"tmp.$INACTIVE"
  fi
done
# Close the page with the standing note to human editors, then publish the
# assembled text as the final inactive list.  The here-document payload is
# literal text and ends at the line holding only "!".
cat <<'!' >>"tmp.$INACTIVE"

<!-- Do not add admins to the inactive list who have just recently announced their departure. It is not uncommon for users to change their mind. This list is not intended to track every temporary absence from activity. Wait until they haven't edited for a month before adding them here. -->
!

cp "tmp.$INACTIVE" "$INACTIVE"


# Refresh the summary figures.
#
# LA.new is the main WP:LA page with its "N of them active (as of ...)"
# and "last checked on ..." phrases updated to the new active count and
# today's date; LA.active, Semi-active.count and Inactive.count each hold
# one bare number.
#
# The URL uses https: plain-http requests to Wikipedia are answered with a
# redirect, which the configured curl does not follow on its own.
# $WGET stays unquoted so that a value carrying options splits into words.
$WGET 'https://en.wikipedia.org/w/index.php?title=Wikipedia:List_of_administrators&action=raw' >LA

# cat first: grep -c on several files would print one count per file.
# grep -c prints a bare number, so no whitespace clean-up is needed.
NUMACTIVE=$(cat A-F G-O P-Z | grep -c "^# ..user3")
DATE=$(date +"%Y-%m-%d")
sed -e "s/[0-9]* of them active (as of .*)/$NUMACTIVE of them active (as of [[$DATE]])/" -e "s/last checked on .*[.]/last checked on [[$DATE]]./" LA >LA.new
echo "$NUMACTIVE" >LA.active

NUMSEMIACTIVE=$(grep -c "^# ..user3" <"$SEMIACTIVE")
echo "$NUMSEMIACTIVE" >"$SEMIACTIVE.count"

NUMINACTIVE=$(grep -c "^# ..user3" <"$INACTIVE")
echo "$NUMINACTIVE" >"$INACTIVE.count"