foot.py
# Shared mutable state for one bot run.  The main program does ``import foot``
# and reads/writes every name below as ``foot.<name>``.

# Category work queues: seeded in main() and extended while the category
# trees are walked (findpages() -> mycatlist1, findexclude() -> mycatlist2).
mycatlist1 = []
mycatlist2 = []

# Every distinct non-category title seen by findpages().
allplayers = []

# Titles collected by findexclude() and ProcessDoneCat(); findpages() skips
# these when building nomatchlist.
excludelist = []

# Titles queued by ProcessDoneCat() for RemoveCat().
donelist = []

# Titles queued by findpages() for AddCat().
nomatchlist = []

# Titles whose wikitext contained "stub" when AddCat() fetched them.
stublist = []

# Titles AddCat() found to be redirects (reported, not edited).
redirlist = []

# Counters of successful edits made by RemoveCat() / AddCat().
removed = 0
added = 0
Main Program
from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import foot
# Shared wikitools session used by every function below.  Both statements are
# at module level, so they execute (including the login) as soon as this file
# is loaded, before main() is called.
site = wiki.Wiki() #Tell Python to use the English Wikipedia's API
site.login(userpassbot.username, userpassbot.password) #login with the credentials held in the local userpassbot module
# Console-safe print helper - page and file names can contain accented
# characters that the console encoding cannot represent.
def pnt(s):
    """Print *s*; if that raises UnicodeEncodeError, print its UTF-8 bytes instead."""
    try:
        print(s)
    except UnicodeEncodeError:
        encoded = s.encode('utf-8')
        print(encoded)
def startAllowed():
    """Read the on-wiki run switch for this task.

    Fetches the wikitext of "User:RonBot/7/Run" and returns the string
    "run" when that text is exactly "Run", otherwise the string "no".
    """
    run_page = page.Page(site, "User:RonBot/7/Run")
    return "run" if run_page.getWikiText() == "Run" else "no"
def allow_bots(text, user):
    """Decide whether *user* may edit a page, based on its bots/nobots template.

    Only the first template named ``bots`` or ``nobots`` in *text* is
    examined.  Bot names are compared lower-cased and stripped.

    Returns:
        True  - no such template; ``allow`` lists *user* or ``all``;
                ``deny=none``; or nothing in the template decided otherwise.
        False - ``allow=none``; ``deny`` lists *user* or ``all``; or the
                template is ``nobots`` with no parameters.
    """
    user = user.lower().strip()
    wikicode = mwparserfromhell.parse(text)

    # Locate the first exclusion template, if any.
    tl = None
    for candidate in wikicode.filter_templates():
        if candidate.name.matches(['bots', 'nobots']):
            tl = candidate
            break
    if tl is None:
        return True

    print("template found")  # Have we found one
    for param in tl.params:
        bots = [entry.lower().strip() for entry in param.value.split(",")]
        joined = ''.join(bots)
        if param.name == 'allow':
            print("We have an ALLOW")  # allow found
            if joined == 'none':
                return False
            if any(bot in (user, 'all') for bot in bots):
                return True
        elif param.name == 'deny':
            print("We have a DENY")  # deny found
            if joined == 'none':
                print("none - true")
                return True
            for bot in bots:
                if bot in (user, 'all'):
                    pnt(bot)
                    pnt(user)
                    print("all - false")
                    return False

    # A bare {{nobots}} bans every bot.
    if tl.name.matches('nobots') and len(tl.params) == 0:
        print("match - false")
        return False
    return True
def findpages(nextcat):
    """Record the members of category *nextcat* in the shared ``foot`` lists.

    Pages through the ``categorymembers`` API list (namespaces 0 and 14),
    following ``cmcontinue`` until the response has no ``continue`` key.

    For each member title:
      * titles containing "Category" are queued on ``foot.mycatlist1`` if
        not already there (presumably these are the sub-categories - TODO
        confirm that ``unprefixedtitle`` keeps the prefix here);
      * any other title is added to ``foot.allplayers`` if new, and to
        ``foot.nomatchlist`` if it is in neither ``foot.excludelist`` nor
        ``foot.nomatchlist``.
    """
    cont_token = ''
    while True:
        query = {
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': nextcat,
            'cmlimit': 'max',
            'cmnamespace': '0|14',
            'cmcontinue': cont_token,
        }
        # Send the request without automatic continuation; paging is manual.
        result = api.APIRequest(site, query).query(False)
        members = pagelist.listFromQuery(site, result['query']['categorymembers'])

        for member in members:
            title = member.unprefixedtitle
            if "Category" in title:
                if title in foot.mycatlist1:
                    pnt("NOT APPENDING " + title)
                else:
                    foot.mycatlist1.append(title)
                    pnt("APPENDING " + title)
                    print(len(foot.mycatlist1))
                continue

            # Have we a unique player name?
            if title not in foot.allplayers:
                foot.allplayers.append(title)
            # Not excluded and not already queued -> needs the category.
            if title not in foot.excludelist and title not in foot.nomatchlist:
                foot.nomatchlist.append(title)

        if 'continue' not in result:
            break
        cont_token = result['continue']['cmcontinue']
        print("continue")
    return
def splittextpoint(pagetext):
    """Find where to cut *pagetext* so text can be inserted before its tail.

    Only called when the page text contains "stub".  Scans backwards from
    the last character, over at most the final 100 characters and never
    past the start of the text:

      * a "]" seen before a cut point is found means "no cut";
      * once a "}" has been seen, the next "\\n" met (moving backwards) is
        the cut point.

    Args:
        pagetext: the page wikitext (str or unicode).

    Returns:
        The index of the newline to cut at, or ``len(pagetext) - 1`` when
        no cut point was found (the caller then appends at the end).
    """
    size = len(pagetext) - 1
    print(size)
    curly = False
    # Clamp the scan to the start of the text.  Without this, short pages
    # made the index go negative, which either wrapped round to the end of
    # the string (yielding a bogus negative cut) or raised IndexError.
    stop = max(size - 100, -1)
    # range rather than xrange: at most 100 items, and valid on Python 2 and 3.
    for loopvar in range(size, stop, -1):
        mychar = pagetext[loopvar]
        print("%s %s" % (loopvar, repr(mychar)))
        if mychar == "]":
            return size
        if mychar == "}":
            curly = True
        elif curly and mychar == "\n":
            return loopvar
    return size
def ProcessDoneCat(nextcat):
    """Sort the current members of category *nextcat* into done/exclude lists.

    Pages through the ``categorymembers`` API list (namespaces 0 and 14),
    following ``cmcontinue`` until the response has no ``continue`` key.

    A member already in ``foot.excludelist`` is appended to
    ``foot.donelist`` (its category is to be removed); any other member is
    appended to ``foot.excludelist``.
    """
    print("PDC")
    cont_token = ''
    while True:
        query = {
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': nextcat,
            'cmlimit': 'max',
            'cmnamespace': '0|14',
            'cmcontinue': cont_token,
        }
        # Send the request without automatic continuation; paging is manual.
        result = api.APIRequest(site, query).query(False)
        members = pagelist.listFromQuery(site, result['query']['categorymembers'])

        for member in members:
            title = member.unprefixedtitle
            pnt(title)
            if title in foot.excludelist:
                pnt("REMOVE THE CAT IN " + title)
                foot.donelist.append(title)
            else:
                pnt("EXCUDE " + title)
                foot.excludelist.append(title)

        if 'continue' not in result:
            break
        cont_token = result['continue']['cmcontinue']
        print("continue")
    return
def RemoveCat():
    """Remove the tracking category from every page in ``foot.donelist``.

    For each title the wikitext is fetched and checked with allow_bots();
    when edits are allowed, the
    ``[[Category:Association footballers not categorized by position]]``
    link (plus any newlines directly after it) is stripped and the page is
    saved.  ``foot.removed`` is incremented for each successful save.

    A failed save is reported and the loop continues with the next page.
    """
    separator = "++++++++++++++++++++++++++++++++++++++++"
    size = len(foot.donelist)
    print(size)
    for pagetitle in foot.donelist:
        pagetitletext = pagetitle.encode('utf-8')
        print(pagetitletext)
        pagepage = page.Page(site, pagetitle)
        pagetext = pagepage.getWikiText()
        go = allow_bots(pagetext, 'RonBot')  # does the page allow bots
        if not go:
            continue

        print(separator)
        print("REMOVAL bot allowed on article")
        pnt(pagetext)
        pagetext = re.sub(r'\[\[Category:Association footballers not categorized by position\]\]\n*', '', pagetext)
        pnt(pagetext)
        try:
            # Live edit (original note: DO NOT UNCOMMENT UNTIL BOT IS APPROVED)
            pagepage.edit(text=pagetext, bot=True, summary="(Task 7) - Removal of [[:Category:Association footballers not categorized by position]]")
        except Exception as err:
            # Exception, not a bare except: Ctrl-C / SystemExit must still
            # be able to stop a bot that is editing live pages.
            print("Failed to write")
            print(repr(err))
        else:
            foot.removed += 1
            print("writing changed page")
        print(separator)
    return
def AddCat(max_edits=13000):
    """Add the tracking category to every page in ``foot.nomatchlist``.

    Each page is loaded without following redirects.  Pages whose text
    contains "stub" are recorded in ``foot.stublist`` and
    splittextpoint() chooses where the category link is inserted;
    otherwise the link is appended at the end.  Redirects are recorded in
    ``foot.redirlist`` and not edited.  Pages that allow_bots() rejects
    are skipped.  ``foot.added`` is incremented for each successful save.

    Args:
        max_edits: stop once ``foot.added + foot.removed`` reaches this
            number (the trial-run limit).  Pass ``None`` for a full run
            with no limit.
    """
    category_link = "[[Category:Association footballers not categorized by position]]"
    separator = "++++++++++++++++++++++++++++++++++++++++"

    print(time.ctime())
    size = len(foot.nomatchlist)
    print(size)
    for pagetitle in foot.nomatchlist:
        pagetitletext = pagetitle.encode('utf-8')
        pagepage = page.Page(site, pagetitle, True, False)  # dont follow redirects!
        pageredir = pagepage.isRedir()
        pagetext = pagepage.getWikiText()
        last_index = len(pagetext) - 1
        cutplace = last_index
        if "stub" in pagetext:
            foot.stublist.append(pagetitle)
            cutplace = splittextpoint(pagetext)

        if allow_bots(pagetext, 'RonBot'):  # does the page allow bots
            print(pagetitletext + " ADDITION bot allowed on article")
            if pageredir:
                # show that page, but don't add the cat.
                print("REDIRECT " + pagetitletext)
                foot.redirlist.append(pagetitle)
            else:
                # A negative cut point would slice from the wrong end and
                # duplicate page text, so it is treated as "no cut point".
                if 0 <= cutplace < last_index:
                    # Replace the newline at the cut with newline+link+newline.
                    pagetext = pagetext[:cutplace] + "\n" + category_link + "\n" + pagetext[cutplace + 1:]
                else:
                    pagetext = pagetext + "\n" + category_link
                try:
                    # Live edit (original note: DO NOT UNCOMMENT UNTIL BOT IS APPROVED)
                    pagepage.edit(text=pagetext, bot=True, summary="(Task 7) - Addition of [[:Category:Association footballers not categorized by position]]")
                except Exception as err:
                    # Exception, not a bare except: Ctrl-C / SystemExit must
                    # still be able to stop a bot that is editing live pages.
                    print("Failed to write")
                    print(repr(err))
                else:
                    foot.added += 1
                    print("writing changed page")
                print(separator)

        # Termination for trials.
        if max_edits is not None and foot.added + foot.removed >= max_edits:
            return
    return
def findexclude(nextcat):
    """Record the members of category *nextcat* as excluded titles.

    Pages through the ``categorymembers`` API list (namespaces 0 and 14),
    following ``cmcontinue`` until the response has no ``continue`` key.

    Titles containing "Category" are queued on ``foot.mycatlist2`` if not
    already there (presumably these are the sub-categories - TODO confirm
    that ``unprefixedtitle`` keeps the prefix here); every other title is
    added to ``foot.excludelist`` if not already present.
    """
    cont_token = ''
    while True:
        query = {
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': nextcat,
            'cmlimit': 'max',
            'cmnamespace': '0|14',
            'cmcontinue': cont_token,
        }
        # Send the request without automatic continuation; paging is manual.
        result = api.APIRequest(site, query).query(False)
        members = pagelist.listFromQuery(site, result['query']['categorymembers'])

        for member in members:
            title = member.unprefixedtitle
            if "Category" not in title:
                if title not in foot.excludelist:
                    foot.excludelist.append(title)
                continue
            if title in foot.mycatlist2:
                pnt("NOT APPENDING " + title)
            else:
                foot.mycatlist2.append(title)
                pnt("APPENDING " + title)
                print(len(foot.mycatlist2))

        if 'continue' not in result:
            break
        cont_token = result['continue']['cmcontinue']
        print("continue")
    return
def TestMainIO():
    """Manual trial: run AddCat() against two sandbox pages, then exit.

    Empties ``foot.donelist``, queues the two sandbox titles on
    ``foot.nomatchlist``, calls AddCat() and terminates the process with
    sys.exit().
    """
    # Earlier experiments, kept disabled:
    #foot.nomatchlist=list()
    #foot.nomatchlist.append("User:Ronhjones/Sandbox3")
    #foot.nomatchlist.append("User:Ronhjones/Sandbox4")
    #foot.nomatchlist.append("User:Ronhjones/Sandbox5")
    #pagepage = page.Page(site, 'Sammy Frost', True, False) # dont follow redirects!
    #pagetext = pagepage.getWikiText()
    ##cutplace=splittextpoint(pagetext)
    #pagetext=pagetext[0:cutplace]+"\n"+"[[Category:Association footballers not categorized by position]]"+"\n"+pagetext[cutplace+1:]
    #pnt(repr(pagetext))
    foot.donelist = []
    foot.nomatchlist.extend([
        "User:Ronhjones/Sandbox4",
        "User:Ronhjones/Sandbox5",
    ])
    AddCat()
    sys.exit()
def _write_list(path, label, items):
    """Announce *label*, then write each of *items* on its own line to *path*."""
    # The with-block guarantees the file is closed (and flushed) on exit.
    with open(path, 'w') as myfile:
        print(label)
        for item in items:
            try:
                myfile.write("%s\n" % item)
            except UnicodeEncodeError:
                # Non-ASCII titles: write the UTF-8 bytes instead.
                myfile.write("%s\n" % item.encode('utf-8'))


def main():
    """Run one full pass of the task.

    Steps:
      1. Stop immediately unless startAllowed() reports "run".
      2. Walk the position categories (and everything findexclude() queues
         from them) to build ``foot.excludelist``.
      3. ProcessDoneCat() on the tracking category, then RemoveCat() for
         the pages it put on ``foot.donelist``.
      4. Walk the general footballer categories with findpages() to build
         ``foot.allplayers`` / ``foot.nomatchlist``, then AddCat().
      5. Write the five result lists to the local log directory and print
         the edit counters.
    """
    go = startAllowed()  # Check if task is enabled
    if go != "run":
        # Honour the on-wiki run switch instead of ignoring its value.
        print("Task disabled on wiki - not running")
        return
    #TestMainIO() # - test run was OK.
    #sys.exit()

    #Get the exclude list
    foot.mycatlist2 = [
        "Category:Association football defenders",
        "Category:Association football central defenders",
        "Category:Association football fullbacks",
        "Category:Association football sweepers",
        "Category:Association football forwards",
        "Category:Association football inside forwards",
        "Category:Association football outside forwards",
        "Category:Association football goalkeepers",
        "Category:Association football midfielders",
        "Category:Association football wing halves",
        "Category:Association football wingers",
        "Category:Women's association football defenders",
        "Category:Women's association football forwards",
        "Category:Women's association football goalkeepers",
        "Category:Women's association football midfielders",
        "Category:Association football player non-biographical articles",
        "Category:Association football utility players",
        "Category:Women's association football central defenders",
        "Category:Women's association football fullbacks",
        "Category:Women's association football wingers",
        "Category:Women's association football utility players",
    ]
    foot.excludelist = []  # was misspelt "excludlist", so never actually reset
    foot.stublist = []
    foot.redirlist = []

    # Index-based loop on purpose: findexclude() appends further categories
    # to foot.mycatlist2 while it is being walked.
    listnum = 0
    while listnum < len(foot.mycatlist2):
        pnt("CAT " + foot.mycatlist2[listnum])
        findexclude(foot.mycatlist2[listnum])
        listnum += 1
        print("%s %s" % ("LIST No. ", listnum))
    print(len(foot.excludelist))

    #Get the target cat, if not in exclude then add to that list
    #Otherwise add to donelist - these will need to have the cat removed.
    foot.removed = 0
    foot.added = 0
    print("check the done cat")
    ProcessDoneCat("Category:Association footballers not categorized by position")
    print(len(foot.donelist))
    print(len(foot.excludelist))
    if foot.donelist:
        RemoveCat()
    #write local file
    _write_list('C:\\Python27\\bot\\log7\\articlelist1.txt', "OPEN FILE 1", foot.excludelist)

    #Now ready to process Mainlist
    #Make a list of players that are NOT in the exclude list
    foot.allplayers = []
    foot.nomatchlist = []
    # NOTE(review): a position category ("...football defenders") is seeded
    # here alongside the general ones - confirm this is intentional.
    foot.mycatlist1 = [
        "Category:Association football defenders",
        "Category:Footballers by city or town",
        "Category:Association football players by club",
        "Category:Association football players by competition",
        "Category:Association football players by country",
        "Category:Association football players by national team",
        "Category:Association football players by nationality",
        "Category:Women's association football players",
        "Category:Expatriate association football players",
    ]

    # Index-based loop on purpose: findpages() appends further categories
    # to foot.mycatlist1 while it is being walked.
    listnum = 0
    while listnum < len(foot.mycatlist1):
        pnt("CAT" + foot.mycatlist1[listnum])
        findpages(foot.mycatlist1[listnum])
        listnum += 1
        print("%s %s" % ("LIST No. ", listnum))
    print(len(foot.allplayers))
    print(len(foot.nomatchlist))
    foot.nomatchlist.sort()
    if foot.nomatchlist:
        AddCat()

    #write local files
    _write_list('C:\\Python27\\bot\\log7\\articlelist2.txt', "OPEN FILE 2", foot.nomatchlist)
    _write_list('C:\\Python27\\bot\\log7\\articlelist3.txt', "OPEN FILE 3", foot.allplayers)
    _write_list('C:\\Python27\\bot\\log7\\articlelist4.txt', "OPEN FILE 4", foot.stublist)
    _write_list('C:\\Python27\\bot\\log7\\articlelist5.txt', "OPEN FILE 5", foot.redirlist)

    print(foot.added)
    print(foot.removed)
    print(time.ctime())
# Script entry point: run main() only when executed directly.
if __name__ == "__main__":
    # FutureWarning is ignored for the whole run; the previous warning
    # filters are restored when the with-block exits.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        main()