# ===== File: CITconfig.py -- shared state module imported by the main program =====
# Module-level state shared between the functions of the main program.
# The main program reads and rebinds these through the ``CITconfig`` module.
inputlist = []    # lines of the page currently being processed
outputlist = []   # lines of the page after its sections have been sorted
partlist = []     # entries of the section currently being sorted
tagged = 0
datedlist = []    # lines of the report page that is written at the end
date = ""         # cut-off timestamp string, e.g. "2010-11-17T00:00:00Z"
ignore = ""       # concatenated "{{...}}" lines that must not be reported
# ===== File: main program (separate script; imports CITconfig) =====
from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import CITconfig
# Module-level API session shared by every function below. Note that this runs
# (and logs in) at import time, before main() is called.
# NOTE(review): Wiki() is called without a URL, so it relies on the wikitools
# default endpoint -- the original comment says that is English Wikipedia; confirm.
site = wiki.Wiki() #Tell Python to use the English Wikipedia's API
# Credentials come from the local userpassbot module (not shown here).
site.login(userpassbot.username, userpassbot.password) #login
# Console-safe print helper: some of the output (e.g. file names) contains
# accented characters that the console encoding may not be able to represent.
def pnt(s):
    """Print *s*; if that raises UnicodeEncodeError, print its UTF-8 encoding instead."""
    try:
        print(s)
    except UnicodeEncodeError:
        utf8_form = s.encode('utf-8')
        print(utf8_form)
def startAllowed():
    """Check the on-wiki run switch for this task.

    Reads the wikitext of "User:RonBot/10/Run" and returns the string "run"
    when it is exactly "Run", otherwise the string "no".
    """
    switch_text = page.Page(site, "User:RonBot/10/Run").getWikiText()
    return "run" if switch_text == "Run" else "no"
def allow_bots(text, user):
    """Evaluate the page's {{bots}} / {{nobots}} template for *user*.

    Only the first template named ``bots`` or ``nobots`` found in *text* is
    examined. Its parameters are walked in order and the first decisive one
    wins:

    * ``allow=none``                      -> False
    * ``allow`` listing *user* or ``all`` -> True
    * ``deny=none``                       -> True
    * ``deny`` listing *user* or ``all``  -> False

    A bare ``{{nobots}}`` (no parameters) gives False. Everything else,
    including a page without such a template and an ``allow`` list that does
    not name *user*, gives True.

    Args:
        text: wikitext of the page.
        user: bot account name; compared lower-cased and stripped.

    Returns:
        bool: True if nothing in the template rules the bot out.
    """
    bot_name = user.lower().strip()
    parsed = mwparserfromhell.parse(text)

    # Locate the first exclusion template, if there is one.
    exclusion = None
    for candidate in parsed.filter_templates():
        if candidate.name.matches(['bots', 'nobots']):
            exclusion = candidate
            break
    if exclusion is None:
        return True

    print("template found")  # Have we found one
    for param in exclusion.params:
        names = [part.lower().strip() for part in param.value.split(",")]
        if param.name == 'allow':
            print("We have an ALLOW")  # allow found
            if ''.join(names) == 'none':
                return False
            if any(name in (bot_name, 'all') for name in names):
                return True
        elif param.name == 'deny':
            print("We have a DENY")  # deny found
            if ''.join(names) == 'none':
                print("none - true")
                return True
            # First listed name that targets this bot (or every bot), if any.
            hit = next((name for name in names if name in (bot_name, 'all')), None)
            if hit is not None:
                pnt(hit)
                pnt(bot_name)
                print("all - false")
                return False

    if exclusion.name.matches('nobots') and len(exclusion.params) == 0:
        print("match - false")
        return False
    return True
def remove_duplicates(l):
    """Return a new list holding each distinct element of *l* once.

    The elements pass through a set, so the original order is not kept.
    """
    unique = set(l)
    return list(unique)
def firsttimestamp(pagename):
    """Return the timestamp of one revision of *pagename*, or "X".

    The query asks for a single revision (``rvlimit=1``) in ``rvdir=newer``
    order, i.e. starting from the oldest one, and requests only its
    timestamp. The reply has the shape::

        {"query": {"pages": {"29641123": {
            "pageid": 29641123, "ns": 0,
            "title": "Journal of Internal Medicine",
            "revisions": [{"timestamp": "2010-11-17T20:38:57Z"}]}}}}

    Args:
        pagename: title of the page to look up.

    Returns:
        str: the timestamp (e.g. "2010-11-17T20:38:57Z"), or the sentinel
        "X" when the page id in the reply is not positive (presumably a
        missing page).
    """
    query = {
        'action': 'query',
        'titles': pagename,
        'prop': 'revisions',
        'rvprop': 'timestamp',
        'rvlimit': '1',
        'rvdir': 'newer',
    }
    request = api.APIRequest(site, query)  # Set the API request
    reply = request.query(False)  # Send the API request and store the result
    pages = reply['query']['pages']
    # One title was requested, so the first key is the page id.
    pageid = list(pages)[0]
    if int(pageid) > 0:
        timestamp = str(pages[pageid]['revisions'][0]['timestamp'])
    else:
        timestamp = "X"
    print(timestamp)
    return timestamp
def checkitem(line1):
    """Record *line1* in the report if its target page predates the cut-off.

    Only lines whose first "|"-separated field is "{{JCW-exclude" and that
    have at least three fields are considered. The third field is used as a
    page title; when that page's timestamp from firsttimestamp() is earlier
    than CITconfig.date, *line1* is appended to CITconfig.datedlist. Lines
    whose text (minus the last two characters) occurs in CITconfig.ignore
    are skipped.

    Args:
        line1: one template line; assumed to end in "}}".
    """
    stamp_format = '%Y-%m-%dT%H:%M:%SZ'
    entry = line1[:-2]  # remove the }}
    fields = entry.split('|')
    if fields[0] != "{{JCW-exclude" or len(fields) <= 2:
        return
    if entry in CITconfig.ignore:  # substring test against the ignore text
        return
    created = firsttimestamp(fields[2])
    if created == "X":  # no timestamp available for that page
        return
    created_at = datetime.datetime.strptime(created, stamp_format)
    cutoff = datetime.datetime.strptime(CITconfig.date, stamp_format)
    print("")
    if created_at < cutoff:
        print('1 < 2')
        CITconfig.datedlist.append(line1)
def getJCWdate():
    """Load the cut-off date from "Template:JCW-date" into CITconfig.date.

    Everything before the first "<" in the template's wikitext is taken as
    the date, and "T00:00:00Z" is appended to form a full timestamp string.
    """
    template = page.Page(site, 'Template:JCW-date')
    print("pagepage")
    raw_text = template.getWikiText()
    date_part = raw_text.partition('<')[0]
    CITconfig.date = date_part + "T00:00:00Z"
    print(CITconfig.date)
def getandsort(x):
    """Collect, check, de-duplicate and sort one section of CITconfig.inputlist.

    Starting at index *x*, lines are gathered until a line equal to "}}" is
    reached. Each gathered line longer than two characters is passed to
    checkitem() and kept. The kept lines are de-duplicated, sorted, and
    appended to CITconfig.outputlist (CITconfig.partlist holds them too).

    The scan also stops at the end of the input, so a section with no closing
    "}}" line no longer raises IndexError.

    Args:
        x: index of the first line of the section body.

    Returns:
        int: index of the closing "}}" line, or len(CITconfig.inputlist)
        when the section is never closed.
    """
    print("getandsort")
    source = CITconfig.inputlist
    total = len(source)
    CITconfig.partlist = list()
    while x < total and source[x] != "}}":
        line = source[x]
        if len(line) > 2:
            CITconfig.partlist.append(line)
            checkitem(line)
        x = x + 1
    print("before dup rem %d" % len(CITconfig.partlist))
    CITconfig.partlist = remove_duplicates(CITconfig.partlist)
    print("after dup rem %d" % len(CITconfig.partlist))
    # Two passes on purpose: the sort is stable, so entries that differ only
    # in case keep their plain (case-sensitive) order after the second pass.
    CITconfig.partlist = sorted(CITconfig.partlist)  # Normal sort first
    CITconfig.partlist = sorted(CITconfig.partlist, key=str.lower)  # sorts using lowercase key
    pnt(CITconfig.partlist)
    CITconfig.outputlist.extend(CITconfig.partlist)  # transfer sorted section
    return x
def writepage(title,mylist):
    """Replace the content of page *title* with the lines in *mylist*.

    Each entry of *mylist* becomes one line of the page, terminated by a
    newline. The edit is made with bot=True and skipmd5=True and the summary
    "update page".
    """
    target = page.Page(site, title)
    body = "".join(entry + "\n" for entry in mylist)
    print("witing page")
    target.edit(text=body, bot=True, skipmd5=True, summary="update page")
def Process(search):
    """Fetch page *search*, sort each "columns-list" section, print the result.

    The page's lines go into CITconfig.inputlist. They are copied to
    CITconfig.outputlist one by one; whenever a line contains "columns-list",
    the lines that follow are handed to getandsort(), which appends the
    sorted section and returns the index at which copying resumes.

    Nothing is processed when allow_bots() rejects "RonBot" for this page.
    The live edit is still disabled: the assembled text is only printed.

    Args:
        search: title of the page to process.
    """
    print("search %s" % search)
    pagetitle = search
    pnt(pagetitle.encode('utf-8'))
    pagepage = page.Page(site, pagetitle)
    print("pagepage")
    pagetext = pagepage.getWikiText()
    CITconfig.outputlist = list()
    CITconfig.inputlist = pagetext.splitlines()
    size = len(CITconfig.inputlist)
    print("SIZE= %d" % size)
    if not allow_bots(pagetext, "RonBot"):
        return
    x = 0
    while x < size:
        line = CITconfig.inputlist[x]
        CITconfig.outputlist.append(line)
        if "columns-list" in line:  # Start of a section
            print("X IN %d" % (x + 1))
            # x+1 is the line to start with; copying resumes at the returned
            # index, so the section's closing line is copied by this loop.
            x = getandsort(x + 1)
            print("X OUT %d" % x)
        else:
            x = x + 1
    # Assemble the new page text from the output lines.
    pagetext = "".join(line + "\n" for line in CITconfig.outputlist)
    try:
        #pagepage.edit(text=pagetext, bot=True, summary="(Task 10) sorting lists ([[User:RonBot|disable]])") #(DO NOT UNCOMMENT UNTIL BOT IS APPROVED)
        print(pagetext)
        print("writing changed page")
    except Exception:
        # Narrowed from a bare except so Ctrl-C and sys.exit() still propagate.
        print("Failed to write")
    print("")
    return
def getwritepage(search):
    """Load the report page and seed CITconfig.datedlist / CITconfig.ignore.

    Lines of page *search* are copied into CITconfig.datedlist up to (not
    including) the first line containing "Report begin". Between a line
    containing "Report ignore" and the next line containing "-->", every
    line containing "{{" is also concatenated onto CITconfig.ignore.

    Args:
        search: title of the report page.
    """
    pnt(search.encode('utf-8'))
    report_page = page.Page(site, search)
    print("pagepage WP")
    report_text = report_page.getWikiText()
    CITconfig.datedlist = []
    CITconfig.ignore = ""
    report_lines = report_text.splitlines()
    in_ignore = False
    for row in report_lines:
        # Order matters: the flag is updated before the line is examined, so
        # the "-->" line itself is never added to the ignore text.
        if "Report ignore" in row:
            in_ignore = True
        if "-->" in row:
            in_ignore = False
        if "Report begin" in row:
            break
        CITconfig.datedlist.append(row)
        if in_ignore and "{{" in row:
            CITconfig.ignore = CITconfig.ignore + row
    # Debug dump of what was read.
    print("")
    pnt(report_lines)
    print("")
    pnt(CITconfig.datedlist)
    print("")
    pnt(CITconfig.ignore)
    print("end of start")
def main():
    """Run the task: build the report of exclusions and write it back.

    Exits with status 1 when startAllowed() returns "no". Otherwise it loads
    the cut-off date, reads the existing report page, processes the
    configuration page between the report's begin/end markers, and writes
    CITconfig.datedlist back to the report page.
    """
    report_title = 'User talk:JL-Bot/Citations.cfg'
    config_title = 'User:JL-Bot/Citations.cfg'

    if startAllowed() == "no":  # Check if task is enabled
        sys.exit(1)
    getJCWdate()
    getwritepage(report_title)
    CITconfig.datedlist.append("<!-- Report begin-->")
    CITconfig.datedlist.append("The following exclusions are likely no longer needed:")
    Process(config_title)
    CITconfig.datedlist.append("<!-- Report end-->")
    writepage(report_title, CITconfig.datedlist)
# Script entry point: run main() with FutureWarning messages suppressed.
if __name__ == "__main__":
    with warnings.catch_warnings():
        # The filter is local to this ``with`` block and is undone on exit.
        warnings.simplefilter("ignore", category=FutureWarning)
        main()