import wikipedia, urllib, urllib2, codecs, os, re, string, time, simplejson, traceback
def main():
print """
$$$$$$\ $$\ $$$$$$$\ $$\
$$ __$$\ $$ | $$ __$$\ $$ |
$$ / \__|$$$$$$$\ $$$$$$\ $$$$$$\ $$$$$$\ $$ | $$ | $$$$$$\ $$$$$$\
\$$$$$$\ $$ __$$\ $$ __$$\ $$ __$$\ $$ __$$\ $$$$$$$\ |$$ __$$\\_$$ _|
\____$$\ $$ | $$ |$$$$$$$$ |$$$$$$$$ |$$ / $$ |$$ __$$\ $$ / $$ | $$ |
$$\ $$ |$$ | $$ |$$ ____|$$ ____|$$ | $$ |$$ | $$ |$$ | $$ | $$ |$$\
\$$$$$$ |$$ | $$ |\$$$$$$$\ \$$$$$$$\ $$$$$$$ |$$$$$$$ |\$$$$$$ | \$$$$ |
\______/ \__| \__| \_______| \_______|$$ ____/ \_______/ \______/ \____/
$$ |
$$ |
\__| """
print "\nv0.4\n"
while 1:
run()
print "Run complete, sleeping for one hour..."
time.sleep(3600)
def run():
    """Do one pass: fetch the dead-end page report and process each entry.

    The toolserver report lists one page per line as a wikilink
    ("[[Title]]"); the surrounding brackets are stripped before the
    title is used. Pages that raise wikipedia.Error (e.g. deleted in
    the meantime) are still logged as checked so they are skipped on
    later runs.
    """
    wikipedia.output(u"Retrieving pages list...")
    # Get the list of pages.
    data = urllib2.urlopen("http://toolserver.org/~earwig/reports/enwiki/deadend_pages.txt").read()
    wikipedia.output(u"Page list retrieved, working on pages.")
    site = wikipedia.getSite()
    for pagename in data.split("\n"):  # For each page in the list.
        pagename = pagename[2:-2]  # strip the "[[" and "]]" around the title
        if not pagename:
            continue
        pagedata = wikipedia.Page(site, pagename)
        try:
            process(pagedata)  # Process it.
        except wikipedia.Error:
            wikipedia.output(u"Could not access page, it was probably deleted.")
        # Record the page even on failure so it is not retried next pass.
        checked(pagedata.aslink())
def process(page):
    """Examine one page and tag it with {{Dead end}} if it has no wikilinks.

    Skips pages already recorded in edited_pages.txt, pages the bot is
    not allowed to edit ({{bots}}/{{nobots}}), and pages that already
    carry a wikify/dead-end-family maintenance template.
    """
    wikipedia.output(u"\nWorking on page %s." % page.aslink())
    if ifChecked(page.aslink()):
        wikipedia.output(u"Page has already been checked.")
        return
    content = page.get()
    if not page.botMayEdit(username="SheepBot"):
        wikipedia.output(u"Bot is not allowed to edit this page, skipping.")
        return
    # Look for an existing dead-end/wikify-family maintenance template
    # (dead end, internal links, wikify, dep/deb, article issues, ai,
    # needs links) anywhere in the lower-cased wikitext.
    match = re.search(r"\{\{((dead(_| )?end)|(internal(_| )?links)|(wi?k?i?fy?)|(de?(p|b))|((article|multiple|)( |_)?issues)|(ai)|(needs(_| )links))", content.lower())
    if match:
        wikipedia.output(u"Found wikify/dead end template in page, stopping.")
        return
    if "[[" not in content and "{{" not in content:
        wikipedia.output(u"No links or templates found in page; adding dead end template.")
        addTemplate(page)
        return
    try:
        links = queryLinks(page.title())
    except Exception:
        traceback.print_exc()
        wikipedia.output(u"Cannot read API query.")
        return
    # queryLinks returns True when the API reports at least one
    # mainspace link, False when it reports none.
    if not links:
        wikipedia.output(u"No non/category/image links found in page. Adding template.")
        addTemplate(page)
    else:
        wikipedia.output(u"Links found from API are actual links. Stopping.")
def checked(page):
    """Append *page* (a wikilink string) to the log of handled pages.

    The log (edited_pages.txt) is what ifChecked() consults, so a page
    recorded here will be skipped on subsequent runs.
    """
    # "with" guarantees the handle is closed even if the write fails.
    with codecs.open("edited_pages.txt", "a", 'utf-8') as f:
        f.write("\n%s" % page)
def ifChecked(page):
    """Return True if *page* (a wikilink string) is in the handled-pages log.

    Tolerates a missing edited_pages.txt (e.g. the very first run): in
    that case nothing has been checked yet, so False is returned
    instead of raising IOError.
    """
    try:
        with codecs.open("edited_pages.txt", "r", 'utf-8') as f:
            checked_pages = f.read()
    except IOError:
        # No log file yet -- nothing has been checked.
        return False
    return page in checked_pages
def queryLinks(title):
    """Return True if the enwiki API reports any mainspace links on *title*.

    Queries prop=links with plnamespace=0 (up to 500 links), so
    category/file/interwiki links do not count. The API omits the
    'links' key entirely when there are no such links, in which case
    False is returned. Note: despite the name, this returns a bool,
    not the link list.
    """
    params = {'action': 'query', 'prop': 'links', 'format': 'json',
              'pllimit': 500, 'plnamespace': 0, 'titles': title}
    data = urllib.urlencode(params)
    raw = urllib2.urlopen("http://en.wikipedia.org/w/api.php", data)
    res = simplejson.loads(raw.read())
    # Single-title query: the one page id is the only key.
    pageid = res['query']['pages'].keys()[0]
    # 'links' is absent when the page has no mainspace links.
    return 'links' in res['query']['pages'][pageid]
def addTemplate(page):
    """Prepend {{Dead end}} to *page* and save it with a bot edit summary.

    Save failures (protected page, edit conflict, other framework
    errors) are reported but not re-raised; the success message is only
    printed when the save actually went through.
    """
    content = page.get()
    newpage = "{{Dead end|date=March 2010}}\n" + content
    comment = "([[WP:BOT|Bot]]): Adding {{dead end}} template to article because of lack of wikilinks."
    try:
        wikipedia.showDiff(content, newpage)
        page.put(newpage, comment)
    except wikipedia.LockedPage:
        wikipedia.output(u"Page protected, unable to save.")
    except wikipedia.PageNotSaved:
        wikipedia.output(u"Page unable to be saved.")
    except wikipedia.Error:
        wikipedia.output(u"MALFUNCTION, MALFUNCTION!")
    else:
        # Previously this ran unconditionally, claiming success even
        # after a caught save failure.
        wikipedia.output("Page saved successfully.")
if __name__ == '__main__':
    try:
        main()
    finally:
        # Always shut down the pywikipedia framework (releases the edit
        # throttle) no matter how main() exits.
        wikipedia.stopme()