import wikipedia, urllib, urllib2, codecs, os, re, string, time, simplejson, traceback
def main():
print """
$$$$$$\ $$\ $$$$$$$\ $$\
$$ __$$\ $$ | $$ __$$\ $$ |
$$ / \__|$$$$$$$\ $$$$$$\ $$$$$$\ $$$$$$\ $$ | $$ | $$$$$$\ $$$$$$\
\$$$$$$\ $$ __$$\ $$ __$$\ $$ __$$\ $$ __$$\ $$$$$$$\ |$$ __$$\\_$$ _|
\____$$\ $$ | $$ |$$$$$$$$ |$$$$$$$$ |$$ / $$ |$$ __$$\ $$ / $$ | $$ |
$$\ $$ |$$ | $$ |$$ ____|$$ ____|$$ | $$ |$$ | $$ |$$ | $$ | $$ |$$\
\$$$$$$ |$$ | $$ |\$$$$$$$\ \$$$$$$$\ $$$$$$$ |$$$$$$$ |\$$$$$$ | \$$$$ |
\______/ \__| \__| \_______| \_______|$$ ____/ \_______/ \______/ \____/
$$ |
$$ |
\__| """
print "\nv0.4\n"
while 1:
run()
print "Run complete, sleeping for one hour..."
time.sleep(3600)
def run():
    """Do one pass: fetch the dead-end page report and process each entry.

    The toolserver report lists one page per line as a wikilink
    ("[[Title]]"); the surrounding brackets are stripped before the
    title is used. Pages that raise wikipedia.Error (e.g. deleted in
    the meantime) are still logged as checked so they are skipped on
    later runs.
    """
    wikipedia.output(u"Retrieving pages list...")
    # Get the list of pages.
    data = urllib2.urlopen("http://toolserver.org/~earwig/reports/enwiki/deadend_pages.txt").read()
    wikipedia.output(u"Page list retrieved, working on pages.")
    site = wikipedia.getSite()
    for pagename in data.split("\n"):  # For each page in the list.
        pagename = pagename[2:-2]  # strip the "[[" and "]]" around the title
        if not pagename:
            continue
        pagedata = wikipedia.Page(site, pagename)
        try:
            process(pagedata)  # Process it.
        except wikipedia.Error:
            wikipedia.output(u"Could not access page, it was probably deleted.")
        # Record the page even on failure so it is not retried next pass.
        checked(pagedata.aslink())
def process(page):
    """Examine one page and tag it with {{Dead end}} if it has no wikilinks.

    Skips pages already recorded in edited_pages.txt, pages the bot is
    not allowed to edit ({{bots}}/{{nobots}}), and pages that already
    carry a wikify/dead-end-family maintenance template.
    """
    wikipedia.output(u"\nWorking on page %s." % page.aslink())
    if ifChecked(page.aslink()):
        wikipedia.output(u"Page has already been checked.")
        return
    content = page.get()
    if not page.botMayEdit(username="SheepBot"):
        wikipedia.output(u"Bot is not allowed to edit this page, skipping.")
        return
    # Look for an existing dead-end/wikify-family maintenance template
    # (dead end, internal links, wikify, dep/deb, article issues, ai,
    # needs links) anywhere in the lower-cased wikitext.
    match = re.search(r"\{\{((dead(_| )?end)|(internal(_| )?links)|(wi?k?i?fy?)|(de?(p|b))|((article|multiple|)( |_)?issues)|(ai)|(needs(_| )links))", content.lower())
    if match:
        wikipedia.output(u"Found wikify/dead end template in page, stopping.")
        return
    if "[[" not in content and "{{" not in content:
        wikipedia.output(u"No links or templates found in page; adding dead end template.")
        addTemplate(page)
        return
    try:
        links = queryLinks(page.title())
    except Exception:
        traceback.print_exc()
        wikipedia.output(u"Cannot read API query.")
        return
    # queryLinks returns True when the API reports at least one
    # mainspace link, False when it reports none.
    if not links:
        wikipedia.output(u"No non/category/image links found in page. Adding template.")
        addTemplate(page)
    else:
        wikipedia.output(u"Links found from API are actual links. Stopping.")
def checked(page):
    """Append *page* (a wikilink string) to the log of handled pages.

    The log (edited_pages.txt) is what ifChecked() consults, so a page
    recorded here will be skipped on subsequent runs.
    """
    # "with" guarantees the handle is closed even if the write fails.
    with codecs.open("edited_pages.txt", "a", 'utf-8') as f:
        f.write("\n%s" % page)
def ifChecked(page):
    """Return True if *page* (a wikilink string) is in the handled-pages log.

    Tolerates a missing edited_pages.txt (e.g. the very first run): in
    that case nothing has been checked yet, so False is returned
    instead of raising IOError.
    """
    try:
        with codecs.open("edited_pages.txt", "r", 'utf-8') as f:
            checked_pages = f.read()
    except IOError:
        # No log file yet -- nothing has been checked.
        return False
    return page in checked_pages
def queryLinks(title):
    """Return True if the enwiki API reports any mainspace links on *title*.

    Queries prop=links with plnamespace=0 (up to 500 links), so
    category/file/interwiki links do not count. The API omits the
    'links' key entirely when there are no such links, in which case
    False is returned. Note: despite the name, this returns a bool,
    not the link list.
    """
    params = {'action': 'query', 'prop': 'links', 'format': 'json',
              'pllimit': 500, 'plnamespace': 0, 'titles': title}
    data = urllib.urlencode(params)
    raw = urllib2.urlopen("http://en.wikipedia.org/w/api.php", data)
    res = simplejson.loads(raw.read())
    # Single-title query: the one page id is the only key.
    pageid = res['query']['pages'].keys()[0]
    # 'links' is absent when the page has no mainspace links.
    return 'links' in res['query']['pages'][pageid]
def addTemplate(page):
    """Prepend {{Dead end}} to *page* and save it with a bot edit summary.

    Save failures (protected page, edit conflict, other framework
    errors) are reported but not re-raised; the success message is only
    printed when the save actually went through.
    """
    content = page.get()
    newpage = "{{Dead end|date=March 2010}}\n" + content
    comment = "([[WP:BOT|Bot]]): Adding {{dead end}} template to article because of lack of wikilinks."
    try:
        wikipedia.showDiff(content, newpage)
        page.put(newpage, comment)
    except wikipedia.LockedPage:
        wikipedia.output(u"Page protected, unable to save.")
    except wikipedia.PageNotSaved:
        wikipedia.output(u"Page unable to be saved.")
    except wikipedia.Error:
        wikipedia.output(u"MALFUNCTION, MALFUNCTION!")
    else:
        # Previously this ran unconditionally, claiming success even
        # after a caught save failure.
        wikipedia.output("Page saved successfully.")
if __name__ == '__main__':
    try:
        main()
    finally:
        # Always shut down the pywikipedia framework (releases the edit
        # throttle) no matter how main() exits.
        wikipedia.stopme()