#!/usr/bin/env python
# -*- coding: utf-8 -*-

import difflib
import simplejson as json # safely retrieve json objects (and correctly handle '/' in article titles)
import pickle # save arrays in files
import re
#import string # string.atoi - variable wait when lagged
import sys # read/write files
import time # what day is it?
import urllib # read/write websites

null = 0
cj = None
ClientCookie = None
cookielib = None
try:
    import cookielib
except ImportError:
    pass
else:
    import urllib2
    urlopen = urllib2.urlopen
    cj = cookielib.LWPCookieJar()
    Request = urllib2.Request
if not cookielib:
    try:
        import ClientCookie
    except ImportError:
        import urllib2
        urlopen = urllib2.urlopen
        Request = urllib2.Request
    else:
        urlopen = ClientCookie.urlopen
        cj = ClientCookie.LWPCookieJar()
        Request = ClientCookie.Request
if cj != None:
    if cookielib:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)
    else:
        opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
        ClientCookie.install_opener(opener)

### LOWER-LEVEL URL INTERFACE ###

def act(txdata, url='http://en.wikipedia.org/w/api.php', txheaders={'User-agent': 'VWBot'}):
    while True: # Loop so that it will continue to retry until it connects to the server, handles error occasionally thrown by server
        try:
            req = Request(url, txdata, txheaders)
            handle = urlopen(req)
        except IOError, e:
            #print 'We failed to open "%s".' % url
            #if hasattr(e, 'code'):
            #    print 'We failed with error code - %s.' % e.code
            #elif hasattr(e, 'reason'):
            #    print "The error object has the following 'reason' attribute :", e.reason
            #    print "This usually means the server doesn't exist, is down, or we don't have an internet connection."
            time.sleep(5)
        else:
            return handle.read() # handle.info() returns headers, handle.read() returns the page, handle.geturl() returns the true url of the page fetched (in case urlopen has followed any redirects)

### THIS DOES NOT ACCOUNT FOR QUERY-CONTINUE RESULTS, THESE MUST BE RE-QUERIED LATER

def action(params):
    if 'url' in params:
        url = params['url']
        del params['url']
    else:
        url = 'http://en.wikipedia.org/w/api.php'
    while True: # Loop so that it passes all of the errors
        params['format'] = 'json'
        # An appropriate non-aggressive value is maxlag=5 (5 seconds), used by most of the server-side scripts.
        # Higher values mean more aggressive behaviour, lower values are nicer.
        #params['maxlag'] = 2 - impractical due to number
        params['assert'] = 'bot'
        # If we're trying to make an edit, get an edit token first and set the timestamps to recognize an edit conflict.
        if params['action'] == 'edit':
            page = action({'action': 'query', 'prop': 'info|revisions', 'intoken': 'edit', 'titles': params['title']})
            params['token'] = page['query']['pages'][page['query']['pages'].keys()[0]]['edittoken']
            params['starttimestamp'] = page['query']['pages'][page['query']['pages'].keys()[0]]['starttimestamp']
            if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]].keys(): # page exists
                params['basetimestamp'] = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp']
            else: # page doesn't exist
                params['basetimestamp'] = params['starttimestamp']
        page = json.loads(act(urllib.urlencode(params), url))
        # log reply
        file = open(time.strftime('log %Y-%m-%d.txt', time.gmtime()), 'a')
        file.write(time.asctime(time.gmtime()) + '\t' + str(page) + '\n\n')
        file.close()
        # make sure we got a result
        if params['action'] in page.keys()[0]:
            #if 'continue' in params['action']:
            if params['action'] == 'edit':
                time.sleep(5)
            return page
        if page['error']['code'] == 'emptynewsection':
            return page
        # We've lagged: wait the duration of the lag (or a minimum of 5 seconds) and try again
        #if page['error']['code'] == 'maxlag':
        #    time.sleep(max(5, string.atoi(page['error']['info'][page['error']['info'].find(':')+2:page['error']['info'].find('seconds')-1])))
        # We've hit an edit conflict or some other unknown error.
        time.sleep(5)
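
# A minimal, illustrative use of the wrapper above (not executed; 'Example article' is a placeholder
# title, not a page this bot actually touches): fetch the current wikitext of one page and pull it
# out of the nested reply structure, indexed the same way the rest of this script does.
#page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': 'Example article', 'rvlimit': 1})
#wikitext = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']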

#######################
##### @ 00:00 GMT #####
#######################

startTime = time.time()

##### 2-STEP LOGIN #####

def login():
    page = action({'action': 'login', 'lgname': foo, 'lgpassword': bar})
    page = action({'action': 'login', 'lgname': foo, 'lgpassword': bar, 'lgtoken': page['login']['token']})
    if page['login']['result'] == 'Throttled':
        time.sleep(page['login']['wait'])
        login()

login()

##### TASK 1 #####

# TASK 2 - backlogSCV()
page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Suspected copyright violations', 'appendtext': time.strftime('\n{{/%Y-%m-%d}}', time.gmtime()), 'section': 0, 'summary': time.strftime('Adding new listing for %-d %B %Y ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])', time.gmtime())})

##### TASK 3 #####

page = action({'action': 'query', 'list': 'categorymembers', 'cmtitle': 'Category:Articles tagged for copyright problems', 'cmlimit': 'max'})
blankedPages = []
for i in page['query']['categorymembers']:
    blankedPages.append(i['title'])
file = open('todayLogCopyvio', 'rb') # pull up the previous run
alreadyBlankedPages = pickle.load(file)
file.close()
file = open('yesterdayLogCopyvio', 'wb') # overwrite yesterday's log with today's now that we have the change in articles
pickle.dump(alreadyBlankedPages, file)
file.close()
file = open('todayLogCopyvio', 'wb') # save log so it can be compared to the next run
pickle.dump(blankedPages, file)
file.close()
newBlankedPages = []
for x in blankedPages:
    if x not in alreadyBlankedPages:
        newBlankedPages.append(x)
# now we have our list to run searches for
for i in newBlankedPages[:]: # iterate over a copy so removal doesn't skip entries
    if i[:5] == 'File:':
        newBlankedPages.remove(i) # also need to report elsewhere - list at [[WP:PUF?]]

##### TASK 5 #####

page = action({'action': 'query', 'list': 'embeddedin', 'eititle': 'Template:Close paraphrasing', 'eilimit': 'max'})
closeParaphrases = []
for i in page['query']['embeddedin']:
    closeParaphrases.append(i['title'])
file = open('todayLogCloseParaphrasing', 'rb') # pull up the previous run
oldCloseParaphrases = pickle.load(file)
file.close()
file = open('yesterdayLogCloseParaphrasing', 'wb') # overwrite yesterday's log with today's now that we have the change in articles
pickle.dump(oldCloseParaphrases, file)
file.close()
file = open('todayLogCloseParaphrasing', 'wb') # save log so it can be compared to the next run
pickle.dump(closeParaphrases, file)
file.close()
newCloseParaphrases = []
for x in closeParaphrases:
    if x not in oldCloseParaphrases:
        newCloseParaphrases.append(x)
# now we have our list to run searches for

##### TASK 10 #####

page = action({'action': 'query', 'list': 'embeddedin', 'eititle': 'Template:Copypaste', 'eilimit': 'max'})
copyPastes = []
for i in page['query']['embeddedin']:
    copyPastes.append(i['title'])
file = open('todayLogCopypaste', 'rb') # pull up the previous run
oldCopyPastes = pickle.load(file)
file.close()
file = open('yesterdayLogCopypaste', 'wb') # overwrite yesterday's log with today's
pickle.dump(oldCopyPastes, file)
file.close()
file = open('todayLogCopypaste', 'wb') # save the new log so it can be compared to the next run tomorrow
pickle.dump(copyPastes, file)
file.close()
newCopyPastes = []
for x in copyPastes:
    if x not in oldCopyPastes:
        newCopyPastes.append(x)
# now we have our list to run searches for

#######################
##### @ 00:10 GMT #####
#######################

while time.time() - startTime < 600: # no earlier than 00:10 GMT
    time.sleep(600 - (time.time() - startTime))

# always update NewListings - this is only needed so Zorglbot doesn't screw up; has no actual effect
page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Copyright problems/NewListings',
               'text': time.strftime('{{Wikipedia:Copyright problems/{{#time:Y F j|-7 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-6 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-5 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-4 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-3 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-2 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-1 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j}}}}<!--\n{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*168)) +
                       time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*144)) +
                       time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*120)) +
                       time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*96)) +
                       time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*72)) +
                       time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*48)) +
                       time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*24)) +
                       time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}', time.gmtime()),
               'summary': time.strftime('Automatic addition of new listing for %-d %B %Y and archiving of listings older than 7 days ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])', time.gmtime())})

#######################
##### @ 00:20 GMT #####
#######################

while time.time() - startTime < 1200: # no earlier than 00:20 GMT
    time.sleep(1200 - (time.time() - startTime))

##### TASK 3 #####

p3 = re.compile('<!-- This is Cppage. Comment used by User:DumbBOT, do not remove or change -->')
p4 = re.compile('====.*====')
page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'rvlimit': 1})
# group new page creation AND old page archival
if 'missing' in page['query']['pages'][page['query']['pages'].keys()[0]]:
    # CREATE AND POPULATE "BOT: Automatic creation of new daily page for copyright problems"
    page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'text': '{{subst:Cppage}}\n<!-- Add new listings at the bottom of the list with the following format:\n\n* {{subst:article-cv|ArticleName}} from [http://www.WhereItWasCopiedFrom.com]. ~~~~\n\n-->\n', 'summary': 'Automatic creation of new daily page for copyright problems including automated findings ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': 'Wikipedia:Copyright problems', 'rvlimit': 1})
    newtext = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*'].replace('\n\n===New listings===', time.strftime('\n{{Wikipedia:Copyright problems/%Y %B %-d}}\n\n===New listings===', time.gmtime(time.time()-60*60*192)))
    page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Copyright problems', 'text': newtext.encode('utf-8'), 'summary': 'Automatic archiving of listings older than 7 days ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})
elif not re.search(p3, page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']):
    # POPULATE "adding CorenSearchBot findings"
    page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'text': page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*'].replace(re.search(p4, page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']).group(), '{{subst:Cppage}}'), 'summary': 'Adding automated findings ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})

##### TASKS 3, 5, 7 and 10 #####

def isAlreadyListed(title):
    page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'blfilterredir': 'redirects'})
    page['query']['backlinks'].append({'title': title})
    for i in page['query']['backlinks']:
        page = action({'action': 'query', 'list': 'backlinks', 'bltitle': i['title'].encode('utf-8'), 'bllimit': 'max', 'blnamespace': '4'})
        for j in page['query']['backlinks']:
            if 'Wikipedia:Copyright problems' == j['title'] or 'Wikipedia:Suspected copyright violations' == j['title'] or 'Wikipedia:Copyright problems/NewListings' == j['title']:
                return True
    return False
# replace NewListings check with one for each of the 8 always-listed days ???

def shouldBeRelisted(title):
    page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'blfilterredir': 'redirects'})
    page['query']['backlinks'].append({'title': title})
    wasListed = False
    isListed = False
    for i in page['query']['backlinks']:
        page = action({'action': 'query', 'list': 'backlinks', 'bltitle': i['title'].encode('utf-8'), 'bllimit': 'max', 'blnamespace': '4'})
        for j in page['query']['backlinks']:
            if 'Wikipedia:Suspected copyright violations/' in j['title'] or 'Wikipedia:Copyright problems/' in j['title']:
                wasListed = True
            if 'Wikipedia:Copyright problems' == j['title'] or 'Wikipedia:Suspected copyright violations' == j['title'] or 'Wikipedia:Copyright problems/NewListings' == j['title']:
                isListed = True
    if wasListed and not isListed:
        return True
    return False
# replace NewListings check with one for each of the 8 always-listed days ???
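
# A note on the two helpers above (descriptive only): both work from backlinks restricted to the
# Wikipedia namespace, for the article itself and for every redirect to it. isAlreadyListed() reports
# whether any of those titles is currently linked from WP:CP, WP:SCV or the NewListings page;
# shouldBeRelisted() reports whether a title is linked from some WP:CP or WP:SCV subpage but from
# none of those three live boards, i.e. it was listed at some point and has since dropped off.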

addtext = ''
p0 = re.compile('{{Close paraphras.*?}}', re.IGNORECASE | re.DOTALL)
p1 = re.compile('{{Close paraphras.*?source.*?}}', re.IGNORECASE | re.DOTALL) # gets {{Close paraphrase}} and {{Close paraphrasing}}
p1a = re.compile('\|\W*free\W*=\W*yes', re.IGNORECASE | re.DOTALL) # is source free?
for i in newCloseParaphrases:
    if not isAlreadyListed(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            if re.search(p0, pageSource): # could be tag removed before it's analyzed
                temp = re.search(p0, pageSource).group()
                tag = re.search(p1, temp)
                if not re.search(p1a, temp): # only list at WP:CP if non-free
                    if tag:
                        if '|' in tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):]:
                            addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' + tag.group()[tag.group().find('source') +\
                                tag.group()[tag.group().find('source'):].find('=') + 1:tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') +\
                                tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):].find('|')].strip() + '. ~~~~\n'
                        else:
                            addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' +\
                                tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') + 1:-2].strip() + '. ~~~~\n'
                    else:
                        addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase. ~~~~\n'

moretext = ''
p2 = re.compile('{{Copyviocore.*?}}', re.IGNORECASE | re.DOTALL)
for i in newBlankedPages:
    if not isAlreadyListed(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            tag = re.search(p2, pageSource)
            if tag:
                if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
                    moretext += '* {{subst:article-cv|:' + i + '}} from ' + tag.group()[tag.group().find('url') +\
                        tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
                        tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. Nomination completed by ~~~~\n'
                else:
                    moretext += '* {{subst:article-cv|:' + i + '}} from ' +\
                        tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. Nomination completed by ~~~~\n'
            else:
                moretext += '* {{subst:article-cv|:' + i + '}} Nomination completed by ~~~~\n'

CopyPasteText = ''
p5 = re.compile('{{Copy.?past.*?}}|{{Copy\s*\|.*?}}|{{Copy\s*}}', re.IGNORECASE | re.DOTALL)
p6 = re.compile('{{Copy.?past.*?url.*?}}|{{Copy\s*\|.*?url.*?}}', re.IGNORECASE | re.DOTALL)
for i in newCopyPastes:
    if not isAlreadyListed(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            if re.search(p5, pageSource): # could be tag removed before it's analyzed
                temp = re.search(p5, pageSource).group()
                tag = re.search(p6, temp)
                if tag:
                    if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
                        CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' + tag.group()[tag.group().find('url') +\
                            tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
                            tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. ~~~~\n'
                    else:
                        CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' +\
                            tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. ~~~~\n'
                else:
                    CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted. ~~~~\n'

### NOW FOR THE RELISTINGS ###

evenmoretext = ''
for i in blankedPages:
    if i in alreadyBlankedPages and shouldBeRelisted(i): # need to check alreadyBlankedPages as there is a delay between transclusion and backlinks
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            tag = re.search(p2, pageSource)
            if tag:
                if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
                    evenmoretext += '* {{subst:article-cv|:' + i + '}} from ' + tag.group()[tag.group().find('url') +\
                        tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
                        tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. Relisting. ~~~~\n'
                else:
                    evenmoretext += '* {{subst:article-cv|:' + i + '}} from ' +\
                        tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. Relisting. ~~~~\n'
            else:
                evenmoretext += '* {{subst:article-cv|:' + i + '}} Relisting. ~~~~\n'
for i in copyPastes:
    if i in oldCopyPastes and shouldBeRelisted(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            temp = re.search(p5, pageSource).group()
            tag = re.search(p6, temp)
            if tag:
                if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
                    evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' + tag.group()[tag.group().find('url') +\
                        tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
                        tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. Relisting. ~~~~\n'
                else:
                    evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' +\
                        tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. Relisting. ~~~~\n'
            else:
                evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted. Relisting. ~~~~\n'
for i in closeParaphrases:
    if i in oldCloseParaphrases and shouldBeRelisted(i): # need to check oldCloseParaphrases as there is a delay between transclusion and backlinks
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            temp = re.search(p0, pageSource).group()
            tag = re.search(p1, temp)
            if not re.search(p1a, temp): # only list at WP:CP if non-free
                if tag:
                    if '|' in tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):]:
                        evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' + tag.group()[tag.group().find('source') +\
                            tag.group()[tag.group().find('source'):].find('=') + 1:tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') +\
                            tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):].find('|')].strip() + '. Relisting. ~~~~\n'
                    else:
                        evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' +\
                            tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') + 1:-2].strip() + '. Relisting. ~~~~\n'
                else:
                    evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase. Relisting. ~~~~\n'

#addtext should be CloseParaphraseText
#moretext should be CopyvioText
#evenmoretext should be RelistText

editsum = ''
if len(addtext) + len(moretext) + len(evenmoretext) + len(CopyPasteText):
    if len(addtext):
        if len(moretext):
            if len(evenmoretext):
                if len(CopyPasteText):
                    editsum = 'Adding incomplete nominations, copy/pastes, close paraphrases and relisting overlooked pages'
                else:
                    editsum = 'Adding incomplete nominations, close paraphrases and relisting overlooked pages'
            elif len(CopyPasteText):
                editsum = 'Adding incomplete nominations, copy/pastes and close paraphrases'
            else:
                editsum = 'Adding incomplete nominations and close paraphrases'
        elif len(evenmoretext):
            if len(CopyPasteText):
                editsum = 'Adding copy/pastes, close paraphrases and relisting overlooked pages'
            else:
                editsum = 'Adding close paraphrases and relisting overlooked pages'
        elif len(CopyPasteText):
            editsum = 'Adding copy/pastes and close paraphrases'
        else:
            editsum = 'Adding close paraphrases'
    elif len(moretext):
        if len(evenmoretext):
            if len(CopyPasteText):
                editsum = 'Adding incomplete nominations, copy/pastes and relisting overlooked pages'
            else:
                editsum = 'Adding incomplete nominations and relisting overlooked pages'
        elif len(CopyPasteText):
            editsum = 'Adding incomplete nominations and copy/pastes'
        else:
            editsum = 'Adding incomplete nominations'
    elif len(evenmoretext):
        if len(CopyPasteText):
            editsum = 'Adding copy/pastes and relisting overlooked pages'
        else:
            editsum = 'Relisting overlooked pages'
    else:
        editsum = 'Adding copy/pastes'
if len(editsum):
    page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime(time.time()-60*60*24)), 'appendtext': (u'\n' + moretext + CopyPasteText + addtext + evenmoretext).encode('utf-8'), 'section': 2, 'summary': editsum + ' ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})

############################
##### USERSPACE TRIALS #####
############################

##### TASK 4: notify authors that their pages have been blanked (by {{subst:copyvio}}) in case they aren't notified by the taggers, so that the pages don't get relisted for an extra week without any action being taken on them #####

def doNotify(title):
    page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'prop': 'revisions|info', 'rvprop': 'timestamp|user', 'rvdir': 'newer', 'titles': title.encode('utf-8'), 'rvlimit': 1, 'blredirect': 1}) # get backlinks and creation time/user as well as info to determine if it's deleted
    if 'missing' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        return "'''Do Nothing''' Article has been deleted."
    for i in page['query']['backlinks']: # check for CCIs
        if i['title'][:47] == 'Wikipedia:Contributor copyright investigations/':
            return "'''Do Nothing''' [[" + i['title'] + '|CCI]]'
        elif i['title'][:14] == 'Wikipedia:CCI/':
            return "'''Do Nothing''' [[" + i['title'] + '|CCI]]'
        if 'redirlinks' in i:
            for j in i['redirlinks']:
                if j['title'][:47] == 'Wikipedia:Contributor copyright investigations/':
                    return "'''Do Nothing''' [[" + j['title'] + '|CCI]]'
                elif j['title'][:14] == 'Wikipedia:CCI/':
                    return "'''Do Nothing''' [[" + j['title'] + '|CCI]]'
    for i in page['query']['backlinks']: # parse talk pages to see if already notified
        if i['title'][:10] == 'User talk:':
            page2 = action({'action': 'parse', 'page': i['title'], 'prop': 'sections'})
            for j in page2['parse']['sections']:
                if j['line'] == 'Copyright problem: ' + title: # need to see if it matches a redirect title too... :(
                    return "'''Do Nothing''' " + i['title'][10:] + ' already notified'
    page = action({'action': 'query', 'prop': 'categories', 'clcategories': 'Category:Items pending OTRS confirmation of permission|Category:Wikipedia pages with unconfirmed permission received by OTRS|Category:Wikipedia files with unconfirmed permission received by OTRS|Category:Items with OTRS permission confirmed', 'titles': 'Talk:' + title.encode('utf-8')})
    if 'categories' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        return "'''Do Nothing''' OTRS tag"
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'ids|user', 'titles': title.encode('utf-8'), 'rvlimit': 'max'})
    articleRevisionIDs = []
    for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
        articleRevisionIDs.append(i['revid'])
    revisionMatch = []
    latest = ''
    for i in articleRevisionIDs:
        page = action({'action': 'query', 'prop': 'revisions', 'rvstartid': i, 'rvprop': 'content|user|timestamp', 'titles': title.encode('utf-8'), 'rvlimit': 1})
        if i == articleRevisionIDs[0]: # maybe ???
            tagger = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user'] # maybe ???
            tagtime = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp'] # maybe ??
        if '*' in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0].keys(): # ignore deleted revisions
            if latest == '':
                latest = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            if '{{Copyviocore' in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']:
                tagger = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user']
                tagtime = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp']
            revisionMatch.append(difflib.SequenceMatcher(None, latest[latest.find('<!-- Do not use the "Copyviocore" template directly; the above line is generated by "subst:Copyvio|url" -->\n')+108:latest.find('</div>')], page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']).ratio())
    diffRevisionMatch = []
    for i in range(len(revisionMatch)):
        if i < len(revisionMatch)-1:
            diffRevisionMatch.append(round(revisionMatch[i]-revisionMatch[i+1], 6))
        else:
            diffRevisionMatch.append(round(revisionMatch[i], 6))
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': title.encode('utf-8'), 'rvlimit': 1, 'rvstartid': articleRevisionIDs[[i for i, x in enumerate(diffRevisionMatch) if x == max(diffRevisionMatch)][0]]})
    contributor = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user']
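    # How the block above picks the notification target (descriptive note): revisionMatch[i] is the
    # SequenceMatcher ratio between the text blanked inside the {{copyvio}} box and revision i (newest
    # first), diffRevisionMatch[i] is the drop in that ratio from revision i to the next older one, and
    # the revision sitting on the largest drop is treated as the edit that introduced the matching
    # text, so its author becomes the 'contributor' who gets notified.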
    # CHECK FOR CUSTOM NOTIFICATION
    #tagger at User talk:contributor > tagtime
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'User talk:' + contributor.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
    if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
            if i['user'] == tagger:
                return "'''Do Nothing''' " + contributor + ' was left a custom notification'
    #contributor at Talk:Article/Temp page > tagtime
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'Talk:' + title.encode('utf-8') + '/Temp', 'rvend': tagtime, 'rvlimit': 'max'})
    if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
            if i['user'] == contributor:
                return "'''Do Nothing''' " + contributor + ' created the temporary page'
    #contributor at Talk:Article > tagtime
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'Talk:' + title.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
    if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
            if i['user'] == contributor:
                return "'''Do Nothing''' " + contributor + ' edited the article talk page after it was tagged'
    #contributor at Article > tagtime
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': title.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
    if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
            if i['user'] == contributor:
                return "'''Do Nothing''' " + contributor + ' edited the article after it was tagged'
    return "'''Notify contributor''': " + contributor + ' - tagged by ' + tagger

#narrowing with 'blnamespace': '3|4' breaks the blredirect parameter
# BETTER BUGFIX - try narrowed backlinks, then get list of redirects ONLY, then get backlinks for each redirect
# look for 'Copyright problem: <title or redirect>'

# list of all blanked pages
article = ''
for i in newBlankedPages:
    article += '*[[:' + i + ']] - ' + doNotify(i) + '\n'
page = action({'action': 'edit', 'bot': 1, 'title': 'User:VWBot/Trial', 'text': (article + '\n').encode('utf-8'), 'section': 'new', 'summary': time.strftime('== %-d %B %Y ==', time.gmtime())})

##### TASK 6: flag when a contributor gets a CorenSearchBot/VWBot notice if he has had a significant amount before #####

# CSBot's user talk contribs from 00:00:00 to 23:59:59 the previous day
page = action({'action': 'query', 'list': 'usercontribs', 'ucuser': 'CorenSearchBot', 'uclimit': 'max', 'ucstart': time.strftime('%Y-%m-%dT23:59:59Z', time.gmtime(time.time()-60*60*24)), 'ucend': time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(time.time()-60*60*24)), 'ucnamespace': '3'})
users = {}
for i in page['query']['usercontribs']:
    users[i['title']] = []
# VWBot's user talk contribs from 00:00:00 to 23:59:59 the previous day
page = action({'action': 'query', 'list': 'usercontribs', 'ucuser': 'VWBot', 'uclimit': 'max', 'ucstart': time.strftime('%Y-%m-%dT23:59:59Z', time.gmtime(time.time()-60*60*24)), 'ucend': time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(time.time()-60*60*24)), 'ucnamespace': '3'})
for i in page['query']['usercontribs']:
    users[i['title']] = []
for i in ['Merovingian', u'Leszek Jańczuk', 'Ganeshbot', 'Starzynka', 'Ser Amantio di Nicolao', 'Kumioko', 'Packerfansam', 'Alan Liefting']:
    try:
        del users['User talk:' + i]
    except KeyError:
        pass
for user in users.keys(): # only checks last 5,000 edits
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'comment|timestamp|user', 'titles': user.encode('utf-8'), 'rvlimit': 'max'})
    for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
        if 'user' in i: # needed because RevDelete can return edits with no user field...apparently
            if i['user'] == 'VWBot' or i['user'] == 'CorenSearchBot':
                users[user].append([i['comment'][i['comment'].find('on')+3:], time.strftime('%Y %B %-d', time.strptime(i['timestamp'], '%Y-%m-%dT%H:%M:%SZ'))])
addition = u''
for user in users.keys():
    if len(users[user]) > 4:
        addition += '\n==== ' + str(len(users[user])) + ': {{User|1=' + user[10:] + '}} ====\n{{Collapse top|Tagged articles}}\n'
        for i in users[user]:
            addition += '* {{subst:article-cv|' + i[0] + '}} created on ' + i[1] + '\n'
        addition += '{{Collapse bottom}}\n'
if len(addition):
    page = action({'action': 'edit', 'bot': 1, 'title': 'User:VWBot/Trial', 'appendtext': (u'\n\n=== Task 6 ===' + addition).encode('utf-8'), 'summary': 'Listing users who have had multiple articles tagged by CorenSearchBot/VWBot ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})