# User:BogBot/Source code/Task 05

#!/usr/bin/python
# -*- coding: UTF-8 -*-

# Bot Script to substitute:
# transcluded {{tl|cite pmid}} and {{tl|cite doi}} with in-line {{tl|cite journal}} and
# {{tl|cite isbn}} with {{tl|cite book}} templates.
# If the predominant citation template style is horizontal (template on one line),
# the script will replace any vertically formatted templates (template on many lines)
# with horizontally formatted templates.

import codecs
import re
import urllib
import mwparserfromhell
import wikipedia
from stdnum import ean
from stdnum.exceptions import *
from stdnum.util import clean
import subprocess
import sys
from Bio import Entrez

# compiled regular expression

user =  "BogBot"
regexp_ab               = re.compile(r'\{\{(nobots|bots\|(allow=none|deny=.*?' + user + r'.*?|optout=all|deny=all))\}\}')
regexp_passed_parameter = re.compile(r"\{\{\{\s*(?P<PKEY>\S*)\s*\|\s*(?P<PVALUE>\S*)\s*\}\}\}")
regexp_redirect         = re.compile(r"#REDIRECT\[\[(?P<RDIRECT>.*?)\]\]")

def Allowbots(text):
    if (regexp_ab.search(text)):
        return False
    return True

def savepage(page, text, summary = '', minor = False, log_string = ""):
        """Save text to a page and log exceptions."""
        if summary != '':
                wikipedia.setAction(summary)
        try:
                page.put(text, minorEdit = minor)
                wikipedia.output('%s  \03{green}saving %s' % (log_string, page.title()) )
                return ''
        except wikipedia.LockedPage:
                wikipedia.output('%s    \03{red}cannot save %s because it is locked\03{default}' % (log_string, page.title()) )
                return '# %s: page was locked\n' % page.aslink()
        except wikipedia.EditConflict:
                wikipedia.output('%s    \03{red}cannot save %s because of edit conflict\03{default}' % (log_string, page.title()) )
                return '# %s: edit conflict occurred\n' % page.aslink()
        except wikipedia.SpamfilterError, error:
                wikipedia.output('%s    \03{red}cannot save %s because of spam blacklist entry %s\03{default}' % ((log_string, page.title(), error.url)) )
                return '# %s: spam blacklist entry\n' % page.aslink()
        except:
                wikipedia.output('%s    \03{red}unknown error on saving %s\03{default}' % (log_string, page.title()) )
                return '# %s: unknown error occurred\n' % page.aslink()

# isbn.py - functions for handling ISBNs
#
# Copyright (C) 2010, 2011, 2012, 2013 Arthur de Jong
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA

"""ISBN (International Standard Book Number).

The ISBN is the International Standard Book Number, used to identify
publications. This module supports both numbers in ISBN-10 (10-digit) and
ISBN-13 (13-digit) format.

>>> validate('978-9024538270')
'9789024538270'
>>> validate('978-9024538271')
Traceback (most recent call last):
    ...
InvalidChecksum: ...
>>> compact('1-85798-218-5')
'1857982185'
>>> format('9780471117094')
'978-0-471-11709-4'
>>> format('1857982185')
'1-85798-218-5'
>>> isbn_type('1-85798-218-5')
'ISBN10'
>>> isbn_type('978-0-471-11709-4')
'ISBN13'
>>> to_isbn13('1-85798-218-5')
'978-1-85798-218-3'
>>> to_isbn10('978-1-85798-218-3')
'1-85798-218-5'
"""

def compact(number, convert=False):
    """Convert the ISBN to the minimal representation. This strips the number
    of any valid ISBN separators and removes surrounding whitespace. If the
    covert parameter is True the number is also converted to ISBN-13
    format."""
    number = clean(number, ' -').strip().upper()
    if len(number) == 9:
        number = '0' + number
    if convert:
        return to_isbn13(number)
    return number

def _calc_isbn10_check_digit(number):
    """Calculate the ISBN check digit for 10-digit numbers. The number passed
    should not have the check bit included."""
    check = sum((i + 1) * int(n)
                for i, n in enumerate(number)) % 11
    return 'X' if check == 10 else str(check)

def validate(number, convert=False):
    """Checks to see if the number provided is a valid ISBN (either a legacy
    10-digit one or a 13-digit one). This checks the length and the check
    bit but does not check if the group and publisher are valid (use split()
    for that)."""
    number = compact(number, convert=False)
    if not number[:-1].isdigit():
        raise InvalidFormat()
    if len(number) == 10:
        if _calc_isbn10_check_digit(number[:-1]) != number[-1]:
            raise InvalidChecksum()
    elif len(number) == 13:
        ean.validate(number)
    else:
        raise InvalidLength()
    if convert:
        number = to_isbn13(number)
    return number

def isbn_type(number):
    """Check the passed number and returns 'ISBN13', 'ISBN10' or None (for
    invalid) for checking the type of number passed."""
    try:
        number = validate(number, convert=False)
    except ValidationError:
        return None
    if len(number) == 10:
        return 'ISBN10'
    elif len(number) == 13:
        return 'ISBN13'

def is_valid(number):
    """Checks to see if the number provided is a valid ISBN (either a legacy
    10-digit one or a 13-digit one). This checks the length and the check
    bit but does not check if the group and publisher are valid (use split()
    for that)."""
    try:
        return bool(validate(number))
    except ValidationError:
        return False

def to_isbn13(number):
    """Convert the number to ISBN-13 format."""
    number = number.strip()
    min_number = compact(number, convert=False)
    if len(min_number) == 13:
        return number  # nothing to do, already ISBN-13
    # put new check digit in place
    number = number[:-1] + ean.calc_check_digit('978' + min_number[:-1])
    # add prefix
    if ' ' in number:
        return '978 ' + number
    elif '-' in number:
        return '978-' + number
    else:
        return '978' + number

def to_isbn10(number):
    """Convert the number to ISBN-10 format."""
    number = number.strip()
    min_number = compact(number, convert=False)
    if len(min_number) == 10:
        return number  # nothing to do, already ISBN-13
    elif isbn_type(min_number) != 'ISBN13':
        raise InvalidFormat('Not a valid ISBN13.')
    elif not number.startswith('978'):
        raise InvalidFormat('Does not use 978 Bookland prefix.')
    # strip EAN prefix
    number = number[3:-1].strip().strip('-')
    digit = _calc_isbn10_check_digit(min_number[3:-1])
    # append the new check digit
    if ' ' in number:
        return number + ' ' + digit
    elif '-' in number:
        return number + '-' + digit
    else:
        return number + digit

def split(number, convert=False):
    """Split the specified ISBN into an EAN.UCC prefix, a group prefix, a
    registrant, an item number and a check-digit. If the number is in ISBN-10
    format the returned EAN.UCC prefix is '978'. If the covert parameter is
    True the number is converted to ISBN-13 format first."""
    from stdnum import numdb
    # clean up number
    number = compact(number, convert)
    # get Bookland prefix if any
    delprefix = False
    if len(number) == 10:
        number = '978' + number
        delprefix = True
    # split the number
    result = numdb.get('isbn').split(number[:-1])
    itemnr = result.pop() if result else ''
    prefix = result.pop(0) if result else ''
    group = result.pop(0) if result else ''
    publisher = result.pop(0) if result else ''
    # return results
    return ('' if delprefix else prefix, group, publisher, itemnr, number[-1])

def format(number, separator='-', convert=False):
    """Reformat the passed number to the standard format with the EAN.UCC
    prefix (if any), the group prefix, the registrant, the item number and
    the check-digit separated (if possible) by the specified separator.
    Passing an empty separator should equal compact() though this is less
    efficient. If the covert parameter is True the number is converted to
    ISBN-13 format first."""
    return separator.join(x for x in split(number, convert) if x)

def to_unicode(obj, encoding='utf-8'):
    if isinstance(obj, basestring):
        if not isinstance(obj, unicode):
            obj = unicode(obj, encoding)
    return obj

def substitute(wikicode, template, replacement_template, type, pass_through_params):

#    print "pass_through_params: ", pass_through_params

    Entrez.email = 'boghog@me.com'

    templatecode = mwparserfromhell.parse(replacement_template)
    replacement_template_code = templatecode.filter_templates()[0]
    params = replacement_template_code.params
                
    dict = {}
    pt_dict = {}
    new_template = to_unicode("{{")
    new_template = new_template + type + " "
                                         
    for param in params:
        (key, value) = param.split("=",1)
        dict[key.strip()] = value.strip()

    for param in pass_through_params:
        (key, value) = param.split("=",1)
        pt_dict[key.strip()] = value.strip()

    if 'author' in dict.keys():
        author = dict['author']
        del dict['author']
        new_template = new_template + " | author = " + to_unicode(author)

    if 'authors' in dict.keys():
        authors = dict['authors']
        del dict['authors']
        new_template = new_template + " | authors = " + to_unicode(authors)

    first = {}
    last = {}

    for key, value in dict.iteritems():   # iter on both keys and values
        if key.startswith('first'):
            index = key[5:]
            first[index] = to_unicode(value)
        if key.startswith('last'):
            index = key[4:]
            last[index] = to_unicode(value)
                        
    if last:
        n_authors = int(max(last.iterkeys())) + 1
    else:
        n_authors = 0
                
    for i in range(1,n_authors):
        lastn  = "last"  + unicode(i)
        firstn = "first" + unicode(i)
        new_template = new_template + " | " + lastn  + " = " + last[unicode(i)]
        del dict[lastn]
        try:
            new_template = new_template + " | " + firstn + " = " + first[unicode(i)]
            del dict[firstn]
        except:
            new_template = new_template + " | " + firstn + " = "
                           
    if 'title' in dict.keys():
        title = dict['title']
        del dict['title']
        new_template = new_template + " | title = " + to_unicode(title)
                    
    if 'language' in dict.keys():
        language = dict['language']
        del dict['language']
        new_template = new_template + " | language = " + to_unicode(language)
                                   
    if 'journal' in dict.keys():
        journal = dict['journal']
        del dict['journal']
        new_template = new_template + " | journal = " + to_unicode(journal)

    if 'volume' in dict.keys():
        volume = dict['volume']
        del dict['volume']
        new_template = new_template + " | volume = " + to_unicode(volume)

    if 'issue' in dict.keys():
        issue = dict['issue']
        del dict['issue']
        new_template = new_template + " | issue = " + to_unicode(issue)

    if 'pages' in dict.keys():
        pages = dict['pages']
        result = regexp_passed_parameter.search(pages)
        if result:
            pages = result.group('PVALUE')
        del dict['pages']
        if pages and not 'pages' in pt_dict:
            new_template = new_template + to_unicode(" | pages = " + pages)

    if 'page' in dict.keys():
        page = dict['page']
        result = regexp_passed_parameter.search(page)
        if result:
            page = result.group('PVALUE')
        del dict['page']
        if page and not 'page' in pt_dict:
            new_template = new_template + to_unicode(" | page = " + page)

    if 'year' in dict.keys():
        year = dict['year']
        result = regexp_passed_parameter.search(year)
        if result:
            year = result.group('PVALUE')
        del dict['year']
        if year and not 'year' in pt_dict:
            new_template = new_template + " | year = " + to_unicode(year)

    if 'date' in dict.keys():
        date = dict['date']
        del dict['date']
        if date:
            new_template = new_template + " | date = " + to_unicode(date)

    if ('doi' in dict.keys() and'pmid' not in dict.keys()):
        try:
            doi = dict['doi']
            handle = Entrez.esearch("pubmed", term=doi, field = "doi")
            record = Entrez.read(handle)
            pmid = record["IdList"][0]
            dict['pmid'] = pmid
        except:
            pass

    if 'pmid' in dict.keys():
        pmid = dict['pmid']
        del dict['pmid']
        new_template = new_template + " | pmid = " + to_unicode(pmid)

    if 'pmc' in dict.keys():
        pmc = dict['pmc']
        del dict['pmc']
        new_template = new_template + " | pmc = " + to_unicode(pmc)

    if 'doi' in dict.keys():
        doi = dict['doi']
        del dict['doi']
        new_template = new_template + " | doi = " + to_unicode(doi)

    if 'url' in dict.keys():
        url = dict['url']
        del dict['url']
        new_template = new_template + " | url = " + to_unicode(url)
                
    if 'ref' in dict.keys():
        ref = dict['ref']
        result = regexp_passed_parameter.search(ref)
        if result:
            ref = result.group('PVALUE')
        del dict['ref']
        if ref and not 'ref' in pt_dict:
            new_template = new_template + " | ref = " + to_unicode(ref)
                
    for key, value in dict.iteritems():
        new_template = new_template + " | " + to_unicode(key) + " = " + to_unicode(value)
                
    for key, value in pt_dict.iteritems():
        new_template = new_template + " | " + to_unicode(key) + " = " + to_unicode(value)

    new_template = new_template + " }}"
    new_template = to_unicode(new_template)

    templatecode = mwparserfromhell.parse(new_template)
    new_template = templatecode.filter_templates()[0]

#   print template
#   print replacement_template
#   print new_template

    wikicode.replace(template, new_template, recursive=True)

    return (wikicode, new_template)

def v2h(wikicode, template):

    params = template.params

    new_template = to_unicode("{{")
    new_template = new_template + template.name.strip()
                                         
    for param in params:
        (key, value) = param.split("=",1)
        new_template = new_template + " | " + key.strip() + " = " + value.strip()

    new_template = new_template + " }}"
    new_template = to_unicode(new_template)

    templatecode = mwparserfromhell.parse(new_template)
    new_template = templatecode.filter_templates()[0]

    wikicode.replace(template, new_template, recursive=True)

    return (wikicode, new_template)

def replace_coauthor(wikicode, template):

    Entrez.email = 'boghog@me.com'

    new_template = template

    try:
        pmid = new_template.get("pmid").value.strip()
    except ValueError:
        pmid = ""

    try:
        new_template.remove("author")
    except:
        pass

    try:
        new_template.remove("authors")
    except:
        pass

    try:
        new_template.remove("coauthors")
    except:
        pass

    try:
        new_template.remove("first")
    except:
        pass

    try:
        new_template.remove("last")
    except:
        pass

    forever = True
    index = 1
    while forever:
        lastn = "last" + str(index)
        try:
            new_template.remove(lastn)
            index += 1
        except:
            forever = False
        
    forever = True
    index = 1
    while forever:
        firstn = "first" + str(index)
        try:
            new_template.remove(firstn)
            index += 1
        except:
            forever = False
        
    forever = True
    index = 1
    while forever:
        authorn = "author" + str(index)
        try:
            new_template.remove(authorn)
            index += 1
        except:
            forever = False
        
    Vancouver_authors = ""
    authors = ""

    if pmid:
        print "pmid: ", pmid
        handle = Entrez.efetch("pubmed", id=str(pmid), retmode="xml")
        records = Entrez.parse(handle)
        for record in records:
            try:
                authors = record['MedlineCitation']['Article']['AuthorList']
                for author in authors:
                    try:
                        Vancouver_authors = Vancouver_authors + author['LastName'] + " " + author['Initials'] + ", "
                    except:
                        pass
                authors = Vancouver_authors.strip(', ') + " "
            except:
                pass

    new_template.add("author", to_unicode(authors))

    params = new_template.params
                
    dict = {}
    type = "cite journal"
    new_template = to_unicode("{{")
    new_template = new_template + type + " "
                                         
    for param in params:
        (key, value) = param.split("=",1)
        dict[key.strip()] = value.strip()

    type = "cite journal"
    new_template = to_unicode("{{")
    new_template = new_template + type + " "

    if 'author' in dict.keys():
        author = dict['author']
        del dict['author']
        new_template = new_template + " | author = " + to_unicode(author)

    if 'authors' in dict.keys():
        authors = dict['authors']
        del dict['authors']
        new_template = new_template + " | authors = " + to_unicode(authors)

    if 'title' in dict.keys():
        title = dict['title']
        del dict['title']
        new_template = new_template + " | title = " + to_unicode(title)
                    
    if 'language' in dict.keys():
        language = dict['language']
        del dict['language']
        new_template = new_template + " | language = " + to_unicode(language)
                                   
    if 'journal' in dict.keys():
        journal = dict['journal']
        del dict['journal']
        new_template = new_template + " | journal = " + to_unicode(journal)

    if 'volume' in dict.keys():
        volume = dict['volume']
        del dict['volume']
        new_template = new_template + " | volume = " + to_unicode(volume)

    if 'issue' in dict.keys():
        issue = dict['issue']
        del dict['issue']
        new_template = new_template + " | issue = " + to_unicode(issue)

    if 'pages' in dict.keys():
        pages = dict['pages']
        result = regexp_passed_parameter.search(pages)
        if result:
            pages = result.group('PVALUE')
        del dict['pages']
        if pages:
            new_template = new_template + to_unicode(" | pages = " + pages)

    if 'page' in dict.keys():
        page = dict['page']
        result = regexp_passed_parameter.search(page)
        if result:
            page = result.group('PVALUE')
        del dict['page']
        if page:
            new_template = new_template + to_unicode(" | page = " + page)

    if 'year' in dict.keys():
        year = dict['year']
        result = regexp_passed_parameter.search(year)
        if result:
            year = result.group('PVALUE')
        del dict['year']
        if year:
            new_template = new_template + " | year = " + to_unicode(year)

    if 'date' in dict.keys():
        date = dict['date']
        del dict['date']
        if date:
            new_template = new_template + " | date = " + to_unicode(date)

    if ('doi' in dict.keys() and'pmid' not in dict.keys()):
        try:
            doi = dict['doi']
            handle = Entrez.esearch("pubmed", term=doi, field = "doi")
            record = Entrez.read(handle)
            pmid = record["IdList"][0]
            dict['pmid'] = pmid
        except:
            pass

    if 'pmid' in dict.keys():
        pmid = dict['pmid']
        del dict['pmid']
        new_template = new_template + " | pmid = " + to_unicode(pmid)

    if 'pmc' in dict.keys():
        pmc = dict['pmc']
        del dict['pmc']
        new_template = new_template + " | pmc = " + to_unicode(pmc)

    if 'doi' in dict.keys():
        doi = dict['doi']
        del dict['doi']
        new_template = new_template + " | doi = " + to_unicode(doi)

    if 'url' in dict.keys():
        url = dict['url']
        del dict['url']
        new_template = new_template + " | url = " + to_unicode(url)
                
    if 'ref' in dict.keys():
        ref = dict['ref']
        result = regexp_passed_parameter.search(ref)
        if result:
            ref = result.group('PVALUE')
        del dict['ref']
        if ref:
            new_template = new_template + " | ref = " + to_unicode(ref)
                
    for key, value in dict.iteritems():
        new_template = new_template + " | " + to_unicode(key) + " = " + to_unicode(value)
                
    new_template = new_template + " }}"
    new_template = to_unicode(new_template)

    templatecode = mwparserfromhell.parse(new_template)
    new_template = templatecode.filter_templates()[0]

    wikicode.replace(template, new_template, recursive=True)

    return (wikicode, new_template)

def predominate(wikicode):

#   check if in-line and horizontally formatted templates predominate

    cite_journal   = 0
    cite_book      = 0
    cite_pmid      = 0
    cite_doi       = 0
    cite_isbn      = 0

    cite_journal_h = 0
    cite_book_h    = 0
    cite_journal_v = 0
    cite_book_v    = 0

    vanc           = 0

    templates = wikicode.filter_templates()

    for template in templates:

        if (template.name.strip() == "cite journal" or template.name == "Cite journal"):
            cite_journal += 1
            if not template.count('\n'): cite_journal_h += 1
            params = template.params
            for param in params:
                (key, value) = param.split("=",1)
                key = key.strip()
                if key == "author":
                    vanc += 1
                    continue
                
        if (template.name.strip() == "cite book" or template.name == "Cite book"):
            cite_book += 1
            if not template.count('\n'): cite_book_h    += 1 

        if (template.name.strip() == "cite pmid" or template.name == "Cite pmid"):
            cite_pmid += 1

        if (template.name.strip() == "cite doi" or template.name == "Cite doi"):
            cite_doi += 1

        if (template.name.strip() == "cite isbn" or template.name == "Cite isbn"):
            cite_isbn += 1

    if (cite_journal + cite_book > cite_pmid + cite_doi + cite_isbn):
        in_line = True
    else:
        in_line = False
    
    cite_journal_v = cite_journal - cite_journal_h
    cite_book_v    = cite_book    - cite_book_h

    cite_h = cite_journal_h + cite_book_h
    cite_v = cite_journal_v + cite_book_v

#    print "journal: ", cite_journal, cite_journal_h, cite_journal_v
#    print "book:    ", cite_book, cite_book_h, cite_book_v
#    print "total:   ", cite_h, cite_v

    if (cite_h > cite_v):
        horizontal = True
    else:
        horizontal = False

    print vanc, cite_journal
    if (vanc/float(cite_journal) > 0.5):
        vanc_authors = True
    else:
        vanc_authors = False    

    return (in_line, horizontal, vanc_authors)

def savepage(page, text, summary = '', minor = False, log_string = ""):
        """Save text to a page and log exceptions."""
        if summary != '':
                wikipedia.setAction(summary)
        try:
                page.put(text, minorEdit = minor)
                wikipedia.output('%s  \03{green}saving %s' % (log_string, page.title()) )
                return ''
        except wikipedia.LockedPage:
                wikipedia.output('%s    \03{red}cannot save %s because it is locked\03{default}' % (log_string, page.title()) )
                return '# %s: page was locked\n' % page.aslink()
        except wikipedia.EditConflict:
                wikipedia.output('%s    \03{red}cannot save %s because of edit conflict\03{default}' % (log_string, page.title()) )
                return '# %s: edit conflict occurred\n' % page.aslink()
        except wikipedia.SpamfilterError, error:
                wikipedia.output('%s    \03{red}cannot save %s because of spam blacklist entry %s\03{default}' % ((log_string, page.title(), error.url)) )
                return '# %s: spam blacklist entry\n' % page.aslink()
        except:
                wikipedia.output('%s    \03{red}unknown error on saving %s\03{default}' % (log_string, page.title()) )
                return '# %s: unknown error occurred\n' % page.aslink()

def run():

    articles = []
#    articles = codecs.open('/boghog/progs/compat/top_1500_med_articles.txt', mode = 'r', encoding='utf-8')

    articles = ['User:Boghog/Sandbox9']

    for article in articles:

        article = article.rstrip('\n')
        log_string = "* [[" + article + "]], " 

        encoding = 'utf-8'
        if isinstance(article, basestring):
            if not isinstance(article, unicode):
                article = unicode(article, encoding)

        site = wikipedia.getSite()
        page = wikipedia.Page(site, article)
        text = page.get(get_redirect = True)

        if not Allowbots(text):
            continue
        
        wikicode = mwparserfromhell.parse(text)
        templates = wikicode.filter_templates()

        in_line    = False
        horizontal = False

        (in_line, horizontal, vanc_authors) = predominate(wikicode)
        print in_line, horizontal, vanc_authors

        if not in_line:
            log_string += " skipped because in-line not predominate"
            continue

        for template in templates:

            if (template.name.strip() == "cite pmid" or template.name.strip() == "Cite pmid"):
                pass_through_params = template.params[1:]
                pmid = template.get(1).value
                template_name = "Template:Cite pmid/" + str(pmid)
                replacement_template = wikipedia.Page(site, template_name)
                try:
                    replacement_template = replacement_template.get(get_redirect = True)
                except:
                    print "could not open: ", replacement_template
                    continue
                result = regexp_redirect.search(replacement_template)
                if result:
                    replacement_template = result.group('RDIRECT')
                    replacement_template = wikipedia.Page(site, replacement_template)
                    try:
                        replacement_template = replacement_template.get(get_redirect = True)
                    except:
                        print "could not open: ", replacement_template
                        continue
                type = "cite journal"
                (wikicode, template) = substitute(wikicode, template, replacement_template, type, pass_through_params)
                if (vanc_authors and template.count('pmid')):
                    (wikicode, template) = replace_coauthor(wikicode, template)
                continue

            if (template.name.strip() == "cite doi" or template.name.strip() == "Cite doi"):
                pass_through_params = template.params[1:]
                doi = str(template.get(1).value)
                template_name = urllib.quote(doi)
                template_name = re.sub('%', '.', template_name)
                template_name = re.sub('/', '.2F', template_name)
                replacement_template = "Template:Cite doi/" + str(template_name)
                replacement_template = wikipedia.Page(site, replacement_template)
                try:
                    replacement_template = replacement_template.get(get_redirect = True)
                except:
                    print "could not open: ", replacement_template
                    continue
                result = regexp_redirect.search(replacement_template)
                if result:
                    replacement_template = result.group('RDIRECT')
                    replacement_template = wikipedia.Page(site, replacement_template)
                    try:
                        replacement_template = replacement_template.get(get_redirect = True)
                    except:
                        print "could not open: ", replacement_template
                        continue
                type = "cite journal"
                (wikicode, template) = substitute(wikicode, template, replacement_template, type, pass_through_params)
                if (vanc_authors and template.count('pmid')):
                    (wikicode, template) = replace_coauthor(wikicode, template)
                continue
                
            if (template.name.strip() == "cite isbn" or template.name.strip() == "Cite isbn"):
                pass_through_params = template.params[1:]
                isbn = template.get(1).value
                isbn = str(compact(isbn, convert=True))
                isbn = isbn[:-1]
                template_name = "Template:Cite isbn/" + isbn
                replacement_template = wikipedia.Page(site, template_name)
                try:
                    replacement_template = replacement_template.get(get_redirect = True)
                except:
                    print "could not open: ", replacement_template
                    continue
                result = regexp_redirect.search(replacement_template)
                if result:
                    replacement_template = result.group('RDIRECT')
                    replacement_template = wikipedia.Page(site, replacement_template)
                    try:
                        replacement_template = replacement_template.get(get_redirect = True)
                    except:
                        print "could not open: ", replacement_template
                        continue
                type = "cite book"
                (wikicode, template) = substitute(wikicode, template, replacement_template, type, pass_through_params)
                
            if (template.name.strip() == "cite journal" or template.name.strip() == "Cite journal"):
#                print "vertical: ", template.count('\n'), horizontal
                if (template.count('\n') and horizontal):
                    print "v2h"
                    (wikicode, template) = v2h(wikicode, template)
                if ((template.count('coauthors') or vanc_authors) and template.count('pmid')):
                    (wikicode, template) = replace_coauthor(wikicode, template)

            if (template.name.strip() == "cite book"    or template.name.strip() == "Cite book"):
#                print "vertical: ", template.count('\n'), horizontal
                if (template.count('\n') and horizontal):
                    print "v2h"
                    (wikicode, template) = v2h(wikicode, template)

#        print wikicode
         
        if wikicode:
            comment='substitute transcluded cite templates with in-line equivalents and replace vertical with horizontally formatted citation templates per [[Wikipedia:Bots/Requests for approval/BogBot 5]]. Report errors and suggestions to [[User_talk:BogBot]]'
            status = savepage(page, wikicode, comment, False, log_string)
        else:
            print ", page not updated"

        wikipedia.stopme()
        
run()