The VisualEditor (very annoyingly!) doesn't give meaningful names to references added by users; instead it gives them names like :0, :1, etc. This script fixes that automatically. It might be buggy: it has only ever been tested on the osteogenesis imperfecta and furry fandom articles.
Requires mwparserfromhell. Input filename is first and only argument. Outputs completed wiki page to stdout, and some info on what changed to stderr.
#!/usr/bin/env python3
import mwparserfromhell
from mwparserfromhell.wikicode import Tag, Wikicode, Wikilink
import re
import sys
# --- Load and parse the page, then classify its <ref> tags ---------------
# The script takes exactly one argument: the path of the wikitext file.
if len(sys.argv) != 2:
    sys.exit("usage: {} INPUT_FILE".format(sys.argv[0]))
_, input_filename = sys.argv

# Read as UTF-8 explicitly rather than relying on the platform's locale
# encoding.
with open(input_filename, encoding="utf-8") as f:
    inp = f.read()
parsed = mwparserfromhell.parse(inp)


def get_all_links(parsed):
    """Return an iterator over every Wikilink node in *parsed*, recursively."""
    return parsed.ifilter(forcetype=Wikilink, recursive=True)


def get_all_tags():
    """Return an iterator over every Tag node of the page matching the ref pattern.

    NOTE(review): the pattern is searched within each tag's text, so it
    presumably also matches ``<references>`` and outer tags that merely
    contain a ``<ref``; confirm whether that is intended.
    """
    return parsed.ifilter(forcetype=Tag, matches="<\\s*ref\\s*", recursive=True)


# Snapshot the tag sequence once; the positions recorded below are indexes
# into the sequence get_all_tags() yields.
_all_ref_tags = list(get_all_tags())

# Tags that already carry a name attribute.
tags = [t for t in _all_ref_tags if t.has("name")]

# Positions of the unnamed tags, taken directly from the enumeration.
# Looking the positions up afterwards with ``==`` is unreliable because
# mwparserfromhell nodes compare by their text: two identical unnamed refs
# would each match both positions.
tags_noname_idxs = [i for i, t in enumerate(_all_ref_tags) if not t.has("name")]
tags_noname = [_all_ref_tags[i] for i in tags_noname_idxs]

# Named tags whose name is ":<digits>" and whose text does not end in "/>"
# (i.e. not a self-closing tag, which has no body to derive a name from).
refs = [
    t for t in tags
    if re.search(r"^:\d+$", str(t.get("name").value))
    and not re.search(r"/>$", str(t))
]
def find_date(template):
    """Return the first four-digit year found in the template's date fields.

    The ``date``, ``year`` and ``airdate`` parameters are tried in that
    order.  A parameter that is present but contains no four-digit run
    (for example an empty ``date=``) is skipped so that a later parameter
    can still supply the year.

    Args:
        template: object exposing ``has(name)`` and ``get(name).value``.

    Returns:
        The year as a four-character string, or ``None`` if none of the
        parameters yields one.
    """
    for key in ("date", "year", "airdate"):
        if not template.has(key):
            continue
        m = re.search(r"\d{4}", str(template.get(key).value))
        if m:
            return m.group(0)
    return None
def by_work(v, template):
    """Build a reference name from a work/publisher value plus the year.

    Wikilinks inside *v* are replaced by their target title, all
    whitespace is removed, and the year returned by ``find_date`` is
    appended.

    Args:
        v: wikitext of the work-like parameter.
        template: the citation template, passed on to ``find_date``.

    Returns:
        ``"<work><year>"``, or ``None`` when no year is found or the work
        is empty once whitespace is removed.
    """
    parsed_v = mwparserfromhell.parse(v)
    # Collect the links before editing: the loop rewrites parsed_v, and
    # the filter is otherwise evaluated lazily over the tree being changed.
    for link in list(get_all_links(parsed_v)):
        parsed_v.replace(link, str(link.title))
    date = find_date(template)
    if date is None:
        return None
    work = re.sub(r"\s", "", str(parsed_v))
    if not work.strip():
        return None
    return "{}{}".format(work, date)
def by_surname(v, template):
    """Build a reference name from an author value plus the year.

    The surname is the text before the first comma if there is one,
    otherwise the text before the first space, otherwise the whole value.
    Surrounding whitespace is stripped first: parameter values keep the
    padding written in the wikitext (``| last = Smith``), and a leading
    space would otherwise make the "before the first space" rule return an
    empty surname.

    Args:
        v: text of the author-like parameter.
        template: the citation template, passed on to ``find_date``.

    Returns:
        ``"<surname><year>"``, or ``None`` when the surname is empty or no
        year is found.
    """
    name = v.strip()
    if "," in name:
        last = name.partition(",")[0]
    elif " " in name:
        last = name.partition(" ")[0]
    else:
        last = name
    last = last.strip()
    if not last:
        return None
    date = find_date(template)
    if date is None:
        return None
    return "{}{}".format(last, date)
def build_refs(refs):
    """Map each usable reference tag to a generated name.

    Only tags whose first content node exposes ``has`` are considered.
    The name is built from the first present author-like parameter
    (``vauthors``, ``authors``, ``last``) via ``by_surname``; if none is
    present, from the first present work-like parameter via ``by_work``.
    Tags for which no name of more than one character can be built are
    skipped.

    Args:
        refs: iterable of tag nodes.  For tags without a ``name``
            attribute, the position in *refs* is translated through the
            module-level ``tags_noname_idxs``, so such tags are expected
            to come from ``tags_noname`` in the same order.

    Returns:
        dict mapping either the tag's current name (str) or, for unnamed
        tags, its ``tags_noname_idxs`` entry, to the generated name.
    """
    pretty = {}
    for i, ref in enumerate(refs):
        contents = ref.contents
        # An empty or self-closing tag has no first node; skip it instead
        # of letting get(0) raise IndexError.
        if contents is None or not contents.nodes:
            continue
        template = contents.get(0)
        if not getattr(template, "has", False):
            continue

        author_key = next(
            (k for k in ("vauthors", "authors", "last") if template.has(k)),
            None,
        )
        if author_key is not None:
            v = by_surname(str(template.get(author_key).value), template)
        else:
            work_key = next(
                (
                    k
                    for k in ("work", "website", "publisher", "series-link", "series")
                    if template.has(k)
                ),
                None,
            )
            if work_key is None:
                continue
            v = by_work(str(template.get(work_key).value), template)

        # Reject missing names and names too short to be meaningful.
        if v is None or len(v.strip()) <= 1:
            continue

        if ref.has("name"):
            pretty[str(ref.get("name").value)] = v
        else:
            pretty[tags_noname_idxs[i]] = v
    return pretty
# --- Apply the generated names and report --------------------------------
# stdout carries only the rewritten page; every diagnostic goes to stderr.
pretty = build_refs(refs)
pretty_noname = build_refs(tags_noname)

# Rename named tags and add a name to unnamed ones.  Unnamed tags are
# looked up by their position in the get_all_tags() sequence.
for i, tag in enumerate(get_all_tags()):
    if tag.has("name"):
        k = str(tag.get("name").value)
        if k in pretty:
            # Set the name attribute itself: it is not necessarily the
            # first attribute (e.g. <ref group="n" name=":0">).
            tag.get("name").value = pretty[k]
    elif i in pretty_noname:
        tag.add("name", value=pretty_noname[i])

# Normalise the case of each template name's first letter: lower-case for
# the listed templates and any "lang-" template, upper-case otherwise.
for template in parsed.ifilter_templates():
    tn = template.name.strip()
    if not tn:
        # Nothing to capitalise; avoids IndexError on tn[0].
        continue
    lowered = tn.lower()
    if lowered in ("rp", "ill", "lang", "respell", "abbr") or lowered.startswith("lang-"):
        template.name = tn[0].lower() + tn[1:]
    else:
        template.name = tn[0].upper() + tn[1:]
    print(tn, "⇒", template.name, file=sys.stderr)

print(parsed)

for k, v in pretty.items():
    print(k, "⇒", v, file=sys.stderr)
for i, v in pretty_noname.items():
    print("NONAME", i, "⇒", v, file=sys.stderr)

# Warn when several auto-named references were given the same new name.
uniq = len(set(pretty.values()))
total = len(pretty.values())
if uniq == total:
    print("All replacements unique", file=sys.stderr)
else:
    # Must go to stderr like the other diagnostics, otherwise it ends up
    # appended to the page written on stdout.
    print(
        "Some replacements not unique: {}/{}!".format(total - uniq, total),
        file=sys.stderr,
    )