(function(){
const scriptName = 'List Unreferenced Paragraphs';
// Once mediawiki.util is loaded and the DOM is ready, add two links to the
// 'p-tb' portlet: one opens the batch counter, the other highlights the
// paragraphs of the page that is currently displayed.
$.when(mw.loader.using('mediawiki.util'), $.ready).then(function(){
    addToolLink(scriptName, listUnreferencedParagraphs);
    addToolLink('Highlight Unreferenced Paragraphs', highlightUnreferencedParagraphs);

    // Adds one portlet link labelled `label` that runs `action` when clicked.
    // mw.util.addPortletLink returns null when no link could be added; in that
    // case the link is skipped so that the remaining links are still registered
    // instead of the whole setup failing on a null dereference.
    function addToolLink(label, action){
        const portletLink = mw.util.addPortletLink('p-tb', '#', label, label + 'Id');
        if(!portletLink){
            return;
        }
        portletLink.onclick = function(e){
            // the link target is '#'; keep the browser from navigating to it
            e.preventDefault();
            action();
        };
    }
});
function listUnreferencedParagraphs(){
// Delay in milliseconds between starting the processing of two articles.
const timeout = 50;
// Set to true to make the running batch stop before the next article.
let stopProcessing = false;
// The regular page content is hidden while the tool is shown in its place.
const content = document.getElementById('content');
content.style.display = 'none';
// A throwaway element is appended next to the content and then replaced, via
// outerHTML, by the markup of the tool (title list, overview table, buttons).
const placeholder = document.createElement('div');
content.parentElement.appendChild(placeholder);
placeholder.outerHTML = `
<div id="scriptContainer" style="display:flex; flex-direction: column;">
<style>
textarea {
resize: none;
padding: 5px;
}
button {
margin: 5px;
}
</style>
<h1>Unreferenced Paragraph Counter</h1>
<div style="display:flex;">
<div style="flex: 1; display:flex; flex-direction: column; margin: 5px; height: 50vh; overflow-y: auto;">
<label for="taList">Article Titles</label>
<textarea id="taList" style="height: 100%;"></textarea>
</div>
<div style="flex: 2; display:flex; flex-direction: column; margin: 5px; height: 50vh; overflow-y: auto;">
<label for="tableCounter">Overview table</label>
<table id="tableCounter" class="wikitable" style="height: 100%; margin: 0px; width: 100%; border-collapse: collapse;">
<thead>
<tr>
<th>Article title</th>
<th title="paragraphs that require and lack references">Paragraphs without references</th>
<th>Maintenance tags</th>
</tr>
</thead>
<tbody id="tbodyCounter">
</tbody>
</table>
</div>
</div>
<div style="display:flex; flex-direction: column">
<div style="display:flex;">
<button id="btStart" style="flex: 1;">Start</button>
<button id="btStop" disabled style="flex: 1;">Stop</button>
<button id="btCopy" style="flex: 1;">Copy</button>
</div>
<div>
<button id="btClose" style="width: 100%;">Close</button>
</div>
</div>
</div>
`;
// Start button: reads the titles from the textarea, rebuilds the overview
// table with one row per title and starts processing the titles in order.
const btStart = $('#btStart');
btStart.click(function(){
    stopProcessing = false;
    btStart.prop("disabled", true);
    btStop.prop("disabled", false);
    // One title per line. Surrounding whitespace is dropped and blank lines
    // are skipped, so that an empty textarea or a stray empty line does not
    // produce a table row (and an API request) for an empty title.
    const enteredTitles = $('#taList').val()
        .split('\r').join('')
        .split('\n')
        .map(function(line){ return line.trim(); })
        .filter(function(line){ return line.length > 0; });
    // remove duplicates
    const articleTitles = [...new Set(enteredTitles)];
    // populate table; the index of the title is part of the cell ids, which
    // is how processArticle finds the cells belonging to a title
    const tbodyCounter = $("#tbodyCounter");
    tbodyCounter.empty();
    articleTitles.forEach(function(articleTitle, i){
        const linkHTML = getLinkHTML(articleTitle);
        const row = `<tr><td>${linkHTML}</td><td id="td_unref_${i}" style="text-align: center;">-</td><td id="td_tags_${i}"></td></tr>`;
        tbodyCounter.append(row);
    });
    recursivelyProcessArticles(articleTitles, 0, timeout);

    // Returns the HTML of a link to the article. The link is built as a DOM
    // element and serialized, so the title is escaped in both the href and
    // the link text.
    function getLinkHTML(articleTitle) {
        const link = document.createElement('a');
        link.href = 'https://en.wikipedia.org/wiki/' + encodeURIComponent(articleTitle);
        link.textContent = articleTitle;
        return link.outerHTML;
    }
});
// Stop button: raises the stop flag that recursivelyProcessArticles checks
// before each article, and hands control back to the Start button.
const btStop = $('#btStop');
btStop.on('click', () => {
    stopProcessing = true;
    btStop.prop('disabled', true);
    btStart.prop('disabled', false);
});
// Copy button: copies the text of the overview table body to the clipboard
// and reports whether the copy actually succeeded.
const btCopy = $('#btCopy');
btCopy.click(function(){
    const tableText = getTextViaSelection();
    copyToClipboard(tableText).then(function(copied){
        if(copied){
            mw.notify("The table was copied to the clipboard.");
        }
        else{
            mw.notify("The table could not be copied to the clipboard.");
        }
    });

    // Returns the text the browser produces for a selection that spans the
    // table body. The selection is cleared again afterwards so that the table
    // does not stay highlighted.
    function getTextViaSelection(){
        const tbodyCounter = $('#tbodyCounter')[0];
        const range = document.createRange();
        range.selectNodeContents(tbodyCounter);
        const selection = window.getSelection();
        selection.removeAllRanges();
        selection.addRange(range);
        const text = selection.toString();
        selection.removeAllRanges();
        return text;
    }

    // Returns a promise that resolves to true when the text was copied and to
    // false otherwise. The asynchronous Clipboard API is used when available;
    // document.execCommand('copy') is kept as a fallback.
    function copyToClipboard(text) {
        if(navigator.clipboard && navigator.clipboard.writeText){
            return navigator.clipboard.writeText(text).then(
                function(){ return true; },
                function(){ return copyViaExecCommand(text); }
            );
        }
        return Promise.resolve(copyViaExecCommand(text));
    }

    // Copies the text through a temporary textarea; returns whether the
    // browser reported the copy command as successful.
    function copyViaExecCommand(text) {
        const textarea = document.createElement('textarea');
        textarea.value = text;
        document.body.appendChild(textarea);
        try{
            textarea.select();
            return document.execCommand('copy');
        }
        catch(error){
            return false;
        }
        finally{
            // the helper element must not stay in the page, whatever happened
            document.body.removeChild(textarea);
        }
    }
});
// Close button: stops a running batch, removes the tool from the page and
// makes the regular page content visible again.
const btClose = $('#btClose');
btClose.on('click', () => {
    btStop.trigger('click');
    const toolContainer = document.getElementById('scriptContainer');
    toolContainer.remove();
    content.style.display = '';
});
// Starts the processing of the article at `index` and schedules the next
// article after `timeout` milliseconds. The requests themselves are not
// awaited; the delay only spaces out their start.
// When the stop flag is set or all titles have been handled, the Stop button
// label is reset and its click handler is triggered to restore the buttons.
function recursivelyProcessArticles(articleTitles, index, timeout){
    if(stopProcessing || index >= articleTitles.length){
        btStop.text(`Stop`);
        btStop.trigger('click');
        return;
    }
    // show the progress on the Stop button
    btStop.text(`Stop (${index}/${articleTitles.length})`);
    processArticle(articleTitles, index);
    setTimeout(function(){
        recursivelyProcessArticles(articleTitles, index + 1, timeout);
    }, timeout);
}
// Fetches the parsed HTML of the article at `index` from the English
// Wikipedia API and writes the number of unreferenced paragraphs and the
// maintenance tag summary into the table cells of that row.
// Both cells show 'error' when the API response contains no parsed text or
// when the request or the evaluation fails.
// Returns the promise of the whole operation; it does not reject.
function processArticle(articleTitles, index){
    const articleTitle = articleTitles[index];
    const articleSearchTerm = encodeURIComponent(articleTitle);
    const wikiApiUrl = `https://en.wikipedia.org/w/api.php?action=parse&page=${articleSearchTerm}&format=json`;
    const cellUnrefId = `td_unref_${index}`;
    const cellTagsId = `td_tags_${index}`;

    return fetch(wikiApiUrl).then(async function(response) { // jshint ignore:line
        const data = await response.json();
        if (!(data && data.parse && data.parse.text && data.parse.text['*'])) {
            showError();
            return;
        }
        const articleHTML = data.parse.text['*'];
        const parser = new DOMParser();
        const doc = parser.parseFromString(articleHTML, 'text/html');
        const paragraphContainer = $(doc).find('.mw-parser-output').eq(0);
        const paragraphInfo = getParagraphInfo(paragraphContainer);
        const count = `${paragraphInfo.unreferencedParagraphs.length}`;
        const maintenanceTagString = getMaintenanceTagString(paragraphContainer);
        // The summary is assembled from text found in the article, so it is
        // inserted as text and never interpreted as HTML.
        $('#' + cellUnrefId).text(count);
        $('#' + cellTagsId).text(maintenanceTagString);
    }).catch(function(error){
        // network failure, invalid JSON or an exception during evaluation
        console.error(`Could not process "${articleTitle}":`, error);
        showError();
    });

    // Marks both result cells of this article as failed.
    function showError(){
        $('#' + cellUnrefId).text('error');
        $('#' + cellTagsId).text('error');
    }
}
// Summarizes the maintenance templates found inside `element` (a jQuery
// collection): every '.ambox' and every '.Inline-Template' is classified and
// counted. Returns a string of the form "<count>x <type>, <count>x <type>",
// in order of first occurrence, or an empty string when nothing was found.
function getMaintenanceTagString(element){
    // A Map keeps the order of first occurrence and cannot clash with
    // property names inherited from Object.prototype.
    const templateCounts = new Map();
    for(const ambox of getAmboxes(element)){
        countTemplate(getAmboxTyp(ambox));
    }
    for(const inlineTemplate of getInlineTemplates(element)){
        countTemplate(getInlineTemplateType(inlineTemplate));
    }
    return getOverviewString(templateCounts);

    function getInlineTemplates(element){
        return element.find('.Inline-Template').toArray();
    }

    // The type is the text of the inline template without its first and last
    // character.
    function getInlineTemplateType(inlineTemplate){
        const innerText = inlineTemplate.innerText;
        return innerText.substring(1, innerText.length - 1);
    }

    function getAmboxes(element){
        return element.find('.ambox').toArray();
    }

    // The type is taken from the first class starting with 'box-', with
    // underscores turned into spaces. An ambox without such a class is
    // identified by its own text instead.
    function getAmboxTyp(ambox){
        for(const className of ambox.classList){
            if(className.substring(0, 4) === 'box-'){
                return className.substring(4).split('_').join(' ');
            }
        }
        return ambox.innerText.trim();
    }

    // Increments the counter of the given template type.
    function countTemplate(type){
        templateCounts.set(type, (templateCounts.get(type) || 0) + 1);
    }

    function getOverviewString(counts){
        return Array.from(counts, function(entry){
            return `${entry[1]}x ${entry[0]}`;
        }).join(', ');
    }
}
}
// Colors the paragraphs of the displayed page: paragraphs that were evaluated
// and contain no reference get a red background, the other evaluated
// paragraphs a green one. The number of unreferenced paragraphs is shown as a
// notification and the paragraphs themselves are logged to the console.
function highlightUnreferencedParagraphs(){
    const paragraphContainer = $('#mw-content-text').find('.mw-parser-output').eq(0);
    // Without parser output there is nothing to evaluate, and
    // getParagraphInfo would fail on the missing container element.
    if(paragraphContainer.length === 0){
        mw.notify('No article content was found on this page.');
        return;
    }
    const paragraphInfo = getParagraphInfo(paragraphContainer);
    const unreferencedParagraphs = paragraphInfo.unreferencedParagraphs;
    // Set lookup instead of searching the array once per paragraph
    const unreferencedSet = new Set(unreferencedParagraphs);
    for(const p of paragraphInfo.includedParagraphs){
        p.style.background = unreferencedSet.has(p) ? '#faa' : '#afa';
    }
    console.log(unreferencedParagraphs);
    mw.notify(`${unreferencedParagraphs.length} unreferenced paragraphs found`);
}
function getParagraphInfo(paragraphContainer){
const minimalParagraphLength = 100;
hideRefs(paragraphContainer[0]);
combineMathBlocks(paragraphContainer.children().toArray());
addElementsFollowingParagraphs(paragraphContainer.children().toArray());
addElementsPrecedingParagraphs(paragraphContainer.children().toArray());
showRefs(paragraphContainer[0]);
const children = paragraphContainer.children();
const releventChildren = [];
for(let child of children){
if(child.tagName.toLowerCase() === 'p'){
releventChildren.push(child);
}
else if(child.classList.contains('mw-heading2')){
releventChildren.push(child);
}
}
const articleObject = convertToObject(releventChildren);
removeIrrelevantSections(articleObject);
const paragraphsInRelevantSections = convertToSimpleArray(articleObject);
const includedParagraphs = removeShortParagraphs(paragraphsInRelevantSections);
const unreferencedParagraphs = getUnreferencedParagraphs(includedParagraphs);
return {
'includedParagraphs': includedParagraphs,
'unreferencedParagraphs': unreferencedParagraphs
};
// Hides every reference and inline template inside the element by setting
// its inline display style to 'none'.
function hideRefs(element){
    element.querySelectorAll('.reference, .Inline-Template').forEach(function(marker){
        marker.style.display = 'none';
    });
}
// Clears the inline display style of every reference and inline template
// inside the element, which undoes hideRefs.
function showRefs(element){
    element.querySelectorAll('.reference, .Inline-Template').forEach(function(marker){
        marker.style.display = '';
    });
}
// Merges an element that consists only of a math formula, together with the
// element after it, into the element before it. This is based on the idea
// that the math formula artificially divides a single paragraph into parts.
// The first and the last element are never treated as math blocks because
// they lack one of the two neighbours.
function combineMathBlocks(elements){
    for(let position = 1; position + 1 < elements.length; position++){
        const candidate = elements[position];
        if(!isMathBlock(candidate)){
            continue;
        }
        const host = elements[position - 1];
        host.appendChild(candidate);
        host.appendChild(elements[position + 1]);
    }

    // An element is a math block when its first child has the class
    // 'mwe-math-element' and accounts for the whole text of the element.
    function isMathBlock(element){
        const first = element.firstChild;
        if(!first || !first.classList){
            return false;
        }
        if(!first.classList.contains('mwe-math-element')){
            return false;
        }
        return element.innerText === first.innerText;
    }
}
// If the meaning of a passage does not end with the html paragraph, the next
// element is added to the paragraph. This is assumed when the text of the
// paragraph (ignoring style elements) ends with a comma, a colon or a letter.
// When the next element is a STYLE or LINK element, the element after it is
// added as well.
function addElementsFollowingParagraphs(elements){
    const nonEndingCharacters = [',', ':'];
    for(let i = 0; i < elements.length - 1; i++){
        const element = elements[i];
        if(element.tagName !== 'P'){
            continue;
        }
        const text = getTextWithoutStyles(element);
        if(text.length === 0){
            continue;
        }
        const lastCharacter = text[text.length - 1];
        if(!nonEndingCharacters.includes(lastCharacter) && !isLetter(lastCharacter)){
            continue;
        }
        const nextElement = elements[i + 1];
        element.appendChild(nextElement);
        const nextIsStyleOrLink = nextElement.tagName === 'STYLE' || nextElement.tagName === 'LINK';
        // i + 2 may be the index of the last element, which is still valid
        if(nextIsStyleOrLink && i + 2 < elements.length){
            element.appendChild(elements[i + 2]);
        }
    }

    // Characters that have different upper and lower case forms are letters.
    function isLetter(character){
        return character.toLowerCase() !== character.toUpperCase();
    }

    // Returns the trimmed text of the element without the content of its
    // style elements. The work is done on a clone, so the element itself is
    // left untouched.
    function getTextWithoutStyles(element){
        const clone = element.cloneNode(true);
        // getElementsByTagName returns a live collection that shrinks with
        // every removal; iterating over a snapshot makes sure that no style
        // element is skipped.
        for(const styleElement of Array.from(clone.getElementsByTagName('style'))){
            styleElement.remove();
        }
        return clone.innerText.trim();
    }
}
// If a paragraph starts in the middle of a sentence, i.e. its text begins
// with a lower case letter, the previous element is moved to the start of
// the paragraph. The first element has no predecessor and is never checked.
function addElementsPrecedingParagraphs(elements){
    // lower case letter: has distinct case forms and equals its lower case form
    const isLowerCaseLetter = function(character){
        const lower = character.toLowerCase();
        return lower !== character.toUpperCase() && character === lower;
    };

    elements.forEach(function(current, position){
        if(position === 0){
            return;
        }
        const text = current.innerText.trim();
        if(current.tagName !== 'P' || text.length === 0){
            return;
        }
        if(isLowerCaseLetter(text[0])){
            current.insertBefore(elements[position - 1], current.firstChild);
        }
    });
}
// Groups the elements by section. Elements with the class 'mw-heading2' start
// a new section named after their text without '[edit]'; all other elements
// are collected in the current section. Elements before the first heading
// belong to the section "Lead". A repeated section name starts over with an
// empty list.
function convertToObject(elementArray){
    const sections = { Lead: [] };
    let activeSection = 'Lead';
    elementArray.forEach(function(node){
        if(!node.classList.contains('mw-heading2')){
            sections[activeSection].push(node);
            return;
        }
        activeSection = node.innerText.split('[edit]').join('');
        sections[activeSection] = [];
    });
    return sections;
}
// Deletes, in place, every section whose name is on the exclusion list.
// The comparison is case sensitive and requires an exact match.
function removeIrrelevantSections(articleObject){
    const excludedSections = ['Lead', 'Plot', 'Plots', 'Plot summary', 'Plot synopsis', 'Synopsis', 'Storylines', 'Appearances', 'Further reading', 'See also', 'External links', 'References', 'Bibliography', 'Notes', 'Selected publications', 'Selected works', 'Cited sources', 'Sources', 'Footnotes'];
    Object.keys(articleObject)
        .filter(function(sectionName){ return excludedSections.includes(sectionName); })
        .forEach(function(sectionName){ delete articleObject[sectionName]; });
}
// Joins the element lists of all sections into one new array, keeping the
// order of the sections and of the elements within each section.
function convertToSimpleArray(articleObject){
    const collected = [];
    for(const sectionName of Object.keys(articleObject)){
        collected.push(...articleObject[sectionName]);
    }
    return collected;
}
// Returns a new array with the paragraphs whose innerText has at least
// minimalParagraphLength characters.
function removeShortParagraphs(paragraphArray){
    return paragraphArray.filter(function(candidate){
        return !(candidate.innerText.length < minimalParagraphLength);
    });
}
// Returns a new array with the paragraphs for which isUnreferenced is true,
// in their original order.
function getUnreferencedParagraphs(paragraphArray){
    return paragraphArray.filter(function(candidate){
        return isUnreferenced(candidate);
    });
}
// A paragraph counts as referenced when it contains an element with the
// class 'reference' or a link whose href starts with '#CITEREF'.
// Returns true when neither is present.
function isUnreferenced(paragraph){
    const $paragraph = $(paragraph);
    if($paragraph.find('.reference').length > 0){
        return false;
    }
    const hasCiteRefLink = $paragraph.find('a').toArray().some(function(anchor){
        const target = anchor.getAttribute('href');
        return Boolean(target) && target.startsWith('#CITEREF');
    });
    return !hasCiteRefLink;
}
}
})();