// <nowiki>
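// Only exact (character-for-character) duplicate references are merged.
// For unnamed duplicates, the script tries to derive a reference name from the domain of the first URL in the reference.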
// Function to deduplicate references in Wikipedia articles
/**
 * Merges exact duplicate <ref>...</ref> tags in the edit textarea.
 *
 * The first occurrence of a duplicated reference keeps its full content; every
 * later occurrence is replaced by a self-closing <ref name="..." /> tag.
 * Duplicates that have no name get one derived from the domain of the first
 * URL in their content; if no acceptable name can be derived they are left
 * untouched. When at least one reference was shortened, the textarea is
 * updated, a note is appended to the edit summary and the edit is flagged as
 * minor (if that checkbox exists).
 *
 * Reads and writes the elements #wpTextbox1 and #wpSummary and does nothing
 * when either is missing.
 *
 * @returns {void}
 */
function deduplicateReferences() {
  const editTextarea = document.getElementById('wpTextbox1');
  const summaryInput = document.getElementById('wpSummary');
  if (!editTextarea || !summaryInput) return;

  const originalText = editTextarea.value;

  // Paired <ref ...>...</ref> tags only. The lookahead keeps <references> out,
  // and the attribute part may not end in "/" so that a self-closing
  // <ref name="x" /> is never mistaken for an opening tag.
  const refRegex = /<ref(?=[\s>])(?:[^>]*[^>\/])?>[\s\S]*?<\/ref>/gi;
  // Any <ref ...> tag carrying attributes, paired or self-closing.
  const refTagRegex = /<ref\s[^>]*>/gi;
  // First URL in a reference; stops at whitespace and at wikitext delimiters.
  const urlRegex = /https?:\/\/[^\s<>"\[\]{}|]+/i;

  // Reference names to ignore: references already carrying one of these names
  // are left alone, and none of them is ever generated.
  const blacklist = new Set([
    "doi_org",
    "jstor_org",
    "amazon_com",
    "books_google_com",
    "web_archive_org",
    "worldcat_org",
    "dx_doi_org"
    // Add more blacklisted names here
  ]);

  // Every reference name already present in the text.
  const usedNames = new Set();
  // Exact reference text -> bookkeeping record, in order of first appearance.
  const refsByText = new Map();

  // Returns the value of the name attribute in an attribute string, or null.
  function extractExistingName(attrs) {
    const nameMatch = attrs.match(/(?:^|\s)name\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'\/>]+))/i);
    if (!nameMatch) return null;
    return nameMatch[1] || nameMatch[2] || nameMatch[3] || null;
  }

  // Splits a paired reference into its attribute string and its content.
  function splitRef(ref) {
    const parts = ref.match(/^<ref([^>]*)>([\s\S]*)<\/ref>$/i);
    return { attrs: parts[1].replace(/\s+$/, ''), body: parts[2] };
  }

  // Returns the host of a URL without a leading "www.", or null when the URL
  // cannot be parsed. Wayback Machine snapshot URLs yield the archived host.
  function extractDomain(url) {
    const snapshot = url.match(/^https?:\/\/(?:web\.)?archive\.org\/web\/[^\/]+\/(.+)$/i);
    let target = url;
    if (snapshot) {
      // Snapshot URLs may embed the archived address without a scheme.
      target = /^https?:\/\//i.test(snapshot[1]) ? snapshot[1] : `http://${snapshot[1]}`;
    }
    try {
      return new URL(target).hostname.replace(/^www\./, '');
    } catch (e) {
      // An unparsable URL is an expected case: no name can be derived.
      return null;
    }
  }

  // Derives an unused reference name from the first URL in the given content.
  // Returns null when there is no usable URL or the domain is blacklisted.
  function generateUniqueName(body) {
    const urlMatch = body.match(urlRegex);
    if (!urlMatch) return null;
    const domain = extractDomain(urlMatch[0]);
    if (!domain) return null;
    const baseName = domain.replace(/\./g, '_');
    // Check the blacklist before adding a counter suffix, so a blacklisted
    // domain cannot slip through as e.g. "doi_org_1".
    if (blacklist.has(baseName)) return null;
    let uniqueName = baseName;
    for (let counter = 1; usedNames.has(uniqueName); counter++) {
      uniqueName = `${baseName}_${counter}`;
    }
    usedNames.add(uniqueName);
    return uniqueName;
  }

  // Record all names in use so generated names never collide with them.
  for (const tag of originalText.match(refTagRegex) || []) {
    const usedName = extractExistingName(tag.slice(4, -1));
    if (usedName) usedNames.add(usedName);
  }

  // First pass: count occurrences of every distinct reference.
  for (const ref of originalText.match(refRegex) || []) {
    const known = refsByText.get(ref);
    if (known) {
      known.count++;
      continue;
    }
    const parts = splitRef(ref);
    const existingName = extractExistingName(parts.attrs);
    if (existingName && blacklist.has(existingName)) continue;
    refsByText.set(ref, {
      count: 1,
      attrs: parts.attrs,
      body: parts.body,
      existingName,
      generatedName: null,
      emitted: false
    });
  }

  // Decide once, in document order, which unnamed duplicates get a name.
  for (const entry of refsByText.values()) {
    if (entry.count > 1 && !entry.existingName) {
      entry.generatedName = generateUniqueName(entry.body);
    }
  }

  // Second pass: keep the first occurrence in full, shorten the later ones.
  let deduplicatedCount = 0;
  const updatedText = originalText.replace(refRegex, (ref) => {
    const entry = refsByText.get(ref);
    if (!entry || entry.count < 2) return ref;
    if (!entry.existingName && !entry.generatedName) return ref;

    // Existing attributes (e.g. group) are kept so the reference stays in the
    // same list; a generated name is placed in front of them.
    const tagAttrs = entry.existingName
      ? entry.attrs
      : ` name="${entry.generatedName}"${entry.attrs}`;

    if (!entry.emitted) {
      entry.emitted = true;
      return entry.existingName ? ref : `<ref${tagAttrs}>${entry.body}</ref>`;
    }
    deduplicatedCount++;
    return `<ref${tagAttrs} />`;
  });

  if (deduplicatedCount === 0) return;

  editTextarea.value = updatedText;

  // Append to whatever edit summary is already there.
  const currentSummary = summaryInput.value;
  const deduplicationSummary = `Deduplicated ${deduplicatedCount} reference${deduplicatedCount > 1 ? 's' : ''}`;
  summaryInput.value = currentSummary ? `${currentSummary} • ${deduplicationSummary}` : deduplicationSummary;

  // The minor-edit checkbox may be absent from the form.
  const minorCheckbox = document.editform && document.editform.wpMinoredit;
  if (minorCheckbox) minorCheckbox.checked = true;
}
// Function to check if the edit textarea is ready
/**
 * Tells whether the edit form can be processed: the #wpTextbox1 element must
 * exist and hold non-empty text, and the #wpSummary element must exist.
 *
 * @returns {*} A truthy value (the summary element) when ready, otherwise the
 *   first falsy value encountered.
 */
function isEditTextareaReady() {
  const textbox = document.getElementById('wpTextbox1');
  const summaryField = document.getElementById('wpSummary');
  // No textarea on the page.
  if (!textbox) {
    return textbox;
  }
  // Textarea exists but holds no text.
  if (!textbox.value) {
    return textbox.value;
  }
  return summaryField;
}
// Function to run deduplication when everything is ready
/**
 * Runs deduplicateReferences() as soon as isEditTextareaReady() reports that
 * the edit form is ready, checking again every 100 ms until then.
 *
 * Polling gives up after a fixed number of attempts, so a form that never
 * becomes ready (for instance an empty textarea) does not keep a timer running
 * for the lifetime of the page. Any arguments are ignored, which lets the
 * function be used directly as an event listener.
 *
 * @returns {void}
 */
function runDeduplicationWhenReady() {
  const POLL_INTERVAL_MS = 100;
  const MAX_ATTEMPTS = 100; // about ten seconds in total
  let attempts = 0;

  const poll = () => {
    if (isEditTextareaReady()) {
      deduplicateReferences();
      return;
    }
    attempts += 1;
    if (attempts < MAX_ATTEMPTS) {
      // Not ready yet: check again after a short delay.
      setTimeout(poll, POLL_INTERVAL_MS);
    }
  };

  poll();
}
// Run the deduplication when the edit page is fully loaded
if (mw.config.get('wgAction') === 'edit') {
  if (document.readyState !== 'complete') {
    // Page still loading: start once the window's load event fires.
    window.addEventListener('load', runDeduplicationWhenReady);
  } else {
    // Page already loaded: start right away.
    runDeduplicationWhenReady();
  }
}
// </nowiki>