User:Polygnotus/DuplicateReferences.js

Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
//Testpage: https://en.wikipedia.org/wiki/User:Polygnotus/DuplicateReferencesTest

// <nowiki>
mw.loader.using(['mediawiki.util'], function () {
    $(document).ready(function () {

        // Set to true to trace the script's decisions in the browser console.
        const DEBUG = false;

        /**
         * Console logger that stays silent unless DEBUG is enabled.
         * Every message is prefixed with '[DuplicateReferences]'.
         * @param {...*} args - Values forwarded to console.log after the prefix.
         */
        function debug(...args) {
            if (!DEBUG) {
                return;
            }
            console.log.apply(console, ['[DuplicateReferences]'].concat(args));
        }

		if (
		    mw.config.get('wgAction') !== 'view' ||
		    mw.config.get('wgDiffNewId') ||
		    mw.config.get('wgDiffOldId') ||
		    (mw.config.get('wgNamespaceNumber') !== 0 && mw.config.get('wgPageName') !== 'User:Polygnotus/DuplicateReferencesTest')
		) {
			debug("Not the correct page or action, script terminated");
		    return;
		}

        debug("Page title:", document.title);
        debug("URL:", window.location.href);

        /**
         * Scan the element siblings that follow `element` and return the first
         * <div> carrying the `reflist` or `mw-references-wrap` class.
         * @param {Element} element - Node whose following siblings are scanned.
         * @returns {Element|null} The matching div, or null when none follows.
         */
        function findNextReflistDiv(element) {
            const wantedClasses = ['reflist', 'mw-references-wrap'];
            for (let sibling = element.nextElementSibling; sibling; sibling = sibling.nextElementSibling) {
                const isDiv = sibling.tagName.toLowerCase() === 'div';
                if (isDiv && wantedClasses.some((name) => sibling.classList.contains(name))) {
                    return sibling;
                }
            }
            return null;
        }

        const referencesHeader = document.querySelector("h2#References");
        if (!referencesHeader) {
            debug("References heading not found, script terminated");
            return;
        }

        const containerDiv = referencesHeader.closest("div");
        if (!containerDiv) {
            debug("Container div not found, script terminated");
            return;
        }

        const reflistDiv = findNextReflistDiv(containerDiv);
        if (!reflistDiv) {
            debug("Reflist div not found, script terminated");
            return;
        }

        const referencesList = reflistDiv.querySelector('ol.references');
        if (!referencesList) {
            debug("ol.references not found within reflist div");
            return;
        }

        // Page-wide CSS used by the script: the :target highlight for a jumped-to
        // citation, the hover/clicked highlight classes toggled from the table links,
        // and the table itself (hidden on viewports up to 768px wide).
        const style = Object.assign(document.createElement('style'), {
            textContent: `
            li:target { border: 1px dotted red; padding: 2px; background-color: #ffcccc !important;}
            .duplicate-citation-highlight { background-color: #e1eeff; }
            .duplicate-citation-hover { background-color: #cce0ff; border: 1px dotted blue; }
            .duplicate-citation-clicked { border: 1px dotted red; padding: 2px; background-color: #ffe6e6; }
            .mw-collapsible-toggle { font-weight: normal; float: right; }
            .duplicate-references-table { width: 100%; }
            @media only screen and (max-width: 768px) {
                .duplicate-references-table { display: none; }
            }
        `,
        });
        document.head.appendChild(style);

        /**
         * Insert a {{Duplicated citations}} banner into the current page's wikitext
         * and save the page through the Action API.
         *
         * The banner's |reason= lists every duplicated URL with the numbers of the
         * references using it; |date= is the current month and year. On success the
         * page is reloaded; on failure the error is logged and the user is notified.
         * Progress is shown by replacing the content of `linkElement`.
         *
         * @param {HTMLElement} linkElement - The clicked link, reused as a status indicator.
         */
        function addDuplicateCitationsTemplate(linkElement) {
            debug("Adding duplicate citations template");
            showLoading(linkElement);

            const pageTitle = mw.config.get('wgPageName');
            const duplicateInfo = getDuplicateInfo();

            // Build the |date= parameter, e.g. "|date=March 2025"
            const monthNames = ["January", "February", "March", "April", "May", "June",
                "July", "August", "September", "October", "November", "December"
            ];
            const currentDate = new Date();
            const dateParam = `|date=${monthNames[currentDate.getMonth()]} ${currentDate.getFullYear()}`;

            // Hatnote-style templates the banner must be placed after
            const templatesToCheck = [
                '{{short description',
                '{{DISPLAYTITLE',
                '{{Lowercase title',
                '{{Italic title',
                '{{about',
                '{{redirect',
                '{{Distinguish',
                '{{for'
            ].map((template) => template.toLowerCase());

            let api;

            // The surrounding script only requests 'mediawiki.util', so mw.Api is not
            // guaranteed to exist yet; load its module before constructing the client.
            mw.loader.using('mediawiki.api').then(function () {
                api = new mw.Api();
                return api.get({
                    action: 'query',
                    prop: 'revisions',
                    titles: pageTitle,
                    rvprop: 'ids|content', // revision id is needed for baserevid below
                    rvslots: 'main',
                    formatversion: 2
                });
            }).then(function (data) {
                const revision = data.query.pages[0].revisions[0];
                const content = revision.slots.main.content;

                // Find the insert position: just after the last leading line that starts
                // with one of templatesToCheck. Scanning stops at the first non-empty
                // line that is neither a template ({{...) nor a magic word (__...).
                let insertPosition = 0;
                const lines = content.split('\n');
                for (let i = 0; i < lines.length; i++) {
                    const line = lines[i].trim().toLowerCase();
                    if (templatesToCheck.some((template) => line.startsWith(template))) {
                        insertPosition = i + 1;
                    } else if (line && !line.startsWith('{{') && !line.startsWith('__')) {
                        break;
                    }
                }

                // Create the reason string: one bullet per duplicated URL
                let reason = '[[User:Polygnotus/DuplicateReferences|DuplicateReferences]] detected:<br>\n';
                duplicateInfo.forEach((info) => {
                    reason += `* ${info.url} (refs: ${info.refs.map((r) => r.number).join(', ')})<br>\n`;
                });

                // Insert the new template with the reason parameter
                const template = `{{Duplicated citations|reason=${reason}${dateParam}}}`;
                lines.splice(insertPosition, 0, template);
                const newContent = lines.join('\n');

                const summary = `[[User:Polygnotus/DuplicateReferences|DuplicateReferences]] +${template}`;

                return api.postWithToken('csrf', {
                    action: 'edit',
                    title: pageTitle,
                    text: newContent,
                    summary: summary,
                    // Let the API detect an edit conflict instead of overwriting a
                    // revision saved after the wikitext above was fetched.
                    baserevid: revision.revid,
                    nocreate: true
                });
            }).then(function () {
                showSuccess(linkElement);
                setTimeout(function () {
                    location.reload();
                }, 100); // Reload after 0.1 second
            }).catch(function (error) {
                console.error('Error:', error);
                showError(linkElement);
                mw.notify('Failed to add the template. See console for details.', {type: 'error'});
            });
        }

        /**
         * Replace the element's content with the "[ Working... ]" status marker.
         * @param {HTMLElement} element - Element used as status indicator.
         */
        function showLoading(element) {
            const markup = '<sup><small>[ Working... ]</small></sup>';
            element.innerHTML = markup;
        }

        /**
         * Replace the element's content with the "[ Done ]" status marker.
         * @param {HTMLElement} element - Element used as status indicator.
         */
        function showSuccess(element) {
            const markup = '<sup><small>[ Done ]</small></sup>';
            element.innerHTML = markup;
        }

        /**
         * Replace the element's content with the "[ Error ]" status marker.
         * @param {HTMLElement} element - Element used as status indicator.
         */
        function showError(element) {
            const markup = '<sup><small>[ Error ]</small></sup>';
            element.innerHTML = markup;
        }

        /**
         * Collect the text of `element` and its descendants, leaving out any
         * subtree whose computed style is `display: none` or `visibility: hidden`.
         * Each text node is trimmed; pieces are joined with single spaces and the
         * final string is trimmed again.
         * @param {Element} element - Root of the subtree to read.
         * @returns {string} The concatenated visible text.
         */
        function getVisibleText(element) {
            const pieces = [];
            for (const child of element.childNodes) {
                switch (child.nodeType) {
                    case Node.TEXT_NODE:
                        pieces.push(child.textContent.trim());
                        break;
                    case Node.ELEMENT_NODE: {
                        const computed = window.getComputedStyle(child);
                        const isHidden = computed.display === 'none' || computed.visibility === 'hidden';
                        if (!isHidden) {
                            pieces.push(getVisibleText(child));
                        }
                        break;
                    }
                    default:
                        // Comments and other node kinds contribute nothing
                        break;
                }
            }
            return pieces.join(' ').trim();
        }

        /**
         * Levenshtein (edit) distance between two strings: the minimum number of
         * single-character insertions, deletions and substitutions turning one
         * into the other.
         * @param {string} a - First text.
         * @param {string} b - Second text.
         * @returns {number} The edit distance.
         */
        function calculateLevenshteinDistance(a, b) {
            debug("Comparing:");
            debug("Text 1:", a);
            debug("Text 2:", b);

            // Against an empty string the distance is the other string's length
            if (a.length === 0) return b.length;
            if (b.length === 0) return a.length;

            const rowCount = b.length + 1;
            const colCount = a.length + 1;

            // grid[i][j] = distance between the first i chars of b and the first j chars of a.
            // Column 0 of every row is seeded with the row index...
            const grid = Array.from({length: rowCount}, (_, i) => [i]);
            // ...and row 0 with the column index.
            for (let j = 0; j < colCount; j++) {
                grid[0][j] = j;
            }

            for (let i = 1; i < rowCount; i++) {
                const above = grid[i - 1];
                const current = grid[i];
                const bChar = b.charAt(i - 1);
                for (let j = 1; j < colCount; j++) {
                    current[j] = bChar === a.charAt(j - 1)
                        ? above[j - 1] // characters match: no extra cost
                        : 1 + Math.min(
                            above[j - 1],   // substitution
                            current[j - 1], // insertion
                            above[j]        // deletion
                        );
                }
            }

            const distance = grid[b.length][a.length];
            debug("Levenshtein distance:", distance);
            return distance;
        }

        /**
         * Turn an edit distance into a rounded similarity percentage string.
         * @param {number} distance - Edit distance between the two texts.
         * @param {number} maxLength - Length of the longer of the two texts.
         * @returns {string} Similarity such as "87%". Two empty texts
         *          (maxLength 0) are identical and yield "100%".
         */
        function calculateSimilarityPercentage(distance, maxLength) {
            // Guard the 0/0 case, which would otherwise be reported as "NaN%"
            const similarity = maxLength > 0
                ? ((maxLength - distance) / maxLength) * 100
                : 100;
            debug("Similarity percentage:", similarity.toFixed(2) + "%");
            return Math.round(similarity) + '%';
        }

        /**
         * Group the page's references by the first usable external link they
         * contain and report every URL used by more than one reference.
         *
         * A reference is ignored completely when a link without 'http' in its
         * href is met before a usable link is found. Links labelled "Archived",
         * links containing "wikipedia.org" or "_(identifier)", and links starting
         * with one of the excluded prefixes are passed over.
         *
         * @returns {Array<{url: string, refs: Array<{id: string, number: number, text: string, similarity: (Object<string, string>|undefined)}>}>}
         *          One entry per duplicated URL. `similarity` maps the id of each
         *          later reference in the group to its similarity percentage.
         */
        function getDuplicateInfo() {
            debug("Getting duplicate info");

            // URL prefixes that never count as duplicate candidates
            const excludedPrefixes = [
                "https://search.worldcat.org/",                  // |issn= parameter in cite news
                "https://www.bbc.co.uk/news/live/",              // live articles get frequent updates
                "https://www.aljazeera.com/news/liveblog/",
                "https://www.nbcnews.com/news/world/live-blog/",
                "https://www.theguardian.com/world/live/",
                "https://www.nytimes.com/live/",
                "https://edition.cnn.com/world/live-news/",
                "https://www.timesofisrael.com/liveblog",
                "https://www.france24.com/en/live-news/",
                "https://books.google.com/",                     // may be 2 different pages of the same book
                "https://archive.org/details/isbn_"
            ];
            // "_(identifier)": templates like ISBN, ISSN, OCLC and S2CID link to such pages
            const excludedFragments = ["wikipedia.org", "_(identifier)"];
            const isUsableLink = (url, linkText) =>
                linkText !== "Archived" &&
                !excludedFragments.some((fragment) => url.includes(fragment)) &&
                !excludedPrefixes.some((prefix) => url.startsWith(prefix));

            const duplicates = [];
            const urlMap = new Map();
            const referenceItems = Array.from(referencesList.children);

            debug("Number of reference items:", referenceItems.length);

            itemLoop:
            for (const [position, item] of referenceItems.entries()) {
                if (item.tagName.toLowerCase() !== 'li') {
                    continue;
                }

                const refId = item.id;
                const refNumber = position + 1;
                debug(`Processing reference item ${refNumber} (${refId})`);

                // Visible text of the whole reference, used later for similarity
                const refText = getVisibleText(item);
                debug(`  Reference text: ${refText}`);

                // Pick the first usable link of the reference
                let validLink = null;
                for (const anchor of item.querySelectorAll('a')) {
                    const href = anchor.href;

                    // A link without 'http' disqualifies the whole reference
                    if (!href.includes('http')) {
                        debug(`  Skipping reference ${refNumber} - URL does not contain 'http'`);
                        continue itemLoop;
                    }

                    if (isUsableLink(href, anchor.textContent.trim())) {
                        validLink = anchor;
                        debug(`  Valid link found: ${href}`);
                        break;
                    }
                }

                if (validLink === null) {
                    debug(`  No valid link found in this item`);
                    continue;
                }

                const url = validLink.href;
                const entry = {id: refId, number: refNumber, text: refText};
                const group = urlMap.get(url);
                if (group) {
                    group.push(entry);
                    debug(`  Duplicate found for URL: ${url}`);
                } else {
                    urlMap.set(url, [entry]);
                    debug(`  New URL added to map: ${url}`);
                }
            }

            for (const [url, refs] of urlMap) {
                if (refs.length <= 1) {
                    continue;
                }
                // Compare every pair of references sharing this URL
                for (let i = 0; i < refs.length - 1; i++) {
                    const earlier = refs[i];
                    for (let j = i + 1; j < refs.length; j++) {
                        const later = refs[j];
                        debug(`Comparing references ${earlier.number} and ${later.number}:`);
                        const distance = calculateLevenshteinDistance(earlier.text, later.text);
                        const maxLength = Math.max(earlier.text.length, later.text.length);
                        const similarity = calculateSimilarityPercentage(distance, maxLength);
                        earlier.similarity = earlier.similarity || {};
                        earlier.similarity[later.id] = similarity;
                    }
                }
                duplicates.push({url, refs});
            }

            debug("Number of duplicate sets found:", duplicates.length);
            debug("Duplicate sets:", duplicates);
            return duplicates;
        }

        /**
         * Build the "Duplicate References" table.
         *
         * The first row is a header holding the show/hide toggle and, unless the
         * page already contains a `table.box-Duplicated_citations` element, a link
         * that adds the {{duplicated citations}} template. Each further row covers
         * one duplicated URL: a false-positive report icon, the URL itself and a
         * jump link per reference using it, with the text similarity to the first
         * reference of the group.
         *
         * @param {Array<{url: string, refs: Array<{id: string, number: number, text: string}>}>} duplicateInfo
         *        Duplicate groups as produced by getDuplicateInfo().
         * @returns {HTMLTableElement} The detached table; inserting it is left to the caller.
         */
        function createCollapsibleTable(duplicateInfo) {
            const pageTitle = mw.config.get('wgPageName').replace(/_/g, ' ');

            // Run `action` on the page element of every reference in the group that exists
            const eachCitation = (refs, action) => {
                for (const ref of refs) {
                    const citationElement = document.getElementById(ref.id);
                    if (citationElement) {
                        action(citationElement, ref);
                    }
                }
            };

            // Header row: title, toggle and (optionally) the "add template" link
            const buildHeaderRow = () => {
                const headerCell = document.createElement('td');
                headerCell.innerHTML = '<strong>Duplicate References</strong>';

                const toggleSpan = document.createElement('span');
                toggleSpan.className = 'mw-collapsible-toggle';
                toggleSpan.innerHTML = '[<a href="#" class="mw-collapsible-text">hide</a>]';
                headerCell.appendChild(toggleSpan);

                // Offer the link only when the {{Duplicated citations}} banner is not on the page yet
                if (document.querySelector('table.box-Duplicated_citations') === null) {
                    const addTemplateLink = document.createElement('a');
                    addTemplateLink.textContent = ' add {{duplicated citations}} ';
                    addTemplateLink.href = '#';
                    addTemplateLink.addEventListener('click', (event) => {
                        event.preventDefault();
                        addDuplicateCitationsTemplate(addTemplateLink);
                    });
                    headerCell.appendChild(addTemplateLink);
                }

                const headerRow = document.createElement('tr');
                headerRow.appendChild(headerCell);
                return headerRow;
            };

            // Icon linking to a preloaded false-positive report on the author's talk page
            const buildReportIcon = (url) => {
                const reportParam = encodeURIComponent(`[[${pageTitle}]] ${url}`);
                const reportIcon = document.createElement('a');
                reportIcon.href = `https://en.wikipedia.org/wiki/User_talk:Polygnotus?action=edit&section=new&preloadtitle=Reporting%20%5B%5BUser%3APolygnotus%2FDuplicateReferences%7CDuplicateReferences%5D%5D%20false-positive&preload=User:Polygnotus/$1&preloadparams%5b%5d=${reportParam}%20~~~~`;
                reportIcon.innerHTML = '<img src="https://upload.wikimedia.org/wikipedia/commons/thumb/e/ef/Cross_CSS_Red.svg/15px-Cross_CSS_Red.svg.png" width="15" height="15" alt="Report false positive" title="Report false positive" />';
                reportIcon.style.marginRight = '5px';
                return reportIcon;
            };

            // Hover marks the hovered reference and its siblings; click pins the whole group
            const wireHighlighting = (link, ref, refs) => {
                link.addEventListener('mouseover', () => {
                    eachCitation(refs, (citationElement, r) => {
                        citationElement.classList.add(
                            r.id === ref.id ? 'duplicate-citation-hover' : 'duplicate-citation-highlight'
                        );
                    });
                });
                link.addEventListener('mouseout', () => {
                    eachCitation(refs, (citationElement) => {
                        citationElement.classList.remove('duplicate-citation-hover');
                        citationElement.classList.remove('duplicate-citation-highlight');
                    });
                });
                link.addEventListener('click', () => {
                    for (const pinned of document.querySelectorAll('.duplicate-citation-clicked')) {
                        pinned.classList.remove('duplicate-citation-clicked');
                    }
                    eachCitation(refs, (citationElement) => {
                        citationElement.classList.add('duplicate-citation-clicked');
                    });
                });
            };

            // Append "1, 4 (87%), 9 (42%)"-style jump links for one group to `cell`
            const appendReferenceLinks = (cell, refs) => {
                const [originalRef] = refs;
                const lastIndex = refs.length - 1;

                refs.forEach((ref, index) => {
                    const link = document.createElement('a');
                    link.href = `#${ref.id}`;
                    link.textContent = ref.number;
                    cell.appendChild(link);

                    // Every reference after the first shows its similarity to the first
                    if (index > 0) {
                        const distance = calculateLevenshteinDistance(originalRef.text, ref.text);
                        const longest = Math.max(originalRef.text.length, ref.text.length);
                        const similarity = calculateSimilarityPercentage(distance, longest);
                        const similarityInfo = document.createElement('span');
                        similarityInfo.textContent = ` (${similarity})`;
                        cell.appendChild(similarityInfo);
                    }

                    wireHighlighting(link, ref, refs);

                    if (index < lastIndex) {
                        cell.appendChild(document.createTextNode(', '));
                    }
                });
            };

            // One table row per duplicated URL
            const buildDuplicateRow = (url, refs) => {
                const cell = document.createElement('td');
                cell.appendChild(buildReportIcon(url));

                const urlLink = document.createElement('a');
                urlLink.href = url;
                urlLink.textContent = url;
                urlLink.target = "_blank";
                urlLink.rel = "noopener noreferrer";
                cell.appendChild(urlLink);

                cell.appendChild(document.createTextNode(' in refs: '));
                appendReferenceLinks(cell, refs);

                const row = document.createElement('tr');
                row.appendChild(cell);
                return row;
            };

            const table = document.createElement('table');
            table.className = 'wikitable mw-collapsible duplicate-references-table';
            table.setAttribute('role', 'presentation');

            const tbody = document.createElement('tbody');
            table.appendChild(tbody);
            tbody.appendChild(buildHeaderRow());

            for (const {url, refs} of duplicateInfo) {
                tbody.appendChild(buildDuplicateRow(url, refs));
            }

            return table;
        }

        /**
         * Look for duplicated references and, when there are any, insert the
         * report table right after the References heading container and wire up
         * its show/hide toggle.
         *
         * The collapsed state is remembered in localStorage under
         * 'duplicateReferencesTableState'. Storage access is best-effort: when
         * localStorage is unavailable or throws (e.g. storage blocked by browser
         * settings), the table starts expanded and still toggles, only the state
         * is not persisted.
         */
        function checkDuplicateReferenceLinks() {
            debug("Checking for duplicate reference links");
            const duplicateInfo = getDuplicateInfo();

            if (duplicateInfo.length === 0) {
                debug("No duplicates found");
                return;
            }

            debug("Duplicates found, creating collapsible table");

            const table = createCollapsibleTable(duplicateInfo);
            containerDiv.after(table);

            // Set up collapsible functionality
            const toggleLink = table.querySelector('.mw-collapsible-toggle a');
            const tableBody = $(table).find('tr:not(:first-child)'); // every row except the header
            const storageKey = 'duplicateReferencesTableState';

            // Merely touching localStorage can throw a SecurityError, so keep it inside try
            const readStoredState = () => {
                try {
                    return localStorage.getItem(storageKey) === 'true';
                } catch (error) {
                    debug("Could not read table state from localStorage:", error);
                    return false;
                }
            };

            const storeState = (isCollapsed) => {
                try {
                    localStorage.setItem(storageKey, isCollapsed);
                } catch (error) {
                    // Persistence is optional; the toggle itself must keep working
                    debug("Could not save table state to localStorage:", error);
                }
            };

            const setTableState = (isCollapsed) => {
                if (isCollapsed) {
                    tableBody.hide();
                    toggleLink.textContent = 'show';
                } else {
                    tableBody.show();
                    toggleLink.textContent = 'hide';
                }
                storeState(isCollapsed);
            };

            // Initialize state from localStorage
            setTableState(readStoredState());

            toggleLink.addEventListener('click', function (e) {
                e.preventDefault();
                const isCurrentlyCollapsed = tableBody.is(':hidden');
                setTableState(!isCurrentlyCollapsed);
            });
        }

        // Entry point: reached only when none of the early returns above fired,
        // i.e. the page qualifies and its reference list was found. Runs the
        // duplicate scan once.
        checkDuplicateReferenceLinks();
        debug("Script execution completed");
    });
});
// </nowiki>