Wikipedia:WikiProject Red Link Recovery/Link matching script/Repeated letters
(The same SQL statements are deliberately run twice, so that runs of 3 or 4 identical letters in a row are handled properly.)
This crushing method often produces an unacceptably high number of false positives, so additional work must be done to filter the results. Positive hits are more likely in titles with more than 2 words.
-- Discard every suggestion that contains fewer than two underscores.
-- The underscore count is obtained as the length lost when every '_' is
-- stripped out of the suggestion.
-- NOTE(review): underscores presumably stand for the spaces between words, so
-- this would keep only titles of three or more words - confirm.
delete
from suggestions
where
    length( suggestion ) - length( replace( suggestion, '_', '' ) ) < 2;
-- Crush doubled letters ('aa' -> 'a', 'bb' -> 'b', ...) in crushed_art.title
-- and crushed_links.link.
--
-- NB - 'xx' and 'ii' are not crushed, as these appear in roman numerals;
--    - 'ee' and 'oo' are not crushed either - these are just too common in
--      English.
--
-- Every update below deliberately has no WHERE clause: the whole table is
-- rewritten.
--
-- Each table is processed in two identical passes. One pass halves a run of
-- identical letters (replace() substitutes non-overlapping pairs, left to
-- right), so two passes reduce runs of 3 or 4 letters to a single letter.
-- Runs of 5 or more are shortened but not fully collapsed.
--
-- Within a pass the replacements are nested innermost-first in the order
-- 'aa', 'bb', ... 'zz', which gives the same per-row result as running one
-- update per letter in that order, with a single table scan.

-- Pass 1: article titles.
update crushed_art
set title =
    replace( replace( replace( replace( replace( replace( replace( replace(
    replace( replace( replace( replace( replace( replace( replace( replace(
    replace( replace( replace( replace( replace( replace(
        title,
        'aa', 'a' ), 'bb', 'b' ), 'cc', 'c' ), 'dd', 'd' ),
        'ff', 'f' ), 'gg', 'g' ), 'hh', 'h' ),
        'jj', 'j' ), 'kk', 'k' ), 'll', 'l' ), 'mm', 'm' ), 'nn', 'n' ),
        'pp', 'p' ), 'qq', 'q' ), 'rr', 'r' ), 'ss', 's' ), 'tt', 't' ),
        'uu', 'u' ), 'vv', 'v' ), 'ww', 'w' ),
        'yy', 'y' ), 'zz', 'z' );

-- Pass 1: red links.
update crushed_links
set link =
    replace( replace( replace( replace( replace( replace( replace( replace(
    replace( replace( replace( replace( replace( replace( replace( replace(
    replace( replace( replace( replace( replace( replace(
        link,
        'aa', 'a' ), 'bb', 'b' ), 'cc', 'c' ), 'dd', 'd' ),
        'ff', 'f' ), 'gg', 'g' ), 'hh', 'h' ),
        'jj', 'j' ), 'kk', 'k' ), 'll', 'l' ), 'mm', 'm' ), 'nn', 'n' ),
        'pp', 'p' ), 'qq', 'q' ), 'rr', 'r' ), 'ss', 's' ), 'tt', 't' ),
        'uu', 'u' ), 'vv', 'v' ), 'ww', 'w' ),
        'yy', 'y' ), 'zz', 'z' );

-- Pass 2: article titles (same statement again, see the header comment).
update crushed_art
set title =
    replace( replace( replace( replace( replace( replace( replace( replace(
    replace( replace( replace( replace( replace( replace( replace( replace(
    replace( replace( replace( replace( replace( replace(
        title,
        'aa', 'a' ), 'bb', 'b' ), 'cc', 'c' ), 'dd', 'd' ),
        'ff', 'f' ), 'gg', 'g' ), 'hh', 'h' ),
        'jj', 'j' ), 'kk', 'k' ), 'll', 'l' ), 'mm', 'm' ), 'nn', 'n' ),
        'pp', 'p' ), 'qq', 'q' ), 'rr', 'r' ), 'ss', 's' ), 'tt', 't' ),
        'uu', 'u' ), 'vv', 'v' ), 'ww', 'w' ),
        'yy', 'y' ), 'zz', 'z' );

-- Pass 2: red links (same statement again, see the header comment).
update crushed_links
set link =
    replace( replace( replace( replace( replace( replace( replace( replace(
    replace( replace( replace( replace( replace( replace( replace( replace(
    replace( replace( replace( replace( replace( replace(
        link,
        'aa', 'a' ), 'bb', 'b' ), 'cc', 'c' ), 'dd', 'd' ),
        'ff', 'f' ), 'gg', 'g' ), 'hh', 'h' ),
        'jj', 'j' ), 'kk', 'k' ), 'll', 'l' ), 'mm', 'm' ), 'nn', 'n' ),
        'pp', 'p' ), 'qq', 'q' ), 'rr', 'r' ), 'ss', 's' ), 'tt', 't' ),
        'uu', 'u' ), 'vv', 'v' ), 'ww', 'w' ),
        'yy', 'y' ), 'zz', 'z' );