User talk:Trappist the monk/CS1 maint: Unrecognized language
Latest comment: 9 years ago by Trappist the monk
Made the edit summary conditional
I used the module a bit and saw that it appends "Fix |language= spelling in CS1 citations" to the edit summary regardless of whether any changes were made, so I made the edit-summary-appending behavior conditional: the text is appended only if changes were made (code below). I verified both cases in my sandbox just now. I didn't want to edit your code directly, in case you don't agree with this behavior and don't want people taking it as yours before you can revert. 1 line was changed, and 3 lines were added. All 4 lines have comments starting with "// TR". Feel free to incorporate :) ~ Tom.Reding (talk ⋅contribs ⋅dgaf) 20:56, 6 January 2015 (UTC)
- That's cool, thanks. I never let AWB make its automatic fixes – I don't particularly care to be responsible for someone else's code, and, with lots of minor changes made, I find it harder to locate the changes that I'm interested in. I'll incorporate your changes because I suspect that my point of view is the minority.
/// <summary>
/// Rewrites misspelled or over-qualified |language= values inside CS1 citation templates
/// using a fixed dictionary of known misspellings.
/// </summary>
/// <param name="ArticleText">Wikitext to scan; the (possibly modified) text is returned.</param>
/// <param name="ArticleTitle">Not used by this method.</param>
/// <param name="wikiNamespace">Not used by this method.</param>
/// <param name="Summary">Set to the edit-summary fragment when at least one value was replaced, otherwise to the empty string.</param>
/// <param name="Skip">Always set to false by this method.</param>
/// <returns>The article text with every dictionary-listed language value replaced.</returns>
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    Skip = false;
    Summary = ""; // out parameter must be assigned on every path; the real text is appended only when something changed

    // An anonymous method cannot assign to an out parameter, so the replacement callback
    // records its work in this captured local instead of writing Summary directly.
    bool changes_made = false;

    // Template-name matcher: "cite <type>" / "Cite_<type>" for the listed types, "citation", or a bare "cite" followed by a pipe.
    string IS_CS1 = @"(?:[Cc]ite[_ ](?=(?:(?:AV|av) [Mm]edia(?: notes)?)|article|blog|book|conference|document|(?:DVD|dvd)(?: notes)?|encyclopa?edia|interview|journal|letter|[Mm]agazine|(?:news(?!group|paper))|paper|podcast|press release|sign|speech|techreport|thesis|video|web)|[Cc]itation|[Cc]ite(?=\s*\|))";

    //---------------------------< M I S S P E L L I N G   D I C T I O N A R Y >----------------------------------
    // A crude dictionary of misspellings: key = misspelling, value = correct spelling.
    // It is also used to strip 'qualifiers' (eg "Portuguese (Brazil)" -> "Portuguese", because the
    // qualified form is not an ISO 639-1 language) and to blank values that add nothing (empty string).
    // Keys must be lower case only: the candidate value is lower-cased before the lookup.
    Dictionary<string, string> spelling_map = new Dictionary<string, string>();
    spelling_map.Add("american english", "");
    spelling_map.Add("austrian german", "German");
    spelling_map.Add("azerbaycani", "Azerbaijani");
    spelling_map.Add("azerbaijan", "Azerbaijani");
    spelling_map.Add("aelorussian", "Belarusian");
    spelling_map.Add("castellà", "Spanish");
    spelling_map.Add("castilan", "Spanish");
    spelling_map.Add("català", "Catalan");
    spelling_map.Add("chinese (simplified han)", "Chinese");
    spelling_map.Add("deutsch", "German");
    spelling_map.Add("dhivehi", "Divehi");
    spelling_map.Add("englisch", "");
    spelling_map.Add("español", "Spanish");
    spelling_map.Add("espanhol", "Spanish");
    spelling_map.Add("francais", "French");
    spelling_map.Add("français", "French");
    spelling_map.Add("french (abstract)", "French");
    spelling_map.Add("germaan", "German");
    spelling_map.Add("german (swiss)", "German");
    spelling_map.Add("honduran spanish", "Spanish");
    spelling_map.Add("in german", "German");
    spelling_map.Add("indonesia", "Indonesian");
    spelling_map.Add("korea", "Korean"); // capitalised to match every other replacement value
    spelling_map.Add("norwegain", "Norwegian");
    spelling_map.Add("norwegian bokmal", "Norwegian Bokmål");
    spelling_map.Add("polish2", "Polish");
    spelling_map.Add("porutguese", "Portuguese");
    spelling_map.Add("portuguese (brazil)", "Portuguese");
    spelling_map.Add("russian (translated)", "Russian");
    spelling_map.Add("slovakian", "Slovak");
    spelling_map.Add("slovene, with a summary in english", "Slovenian");
    spelling_map.Add("spanihs", "Spanish");
    spelling_map.Add("spanis", "Spanish");
    spelling_map.Add("spanish (appendix only)", "Spanish");
    spelling_map.Add("spanish; castilian", "Spanish");
    spelling_map.Add("suomi", "Finnish");
    spelling_map.Add("sweden", "Swedish");
    spelling_map.Add("vietnamise", "Vietnamese");

    //---------------------------< M I S S P E L L I N G S >------------------------------------------------------
    // Group 1: everything from "{{" through "language =" (including any whitespace after the "=").
    // Group 2: the language value, up to the next "|" or "}" -- this includes any trailing whitespace.
    string pattern = @"({{\s*" + IS_CS1 + @"[^}]+\|\s*language\s*=\s*)([^\|\}]*)";

    // Regex.Replace leaves the text untouched when nothing matches, so no separate Match pre-check is needed.
    ArticleText = Regex.Replace(ArticleText, pattern,
        delegate(Match match)
        {
            string raw_value = match.Groups[2].Value;

            // Split off whitespace that sits between the value and the next "|" or "}" so that
            // "|language=Deutsch |title=..." is looked up as "deutsch", and the original spacing
            // can be put back after the corrected value.
            string bare_value = raw_value.TrimEnd();
            string trailing_space = raw_value.Substring(bare_value.Length);

            // Invariant lower-casing: the keys are fixed literals, so the lookup must not vary with the current culture.
            string new_spelling;
            if (!spelling_map.TryGetValue(bare_value.ToLowerInvariant(), out new_spelling))
            {
                return match.Groups[0].Value; // not a known misspelling (presumed correct): return the raw string
            }

            changes_made = true;
            return match.Groups[1].Value + new_spelling + trailing_space;
        });

    if (changes_made)
    {
        Summary = "Fix |language= spelling in CS1 citations;";
    }
    return ArticleText;
}