User:Monkbot/task 13: remove replace deprecated subscription registration parameters
Task 13 is a single use task that removes or replaces deprecated |subscription=
and |registration=
parameters in existing cs1|2 templates.
description
cs1|2 has deprecated |subscription=
and |registration=
at this RFC (aspect B3). This task:
- applies only to canonically named templates; redirects and template wrappers are not acknowledged
- ignores cs1|2 templates that have
|subscription=
and|registration=
parameters that are not assigned one of the three allowed values (yes
,y
,true
)
- does nothing when the citation template holds:
- any of these url parameters (with assigned values):
|url=
,|article-url=
,|chapter-url=
,|entry-url=
,|section-url=
(the url list)
- AND holds any of these identifier parameters (with assigned values):
|doi=
,|DOI=
,|jstor=
,|JSTOR=
,|bibcode=
,|hdl=
,|HDL=
,|ol=
,|OL=
,|osti=
,|OSTI=
(the identifier list)
- this because the task cannot know which of the url parameter or the identifier parameter the original editor intended to be 'marked' by the deprecated parameters (could be one, the other, or both)
- any of these url parameters (with assigned values):
- does nothing when the citation template holds more than one of the url list parameters; again, could be one, the other, or both
- removes
|subscription=
and|registration=
parameters when the citation template does not have any of the url list parameters; cs1|2 identifier parameters are presumed to lie behind a paywall or registration barrier; cs1|2 does not highlight the norm so|subscription=
and|registration=
are superfluous in these citation templates
- replaces
|subscription=
and|registration=
with the appropriate|<xxx->url-access=
parameter when the citation template holds only one of the url list parameters
Task 13 skips pages that include {{bots|deny=Monkbot13}}
.
ancillary tasks
Empty |subscription=
and |registration=
parameters are deleted. This task does not do awb general fixes.
script
// this script removes / replaces deprecated |subscription= and |registration= parameters from cs1|2 templates
//
// to make a list for awb use category: CS1 errors: deprecated parameters
// IS_CS1: regex fragment that recognizes the name of a cs1|2 template. It matches one of:
//   - "Cite"/"cite" plus optional whitespace, when followed by one of the listed template-name suffixes
//     (the suffix is inside a lookahead, so it is tested but not consumed);
//   - "Citation"/"citation";
//   - a bare "Cite"/"cite" that is followed by optional whitespace and a pipe.
// The fragment is concatenated into larger patterns by ProcessArticle and sup_reg_common.
string IS_CS1 = @"(?:[Cc]ite\s*(?=(?:AV media(?: notes)?)|[Aa][Vv] media|[Aa][Vv] media notes|article|ar[Xx]iv|biorxiv|book|conference|document|encyclopa?edia|episode|interview|journal|magazine|mailing ?list|manual|(?:news(?!group|paper))|paper|podcast|press release|report|serial|sign|speech|techreport|thesis|video|web)|[Cc]itation|[Cc]ite(?=\s*\|))";
// Per-page flags: sup_reg_common sets the matching flag to false when it edits a template;
// ProcessArticle reads both to decide Skip and the edit summary, then resets them to true.
bool gSkip_subscription = true; // presume that we will skip this page
bool gSkip_registration = true;
// Names of the url-holding parameters that sup_reg_common looks for; the name that is found is also
// used as the prefix of the replacement |<name>-access= parameter.
string[] url_params = { "url", "article-url", "chapter-url", "entry-url", "section-url"};
//---------------------------< P R O C E S S A R T I C L E >--------------------------------------------------
// AWB custom-module entry point for one page.
//
// Steps, in order:
//   1. delete empty |subscription= and |registration= parameters;
//   2. hide every template that is not cs1|2, and every piped wikilink, behind placeholder tokens so that
//      their braces and pipes cannot confuse the citation patterns;
//   3. run sup_reg_common once for |subscription= and once for |registration=;
//   4. restore the hidden text;
//   5. report Skip (true when neither pass changed a template) and the edit Summary.
//
// ArticleText is the page wikitext; ArticleTitle and wikiNamespace are not used here.
// Returns the (possibly modified) wikitext.
public string ProcessArticle(string ArticleText, string ArticleTitle, int wikiNamespace, out string Summary, out bool Skip)
{
    // gSkip_subscription = false;                 // debug; for now we will not skip anything
    // gSkip_registration = false;

    //---------------------------< E M P T I E S >----------------------------------------------------------------
    // a deprecated parameter with no value is simply dropped; the following '|' or '}' is kept
    string[] deprecated_params = { "subscription", "registration" };
    foreach (string deprecated in deprecated_params)
    {
        ArticleText = Regex.Replace(ArticleText, @"\| *" + deprecated + @" *=\s*([\|\}])", "$1");
    }

    //---------------------------< H I D E >----------------------------------------------------------------------
    // innermost non-cs1|2 templates are hidden first; repeat until none remain so that nested ones are reached
    string not_cs1_template = @"\{\{(?!\s*" + IS_CS1 + @")([^\{\}]*)\}\}";
    while (Regex.IsMatch(ArticleText, not_cs1_template))
    {
        ArticleText = Regex.Replace(ArticleText, not_cs1_template, "__0P3N__$1__CL0S3__");
    }

    // piped wikilinks whose target has parenthetical disambiguation; must run before the plain piped form
    ArticleText = Regex.Replace(
        ArticleText,
        @"\[\[([^\|\]]+) +\(([^\)\|]+)\)\|([^\]]+)\]\]",
        "__WL_0P3N__$1__D4B_O__$2__D4B_C____P1P3__$3__WL_CL0S3__");

    // remaining piped wikilinks
    ArticleText = Regex.Replace(
        ArticleText,
        @"\[\[([^\|\]]+)\|([^\]]+)\]\]",
        "__WL_0P3N__$1__P1P3__$2__WL_CL0S3__");

    //---------------------------< S U B S C R I P T I O N   /   R E G I S T R A T I O N >------------------------
    ArticleText = sup_reg_common (ArticleText, "subscription");
    ArticleText = sup_reg_common (ArticleText, "registration");

    //---------------------------< U N H I D E >------------------------------------------------------------------
    // placeholder token -> original text, applied in this order
    string[,] unhide =
    {
        { "__WL_0P3N__",  "[[" },
        { "__D4B_O__",    " (" },                   // make sure that there is a space before the '('
        { "__D4B_C__",    ")"  },
        { "__P1P3__",     "|"  },
        { "__WL_CL0S3__", "]]" },
        { "__0P3N__",     "{{" },
        { "__CL0S3__",    "}}" }
    };
    for (int i = 0; i < unhide.GetLength(0); i++)
    {
        ArticleText = ArticleText.Replace(unhide[i, 0], unhide[i, 1]);
    }

    //---------------------------< S K I P   A N D   S U M M A R Y >----------------------------------------------
    Skip = gSkip_subscription && gSkip_registration;    // nothing was fixed by either pass

    // string summary_lead = "[[User:Monkbot/task_13: remove replace deprecated subscription registration parameters|Task 13]]: (developmental testing): ";
    string summary_lead = "[[User:Monkbot/task_13: remove replace deprecated subscription registration parameters|Task 13]]: ([[Wikipedia:Bots/Requests_for_approval/Monkbot_13|BRFA testing]]): ";
    // string summary_lead = "[[User:Monkbot/task_13: remove replace deprecated subscription registration parameters|Task 13]]: ";

    string fixed_params;
    if (gSkip_subscription)
    {
        fixed_params = "|registration=";
    }
    else if (gSkip_registration)
    {
        fixed_params = "|subscription=";
    }
    else
    {
        fixed_params = "|subscription= and |registration=";
    }
    Summary = summary_lead + "Fix deprecated " + fixed_params + " in cs1|2 templates;";

    gSkip_subscription = true;                      // reset for the next page
    gSkip_registration = true;

    return ArticleText;
}
//---------------------------< S U P _ R E G _ C O M M O N >--------------------------------------------------
// Removes or replaces one deprecated parameter (sr_param is "subscription" or "registration") in every
// cs1|2 template found in ArticleText, and returns the resulting text.
//
// Only parameters whose value is exactly yes, true, or y are touched. For each such template:
//   - more than one populated url-holding parameter (see url_params): left unchanged;
//   - one populated url-holding parameter AND a populated identifier parameter: left unchanged,
//     because we cannot know which of them the deprecated parameter was meant to mark;
//   - no populated url-holding parameter: the deprecated parameter is removed;
//   - exactly one populated url-holding parameter and no identifier: the deprecated parameter is
//     replaced with |<url param>-access=<sr_param>.
//
// Side effect: whenever a template is edited, gSkip_subscription or gSkip_registration (whichever
// corresponds to sr_param) is set to false so that ProcessArticle can build the edit summary.
//
// Expects non-cs1|2 templates to have been hidden already; the patterns rely on the first '}' after
// the template name being the template's own closing brace.
string sup_reg_common (string ArticleText, string sr_param)
{
    // group 1: template from '{{' up to the deprecated parameter; group 2: rest of the template (no closing braces).
    // The lookahead requires the value to be followed by '|' or '}' so that values that merely begin with an
    // accepted word ('yesterday', 'young', ...) are ignored rather than cut in two.
    string template_pattern = @"(\{\{\s*" + IS_CS1 + @"[^\}]*)\|\s*" + sr_param + @"\s*=\s*(?:yes|true|y)(?=\s*[\|\}])([^\}]*)";

    // identifier parameters that the deprecated parameter might have been meant for; the trailing class excludes
    // whitespace so that an empty parameter followed by a space or newline is not mistaken for one with a value
    string identifier_pattern = @"\|\s*(?:doi|DOI|jstor|JSTOR|bibcode|hdl|HDL|ol|OL|osti|OSTI)\s*=\s*[^\|\}\s]";

    return Regex.Replace(ArticleText, template_pattern,
        delegate(Match match)
        {
            string whole = match.Groups[0].Value;       // everything that was matched; returned as-is for 'do nothing'
            string before = match.Groups[1].Value;      // citation template up to the start of the deprecated parameter
            string after = match.Groups[2].Value;       // citation template after the deprecated parameter's value

            int url_count = 0;                          // number of populated url-holding parameters
            string url_param = @"";                     // name of the last populated url-holding parameter found
            foreach (string param in url_params)
            {
                // populated means: at least one non-whitespace character before the next '|' or '}'
                if (Regex.IsMatch(whole, @"\|\s*" + param + @"\s*=\s*[^\|\}\s]"))
                {
                    url_count++;
                    url_param = param;
                }
            }

            if (1 < url_count)                          // can't know which url the deprecated parameter was meant for
            {
                return whole;
            }

            if (1 == url_count && Regex.IsMatch(whole, identifier_pattern))
            {
                return whole;                           // url plus identifier; can't know to which one it applies
            }

            // if here, the template is going to be edited; record that for the edit summary
            if ("subscription" == sr_param)
            {
                gSkip_subscription = false;
            }
            else
            {
                gSkip_registration = false;
            }

            if (0 == url_count)
            {
                return before + after;                  // nothing for the parameter to apply to; remove it
            }

            // exactly one url-holding parameter and no identifier: replace with |<url param>-access=<sr_param>
            return before + @"|" + url_param + @"-access=" + sr_param + @" " + after;
        });
}