<?php
/* Given a string containing only a template expression surrounded by curly brackets,
 * returns a structure that can be modified and then converted back into a string.
 *
 * The returned array has the keys:
 *   "title" - the text before the first "|"
 *   "args"  - named arguments keyed by their trimmed name, positional arguments
 *             appended under integer keys; values are stored verbatim
 *   "pmap"  - only present when a parameter name carried surrounding whitespace;
 *             maps the trimmed name back to its original spelling so that
 *             wp_build_template() can reproduce it
 *
 * Raises E_USER_ERROR when $text is not a complete "{{...}}" expression or when the
 * expression contains a nested "{".
 * Status: unstable, parameters and return value may change */
function wp_parse_template($text) {
    // ereg() no longer exists in PHP 7+. "s" lets "." span newlines and "D" pins "$"
    // to the very end of the subject, matching what the POSIX engine did.
    if (!preg_match('/^\{\{(.*)\}\}$/sD', $text, $regs)) {
        trigger_error("Incomplete template", E_USER_ERROR);
        return null; // only reached if an error handler resumes execution
    }
    $text = $regs[1];
    if (strpos($text, "{") !== false) {
        trigger_error("No support for complex templates", E_USER_ERROR);
        return null; // only reached if an error handler resumes execution
    }
    $parts = explode("|", $text);
    $ret = array();
    $ret["title"] = array_shift($parts);
    $rargs = array();
    $pmap = array();
    foreach ($parts as $a) {
        // Split on the first "=" only, so values that themselves contain "="
        // (URLs, for instance) are kept instead of being dropped.
        $pair = explode("=", $a, 2);
        if (count($pair) == 2) {
            $name = trim($pair[0]);
            $rargs[$name] = $pair[1];
            // Strict comparison: " 1" and "1" are loosely equal as numeric strings.
            if ($name !== $pair[0])
                $pmap[$name] = $pair[0];
        }
        else
            $rargs[] = $pair[0];
    }
    $ret["args"] = $rargs;
    if (!empty($pmap))
        $ret["pmap"] = $pmap;
    return $ret;
}
/* Builds a fresh template structure that carries the given title and an empty
 * argument list.
 * Status: unstable, parameters may change */
function wptmpl_create($title) {
    $tmpl = array();
    $tmpl["title"] = $title;
    $tmpl["args"] = array();
    return $tmpl;
}
/* Stores $val under the parameter name $param in the template's argument list,
 * replacing any value already stored under that name. $tmpl is modified in place.
 * Status: unstable, parameters may change */
function wptmpl_set_arg(&$tmpl, $param, $val)
{
    $tmpl["args"][$param] = $val;
}
/* Clears a named template parameter by storing null under its name. The key itself
 * stays in the argument list; wp_build_template() skips null values when rendering.
 * $tmpl is modified in place.
 * Status: unstable, parameters may change */
function wptmpl_unset_arg(&$tmpl, $param)
{
    $tmpl["args"][$param] = null;
}
/* Gets the value of a named template parameter, or null when the template has no
 * parameter with that name.
 * Status: unstable, parameters may change */
function wptmpl_get_arg($tmpl, $param) {
    return array_key_exists($param, $tmpl["args"])
        ? $tmpl["args"][$param]
        : null;
}
/* Tells whether the template has a parameter with the given name. A parameter whose
 * stored value is null counts as absent.
 * Status: unstable, parameters may change */
function wptmpl_has_arg($tmpl, $param) {
    return array_key_exists($param, $tmpl["args"]) && isset($tmpl["args"][$param]);
}
/* Replaces the template's name with $title. $tmpl is modified in place.
 * Status: unstable, parameters may change */
function wptmpl_set_title(&$tmpl, $title)
{
    $tmpl["title"] = $title;
}
/* Returns the template's name.
 * Status: unstable, parameters may change */
function wptmpl_get_title($tmpl)
{
    $name = $tmpl["title"];
    return $name;
}
/* Renders a template structure back into "{{title|...}}" wikitext.
 *
 * Arguments are emitted in array order. Null values are skipped, string keys are
 * written as "name=value" (using the original spelling from "pmap" when one is
 * recorded for that name) and integer keys are written as bare positional values.
 * Status: unstable, parameters may change */
function wp_build_template($tmpl) {
    $out = "{{".$tmpl["title"];
    if (array_key_exists("args", $tmpl)) {
        // Optional map from trimmed parameter name to its original spelling.
        $spelling = array_key_exists("pmap", $tmpl) ? $tmpl["pmap"] : null;
        foreach ($tmpl["args"] as $name => $value) {
            if (!isset($value))
                continue;
            if (!is_string($name)) {
                $out .= "|".$value;
                continue;
            }
            $shown = $name;
            if (isset($spelling) && array_key_exists($name, $spelling))
                $shown = $spelling[$name];
            $out .= "|".$shown."=".$value;
        }
    }
    return $out."}}";
}
/* Finds the first complete template with the given name in the text.
 *
 * $title       - template name to look for
 * $text        - wikitext to search
 * $ignore_case - when true any capitalisation of the name matches; when false only
 *                the first letter may differ in case
 *
 * Returns the template source from its opening "{{" through the matching "}}"
 * (nested braces are balanced), or null when no such template exists or the first
 * candidate is never closed.
 * Status: unstable, parameters may change */
function wp_find_template($title, $text, $ignore_case = false) {
    // eregi() no longer exists in PHP 7+. preg_quote() also escapes characters the
    // old hand-written list missed, such as parentheses and "+".
    $pattern = '/(\{\{[[:space:]]*(' . preg_quote($title, '/') . ')[[:space:]]*[|}]+(.*))$/isD';
    if (!preg_match($pattern, $text, $regs))
        return null;
    $tstr = $regs[1];
    // The pattern is always case-insensitive; when case matters, reject candidates
    // that differ beyond the first letter and keep searching the remaining text.
    if (!$ignore_case && ucfirst($regs[2]) !== ucfirst($title))
        return wp_find_template($title, $regs[3], $ignore_case);
    // Walk the candidate, tracking brace depth, until the opening "{{" is balanced.
    $depth = 0;
    $len = strlen($tstr);
    for ($i = 0; $i < $len; $i++) {
        if ($tstr[$i] == "{")
            $depth++;
        else if ($tstr[$i] == "}")
            $depth--;
        if ($depth <= 0)
            break;
    }
    if ($depth > 0)
        return null;
    return substr($tstr, 0, $i + 1);
}
/* Bot exclusion detector, returns false if the text contains a directive disallowing
 * this bot.
 *
 * $text     - wikitext of the page
 * $context  - optional context; its "username" entry, when present, is the bot name
 *             compared against the deny/allow lists
 * $messages - optional message type (string) or list of message types (array) this
 *             edit delivers, compared against the "optout" list
 *
 * Rules applied, in order:
 *   - any {{Nobots}} template                      -> false
 *   - no {{Bots}} template                         -> true
 *   - deny  lists "all" or this bot                -> false
 *   - allow lists "none"                           -> false
 *   - allow is given but lists neither "all" nor this bot -> false
 *     (a bot whose name is unknown is treated as not being on the list)
 *   - optout lists "all" or one of $messages       -> false
 *   - otherwise                                    -> true
 * Status: unstable, parameters may change */
function wp_page_allows_bot($text, $context = null, $messages = null) {
    $tstr = wp_find_template("Nobots", $text, true);
    if (!empty($tstr))
        return false;
    $tstr = wp_find_template("Bots", $text, true);
    if (empty($tstr))
        return true;
    $tmpl = wp_parse_template($tstr);
    $botname = null;
    if (isset($context) && array_key_exists("username", $context))
        $botname = $context["username"];
    if (array_key_exists("deny", $tmpl["args"])) {
        foreach (explode(",", $tmpl["args"]["deny"]) as $d) {
            $d = trim($d);
            if (strtolower($d) == "all")
                return false;
            if (isset($botname) && $d == $botname)
                return false;
        }
    }
    if (array_key_exists("allow", $tmpl["args"])) {
        // An allow list bans every bot that is not named on it, so besides the
        // explicit "none" we must also refuse when this bot is simply absent.
        $on_list = false;
        foreach (explode(",", $tmpl["args"]["allow"]) as $a) {
            $a = trim($a);
            if (strtolower($a) == "none")
                return false;
            if (strtolower($a) == "all" || (isset($botname) && $a == $botname))
                $on_list = true;
        }
        if (!$on_list)
            return false;
    }
    if (array_key_exists("optout", $tmpl["args"]) && isset($messages)) {
        // Compare message types case-insensitively for both the string and the
        // array form of $messages.
        $wanted = array();
        if (is_string($messages))
            $wanted[] = strtolower($messages);
        else if (is_array($messages)) {
            foreach ($messages as $m)
                $wanted[] = strtolower($m);
        }
        foreach (explode(",", $tmpl["args"]["optout"]) as $o) {
            $o = strtolower(trim($o));
            if ($o == "all")
                return false;
            if (in_array($o, $wanted))
                return false;
        }
    }
    return true;
}
/* Returns an integer timestamp for the date the comment was signed, or null if no
 * signature was found.
 *
 * A signature is a "[[User..." link followed later by a timestamp of the form
 * "HH:MM, D Month YYYY (UTC)". When several timestamps follow the first user link,
 * the last one is used. Null is also returned when the timestamp cannot be parsed or
 * lies in the future. */
function wp_date_comment($text) {
    // eregi() no longer exists in PHP 7+. "i" keeps the match case-insensitive and
    // "s" lets ".*" span newlines as the POSIX engine did.
    $pattern = '/\[\[user.* ([0-9]+:[0-9]+, [0-9]+ [a-z]+ [0-9]+ \(utc\))/is';
    if (!preg_match($pattern, $text, $regs))
        return null;
    $time = strtotime($regs[1]);
    // strtotime() yields false for unparseable input; report that as "no date"
    // rather than leaking false to the caller.
    if ($time === false || $time > time())
        return null;
    return $time;
}
/* Formats a UNIX timestamp as the UTC string "YYYY-MM-DDTHH:MM:SSZ" used when
 * POSTing dates. */
function wp_format_post_date($ts) {
    // Backslashes keep the literal "T" and "Z" from being read as format letters.
    $layout = 'Y-m-d\TH:i:s\Z';
    return gmdate($layout, $ts);
}
/* Returns an array of the transcluded subpages. The subpages are the keys, the section
 * names are the values.
 *
 * $title - name of the parent page; "{{<title>/<subpage>}}" transclusions are listed
 * $page  - wikitext of the parent page
 *
 * A section name is the text between the outermost "==" markers of the most recent
 * heading line. Subpages that appear before any heading get a null section.
 * Underscores are treated as spaces in both the title and the page text. */
function wp_list_subpages($title, $page) {
    $subpages = array();
    $section = null;
    // The page text has its underscores replaced below, so normalise the title the
    // same way; preg_quote() replaces the incomplete hand-written escape list.
    $wanted = str_replace("_", " ", $title);
    $tc_pattern = '#\{\{' . preg_quote($wanted, '#') . '/(.*)\}\}#';
    foreach (explode("\n", $page) as $line) {
        $line = trim($line);
        // ereg() no longer exists in PHP 7+.
        if (preg_match('/==(.*)==/', $line, $regs)) {
            $section = trim($regs[1]);
            continue;
        }
        $line = str_replace("_", " ", $line);
        if (preg_match($tc_pattern, $line, $regs))
            $subpages[rtrim($regs[1])] = $section;
    }
    return $subpages;
}
/* POSTs the array of data to the wiki and returns the raw response body, or false
 * when the request itself fails.
 *
 * $post    - associative array of form fields
 * $context - optional context; "api url" overrides the default endpoint and
 *            "cookies" are sent along as a Cookie header
 *
 * While the server answers with a maxlag error the request is retried every five
 * seconds.
 * NOTE(review): the default endpoint is plain http; confirm the target wiki still
 * accepts POSTs there or pass an https "api url" in the context.
 * Status: stable */
function wp_post($post, $context = null) {
    $url = "http://en.wikipedia.org/w/api.php";
    if (isset($context) && array_key_exists("api url", $context))
        $url = $context["api url"];
    $header = "Content-type: application/x-www-form-urlencoded\r\n";
    if (isset($context) && array_key_exists("cookies", $context))
        $header .= "Cookie: ".http_build_query($context["cookies"], "", "; ")."\r\n";
    $http_opts = array(
        "http" => array(
            "method" => "POST",
            "header" => $header,
            "content" => http_build_query($post)
        )
    );
    $sctx = stream_context_create($http_opts);
    $php_format = is_array($post) && isset($post["format"]) && $post["format"] == "php";
    while (true) {
        $ret = file_get_contents($url, false, $sctx);
        $lagged = false;
        if (is_string($ret) && strstr($ret, "maxlag") !== false) {
            if ($php_format) {
                // The word "maxlag" may simply be part of the returned page text;
                // retrying on that would loop forever. Only an actual API error
                // with code "maxlag" counts.
                // NOTE(review): unserialize() on remote data can instantiate
                // objects; consider restricting allowed classes where supported.
                $decoded = unserialize($ret);
                $lagged = is_array($decoded)
                    && isset($decoded["error"]["code"])
                    && $decoded["error"]["code"] == "maxlag";
            }
            else
                $lagged = true; // other formats: keep the plain substring heuristic
        }
        if (!$lagged)
            break;
        sleep(5);
    }
    return $ret;
}
/* Downloads the page contents from the wiki.
 *
 * $title     - page to fetch
 * $context   - optional context passed through to the request
 * $timestamp - set to the UNIX timestamp of the fetched revision, or null
 *
 * Returns the page text, or null when the page could not be fetched.
 * Status: stable */
function wp_get($title, $context = null, &$timestamp = null) {
    $timestamps = array();
    $pages = wp_get_multiple(array($title), $context, $timestamps);
    $key = null;
    if (is_array($pages)) {
        if (array_key_exists($title, $pages))
            $key = $title;
        else if (count($pages) == 1) {
            // Results are keyed by the title the server reports, which may be a
            // normalised form of what was asked for. Only one page was requested,
            // so a single result is that page.
            $keys = array_keys($pages);
            $key = $keys[0];
        }
    }
    if ($key === null) {
        $timestamp = null;
        return null;
    }
    $timestamp = isset($timestamps[$key]) ? $timestamps[$key] : null;
    return $pages[$key];
}
/* Returns an associative array of the contents of all the specified pages, keyed by
 * the page title as reported in the response.
 *
 * $titles     - list of page titles
 * $context    - optional context; "maxlag" is forwarded to the request
 * $timestamps - set to an array mapping each returned title to the UNIX timestamp of
 *               its fetched revision
 *
 * Pages without revisions are left out. An empty array is returned when $titles is
 * empty or the response cannot be read.
 * Status: stable */
function wp_get_multiple($titles, $context = null, &$timestamps = null) {
    $timestamps = array();
    $pages = array();
    if (empty($titles))
        return $pages;
    $post = array(
        "action" => "query",
        "format" => "php",
        "prop" => "revisions",
        "titles" => implode("|", $titles),
        "rvprop" => "timestamp|content",
    );
    if (isset($context) && array_key_exists("maxlag", $context))
        $post["maxlag"] = (string)$context["maxlag"];
    $ret = wp_post($post, $context);
    // NOTE(review): unserialize() on remote data can instantiate objects; consider
    // restricting allowed classes where supported.
    $ret = is_string($ret) ? unserialize($ret) : false;
    // A failed request or an error reply carries no page list.
    if (!is_array($ret) || !isset($ret["query"]["pages"]) || !is_array($ret["query"]["pages"]))
        return array();
    foreach ($ret["query"]["pages"] as $p) {
        if (empty($p["revisions"]))
            continue;
        $t = $p["title"];
        $rev = array_shift($p["revisions"]);
        $timestamps[$t] = strtotime($rev["timestamp"]);
        $pages[$t] = $rev["*"];
    }
    return $pages;
}
/* Creates a context structure to be passed to the other functions, also sets
 * various options. Each option is stored only when its argument is not null:
 *   $maxlag  -> "maxlag"
 *   $bot     -> "bot"
 *   $api_url -> "api url"
 * Status: unstable, parameters and return value may change */
function wp_create_context($maxlag = null, $bot = false, $api_url = null) {
    $options = array(
        "maxlag" => $maxlag,
        "bot" => $bot,
        "api url" => $api_url,
    );
    $context = array();
    foreach ($options as $key => $value) {
        if ($value !== null)
            $context[$key] = $value;
    }
    return $context;
}
/* Records in the context how many items each list query should ask for. The value
 * is stored under "qlimit"; $context is modified in place.
 * Status: stable */
function wp_context_set_query_limit($limit, &$context)
{
    $context["qlimit"] = $limit;
}
/* Logs the bot into the wiki associated with the given context.
 *
 * $username - account name; must be non-empty
 * $password - account password; must be set
 * $context  - context to log in with; on success it receives "username" and the
 *             session "cookies" (it is created when null)
 *
 * Returns true on success and false when the response is unreadable or the login
 * result is anything other than "Success". Raises E_USER_ERROR when the username or
 * password is missing.
 * Status: unstable, parameters and return value may change */
function wp_login($username, $password, &$context) {
    if (!isset($username) || $username == "")
        trigger_error("Username not set", E_USER_ERROR);
    if (!isset($password))
        trigger_error("Password not set", E_USER_ERROR);
    $login_post = array(
        "action" => "login",
        "format" => "php",
        "lgname" => $username,
        "lgpassword" => $password,
    );
    $ret = wp_post($login_post, $context);
    // NOTE(review): unserialize() on remote data can instantiate objects; consider
    // restricting allowed classes where supported.
    $ret = is_string($ret) ? unserialize($ret) : false;
    if (!is_array($ret) || !array_key_exists("login", $ret))
        return false;
    $login = $ret["login"];
    if (!isset($login["result"]) || $login["result"] != "Success")
        return false;
    $prefix = isset($login["cookieprefix"]) ? $login["cookieprefix"] : "";
    // The user-name cookie previously read from an undefined $settings variable
    // and therefore was always empty.
    $cookies = array($prefix."UserName" => $username);
    if (array_key_exists("lguserid", $login))
        $cookies[$prefix."UserID"] = $login["lguserid"];
    if (array_key_exists("lgtoken", $login))
        $cookies[$prefix."Token"] = $login["lgtoken"];
    if (array_key_exists("sessionid", $login))
        $cookies[$prefix."_session"] = $login["sessionid"];
    if (!isset($context))
        $context = array();
    $context["username"] = $username;
    $context["cookies"] = $cookies;
    return true;
}
/* Sends a logout request to the wiki using the given context. The response is
 * discarded and the context itself is left untouched.
 * Status: stable */
function wp_logout($context) {
    $request = array();
    $request["action"] = "logout";
    $request["format"] = "php";
    wp_post($request, $context);
}
/* Returns an edit token to be used for all edits in the session.
 *
 * $title   - page the token is requested for
 * $context - logged-in context (must contain "cookies"); "maxlag" is forwarded
 *
 * Returns the token string, or "" when the response is unreadable or carries no
 * token. Raises E_USER_ERROR when the context is not logged in.
 * Status: stable */
function wp_get_edit_token($title, $context) {
    if (!isset($context) || !isset($context["cookies"]))
        trigger_error("Must be logged in to get edit token", E_USER_ERROR);
    $post = array(
        "action" => "query",
        "format" => "php",
        "prop" => "info",
        "intoken" => "edit",
        "titles" => $title,
    );
    if (isset($context) && array_key_exists("maxlag", $context))
        $post["maxlag"] = (string)$context["maxlag"];
    $ret = wp_post($post, $context);
    // NOTE(review): unserialize() on remote data can instantiate objects; consider
    // restricting allowed classes where supported.
    $ret = is_string($ret) ? unserialize($ret) : false;
    if (!is_array($ret) || !isset($ret["query"]["pages"]) || !is_array($ret["query"]["pages"]))
        return "";
    $pages = $ret["query"]["pages"];
    foreach ($pages as $p) {
        if (isset($p["title"]) && $p["title"] == $title && isset($p["edittoken"]))
            return $p["edittoken"];
    }
    // The server may report a normalised form of the title. When a single page came
    // back it is the one that was asked for, so accept its token.
    if (count($pages) == 1) {
        $only = reset($pages);
        if (isset($only["edittoken"]))
            return $only["edittoken"];
    }
    return "";
}
/* Uploads a new page or section over the existing one.
 *
 * $title     - page to edit
 * $content   - new wikitext; it is also checked for bot-exclusion directives
 * $summary   - edit summary
 * $section   - section identifier, or null to replace the whole page
 * $edtoken   - edit token
 * $context   - logged-in context; "maxlag" and "bot" are forwarded when present
 * $timestamp - optional UNIX timestamp sent as the base timestamp of the edit
 *
 * Returns true when the reply reports "Success". Returns false after raising
 * E_USER_NOTICE with the error code of an error reply, or after raising
 * E_USER_WARNING for any other reply. Raises E_USER_ERROR when the context is not
 * logged in or the content excludes bots.
 * Status: stable */
function wp_edit_section($title, $content, $summary, $section, $edtoken, $context,
    $timestamp = null) {
    $logged_in = isset($context) && isset($context["cookies"]);
    if (!$logged_in)
        trigger_error("Must be logged in to edit pages", E_USER_ERROR);
    $permitted = wp_page_allows_bot($content, $context);
    if (!$permitted)
        trigger_error($title." excludes bot edits", E_USER_ERROR);

    $post = array(
        "action" => "edit",
        "format" => "php",
        "title" => $title,
        "text" => $content,
        "token" => $edtoken,
        "summary" => $summary,
    );
    // Optional fields are appended only when they apply.
    if (array_key_exists("maxlag", $context)) {
        $post["maxlag"] = (string)$context["maxlag"];
    }
    if (array_key_exists("bot", $context) && $context["bot"]) {
        $post["bot"] = "yes";
    }
    if (isset($timestamp)) {
        $post["basetimestamp"] = wp_format_post_date($timestamp);
    }
    if (isset($section)) {
        $post["section"] = $section;
    }

    $reply = unserialize(wp_post($post, $context));
    if (isset($reply["error"])) {
        trigger_error($reply["error"]["code"], E_USER_NOTICE);
        return false;
    }
    $succeeded = isset($reply["edit"]) && $reply["edit"]["result"] == "Success";
    if (!$succeeded) {
        trigger_error("Unhandled query return status", E_USER_WARNING);
        return false;
    }
    return true;
}
/* Replaces the whole page with $content. Delegates to wp_edit_section() with no
 * section selected and returns its result.
 * Status: stable */
function wp_edit_page($title, $content, $summary, $edtoken, $context,
    $timestamp = null) {
    $whole_page = null;
    $outcome = wp_edit_section($title, $content, $summary, $whole_page, $edtoken, $context, $timestamp);
    return $outcome;
}
/* Adds a new section to the page $ptitle. The section title $stitle is passed as
 * the edit summary and "new" as the section identifier of wp_edit_section(), whose
 * result is returned.
 * Status: stable */
function wp_append_section($ptitle, $stitle, $content, $edtoken, $context) {
    $outcome = wp_edit_section($ptitle, $content, $stitle, "new", $edtoken, $context);
    return $outcome;
}
/* Keeps trying to modify the page until it is successful. The modifications are made by
 * the passed in function, with these parameters:
 * $new_page = $modify($old_page, $data);
 *
 * $token     - edit token; fetched when not supplied
 * $old_page, $old_ts - page text and timestamp to start from; used for the first
 *              attempt only, and only when both are given
 * $max_tries - optional cap on the number of save attempts
 *
 * The loop ends when $modify leaves the page unchanged, when a save succeeds, or
 * when $max_tries attempts have been made. Raises E_USER_ERROR when the page
 * excludes bots. Nothing is returned. */
function wp_edit_war($title, $summary, $modify, $data, $ctx, $token = null,
    $old_page = null, $old_ts = null, $max_tries = null) {
    if (!isset($token)) {
        $token = wp_get_edit_token($title, $ctx);
    }
    $attempts = 0;
    do {
        unset($base_ts);
        if (isset($old_page) && isset($old_ts)) {
            // Caller-supplied text is only valid for the first round.
            $current = $old_page;
            $base_ts = $old_ts;
            unset($old_page, $old_ts);
        }
        else {
            $current = wp_get($title, $ctx, $base_ts);
        }
        if (!wp_page_allows_bot($current, $ctx)) {
            trigger_error($title." excludes bot edits", E_USER_ERROR);
        }
        $updated = $modify($current, $data);
        if ($updated == $current) {
            break; // nothing to change
        }
        $saved = wp_edit_page($title, $updated, $summary, $token, $ctx, $base_ts);
        $attempts++;
        $exhausted = isset($max_tries) && $attempts >= $max_tries;
    } while (!$saved && !$exhausted);
}
/* Dry run of an edit: fetches the page and returns what $modify($old_page, $data)
 * produces for it, without saving anything. */
function wp_edit_test($title, $modify, $data, $ctx) {
    $current = wp_get($title, $ctx);
    $proposed = $modify($current, $data);
    return $proposed;
}
/* Returns an associative array with the name of the image repository for each file,
 * keyed by the page title reported in the response. Returns null when $files is
 * empty.
 *
 * $files   - list of file page titles
 * $context - context; "maxlag" is forwarded when present
 * Status: stable */
function wp_locate_files($files, $context) {
    if (empty($files)) {
        return null;
    }
    $post = array(
        "action" => "query",
        "format" => "php",
        "prop" => "imageinfo",
        "titles" => implode("|", $files),
    );
    $has_maxlag = isset($context) && array_key_exists("maxlag", $context);
    if ($has_maxlag) {
        $post["maxlag"] = (string)$context["maxlag"];
    }
    $reply = unserialize(wp_post($post, $context));
    $repositories = array();
    foreach ($reply["query"]["pages"] as $entry) {
        $repositories[$entry["title"]] = $entry["imagerepository"];
    }
    return $repositories;
}
/* Returns a list of pages in the category.
 *
 * $category - category name, with or without the "Category:" prefix
 * $context  - optional context; "maxlag" and "qlimit" are forwarded when present
 *
 * Follows continuation markers until the whole membership has been collected.
 * Returns an array of page titles, or null after raising E_USER_NOTICE when the
 * response is unreadable or reports an error.
 * Status: stable */
function wp_get_category_members($category, $context = null) {
    $ctitle = $category;
    // eregi() no longer exists in PHP 7+.
    if (!preg_match('/^Category:/i', $category))
        $ctitle = "Category:".$category;
    $mlist = array();
    $continue = null;
    while (true) {
        $post = array(
            "action" => "query",
            "format" => "php",
            "list" => "categorymembers",
            "cmtitle" => $ctitle,
        );
        if (isset($context) && array_key_exists("maxlag", $context))
            $post["maxlag"] = (string)$context["maxlag"];
        if (isset($context) && array_key_exists("qlimit", $context))
            $post["cmlimit"] = (string)$context["qlimit"];
        if (isset($continue))
            $post["cmcontinue"] = (string)$continue;
        $ret = wp_post($post, $context);
        // NOTE(review): unserialize() on remote data can instantiate objects;
        // consider restricting allowed classes where supported.
        $ret = is_string($ret) ? unserialize($ret) : false;
        if (!is_array($ret)) {
            trigger_error("Unreadable response for ".$ctitle, E_USER_NOTICE);
            return null;
        }
        if (array_key_exists("error", $ret)) {
            trigger_error($ret["error"]["info"], E_USER_NOTICE);
            return null;
        }
        if (isset($ret["query"]["categorymembers"]) && is_array($ret["query"]["categorymembers"])) {
            foreach ($ret["query"]["categorymembers"] as $m)
                $mlist[] = $m["title"];
        }
        // Older servers report the next batch under "query-continue", newer ones
        // under "continue"; accept either so the list is not cut short.
        if (isset($ret["query-continue"]["categorymembers"]["cmcontinue"]))
            $continue = $ret["query-continue"]["categorymembers"]["cmcontinue"];
        else if (isset($ret["continue"]["cmcontinue"]))
            $continue = $ret["continue"]["cmcontinue"];
        else
            break;
    }
    return $mlist;
}
/* Goes through the page history to find when subpages were transcluded. The list
 * function returns an array of transcluded subpages and takes the arguments:
 * list_fn($title, $contents);
 * Pass in an array of the subpages to look for as $current_tcs.
 *
 * Returns an array mapping each subpage (taken from the VALUES of what $list_fn
 * returns) to the smallest revision timestamp at which it was seen. Once a subpage
 * from $current_tcs is found absent from a revision, its timestamp is no longer
 * updated by that or any later-processed revision.
 * The history is read in batches; "qlimit" and "maxlag" from $context are forwarded
 * to each request. */
function wp_transcluded_dates($title, $list_fn, $current_tcs, $context = null) {
// subpage => smallest timestamp seen so far
$tc_ts = array();
// subpages from $current_tcs that some processed revision did not contain
$found_missing = array();
while (true) {
$post = array(
"action" => "query",
"format" => "php",
"prop" => "revisions",
"titles" => $title,
"rvprop" => "timestamp|content",
);
if (isset($context) && array_key_exists("qlimit", $context))
$post["rvlimit"] = (string)$context["qlimit"];
if (isset($context) && array_key_exists("maxlag", $context))
$post["maxlag"] = (string)$context["maxlag"];
// Resume from where the previous batch stopped.
if (isset($continue))
$post["rvstartid"] = (string)$continue;
$ret = wp_post($post, $context);
$ret = unserialize($ret);
$pages = $ret["query"]["pages"];
// Pick the revision list of the requested page; stays empty if it is not returned.
$revs = array();
foreach ($pages as $p) {
if ($p["title"] != $title)
continue;
$revs = $p["revisions"];
break;
}
// NOTE(review): presumably revisions arrive newest first - confirm against the API.
foreach ($revs as $r) {
$time = strtotime($r["timestamp"]);
$tcs = $list_fn($title, $r["*"]);
// Revisions in which the list function finds nothing are ignored entirely.
if (empty($tcs))
continue;
if (isset($current_tcs)) {
// Flag every wanted subpage that this revision does not contain.
foreach ($current_tcs as $tc) {
if (!in_array($tc, $tcs))
$found_missing[$tc] = true;
}
}
foreach ($tcs as $tc) {
// Flagged subpages keep whatever timestamp they had when they were flagged.
if (array_key_exists($tc, $found_missing))
continue;
// First sighting starts at "now" so the comparison below always lowers it
// to this revision's time.
if (!array_key_exists($tc, $tc_ts))
$tc_ts[$tc] = time();
if ($tc_ts[$tc] > $time)
$tc_ts[$tc] = $time;
}
}
if (isset($current_tcs)) {
// Stop reading history once every wanted subpage has been flagged as missing.
$all_missing = true;
foreach ($current_tcs as $tc) {
if (array_key_exists($tc, $found_missing))
continue;
$all_missing = false;
break;
}
if ($all_missing)
break;
}
// Fetch the next batch if the reply announces one, otherwise finish.
if (isset($ret["query-continue"]))
$continue = $ret["query-continue"]["revisions"]["rvstartid"];
else
break;
}
return $tc_ts;
}
?>