#!/bin/bash
#
# Print the title of every member of an English Wikipedia category, one per
# line, following the API continuation tokens until the listing is complete.
#
# Usage: SCRIPT CATEGORY
#   CATEGORY  category name without the "Category:" prefix, for example
#             "Living people" or Living_people. The value is percent-encoded
#             before it is sent; %XX escapes already present in the argument
#             are passed through unchanged so pre-encoded arguments still work.
#
# Output:      titles exactly as they appear in the title="..." attribute of
#              the XML reply (XML entities such as &amp; are NOT decoded).
# Exit status: 0 on success, 1 when a download fails, 2 on a usage error.
# Requires:    awk, plus either curl or wget.
#
# Shape of one reply (abridged):
#   <api><query-continue><categorymembers cmcontinue="A Raider Like Indiana|" />
#   </query-continue><query><categorymembers>
#   <cm pageid="18818413" ns="2" title="User:007fan28" /> ...
#   </categorymembers></query></api>

readonly API_URL="https://en.wikipedia.org/w/api.php"

#######################################
# Download a URL and write the response body to stdout.
# Arguments: $1 - URL to fetch
# Returns:   the downloader's exit status; 127 if neither tool is installed
#######################################
fetch_url() {
  if command -v curl >/dev/null 2>&1; then
    # --fail turns HTTP errors into a non-zero exit status; --location
    # follows a redirect if the server issues one.
    curl --silent --show-error --fail --location "$1"
  elif command -v wget >/dev/null 2>&1; then
    wget -q -O - "$1"
  else
    printf 'error: neither curl nor wget is installed\n' >&2
    return 127
  fi
}

#######################################
# Percent-encode a string byte by byte for use inside a URL query value.
# Arguments: $1 - text to encode
#            $2 - optional; when "keep", existing %XX escapes are copied
#                 through instead of having their "%" encoded again
# Outputs:   the encoded text on stdout
#######################################
percent_encode() {
  local LC_ALL=C # iterate over bytes, not locale-dependent characters
  local text=$1 keep=${2:-} out='' byte code hex i
  for ((i = 0; i < ${#text}; i++)); do
    if [[ "$keep" == keep && "${text:i:3}" == %[0-9A-Fa-f][0-9A-Fa-f] ]]; then
      out+=${text:i:3}
      ((i += 2))
      continue
    fi
    byte=${text:i:1}
    case "$byte" in
      [A-Za-z0-9._~-])
        out+=$byte
        ;;
      *)
        printf -v code '%d' "'$byte"
        # Mask to one byte: some C libraries report high bytes as wide
        # values outside 0..255.
        printf -v hex '%%%02X' $((code & 0xFF))
        out+=$hex
        ;;
    esac
  done
  printf '%s\n' "$out"
}

#######################################
# Write the percent-encoded UTF-8 byte sequence of a Unicode code point.
# Arguments: $1 - code point as a decimal integer (0 .. 0x10FFFF)
# Outputs:   e.g. "%C3%A9" for 233, without a trailing newline
#######################################
utf8_percent() {
  local cp=$1
  if ((cp < 0x80)); then
    printf '%%%02X' "$cp"
  elif ((cp < 0x800)); then
    printf '%%%02X%%%02X' \
      $((0xC0 | (cp >> 6))) $((0x80 | (cp & 0x3F)))
  elif ((cp < 0x10000)); then
    printf '%%%02X%%%02X%%%02X' \
      $((0xE0 | (cp >> 12))) $((0x80 | ((cp >> 6) & 0x3F))) \
      $((0x80 | (cp & 0x3F)))
  else
    printf '%%%02X%%%02X%%%02X%%%02X' \
      $((0xF0 | (cp >> 18))) $((0x80 | ((cp >> 12) & 0x3F))) \
      $((0x80 | ((cp >> 6) & 0x3F))) $((0x80 | (cp & 0x3F)))
  fi
}

#######################################
# Turn a raw XML attribute value into a percent-encoded query value.
# Character references (&#233; &#xE9;) and the five predefined entities
# (&amp; &lt; &gt; &quot; &apos;) are replaced by the percent-encoded UTF-8
# form of the character they stand for; all other text is percent-encoded
# as is. An "&" that does not start a recognised reference becomes %26.
# Arguments: $1 - attribute value exactly as it appears in the XML
# Outputs:   the encoded value on stdout
#######################################
encode_xml_attr() {
  local rest=$1 out='' name cp
  local entity_re='^&(#[0-9]{1,7}|#[xX][0-9A-Fa-f]{1,6}|amp|lt|gt|quot|apos);'
  while [[ "$rest" == *'&'* ]]; do
    out+=$(percent_encode "${rest%%&*}")
    rest="&${rest#*&}"
    cp=-1
    if [[ "$rest" =~ $entity_re ]]; then
      name=${BASH_REMATCH[1]}
      case "$name" in
        amp) cp=38 ;;
        lt) cp=60 ;;
        gt) cp=62 ;;
        quot) cp=34 ;;
        apos) cp=39 ;;
        '#'[xX]*) cp=$((16#${name:2})) ;;
        # Force base 10 so a leading zero (&#039;) is not read as octal.
        *) cp=$((10#${name:1})) ;;
      esac
    fi
    if ((cp >= 0 && cp <= 0x10FFFF)); then
      out+=$(utf8_percent "$cp")
      rest=${rest:${#BASH_REMATCH[0]}}
    else
      out+='%26'
      rest=${rest:1}
    fi
  done
  out+=$(percent_encode "$rest")
  printf '%s\n' "$out"
}

#######################################
# Read an API reply on stdin and print the title attribute of each <cm>
# element, one per line, in document order. Elements without a title
# attribute are skipped.
#######################################
extract_titles() {
  awk '
    {
      key = " title=\""
      count = split($0, element, "<cm ")
      # element[1] is the text in front of the first <cm ...> element.
      for (i = 2; i <= count; i++) {
        attrs = " " element[i]
        start = index(attrs, key)
        if (start == 0) continue
        title = substr(attrs, start + length(key))
        stop = index(title, "\"")
        if (stop > 0) title = substr(title, 1, stop - 1)
        print title
      }
    }'
}

#######################################
# Read an API reply on stdin and print the raw (still XML-escaped) value of
# the first cmcontinue attribute found after "<query-continue". Prints
# nothing when the reply carries no continuation.
#######################################
extract_continue() {
  awk '
    !found {
      key = "cmcontinue=\""
      at = index($0, "<query-continue")
      if (at == 0) next
      tail = substr($0, at)
      at = index(tail, key)
      if (at == 0) next
      tail = substr(tail, at + length(key))
      stop = index(tail, "\"")
      if (stop > 0) tail = substr(tail, 1, stop - 1)
      print tail
      found = 1
    }'
}

#######################################
# Page through the category listing and print every member title.
# Arguments: $1 - category name (see the usage notes at the top)
# Returns:   0 on success, 1 on a failed download, 2 on a usage error
#######################################
main() {
  if (($# < 1)) || [[ -z "$1" ]]; then
    printf 'Usage: %s CATEGORY\n' "${0##*/}" >&2
    return 2
  fi

  local category continuation='' query page token
  category=$(percent_encode "$1" keep)

  while :; do
    # rawcontinue requests the legacy <query-continue> element that
    # extract_continue looks for (see the MediaWiki API:Continue page).
    query="action=query&list=categorymembers&cmtitle=Category:${category}"
    query+="&cmlimit=max&format=xml&rawcontinue=1${continuation}"

    if ! page=$(fetch_url "${API_URL}?${query}"); then
      printf 'error: request failed: %s?%s\n' "$API_URL" "$query" >&2
      return 1
    fi

    printf '%s\n' "$page" | extract_titles

    token=$(printf '%s\n' "$page" | extract_continue)
    [[ -n "$token" ]] || break
    continuation="&cmcontinue=$(encode_xml_attr "$token")"
  done
}

main "$@"