User:Rick Bot/scripts/getpages

#!/bin/bash
#
# getpages - print the titles of English Wikipedia pages that start with a
# given prefix, one title per line (redirects are excluded, at most 500
# titles per call).
#
# Usage: getpages PREFIX [NAMESPACE [FROM]]
#   PREFIX     title prefix to match; pass '' to match every title
#   NAMESPACE  optional namespace number (sent as apnamespace)
#   FROM       optional title to start listing from (sent as apfrom)
#
# Exit status: 0 on success, 2 when called without arguments, otherwise
# the status of the failing download command.

# Report a failed download through the pipeline's exit status instead of
# silently printing nothing.
set -o pipefail

# Wikimedia answers plain http:// with a redirect to https://, which curl
# does not follow unless told to, so talk to the HTTPS endpoint directly.
readonly API_URL="https://en.wikipedia.org/w/api.php"

# Print a short usage message on stderr.
usage() {
  printf 'Usage: %s PREFIX [NAMESPACE [FROM]]\n' "${0##*/}" >&2
}

#######################################
# Percent-encode a string for use as a URL query value.
# Existing %XX escapes are left untouched so that callers which already
# pass encoded values keep working.
# Arguments: $1 - text to encode
# Outputs:   the encoded text on stdout (no trailing newline)
#######################################
urlencode() {
  local LC_ALL=C  # walk the string byte by byte so UTF-8 is escaped per byte
  local text=${1-} encoded='' ch code hex i
  for (( i = 0; i < ${#text}; i++ )); do
    ch=${text:i:1}
    case "$ch" in
      [a-zA-Z0-9.~_-])
        encoded+=$ch
        ;;
      %)
        if [[ "${text:i+1:2}" == [0-9A-Fa-f][0-9A-Fa-f] ]]; then
          encoded+=$ch
        else
          encoded+='%25'
        fi
        ;;
      *)
        printf -v code '%d' "'$ch"
        # Keep only the low byte: some C libraries report high bytes as
        # negative or offset wide-character values.
        printf -v hex '%%%02X' "$(( code & 0xFF ))"
        encoded+=$hex
        ;;
    esac
  done
  printf '%s' "$encoded"
}

#######################################
# Build the api.php query string for a list=allpages request.
# Arguments: $1 - title prefix
#            $2 - namespace number (optional)
#            $3 - title to continue from (optional)
# Outputs:   the query string on stdout
#######################################
build_query() {
  local query
  query="action=query&list=allpages&apprefix=$(urlencode "${1-}")"
  query+="&aplimit=500&format=xml&apfilterredir=nonredirects"
  if [[ -n "${2-}" ]]; then
    query+="&apnamespace=$(urlencode "$2")"
  fi
  if [[ -n "${3-}" ]]; then
    query+="&apfrom=$(urlencode "$3")"
  fi
  printf '%s\n' "$query"
}

#######################################
# Download a URL to stdout with whichever of curl or wget is installed.
# Arguments: $1 - URL to fetch
# Returns:   the downloader's exit status, or 127 if neither tool exists
#######################################
fetch_url() {
  local url=$1
  if command -v curl >/dev/null 2>&1; then
    # -f: treat HTTP errors as failures; -sS: no progress meter, keep errors.
    curl -fsS "$url"
  elif command -v wget >/dev/null 2>&1; then
    wget -q -O - "$url"
  else
    printf '%s: neither curl nor wget is installed\n' "${0##*/}" >&2
    return 127
  fi
}

#######################################
# Read the XML answer on stdin and print the title attribute of every
# <p pageid=...> element, one per line, with XML entities decoded.
# NOTE(review): the entity list assumes the API escapes attribute values
# the standard XML way - confirm against a live response.
#######################################
extract_titles() {
  awk '
    {
      rest = $0
      # The response usually arrives as one long line, so scan for each
      # element in turn instead of relying on line boundaries.
      while (match(rest, /<p pageid=[^>]*title="[^"]*"/)) {
        title = substr(rest, RSTART, RLENGTH - 1)  # drop the closing quote
        rest = substr(rest, RSTART + RLENGTH)
        sub(/^.*title="/, "", title)
        gsub(/&quot;/, "\"", title)
        gsub(/&#0*39;|&apos;/, "\047", title)
        gsub(/&lt;/, "<", title)
        gsub(/&gt;/, ">", title)
        # Decode &amp; last so that "&amp;lt;" ends up as "&lt;", not "<".
        gsub(/&amp;/, "\\&", title)
        print title
      }
    }
  '
}

# Entry point: validate the arguments, run the query, print the titles.
main() {
  if (( $# == 0 )); then
    usage
    return 2
  fi
  local query
  query=$(build_query "$@")
  fetch_url "${API_URL}?${query}" | extract_titles
}

main "$@"