User:Sambot/Code/Speedy images

<?php

// Surface every diagnostic PHP can emit, strict-standards notices included.
error_reporting(E_ALL | E_STRICT);

// Load the bot framework (presumably where the Pillar class is declared -- confirm).
include 'trunk/class.pillar.php';

// Start the session used by the rest of the script, configured from images.ini.
$pillar = Pillar::ini_launch('images.ini');
 
// Parent maintenance categories; the script below walks the subcategories of
// each one and keeps only those whose title ends in a date.
$parentcategories = array(
    'Category:Wikipedia files with no copyright tag',
    'Category:Wikipedia files with unknown source',
    'Category:Wikipedia files with unknown copyright status',
    'Category:Wikipedia files with no non-free use rationale',
    'Category:Disputed non-free Wikipedia files',
    'Category:Replaceable non-free use Wikipedia files',
    'Category:Orphaned non-free use Wikipedia files',
);

// Collect the members of every parent category into one flat list.
// The trailing argument 14 is presumably the Category namespace filter -- confirm
// against Pillar's get_categorymembers().
$cats = array();

foreach ($parentcategories as $parentcat) {
    // Continuation marker; presumably updated by reference by the API wrapper
    // and left empty once the listing is exhausted -- confirm.
    $continue = null;

    do {
        // Each batch request gets up to five tries; the fifth failure ends the run.
        for ($i = 1 ; $i <= 5 ; $i++) {
            try {
                $batch = $pillar->cursite->get_categorymembers($parentcat,500,$continue,14);
                $cats = array_merge($cats,$batch);
                break;
            } catch (PillarException $e) {
                if ($i == 5) {
                    Pillar::report('5 consecutive failures getting members of ' . $parentcat . ': quitting',Pillar::ACTION);
                    die();
                }
            }
        }
    } while ($continue);
}

// Keep only the dated subcategories, reduced to their bare titles.
//
// dates() takes its argument by reference and rewrites each record to its title
// string. array_filter() cannot be used for this: on PHP 7 and later it hands the
// callback a copy (and warns that a reference was expected), so the rewrite is
// lost and the records stay arrays. Calling dates() directly on a variable gives
// it a real reference.
$dated = array();

foreach ($cats as $cat) {
    if (dates($cat)) {
        // $cat is now the title string written back by dates().
        $dated[] = $cat;
    }
}

$cats = $dated;

// For every dated subcategory: list its files, then visit each page that uses
// one of those files and tag the use via open().
foreach ($cats as $cat) {
    $continue = null;
    $files = array();
    do {
        // Up to five tries per batch; the fifth consecutive failure ends the run.
        $i = 0;
        while ($i++ < 5) {
            try {
                $files = array_merge($files,$pillar->cursite->get_categorymembers($cat,500,$continue,6));
                break;
            } catch (PillarException $e) {
                if ($i == 5) {
                    Pillar::report('5 consecutive failures getting members of ' . $cat . ': quitting',Pillar::ACTION);
                    die();
                }
            }
        }
    } while ($continue);
    
    foreach ($files as $file) {
        // Start each file's usage listing from the beginning.
        $continue = null;

        do {
            $pages = array();

            // Same bounded retry as above. The counter lives outside the try and
            // is only reset per batch, so a failing request is really retried and
            // the fifth consecutive failure really stops the script.
            $i = 0;
            while ($i++ < 5) {
                try {
                    $pages = $pillar->cursite->get_imageuse($file['title'],500,$continue);
                    break;
                } catch (PillarException $e) {
                    if ($i == 5) {
                        Pillar::report('5 consecutive failures getting pages that use ' . $file['title'] . ': quitting',Pillar::ACTION);
                        die();
                    }
                }
            }

            // Tag outside the listing retry so that a failure on one page is not
            // mistaken for a failed listing request (which would re-request a
            // batch whose continuation marker may already have advanced).
            foreach ($pages as $page) {
                try {
                    open ($page,$file['title']);
                } catch (PillarException $e) {
                    Pillar::report('Failed to tag a page that uses ' . $file['title'] . ': skipping it',Pillar::ACTION);
                }
            }
        } while ($continue);
    }
}






/**
 * Tags every inline use of an image on one page with {{deletable image-caption}}.
 *
 * The page is loaded (up to 10 attempts, after which the script reports and
 * dies), each [[File:...]] / [[Image:...]] use of $imagename that is not already
 * tagged gets the template appended to its caption (or added as a new caption
 * when it has none), and the page is saved. Nothing is saved when no use needed
 * changing. A failed save is retried from a freshly loaded copy of the page, at
 * most 5 times; after that the failure is reported and the page is skipped.
 *
 * @param mixed  $title     Page identifier handed to the Page constructor.
 * @param string $imagename Full file title, including its "File:" prefix.
 * @return void
 */
function open ($title,$imagename) {
    // Matches only the opening of an inline use; the end is found by bracket
    // counting below. (A trailing ".*" here would swallow the rest of the page
    // and hide every use after the first.) The lookahead makes sure the file
    // name is complete, so "Foo.jpg" does not match "[[File:Foo.jpg.png".
    $regex = '/\[\[' . preg_replace (array('/^File/i','/[ _]/'),array('(?:(?:File)|(?:Image))','[ _]'),preg_quote($imagename,'/')) . '(?=\s*(?:\||\]\]))/i';

    for ($attempt = 1 ; $attempt <= 5 ; $attempt++) {
        $i = 0;
        while ($i++ < 10) {
            try {
                $page = new Page($title);
            } catch (PillarException $e) {
                if ($i == 10) {
                    Pillar::report($title . ' is inaccessible on 10 consecutive attempts: quitting');
                    die();
                }
                continue;
            }
            break;
        }

        $text = $page->get_text();

        if (!preg_match_all($regex,$text,$matches,PREG_OFFSET_CAPTURE)) {
            return; //no inline use of this image on the page (or the pattern failed)
        }

        $newtext = $text;
        $done = array();

        foreach ($matches[0] as $match) {
            $imageuse = imagecaption_extract_use($text,$match[1]);

            // Skip unbalanced links (rewriting them would eat the rest of the
            // page) and identical uses that str_replace has already handled.
            if ($imageuse === null || isset($done[$imageuse])) {
                continue;
            }

            $done[$imageuse] = true;

            if (preg_match('/[\|]?\{\{\s*Deletable[ \-]image[ \-]caption/i',$imageuse)) {
                continue; //already tagged
            }

            $newtext = str_replace($imageuse,imagecaption_tag_use($imageuse),$newtext);
        }

        if ($newtext === $text) {
            return; //nothing to tag, so do not issue an edit
        }

        try {
            $page->put($newtext,'[[Wikipedia:Bots/Requests for approval/Sambot 12|BOT]]: tagging [[:' . $imagename . ']] with {{[[Template:deletable image-caption|]]}}',false);
            return;
        } catch (PillarActionCancelled $e) {
            return;
        } catch (PillarException $e) {
            // Fall through: the next iteration reloads the page and tries again.
        }
    }

    Pillar::report('5 consecutive failures saving ' . $title . ': skipping');
}

/**
 * Helper for open(): returns the complete [[...]] link that starts at $offset.
 *
 * Single brackets are counted, starting from the two opening ones, until they
 * balance.
 *
 * @param string $text   Page text.
 * @param int    $offset Byte offset of the link's opening "[[".
 * @return string|null The link including both bracket pairs, or null when the
 *                     brackets never balance before the end of the text.
 */
function imagecaption_extract_use ($text,$offset) {
    $depth = 2;
    $length = strlen($text);

    for ($position = $offset + 2 ; $position < $length ; $position++) {
        if ($text[$position] == '[') {
            ++$depth;
        } elseif ($text[$position] == ']') {
            if (--$depth == 0) {
                return substr($text,$offset,$position - $offset + 1);
            }
        }
    }

    return null;
}

/**
 * Helper for open(): splits the inside of an image link on its top-level pipes.
 *
 * Pipes inside {{templates}} or nested [[links]] do not split. Two-character
 * delimiters are consumed as a pair so that "]]" closing a nested link is never
 * half-read as the end of something else.
 *
 * @param string $inner Link text without the outer "[[" and "]]".
 * @return array List of parts; the first is the file title. Never empty.
 */
function imagecaption_split_params ($inner) {
    $bits = array();
    $current = '';
    $templates = 0;
    $links = 0;
    $length = strlen($inner);

    for ($i = 0 ; $i < $length ; $i++) {
        $pair = substr($inner,$i,2);

        if ($pair == '{{' || $pair == '}}' || $pair == '[[' || $pair == ']]') {
            if ($pair == '{{') {
                $templates++;
            } elseif ($pair == '}}') {
                $templates = max(0,$templates - 1);
            } elseif ($pair == '[[') {
                $links++;
            } else {
                $links = max(0,$links - 1);
            }

            $current .= $pair;
            $i++; //the second character of the pair has been consumed
            continue;
        }

        if ($inner[$i] == '|' && !$templates && !$links) {
            $bits[] = $current;
            $current = '';
            continue;
        }

        $current .= $inner[$i];
    }

    $bits[] = $current;

    return $bits;
}

/**
 * Helper for open(): rebuilds one image link with {{deletable image-caption}} added.
 *
 * The template is appended to the last parameter that is not a recognised image
 * option (i.e. the caption); when every parameter is an option it is added as a
 * new final parameter.
 *
 * @param string $imageuse Complete, balanced image link including "[[" and "]]".
 * @return string The tagged link.
 */
function imagecaption_tag_use ($imageuse) {
    // Whole-parameter match against MediaWiki's image options. Anchoring both
    // ends keeps captions that merely start with an option word ("Left bank of
    // the river") from being mistaken for options.
    $regex = '/^\s*(?:thumb|thumbnail|frame|framed|frameless|border|right|left|center|centre|none|baseline|sub|super|top|text-top|middle|bottom|text-bottom|upright(?:(?:\s*=\s*|\s+)[\d.]+)?|(?:thumb|thumbnail|link|alt|page|class|lang)=.*|\d+\s*px(?:px)?|\d*x\d+\s*px(?:px)?)\s*$/is';

    $bits = imagecaption_split_params(substr($imageuse,2,-2));
    $link = '[[' . array_shift($bits);

    $added = false;
    $addlink = '';

    // Walk backwards so the tag lands on the last non-option parameter.
    for ($i = count($bits) - 1 ; $i >= 0 ; --$i) {
        $bit = $bits[$i];

        if (!$added && !preg_match($regex,$bit)) {
            $bit .= ' {{deletable image-caption}}';
            $added = true;
        }

        $addlink = '|' . $bit . $addlink;
    }

    if (!$added) {
        $addlink .= '|{{deletable image-caption}}';
    }

    return $link . $addlink . ']]';
}

/**
 * Filter predicate for category records: is this a dated category?
 *
 * As a side effect the record is replaced, through the reference, by its bare
 * title string, which drops the other fields such as 'sortkey'.
 *
 * @param array $name Record with a 'title' key; overwritten with that title.
 * @return bool True when the title ends in "<1-2 digits> <word> <4 digits>".
 */
function dates (&$name) {
    $title = $name['title'];
    $name = $title; //written back to the caller's variable

    return (bool) preg_match('/\d{1,2} \w+ \d{4}$/',$title);
}