User:AmiDaniel/VP/RSS source

The following is the method being used in VandalProof version 1.3 to retrieve items from the recent changes RSS feed, and as I've had many requests for it, I decided to copy it here. This method will work for an RSS RC feed on any foundation Wiki regardless of language, etc.

To use it, you will need to copy the following code into a module. Then you will need to retrieve the innerHTML of the RSS feed you wish to scrape (on en.wikipedia, it can be found at http://en.wikipedia.org/w/index.php?title=Special:Recentchanges&feed=rss). Then pass the innerHTML to SplitItems (like so: SplitItems WB_RSS.Document.body.innerHTML). That will then populate the RSSItems variable with every RC item it finds in the feed.

Option Explicit

Public Type RSSItem
    BodyContent As String
    sUser As String
    
    sArticleName As String
    sPageAddress As String
    
    sSummary As String
    
    sAdded As String
    sRemoved As String
    sMatches As String
    
    sNewTime As String
    sOldTime As String
End Type

Public RSSItems() As RSSItem

Public Sub SplitItems(ByVal str$)
    Dim i%
    
    On Error Resume Next
    
    i = UBound(RSSItems)
    If Err Then
        Err.Clear
        ReDim RSSItems(0)
    End If
    
    On Error GoTo 0
    
    Do Until InStr(1, LCase(str), "<item>") = 0
        ReDim Preserve RSSItems(i)
        With RSSItems(i)
            .BodyContent = Left(str, InStr(1, LCase(str), "<item>") - 1)
            .BodyContent = FindAndReplace(.BodyContent, """/w", """" & GlVars.Root & "/w")
            
            
            .sArticleName = BetwixtStr(.BodyContent, "<title>", "</title>")
            .sPageAddress = BetwixtStr(.BodyContent, "<link>", "</link>")
            .sUser = BetwixtStr(.BodyContent, "<dc:creator>", "</dc:creator>")
        
            .sNewTime = BetwixtStr(.BodyContent, "<pubDate>", "</pubdate>")
            
            .sSummary = BetwixtStr(.BodyContent, "<p>", "</p>")
        
            .sSummary = FindAndReplace(.sSummary, "<span class=autocomment>", "/*")
            .sSummary = FindAndReplace(.sSummary, "</span>", "*/")
        
            .sAdded = GetAdded(.BodyContent)
            .sRemoved = GetRemoved(.BodyContent)
            
            .BodyContent = FindAndReplace(.BodyContent, "<link>" & .sPageAddress & "</link>", "<H2><A href=""" & .sPageAddress & """>" & .sArticleName & "</A> (<A href=""" & _
                .sPageAddress & "?diff=cur"">last diff</A>) (<A href=""" & GlVars.Root & "/w/index.php?title=" & Trim(StrtoHTML(.sArticleName)) & "&action=history"">hist</A>)</H2>")
                
            If .sArticleName = "" & GlVars.SpecialText & "Log/newusers" Then
                .BodyContent = .BodyContent & Chr(13) & Chr(10) & Chr(13) & Chr(10) & "<H3><A href=""$bcur:indef-unsername"">Block Username</A></H3>"
            End If
        End With
            
        str = Right(str, Len(str) - InStr(1, LCase(str), "<item>") - Len("<item>") + 1)
        'If InStr(1, lastinstr, LCase(str), "<item>") > 0 Then lastinstr = InStr(1, lastinstr, LCase(str), "<item>")
        i = i + 1
    Loop
End Sub
    
Public Function BetwixtStr$(ByVal sIn$, ByVal sFirst$, ByVal sLast$)
    If InStr(sIn, sLast) Then
        BetwixtStr = Left(sIn, InStrRev(sIn, sLast) - 1)
        If InStr(BetwixtStr, sFirst) Then
            BetwixtStr = Right(BetwixtStr, Len(BetwixtStr) - InStr(BetwixtStr, sFirst) - Len(sFirst) + 1)
        End If
    End If
End Function

Public Function GetAdded$(ByVal sIn$)
    Dim fields
    
    Debug.Print
    Debug.Print sIn
    
    
    If InStr(1, sIn, "<p><b>New page</b></p>") Then GetAdded = "##NEWPAGE##"
    
    Do Until InStr(1, UCase(sIn), "<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") = 0
        sIn = Right(sIn, Len(sIn) - InStr(1, UCase(sIn), "<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") - Len("<TD STYLE=""FONT-SIZE: SMALLER; BACKGROUND: #CFC"">") + 1)
        GetAdded = GetAdded & sIn
        GetAdded = Left(GetAdded, InStr(1, LCase(GetAdded), "</td>") - 1)
    Loop
    
    GetAdded = FindAndReplace(GetAdded, "<span style=""FONT-WEIGHT: bold; COLOR: red"">", "")
    GetAdded = FindAndReplace(GetAdded, "</span>", "")
    GetAdded = FindAndReplace(GetAdded, "</sup>", "")
End Function

Public Function GetRemoved$(ByVal sIn$)
    Dim fields
    
    'If InStr(1, sIn, "<p><b>New page</b></p>") Then GetRemoved = "##NEWPAGE##"
    
    Do Until InStr(1, UCase(sIn), UCase("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">")) = 0
        sIn = Right(sIn, Len(sIn) - InStr(1, UCase(sIn), UCase("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">")) - Len("<td style=""FONT-SIZE: smaller; BACKGROUND: #ffa"">") + 1)
        GetRemoved = GetRemoved & sIn
        GetRemoved = Left(GetRemoved, InStr(1, LCase(GetRemoved), "</td>") - 1)
    Loop
    
    GetRemoved = FindAndReplace(GetRemoved, "<span style=""FONT-WEIGHT: bold; COLOR: red"">", "")
    GetRemoved = FindAndReplace(GetRemoved, "</span>", "")
    GetRemoved = FindAndReplace(GetRemoved, "</sup>", "")
End Function

Function FindAndReplace(ByVal strIn$, ByVal strFind$, ByVal strReplace$)
    Dim lastInstr%, lastInstr_New%
    
    lastInstr = 1
    Do Until InStr(lastInstr, strIn, strFind) = 0
        lastInstr_New = InStr(lastInstr, strIn, strFind)
        strIn = Left(strIn, InStr(lastInstr, strIn, strFind) - 1) & strReplace & Right(strIn, Len(strIn) - InStr(lastInstr, strIn, strFind) - Len(strFind) + 1)
        lastInstr = lastInstr_New
    Loop
    
    FindAndReplace = strIn
End Function