"""
rssweblog.py

Exposes an interface for filtering RSS files, based on XMLFilter, with
high-level operations such as adding, listing, editing, and removing posts.
Can treat blogBrowser date-based archives as one big file (with optimizations
to find posts quickly based on date).

Revision History:

1.5.2 2003-07-16 Andrew Shearer

Prepend filename (minus dir path) to UnicodeError messages when parsing a
weblogArchive; xmllib sometimes throws UTF-8 errors and otherwise there's no
way to tell which file they came from.

1.5.1 2003-07-15 Andrew Shearer

Accept W3CDate, not just xmlrpclib.DateTime, in the MetaWeblog API's
dateCreated member. This allows the caller (the XML-RPC newPost handler)
to pass the current date with timezone and DST intact.

1.5   2003-07-07  Andrew Shearer

Read-only support for RSS 1.0, as well as RSS 2.0 in a namespace.
flNotOnHomePage support. Editing a post to remove all categories now
works. Renamed ISO8601Date to W3CDate; moved W3CDate and XMLFilter to
their own modules.
"""

import os
import os.path
import re
import string
import time
import xmlrpclib

import W3CDate
import XMLFilter

__author__ = "Andrew Shearer"

# Common namespace URIs
kContentURI = "http://purl.org/rss/1.0/modules/content/"
kDCURI = "http://purl.org/dc/elements/1.1/"
kDCTermsURI = "http://purl.org/dc/terms/"
kRadioWeblogPostURI = "http://backend.userland.com/radioWeblogPostModule"
kRSS1URI = "http://purl.org/rss/1.0/"
kRSS2URI = "http://backend.userland.com/RSS2"
kRDFURI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"

# If the RSS writer is forced to declare a namespace (because it
# was missing from the original file), it will use these prefixes.
kNamespacePrefix = {
    kContentURI: 'content',
    kDCURI: 'dc',
    kDCTermsURI: 'dcterms',
    kRadioWeblogPostURI: 'radioWeblogPost',
    kRDFURI: 'rdf',
    "http://purl.org/rss/1.0/modules/syndication/": 'sy',
    "http://webns.net/mvcb/": 'admin',
}

# Name tuples for core RSS 2.0 elements (first item is namespace URI)
kChannel = (None, 'channel')
kItem = (None, 'item')
kAuthor = (None, 'author')
kCategory = (None, 'category')
kDescription = (None, 'description')
kGuid = (None, 'guid')
kLink = (None, 'link')
kPubDate = (None, 'pubDate')
kTitle = (None, 'title')

# Namespace URIs under which a core RSS element is accepted: no namespace,
# the RSS 1.0 namespace, or the RSS 2.0 namespace. RSSFilter.startElementNS
# tests an 'item' element's namespace URI against this tuple.
kCoreNamespaceAlternatives = (None, kRSS1URI, kRSS2URI)

# Name tuples for common namespaced RSS extensions
kContentEncoded = (kContentURI, 'encoded')
kDCDate = (kDCURI, 'date')
kDCTermsModified = (kDCTermsURI, 'modified')
kRadioWeblogPostID = (kRadioWeblogPostURI, 'id')
kRadioWeblogPostFlNotOnHomePage = (kRadioWeblogPostURI, 'flNotOnHomePage')
kRDFAbout = (kRDFURI, 'about')

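# Disabled code, kept for reference: it built kNamespacePrefix by inverting
# a kNamespaceURI (prefix -> URI) dict. kNamespacePrefix is now written out
# literally above, and no kNamespaceURI is defined in this part of the file.
#kNamespacePrefix = {}
#for iterPrefix, iterURI in kNamespaceURI.items():
#    kNamespacePrefix[iterURI] = iterPrefix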
    
class SafeFileReplace:
    """A safe transaction-style API for updating a file, by writing the new
    content to a temporary file then replacing the original when done
    ("commit") or throwing away the temp file ("abort"). Code using this
    class should be of the form:
    
    replacer = SafeFileReplace(mypath)
    tempfile = replacer.openTempFile()
    try:
        # write stuff to tempfile
    except:
        replacer.abort()
        raise
    else:
        replacer.commit()
    
    commit() and abort() are both harmless no-ops when no temp file has been
    opened, so abort() can always be called from an error handler without
    masking the exception being handled.
    
    To do: lock the original file somehow during the update, to prevent race
    conditions with multiple update processes or threads. Use a random-number
    algorithm to choose temp file names."""
    def __init__(self, origpath):
        self.origpath = origpath    # file that commit() will replace
        self.tempfilepath = None    # path of the temp file, once chosen
        self.tempfileptr = None     # open temp file object, or None
    
    def openTempFile(self, binary = 0):
        """Open (on the first call) and return the temp file to write to.
        
        The temp file is created in the same directory as the original.
        binary: if true the file is opened in 'wb' mode, otherwise 'w'.
        Repeated calls return the same file object until commit/abort."""
        if self.tempfileptr is None:
            self.tempfilepath = self.getTempPath(os.path.dirname(self.origpath))
            if binary:
                mode = 'wb'
            else:
                mode = 'w'
            self.tempfileptr = open(self.tempfilepath, mode)
        return self.tempfileptr
    
    def commit(self):
        """replace the orig file with the temp file.
        
        The original is first renamed out of the way (rather than being
        overwritten by the rename) and deleted only after the temp file has
        taken its place. If moving the temp file into place fails, the
        original is renamed back before the error is re-raised. If the
        original does not exist yet, the temp file simply becomes it.
        """
        # would be nice to use FSpExchangeFiles on Mac OS
        self._closeTempFile()
        if self.tempfilepath is None:
            return      # nothing was ever written; leave the original alone
        if os.path.exists(self.origpath):
            backuppath = self.getTempPath(os.path.dirname(self.origpath))
            os.rename(self.origpath, backuppath)
            try:
                os.rename(self.tempfilepath, self.origpath)
            except OSError:
                # don't strand the original under its backup name
                os.rename(backuppath, self.origpath)
                raise
            self.tempfilepath = None
            os.remove(backuppath)
        else:
            os.rename(self.tempfilepath, self.origpath)
            self.tempfilepath = None
    
    def abort(self):
        """get rid of the temp file; the original is left untouched"""
        self._closeTempFile()
        if self.tempfilepath is not None:
            # the file may be missing if open() itself failed
            if os.path.exists(self.tempfilepath):
                os.remove(self.tempfilepath)
            self.tempfilepath = None
    
    def _closeTempFile(self):
        """close the temp file object if one is open"""
        if self.tempfileptr is not None:
            self.tempfileptr.close()
            self.tempfileptr = None
    
    def getTempPath(self, dir):
        """Return the first path of the form dir/~tempN.tmp (N = 1, 2, ...)
        that does not exist yet."""
        # stupid temp algorithm
        # we don't use Python's tmpnam function because there's no
        # guarantee that it will choose the same volume or even a writable
        # directory.
        index = 1
        while 1:
            path = os.path.join(dir, '~temp%d.tmp' % index)
            if not os.path.exists(path):
                return path
            index = index + 1
    

# --- weblog representation classes ---

class RSSException(Exception):
    """Base class for the weblog/RSS exceptions defined in this module."""
    pass

class NoPostIDException(RSSException):
    """Raised by RSSItem.getBloggerID() when an item has no usable post id
    and no post date from which one could be generated."""
    pass

class NoChannelForNewItemException(RSSException):
    """Raised by WeblogArchive.generateRecent() when the pass that rewrites
    the recent-items file does not report that it finished."""
    pass

class MissingRSSFileException(RSSException):
    """Raised by WeblogFactory() when the weblog's 'path' is neither an
    existing file nor an existing directory."""
    pass

class NoSampleRSSFileException(RSSException):
    """Raised by WeblogArchive.dateToRSSPath() when a month's RSS file has to
    be created but the weblog's 'sample-file' template is not configured or
    does not exist."""
    pass

class Weblog:
    """A weblog kept in a single RSS file (or read from an open stream).
    
    webloginfo is a dict-like settings object. Members read by this class:
    'path'            -- path of the RSS file
    'stream'          -- optional; if present, readRSS() parses this stream
                         instead of the file at 'path'
    'categories'      -- optional list of structs with a 'name' member; a
                         category's id is its index in this list
    'permaLinkFormat' -- optional
    'guidFormat'      -- optional
    """
    def __init__(self, webloginfo):
        self.webloginfo = webloginfo
        # cache inverted categories list, for fast case-insensitive name lookup
        self.categoriesByName = {}
        categoryid = 0
        for category in webloginfo.get('categories', []):
            self.categoriesByName[category['name'].lower()] = categoryid
            categoryid = categoryid + 1
    
    def getCategories(self):
        """return the configured category list ([] if there is none)"""
        return self.webloginfo.get('categories', [])
    
    def getCategoryNameByID(self, categoryid):
        """return the name of the category at index categoryid, or None if
        the index is out of range"""
        categories = self.getCategories()
        if 0 <= categoryid < len(categories):
            return categories[categoryid]['name']
        return None
    
    def getCategoryIDByName(self, category):
        """return the id of the named category (case-insensitive), or None
        if there is no such category"""
        return self.categoriesByName.get(category.lower())
    
    def getPermaLinkFormat(self):
        return self.webloginfo.get('permaLinkFormat')
    
    def getGuidFormat(self):
        return self.webloginfo.get('guidFormat')
    
    def modifyRSS(self, transformerClass, transformerParams = (), postid = None, postdate = None):
        """return a true value if the transformerClass instance says it finished
        
        postid and postdate are accepted for interface compatibility with
        WeblogArchive; a single-file weblog does not need them."""
        return self._modifyRSSFile(self.webloginfo['path'], transformerClass, transformerParams)
    
    def _modifyRSSFile(self, rsspath, transformerClass, transformerParams = ()):
        """return a true value if the transformerClass instance says it finished
        
        The file at rsspath is parsed through a new
        transformerClass(XMLGenerator(tempfile), *transformerParams) filter.
        The output replaces the original file only if parsing succeeds; on
        any error the temp file is discarded and the exception re-raised."""
        replacer = SafeFileReplace(rsspath)
        infile = open(rsspath, 'r')
        try:
            # opened inside the try so infile is closed even if this fails
            outfile = replacer.openTempFile()
            try:
                handlerObj = transformerClass(XMLFilter.XMLGenerator(outfile),
                    *tuple(transformerParams))
                XMLFilter.parseStream(infile, handlerObj, handlerObj.getRecommendedFeatures())
            except:
                replacer.abort()
                raise
        finally:
            infile.close()
        replacer.commit()
        return handlerObj.isDone()
    
    def readRSS(self, readerClass, readerParams, postid = None, postdate = None):
        """return a result furnished by the readerClass instance
        
        The reader is created as readerClass(*readerParams) and fed either
        the weblog's 'stream' (when present) or the file at 'path'."""
        reader = readerClass(*tuple(readerParams))
        if self.webloginfo.has_key('stream'):
            XMLFilter.parseStream(self.webloginfo['stream'], reader, reader.getRecommendedFeatures())
        else:
            self._readRSSFile(reader, self.webloginfo['path'])
        return reader.getResult()
    
    def _readRSSFile(self, reader, rsspath):
        """parse the file at rsspath into the given reader"""
        XMLFilter.parseFilePath(rsspath, reader, reader.getRecommendedFeatures())
        
def WeblogFactory(webloginfo):
    """Return the weblog object that fits webloginfo.
    
    A Weblog is returned when webloginfo supplies a 'stream' or its 'path'
    is an existing file; a WeblogArchive when 'path' is a directory.
    Raises MissingRSSFileException when 'path' is neither."""
    if webloginfo.has_key('stream'):
        return Weblog(webloginfo)
    path = webloginfo['path']
    if os.path.isfile(path):
        return Weblog(webloginfo)
    if os.path.isdir(path):
        return WeblogArchive(webloginfo)
    raise MissingRSSFileException("the file or folder specified for the weblog is missing")
            
class WeblogArchive(Weblog):
    """format a weblog as a structured folder hierarchy, ordered by date.
    One folder per year, one RSS file per month.
    Example: {defined root folder} / 2003 / 01.xml
    """
    
    yearRE = re.compile("^[1-9][0-9][0-9][0-9]$")
    
    def __init__(self, webloginfo):
        Weblog.__init__(self, webloginfo)
    
    def modifyRSS(self, transformerClass, params = (), postid = None, postdate = None):
        if postdate:
            rsspaths = [self.dateToRSSPath(postdate)]
        else:
            rsspaths = self.getOrderedRSSPaths()
        isDone = 0
        for rssfile in rsspaths:
            isDone = self._modifyRSSFile(rssfile, transformerClass, params)
            if isDone: break
        self.generateRecent()
        return isDone
    
    def generateRecent(self):
        recentpath = self.webloginfo.get('recent-file')
        if not recentpath:
            return
        maxposts = int(self.webloginfo.get('recent-max','15'))
        recentposts = self.readRSS(RSSLister, (maxposts,))
        isDone = self._modifyRSSFile(recentpath, RSSReplacer, (recentposts,))
        if not isDone:
            raise NoChannelForNewItemException, "Could not write the recent items list, because the recent file did not have a place for them."
     
    def readRSS(self, readerClass, readerParams, postid = None, postdate = None):
        if postdate:
            rsspaths = [self.dateToRSSPath(postdate)]
        else:
            rsspaths = self.getOrderedRSSPaths()
        reader = apply(readerClass, readerParams)
        for rssfile in rsspaths:
            try:
                self._readRSSFile(reader, rssfile)
            except UnicodeError, e:
                raise UnicodeError, ('File %s: %s' % (os.path.basename(rssfile), str(e)))
            if reader.isDone(): return reader.getResult()
        return reader.getResult()
        
    def getOrderedRSSPaths(self):
        parentdir = self.webloginfo['path']
        yeardirnames = os.listdir(self.webloginfo['path'])
        years = []  # will contain list of strings
        for yeardirname in yeardirnames:
            if len(yeardirname) == 4 and self.yearRE.match(yeardirname) and os.path.isdir(os.path.join(parentdir, yeardirname)):
                years.append(yeardirname)
        years.sort()
        years.reverse()
        result = []
        for year in years:
            for month in range(12, 0, -1):
                rsspath = os.path.join(parentdir, year, "%02d.xml" % month)
                if os.path.isfile(rsspath):
                    result.append(rsspath)
        return result  
    
    def dateToRSSPath(self, postdate, autocreate = 1):
        datetuple = postdate.getDateTuple()
        parentdir = self.webloginfo['path']
        year, month, day = datetuple[0:3]
        yearpath = os.path.join(parentdir, "%04d" % year)
        if not os.path.isdir(yearpath):
            if not autocreate: return None
            os.mkdir(yearpath)
        rsspath = os.path.join(yearpath, "%02d.xml" % month)
        if not os.path.isfile(rsspath):
            if not autocreate: return None
            samplefile = self.webloginfo.get('sample-file')
            if not samplefile:
                raise NoSampleRSSFileException, "Could not create the month's RSS archive, because no 'sample-file' to use as a template was specified for the weblog."
            if not os.path.isfile(samplefile):
                raise NoSampleRSSFileException, "Could not create the month's RSS archive, because the 'sample-file' specified for use as a template was not found."
            #shutil.copyfile(emptyfile, rsspath)
            #self._modifyRSS(rsspath, rssDropper, (samplefile,))
            outfile = open(rsspath, 'w')
            try:
                handlerObj = RSSReplacer(XMLFilter.XMLGenerator(outfile), [])
                XMLFilter.parseFilePath(samplefile, handlerObj, handlerObj.getRecommendedFeatures())
            finally:
                outfile.close()
        return rsspath
        
class RSSItem:
    multiValuedElements = {kCategory: 'categories'}
    # element is called 'category' in internal/RSS, 'categories' in MetaWeblog
    
    def __init__(self, weblog = None):
        """Create an empty item.
        
        The weblog object is only consulted to interpret elements such as
        category IDs. Adding the item to a weblog is the client's job,
        normally done through the item's pipeToRSS() method."""
        self.weblog = weblog
        # attributes of the <item> element itself
        self.itemAttrs = {}
        # attributes per tag, as last recorded for that tag
        self.elementAttrsDict = {}
        # tags for which the elementsDict version is favored over the
        # elementsList one, while the order still comes from elementsList
        self.changedElements = {}
        # RSS struct format: maps (namespace URI, tagname) pairs to text
        # content, or to lists of text content for multiple like-named nodes
        self.elementsDict = {}
        # saved order of elements, including repeats such as category
        self.elementsList = []
    
    def setItemAttributes(self, attrs):
        """Remember the attributes of the <item> element itself. pipeToRSS()
        passes them back out on the item's start tag, and
        getMetaWeblogFormat() looks in them for an rdf:about value."""
        self.itemAttrs = attrs
        
    def addElement(self, tag, attrs, text):
        """Add an RSS file element. Do this when initializing the RSSItem.
        
        tag is a (namespace URI, tagname) pair, attrs the element's
        attributes and text its content. Every call appends an entry to
        elementsList. For multi-valued elements (see multiValuedElements)
        the text is accumulated into a list in elementsDict; for all other
        elements a later call overwrites the earlier value."""
        if tag[0] == '':
            tag = (None, tag[1])  # normalize namespace URI of '' to None
        self.elementsList.append((tag, attrs, text))
        if attrs:
            self.elementAttrsDict[tag] = attrs
        if tag in self.multiValuedElements:
            if type(text) is type([]):
                values = list(text)
            else:
                values = [text]
            # Always build a fresh list. Extending in place could modify a
            # list that the caller, or the elementsList entry just appended,
            # still refers to -- pipeToRSS() would then emit the later
            # values twice.
            self.elementsDict[tag] = self.elementsDict.get(tag, []) + values
        else:
            self.elementsDict[tag] = text
    
    def changeElement(self, tag, attrs, text):
        """Set or replace an element's content and attributes.
        
        The tag is marked as changed, so that pipeToRSS() writes the
        elementsDict version. A tag not yet in elementsDict also gets an
        elementsList entry, which fixes its position in the output. For
        multi-valued elements a single text is wrapped in a list."""
        if tag[0] == '':
            tag = (None, tag[1])    # normalize namespace URI of '' to None
        isNewElement = tag not in self.elementsDict
        if isNewElement:
            self.elementsList.append((tag, attrs, text))
        isMultiValued = tag in self.multiValuedElements
        if isMultiValued and type(text) is not type([]):
            text = [text]
        self.changedElements[tag] = 1
        self.elementAttrsDict[tag] = attrs
        self.elementsDict[tag] = text
    
    def deleteElement(self, tag):
        """Remove an element's content, if it has any, and mark the tag as
        changed so that pipeToRSS() skips its elementsList entries."""
        if tag[0] == '':
            tag = (None, tag[1])    # normalize namespace URI of '' to None
        try:
            del self.elementsDict[tag]
        except KeyError:
            pass    # the element was not present; nothing to remove
        self.changedElements[tag] = 1
    
    def setFromBloggerFormat(self, bloggerFormat):
        """Set title, link and description from a Blogger API struct.
        
        The 'title', 'link' and 'content' members are copied to the
        corresponding RSS elements; a missing or blank member deletes the
        element. Some clients prepend <title>...</title> and
        <category>...</category> tags to the content: these are stripped
        off and applied as the item's title and category. The category tag
        holds a numeric category id, which is translated to a name through
        the weblog; an unknown or non-numeric id is dropped. The title is
        unescaped, reversing the escaping done by getBloggerFormat()."""
        mapping = (('title', kTitle), ('link', kLink), ('content', kDescription))
        for key, rssElement in mapping:
            value = bloggerFormat.get(key)
            if not value:
                # delete missing or blank elements.
                # Can't just let blanks stand: RSS Validator won't accept
                # empty <link> elements, for example.
                self.deleteElement(rssElement)
                continue
            if key != 'content':
                self.changeElement(rssElement, {}, value)
                continue
            
            element = value
            # demunge title and category tags that some clients prepend to description
            titleprefix = '<title>'
            titlesuffix = '</title>'
            catprefix = '<category>'
            catsuffix = '</category>'
            suffixindex = element.find(titlesuffix)
            if element[0:len(titleprefix)] == titleprefix and suffixindex > 0:
                title = element[len(titleprefix):suffixindex].strip()
                element = element[suffixindex + len(titlesuffix):].lstrip()
                if title:
                    # undo xmlrpclib.escape(); '&amp;' must be handled last
                    title = title.replace('&lt;', '<').replace('&gt;', '>').replace('&amp;', '&')
                    self.changeElement(kTitle, {}, title)
            suffixindex = element.find(catsuffix)
            if element[0:len(catprefix)] == catprefix and suffixindex > 0:
                category = element[len(catprefix):suffixindex].strip()
                element = element[suffixindex + len(catsuffix):].lstrip()
                if category and self.weblog is not None:
                    try:
                        categoryID = int(category)
                    except ValueError:
                        # not a numeric id: treat it like an unknown category
                        categoryName = None
                    else:
                        categoryName = self.weblog.getCategoryNameByID(categoryID)
                    # +++ should throw exception for unknown categories
                    if categoryName:
                        self.changeElement(kCategory, {}, categoryName)
            
            # set description
            self.changeElement(kDescription, {}, element)
            # remove content:encoded element, if it exists; it would be out of sync
            self.deleteElement(kContentEncoded)
    
    def setFromMetaWeblogFormat(self, metaWeblogFormat):
        """Update this item from a MetaWeblog API struct.
        
        Members are handled as follows:
        'description'     -- stored as an element of that name; its presence
                             also deletes any content:encoded element
        'dateCreated'     -- a true value is converted to RFC 822 form and
                             stored as pubDate. Accepts a W3CDate, an
                             xmlrpclib.DateTime, or anything whose str() the
                             W3CDate parser accepts. The member's presence
                             also deletes any dc:date element.
        'categories'      -- stored as the category element
        'flNotOnHomePage' -- true stores an empty element, false deletes it
        'postid'          -- ignored; the post id can't be changed this way
        'permaLink'       -- stored as guid with isPermaLink="true"
        a name with ':'   -- taken to be a namespace URI whose value is a
                             struct of elements in that namespace
        anything else     -- stored as an un-namespaced element of that name
        
        flNotOnHomePage and category are always deleted first, so leaving
        them out of the struct removes them from the item."""
        if metaWeblogFormat.has_key('description'):
            # remove content:encoded element, if it exists; it would be out of sync
            self.deleteElement(kContentEncoded)
        if metaWeblogFormat.has_key('dateCreated'):
            self.deleteElement(kDCDate)
        #self.deleteElement(kLink)
        # always delete flNotOnHomePage first (make it false), because clients
        # don't bother sending it at all if the user set it to false, so it won't
        # have a chance to overwrite the old value.
        # Same deal for category.
        self.deleteElement(kRadioWeblogPostFlNotOnHomePage)
        self.deleteElement(kCategory)
        # An element name of '' (as for 'postid') means "store nothing": the
        # test on element[1] at the bottom of the loop skips it.
        elementMapping = {'categories': kCategory, 'postid': (None, ''),
            'dateCreated': kPubDate, 'flNotOnHomePage': kRadioWeblogPostFlNotOnHomePage}
        # don't allow the postid to change
        for key, value in metaWeblogFormat.items():
            element = elementMapping.get(key)
            if key == 'dateCreated':
                if value:    # canonicalize date
                    if isinstance(value, W3CDate.W3CDate):
                        d = value
                    else:
                        if isinstance(value, xmlrpclib.DateTime):
                            value = value.value
                        else:
                            value = str(value)
                        d = W3CDate.W3CDate()
                        d.parse(value)
                    value = d.getRFC822()
            elif key == 'flNotOnHomePage':
                if not value:               # test the xmlrpclib.Boolean value
                    value = None            # deletes element for false values, since existence == truth
                else:
                    value = ''              # for RSS, empty-string element is true
            elif element is None:
                if string.find(key, ':') != -1:
                    # it's an encoded namespace substruct. The MetaWeblog API namespace
                    # encoding isn't reversible in the general case, so we use
                    # a heuristic: only namespace URIs will contain a colon.
                    # (If some future MetaWeblog extension defines a legitimate
                    # struct member with a colon, we couldn't write it to XML anyway,
                    # since XML uses the colon to delimit namespace qualifiers.)
                    for subkey, subvalue in value.items():
                        self._setFromMetaWeblogStructMember((key, subkey), subvalue)
                    # the sub-elements are stored already; skip the final store
                    element = (None, '')
                else:
                    if key == 'permaLink':
                        element = kGuid
                        value = {'isPermaLink': 'true', '_value': value}
                    #elif key == 'source':
                    #    element = kSource
                    #    ... add code here to transform 'name' attr, if it exists, into '_value'
                    else:
                        element = (None, key)
            if element[1]: self._setFromMetaWeblogStructMember(element, value)
            
    def _setFromMetaWeblogStructMember(self, element, member):
        """Store one MetaWeblog struct member as the given element.
        
        A non-empty struct is read as attribute values, with the '_value'
        member giving the element content ('' if absent). Anything else is
        used as the content itself, without attributes. A content of None
        with no attributes deletes the element."""
        attrs = {}
        value = member      # a plain value, no attributes
        if member and hasattr(member, 'items'):
            # parse a struct of attribute values, including '_value' for the element content
            value = ''
            for attrname, attrvalue in member.items():
                if attrname == '_value':
                    value = attrvalue
                else:
                    attrs[(None, attrname)] = attrvalue
        if value is None and not attrs:
            self.deleteElement(element)
        else:
            self.changeElement(element, attrs, value)
            
    def setFromItem(self, item):
        """Copy attributes from another instance of this class, so
        that the other instance overrides this one whenever attributes overlap.
        
        For some attributes, their absence conveys information, so we have
        to delete those first from our own copy so the other item has a chance
        at transferring that info.
        
        An element that the other item once held but has since deleted is
        deleted here as well. Multi-valued elements are copied with all of
        their values."""
        self.deleteElement(kRadioWeblogPostFlNotOnHomePage)
        self.deleteElement(kCategory)
        
        didSetDescription = didSetContentEncoded = 0
        for tag, attrs, text in item.elementsList:
            if tag in item.changedElements:
                if tag not in item.elementsDict:
                    # The other item deleted this element. There is nothing
                    # to look up in its dicts, so pass the deletion on.
                    self.deleteElement(tag)
                    continue
                attrs = item.elementAttrsDict.get(tag, {})
                text = item.elementsDict[tag]
            elif tag in item.multiValuedElements:
                # This entry holds just one of the values. Copy the whole
                # list, or each entry would overwrite the one before it.
                values = item.elementsDict.get(tag)
                if type(values) is type([]):
                    text = list(values)
            self.changeElement(tag, attrs, text)
            if tag == kDescription:
                didSetDescription = 1
            elif tag == kContentEncoded:
                didSetContentEncoded = 1
        # if we set only one of 'description' or 'content:encoded', delete the other one
        # Otherwise, when the user edited one of them the other one would get out of sync.
        # An older content:encoded would even override a newly edited description, which
        # would be very confusing.
        if didSetContentEncoded and not didSetDescription:
            self.deleteElement(kDescription)
        elif didSetDescription and not didSetContentEncoded:
            self.deleteElement(kContentEncoded)
            
    def getBloggerFormat(self):
        """Return a struct in Blogger API format: content, postid, optional title & category, dateCreated
        
        The title and the id of the first category are prepended to the
        content as <title> and <category> tags. The category tag is left
        out if the item has no categories, no weblog was given, or the
        weblog does not know the category. An 'author' member is included
        if the item has an author element.
        
        Raises NoPostIDException if no post id can be determined."""
        item = {}
        item['content'] = self.elementsDict.get(kContentEncoded) or self.elementsDict.get(kDescription,'')
        item['postid'] = self.getBloggerID()
        # the category list can be present but empty, after an edit that
        # removed all categories
        categories = self.elementsDict.get(kCategory)
        if categories and self.weblog is not None:
            categoryID = self.weblog.getCategoryIDByName(categories[0])
            if categoryID is not None:
                item['content'] = ('<category>' + xmlrpclib.escape(str(categoryID))
                    + '</category>' + item['content'])
        if kTitle in self.elementsDict:
            item['content'] = ('<title>' + xmlrpclib.escape(self.elementsDict[kTitle])
                + '</title>' + item['content'])
        date = self.getDate()
        if date is not None:
            item['dateCreated'] = xmlrpclib.DateTime(date.getXMLRPC())
        if kAuthor in self.elementsDict:
            item['author'] = self.elementsDict[kAuthor]
        return item
    
    def getMetaWeblogFormat(self):
        result = {}
        specialElements = {kDCDate: '', kPubDate: '', kCategory: 'categories',
            kRadioWeblogPostID: 'postid', kContentEncoded: '', kDCDate: '',
            kRadioWeblogPostFlNotOnHomePage: ''}
            # rename or delete these elements. The deleted elements will be handled later.
        if self.itemAttrs and self.itemAttrs.has_key(kRDFAbout):
            result['permaLink'] = self.itemAttrs[kRDFAbout]
        for tag, value in self.elementsDict.items():
            attrs = self.elementAttrsDict.get(tag)
            if attrs:
                valueStruct = {}
                for attrname, attrvalue in attrs.items():
                    if type(attrname) is type(()):
                        attrname = attrname[1]
                    valueStruct[str(attrname)] = attrvalue
                if value:
                    valueStruct['_value'] = value
                # +++ for source element, should use 'name' attr instead of '_value'
                value = valueStruct
            specialKey = specialElements.get(tag)
            if specialKey:                  # simple rename
                result[specialKey] = value
            elif specialKey == '':          # delete element (it's a special case to handle later)
                pass 
            elif tag[0]:                    # namespace, put in sub-struct by namespace URI
                result.setdefault(str(tag[0]), {})[str(tag[1])] = value
            else:
                result[str(tag[1])] = value # regular value, no namespace
        
        # handle special cases: semantically duplicate elements, etc.
        descriptionOverride = self.elementsDict.get(kContentEncoded)
        if descriptionOverride:
            # weblogging tools such as NetNewsWire ignore content:encoded,
            # so move it into the description instead, which it overrides
            result['description'] = descriptionOverride
        
        if not result.get('postid'):
            try:
                result['postid'] = self.getBloggerID()
            except NoPostIDException:
                pass
        #if self.elementsDict.has_key('category'):
        #    result['categories'] = self.elementsDict['category']
        try:
            date = self.getDate()
        except TypeError, e:
            pass    # don't torpedo the whole call if the item's date is invalid
        else:
            if date is not None:
                result['dateCreated'] = xmlrpclib.DateTime(date.getXMLRPC())
        permaLink = self.getPermaLink()
        if permaLink: result['permaLink'] = permaLink
        
        if self.elementsDict.has_key(kRadioWeblogPostFlNotOnHomePage):
            result['flNotOnHomePage'] = xmlrpclib.True
            # existence is truth in RSS, but it's a non-namespaced boolean in XML-RPC
        
        # doesn't handle userid (numeric), source, link, enclosure
        return result
        
    def pipeToRSS(self, out):
        """recreate the item in RSS format on a SAX-compatible output
        
        Elements are written in elementsList order. For a changed tag the
        current elementsDict content is written once, at the position of
        the tag's first entry; a deleted tag is not written at all. List
        content is written as one element per list member."""
        itemName = (None, 'item')
        out.startElementNS(itemName, 'item', self.itemAttrs)
        out.ignorableWhitespace('\n')
        alreadyEmitted = {}
        for tag, attrs, text in self.elementsList:
            if tag in self.changedElements:
                # Skip repeats of a changed tag (all of its values went out
                # with the first entry) and tags that have been deleted.
                if tag in alreadyEmitted or tag not in self.elementsDict:
                    continue
                alreadyEmitted[tag] = 1
                text = self.elementsDict[tag]
                attrs = self.elementAttrsDict.get(tag, {})
            qname = tag[1]
            if tag[0]:
                prefix = kNamespacePrefix.get(tag[0])
                if prefix is not None:
                    qname = prefix + ':' + qname
            if type(text) is type([]):
                values = text
            else:
                values = [text]
            for value in values:
                out.startElementNS(tag, qname, attrs)
                if value:
                    out.characters(value)
                out.endElementNS(tag, qname)
                out.ignorableWhitespace('\n')
        out.endElementNS(itemName, 'item')

    def getDate(self):
        """Return the item's date as a W3CDate, or None if it has none.
        
        pubDate, the stable creation date, is preferred and parsed as
        RFC 822. Failing that the dc:date element, which we use as a mod
        date, is tried."""
        dateobj = W3CDate.W3CDate()
        pubdate = self.elementsDict.get(kPubDate)
        if pubdate:
            dateobj.parseRFC822(pubdate)
            return dateobj
        dcdate = self.elementsDict.get(kDCDate)
        if dcdate:
            dateobj.parse(dcdate)
            return dateobj
        # +++ search for dcterms: dates
        return None
    
    def getPermaLink(self):
        """Return the guid's content if the guid element carries an
        isPermaLink attribute equal to 'true' (in any letter case),
        otherwise None. The attribute is looked up under the plain name
        first, then under the (None, 'isPermaLink') name pair."""
        if kGuid not in self.elementsDict or kGuid not in self.elementAttrsDict:
            return None
        guidAttrs = self.elementAttrsDict[kGuid]
        flag = guidAttrs.get('isPermaLink','') or guidAttrs.get((None,'isPermaLink'),'')
        if flag.lower() == 'true':
            return self.elementsDict[kGuid]
        return None

    bloggerIDFromGuid1 = re.compile(".*#[^0-9]*([0-9]+)$")  # URL with anchor & postid suffix
    bloggerIDFromGuid2 = re.compile("^([0-9]+).*")          # postid prefix
    
    def getBloggerID(self):
        """Return a blogger ID for the given post, which is used by
        Blogger API clients to edit or delete the post. Though no
        limits are specified in the Blogger API documentation, some clients
        limit the ID to be an integer. BlogApp can't handle IDs of more
        than about 10 digits without dropping the rightmost few in scientific
        notation. So we first use the radioWeblogPost:id element if it has
        content, then try extracting an integer blogger ID from the existing
        guid. If neither gives an ID, we generate one based on the post date.
        
        Raises NoPostIDException if none of these yields an ID."""
        postid = self.elementsDict.get(kRadioWeblogPostID)
        if not postid and kGuid in self.elementsDict:
            testguid = self.elementsDict[kGuid]
            match = self.bloggerIDFromGuid1.match(testguid)
            if not match:
                match = self.bloggerIDFromGuid2.match(testguid)
            if match:
                postid = match.group(1)
        if not postid:
            # Also reached for an empty (but present) id element, which must
            # not prevent the date-based fallback.
            date = self.getDate()
            if date is not None:
                postid = time.strftime("%m%d%H%M%S", date.getDateTuple())
        if not postid:
            raise NoPostIDException("Could not generate a post id, because there was no post date to go by.")
        return str(postid)  # str de-Unicodes if necessary
    
    def setCreationDate(self, dateobj = None):
        """Set pubDate to the given W3CDate in RFC 822 form. Without a
        dateobj, a new W3CDate set to the current date is used."""
        if dateobj is None:
            dateobj = W3CDate.W3CDate()
            dateobj.setCurrentDate()
        stamp = dateobj.getRFC822()
        self.changeElement(kPubDate, {}, stamp)
    
    def setModificationDate(self, dateobj = None):
        """Set dcterms:modified from the given W3CDate. Without a dateobj,
        a new W3CDate set to the current date is used.
        
        (A disabled earlier version wrote the dc:date element instead.)"""
        if dateobj is None:
            dateobj = W3CDate.W3CDate()
            dateobj.setCurrentDate()
        # NOTE(review): getRFC8601 is the name as the code has always
        # spelled it -- presumably W3CDate's ISO 8601 formatter; confirm
        # against the W3CDate module.
        stamp = dateobj.getRFC8601()
        self.changeElement(kDCTermsModified, {}, stamp)
        
    def setCategories(self, categoryList):
        """Replace the item's categories with categoryList (a list of
        category names, or a single name)."""
        noAttrs = {}
        self.changeElement(kCategory, noAttrs, categoryList)
    
    def getCategories(self):
        """Return the item's category content as held in elementsDict, or
        None if the item has no category element."""
        try:
            return self.elementsDict[kCategory]
        except KeyError:
            return None

    def setGUID(self, guid, isPermaLink):
        """Set the guid element. If isPermaLink is true the element gets an
        isPermaLink="true" attribute, otherwise no attributes."""
        if isPermaLink:
            attrs = {'isPermaLink': 'true'}
        else:
            attrs = {}
        self.changeElement(kGuid, attrs, guid)
    
    def setPostID(self, postid):
        """Store postid as the item's radioWeblogPost:id element."""
        noAttrs = {}
        self.changeElement(kRadioWeblogPostID, noAttrs, postid)
    
    def autoSetPostID(self):
        """Store the ID computed by getBloggerID() as the item's post id.
        
        The post date must have already been set, unless an ID can be
        taken from an existing id or guid element; otherwise getBloggerID()
        raises NoPostIDException."""
        postid = self.getBloggerID()
        self.setPostID(postid)
        
class RSSFilter(XMLFilter.XMLFilter):
    """XMLFilter that (optionally) parses each item into an RSSItem instance
    instead of passing the xml code through. At the start of the item,
    self.shouldParseItem() returns a boolean, which if true causes
    all XML to be diverted to a new post object stored as self._currentitem.
    While self._currentitem is None, the XML is passed through as usual.

    Subclasses hook in through shouldParseItem(), itemFinished() and
    isDone()."""
    def __init__(self, nextFilter):
        XMLFilter.XMLFilter.__init__(self, nextFilter)
        self._currentitem = None        # RSSItem being collected, or None
        self._currenttext = None        # text of the child element being collected
        self._currentelemattrs = None   # attributes of that child element
        self._suppressNextWhitespace = 0

    def getRecommendedFeatures(self):
        """The recommended features (which clients should pass along to the
        SAX parser) enable namespace parsing and disable external entities.
        (So there's no network traffic even if the document were to specify
        a DTD.)

        """
        return {XMLFilter.feature_namespaces: 1,
                XMLFilter.feature_external_ges: 0,
                XMLFilter.feature_external_pes: 0}

    def startElementNS(self, nameTuple, qname, attrs):
        """Begin collecting a child element, begin diverting a new item, or
        pass the start tag through to the next filter."""
        self._suppressNextWhitespace = 0
        if self._currentitem is not None:
            # Inside a diverted item: start accumulating this element's text.
            self._currenttext = ''
            self._currentelemattrs = attrs
        elif nameTuple[1] == 'item' and nameTuple[0] in kCoreNamespaceAlternatives \
            and self.shouldParseItem():    # +++ NS check
            # shouldParseItem() is deliberately evaluated last, so it is
            # consulted once per item start tag and never for other elements.
            self._currentitem = RSSItem()
            if attrs: self._currentitem.setItemAttributes(attrs)
        else:
            XMLFilter.XMLFilter.startElementNS(self, nameTuple, qname, attrs)

    def shouldParseItem(self):
        """overrideable: return true to divert the item that is starting
        into an RSSItem, false to pass its XML through."""
        return 1

    def itemFinished(self, item):
        """overrideable: called with each diverted RSSItem once its end tag
        has been seen."""
        pass

    def characters(self, data):
        """Append text to the element being collected, or pass it through
        when no item is being diverted."""
        if self._suppressNextWhitespace:
            if data.strip() == '':
                return
            self._suppressNextWhitespace = 0
        if self._currenttext is not None:
            self._currenttext = self._currenttext + data
        elif self._currentitem is None:
            XMLFilter.XMLFilter.characters(self, data)
        # Otherwise we are inside an item but between its child elements;
        # that text is dropped.

    def endElementNS(self, nameTuple, qname):
        """Finish the element or item being collected, or pass the end tag
        through to the next filter."""
        self._suppressNextWhitespace = 0
        item = self._currentitem
        if item is None:
            XMLFilter.XMLFilter.endElementNS(self, nameTuple, qname)
            return

        currenttext = self._currenttext
        currentelemattrs = self._currentelemattrs
        self._currentelemattrs = self._currenttext = None
        if nameTuple[1] == 'item' and nameTuple[0] in kCoreNamespaceAlternatives: # +++ NS check
            # Clear the item before the callback so that anything the
            # callback pipes back through this filter is not diverted into it.
            self._currentitem = None
            self.itemFinished(item)
        else:
            # if the element is in an RSS namespace (1.0 or 2.0), normalize it
            # to having no namespace, so the RSSItem can recognize it easily.
            # The namespace URI (nameTuple[0]) is what must be looked up, as
            # in every other namespace check in this module; testing the
            # whole tuple would never match.
            if nameTuple[0] is not None and nameTuple[0] in kCoreNamespaceAlternatives:
                nameTuple = (None, nameTuple[1])
            item.addElement(nameTuple, currentelemattrs, currenttext)

    def ignorableWhitespace(self, data):
        """Handle ignorable whitespace like characters(), except that while
        suppression is active it is dropped without clearing the flag."""
        if self._suppressNextWhitespace:
            pass
        elif self._currenttext is not None:
            self._currenttext = self._currenttext + data
        elif self._currentitem is None:
            XMLFilter.XMLFilter.ignorableWhitespace(self, data)

    def suppressNextWhitespace(self):
        """Drop whitespace-only text until the next tag or non-blank text."""
        self._suppressNextWhitespace = 1

    def isDone(self):
        """overrideable: the base filter never reports itself finished."""
        return 0

class RSSAdder(XMLFilter.XMLFilter):
    """Prepend a post to an RSS XML stream. Not necessary to inherit from
    RSSFilter because we don't need to parse any RSS items.

    The new post is written just before the first item element, or just
    before the end of the channel element when no item came first."""
    def __init__(self, out, newPost):
        XMLFilter.XMLFilter.__init__(self, out)
        self._newPost = newPost     # becomes None once the post is written

    def getRecommendedFeatures(self):
        """The recommended features (which clients should pass along to the
        SAX parser) enable namespace parsing and disable external entities.
        (So there's no network traffic even if the document were to specify
        a DTD.)

        """
        features = {}
        features[XMLFilter.feature_namespaces] = 1
        features[XMLFilter.feature_external_ges] = 0
        features[XMLFilter.feature_external_pes] = 0
        return features

    def startElementNS(self, nameTuple, qname, attrs):
        """Write the pending post ahead of the first item, then pass the
        start tag through."""
        if self._newPost is not None:
            if nameTuple[1] == 'item' and nameTuple[0] in kCoreNamespaceAlternatives:   # +++ NS check
                self._emitNewPost()
        XMLFilter.XMLFilter.startElementNS(self, nameTuple, qname, attrs)

    def _emitNewPost(self):
        """Pipe the pending post through this filter, followed by two
        newlines."""
        # Forget the post before piping it: its own item element comes back
        # through startElementNS() and must not trigger another emit.
        pending, self._newPost = self._newPost, None
        pending.pipeToRSS(self)
        for _ in (1, 2):
            self.ignorableWhitespace("\n")

    def endElementNS(self, nameTuple, qname):
        """Write the pending post before the channel closes if no item
        element came first, then pass the end tag through."""
        if self._newPost is not None:
            if nameTuple[1] == 'channel' and nameTuple[0] in kCoreNamespaceAlternatives:    # +++ NS check
                self._emitNewPost()
        XMLFilter.XMLFilter.endElementNS(self, nameTuple, qname)

    def endDocument(self):
        """Raise NoChannelForNewItemException if the post was never written;
        otherwise finish the document normally."""
        if self._newPost is not None:
            raise NoChannelForNewItemException("Could not save new post; RSS file did not contain a channel element")
        XMLFilter.XMLFilter.endDocument(self)

    def isDone(self):
        """True once the new post has been written."""
        return self._newPost is None

class RSSEditor(RSSFilter):
    """Filter an XML RSS stream, replacing a particular post with an updated version.
    The new post is substituted when a target postid comes along.

    Items are only parsed while a replacement is still pending; once the
    target has been handled the rest of the stream passes straight through.
    """

    def __init__(self, out, postid, newPost):
        RSSFilter.__init__(self, out)
        self._newPost = newPost
        self._postid = str(postid)

    def shouldParseItem(self):
        """Parse items only while the replacement post is still pending."""
        return self._newPost is not None

    def itemFinished(self, item):
        """Write the parsed item back out, updated from the new post when
        its ID is the target ID."""
        pending = self._newPost
        if str(item.getBloggerID()) == self._postid:
            # NOTE(review): this "delete" path needs a newPost that is false
            # yet not None (shouldParseItem() rejects None) -- confirm how
            # callers request a deletion.
            if pending:
                item.setFromItem(pending)
            else:
                item = None
            afterwards = None       # target handled; nothing left to do
        else:
            afterwards = pending    # still looking for the target

        # The item is piped back through this same filter, so blank out
        # _newPost while doing so; otherwise its own item element would be
        # diverted and parsed again.
        self._newPost = None
        if item is None:
            # delete trailing whitespace as well as the item
            self.suppressNextWhitespace()
        else:
            item.pipeToRSS(self)
        self._newPost = afterwards

    def endDocument(self):
        """Finish the document via RSSFilter.endDocument()."""
        RSSFilter.endDocument(self)

    def isDone(self):
        """True once no replacement is pending."""
        return self._newPost is None

class RSSReplacer(XMLFilter.XMLFilter):
    """Filter an XML RSS stream, dropping all posts and replacing them with
    the given posts, if any.
    The channel info is preserved, making this useful for making a new empty file
    from a 'sample' RSS file.

    The replacement posts are written just before the channel's end tag.
    """

    def __init__(self, nextFilter, items = None):
        """nextFilter receives the filtered stream. items is an optional
        sequence of posts (objects providing pipeToRSS()) to insert; it
        defaults to None rather than a shared mutable list."""
        XMLFilter.XMLFilter.__init__(self, nextFilter)
        self._ignoring = 0              # true while inside a dropped item
        self._items = items or []       # set to None once written out
        self._suppressNextWhitespace = 0

    def getRecommendedFeatures(self):
        """The recommended features (which clients should pass along to the
        SAX parser) enable namespace parsing and disable external entities.
        (So there's no network traffic even if the document were to specify
        a DTD.)

        """
        return {XMLFilter.feature_namespaces: 1,
                XMLFilter.feature_external_ges: 0,
                XMLFilter.feature_external_pes: 0}

    def startElementNS(self, nameTuple, qname, attrs):
        """Start dropping at an item element (while the replacement posts
        are still pending); otherwise pass the start tag through."""
        if not self._ignoring:
            if (nameTuple[1] == 'item' and nameTuple[0] in kCoreNamespaceAlternatives) and self._items is not None: # +++ NS check
                self._ignoring = 1
            else:
                self._suppressNextWhitespace = 0
                XMLFilter.XMLFilter.startElementNS(self, nameTuple, qname, attrs)

    def characters(self, data):
        """Pass text through unless it lies inside a dropped item or is
        whitespace directly following one."""
        if not self._ignoring:
            if self._suppressNextWhitespace:
                if data.strip() == '':
                    return
                self._suppressNextWhitespace = 0
            XMLFilter.XMLFilter.characters(self, data)

    def endElementNS(self, nameTuple, qname):
        """Write the replacement posts before the channel's end tag, finish
        dropping at an item's end tag, and pass other end tags through."""
        if nameTuple[1] == 'channel' and nameTuple[0] in kCoreNamespaceAlternatives:    # +++ NS check
            self._ignoring = 0
            items = self._items
            if items is not None:
                # Clear _items first: the posts are piped back through this
                # filter, and startElementNS() must not drop their items.
                self._items = None
                for post in items:
                    post.pipeToRSS(self)
        if not self._ignoring:
            self._suppressNextWhitespace = 0
            XMLFilter.XMLFilter.endElementNS(self, nameTuple, qname)
        elif nameTuple[1] == 'item' and nameTuple[0] in kCoreNamespaceAlternatives:   # +++ NS check
            # End of a dropped item: resume output, minus the whitespace
            # that trailed the item.
            self._ignoring = 0
            self._suppressNextWhitespace = 1

    def ignorableWhitespace(self, data):
        """Pass ignorable whitespace through unless dropping or suppressing."""
        if not self._ignoring and not self._suppressNextWhitespace:
            XMLFilter.XMLFilter.ignorableWhitespace(self, data)

    def isDone(self):
        """True once the replacement posts have been written."""
        return self._items is None

class RSSLister(RSSFilter):
    """Collect parsed RSS items in a Python list, stopping after an optional
    maximum number of items."""

    def __init__(self, maxposts = None):
        """maxposts caps how many items are collected; None means no cap.

        The next filter in the chain is an empty XMLSAXHandler (the bit
        bucket) because the XML itself does not need to be saved anywhere."""
        bitBucket = XMLFilter.XMLSAXHandler()
        RSSFilter.__init__(self, bitBucket)
        self._items = []
        self._numposts = 0
        self._maxposts = maxposts

    def shouldParseItem(self):
        """Parse the next item unless the cap has already been reached."""
        limit = self._maxposts
        return limit is None or self._numposts < limit

    def itemFinished(self, item):
        """Add the parsed item to the result and count it."""
        self._items.append(item)
        self._numposts = self._numposts + 1

    def getResult(self):
        """Return the list of accumulated posts."""
        return self._items

    def isDone(self):
        """True once a cap was given and that many items were collected."""
        limit = self._maxposts
        return limit is not None and self._numposts >= limit

class RSSFilteredLister(RSSFilter):
    """Accumulate parsed RSS items into a Python list, keeping only those
    that pass the given filters: a position range within the stream
    (minNumber..maxNumber, counting item elements from 1), a date range
    (minDate..maxDate) and a category name (compared case-insensitively)."""

    def __init__(self, minDate = None, maxDate = None, minNumber = None, maxNumber = None,
        category = None):
        """Every filter is optional; None (or an empty category) disables it.
        minDate and maxDate must provide getXMLRPC().

        The next filter in the chain is an empty XMLSAXHandler (the bit
        bucket) because we don't need to save the XML anywhere."""

        RSSFilter.__init__(self, XMLFilter.XMLSAXHandler())
        self._minNumber = minNumber
        self._maxNumber = maxNumber
        self._category = category
        self._numposts = 0      # item elements seen so far (counted at their start tag)

        self._minDate = self._maxDate = None
        if minDate is not None:
            self._minDate = minDate.getXMLRPC()
        if maxDate is not None:
            self._maxDate = maxDate.getXMLRPC()
        if self._category:
            self._category = self._category.lower()
        self._items = []

    def shouldParseItem(self):
        """Count the item that is starting and parse it only if its position
        lies within minNumber..maxNumber."""
        self._numposts = self._numposts + 1
        return (self._minNumber is None or self._numposts >= self._minNumber) \
            and (self._maxNumber is None or self._numposts <= self._maxNumber)

    def itemFinished(self, item):
        """Keep the parsed item if it passes the category and date filters.
        An item with no date is never rejected by the date filter."""
        if self._category:
            for postCategory in (item.getCategories() or []):
                if postCategory.lower() == self._category:
                    break
            else:
                return      # none of the item's categories matched
        if self._minDate is not None or self._maxDate is not None:
            itemDate = item.getDate()
            if itemDate is not None:
                itemDate = itemDate.getXMLRPC()
                if self._minDate is not None and self._minDate > itemDate: return
                if self._maxDate is not None and self._maxDate < itemDate: return
        self._items.append(item)

    def getResult(self):
        """Return the list of accumulated posts."""
        return self._items

    def isDone(self):
        """True once maxNumber items have been seen and none is still being
        collected.

        _numposts is incremented when an item starts, so the count reaches
        maxNumber while the last wanted item is still being parsed.
        Requiring _currentitem to be None keeps a caller that stops on
        isDone() from cutting that item off before itemFinished() runs."""
        return self._maxNumber is not None \
            and self._numposts >= self._maxNumber \
            and self._currentitem is None

class RSSGetPostID(RSSFilter):
    """Find the first item in an RSS stream that matches a given post ID or
    guid. Parsing of items stops once a match has been found."""
    def __init__(self, postid = None, guid = None):
        """postid is a string that looks like an integer, for Blogger API clients,
        which may not handle anything more. guid is the actual value from the RSS
        file, which may happen to contain the postid. Only specify one.

        A postid that is not None is normalized with str(), as RSSEditor
        and RSSPostIDChecker do, so that it can match the string returned
        by getBloggerID()."""

        RSSFilter.__init__(self, XMLFilter.XMLSAXHandler())
        if postid is not None:
            postid = str(postid)
        self._postid = postid
        self._guid = guid
        self._post = None   # will hold the post object, once found

    def shouldParseItem(self):
        """Parse items only until a matching post has been found."""
        return self._post is None   # haven't already found a post

    def itemFinished(self, item):
        """Remember the item if it matches the wanted post ID or guid."""
        # getBloggerID() is only consulted when searching by post ID.
        if (self._postid is not None and item.getBloggerID() == self._postid) or \
            (self._guid is not None and item.getAttribute('guid') == self._guid):
            self._post = item

    def getResult(self):
        """Return the matching post, or None if none was found."""
        return self._post

    def isDone(self):
        """True once a matching post has been found."""
        return self._post is not None

class RSSPostIDChecker(RSSFilter):
    """Count how many items in an RSS file carry the given postid."""
    def __init__(self, postid):
        # XMLSAXHandler() creates a null destination,
        # since we don't want to save the XML stream
        nullDestination = XMLFilter.XMLSAXHandler()
        RSSFilter.__init__(self, nullDestination)
        self.postsMatchingID = 0
        self.postid = str(postid)

    def itemFinished(self, item):
        """Count the item if its blogger ID equals the wanted postid."""
        if str(item.getBloggerID()) != self.postid:
            return
        self.postsMatchingID = self.postsMatchingID + 1

    def getResult(self):
        """Return the number of matching posts seen so far."""
        return self.postsMatchingID

    def isDone(self):
        """Never done: always continue on, in case there are more posts
        with the same ID."""
        return 0

