
from atompubbase.model import Collection, Context  # http://feedvalidator.googlecode.com/svn/trunk/apptestsuite/client/atompubbase/
from elementtree.ElementTree import ElementTree, tostring, Element  # http://svn.effbot.org/public/elementtree-1.3/elementtree/
from httplib2 import Http
import re
import json

# Turn a YouTube favorites Atom feed into a podcast-style feed: for every
# entry whose video has an MP4 (fmt=18) version, append a
# <link rel="enclosure"> pointing at it, then write the modified feed to disk.
#
# Entries without an MP4 version are kept in the feed without an enclosure.
# Any unexpected condition (no HTML link, page fetch failure, missing swfArgs,
# missing content-length) aborts the run with a RuntimeError.

ATOM_NS = '{http://www.w3.org/2005/Atom}'
FEED_URL = 'http://gdata.youtube.com/feeds/users/markpasc/favorites'
OUTPUT_PATH = '/var/www/html/youcast.xml'

# Query string shared by the YouTube download URL and the iTunes redirect URL.
VIDEO_QUERY = 'video_id=%(video_id)s&t=%(t)s&l=%(l)s&fmt=18'

# The swfArgs are embedded in the watch page as a flat JS object literal.
SWF_ARGS_RE = re.compile(r'var swfArgs = (\{[^}]+\})')

# The standard-library json module exposes loads(); the older python-json
# package exposes read() instead. Use whichever the imported module provides.
parse_json = getattr(json, 'loads', None) or json.read

# Use atompubbase to parse the Atom feed.
feed = Collection(Context(collection=FEED_URL))
feedtree = ElementTree(feed.etree())

# One HTTP client is enough for every request made below.
web = Http()

for entry in feedtree.findall(ATOM_NS + 'entry'):
    # Find the HTML link. Use .get() so a <link> without a rel or type
    # attribute is skipped instead of raising KeyError; per RFC 4287 a missing
    # rel means "alternate".
    links = [
        link for link in entry.findall('./' + ATOM_NS + 'link')
        if link.get('rel', 'alternate') == 'alternate'
        and link.get('type') == 'text/html'
    ]
    if not links:
        print(tostring(entry))
        raise RuntimeError("Oops no links for " + str(entry) + "?!")
    html_url = links[0].get('href')

    # Grab the HTML version of the page, so we can get the swfArgs.
    (response, content) = web.request(html_url)
    if response.status != 200:
        raise RuntimeError("Ooops could not fetch " + html_url + " :/ ")

    # Extract the swfArgs and parse them. (Conveniently they're already a JS
    # hash, so treat it as JSON.)
    match = SWF_ARGS_RE.search(content)
    if not match:
        raise RuntimeError("No swfArgs in " + html_url + "?!")
    swf_args = parse_json(match.group(1))

    # Is the video there? A HEAD request is enough: only the status and the
    # content-length header are needed.
    vid_url = 'http://www.youtube.com/get_video?' + VIDEO_QUERY % swf_args
    (response, content) = web.request(vid_url, 'HEAD')

    # If we got a non-OK result (usually a 415?), there's no H.264 version of
    # this video. Leave it in the feed, but don't add an enclosure tag.
    if response.status != 200:
        continue

    # Make sure there's a content-length we can include in the tag.
    if 'content-length' not in response:
        raise RuntimeError("Ooops no content-length?! " + str(response))

    # iTunes podcast urls have to end in a recognized extension *before*
    # the query string, so use this redirect I set up.
    itunes_url = 'http://neologasm.org/youtube.mp4?' + VIDEO_QUERY % swf_args

    # Add the <link rel="enclosure"> to the entry.
    enc_link = Element(ATOM_NS + 'link')
    enc_link.set('rel', 'enclosure')
    enc_link.set('type', 'video/mp4')
    enc_link.set('href', itunes_url)
    enc_link.set('length', response['content-length'])
    entry.append(enc_link)

# Write out the modified feed.
feedtree.write(OUTPUT_PATH)

