"""Add iTunes-compatible MP4 enclosures to a YouTube favorites Atom feed.

The script fetches the favorites feed, and for every entry:

1. finds the entry's HTML (watch page) link,
2. downloads that page and extracts the ``swfArgs`` JavaScript hash,
3. issues a HEAD request for the ``fmt=18`` download URL built from those
   arguments, and
4. if that request succeeds, appends an Atom ``link rel="enclosure"``
   element pointing at a redirect URL that ends in ``.mp4``.

Entries whose video request does not return 200 are left in the feed
without an enclosure.  The modified feed is written to ``OUTPUT_PATH``.

Raises:
    RuntimeError: if an entry has no HTML link, the watch page cannot be
        fetched, the page has no ``swfArgs``, or the video response lacks
        a ``content-length`` header.
"""
from atompubbase.model import Collection, Context  # http://feedvalidator.googlecode.com/svn/trunk/apptestsuite/client/atompubbase/
from elementtree.ElementTree import ElementTree, tostring, Element  # http://svn.effbot.org/public/elementtree-1.3/elementtree/
from httplib2 import Http
import re
import json

ATOM_NS = '{http://www.w3.org/2005/Atom}'
FEED_URL = 'http://gdata.youtube.com/feeds/users/markpasc/favorites'
OUTPUT_PATH = '/var/www/html/youcast.xml'

# Query string shared by the real download URL and the iTunes redirect URL.
VIDEO_URL = 'http://www.youtube.com/get_video?video_id=%(video_id)s&t=%(t)s&l=%(l)s&fmt=18'
ITUNES_URL = 'http://neologasm.org/youtube.mp4?video_id=%(video_id)s&t=%(t)s&l=%(l)s&fmt=18'

# Compiled once; the same pattern is applied to every watch page.
SWF_ARGS_RE = re.compile(r'var swfArgs = (\{[^}]+\})')


def _find_html_link(entry):
    """Return the first alternate text/html link element of *entry*, or None.

    A link with no ``rel`` attribute is treated as ``rel="alternate"``
    (the Atom default) instead of raising ``KeyError``; a link with no
    ``type`` attribute is simply not a match.
    """
    for link in entry.findall('./' + ATOM_NS + 'link'):
        if (link.get('rel', 'alternate') == 'alternate'
                and link.get('type') == 'text/html'):
            return link
    return None


def _parse_json(text):
    """Decode *text* with whichever ``json`` module is installed.

    The standard-library module exposes ``loads``; the older third-party
    python-json module exposes ``read`` instead, so fall back to that.
    """
    loads = getattr(json, 'loads', None)
    if loads is None:
        loads = json.read
    return loads(text)


# Use atompubbase to parse the Atom feed.
feed = Collection(Context(collection=FEED_URL))
feedtree = ElementTree(feed.etree())

# One HTTP client for the whole run rather than one per entry.
web = Http()

for entry in feedtree.findall(ATOM_NS + 'entry'):
    # Find the HTML link.  Compare against None explicitly: an Element
    # without children is falsy.
    link = _find_html_link(entry)
    if link is None:
        print(tostring(entry))
        raise RuntimeError("Oops no links for " + str(entry) + "?!")
    html_url = link.attrib['href']

    # Grab the HTML version of the page, so we can get the swfArgs.
    (response, content) = web.request(html_url)
    if response.status != 200:
        raise RuntimeError("Ooops could not fetch " + html_url + " :/ ")

    # Extract the swfArgs and parse them. (Conveniently they're already a JS
    # hash, so treat it as JSON.)
    match = SWF_ARGS_RE.search(content)
    if not match:
        raise RuntimeError("No swfArgs in " + html_url + "?!")
    swf_args = _parse_json(match.group(1))

    # Is the video there?
    vid_url = VIDEO_URL % swf_args
    (response, content) = web.request(vid_url, 'HEAD')

    # If we got a non-OK result (usually a 415?), there's no H.264 version of
    # this video. Leave it in the feed, but don't add an enclosure tag.
    if response.status != 200:
        continue

    # Make sure there's a content-length we can include in the tag.
    if 'content-length' not in response:
        raise RuntimeError("Ooops no content-length?! " + str(response))

    # iTunes podcast urls have to end in a recognized extension *before*
    # the query string, so use this redirect I set up.
    itunes_url = ITUNES_URL % swf_args

    # Add the enclosure link to the entry.
    enc_link = Element(ATOM_NS + 'link')
    enc_link.attrib.update({
        'rel': 'enclosure',
        'type': 'video/mp4',
        'href': itunes_url,
        'length': response['content-length'],
    })
    entry.append(enc_link)

# Write out the modified feed.
feedtree.write(OUTPUT_PATH)