#!/usr/bin/python
# -*- coding: utf-8 -*-

import xml.etree.ElementTree as ET


class Syndication:
    """Convert between plain entry dictionaries and Atom (RFC 4287) entries."""

    @staticmethod
    def create_rfc4287_entry(feed_entry):
        """Build an Atom ``entry`` element from an entry dictionary.

        Args:
            feed_entry: Mapping with the required keys ``title``, ``summary``
                and ``link``. The keys ``tags`` (iterable of terms),
                ``published`` and ``updated`` are optional; falsy values are
                skipped.

        Returns:
            An ``xml.etree.ElementTree.Element`` tagged ``entry``.

        Raises:
            KeyError: If ``title``, ``summary`` or ``link`` is missing.
        """
        node_entry = ET.Element('entry')
        # The namespace is declared as a plain attribute, so the tags of the
        # in-memory tree are not namespace-qualified.
        node_entry.set('xmlns', 'http://www.w3.org/2005/Atom')

        # Title
        title = ET.SubElement(node_entry, 'title')
        title.set('type', 'text')
        title.text = feed_entry['title']

        # Summary
        summary = ET.SubElement(node_entry, 'summary')  # TODO Try 'content'
        summary.set('type', 'text')
        # summary.set('lang', feed_entry['summary_lang'])
        summary.text = feed_entry['summary']

        # Tags: one <category term="..."/> per tag.
        for term in feed_entry.get('tags') or ():
            tag = ET.SubElement(node_entry, 'category')
            tag.set('term', term)

        # Link
        link = ET.SubElement(node_entry, 'link')
        link.set('href', feed_entry['link'])

        # Links
        # for feed_entry_link in feed_entry['links']:
        #     link = ET.SubElement(node_entry, "link")
        #     link.set('href', feed_entry_link['url'])
        #     link.set('type', feed_entry_link['type'])
        #     link.set('rel', feed_entry_link['rel'])

        # Date saved
        if feed_entry.get('published'):
            published = ET.SubElement(node_entry, 'published')
            published.text = feed_entry['published']

        # Date edited
        if feed_entry.get('updated'):
            updated = ET.SubElement(node_entry, 'updated')
            updated.text = feed_entry['updated']

        return node_entry

    @staticmethod
    def extract_items(item_payload, limit=False):
        """Extract an entry dictionary from a namespaced Atom ``entry`` element.

        Args:
            item_payload: Element whose children are qualified with the Atom
                namespace (``{http://www.w3.org/2005/Atom}``).
            limit: When truthy, stop collecting tags once five were gathered.

        Returns:
            ``None`` when the payload has neither a ``title`` nor a ``link``
            child. Otherwise a dict with the keys ``title``, ``link``,
            ``summary``, ``published``, ``updated`` and ``tags``. Missing or
            empty text values are returned as ``''``; ``tags`` is a list.
        """
        namespace = '{http://www.w3.org/2005/Atom}'
        max_tag_length = 50  # Terms of this length or longer are dropped.
        max_tags = 5         # Upper bound on tags when ``limit`` is set.

        title = item_payload.find(namespace + 'title')
        links = item_payload.findall(namespace + 'link')
        if title is None and not links:
            return None

        title_text = '' if title is None else (title.text or '')

        # First non-empty href wins; defaults to '' so that an entry with a
        # title but no link no longer fails on an unassigned variable.
        link_href = ''
        for link in links:
            href = link.get('href')
            if href:
                link_href = href
                break

        # First summary carrying text wins.
        summary_text = ''
        for summary in item_payload.findall(namespace + 'summary'):
            if summary.text:
                summary_text = summary.text
                break

        published = item_payload.find(namespace + 'published')
        published_text = '' if published is None else (published.text or '')

        tags = []
        for category in item_payload.findall(namespace + 'category'):
            term = category.get('term')
            if not term or len(term) >= max_tag_length:
                continue
            tags.append(term)
            if limit and len(tags) >= max_tags:
                break

        # TODO Check the Blasta database for instances.
        return {
            'title': title_text,
            'link': link_href,
            'summary': summary_text,
            'published': published_text,
            'updated': published_text,  # TODO "Updated" is missing
            'tags': tags,
        }