Blasta/blasta/xml/syndication.py

89 lines
3.7 KiB
Python
Raw Normal View History

#!/usr/bin/python
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
class Syndication:
    """Helpers converting between feed-entry dictionaries and Atom entries.

    Both helpers are static: they neither read nor write instance state and
    can be called as ``Syndication.create_rfc4287_entry(...)`` or on an
    instance.
    """

    @staticmethod
    def create_rfc4287_entry(feed_entry):
        """Build an Atom (RFC 4287) ``entry`` element from a dictionary.

        Parameters
        ----------
        feed_entry : dict
            Required keys: ``'title'``, ``'summary'`` and ``'link'``.
            Optional keys: ``'tags'`` (iterable of category terms),
            ``'published'`` and ``'updated'``; optional keys that are
            missing or empty produce no element.

        Returns
        -------
        xml.etree.ElementTree.Element
            The newly created ``entry`` element.

        Raises
        ------
        KeyError
            If one of the required keys is missing.
        """
        node_entry = ET.Element('entry')
        # The namespace is set as a plain 'xmlns' attribute; the child
        # elements below are created with un-namespaced tag names.
        node_entry.set('xmlns', 'http://www.w3.org/2005/Atom')
        # Title
        title = ET.SubElement(node_entry, 'title')
        title.set('type', 'text')
        title.text = feed_entry['title']
        # Summary
        summary = ET.SubElement(node_entry, 'summary')  # TODO Try 'content'
        summary.set('type', 'text')
        #summary.set('lang', feed_entry['summary_lang'])
        summary.text = feed_entry['summary']
        # Tags (optional, like the dates below)
        if 'tags' in feed_entry and feed_entry['tags']:
            for term in feed_entry['tags']:
                tag = ET.SubElement(node_entry, 'category')
                tag.set('term', term)
        # Link
        link = ET.SubElement(node_entry, "link")
        link.set('href', feed_entry['link'])
        # Links
        # for feed_entry_link in feed_entry['links']:
        #     link = ET.SubElement(node_entry, "link")
        #     link.set('href', feed_entry_link['url'])
        #     link.set('type', feed_entry_link['type'])
        #     link.set('rel', feed_entry_link['rel'])
        # Date saved
        if 'published' in feed_entry and feed_entry['published']:
            published = ET.SubElement(node_entry, 'published')
            published.text = feed_entry['published']
        # Date edited
        if 'updated' in feed_entry and feed_entry['updated']:
            updated = ET.SubElement(node_entry, 'updated')
            updated.text = feed_entry['updated']
        return node_entry

    @staticmethod
    def extract_items(item_payload, limit=False):
        """Extract a feed-entry dictionary from a namespaced Atom element.

        Children are looked up under the ``http://www.w3.org/2005/Atom``
        namespace, so ``item_payload`` must carry namespaced tags (as
        produced by parsing a document that declares that namespace).

        Parameters
        ----------
        item_payload : xml.etree.ElementTree.Element
            Element whose direct children are the Atom entry fields.
        limit : bool, optional
            When true, stop collecting tags once five have been gathered.

        Returns
        -------
        dict or None
            ``None`` when the payload has neither a ``title`` nor a ``link``
            child. Otherwise a dictionary with the keys ``'title'``,
            ``'link'``, ``'summary'``, ``'published'``, ``'updated'`` (all
            strings, ``''`` when absent or empty) and ``'tags'`` (list of
            category terms shorter than 50 characters).
        """
        namespace = '{http://www.w3.org/2005/Atom}'
        title = item_payload.find(namespace + 'title')
        links = item_payload.findall(namespace + 'link')
        if title is None and not links:
            return None
        title_text = '' if title is None else title.text or ''
        # Use the first link that carries a non-empty href. The default
        # matters: an entry may have a title but no link element at all.
        link_href = ''
        for link in links:
            link_href = link.get('href', '')
            if link_href:
                break
        # Use the first summary element that has text.
        summary_text = ''
        for summary in item_payload.findall(namespace + 'summary'):
            summary_text = summary.text or ''
            if summary_text:
                break
        published = item_payload.find(namespace + 'published')
        published_text = '' if published is None else published.text or ''
        tags = []
        for category in item_payload.findall(namespace + 'category'):
            category_term = category.get('term')
            if not category_term:
                continue
            # Overly long terms are silently dropped.
            if len(category_term) < 50:
                tags.append(category_term)
            if limit and len(tags) > 4:
                break
        # TODO Check the Blasta database for instances.
        entry = {'title' : title_text,
                 'link' : link_href,
                 'summary' : summary_text,
                 'published' : published_text,
                 'updated' : published_text, # TODO "Updated" is missing
                 'tags' : tags}
        return entry