2024-11-13 15:41:33 +01:00
|
|
|
#!/usr/bin/python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
import xml.etree.ElementTree as ET
|
|
|
|
|
2024-11-17 13:00:31 +01:00
|
|
|
class UtilitiesSyndication:
    """Helpers for converting between entry dictionaries and Atom XML.

    Both helpers are stateless and are exposed as static methods, so they
    can be called directly on the class.
    """

    @staticmethod
    def create_rfc4287_entry(feed_entry):
        """Build an Atom ``<entry>`` element from an entry dictionary.

        Parameters
        ----------
        feed_entry : dict
            Must provide the keys ``'title'``, ``'summary'`` and ``'link'``.
            The keys ``'tags'`` (iterable of terms), ``'published'`` and
            ``'updated'`` are optional; missing or empty values are skipped.

        Returns
        -------
        xml.etree.ElementTree.Element
            The ``<entry>`` element, with children in the order title,
            summary, categories, link, published, updated.

        Raises
        ------
        KeyError
            If ``'title'``, ``'summary'`` or ``'link'`` is missing.
        """
        node_entry = ET.Element('entry')
        node_entry.set('xmlns', 'http://www.w3.org/2005/Atom')

        # Title
        title = ET.SubElement(node_entry, 'title')
        title.set('type', 'text')
        title.text = feed_entry['title']

        # Summary
        summary = ET.SubElement(node_entry, 'summary')  # TODO Try 'content'
        summary.set('type', 'text')
        # TODO Set 'lang' from feed_entry['summary_lang'].
        summary.text = feed_entry['summary']

        # Tags: tolerate a missing key or a None value, like the date fields.
        for term in feed_entry.get('tags') or ():
            ET.SubElement(node_entry, 'category').set('term', term)

        # Link
        link = ET.SubElement(node_entry, 'link')
        link.set('href', feed_entry['link'])
        # TODO Emit one <link> (href, type, rel) per item of
        #      feed_entry['links'].

        # Date saved, then date edited; each is emitted only when non-empty.
        for field in ('published', 'updated'):
            value = feed_entry.get(field)
            if value:
                ET.SubElement(node_entry, field).text = value

        return node_entry

    @staticmethod
    def extract_items(item_payload, limit=False):
        """Convert an Atom ``<entry>`` element into an entry dictionary.

        Parameters
        ----------
        item_payload : xml.etree.ElementTree.Element
            Element whose children live in the
            ``http://www.w3.org/2005/Atom`` namespace.
        limit : bool, optional
            When true, stop collecting tags once five have been gathered.

        Returns
        -------
        dict or None
            ``None`` when the element has neither a ``<title>`` nor a
            ``<link>`` child. Otherwise a dictionary with the keys
            ``'title'``, ``'link'``, ``'summary'``, ``'published'``,
            ``'updated'`` and ``'tags'``. Text values are never ``None``;
            absent or empty elements yield ``''``. ``'updated'`` falls back
            to the published value when there is no ``<updated>`` text.
        """
        namespace = '{http://www.w3.org/2005/Atom}'

        title = item_payload.find(namespace + 'title')
        links = item_payload.findall(namespace + 'link')
        if title is None and not links:
            return None

        title_text = '' if title is None else (title.text or '')

        # First non-empty href wins; '' when there is no usable link, so
        # the name is always bound even for entries without <link>.
        link_href = next(
            (href for href in (link.get('href') for link in links) if href),
            '')

        # First <summary> that actually carries text.
        summary_text = next(
            (summary.text
             for summary in item_payload.findall(namespace + 'summary')
             if summary.text),
            '')

        published_text = item_payload.findtext(namespace + 'published') or ''
        updated_text = (item_payload.findtext(namespace + 'updated')
                        or published_text)

        tags = []
        for category in item_payload.findall(namespace + 'category'):
            category_term = category.get('term')
            # Terms of 50 characters or more are dropped.
            if category_term and len(category_term) < 50:
                tags.append(category_term)
            if limit and len(tags) > 4:
                break

        # TODO Check the Blasta database for instances.

        return {'title': title_text,
                'link': link_href,
                'summary': summary_text,
                'published': published_text,
                'updated': updated_text,
                'tags': tags}
|