Create function "get_properties_of_feed";

Do not attempt to scan URL for items upon command "add";
Minor improvements;
Add more URLs to file feeds.toml.
Schimon Jehudah 2024-05-12 09:55:23 +00:00
parent d0e23b3eb9
commit 8bbad28894
7 changed files with 190 additions and 132 deletions

View file

@@ -1112,25 +1112,14 @@ async def add_feed(self, jid_bare, db_file, url, identifier):
             feed_updated = None
         else:
             feed_updated = None
-        entries_count = len(feed.entries)
-        feed_properties = {
-            "version" : '',
-            "encoding" : '',
-            "language" : '',
-            "rating" : '',
-            "entries_count" : entries_count,
-            "icon" : '',
-            "image" : '',
-            "logo" : '',
-            "ttl" : '',
-            "updated" : feed_updated,
-            }
+        feed_properties = get_properties_of_feed(db_file,
+                                                 feed_id, feed)
         await sqlite.update_feed_properties(db_file, feed_id,
                                             feed_properties)
         feed_id = sqlite.get_feed_id(db_file, url)
         feed_id = feed_id[0]
         new_entries = get_properties_of_entries(
-            self, jid_bare, db_file, url, feed_id, feed)
+            jid_bare, db_file, url, feed_id, feed)
         if new_entries:
             await sqlite.add_entries_and_update_feed_state(
                 db_file, feed_id, new_entries)
@@ -1528,9 +1517,45 @@ async def download_feed(self, db_file, feed_url):
         await sqlite.update_feed_status(db_file, feed_id, status_code)


+def get_properties_of_feed(db_file, feed_id, feed):
+    if feed.has_key('updated_parsed'):
+        feed_updated = feed.updated_parsed
+        try:
+            feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
+        except:
+            feed_updated = ''
+    else:
+        feed_updated = ''
+
+    entries_count = len(feed.entries)
+
+    feed_version = feed.version if feed.has_key('version') else ''
+    feed_encoding = feed.encoding if feed.has_key('encoding') else ''
+    feed_language = feed.feed.language if feed.feed.has_key('language') else ''
+    feed_icon = feed.feed.icon if feed.feed.has_key('icon') else ''
+    feed_image = feed.feed.image.href if feed.feed.has_key('image') else ''
+    feed_logo = feed.feed.logo if feed.feed.has_key('logo') else ''
+    feed_ttl = feed.feed.ttl if feed.feed.has_key('ttl') else ''
+
+    feed_properties = {
+        "version" : feed_version,
+        "encoding" : feed_encoding,
+        "language" : feed_language,
+        "rating" : '',
+        "entries_count" : entries_count,
+        "icon" : feed_icon,
+        "image" : feed_image,
+        "logo" : feed_logo,
+        "ttl" : feed_ttl,
+        "updated" : feed_updated,
+        }
+
+    return feed_properties
+
+
 # TODO get all active feeds of active accounts and scan the feed with the earliest scanned time
 # TODO Rename function name (idea: scan_and_populate)
-def get_properties_of_entries(self, jid_bare, db_file, feed_url, feed_id, feed):
+def get_properties_of_entries(jid_bare, db_file, feed_url, feed_id, feed):
     """
     Get new entries.
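Editor's note: for orientation, here is a minimal standalone sketch of the metadata-gathering pattern the new helper implements. It is hypothetical and not Slixfeed code — it uses only feedparser's public dict interface (`get`, `in`) rather than Slixfeed's `dt` and `sqlite` modules:

    # Hypothetical sketch: gather the same channel-level metadata that
    # get_properties_of_feed returns, using plain feedparser.
    import feedparser

    def sketch_properties(parsed):
        # Top-level metadata (version, encoding) lives on the result
        # object; channel metadata lives under parsed.feed. Missing
        # keys default to '' just as in the committed helper.
        return {
            "version": parsed.get("version", ""),
            "encoding": parsed.get("encoding", ""),
            "language": parsed.feed.get("language", ""),
            "entries_count": len(parsed.entries),
            "icon": parsed.feed.get("icon", ""),
            "logo": parsed.feed.get("logo", ""),
            "ttl": parsed.feed.get("ttl", ""),
        }

    parsed = feedparser.parse("https://demoniak.ch/index.xml")
    print(sketch_properties(parsed))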

View file

@@ -4,6 +4,12 @@
 # NOTE <presence xml:lang="fr"></presence>
 # TODO Consider splitting into files feeds-cz.toml, feeds-de.toml, feeds-el.toml.

+[[feeds]]
+lang = "ch-fr"
+name = "Demoniak Network"
+link = "https://demoniak.ch/index.xml"
+tags = ["computer", "technology"]
+
 [[feeds]]
 lang = "cs-cz"
 name = "Česká pirátská strana"
@@ -160,12 +166,42 @@ name = "Κόμμα Πειρατών Ελλάδας Pirate party of Greece"
 link = "https://www.pirateparty.gr/feed/"
 tags = ["greece", "party", "pirate"]

+[[feeds]]
+lang = "en"
+name = "Actual Activists"
+link = "https://actualactivists.com/feed/atom/"
+tags = ["news", "world", "conspiracy", "lifestyle", "government"]
+
 [[feeds]]
 lang = "en"
 name = "Cycling Together with Fiona and Marc"
 link = "https://pixelfed.social/users/cyclingtogether.atom"
 tags = ["sports", "cycling", "adventure", "life"]

+[[feeds]]
+lang = "en"
+name = "Debian Disguised Work"
+link = "https://disguised.work/atom.xml"
+tags = ["linux", "debian", "ubuntu", "industry"]
+
+[[feeds]]
+lang = "en"
+name = "Dig Deeper"
+link = "https://diggy.club/atom.xml"
+tags = ["linux", "health", "computer", "wisdom", "life", "industry"]
+
+[[feeds]]
+lang = "en"
+name = "Earth Newspaper"
+link = "https://earthnewspaper.com/feed/atom/"
+tags = ["technology", "world", "war", "politics"]
+
+[[feeds]]
+lang = "en"
+name = "Fedi.Tips"
+link = "https://fedi.tips/feed/atom/"
+tags = ["technology", "activitypub", "fediverse", "syndication"]
+
 [[feeds]]
 lang = "en"
 name = "Lagrange Gemini Client"
@@ -178,12 +214,36 @@ name = "[ngn.tf] | blog"
 link = "https://api.ngn.tf/blog/feed.atom"
 tags = ["computer", "service", "technology", "telecommunication", "xmpp"]

+[[feeds]]
+lang = "en"
+name = "RTP Blog"
+link = "http://righttoprivacy.i2p/rss/"
+tags = ["computer", "service", "technology", "telecommunication", "i2p", "privacy"]
+
+[[feeds]]
+lang = "en"
+name = "Techrights"
+link = "http://techrights.org/feed.xml"
+tags = ["linux", "bsd", "technology", "industry"]
+
 [[feeds]]
 lang = "en"
 name = "The SWORD Project"
 link = "http://www.crosswire.org/sword/sword.rss.jsp"
 tags = ["bible", "religion", "christianity", "history", "education", "life"]

+[[feeds]]
+lang = "en"
+name = "toastal"
+link = "https://toast.al/posts/feed.atom"
+tags = ["software", "computer", "code", "life"]
+
+[[feeds]]
+lang = "en"
+name = "Views from Apipucos"
+link = "https://apipucos.wordpress.com/feed/atom/"
+tags = ["poem", "computer", "brazil", "art", "life"]
+
 [[feeds]]
 lang = "en-au"
 name = "Pirate Party Australia"
@@ -424,6 +484,12 @@ name = "Jacob's Unnamed Blog"
 link = "https://jacobwsmith.xyz/feed.xml"
 tags = ["book", "community", "culture", "family", "finance", "lifestyle", "market", "usa"]

+[[feeds]]
+lang = "en-us"
+name = "Jerm Warfare"
+link = "https://jermwarfare.com/feed/atom"
+tags = ["culture", "war", "usa"]
+
 [[feeds]]
 lang = "en-us"
 name = "Juicing for Health"
@@ -487,7 +553,7 @@ tags = ["decentralization", "privacy"]
 [[feeds]]
 lang = "en-us"
 name = "nobulart"
-link = "https://nobulart.com/feed/"
+link = "https://nobulart.com/feed/atom/"
 tags = ["news", "survival", "politics", "usa", "world"]

 [[feeds]]
@@ -712,6 +778,12 @@ name = "United States Pirate Party"
 link = "https://uspirates.org/feed/"
 tags = ["party", "pirate", "usa"]

+[[feeds]]
+lang = "en-us"
+name = "Walt Heyer Ministries"
+link = "https://waltheyer.com/feed/atom/"
+tags = ["sex", "change", "homosexuality"]
+
 [[feeds]]
 lang = "en-us"
 name = "Xonotic"
@@ -724,18 +796,24 @@ name = "yaxim"
 link = "https://yaxim.org/atom.xml"
 tags = ["android", "germany", "jabber", "telecommunication", "xmpp"]

+[[feeds]]
+lang = "en-us"
+name = "You Are Not So Smart"
+link = "https://youarenotsosmart.com/feed/atom/"
+tags = ["communication", "culture", "psychology", "society"]
+
+[[feeds]]
+lang = "es-ar"
+name = "Cyberdelia"
+link = "https://cyberdelia.com.ar/feeds/all.atom.xml"
+tags = ["decentralization", "development", "electronics", "networking", "privacy", "selfhosting", "technology", "xmpp"]
+
 [[feeds]]
 lang = "es-es"
 name = "Disroot Blog"
 link = "https://disroot.org/es/blog.atom"
 tags = ["decentralization", "privacy"]

-[[feeds]]
-lang = "ch-fr"
-name = "Demoniak Network"
-link = "https://demoniak.ch/index.xml"
-tags = ["computer", "technology"]
-
 [[feeds]]
 lang = "fr-fr"
 name = "Agate Blue"

View file

@@ -156,7 +156,8 @@ def update_proxies(file, proxy_name, proxy_type, proxy_url, action='remove'):
     """
     data = open_config_file('proxies.toml')
     proxy_list = data['proxies'][proxy_name][proxy_type]
-    breakpoint()
+    # breakpoint()
+    print('####################### PROXY ######################')
     proxy_index = proxy_list.index(proxy_url)
     proxy_list.pop(proxy_index)
     with open(file, 'w') as new_file:
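Editor's note: the surrounding function removes a proxy URL from a list loaded from proxies.toml and writes the file back. A minimal standalone sketch of that remove-and-rewrite pattern, assuming stdlib tomllib (Python 3.11+) in place of Slixfeed's open_config_file:

    # Hypothetical sketch of the remove-and-rewrite pattern; the real
    # code goes through Slixfeed's own config helpers.
    import tomllib

    def remove_proxy(path, proxy_name, proxy_type, proxy_url):
        with open(path, 'rb') as f:
            data = tomllib.load(f)
        proxy_list = data['proxies'][proxy_name][proxy_type]
        proxy_list.remove(proxy_url)  # same effect as index() + pop()
        # Writing TOML back needs a writer, e.g. the third-party tomli_w:
        # import tomli_w
        # with open(path, 'wb') as f:
        #     tomli_w.dump(data, f)
        return data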

View file

@@ -27,6 +27,13 @@ TODO
 7) See project /offpunk/offblocklist.py

+NOTE
+
+1) You might not want to utilize aiohttp, because then you
+   can no longer scan as many feeds as possible all at once,
+   due to CPU spikes.
+   Consider https://pythonhosted.org/feedparser/http-useragent.html
+
 """

 from aiohttp import ClientError, ClientSession, ClientTimeout
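Editor's note: the feedparser page referenced in the NOTE documents per-call and global User-Agent settings, which is what letting feedparser fetch (instead of aiohttp) would look like. A minimal sketch — the version string is illustrative only:

    # Sketch: fetching via feedparser itself rather than aiohttp,
    # with a custom User-Agent per the feedparser HTTP documentation.
    import feedparser

    # Per-call agent (illustrative version string).
    d = feedparser.parse('https://demoniak.ch/index.xml',
                         agent='Slixfeed/0.1.60')

    # Or set a global default for every subsequent parse() call.
    feedparser.USER_AGENT = 'Slixfeed/0.1.60'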

View file

@@ -363,69 +363,19 @@ async def check_updates(self, jid_bare):
                 feed_id = sqlite.get_feed_id(db_file, url)
                 feed_id = feed_id[0]
                 if not result['error']:
-                    print('MID', 'sqlite.update_feed_status')
                     await sqlite.update_feed_status(db_file, feed_id, status_code)
                     document = result['content']
                     feed = parse(document)
-                    # # Skip Librarian
-                    # if 'librarian' in feed.feed.title.lower():
-                    #     print('Librarian RSS feeds are not supported by Slixfeed.')
-                    #     print('Ask the Librarian developers to migrate to Atom Syndication.')
-                    #     print('Skipping URL:', url)
-                    #     continue
-                    # # Skip Mastodon
-                    # if 'mastodon' in feed.feed.generator.lower():
-                    #     print('Mastodon RSS feeds are not supported by Slixfeed.')
-                    #     print('Ask the Mastodon developers to migrate to Atom Syndication.')
-                    #     print('Skipping URL:', url)
-                    #     continue
                     feed_valid = 0 if feed.bozo else 1
                     await sqlite.update_feed_validity(db_file, feed_id, feed_valid)
-                    if feed.has_key('updated_parsed'):
-                        feed_updated = feed.updated_parsed
-                        try:
-                            feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
-                        except:
-                            feed_updated = ''
-                    else:
-                        feed_updated = ''
-                    entries_count = len(feed.entries)
-                    feed_version = feed.version if feed.has_key('version') else ''
-                    feed_encoding = feed.encoding if feed.has_key('encoding') else ''
-                    feed_language = feed.feed.language if feed.feed.has_key('language') else ''
-                    feed_icon = feed.feed.icon if feed.feed.has_key('icon') else ''
-                    feed_image = feed.feed.image.href if feed.feed.has_key('image') else ''
-                    feed_logo = feed.feed.logo if feed.feed.has_key('logo') else ''
-                    feed_ttl = feed.feed.ttl if feed.feed.has_key('ttl') else ''
-                    feed_properties = {
-                        "version" : feed_version,
-                        "encoding" : feed_encoding,
-                        "language" : feed_language,
-                        "rating" : '',
-                        "entries_count" : entries_count,
-                        "icon" : feed_icon,
-                        "image" : feed_image,
-                        "logo" : feed_logo,
-                        "ttl" : feed_ttl,
-                        "updated" : feed_updated,
-                        }
-                    print('MID', 'sqlite.update_feed_properties')
-                    print(feed_properties)
+                    feed_properties = action.get_properties_of_feed(db_file,
+                                                                    feed_id, feed)
                     await sqlite.update_feed_properties(db_file, feed_id,
                                                         feed_properties)
-                    print('MID', 'action.get_properties_of_entries')
                     new_entries = action.get_properties_of_entries(
-                        self, jid_bare, db_file, url, feed_id, feed)
+                        jid_bare, db_file, url, feed_id, feed)
                     if new_entries: await sqlite.add_entries_and_update_feed_state(
                         db_file, feed_id, new_entries)
-                    print('END', url)
                     await asyncio.sleep(50)
             val = Config.get_setting_value(self.settings, jid_bare, 'check')
             await asyncio.sleep(60 * float(val))
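Editor's note: the loop's pacing is the interesting part — roughly 50 seconds between individual feeds, then a per-account interval taken from the 'check' setting (in minutes) before the next full round. A stripped-down sketch of that scheduling shape, with placeholder scan logic:

    # Minimal sketch of the check_updates scheduling shape: pause
    # between feeds, then sleep the configured interval (minutes)
    # before scanning the whole list again.
    import asyncio

    async def scan_forever(urls, check_interval_minutes=90.0):
        while True:
            for url in urls:
                print('scanning', url)   # placeholder for fetch + parse + store
                await asyncio.sleep(50)  # pacing between feeds
            await asyncio.sleep(60 * float(check_interval_minutes))

    # asyncio.run(scan_forever(['https://demoniak.ch/index.xml']))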

View file

@@ -1,2 +1,2 @@
-__version__ = '0.1.59'
-__version_info__ = (0, 1, 59)
+__version__ = '0.1.60'
+__version_info__ = (0, 1, 60)

View file

@@ -340,55 +340,51 @@ class Chat:
                                                      identifier)
                     feed_id = sqlite.get_feed_id(db_file, url)
                     feed_id = feed_id[0]
-                    document = result['content']
-                    feed = parse(document)
-                    feed_valid = 0 if feed.bozo else 1
-                    await sqlite.update_feed_validity(db_file, feed_id, feed_valid)
-                    if feed.has_key('updated_parsed'):
-                        feed_updated = feed.updated_parsed
-                        try:
-                            feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
-                        except:
-                            feed_updated = None
-                    else:
-                        feed_updated = None
-                    entries_count = len(feed.entries)
-                    feed_properties = {
-                        "version" : '',
-                        "encoding" : '',
-                        "language" : '',
-                        "rating" : '',
-                        "entries_count" : entries_count,
-                        "icon" : '',
-                        "image" : '',
-                        "logo" : '',
-                        "ttl" : '',
-                        "updated" : feed_updated,
-                        }
-                    await sqlite.update_feed_properties(db_file, feed_id,
-                                                        feed_properties)
-                    feed_id = sqlite.get_feed_id(db_file, url)
-                    feed_id = feed_id[0]
-                    new_entries = action.get_properties_of_entries(
-                        self, jid_bare, db_file, url, feed_id, feed)
-                    if new_entries:
-                        await sqlite.add_entries_and_update_feed_state(
-                            db_file, feed_id, new_entries)
-                    await action.scan(self, jid_bare, db_file, url)
-                    if jid_bare not in self.settings:
-                        Config.add_settings_jid(self.settings, jid_bare,
-                                                db_file)
-                    old = Config.get_setting_value(self.settings, jid_bare,
-                                                   'old')
-                    if old:
-                        # task.clean_tasks_xmpp_chat(self, jid_bare, ['status'])
-                        # await send_status(jid)
-                        key_list = ['status']
-                        await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
-                    else:
-                        feed_id = sqlite.get_feed_id(db_file, url)
-                        feed_id = feed_id[0]
-                        await sqlite.mark_feed_as_read(db_file, feed_id)
+                    result = await fetch.http(url)
+                    if not result['error']:
+                        document = result['content']
+                        feed = parse(document)
+                        feed_valid = 0 if feed.bozo else 1
+                        await sqlite.update_feed_validity(db_file, feed_id, feed_valid)
+                        if feed.has_key('updated_parsed'):
+                            feed_updated = feed.updated_parsed
+                            try:
+                                feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
+                            except:
+                                feed_updated = None
+                        else:
+                            feed_updated = None
+                        feed_properties = action.get_properties_of_feed(
+                            db_file, feed_id, feed)
+                        await sqlite.update_feed_properties(db_file, feed_id,
+                                                            feed_properties)
+                        feed_id = sqlite.get_feed_id(db_file, url)
+                        feed_id = feed_id[0]
+                        new_entries = action.get_properties_of_entries(
+                            jid_bare, db_file, url, feed_id, feed)
+                        if new_entries:
+                            await sqlite.add_entries_and_update_feed_state(
+                                db_file, feed_id, new_entries)
+                    # Function "scan" of module "actions" no longer exists.
+                    # If you choose to add this download functionality,
+                    # then look into function "check_updates" of module "task".
+                    # await action.scan(self, jid_bare, db_file, url)
+                    # if jid_bare not in self.settings:
+                    #     Config.add_settings_jid(self.settings, jid_bare,
+                    #                             db_file)
+                    # old = Config.get_setting_value(self.settings, jid_bare,
+                    #                                'old')
+                    # if old:
+                    #     # task.clean_tasks_xmpp_chat(self, jid_bare, ['status'])
+                    #     # await send_status(jid)
+                    #     key_list = ['status']
+                    #     await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
+                    # else:
+                    #     feed_id = sqlite.get_feed_id(db_file, url)
+                    #     feed_id = feed_id[0]
+                    #     await sqlite.mark_feed_as_read(db_file, feed_id)
                     response = ('> {}\n'
                                 'News source has been '
                                 'added to subscription list.'
@@ -760,7 +756,7 @@ class Chat:
             # self.pending_tasks[jid_bare][self.pending_tasks_counter] = status_message
             XmppPresence.send(self, jid_bare, status_message,
                               status_type=status_type)
-            if url.startswith('feed:'):
+            if url.startswith('feed:/') or url.startswith('itpc:/') or url.startswith('rss:/'):
                 url = uri.feed_to_http(url)
             url = (await uri.replace_hostname(url, 'feed')) or url
             result = await action.add_feed(self, jid_bare,
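Editor's note: the tightened startswith checks match the feed:, itpc:, and rss: URI schemes only when followed by a slash, before handing off to uri.feed_to_http. That function's implementation is not shown in this diff; purely as an assumption, a scheme-to-http rewrite of this kind usually looks something like:

    # Hypothetical sketch of a feed:/itpc:/rss: to http(s) rewrite;
    # the actual uri.feed_to_http in Slixfeed may differ.
    def feed_to_http_sketch(url):
        for scheme in ('feed:', 'itpc:', 'rss:'):
            if url.startswith(scheme):
                rest = url[len(scheme):]
                # "feed:https://example.com/a.xml" embeds a full URL.
                if rest.startswith('http://') or rest.startswith('https://'):
                    return rest
                # "feed://example.com/a.xml" borrows the authority form.
                return 'https:' + rest
        return url

    print(feed_to_http_sketch('feed://example.com/atom.xml'))    # https://example.com/atom.xml
    print(feed_to_http_sketch('feed:https://example.com/a.xml')) # https://example.com/a.xml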
@@ -768,7 +764,7 @@ class Chat:
                                            identifier)
             if isinstance(result, list):
                 results = result
-                response = ("Web feeds found for {}\n\n```\n"
+                response = ("Syndication feeds found for {}\n\n```\n"
                             .format(url))
                 for result in results:
                     response += ("Title : {}\n"
@@ -826,7 +822,8 @@ class Chat:
                              'Type: publishing to node.')
                 XmppMessage.send_reply(self, message, response)
             case _ if (message_lowercase.startswith('http') or
-                       message_lowercase.startswith('feed:')):
+                       message_lowercase.startswith('feed:/') or
+                       message_lowercase.startswith('rss:/')):
                 url = message_text
                 # task.clean_tasks_xmpp_chat(self, jid_bare, ['status'])
                 status_type = 'dnd'
@@ -839,7 +836,7 @@ class Chat:
                 # self.pending_tasks[jid_bare][self.pending_tasks_counter] = status_message
                 XmppPresence.send(self, jid_bare, status_message,
                                   status_type=status_type)
-                if url.startswith('feed:'):
+                if url.startswith('feed:/') or url.startswith('rss:/'):
                     url = uri.feed_to_http(url)
                 url = (await uri.replace_hostname(url, 'feed')) or url
                 db_file = config.get_pathname_to_database(jid_file)
@@ -858,7 +855,7 @@ class Chat:
                                            identifier)
                 if isinstance(result, list):
                     results = result
-                    response = ("Web feeds found for {}\n\n```\n"
+                    response = ("Syndication feeds found for {}\n\n```\n"
                                 .format(url))
                     for result in results:
                         response += ("Title : {}\n"
@@ -1179,7 +1176,7 @@ class Chat:
                     self.pending_tasks[jid_bare][pending_tasks_num] = status_message
                     XmppPresence.send(self, jid_bare, status_message,
                                       status_type=status_type)
-                    if url.startswith('feed:'):
+                    if url.startswith('feed:/') or url.startswith('rss:/'):
                         url = uri.feed_to_http(url)
                     url = (await uri.replace_hostname(url, 'feed')) or url
                     match len(data):
@@ -1199,7 +1196,7 @@ class Chat:
                     result = await crawl.probe_page(url, document)
                     if isinstance(result, list):
                         results = result
-                        response = ("Web feeds found for {}\n\n```\n"
+                        response = ("Syndication feeds found for {}\n\n```\n"
                                     .format(url))
                         for result in results:
                             response += ("Title : {}\n"
@@ -1236,7 +1233,7 @@ class Chat:
                     result = await crawl.probe_page(url, document)
                     if isinstance(result, list):
                         results = result
-                        response = ("Web feeds found for {}\n\n```\n"
+                        response = ("Syndication feeds found for {}\n\n```\n"
                                     .format(url))
                         for result in results:
                             response += ("Title : {}\n"