From 8bbad28894f15dbca311780e0079a30e60a13070 Mon Sep 17 00:00:00 2001 From: Schimon Jehudah Date: Sun, 12 May 2024 09:55:23 +0000 Subject: [PATCH] Create function "get_properties_of_feed"; Do not attempt to scan URL for items upon command "add"; Minor improvements; Add more URLs to file feeds.toml. --- slixfeed/action.py | 55 +++++++++++++------ slixfeed/assets/feeds.toml | 92 +++++++++++++++++++++++++++++--- slixfeed/config.py | 3 +- slixfeed/fetch.py | 7 +++ slixfeed/task.py | 56 ++------------------ slixfeed/version.py | 4 +- slixfeed/xmpp/chat.py | 105 ++++++++++++++++++------------------- 7 files changed, 190 insertions(+), 132 deletions(-) diff --git a/slixfeed/action.py b/slixfeed/action.py index 4ba4e49..0b5a7cf 100644 --- a/slixfeed/action.py +++ b/slixfeed/action.py @@ -1112,25 +1112,14 @@ async def add_feed(self, jid_bare, db_file, url, identifier): feed_updated = None else: feed_updated = None - entries_count = len(feed.entries) - feed_properties = { - "version" : '', - "encoding" : '', - "language" : '', - "rating" : '', - "entries_count" : entries_count, - "icon" : '', - "image" : '', - "logo" : '', - "ttl" : '', - "updated" : feed_updated, - } + feed_properties = get_properties_of_feed(db_file, + feed_id, feed) await sqlite.update_feed_properties(db_file, feed_id, feed_properties) feed_id = sqlite.get_feed_id(db_file, url) feed_id = feed_id[0] new_entries = get_properties_of_entries( - self, jid_bare, db_file, url, feed_id, feed) + jid_bare, db_file, url, feed_id, feed) if new_entries: await sqlite.add_entries_and_update_feed_state( db_file, feed_id, new_entries) @@ -1528,9 +1517,45 @@ async def download_feed(self, db_file, feed_url): await sqlite.update_feed_status(db_file, feed_id, status_code) +def get_properties_of_feed(db_file, feed_id, feed): + + if feed.has_key('updated_parsed'): + feed_updated = feed.updated_parsed + try: + feed_updated = dt.convert_struct_time_to_iso8601(feed_updated) + except: + feed_updated = '' + else: + feed_updated = 
'' + + entries_count = len(feed.entries) + + feed_version = feed.version if feed.has_key('version') else '' + feed_encoding = feed.encoding if feed.has_key('encoding') else '' + feed_language = feed.feed.language if feed.feed.has_key('language') else '' + feed_icon = feed.feed.icon if feed.feed.has_key('icon') else '' + feed_image = feed.feed.image.href if feed.feed.has_key('image') else '' + feed_logo = feed.feed.logo if feed.feed.has_key('logo') else '' + feed_ttl = feed.feed.ttl if feed.feed.has_key('ttl') else '' + + feed_properties = { + "version" : feed_version, + "encoding" : feed_encoding, + "language" : feed_language, + "rating" : '', + "entries_count" : entries_count, + "icon" : feed_icon, + "image" : feed_image, + "logo" : feed_logo, + "ttl" : feed_ttl, + "updated" : feed_updated, + } + + return feed_properties + # TODO get all active feeds of active accounts and scan the feed with the earliest scanned time # TODO Rename function name (idea: scan_and_populate) -def get_properties_of_entries(self, jid_bare, db_file, feed_url, feed_id, feed): +def get_properties_of_entries(jid_bare, db_file, feed_url, feed_id, feed): """ Get new entries. diff --git a/slixfeed/assets/feeds.toml b/slixfeed/assets/feeds.toml index 36bf4e2..284736b 100644 --- a/slixfeed/assets/feeds.toml +++ b/slixfeed/assets/feeds.toml @@ -4,6 +4,12 @@ # NOTE # TODO Consider splitting into files feeds-cz.toml, feeds-de.toml, feeds-el.toml. 
+[[feeds]] +lang = "ch-fr" +name = "Demoniak Network" +link = "https://demoniak.ch/index.xml" +tags = ["computer", "technology"] + [[feeds]] lang = "cs-cz" name = "Česká pirátská strana" @@ -160,12 +166,42 @@ name = "Κόμμα Πειρατών Ελλάδας – Pirate party of Greece" link = "https://www.pirateparty.gr/feed/" tags = ["greece", "party", "pirate"] +[[feeds]] +lang = "en" +name = "Actual Activists" +link = "https://actualactivists.com/feed/atom/" +tags = ["news", "world", "conspiracy", "lifestyle", "government"] + [[feeds]] lang = "en" name = "Cycling Together with Fiona and Marc" link = "https://pixelfed.social/users/cyclingtogether.atom" tags = ["sports", "cycling", "adventure", "life"] +[[feeds]] +lang = "en" +name = "Debian Disguised Work" +link = "https://disguised.work/atom.xml" +tags = ["linux", "debian", "ubuntu", "industry"] + +[[feeds]] +lang = "en" +name = "Dig Deeper" +link = "https://diggy.club/atom.xml" +tags = ["linux", "health", "computer", "wisdom", "life", "industry"] + +[[feeds]] +lang = "en" +name = "Earth Newspaper" +link = "https://earthnewspaper.com/feed/atom/" +tags = ["technology", "world", "war", "politics"] + +[[feeds]] +lang = "en" +name = "Fedi.Tips" +link = "https://fedi.tips/feed/atom/" +tags = ["technology", "activitypub", "fediverse", "syndication"] + [[feeds]] lang = "en" name = "Lagrange Gemini Client" @@ -178,12 +214,36 @@ name = "[ngn.tf] | blog" link = "https://api.ngn.tf/blog/feed.atom" tags = ["computer", "service", "technology", "telecommunication", "xmpp"] +[[feeds]] +lang = "en" +name = "RTP Blog" +link = "http://righttoprivacy.i2p/rss/" +tags = ["computer", "service", "technology", "telecommunication", "i2p", "privacy"] + +[[feeds]] +lang = "en" +name = "Techrights" +link = "http://techrights.org/feed.xml" +tags = ["linux", "bsd", "technology", "industry"] + [[feeds]] lang = "en" name = "The SWORD Project" link = "http://www.crosswire.org/sword/sword.rss.jsp" tags = ["bible", "religion", "christianity", "history", "education", 
"life"] +[[feeds]] +lang = "en" +name = "toastal" +link = "https://toast.al/posts/feed.atom" +tags = ["software", "computer", "code", "life"] + +[[feeds]] +lang = "en" +name = "Views from Apipucos" +link = "https://apipucos.wordpress.com/feed/atom/" +tags = ["poem", "computer", "brazil", "art", "life"] + [[feeds]] lang = "en-au" name = "Pirate Party Australia" @@ -424,6 +484,12 @@ name = "Jacob's Unnamed Blog" link = "https://jacobwsmith.xyz/feed.xml" tags = ["book", "community", "culture", "family", "finance", "lifestyle", "market", "usa"] +[[feeds]] +lang = "en-us" +name = "Jerm Warfare" +link = "https://jermwarfare.com/feed/atom" +tags = ["culture", "war", "usa"] + [[feeds]] lang = "en-us" name = "Juicing for Health" @@ -487,7 +553,7 @@ tags = ["decentralization", "privacy"] [[feeds]] lang = "en-us" name = "nobulart" -link = "https://nobulart.com/feed/" +link = "https://nobulart.com/feed/atom/" tags = ["news", "survival", "politics", "usa", "world"] [[feeds]] @@ -712,6 +778,12 @@ name = "United States Pirate Party" link = "https://uspirates.org/feed/" tags = ["party", "pirate", "usa"] +[[feeds]] +lang = "en-us" +name = "Walt Heyer Ministries" +link = "https://waltheyer.com/feed/atom/" +tags = ["sex", "change", "hoomsexuality"] + [[feeds]] lang = "en-us" name = "Xonotic" @@ -724,18 +796,24 @@ name = "yaxim" link = "https://yaxim.org/atom.xml" tags = ["android", "germany", "jabber", "telecommunication", "xmpp"] +[[feeds]] +lang = "en-us" +name = "You Are Not So Smart" +link = "https://youarenotsosmart.com/feed/atom/" +tags = ["communication", "culture", "psychology", "society"] + +[[feeds]] +lang = "es-ar" +name = "Cyberdelia" +link = "https://cyberdelia.com.ar/feeds/all.atom.xml" +tags = ["decentralization", "development", "electronics", "networking", "privacy", "selfhosting", "technology", "xmpp"] + [[feeds]] lang = "es-es" name = "Disroot Blog" link = "https://disroot.org/es/blog.atom" tags = ["decentralization", "privacy"] -[[feeds]] -lang = "ch-fr" -name = 
"Demoniak Network" -link = "https://demoniak.ch/index.xml" -tags = ["computer", "technology"] - [[feeds]] lang = "fr-fr" name = "Agate Blue" diff --git a/slixfeed/config.py b/slixfeed/config.py index 8f7d935..a8ed1b8 100644 --- a/slixfeed/config.py +++ b/slixfeed/config.py @@ -156,7 +156,8 @@ def update_proxies(file, proxy_name, proxy_type, proxy_url, action='remove'): """ data = open_config_file('proxies.toml') proxy_list = data['proxies'][proxy_name][proxy_type] - breakpoint() + # breakpoint() + print('####################### PROXY ######################') proxy_index = proxy_list.index(proxy_url) proxy_list.pop(proxy_index) with open(file, 'w') as new_file: diff --git a/slixfeed/fetch.py b/slixfeed/fetch.py index 1b89ff1..a399f09 100644 --- a/slixfeed/fetch.py +++ b/slixfeed/fetch.py @@ -27,6 +27,13 @@ TODO 7) See project /offpunk/offblocklist.py +NOTE + +1) You might not want to utilize aiohttp, because you + no longer scan as many feeds as possible all at once + due to CPU spikes. + Consider https://pythonhosted.org/feedparser/http-useragent.html + """ from aiohttp import ClientError, ClientSession, ClientTimeout diff --git a/slixfeed/task.py b/slixfeed/task.py index 01cd1e3..69240a9 100644 --- a/slixfeed/task.py +++ b/slixfeed/task.py @@ -363,69 +363,19 @@ async def check_updates(self, jid_bare): feed_id = sqlite.get_feed_id(db_file, url) feed_id = feed_id[0] if not result['error']: - print('MID', 'sqlite.update_feed_status') await sqlite.update_feed_status(db_file, feed_id, status_code) document = result['content'] feed = parse(document) - - # # Skip Librarian - # if 'librarian' in feed.feed.title.lower(): - # print('Librarian RSS feeds are not supported by Slixfeed.') - # print('Ask the Librarian developers to migrate to Atom Syndication.') - # print('Skipping URL:', url) - # continue - - # # Skip Mastodon - # if 'mastodon' in feed.feed.generator.lower(): - # print('Mastodon RSS feeds are not supported by Slixfeed.') - # print('Ask the Mastodon developers to 
migrate to Atom Syndication.') - # print('Skipping URL:', url) - # continue - feed_valid = 0 if feed.bozo else 1 await sqlite.update_feed_validity(db_file, feed_id, feed_valid) - - if feed.has_key('updated_parsed'): - feed_updated = feed.updated_parsed - try: - feed_updated = dt.convert_struct_time_to_iso8601(feed_updated) - except: - feed_updated = '' - else: - feed_updated = '' - - entries_count = len(feed.entries) - - feed_version = feed.version if feed.has_key('version') else '' - feed_encoding = feed.encoding if feed.has_key('encoding') else '' - feed_language = feed.feed.language if feed.feed.has_key('language') else '' - feed_icon = feed.feed.icon if feed.feed.has_key('icon') else '' - feed_image = feed.feed.image.href if feed.feed.has_key('image') else '' - feed_logo = feed.feed.logo if feed.feed.has_key('logo') else '' - feed_ttl = feed.feed.ttl if feed.feed.has_key('ttl') else '' - - feed_properties = { - "version" : feed_version, - "encoding" : feed_encoding, - "language" : feed_language, - "rating" : '', - "entries_count" : entries_count, - "icon" : feed_icon, - "image" : feed_image, - "logo" : feed_logo, - "ttl" : feed_ttl, - "updated" : feed_updated, - } - print('MID', 'sqlite.update_feed_properties') - print(feed_properties) + feed_properties = action.get_properties_of_feed(db_file, + feed_id, feed) await sqlite.update_feed_properties(db_file, feed_id, feed_properties) - print('MID', 'action.get_properties_of_entries') new_entries = action.get_properties_of_entries( - self, jid_bare, db_file, url, feed_id, feed) + jid_bare, db_file, url, feed_id, feed) if new_entries: await sqlite.add_entries_and_update_feed_state( db_file, feed_id, new_entries) - print('END', url) await asyncio.sleep(50) val = Config.get_setting_value(self.settings, jid_bare, 'check') await asyncio.sleep(60 * float(val)) diff --git a/slixfeed/version.py b/slixfeed/version.py index 7eea5dd..4cdc574 100644 --- a/slixfeed/version.py +++ b/slixfeed/version.py @@ -1,2 +1,2 @@ 
-__version__ = '0.1.59' -__version_info__ = (0, 1, 59) +__version__ = '0.1.60' +__version_info__ = (0, 1, 60) diff --git a/slixfeed/xmpp/chat.py b/slixfeed/xmpp/chat.py index 41a684b..f3302af 100644 --- a/slixfeed/xmpp/chat.py +++ b/slixfeed/xmpp/chat.py @@ -340,55 +340,51 @@ class Chat: identifier) feed_id = sqlite.get_feed_id(db_file, url) feed_id = feed_id[0] - document = result['content'] - feed = parse(document) - feed_valid = 0 if feed.bozo else 1 - await sqlite.update_feed_validity(db_file, feed_id, feed_valid) - if feed.has_key('updated_parsed'): - feed_updated = feed.updated_parsed - try: - feed_updated = dt.convert_struct_time_to_iso8601(feed_updated) - except: + result = await fetch.http(url) + if not result['error']: + document = result['content'] + feed = parse(document) + feed_valid = 0 if feed.bozo else 1 + await sqlite.update_feed_validity(db_file, feed_id, feed_valid) + if feed.has_key('updated_parsed'): + feed_updated = feed.updated_parsed + try: + feed_updated = dt.convert_struct_time_to_iso8601(feed_updated) + except: + feed_updated = None + else: feed_updated = None - else: - feed_updated = None - entries_count = len(feed.entries) - feed_properties = { - "version" : '', - "encoding" : '', - "language" : '', - "rating" : '', - "entries_count" : entries_count, - "icon" : '', - "image" : '', - "logo" : '', - "ttl" : '', - "updated" : feed_updated, - } - await sqlite.update_feed_properties(db_file, feed_id, - feed_properties) - feed_id = sqlite.get_feed_id(db_file, url) - feed_id = feed_id[0] - new_entries = action.get_properties_of_entries( - self, jid_bare, db_file, url, feed_id, feed) - if new_entries: - await sqlite.add_entries_and_update_feed_state( - db_file, feed_id, new_entries) - await action.scan(self, jid_bare, db_file, url) - if jid_bare not in self.settings: - Config.add_settings_jid(self.settings, jid_bare, - db_file) - old = Config.get_setting_value(self.settings, jid_bare, - 'old') - if old: - # task.clean_tasks_xmpp_chat(self, 
jid_bare, ['status']) - # await send_status(jid) - key_list = ['status'] - await task.start_tasks_xmpp_chat(self, jid_bare, key_list) - else: + feed_properties = action.get_properties_of_feed( + db_file, feed_id, feed) + await sqlite.update_feed_properties(db_file, feed_id, + feed_properties) feed_id = sqlite.get_feed_id(db_file, url) feed_id = feed_id[0] - await sqlite.mark_feed_as_read(db_file, feed_id) + new_entries = action.get_properties_of_entries( + jid_bare, db_file, url, feed_id, feed) + if new_entries: + await sqlite.add_entries_and_update_feed_state( + db_file, feed_id, new_entries) + + # Function "scan" of module "action" no longer exists. + # If you choose to add this download functionality, + # then look into function "check_updates" of module "task". + # await action.scan(self, jid_bare, db_file, url) + # if jid_bare not in self.settings: + # Config.add_settings_jid(self.settings, jid_bare, + # db_file) + # old = Config.get_setting_value(self.settings, jid_bare, + # 'old') + # if old: + # # task.clean_tasks_xmpp_chat(self, jid_bare, ['status']) + # # await send_status(jid) + # key_list = ['status'] + # await task.start_tasks_xmpp_chat(self, jid_bare, key_list) + # else: + # feed_id = sqlite.get_feed_id(db_file, url) + # feed_id = feed_id[0] + # await sqlite.mark_feed_as_read(db_file, feed_id) + response = ('> {}\n' 'News source has been ' 'added to subscription list.' 
@@ -760,7 +756,7 @@ class Chat: # self.pending_tasks[jid_bare][self.pending_tasks_counter] = status_message XmppPresence.send(self, jid_bare, status_message, status_type=status_type) - if url.startswith('feed:'): + if url.startswith('feed:/') or url.startswith('itpc:/') or url.startswith('rss:/'): url = uri.feed_to_http(url) url = (await uri.replace_hostname(url, 'feed')) or url result = await action.add_feed(self, jid_bare, @@ -768,7 +764,7 @@ class Chat: identifier) if isinstance(result, list): results = result - response = ("Web feeds found for {}\n\n```\n" + response = ("Syndication feeds found for {}\n\n```\n" .format(url)) for result in results: response += ("Title : {}\n" @@ -826,7 +822,8 @@ class Chat: 'Type: publishing to node.') XmppMessage.send_reply(self, message, response) case _ if (message_lowercase.startswith('http') or - message_lowercase.startswith('feed:')): + message_lowercase.startswith('feed:/') or + message_lowercase.startswith('rss:/')): url = message_text # task.clean_tasks_xmpp_chat(self, jid_bare, ['status']) status_type = 'dnd' @@ -839,7 +836,7 @@ class Chat: # self.pending_tasks[jid_bare][self.pending_tasks_counter] = status_message XmppPresence.send(self, jid_bare, status_message, status_type=status_type) - if url.startswith('feed:'): + if url.startswith('feed:/') or url.startswith('rss:/'): url = uri.feed_to_http(url) url = (await uri.replace_hostname(url, 'feed')) or url db_file = config.get_pathname_to_database(jid_file) @@ -858,7 +855,7 @@ class Chat: identifier) if isinstance(result, list): results = result - response = ("Web feeds found for {}\n\n```\n" + response = ("Syndication feeds found for {}\n\n```\n" .format(url)) for result in results: response += ("Title : {}\n" @@ -1179,7 +1176,7 @@ class Chat: self.pending_tasks[jid_bare][pending_tasks_num] = status_message XmppPresence.send(self, jid_bare, status_message, status_type=status_type) - if url.startswith('feed:'): + if url.startswith('feed:/') or url.startswith('rss:/'): 
url = uri.feed_to_http(url) url = (await uri.replace_hostname(url, 'feed')) or url match len(data): @@ -1199,7 +1196,7 @@ class Chat: result = await crawl.probe_page(url, document) if isinstance(result, list): results = result - response = ("Web feeds found for {}\n\n```\n" + response = ("Syndication feeds found for {}\n\n```\n" .format(url)) for result in results: response += ("Title : {}\n" @@ -1236,7 +1233,7 @@ class Chat: result = await crawl.probe_page(url, document) if isinstance(result, list): results = result - response = ("Web feeds found for {}\n\n```\n" + response = ("Syndication feeds found for {}\n\n```\n" .format(url)) for result in results: response += ("Title : {}\n"