Create function "get_properties_of_feed";

Do not attempt to scan a URL for items upon command "add";
Minor improvements;
Add more URLs to file feeds.toml.
Schimon Jehudah 2024-05-12 09:55:23 +00:00
parent d0e23b3eb9
commit 8bbad28894
7 changed files with 190 additions and 132 deletions

View file

@@ -1112,25 +1112,14 @@ async def add_feed(self, jid_bare, db_file, url, identifier):
feed_updated = None
else:
feed_updated = None
entries_count = len(feed.entries)
feed_properties = {
"version" : '',
"encoding" : '',
"language" : '',
"rating" : '',
"entries_count" : entries_count,
"icon" : '',
"image" : '',
"logo" : '',
"ttl" : '',
"updated" : feed_updated,
}
feed_properties = get_properties_of_feed(db_file,
feed_id, feed)
await sqlite.update_feed_properties(db_file, feed_id,
feed_properties)
feed_id = sqlite.get_feed_id(db_file, url)
feed_id = feed_id[0]
new_entries = get_properties_of_entries(
self, jid_bare, db_file, url, feed_id, feed)
jid_bare, db_file, url, feed_id, feed)
if new_entries:
await sqlite.add_entries_and_update_feed_state(
db_file, feed_id, new_entries)
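A side note on the feed_id = feed_id[0] idiom above: sqlite.get_feed_id presumably returns a one-column row (a tuple) from cursor.fetchone(), so the integer id is its first element. An illustrative sketch with the standard sqlite3 module (table name and schema are made up):

import sqlite3

connection = sqlite3.connect(':memory:')
connection.execute('CREATE TABLE feeds (id INTEGER PRIMARY KEY, url TEXT)')
connection.execute('INSERT INTO feeds (url) VALUES (?)',
                   ('https://example.com/atom.xml',))
row = connection.execute('SELECT id FROM feeds WHERE url = ?',
                         ('https://example.com/atom.xml',)).fetchone()
feed_id = row[0]  # unpack the one-column row, as the code above does
print(feed_id)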
@@ -1528,9 +1517,45 @@ async def download_feed(self, db_file, feed_url):
await sqlite.update_feed_status(db_file, feed_id, status_code)
def get_properties_of_feed(db_file, feed_id, feed):
if feed.has_key('updated_parsed'):
feed_updated = feed.updated_parsed
try:
feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
except:
feed_updated = ''
else:
feed_updated = ''
entries_count = len(feed.entries)
feed_version = feed.version if feed.has_key('version') else ''
feed_encoding = feed.encoding if feed.has_key('encoding') else ''
feed_language = feed.feed.language if feed.feed.has_key('language') else ''
feed_icon = feed.feed.icon if feed.feed.has_key('icon') else ''
feed_image = feed.feed.image.href if feed.feed.has_key('image') else ''
feed_logo = feed.feed.logo if feed.feed.has_key('logo') else ''
feed_ttl = feed.feed.ttl if feed.feed.has_key('ttl') else ''
feed_properties = {
"version" : feed_version,
"encoding" : feed_encoding,
"language" : feed_language,
"rating" : '',
"entries_count" : entries_count,
"icon" : feed_icon,
"image" : feed_image,
"logo" : feed_logo,
"ttl" : feed_ttl,
"updated" : feed_updated,
}
return feed_properties
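The new get_properties_of_feed relies on feedparser's dict-style has_key() accessor. A minimal sketch of the same extraction with .get() and defaults — an illustrative variant, not the committed code:

# Illustrative variant of get_properties_of_feed using .get() with
# defaults; feedparser's FeedParserDict supports both accessors.
import feedparser

def extract_feed_properties(feed):
    channel = feed.get('feed', {})
    return {
        "version": feed.get('version', ''),
        "encoding": feed.get('encoding', ''),
        "language": channel.get('language', ''),
        "rating": '',
        "entries_count": len(feed.entries),
        "icon": channel.get('icon', ''),
        "image": channel.get('image', {}).get('href', ''),
        "logo": channel.get('logo', ''),
        "ttl": channel.get('ttl', ''),
        "updated": feed.get('updated', ''),
    }

feed = feedparser.parse('https://example.com/atom.xml')
print(extract_feed_properties(feed))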
# TODO get all active feeds of active accounts and scan the feed with the earliest scanned time
# TODO Rename function (idea: scan_and_populate)
def get_properties_of_entries(self, jid_bare, db_file, feed_url, feed_id, feed):
def get_properties_of_entries(jid_bare, db_file, feed_url, feed_id, feed):
"""
Get new entries.

View file

@@ -4,6 +4,12 @@
# NOTE <presence xml:lang="fr"></presence>
# TODO Consider splitting into files feeds-cz.toml, feeds-de.toml, feeds-el.toml.
[[feeds]]
lang = "ch-fr"
name = "Demoniak Network"
link = "https://demoniak.ch/index.xml"
tags = ["computer", "technology"]
[[feeds]]
lang = "cs-cz"
name = "Česká pirátská strana"
@@ -160,12 +166,42 @@ name = "Κόμμα Πειρατών Ελλάδας Pirate party of Greece"
link = "https://www.pirateparty.gr/feed/"
tags = ["greece", "party", "pirate"]
[[feeds]]
lang = "en"
name = "Actual Activists"
link = "https://actualactivists.com/feed/atom/"
tags = ["news", "world", "conspiracy", "lifestyle", "government"]
[[feeds]]
lang = "en"
name = "Cycling Together with Fiona and Marc"
link = "https://pixelfed.social/users/cyclingtogether.atom"
tags = ["sports", "cycling", "adventure", "life"]
[[feeds]]
lang = "en"
name = "Debian Disguised Work"
link = "https://disguised.work/atom.xml"
tags = ["linux", "debian", "ubuntu", "industry"]
[[feeds]]
lang = "en"
name = "Dig Deeper"
link = "https://diggy.club/atom.xml"
tags = ["linux", "health", "computer", "wisdom", "life", "industry"]
[[feeds]]
lang = "en"
name = "Earth Newspaper"
link = "https://earthnewspaper.com/feed/atom/"
tags = ["technology", "world", "war", "politics"]
[[feeds]]
lang = "en"
name = "Fedi.Tips"
link = "https://fedi.tips/feed/atom/"
tags = ["technology", "activitypub", "fediverse", "syndication"]
[[feeds]]
lang = "en"
name = "Lagrange Gemini Client"
@@ -178,12 +214,36 @@ name = "[ngn.tf] | blog"
link = "https://api.ngn.tf/blog/feed.atom"
tags = ["computer", "service", "technology", "telecommunication", "xmpp"]
[[feeds]]
lang = "en"
name = "RTP Blog"
link = "http://righttoprivacy.i2p/rss/"
tags = ["computer", "service", "technology", "telecommunication", "i2p", "privacy"]
[[feeds]]
lang = "en"
name = "Techrights"
link = "http://techrights.org/feed.xml"
tags = ["linux", "bsd", "technology", "industry"]
[[feeds]]
lang = "en"
name = "The SWORD Project"
link = "http://www.crosswire.org/sword/sword.rss.jsp"
tags = ["bible", "religion", "christianity", "history", "education", "life"]
[[feeds]]
lang = "en"
name = "toastal"
link = "https://toast.al/posts/feed.atom"
tags = ["software", "computer", "code", "life"]
[[feeds]]
lang = "en"
name = "Views from Apipucos"
link = "https://apipucos.wordpress.com/feed/atom/"
tags = ["poem", "computer", "brazil", "art", "life"]
[[feeds]]
lang = "en-au"
name = "Pirate Party Australia"
@@ -424,6 +484,12 @@ name = "Jacob's Unnamed Blog"
link = "https://jacobwsmith.xyz/feed.xml"
tags = ["book", "community", "culture", "family", "finance", "lifestyle", "market", "usa"]
[[feeds]]
lang = "en-us"
name = "Jerm Warfare"
link = "https://jermwarfare.com/feed/atom"
tags = ["culture", "war", "usa"]
[[feeds]]
lang = "en-us"
name = "Juicing for Health"
@@ -487,7 +553,7 @@ tags = ["decentralization", "privacy"]
[[feeds]]
lang = "en-us"
name = "nobulart"
link = "https://nobulart.com/feed/"
link = "https://nobulart.com/feed/atom/"
tags = ["news", "survival", "politics", "usa", "world"]
[[feeds]]
@@ -712,6 +778,12 @@ name = "United States Pirate Party"
link = "https://uspirates.org/feed/"
tags = ["party", "pirate", "usa"]
[[feeds]]
lang = "en-us"
name = "Walt Heyer Ministries"
link = "https://waltheyer.com/feed/atom/"
tags = ["sex", "change", "hoomsexuality"]
[[feeds]]
lang = "en-us"
name = "Xonotic"
@@ -724,18 +796,24 @@ name = "yaxim"
link = "https://yaxim.org/atom.xml"
tags = ["android", "germany", "jabber", "telecommunication", "xmpp"]
[[feeds]]
lang = "en-us"
name = "You Are Not So Smart"
link = "https://youarenotsosmart.com/feed/atom/"
tags = ["communication", "culture", "psychology", "society"]
[[feeds]]
lang = "es-ar"
name = "Cyberdelia"
link = "https://cyberdelia.com.ar/feeds/all.atom.xml"
tags = ["decentralization", "development", "electronics", "networking", "privacy", "selfhosting", "technology", "xmpp"]
[[feeds]]
lang = "es-es"
name = "Disroot Blog"
link = "https://disroot.org/es/blog.atom"
tags = ["decentralization", "privacy"]
[[feeds]]
lang = "ch-fr"
name = "Demoniak Network"
link = "https://demoniak.ch/index.xml"
tags = ["computer", "technology"]
[[feeds]]
lang = "fr-fr"
name = "Agate Blue"

View file

@@ -156,7 +156,8 @@ def update_proxies(file, proxy_name, proxy_type, proxy_url, action='remove'):
"""
data = open_config_file('proxies.toml')
proxy_list = data['proxies'][proxy_name][proxy_type]
breakpoint()
# breakpoint()
print('####################### PROXY ######################')
proxy_index = proxy_list.index(proxy_url)
proxy_list.pop(proxy_index)
with open(file, 'w') as new_file:
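The hunk above swaps a stray breakpoint() for a debug print, but the removal still goes through index() and pop(). A hedged sketch of an equivalent step with list.remove() (the proxy name and URL are made up):

# list.remove() raises ValueError when the URL is absent, which
# collapses the index()/pop() pair and guards against missing entries.
data = {'proxies': {'exampleproxy': {'socks5': ['socks5://127.0.0.1:9050']}}}
proxy_list = data['proxies']['exampleproxy']['socks5']
try:
    proxy_list.remove('socks5://127.0.0.1:9050')
except ValueError:
    pass  # URL was not in the list to begin with
print(proxy_list)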

View file

@@ -27,6 +27,13 @@ TODO
7) See project /offpunk/offblocklist.py
NOTE
1) You might not want to utilize aiohttp, because you
can no longer scan as many feeds as possible all at once
due to CPU spikes.
Consider https://pythonhosted.org/feedparser/http-useragent.html
"""
from aiohttp import ClientError, ClientSession, ClientTimeout
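One way to act on the NOTE above, sketched under the assumption that throttling is acceptable: bound concurrent downloads with an asyncio.Semaphore so a large feed list is fetched in small batches instead of all at once (limit and timeout values are illustrative):

import asyncio
from aiohttp import ClientError, ClientSession, ClientTimeout

async def fetch_all(urls, limit=5):
    semaphore = asyncio.Semaphore(limit)   # at most `limit` requests in flight
    async with ClientSession(timeout=ClientTimeout(total=30)) as session:
        async def fetch(url):
            async with semaphore:
                try:
                    async with session.get(url) as response:
                        return url, await response.text()
                except ClientError as error:
                    return url, error
        return await asyncio.gather(*(fetch(url) for url in urls))

# asyncio.run(fetch_all(['https://example.com/atom.xml']))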

View file

@@ -363,69 +363,19 @@ async def check_updates(self, jid_bare):
feed_id = sqlite.get_feed_id(db_file, url)
feed_id = feed_id[0]
if not result['error']:
print('MID', 'sqlite.update_feed_status')
await sqlite.update_feed_status(db_file, feed_id, status_code)
document = result['content']
feed = parse(document)
# # Skip Librarian
# if 'librarian' in feed.feed.title.lower():
# print('Librarian RSS feeds are not supported by Slixfeed.')
# print('Ask the Librarian developers to migrate to Atom Syndication.')
# print('Skipping URL:', url)
# continue
# # Skip Mastodon
# if 'mastodon' in feed.feed.generator.lower():
# print('Mastodon RSS feeds are not supported by Slixfeed.')
# print('Ask the Mastodon developers to migrate to Atom Syndication.')
# print('Skipping URL:', url)
# continue
feed_valid = 0 if feed.bozo else 1
await sqlite.update_feed_validity(db_file, feed_id, feed_valid)
if feed.has_key('updated_parsed'):
feed_updated = feed.updated_parsed
try:
feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
except:
feed_updated = ''
else:
feed_updated = ''
entries_count = len(feed.entries)
feed_version = feed.version if feed.has_key('version') else ''
feed_encoding = feed.encoding if feed.has_key('encoding') else ''
feed_language = feed.feed.language if feed.feed.has_key('language') else ''
feed_icon = feed.feed.icon if feed.feed.has_key('icon') else ''
feed_image = feed.feed.image.href if feed.feed.has_key('image') else ''
feed_logo = feed.feed.logo if feed.feed.has_key('logo') else ''
feed_ttl = feed.feed.ttl if feed.feed.has_key('ttl') else ''
feed_properties = {
"version" : feed_version,
"encoding" : feed_encoding,
"language" : feed_language,
"rating" : '',
"entries_count" : entries_count,
"icon" : feed_icon,
"image" : feed_image,
"logo" : feed_logo,
"ttl" : feed_ttl,
"updated" : feed_updated,
}
print('MID', 'sqlite.update_feed_properties')
print(feed_properties)
feed_properties = action.get_properties_of_feed(db_file,
feed_id, feed)
await sqlite.update_feed_properties(db_file, feed_id,
feed_properties)
print('MID', 'action.get_properties_of_entries')
new_entries = action.get_properties_of_entries(
self, jid_bare, db_file, url, feed_id, feed)
jid_bare, db_file, url, feed_id, feed)
if new_entries: await sqlite.add_entries_and_update_feed_state(
db_file, feed_id, new_entries)
print('END', url)
await asyncio.sleep(50)
val = Config.get_setting_value(self.settings, jid_bare, 'check')
await asyncio.sleep(60 * float(val))
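The updated_parsed handling above leans on dt.convert_struct_time_to_iso8601. A hedged sketch of what that helper is assumed to do — turn feedparser's time.struct_time into an ISO 8601 string (the real implementation lives in the project's dt module):

import time
from datetime import datetime, timezone

def convert_struct_time_to_iso8601(struct):
    # struct_time carries year..second in its first six fields
    return datetime(*struct[:6], tzinfo=timezone.utc).isoformat()

print(convert_struct_time_to_iso8601(time.gmtime()))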

View file

@@ -1,2 +1,2 @@
__version__ = '0.1.59'
__version_info__ = (0, 1, 59)
__version__ = '0.1.60'
__version_info__ = (0, 1, 60)

View file

@@ -340,55 +340,51 @@ class Chat:
identifier)
feed_id = sqlite.get_feed_id(db_file, url)
feed_id = feed_id[0]
document = result['content']
feed = parse(document)
feed_valid = 0 if feed.bozo else 1
await sqlite.update_feed_validity(db_file, feed_id, feed_valid)
if feed.has_key('updated_parsed'):
feed_updated = feed.updated_parsed
try:
feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
except:
result = await fetch.http(url)
if not result['error']:
document = result['content']
feed = parse(document)
feed_valid = 0 if feed.bozo else 1
await sqlite.update_feed_validity(db_file, feed_id, feed_valid)
if feed.has_key('updated_parsed'):
feed_updated = feed.updated_parsed
try:
feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
except:
feed_updated = None
else:
feed_updated = None
else:
feed_updated = None
entries_count = len(feed.entries)
feed_properties = {
"version" : '',
"encoding" : '',
"language" : '',
"rating" : '',
"entries_count" : entries_count,
"icon" : '',
"image" : '',
"logo" : '',
"ttl" : '',
"updated" : feed_updated,
}
await sqlite.update_feed_properties(db_file, feed_id,
feed_properties)
feed_id = sqlite.get_feed_id(db_file, url)
feed_id = feed_id[0]
new_entries = action.get_properties_of_entries(
self, jid_bare, db_file, url, feed_id, feed)
if new_entries:
await sqlite.add_entries_and_update_feed_state(
db_file, feed_id, new_entries)
await action.scan(self, jid_bare, db_file, url)
if jid_bare not in self.settings:
Config.add_settings_jid(self.settings, jid_bare,
db_file)
old = Config.get_setting_value(self.settings, jid_bare,
'old')
if old:
# task.clean_tasks_xmpp_chat(self, jid_bare, ['status'])
# await send_status(jid)
key_list = ['status']
await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
else:
feed_properties = action.get_properties_of_feed(
db_file, feed_id, feed)
await sqlite.update_feed_properties(db_file, feed_id,
feed_properties)
feed_id = sqlite.get_feed_id(db_file, url)
feed_id = feed_id[0]
await sqlite.mark_feed_as_read(db_file, feed_id)
new_entries = action.get_properties_of_entries(
jid_bare, db_file, url, feed_id, feed)
if new_entries:
await sqlite.add_entries_and_update_feed_state(
db_file, feed_id, new_entries)
# Function "scan" of module "actions" no longer exists.
# If you choose to add this download functionality back,
# then look into function "check_updates" of module "task".
# await action.scan(self, jid_bare, db_file, url)
# if jid_bare not in self.settings:
# Config.add_settings_jid(self.settings, jid_bare,
# db_file)
# old = Config.get_setting_value(self.settings, jid_bare,
# 'old')
# if old:
# # task.clean_tasks_xmpp_chat(self, jid_bare, ['status'])
# # await send_status(jid)
# key_list = ['status']
# await task.start_tasks_xmpp_chat(self, jid_bare, key_list)
# else:
# feed_id = sqlite.get_feed_id(db_file, url)
# feed_id = feed_id[0]
# await sqlite.mark_feed_as_read(db_file, feed_id)
response = ('> {}\n'
'News source has been '
'added to subscription list.'
@@ -760,7 +756,7 @@ class Chat:
# self.pending_tasks[jid_bare][self.pending_tasks_counter] = status_message
XmppPresence.send(self, jid_bare, status_message,
status_type=status_type)
if url.startswith('feed:'):
if url.startswith('feed:/') or url.startswith('itpc:/') or url.startswith('rss:/'):
url = uri.feed_to_http(url)
url = (await uri.replace_hostname(url, 'feed')) or url
result = await action.add_feed(self, jid_bare,
@@ -768,7 +764,7 @@
identifier)
if isinstance(result, list):
results = result
response = ("Web feeds found for {}\n\n```\n"
response = ("Syndication feeds found for {}\n\n```\n"
.format(url))
for result in results:
response += ("Title : {}\n"
@@ -826,7 +822,8 @@
'Type: publishing to node.')
XmppMessage.send_reply(self, message, response)
case _ if (message_lowercase.startswith('http') or
message_lowercase.startswith('feed:')):
message_lowercase.startswith('feed:/') or
message_lowercase.startswith('rss:/')):
url = message_text
# task.clean_tasks_xmpp_chat(self, jid_bare, ['status'])
status_type = 'dnd'
@@ -839,7 +836,7 @@
# self.pending_tasks[jid_bare][self.pending_tasks_counter] = status_message
XmppPresence.send(self, jid_bare, status_message,
status_type=status_type)
if url.startswith('feed:'):
if url.startswith('feed:/') or url.startswith('rss:/'):
url = uri.feed_to_http(url)
url = (await uri.replace_hostname(url, 'feed')) or url
db_file = config.get_pathname_to_database(jid_file)
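The widened scheme checks funnel feed:, itpc:, and rss: URIs into uri.feed_to_http. A hedged sketch of what that helper is assumed to do (the real one lives in the project's uri module):

def feed_to_http(url):
    # feed:https://example.com/atom.xml -> keep the embedded http(s) URL;
    # feed://example.com/atom.xml      -> substitute http:// for the scheme.
    for scheme in ('feed:', 'itpc:', 'rss:'):
        if url.startswith(scheme):
            rest = url[len(scheme):].lstrip('/')
            if rest.startswith('http://') or rest.startswith('https://'):
                return rest
            return 'http://' + rest
    return url

print(feed_to_http('feed://example.com/atom.xml'))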
@@ -858,7 +855,7 @@
identifier)
if isinstance(result, list):
results = result
response = ("Web feeds found for {}\n\n```\n"
response = ("Syndication feeds found for {}\n\n```\n"
.format(url))
for result in results:
response += ("Title : {}\n"
@@ -1179,7 +1176,7 @@ class Chat:
self.pending_tasks[jid_bare][pending_tasks_num] = status_message
XmppPresence.send(self, jid_bare, status_message,
status_type=status_type)
if url.startswith('feed:'):
if url.startswith('feed:/') or url.startswith('rss:/'):
url = uri.feed_to_http(url)
url = (await uri.replace_hostname(url, 'feed')) or url
match len(data):
@@ -1199,7 +1196,7 @@
result = await crawl.probe_page(url, document)
if isinstance(result, list):
results = result
response = ("Web feeds found for {}\n\n```\n"
response = ("Syndication feeds found for {}\n\n```\n"
.format(url))
for result in results:
response += ("Title : {}\n"
@@ -1236,7 +1233,7 @@
result = await crawl.probe_page(url, document)
if isinstance(result, list):
results = result
response = ("Web feeds found for {}\n\n```\n"
response = ("Syndication feeds found for {}\n\n```\n"
.format(url))
for result in results:
response += ("Title : {}\n"