Improve connectivity recovery

Schimon Jehudah 2023-12-24 18:37:05 +00:00
parent 0566589a9d
commit 56d0da9a76
5 changed files with 97 additions and 62 deletions

View file

@@ -13,42 +13,44 @@ FIXME
 TODO
-0) from slixfeed.FILENAME import XYZ
-   See project feed2toot
-1) SQL prepared statements.
-2) Machine Learning for scraping Title, Link, Summary and Timestamp.
-3) Set MUC subject
+1) from slixfeed.FILENAME import XYZ
+   See project /chaica/feed2toot
+2) SQL prepared statements;
+3) Machine Learning for scraping Title, Link, Summary and Timestamp;
+   Scrape element </article> (example: Liferea)
+   http://intertwingly.net/blog/
+   https://www.brandenburg.de/
+4) Set MUC subject
    Feeds whose entries are to be set as groupchat subject.
    Perhaps not, as it would require checking every feed for this setting.
-   Maybe a separate bot.
-4) Support categories.
-5) Default prepackaged list of feeds.
-6) XMPP commands.
-7) Bot as transport.
-8) OMEMO.
-9) Logging.
-10) Default feeds (e.g. Blacklisted News, TBOT etc.)
-11) Download and upload/send article (xHTML, xHTMLZ, Markdown, MHTML, TXT).
-    Use Readability.
+   Maybe a separate bot;
+5) Support categories;
+6) XMPP commands;
+7) Bot as transport;
+8) OMEMO;
+9) Logging;
+   https://docs.python.org/3/howto/logging.html
+10) Readability
+    See project /buriy/python-readability
+11) Download and upload/send article (xHTML, HTMLZ, Markdown, MHTML, TXT).
 12) Fetch summary from URL, instead of storing summary, or
     store 5 upcoming summaries.
     This would help make the database files smaller.
 13) Support protocol Gopher
-    https://github.com/michael-lazar/pygopherd
-    https://github.com/gopherball/gb
+    See project /michael-lazar/pygopherd
+    See project /gopherball/gb
 14) Support ActivityPub @person@domain (see Tip Of The Day).
@@ -60,7 +62,11 @@ TODO
 16) Brand: News Broker, Newsman, Newsdealer, Laura Harbinger
-17) See project offpunk/offblocklist.py
+17) See project /offpunk/offblocklist.py
+18) Search messages of government-regulated publishers, and promote other sources.
+    Dear reader, we couldn't get news from XYZ as they don't provide RSS feeds.
+    However, you might want to get news from (1) (2) and (3) instead!
 """

View file

@@ -19,14 +19,14 @@ from asyncio import TimeoutError
 from asyncio.exceptions import IncompleteReadError
 from bs4 import BeautifulSoup
 from confighandler import get_list, get_value_default
+from datetimehandler import now, rfc2822_to_iso8601
 from email.utils import parseaddr
 from feedparser import parse
 from http.client import IncompleteRead
-from lxml import html
-from datetimehandler import now, rfc2822_to_iso8601
-from urlhandler import complete_url, join_url, trim_url
 from listhandler import is_listed
+from lxml import html
 import sqlitehandler as sqlite
+from urlhandler import complete_url, join_url, trim_url
 from urllib import error
 # from xml.etree.ElementTree import ElementTree, ParseError
 from urllib.parse import urljoin, urlsplit, urlunsplit
@@ -202,8 +202,8 @@ async def download_updates(db_file, url=None):
                     print("PROBLEM: date is int")
                     print(date)
                     # breakpoint()
-            print(source)
-            print(date)
+            # print(source)
+            # print(date)
             await sqlite.add_entry_and_set_date(
                 db_file,
                 source,
@@ -261,9 +261,9 @@ async def view_feed(url):
         title = get_title(url, feed)
         entries = feed.entries
         msg = "Preview of {}:\n```\n".format(title)
-        count = 0
+        counter = 0
         for entry in entries:
-            count += 1
+            counter += 1
             if entry.has_key("title"):
                 title = entry.title
             else:
@@ -292,9 +292,9 @@ async def view_feed(url):
                 title,
                 date,
                 link,
-                count
+                counter
             )
-            if count > 4:
+            if counter > 4:
                 break
         msg += (
             "```\nSource: {}"
@@ -446,7 +446,7 @@ async def add_feed(db_file, url):
             title = get_title(url, feed)
             if feed.bozo:
                 bozo = (
-                    "Bozo detected. Failed to load: {}."
+                    "Bozo detected. Failed to load: {}"
                 ).format(url)
                 print(bozo)
                 msg = await probe_page(add_feed, url, res[0], db_file=db_file)
@@ -505,7 +505,7 @@ async def probe_page(callback, url, doc, num=None, db_file=None):
     elif isinstance(msg, list):
         url = msg[0]
         if db_file:
-            print("if db_file", db_file)
+            # print("if db_file", db_file)
             return await callback(db_file, url)
         elif num:
             return await callback(url, num)
@@ -531,6 +531,8 @@ async def download_feed(url):
         user_agent = await get_value_default("user-agent", "Network")
     except:
         user_agent = "Slixfeed/0.1"
+    if not len(user_agent):
+        user_agent = "Slixfeed/0.1"
     timeout = ClientTimeout(total=10)
     headers = {'User-Agent': user_agent}
     async with ClientSession(headers=headers) as session:
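The added guard covers a user-agent key that is present but empty, not only a missing one. A condensed sketch of the resulting fetch path, assuming aiohttp (where the ClientSession and ClientTimeout names above come from); note that a plain `if not user_agent:` would cover both None and the empty string in one test:

```python
# Condensed sketch of download_feed's header setup, assuming aiohttp;
# the fallback agent string matches the hunk above.
from aiohttp import ClientSession, ClientTimeout

async def fetch(url, user_agent=None):
    if not user_agent:  # covers both None and "" in one test
        user_agent = "Slixfeed/0.1"
    timeout = ClientTimeout(total=10)
    headers = {'User-Agent': user_agent}
    async with ClientSession(headers=headers) as session:
        async with session.get(url, timeout=timeout) as response:
            return await response.text()
```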
@@ -597,6 +599,8 @@ def get_title(url, feed):
         title = feed["feed"]["title"]
     except:
         title = urlsplit(url).netloc
+    if not title:
+        title = urlsplit(url).netloc
     return title
@@ -621,7 +625,7 @@ async def feed_mode_request(url, tree):
     """
     feeds = {}
     parted_url = urlsplit(url)
-    paths = await get_list("pathnames")
+    paths = await get_list("pathnames", "lists.yaml")
     for path in paths:
         address = urlunsplit([
             parted_url.scheme,
@@ -693,7 +697,7 @@ async def feed_mode_request(url, tree):
             ).format(url)
     if not positive:
         msg = (
-            "No feeds were found for {}."
+            "No feeds were found for {}"
         ).format(url)
         return msg
     elif feeds:
@@ -721,17 +725,21 @@ async def feed_mode_scan(url, tree):
     feeds = {}
     # paths = []
     # TODO Test
-    paths = await get_list("pathnames")
+    paths = await get_list("pathnames", "lists.yaml")
     for path in paths:
         # xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
-        xpath_query = "//a[contains(@href,'{}')]".format(path)
+        # xpath_query = "//a[contains(@href,'{}')]".format(path)
+        num = 5
+        xpath_query = "(//a[contains(@href,'{}')])[position()<={}]".format(path, num)
         addresses = tree.xpath(xpath_query)
+        xpath_query = "(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num)
+        addresses += tree.xpath(xpath_query)
         parted_url = urlsplit(url)
         # NOTE Should number of addresses be limited or
         #      perhaps be N from the start and N from the end
         for address in addresses:
-            print(address.xpath('@href')[0])
-            print(addresses)
+            # print(address.xpath('@href')[0])
+            # print(addresses)
             address = address.xpath('@href')[0]
             if "/" not in address:
                 protocol = parted_url.scheme
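The two position() filters implement the idea recorded in the NOTE above: take N anchors from the start and N from the end of the match list instead of every link on the page. A standalone illustration with lxml (the sample HTML is made up):

```python
# Standalone illustration of the XPath slicing above; the HTML is made up.
from lxml import html

doc = html.fromstring(
    "<body>"
    + "".join('<a href="/feed/{0}">feed {0}</a>'.format(i) for i in range(20))
    + "</body>"
)
num = 5
path = "/feed/"
# Parenthesizing the selection makes position() refer to the whole
# matched node set, so these take its first and last num members.
first = doc.xpath("(//a[contains(@href,'{}')])[position()<={}]".format(path, num))
last = doc.xpath("(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num))
print(len(first), len(last))  # 5 5
```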
@@ -759,11 +767,15 @@ async def feed_mode_scan(url, tree):
             if res[1] == 200:
                 try:
                     feeds[address] = parse(res[0])
+                    # print(feeds[address])
+                    # breakpoint()
                     # print(feeds)
                 except:
                     continue
     if len(feeds) > 1:
-        positive = 0
+        # print(feeds)
+        # breakpoint()
+        counter = 0
         msg = (
             "RSS URL scan has found {} feeds:\n```\n"
         ).format(len(feeds))
@@ -779,23 +791,32 @@ async def feed_mode_scan(url, tree):
                 feed_addr = feed
             feed_amnt = len(feeds[feed].entries)
             if feed_amnt:
-                positive = 1
+                # NOTE Because there could be many false positives
+                #      which are revealed in second phase of scan, we
+                #      could end with a single feed, which would be
+                #      listed instead of fetched, so feed_mark is
+                #      utilized in order to make fetch possible.
+                feed_mark = [feed_addr]
+                counter += 1
             msg += (
-                "Title: {}\n"
-                " Link: {}\n"
-                "Count: {}\n"
+                "Title : {}\n"
+                "Link : {}\n"
+                "Count : {}\n"
                 "\n"
             ).format(
                 feed_name,
                 feed_addr,
                 feed_amnt
             )
-        msg += (
-            "```\nThe above feeds were extracted from\n{}"
-        ).format(url)
-        if not positive:
+        if counter > 1:
+            msg += (
+                "```\nThe above feeds were extracted from\n{}"
+            ).format(url)
+        elif feed_mark:
+            return feed_mark
+        else:
             msg = (
-                "No feeds were found for {}."
+                "No feeds were found for {}"
             ).format(url)
         return msg
     elif feeds:
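The feed_mark branch changes feed_mode_scan's return type on purpose: a string is a listing for the user, while a one-element list tells the caller to fetch that address directly, which is the contract probe_page already checks with isinstance in the earlier hunk. A reduced sketch of that contract (names follow the hunks above, bodies trimmed to the branching):

```python
# Reduced sketch of the contract between feed_mode_scan and its caller;
# this is an illustration, not the project's probe_page itself.
async def handle_scan_result(callback, result, db_file):
    if isinstance(result, list):
        # Exactly one live feed survived the scan: fetch it directly
        # rather than presenting a one-item menu.
        return await callback(db_file, result[0])
    # Otherwise the result is a human-readable message to send as-is.
    return result
```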

View file

@@ -471,7 +471,10 @@ async def get_entry_unread(db_file, num=None):
         title = result[1]
         summary = result[2]
         # Remove HTML tags
-        summary = BeautifulSoup(summary, "lxml").text
+        try:
+            summary = BeautifulSoup(summary, "lxml").text
+        except:
+            print(result[2])
         # TODO Limit text length
         summary = summary.replace("\n\n\n", "\n\n")
         length = await get_settings_value(db_file, "length")
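One caveat to this guard: when the except branch fires, summary keeps its raw value, so the summary.replace(...) call two lines later can still raise if result[2] is None. A slightly tighter variant (an assumption, not this commit's code) falls back to an empty string:

```python
# Hypothetical tighter variant of the guard above.
from bs4 import BeautifulSoup

def strip_html(summary):
    try:
        # BeautifulSoup raises on None input, so substitute "" first.
        return BeautifulSoup(summary or "", "lxml").text
    except Exception:
        return ""
```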

View file

@@ -78,7 +78,7 @@ await taskhandler.start_tasks(
 """
 async def start_tasks_xmpp(self, jid, tasks):
-    print("start_tasks_xmpp", jid, tasks)
+    # print("start_tasks_xmpp", jid, tasks)
     task_manager[jid] = {}
     for task in tasks:
         # print("task:", task)
@@ -109,7 +109,7 @@ async def start_tasks_xmpp(self, jid, tasks):
 #         await task
 async def clean_tasks_xmpp(jid, tasks):
-    print("clean_tasks_xmpp", jid, tasks)
+    # print("clean_tasks_xmpp", jid, tasks)
     for task in tasks:
         # if task_manager[jid][task]:
         try:
@@ -132,7 +132,7 @@ Consider callback e.g. Slixfeed.send_status.
 Or taskhandler for each protocol or specific taskhandler function.
 """
 async def task_jid(self, jid):
-    print("task_jid", jid)
+    # print("task_jid", jid)
     """
     JID (Jabber ID) task manager.
@@ -258,7 +258,7 @@ async def send_update(self, jid, num=None):
 async def send_status(self, jid):
-    print("send_status", jid)
+    # print("send_status", jid)
     # print(await current_time(), jid, "def send_status")
     """
     Send status message.
@@ -336,7 +336,7 @@ async def send_status(self, jid):
 async def refresh_task(self, jid, callback, key, val=None):
-    print("refresh_task", jid, key)
+    # print("refresh_task", jid, key)
     """
     Apply new setting at runtime.
@@ -382,7 +382,7 @@ async def refresh_task(self, jid, callback, key, val=None):
 # TODO Take this function out of
 #      <class 'slixmpp.clientxmpp.ClientXMPP'>
 async def check_updates(jid):
-    print("check_updates", jid)
+    # print("check_updates", jid)
     # print(await current_time(), jid, "def check_updates")
     """
     Start calling for update check up.
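All six hunks in this file silence tracing print calls by commenting them out. TODO item 9 above already plans a move to the logging module, which would make such traces switchable at runtime instead of requiring edits; a minimal sketch of that direction (an assumption, not part of this commit):

```python
# Hypothetical logging-based replacement for the commented-out prints.
import logging

logger = logging.getLogger("slixfeed.taskhandler")

async def start_tasks_xmpp(self, jid, tasks):
    # Emitted only when the application opts in, e.g. via
    # logging.basicConfig(level=logging.DEBUG); silent otherwise.
    logger.debug("start_tasks_xmpp %s %s", jid, tasks)
```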

View file

@@ -64,6 +64,7 @@ import listhandler as lister
 import sqlitehandler as sqlite
 import taskhandler as tasker
 import urlhandler as urlfixer
+from time import sleep
 from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
 # from slixmpp.plugins.xep_0402 import BookmarkStorage, Conference
@@ -102,7 +103,6 @@ class Slixfeed(slixmpp.ClientXMPP):
         # The bot works fine when the nickname is hardcoded; or
         # The bot won't join some MUCs when its nickname has brackets
         self.nick = nick
-
         # The session_start event will be triggered when
         # the bot establishes its connection with the server
         # and the XML streams are ready for use. We want to
@@ -387,10 +387,12 @@ class Slixfeed(slixmpp.ClientXMPP):
         # print(current_time(), "Maximum connection attempts exceeded.")
         # logging.error("Maximum connection attempts exceeded.")
         print(current_time(), "Attempt number", self.connection_attempts)
-        self.reconnect(wait=5.0)
-        seconds = 5
+        seconds = 30
         print(current_time(), "Next attempt within", seconds, "seconds")
-        await asyncio.sleep(seconds)
+        # NOTE asyncio.sleep does not delay the retry as expected
+        # await asyncio.sleep(seconds)
+        sleep(seconds)
+        self.reconnect(wait=5.0)

     async def inspect_connection(self, event):
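Two things change here: the delay now runs before reconnect() instead of after it, and the wait became a blocking time.sleep, presumably because the awaited asyncio.sleep let other event-loop activity race the retry. The trade-off is that a blocking sleep stalls every task in the process for the full 30 seconds. A condensed sketch of the resulting flow, assuming a slixmpp ClientXMPP subclass that maintains self.connection_attempts:

```python
# Condensed sketch of the recovery flow after this change; assumes a
# slixmpp ClientXMPP subclass with a connection_attempts counter.
from time import sleep

def on_connection_failed(self, event):
    self.connection_attempts += 1
    seconds = 30
    # time.sleep blocks the whole event loop, so nothing can race the
    # retry during the pause; asyncio.sleep would yield to other tasks.
    sleep(seconds)
    # slixmpp's reconnect() schedules the new connection attempt.
    self.reconnect(wait=5.0)
```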
@@ -912,7 +914,7 @@ class Slixfeed(slixmpp.ClientXMPP):
                 ["status"]
             )
             task = (
-                "📫️ Processing request to fetch data from {} ..."
+                "📫️ Processing request to fetch data from {}"
             ).format(url)
             process_task_message(self, jid, task)
             action = await initdb(
@@ -1080,8 +1082,9 @@ class Slixfeed(slixmpp.ClientXMPP):
                     action = (
                         "Only new items of newly added feeds will be sent."
                     )
-                case _ if message_lowercase.startswith("next"):
-                    num = message[5:]
+                # TODO Will you add support for a number of messages?
+                case "next":
+                    # num = message[5:]
                     await tasker.clean_tasks_xmpp(
                         jid,
                         ["interval", "status"]
@@ -1137,13 +1140,15 @@ class Slixfeed(slixmpp.ClientXMPP):
                     else:
                         action = "Missing value."
                 case "random":
-                    action = "Updates will be sent randomly."
+                    # TODO /questions/2279706/select-random-row-from-a-sqlite-table
+                    # NOTE sqlitehandler.get_entry_unread
+                    action = "Updates will be sent in random order."
                 case _ if message_lowercase.startswith("read"):
                     data = message[5:]
                     data = data.split()
                     url = data[0]
                     task = (
-                        "📫️ Processing request to fetch data from {} ..."
+                        "📫️ Processing request to fetch data from {}"
                     ).format(url)
                     process_task_message(self, jid, task)
                     await tasker.clean_tasks_xmpp(
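Both command hunks rely on Python 3.10 structural pattern matching: a literal case like `case "next":` matches only the bare keyword, while `case _ if message_lowercase.startswith("read"):` uses a wildcard plus guard to match a command that carries an argument. A standalone illustration (the commands are made up):

```python
# Standalone illustration of the match/case dispatch style above.
def route(message):
    message_lowercase = message.lower()
    match message_lowercase:
        case "next":
            # Literal pattern: only the bare keyword matches.
            return "sending next item"
        case _ if message_lowercase.startswith("read"):
            # Wildcard with a guard: matches "read <url>" and leaves
            # the argument to be sliced off the original message.
            return "reading {}".format(message[5:])
        case _:
            return "unknown command"

print(route("read https://example.org"))  # reading https://example.org
```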