From 031eb6ce534d52e41d5361c8934a62266d1a591d Mon Sep 17 00:00:00 2001
From: Schimon Jehudah
Date: Mon, 13 Nov 2023 13:45:10 +0000
Subject: [PATCH] Update 8 files

- /slixfeed/sqlitehandler.py
- /slixfeed/xmpphandler.py
- /slixfeed/opmlhandler.py
- /slixfeed/datahandler.py
- /slixfeed/datetimehandler.py
- /slixfeed/__main__.py
- /slixfeed/confighandler.py
- /slixfeed/filterhandler.py
---
 slixfeed/__main__.py | 43 +-
 slixfeed/confighandler.py | 104 ++-
 slixfeed/datahandler.py | 851 ++++++++++++++++++++-----
 slixfeed/datetimehandler.py | 81 +++
 slixfeed/filterhandler.py | 105 +++
 slixfeed/opmlhandler.py | 56 ++
 slixfeed/sqlitehandler.py | 1201 ++++++++++++++++++++++++++---------
 slixfeed/xmpphandler.py | 845 ++++++++++++++++--------
 8 files changed, 2535 insertions(+), 751 deletions(-)
 create mode 100644 slixfeed/datetimehandler.py
 create mode 100644 slixfeed/filterhandler.py
 create mode 100644 slixfeed/opmlhandler.py

diff --git a/slixfeed/__main__.py b/slixfeed/__main__.py
index 0fb8831..ee9c5af 100644
--- a/slixfeed/__main__.py
+++ b/slixfeed/__main__.py
@@ -1,13 +1,42 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# TODO
-#
-# 0) sql prepared statements
-# 1) Autodetect feed:
-# if page is not feed (or HTML) and contains
-# 2) OPML import/export
-# 3) 2022-12-30 reduce async to (maybe) prevent inner lock. async on task: commands, downloader, updater
+"""
+
+FIXME
+
+1) Check feed duplication on runtime.
+   When a feed is valid and is not yet in the database, it is
+   possible to send a batch which would result in duplication.
+   Consequently, it might result in a database lock error upon
+   a feed removal attempt.
+
+TODO
+
+1) SQL prepared statements
+
+2) Machine Learning for scraping Title, Link, Summary and Timestamp
+
+3) Support MUC
+
+4) Support categories
+
+5) Default prepackaged list of feeds
+
+6) XMPP commands
+
+7) Bot as transport
+
+8) OMEMO
+
+9) Logging
+
+10) Default feeds (e.g. Blacklisted News, TBOT etc.)
+
+11) Download and upload/send article (xHTML, xHTMLZ, Markdown, MHTML, TXT)
+    Use Readability
+
+"""
 
 # vars and their meanings:
 # jid = Jabber ID (XMPP)
diff --git a/slixfeed/confighandler.py b/slixfeed/confighandler.py
index ba2fbaf..638893a 100644
--- a/slixfeed/confighandler.py
+++ b/slixfeed/confighandler.py
@@ -1,6 +1,15 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
+"""
+
+TODO
+
+1) Use file settings.csv and pathnames.txt instead:
+   See get_value_default and get_default_list
+
+"""
+
 import os
 import sys
 
@@ -8,16 +17,21 @@ def get_default_dbdir():
     """
     Determine the directory path where dbfile will be stored.
 
-    If $XDG_DATA_HOME is defined, use it
-    else if $HOME exists, use it
-    else if the platform is Windows, use %APPDATA%
-    else use the current directory.
+    * If $XDG_DATA_HOME is defined, use it;
+    * else if $HOME exists, use it;
+    * else if the platform is Windows, use %APPDATA%;
+    * else use the current directory.
 
-    :return: Path to database file.
+    Returns
+    -------
+    str
+        Path to database file.
 
     Note
     ----
-    This code was taken from the buku project.
+    This function was taken from project buku.
+
+    See https://github.com/jarun/buku
 
     * Arun Prakash Jana (jarun)
     * Dmitry Marakasov (AMDmi3)
@@ -41,12 +55,15 @@ def get_default_confdir():
     """
    Determine the directory path where configuration will be stored.
 
-    If $XDG_CONFIG_HOME is defined, use it
-    else if $HOME exists, use it
-    else if the platform is Windows, use %APPDATA%
-    else use the current directory.
+    * If $XDG_CONFIG_HOME is defined, use it;
+    * else if $HOME exists, use it;
+    * else if the platform is Windows, use %APPDATA%;
+    * else use the current directory.
 
-    :return: Path to configueation directory.
+    Returns
+    -------
+    str
+        Path to configuration directory.
     """
     # config_home = xdg.BaseDirectory.xdg_config_home
     config_home = os.environ.get('XDG_CONFIG_HOME')
@@ -67,24 +84,69 @@ async def get_value_default(key):
     """
     Get settings default value.
 
-    :param key: "enabled", "interval", "quantum".
-    :return: Integer.
+    Parameters
+    ----------
+    key : str
+        Key: enabled, filter-allow, filter-deny,
+        interval, quantum, random.
+
+    Returns
+    -------
+    result : int or str
+        Value.
     """
-    if key == "enabled":
-        result = 1
-    elif key == "quantum":
-        result = 4
-    elif key == "interval":
-        result = 30
+    match key:
+        case "enabled":
+            result = 1
+        case "filter-allow":
+            result = "hitler,sadam,saddam"
+        case "filter-deny":
+            result = "crim,dead,death,disaster,holocaust,murder,war"
+        case "interval":
+            result = 30
+        case "quantum":
+            result = 4
+        case "random":
+            result = 0
     return result
 
 
+def get_list():
+    """
+    Get list of pathnames from the dictionary file.
+
+    Returns
+    -------
+    paths : list
+        List of pathnames.
+    """
+    paths = []
+    cfg_dir = get_default_confdir()
+    if not os.path.isdir(cfg_dir):
+        os.mkdir(cfg_dir)
+    cfg_file = os.path.join(cfg_dir, r"url_paths.txt")
+    if not os.path.isfile(cfg_file):
+        # confighandler.generate_dictionary()
+        pathnames = get_default_list()
+        file = open(cfg_file, "w")
+        file.writelines("\n".join(pathnames))
+        file.close()
+    file = open(cfg_file, "r")
+    lines = file.readlines()
+    for line in lines:
+        paths.extend([line.strip()])
+    return paths
+
+
 # async def generate_dictionary():
 def get_default_list():
     """
     Generate a dictionary file.
 
-    :return: List.
+    Returns
+    -------
+    paths : list
+        List of pathnames.
     """
     paths = [
         ".atom",
@@ -139,6 +201,8 @@ def get_default_list():
         # "/rss.json",
         "/rss.php",
         "/rss.xml",
+        "/syndication.php?type=atom1.0", # mybb
+        "/syndication.php?type=rss2.0",
         "/timeline.rss",
         "/videos.atom",
         # "/videos.json",
diff --git a/slixfeed/datahandler.py b/slixfeed/datahandler.py
index b0ad499..0e763da 100644
--- a/slixfeed/datahandler.py
+++ b/slixfeed/datahandler.py
@@ -1,29 +1,75 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import feedparser
 import aiohttp
 import asyncio
+import feedparser
 import os
+
 import sqlitehandler
 import confighandler
+import datetimehandler
+import filterhandler
 
-from http.client import IncompleteRead
 from asyncio.exceptions import IncompleteReadError
+from http.client import IncompleteRead
 from urllib import error
 from bs4 import BeautifulSoup
 # from xml.etree.ElementTree import ElementTree, ParseError
-from urllib.parse import urlparse
+from urllib.parse import urljoin
+from urllib.parse import urlsplit
+from urllib.parse import urlunsplit
 from lxml import html
 
+
+# NOTE Perhaps this needs to be executed
+# just once per program execution
+async def initdb(jid, callback, message=None):
+    """
+    Callback function to instantiate action on database.
+
+    Parameters
+    ----------
+    jid : str
+        Jabber ID.
+    callback : ?
+        Function name.
+    message : str, optional
+        Optional argument for callbacks that expect an
+        extra parameter, such as a message or a URL.
+        The default is None.
+
+    Returns
+    -------
+    object
+        Coroutine object.
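+
+    Examples
+    --------
+    A minimal sketch of the intended call pattern; the JID and
+    the URL below are hypothetical placeholders, the callbacks
+    are functions from this codebase::
+
+        await initdb("user@example.org", sqlitehandler.statistics)
+        await initdb("user@example.org", add_feed, "http://example.org/feed.xml")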
+ """ + db_dir = confighandler.get_default_dbdir() + if not os.path.isdir(db_dir): + os.mkdir(db_dir) + db_file = os.path.join(db_dir, r"{}.db".format(jid)) + sqlitehandler.create_tables(db_file) + # await sqlitehandler.set_default_values(db_file) + if message: + return await callback(db_file, message) + else: + return await callback(db_file) + + +async def download_updates(db_file, url=None): """ Check feeds for new entries. - :param db_file: Database filename. + Parameters + ---------- + db_file : str + Path to database file. + url : str, optional + URL. The default is None. """ - urls = await sqlitehandler.get_subscriptions(db_file) - + if url: + urls = [url] # Valid [url] and [url,] and (url,) + else: + urls = await sqlitehandler.get_feeds_url(db_file) for url in urls: # print(os.path.basename(db_file), url[0]) source = url[0] @@ -34,31 +80,42 @@ async def download_updates(db_file): # urls.next() # next(urls) continue - - await sqlitehandler.update_source_status(db_file, res[1], source) - + await sqlitehandler.update_source_status( + db_file, + res[1], + source + ) if res[0]: try: feed = feedparser.parse(res[0]) if feed.bozo: - # bozo = ("WARNING: Bozo detected for feed <{}>. " - # "For more information, visit " - # "https://pythonhosted.org/feedparser/bozo.html" - # .format(source)) - # print(bozo) + bozo = ( + "WARNING: Bozo detected for feed: {}\n" + "For more information, visit " + "https://pythonhosted.org/feedparser/bozo.html" + ).format(source) + print(bozo) valid = 0 else: valid = 1 - await sqlitehandler.update_source_validity(db_file, source, valid) - except (IncompleteReadError, IncompleteRead, error.URLError) as e: - print(e) + await sqlitehandler.update_source_validity( + db_file, + source, + valid) + except ( + IncompleteReadError, + IncompleteRead, + error.URLError + ) as e: + # print(e) + # TODO Print error to log + None # NOTE I don't think there should be "return" # because then we might stop scanning next URLs # return # TODO Place these couple of lines back down # NOTE Need to correct the SQL statement to do so # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW - if res[1] == 200: # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW # TODO Place these couple of lines back down @@ -66,25 +123,60 @@ async def download_updates(db_file): entries = feed.entries # length = len(entries) # await sqlitehandler.remove_entry(db_file, source, length) - await sqlitehandler.remove_nonexistent_entries(db_file, feed, source) - - new_entry = 0 + await sqlitehandler.remove_nonexistent_entries( + db_file, + feed, + source + ) + # new_entry = 0 for entry in entries: - + if entry.has_key("id"): + eid = entry.id if entry.has_key("title"): title = entry.title else: title = feed["feed"]["title"] - if entry.has_key("link"): - link = entry.link + # link = complete_url(source, entry.link) + link = await join_url(source, entry.link) + link = await trim_url(link) else: link = source - - exist = await sqlitehandler.check_entry_exist(db_file, title, link) - + # TODO Pass date too for comparion check + if entry.has_key("published"): + date = entry.published + date = await datetimehandler.rfc2822_to_iso8601(date) + else: + date = None + exist = await sqlitehandler.check_entry_exist( + db_file, + source, + eid=eid, + title=title, + link=link, + date=date + ) if not exist: - new_entry = new_entry + 1 + # new_entry = new_entry + 1 + if entry.has_key("published"): + date = entry.published + date = await datetimehandler.rfc2822_to_iso8601(date) + # try: + # date = datetime.strptime(date, "%a, %d %b %Y 
%H:%M:%S %z") + # except: + # date = datetime.strptime(date, '%a, %d %b %Y %H:%M:%S %Z') + # finally: + # date = date.isoformat() + # if parsedate(date): # Is RFC 2822 format + # date = parsedate_to_datetime(date) # Process timestamp + # date = date.isoformat() # Convert to ISO 8601 + else: + # TODO Just set date = "*** No date ***" + # date = datetime.now().isoformat() + date = await datetimehandler.now() + # NOTE Would seconds result in better database performance + # date = datetime.datetime(date) + # date = (date-datetime.datetime(1970,1,1)).total_seconds() # TODO Enhance summary if entry.has_key("summary"): summary = entry.summary @@ -93,164 +185,156 @@ async def download_updates(db_file): # TODO Limit text length summary = summary.replace("\n\n", "\n")[:300] + " ‍⃨" else: - summary = '*** No summary ***' - entry = (title, summary, link, source, 0); - await sqlitehandler.add_entry_and_set_date(db_file, source, entry) + summary = "*** No summary ***" + read_status = 0 + pathname = urlsplit(link).path + string = ( + "{} {} {}" + ).format( + title, + summary, + pathname + ) + allow_list = await filterhandler.is_listed( + db_file, + "allow", + string + ) + if not allow_list: + reject_list = await filterhandler.is_listed( + db_file, + "deny", + string + ) + if reject_list: + print(">>> REJECTED", title) + summary = "REJECTED" + # summary = "" + read_status = 1 + entry = ( + title, + summary, + link, + eid, + source, + date, + read_status + ) + await sqlitehandler.add_entry_and_set_date( + db_file, + source, + entry + ) + # print(await datetimehandler.current_time(), entry, title) + # else: + # print(await datetimehandler.current_time(), exist, title) + + +async def add_feed_no_check(db_file, data): + """ + Add given feed without validity check. + + Parameters + ---------- + db_file : str + Path to database file. + data : str + URL or URL and Title. + + Returns + ------- + msg : str + Status message. + """ + url = data[0] + title = data[1] + url = await trim_url(url) + exist = await sqlitehandler.check_feed_exist(db_file, url) + if not exist: + msg = await sqlitehandler.add_feed(db_file, url, title) + await download_updates(db_file, [url]) + else: + ix = exist[0] + name = exist[1] + msg = ( + "> {}\nNews source \"{}\" is already " + "listed in the subscription list at " + "index {}".format(url, name, ix) + ) + return msg async def add_feed(db_file, url): """ Check whether feed exist, otherwise process it. - :param db_file: Database filename. - :param url: URL. - :return: Status message. + Parameters + ---------- + db_file : str + Path to database file. + url : str + URL. + + Returns + ------- + msg : str + Status message. """ + msg = None + url = await trim_url(url) exist = await sqlitehandler.check_feed_exist(db_file, url) - if not exist: res = await download_feed(url) if res[0]: feed = feedparser.parse(res[0]) title = await get_title(url, feed) if feed.bozo: - bozo = ("WARNING: Bozo detected. Failed to load <{}>.".format(url)) + bozo = ( + "Bozo detected. Failed to load: {}." 
+ ).format(url) print(bozo) try: # tree = etree.fromstring(res[0]) # etree is for xml tree = html.fromstring(res[0]) except: - return "Failed to parse URL <{}> as feed".format(url) - - print("RSS Auto-Discovery Engaged") - xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]""" - # xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href""" - # xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href" - feeds = tree.xpath(xpath_query) - if len(feeds) > 1: - msg = "RSS Auto-Discovery has found {} feeds:\n\n".format(len(feeds)) - for feed in feeds: - # # The following code works; - # # The following code will catch - # # only valid resources (i.e. not 404); - # # The following code requires more bandwidth. - # res = await download_feed(feed) - # if res[0]: - # disco = feedparser.parse(res[0]) - # title = disco["feed"]["title"] - # msg += "{} \n {} \n\n".format(title, feed) - feed_name = feed.xpath('@title')[0] - feed_addr = feed.xpath('@href')[0] - msg += "{}\n{}\n\n".format(feed_name, feed_addr) - msg += "The above feeds were extracted from\n{}".format(url) - return msg - elif feeds: - url = feeds[0].xpath('@href')[0] - # Why wouldn't add_feed return a message - # upon success unless return is explicitly - # mentioned, yet upon failure it wouldn't? - return await add_feed(db_file, url) - - print("RSS Scan Mode Engaged") - feeds = {} - paths = [] - # TODO Test - cfg_dir = confighandler.get_default_confdir() - if not os.path.isdir(cfg_dir): - os.mkdir(cfg_dir) - cfg_file = os.path.join(cfg_dir, r"url_paths.txt") - if not os.path.isfile(cfg_file): - # confighandler.generate_dictionary() - list = confighandler.get_default_list() - file = open(cfg_file, "w") - file.writelines("\n".join(list)) - file.close() - file = open(cfg_file, "r") - lines = file.readlines() - for line in lines: - paths.extend([line.strip()]) - for path in paths: - # xpath_query = "//*[@*[contains(.,'{}')]]".format(path) - xpath_query = "//a[contains(@href,'{}')]".format(path) - addresses = tree.xpath(xpath_query) - parted_url = urlparse(url) - # NOTE Should number of addresses be limited or - # perhaps be N from the start and N from the end - for address in addresses: - address = address.xpath('@href')[0] - if address.startswith('/'): - address = parted_url.scheme + '://' + parted_url.netloc + address - res = await download_feed(address) - if res[1] == 200: - try: - feeds[address] = feedparser.parse(res[0])["feed"]["title"] - except: - continue - if len(feeds) > 1: - msg = "RSS URL scan has found {} feeds:\n\n".format(len(feeds)) - for feed in feeds: - # try: - # res = await download_feed(feed) - # except: - # continue - feed_name = feeds[feed] - feed_addr = feed - msg += "{}\n{}\n\n".format(feed_name, feed_addr) - msg += "The above feeds were extracted from\n{}".format(url) - return msg - elif feeds: - url = list(feeds)[0] - return await add_feed(db_file, url) - - # (HTTP) Request(s) Paths - print("RSS Arbitrary Mode Engaged") - feeds = {} - parted_url = urlparse(url) - for path in paths: - address = parted_url.scheme + '://' + parted_url.netloc + path - res = await download_feed(address) - if res[1] == 200: - # print(feedparser.parse(res[0])["feed"]["title"]) - # feeds[address] = feedparser.parse(res[0])["feed"]["title"] - 
try: - title = feedparser.parse(res[0])["feed"]["title"] - except: - title = '*** No Title ***' - feeds[address] = title - - # Check whether URL has path (i.e. not root) - if parted_url.path.split('/')[1]: - paths.extend([".atom", ".feed", ".rdf", ".rss"]) if '.rss' not in paths else -1 - # if paths.index('.rss'): - # paths.extend([".atom", ".feed", ".rdf", ".rss"]) - address = parted_url.scheme + '://' + parted_url.netloc + '/' + parted_url.path.split('/')[1] + path - res = await download_feed(address) - if res[1] == 200: - try: - title = feedparser.parse(res[0])["feed"]["title"] - except: - title = '*** No Title ***' - feeds[address] = title - if len(feeds) > 1: - msg = "RSS URL discovery has found {} feeds:\n\n".format(len(feeds)) - for feed in feeds: - feed_name = feeds[feed] - feed_addr = feed - msg += "{}\n{}\n\n".format(feed_name, feed_addr) - msg += "The above feeds were extracted from\n{}".format(url) - elif feeds: - url = list(feeds)[0] - msg = await add_feed(db_file, url) - else: - msg = "No news feeds were found for URL <{}>.".format(url) + msg = ( + "> {}\nFailed to parse URL as feed." + ).format(url) + if not msg: + print("RSS Auto-Discovery Engaged") + msg = await feed_mode_auto_discovery(db_file, url, tree) + if not msg: + print("RSS Scan Mode Engaged") + msg = await feed_mode_scan(db_file, url, tree) + if not msg: + print("RSS Arbitrary Mode Engaged") + msg = await feed_mode_request(db_file, url, tree) + if not msg: + msg = ( + "> {}\nNo news feeds were found for URL." + ).format(url) else: - msg = await sqlitehandler.add_feed(db_file, title, url, res) + status = res[1] + msg = await sqlitehandler.add_feed( + db_file, + url, + title, + status + ) + await download_updates(db_file, [url]) else: - msg = "Failed to get URL <{}>. Reason: {}".format(url, res[1]) + status = res[1] + msg = ( + "> {}\nFailed to get URL. Reason: {}" + ).format(url, status) else: ix = exist[0] name = exist[1] - msg = "> {}\nNews source \"{}\" is already listed in the subscription list at index {}".format(url, name, ix) + msg = ( + "> {}\nNews source \"{}\" is already " + "listed in the subscription list at " + "index {}".format(url, name, ix) + ) return msg @@ -258,8 +342,15 @@ async def download_feed(url): """ Download content of given URL. - :param url: URL. - :return: Document or error message. + Parameters + ---------- + url : str + URL. + + Returns + ------- + msg: list or str + Document or error message. """ timeout = aiohttp.ClientTimeout(total=10) async with aiohttp.ClientSession() as session: @@ -271,30 +362,438 @@ async def download_feed(url): try: doc = await response.text() # print (response.content_type) - return [doc, status] + msg = [ + doc, + status + ] except: - # return [False, "The content of this document doesn't appear to be textual."] - return [False, "Document is too large or is not textual."] + # msg = [ + # False, + # ("The content of this document " + # "doesn't appear to be textual." + # ) + # ] + msg = [ + False, + "Document is too large or is not textual." + ] else: - return [False, "HTTP Error: " + str(status)] + msg = [ + False, + "HTTP Error: " + str(status) + ] except aiohttp.ClientError as e: - print('Error', str(e)) - return [False, "Error: " + str(e)] + # print('Error', str(e)) + msg = [ + False, + "Error: " + str(e) + ] except asyncio.TimeoutError as e: # print('Timeout:', str(e)) - return [False, "Timeout: " + str(e)] + msg = [ + False, + "Timeout: " + str(e) + ] + return msg async def get_title(url, feed): """ Get title of feed. 
- :param url: URL - :param feed: Parsed feed - :return: Title or URL hostname. + Parameters + ---------- + url : str + URL. + feed : dict + Parsed feed document. + + Returns + ------- + title : str + Title or URL hostname. """ try: title = feed["feed"]["title"] except: - title = urlparse(url).netloc + title = urlsplit(url).netloc return title + + +# NOTE Read the documentation +# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin +def complete_url(source, link): + """ + Check if URL is pathname and complete it into URL. + + Parameters + ---------- + source : str + Feed URL. + link : str + Link URL or pathname. + + Returns + ------- + str + URL. + """ + if link.startswith("www."): + return "http://" + link + parted_link = urlsplit(link) + parted_feed = urlsplit(source) + if parted_link.scheme == "magnet" and parted_link.query: + return link + if parted_link.scheme and parted_link.netloc: + return link + if link.startswith("//"): + if parted_link.netloc and parted_link.path: + new_link = urlunsplit([ + parted_feed.scheme, + parted_link.netloc, + parted_link.path, + parted_link.query, + parted_link.fragment + ]) + elif link.startswith("/"): + new_link = urlunsplit([ + parted_feed.scheme, + parted_feed.netloc, + parted_link.path, + parted_link.query, + parted_link.fragment + ]) + elif link.startswith("../"): + pathlink = parted_link.path.split("/") + pathfeed = parted_feed.path.split("/") + for i in pathlink: + if i == "..": + if pathlink.index("..") == 0: + pathfeed.pop() + else: + break + while pathlink.count(".."): + if pathlink.index("..") == 0: + pathlink.remove("..") + else: + break + pathlink = "/".join(pathlink) + pathfeed.extend([pathlink]) + new_link = urlunsplit([ + parted_feed.scheme, + parted_feed.netloc, + "/".join(pathfeed), + parted_link.query, + parted_link.fragment + ]) + else: + pathlink = parted_link.path.split("/") + pathfeed = parted_feed.path.split("/") + if link.startswith("./"): + pathlink.remove(".") + if not source.endswith("/"): + pathfeed.pop() + pathlink = "/".join(pathlink) + pathfeed.extend([pathlink]) + new_link = urlunsplit([ + parted_feed.scheme, + parted_feed.netloc, + "/".join(pathfeed), + parted_link.query, + parted_link.fragment + ]) + return new_link + + +""" +TODO +Feed https://www.ocaml.org/feed.xml +Link %20https://frama-c.com/fc-versions/cobalt.html%20 + +FIXME +Feed https://cyber.dabamos.de/blog/feed.rss +Link https://cyber.dabamos.de/blog/#article-2022-07-15 +""" +async def join_url(source, link): + """ + Join base URL with given pathname. + + Parameters + ---------- + source : str + Feed URL. + link : str + Link URL or pathname. + + Returns + ------- + str + URL. + """ + if link.startswith("www."): + new_link = "http://" + link + elif link.startswith("%20") and link.endswith("%20"): + old_link = link.split("%20") + del old_link[0] + old_link.pop() + new_link = "".join(old_link) + else: + new_link = urljoin(source, link) + return new_link + + +async def trim_url(url): + """ + Check URL pathname for double slash. + + Parameters + ---------- + url : str + URL. + + Returns + ------- + url : str + URL. 
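+
+    Examples
+    --------
+    An illustrative input/output pair; the URL is a hypothetical
+    placeholder, and only the path component is deduplicated::
+
+        await trim_url("http://example.org//news//feed.xml")
+        # -> 'http://example.org/news/feed.xml'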
+ """ + parted_url = urlsplit(url) + protocol = parted_url.scheme + hostname = parted_url.netloc + pathname = parted_url.path + queries = parted_url.query + fragment = parted_url.fragment + while "//" in pathname: + pathname = pathname.replace("//", "/") + url = urlunsplit([ + protocol, + hostname, + pathname, + queries, + fragment + ]) + return url + + +# TODO Improve scan by gradual decreasing of path +async def feed_mode_request(db_file, url, tree): + """ + Lookup for feeds by pathname using HTTP Requests. + + Parameters + ---------- + db_file : str + Path to database file. + url : str + URL. + tree : TYPE + DESCRIPTION. + + Returns + ------- + msg : str + Message with URLs. + """ + feeds = {} + parted_url = urlsplit(url) + paths = confighandler.get_list() + for path in paths: + address = urlunsplit([ + parted_url.scheme, + parted_url.netloc, + path, + None, + None + ]) + res = await download_feed(address) + if res[1] == 200: + # print(feedparser.parse(res[0])["feed"]["title"]) + # feeds[address] = feedparser.parse(res[0])["feed"]["title"] + try: + title = feedparser.parse(res[0])["feed"]["title"] + except: + title = '*** No Title ***' + feeds[address] = title + # Check whether URL has path (i.e. not root) + if parted_url.path.split('/')[1]: + paths.extend( + [".atom", ".feed", ".rdf", ".rss"] + ) if '.rss' not in paths else -1 + # if paths.index('.rss'): + # paths.extend([".atom", ".feed", ".rdf", ".rss"]) + address = urlunsplit([ + parted_url.scheme, + parted_url.netloc, + parted_url.path.split('/')[1] + path, + None, + None + ]) + res = await download_feed(address) + if res[1] == 200: + try: + title = feedparser.parse(res[0])["feed"]["title"] + except: + title = '*** No Title ***' + feeds[address] = title + if len(feeds) > 1: + msg = ( + "RSS URL discovery has found {} feeds:\n```\n" + ).format(len(feeds)) + for feed in feeds: + feed_name = feeds[feed] + feed_addr = feed + msg += "{}\n{}\n\n".format(feed_name, feed_addr) + msg += ( + "```\nThe above feeds were extracted from\n{}" + ).format(url) + elif feeds: + feed_addr = list(feeds)[0] + msg = await add_feed(db_file, feed_addr) + return msg + + +async def feed_mode_scan(db_file, url, tree): + """ + Scan page for potential feeds by pathname. + + Parameters + ---------- + db_file : str + Path to database file. + url : str + URL. + tree : TYPE + DESCRIPTION. + + Returns + ------- + msg : str + Message with URLs. 
+ """ + feeds = {} + # paths = [] + # TODO Test + paths = confighandler.get_list() + for path in paths: + # xpath_query = "//*[@*[contains(.,'{}')]]".format(path) + xpath_query = "//a[contains(@href,'{}')]".format(path) + addresses = tree.xpath(xpath_query) + parted_url = urlsplit(url) + # NOTE Should number of addresses be limited or + # perhaps be N from the start and N from the end + for address in addresses: + print(address.xpath('@href')[0]) + print(addresses) + address = address.xpath('@href')[0] + if "/" not in address: + protocol = parted_url.scheme + hostname = parted_url.netloc + pathname = address + address = urlunsplit([ + protocol, + hostname, + pathname, + None, + None + ]) + if address.startswith('/'): + protocol = parted_url.scheme + hostname = parted_url.netloc + pathname = address + address = urlunsplit([ + protocol, + hostname, + pathname, + None, + None + ]) + res = await download_feed(address) + if res[1] == 200: + try: + feeds[address] = feedparser.parse(res[0])["feed"]["title"] + print(feeds) + except: + continue + if len(feeds) > 1: + msg = ( + "RSS URL scan has found {} feeds:\n```\n" + ).format(len(feeds)) + for feed in feeds: + # try: + # res = await download_feed(feed) + # except: + # continue + feed_name = feeds[feed] + feed_addr = feed + msg += "{}\n{}\n\n".format(feed_name, feed_addr) + msg += ( + "```\nThe above feeds were extracted from\n{}" + ).format(url) + return msg + elif feeds: + feed_addr = list(feeds)[0] + msg = await add_feed(db_file, feed_addr) + return msg + + +async def feed_mode_auto_discovery(db_file, url, tree): + """ + Lookup for feeds using RSS autodiscovery technique. + + See: https://www.rssboard.org/rss-autodiscovery + + Parameters + ---------- + db_file : str + Path to database file. + url : str + URL. + tree : TYPE + DESCRIPTION. + + Returns + ------- + msg : str + Message with URLs. + """ + xpath_query = ( + '//link[(@rel="alternate") and ' + '(@type="application/atom+xml" or ' + '@type="application/rdf+xml" or ' + '@type="application/rss+xml")]' + ) + # xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href""" + # xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href" + feeds = tree.xpath(xpath_query) + if len(feeds) > 1: + msg = ( + "RSS Auto-Discovery has found {} feeds:\n```\n" + ).format(len(feeds)) + for feed in feeds: + # # The following code works; + # # The following code will catch + # # only valid resources (i.e. not 404); + # # The following code requires more bandwidth. + # res = await download_feed(feed) + # if res[0]: + # disco = feedparser.parse(res[0]) + # title = disco["feed"]["title"] + # msg += "{} \n {} \n\n".format(title, feed) + feed_name = feed.xpath('@title')[0] + feed_addr = await join_url(url, feed.xpath('@href')[0]) + # if feed_addr.startswith("/"): + # feed_addr = url + feed_addr + msg += "{}\n{}\n\n".format(feed_name, feed_addr) + msg += ( + "```\nThe above feeds were extracted from\n{}" + ).format(url) + return msg + elif feeds: + feed_addr = await join_url(url, feeds[0].xpath('@href')[0]) + # if feed_addr.startswith("/"): + # feed_addr = url + feed_addr + # NOTE Why wouldn't add_feed return a message + # upon success unless return is explicitly + # mentioned, yet upon failure it wouldn't? 
+        # return await add_feed(db_file, feed_addr)
+        msg = await add_feed(db_file, feed_addr)
+        return msg
\ No newline at end of file
diff --git a/slixfeed/datetimehandler.py b/slixfeed/datetimehandler.py
new file mode 100644
index 0000000..1964ebd
--- /dev/null
+++ b/slixfeed/datetimehandler.py
@@ -0,0 +1,81 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+https://feedparser.readthedocs.io/en/latest/date-parsing.html
+"""
+
+from datetime import datetime
+from dateutil.parser import parse
+from email.utils import parsedate
+from email.utils import parsedate_to_datetime
+
+async def now():
+    """
+    ISO 8601 Timestamp.
+
+    Returns
+    -------
+    date : str
+        ISO 8601 Timestamp.
+    """
+    date = datetime.now().isoformat()
+    return date
+
+
+async def current_time():
+    """
+    Return HH:MM:SS timestamp.
+
+    Returns
+    -------
+    time : str
+        HH:MM:SS timestamp.
+    """
+    now = datetime.now()
+    time = now.strftime("%H:%M:%S")
+    return time
+
+
+async def validate(date):
+    """
+    Validate date format.
+
+    Parameters
+    ----------
+    date : str
+        Timestamp.
+
+    Returns
+    -------
+    date : str
+        Timestamp.
+    """
+    try:
+        parse(date)
+    except:
+        date = await now()
+    return date
+
+
+async def rfc2822_to_iso8601(date):
+    """
+    Convert RFC 2822 into ISO 8601.
+
+    Parameters
+    ----------
+    date : str
+        RFC 2822 Timestamp.
+
+    Returns
+    -------
+    date : str
+        ISO 8601 Timestamp.
+    """
+    if parsedate(date):
+        try:
+            date = parsedate_to_datetime(date)
+            date = date.isoformat()
+        except:
+            date = await now()
+    return date
diff --git a/slixfeed/filterhandler.py b/slixfeed/filterhandler.py
new file mode 100644
index 0000000..8c5db93
--- /dev/null
+++ b/slixfeed/filterhandler.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+
+TODO
+
+1) Website-specific filter (e.g. audiobookbay).
+
+2) Exclude websites from filtering (e.g. metapedia).
+
+3) Filter phrases:
+   Refer to sqlitehandler.search_entries for implementation.
+   It is expected to be more complex than function search_entries.
+
+"""
+
+import sqlitehandler
+
+async def set_filter(newwords, keywords):
+    """
+    Append new keywords to filter.
+
+    Parameters
+    ----------
+    newwords : str
+        Comma-separated list of new keywords.
+    keywords : str
+        Comma-separated list of current keywords.
+
+    Returns
+    -------
+    val : str
+        Comma-separated list of current and new keywords.
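+
+    Examples
+    --------
+    An illustrative run (the keywords are arbitrary); input is
+    lowercased, deduplicated and sorted::
+
+        await set_filter("War,Death", "crime,disaster")
+        # -> 'crime,death,disaster,war'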
+ """ + try: + keywords = keywords.split(",") + except: + keywords = [] + newwords = newwords.lower().split(",") + for word in newwords: + word = word.strip() + if len(word) and word not in keywords: + keywords.extend([word]) + keywords.sort() + val = ",".join(keywords) + return val + +async def is_listed(db_file, type, string): +# async def reject(db_file, string): +# async def is_blacklisted(db_file, string): + filter_type = "filter-" + type + list = await sqlitehandler.get_settings_value( + db_file, + filter_type + ) + if list: + list = list.split(",") + for i in list: + if not i or len(i) < 2: + continue + if i in string.lower(): + print(">>> ACTIVATE", i) + return 1 + else: + return None + +""" + +This code was tested at module datahandler + + reject = 0 + blacklist = await sqlitehandler.get_settings_value( + db_file, + "filter-deny" + ) + # print(">>> blacklist:") + # print(blacklist) + # breakpoint() + if blacklist: + blacklist = blacklist.split(",") + # print(">>> blacklist.split") + # print(blacklist) + # breakpoint() + for i in blacklist: + # print(">>> length", len(i)) + # breakpoint() + # if len(i): + if not i or len(i) < 2: + print(">>> continue due to length", len(i)) + # breakpoint() + continue + # print(title) + # print(">>> blacklisted word:", i) + # breakpoint() + test = (title + " " + summary + " " + link) + if i in test.lower(): + reject = 1 + break + + if reject: + print("rejected:",title) + entry = (title, '', link, source, date, 1); + +""" \ No newline at end of file diff --git a/slixfeed/opmlhandler.py b/slixfeed/opmlhandler.py new file mode 100644 index 0000000..1340ca0 --- /dev/null +++ b/slixfeed/opmlhandler.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +""" + +{ + 'bozo': False, + 'bozo_exception': None, + 'feeds': [ + { + 'url': 'https://kurtmckee.org/tag/listparser/feed', + 'title': 'listparser blog', + 'categories': [], + 'tags': [] + }, + { + 'url': 'https://github.com/kurtmckee/listparser/commits/develop.atom', + 'title': 'listparser changelog', + 'categories': [], + 'tags': [] + } + ], + 'lists': [], + 'opportunities': [], + 'meta': { + 'title': 'listparser project feeds', + 'author': { + 'name': 'Kurt McKee', + 'email': 'contactme@kurtmckee.org', + 'url': 'https://kurtmckee.org/' + } + }, + 'version': 'opml2' + } + +""" + +import listparser +import lxml + +import sqlitehandler +import datahandler + +async def import_opml(db_file, opml_doc): + feeds = listparser.parse(opml_doc)['feeds'] + for feed in feeds: + url = feed['url'] + title = feed['title'] + # categories = feed['categories'] + # tags = feed['tags'] + await datahandler.add_feed_no_check(db_file, [url, title]) + + +# NOTE Use OPyML or LXML +async def export_opml(): + result = await sqlitehandler.get_feeds() diff --git a/slixfeed/sqlitehandler.py b/slixfeed/sqlitehandler.py index 311f72e..4bfc9fb 100644 --- a/slixfeed/sqlitehandler.py +++ b/slixfeed/sqlitehandler.py @@ -1,6 +1,20 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +""" + +TODO + +1) Table feeds: + category + type (atom, rdf, rss0.9. rss2 etc.) 
+ +2) Function mark_all_read for entries of given feed + +3) Statistics + +""" + import sqlite3 import asyncio @@ -8,11 +22,13 @@ from sqlite3 import Error from datetime import date import confighandler +import datahandler +import datetimehandler # from eliot import start_action, to_file -# # with start_action(action_type="list_subscriptions()", db=db_file): +# # with start_action(action_type="list_feeds()", db=db_file): # # with start_action(action_type="last_entries()", num=num): -# # with start_action(action_type="get_subscriptions()"): +# # with start_action(action_type="get_feeds()"): # # with start_action(action_type="remove_entry()", source=source): # # with start_action(action_type="search_entries()", query=query): # # with start_action(action_type="check_entry()", link=link): @@ -26,9 +42,16 @@ def create_connection(db_file): """ Create a database connection to the SQLite database specified by db_file. - - :param db_file: Database filename. - :return: Connection object or None. + + Parameters + ---------- + db_file : str + Path to database file. + + Returns + ------- + conn : object + Connection object or None. """ conn = None try: @@ -43,45 +66,67 @@ def create_tables(db_file): """ Create SQLite tables. - :param db_file: Database filename. + Parameters + ---------- + db_file : str + Path to database file. """ with create_connection(db_file) as conn: - feeds_table_sql = """ - CREATE TABLE IF NOT EXISTS feeds ( - id integer PRIMARY KEY, - name text, - address text NOT NULL, - enabled integer NOT NULL, - scanned text, - updated text, - status integer, - valid integer - ); """ - entries_table_sql = """ - CREATE TABLE IF NOT EXISTS entries ( - id integer PRIMARY KEY, - title text NOT NULL, - summary text NOT NULL, - link text NOT NULL, - source text, - read integer - ); """ - # statistics_table_sql = """ - # CREATE TABLE IF NOT EXISTS statistics ( - # id integer PRIMARY KEY, - # title text NOT NULL, - # number integer - # ); """ - settings_table_sql = """ - CREATE TABLE IF NOT EXISTS settings ( - id integer PRIMARY KEY, - key text NOT NULL, - value integer - ); """ + feeds_table_sql =( + "CREATE TABLE IF NOT EXISTS feeds (" + "id INTEGER PRIMARY KEY," + "name TEXT," + "address TEXT NOT NULL," + "enabled INTEGER NOT NULL," + "scanned TEXT," + "updated TEXT," + "status INTEGER," + "valid INTEGER" + ");" + ) + entries_table_sql = ( + "CREATE TABLE IF NOT EXISTS entries (" + "id INTEGER PRIMARY KEY," + "title TEXT NOT NULL," + "summary TEXT NOT NULL," + "link TEXT NOT NULL," + "entry_id TEXT," + "source TEXT NOT NULL," + "timestamp TEXT," + "read INTEGER" + ");" + ) + archive_table_sql = ( + "CREATE TABLE IF NOT EXISTS archive (" + "id INTEGER PRIMARY KEY," + "title TEXT NOT NULL," + "summary TEXT NOT NULL," + "link TEXT NOT NULL," + "entry_id TEXT," + "source TEXT NOT NULL," + "timestamp TEXT," + "read INTEGER" + ");" + ) + # statistics_table_sql = ( + # "CREATE TABLE IF NOT EXISTS statistics (" + # "id INTEGER PRIMARY KEY," + # "title TEXT NOT NULL," + # "number INTEGER" + # ");" + # ) + settings_table_sql = ( + "CREATE TABLE IF NOT EXISTS settings (" + "id INTEGER PRIMARY KEY," + "key TEXT NOT NULL," + "value INTEGER" + ");" + ) cur = conn.cursor() # cur = get_cursor(db_file) cur.execute(feeds_table_sql) cur.execute(entries_table_sql) + cur.execute(archive_table_sql) # cur.execute(statistics_table_sql) cur.execute(settings_table_sql) @@ -90,8 +135,15 @@ def get_cursor(db_file): """ Allocate a cursor to connection per database. - :param db_file: Database filename. - :return: Cursor. 
+ Parameters + ---------- + db_file : str + Path to database file. + + Returns + ------- + CURSORS[db_file] : object + Cursor. """ if db_file in CURSORS: return CURSORS[db_file] @@ -102,15 +154,25 @@ def get_cursor(db_file): return CURSORS[db_file] -async def add_feed(db_file, title, url, res): +async def add_feed(db_file, url, title=None, status=None): """ Add a new feed into the feeds table. - :param db_file: Database filename. - :param title: Feed title. - :param url: URL. - :param res: XML document. - :return: Message. + Parameters + ---------- + db_file : str + Path to database file. + url : str + URL. + title : str, optional + Feed Title. The default is None. + status : str, optional + HTTP status code. The default is None. + + Returns + ------- + msg : str + Message. """ #TODO consider async with DBLOCK #conn = create_connection(db_file) @@ -120,7 +182,7 @@ async def add_feed(db_file, title, url, res): # exist = await check_feed_exist(db_file, url) # if not exist: - # res = await main.download_feed(url) + # status = await main.download_feed(url) # else: # return "News source is already listed in the subscription list" @@ -128,44 +190,78 @@ async def add_feed(db_file, title, url, res): with create_connection(db_file) as conn: cur = conn.cursor() # title = feed["feed"]["title"] - feed = (title, url, 1, res[1], 1) - sql = """INSERT INTO feeds(name, address, enabled, status, valid) - VALUES(?, ?, ?, ?, ?) """ + feed = (title, url, 1, status, 1) + sql = ( + "INSERT INTO feeds(" + "name, address, enabled, status, valid" + ")" + "VALUES(?, ?, ?, ?, ?) " + ) cur.execute(sql, feed) source = title if title else '<' + url + '>' - msg = """> {}\nNews source \"{}\" has been added to subscription list. - """.format(url, source) + msg = ( + "> {}\nNews source \"{}\" has been added " + "to subscription list." + ).format(url, source) return msg async def remove_feed(db_file, ix): """ - Delete a feed by feed id. + Delete a feed by feed ID. - :param db_file: Database filename. - :param ix: Index of feed. - :return: Message. + Parameters + ---------- + db_file : str + Path to database file. + ix : str + Index of feed. + + Returns + ------- + msg : str + Message. """ with create_connection(db_file) as conn: async with DBLOCK: cur = conn.cursor() try: - sql = "SELECT address FROM feeds WHERE id = ?" + sql = ( + "SELECT address " + "FROM feeds " + "WHERE id = ?" + ) # cur # for i in url: # url = i[0] url = cur.execute(sql, (ix,)).fetchone()[0] - sql = "SELECT name FROM feeds WHERE id = ?" + sql = ( + "SELECT name " + "FROM feeds " + "WHERE id = ?" + ) name = cur.execute(sql, (ix,)).fetchone()[0] # NOTE Should we move DBLOCK to this line? 2022-12-23 - sql = "DELETE FROM entries WHERE source = ?" + sql = ( + "DELETE " + "FROM entries " + "WHERE source = ?" + ) cur.execute(sql, (url,)) - sql = "DELETE FROM feeds WHERE id = ?" + sql = ( + "DELETE FROM feeds " + "WHERE id = ?" + ) cur.execute(sql, (ix,)) - msg = "> {}\nNews source \"{}\" has been removed from subscription list.".format(url, name) + msg = ( + "> {}\nNews source \"{}\" has been removed " + "from subscription list." + ).format(url, name) except: - msg = "No news source with ID {}.".format(ix) + msg = ( + "No news source with ID {}." + ).format(ix) return msg @@ -174,27 +270,50 @@ async def check_feed_exist(db_file, url): Check whether a feed exists. Query for feeds by given url. - :param db_file: Database filename. - :param url: URL. - :return: Index ID and Name or None. + Parameters + ---------- + db_file : str + Path to database file. 
+ url : str + URL. + + Returns + ------- + result : list + List of ID and Name of feed. """ cur = get_cursor(db_file) - sql = "SELECT id, name FROM feeds WHERE address = ?" + sql = ( + "SELECT id, name " + "FROM feeds " + "WHERE address = ?" + ) result = cur.execute(sql, (url,)).fetchone() return result -async def get_number_of_items(db_file, str): +async def get_number_of_items(db_file, table): """ Return number of entries or feeds. - :param cur: Cursor object. - :param str: "entries" or "feeds". - :return: Number of rows. + Parameters + ---------- + db_file : str + Path to database file. + table : str + "entries" or "feeds". + + Returns + ------- + count : ? + Number of rows. """ with create_connection(db_file) as conn: cur = conn.cursor() - sql = "SELECT count(id) FROM {}".format(str) + sql = ( + "SELECT count(id) " + "FROM {}" + ).format(table) count = cur.execute(sql).fetchone()[0] return count @@ -203,13 +322,23 @@ async def get_number_of_feeds_active(db_file): """ Return number of active feeds. - :param db_file: Database filename. - :param cur: Cursor object. - :return: Number of rows. + Parameters + ---------- + db_file : str + Path to database file. + + Returns + ------- + count : ? + Number of rows. """ with create_connection(db_file) as conn: cur = conn.cursor() - sql = "SELECT count(id) FROM feeds WHERE enabled = 1" + sql = ( + "SELECT count(id) " + "FROM feeds " + "WHERE enabled = 1" + ) count = cur.execute(sql).fetchone()[0] return count @@ -217,54 +346,124 @@ async def get_number_of_feeds_active(db_file): async def get_number_of_entries_unread(db_file): """ Return number of unread items. - - :param db_file: Database filename. - :param cur: Cursor object. - :return: Number of rows. + + Parameters + ---------- + db_file : str + Path to database file. + + Returns + ------- + count : ? + Number of rows. """ with create_connection(db_file) as conn: cur = conn.cursor() - sql = "SELECT count(id) FROM entries WHERE read = 0" + sql = ( + "SELECT count(id) " + "FROM entries " + "WHERE read = 0" + ) count = cur.execute(sql).fetchone()[0] return count -async def get_entry_unread(db_file): +# TODO Read from entries and archives +async def get_entry_unread(db_file, num=None): """ - Check read status of entry. - - :param db_file: Database filename. - :return: News item as message. + Extract information from unread entries. + + Parameters + ---------- + db_file : str + Path to database file. + num : str, optional + Number. The default is None. + + Returns + ------- + entry : str + News item message. 
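+
+    Examples
+    --------
+    A minimal sketch; db_file is assumed to be a database created
+    by datahandler.initdb. When num is omitted, the "quantum"
+    setting decides how many items are returned::
+
+        news = await get_entry_unread(db_file, num=3)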
""" + if not num: + num = await get_settings_value(db_file, "quantum") + else: + num = int(num) with create_connection(db_file) as conn: cur = conn.cursor() - sql = "SELECT id FROM entries WHERE read = 0" - ix = cur.execute(sql).fetchone() - if ix is None: - return False - ix = ix[0] - sql = "SELECT title FROM entries WHERE id = :id" - title = cur.execute(sql, (ix,)).fetchone()[0] - sql = "SELECT summary FROM entries WHERE id = :id" - summary = cur.execute(sql, (ix,)).fetchone()[0] - sql = "SELECT link FROM entries WHERE id = :id" - link = cur.execute(sql, (ix,)).fetchone()[0] - entry = "{}\n\n{}\n\n{}".format(title, summary, link) - async with DBLOCK: - await mark_as_read(cur, ix) - # async with DBLOCK: - # await update_statistics(db_file) - return entry + # sql = "SELECT id FROM entries WHERE read = 0 LIMIT 1" + # sql = "SELECT id FROM entries WHERE read = 0 ORDER BY timestamp DESC LIMIT 1" + sql = ( + "SELECT id, title, summary, link " + "FROM entries " + "WHERE read = 0 " + "ORDER BY timestamp " + "DESC LIMIT :num" + ) + results = cur.execute(sql, (num,)) + results = results.fetchall() + + # TODO Add filtering + # TODO Do this when entry is added to list and mark it as read + # DONE! + # results = [] + # if get_settings_value(db_file, "filter-deny"): + # while len(results) < num: + # result = cur.execute(sql).fetchone() + # blacklist = await get_settings_value(db_file, "filter-deny").split(",") + # for i in blacklist: + # if i in result[1]: + # continue + # print("rejected:", result[1]) + # print("accepted:", result[1]) + # results.extend([result]) + + # news_list = "You've got {} news items:\n".format(num) + news_list = "" + # NOTE Why doesn't this work without list? + # i.e. for result in results + # for result in results.fetchall(): + for result in results: + ix = result[0] + title = result[1] + summary = result[2] + link = result[3] + if num > 1: + news_list += ( + "\n{}\n{}\n" + ).format( + str(title), + str(link) + ) + else: + news_list = ( + "{}\n\n{}\n\n{}" + ).format( + str(title), + str(summary), + str(link) + ) + async with DBLOCK: + await mark_as_read(cur, ix) + return news_list async def mark_as_read(cur, ix): """ Set read status of entry. - - :param cur: Cursor object. - :param ix: Index of entry. + + Parameters + ---------- + db_file : str + Path to database file. + ix : str + Index of entry. """ - sql = "UPDATE entries SET summary = '', read = 1 WHERE id = ?" + sql = ( + "UPDATE entries " + "SET summary = '', read = 1 " + "WHERE id = ?" + ) cur.execute(sql, (ix,)) @@ -272,8 +471,15 @@ async def statistics(db_file): """ Return table statistics. - :param db_file: Database filename. - :return: News item as message. + Parameters + ---------- + db_file : str + Path to database file. + + Returns + ------- + msg : str + Statistics as message. 
""" feeds = await get_number_of_items(db_file, 'feeds') active_feeds = await get_number_of_feeds_active(db_file) @@ -283,57 +489,108 @@ async def statistics(db_file): # """.format(unread_entries, entries, feeds) with create_connection(db_file) as conn: cur = conn.cursor() - sql = "SELECT value FROM settings WHERE key = \"enabled\"" - status = cur.execute(sql).fetchone()[0] - sql = "SELECT value FROM settings WHERE key = \"interval\"" - interval = cur.execute(sql).fetchone()[0] - msg = """News items: {} ({})\nNews sources: {} ({})\nUpdate interval: {}\nOperation status: {} - """.format(unread_entries, entries, active_feeds, feeds, interval, status) + keys = [] + for key in ["enabled", "interval", "quantum"]: + sql = ( + "SELECT value " + "FROM settings " + "WHERE key = ?" + ) + keys.extend([cur.execute(sql, (key,)).fetchone()[0]]) + msg = ( + "```\n" + "News items : {} ({})\n" + "News sources : {} ({})\n" + "Update interval : {}\n" + "Items per update : {}\n" + "Operation status : {}\n" + "```" + ).format( + unread_entries, entries, + active_feeds, feeds, + keys[1], + keys[2], + keys[0] + ) return msg -#TODO statistics async def update_statistics(cur): """ Update table statistics. - - :param cur: Cursor object. + + Parameters + ---------- + cur : object + Cursor object. """ stat_dict = {} stat_dict["feeds"] = await get_number_of_items(cur, 'feeds') stat_dict["entries"] = await get_number_of_items(cur, 'entries') stat_dict["unread"] = await get_number_of_entries_unread(cur=cur) for i in stat_dict: - sql = "SELECT id FROM statistics WHERE title = ?" + sql = ( + "SELECT id " + "FROM statistics " + "WHERE title = ?" + ) cur.execute(sql, (i,)) if cur.fetchone(): - sql = "UPDATE statistics SET number = :num WHERE title = :title" - cur.execute(sql, {"title": i, "num": stat_dict[i]}) + sql = ( + "UPDATE statistics " + "SET number = :num " + "WHERE title = :title" + ) + cur.execute(sql, { + "title": i, + "num": stat_dict[i] + }) else: - sql = "SELECT count(id) FROM statistics" + sql = ( + "SELECT count(id) " + "FROM statistics" + ) count = cur.execute(sql).fetchone()[0] ix = count + 1 - sql = "INSERT INTO statistics VALUES(?,?,?)" + sql = ( + "INSERT INTO statistics " + "VALUES(?,?,?)" + ) cur.execute(sql, (ix, i, stat_dict[i])) -# TODO mark_all_read for entries of feed async def toggle_status(db_file, ix): """ Toggle status of feed. - - :param db_file: Database filename. - :param ix: Index of entry. - :return: Message + + Parameters + ---------- + db_file : str + Path to database file. + ix : str + Index of entry. + + Returns + ------- + msg : str + Message. """ async with DBLOCK: with create_connection(db_file) as conn: cur = conn.cursor() try: #cur = get_cursor(db_file) - sql = "SELECT name FROM feeds WHERE id = :id" + sql = ( + "SELECT name " + "FROM feeds " + "WHERE id = :id" + ) title = cur.execute(sql, (ix,)).fetchone()[0] - sql = "SELECT enabled FROM feeds WHERE id = ?" + sql = ( + "SELECT enabled " + "FROM feeds " + "WHERE id = ?" + ) # NOTE [0][1][2] status = cur.execute(sql, (ix,)).fetchone()[0] # FIXME always set to 1 @@ -345,11 +602,22 @@ async def toggle_status(db_file, ix): else: status = 1 state = "enabled" - sql = "UPDATE feeds SET enabled = :status WHERE id = :id" - cur.execute(sql, {"status": status, "id": ix}) - msg = "Updates for '{}' are now {}.".format(title, state) + sql = ( + "UPDATE feeds " + "SET enabled = :status " + "WHERE id = :id" + ) + cur.execute(sql, { + "status": status, + "id": ix + }) + msg = ( + "Updates for '{}' are now {}." 
+ ).format(title, state) except: - msg = "No news source with ID {}.".format(ix) + msg = ( + "No news source with ID {}." + ).format(ix) return msg @@ -357,18 +625,38 @@ async def set_date(cur, url): """ Set last update date of feed. - :param cur: Cursor object. - :param url: URL. + Parameters + ---------- + cur : object + Cursor object. + url : str + URL. """ today = date.today() - sql = "UPDATE feeds SET updated = :today WHERE address = :url" + sql = ( + "UPDATE feeds " + "SET updated = :today " + "WHERE address = :url" + ) # cur = conn.cursor() - cur.execute(sql, {"today": today, "url": url}) + cur.execute(sql, { + "today": today, + "url": url + }) async def add_entry_and_set_date(db_file, source, entry): """ - TODO + Add entry to table entries and set date of source in table feeds. + + Parameters + ---------- + db_file : str + Path to database file. + source : str + Feed URL. + entry : list + Entry properties. """ async with DBLOCK: with create_connection(db_file) as conn: @@ -379,49 +667,102 @@ async def add_entry_and_set_date(db_file, source, entry): async def update_source_status(db_file, status, source): """ - TODO + Set HTTP status of source in table feeds. + + Parameters + ---------- + db_file : str + Path to database file. + source : str + Feed URL. + status : str + Status ID or message. """ - sql = "UPDATE feeds SET status = :status, scanned = :scanned WHERE address = :url" + sql = ( + "UPDATE feeds " + "SET status = :status, scanned = :scanned " + "WHERE address = :url" + ) async with DBLOCK: with create_connection(db_file) as conn: cur = conn.cursor() - cur.execute(sql, {"status": status, "scanned": date.today(), "url": source}) + cur.execute(sql, { + "status" : status, + "scanned" : date.today(), + "url" : source + }) async def update_source_validity(db_file, source, valid): """ - TODO + Set validity status of source in table feeds. + + Parameters + ---------- + db_file : str + Path to database file. + source : str + Feed URL. + valid : boolean + 0 or 1. """ - sql = "UPDATE feeds SET valid = :validity WHERE address = :url" + sql = ( + "UPDATE feeds " + "SET valid = :validity " + "WHERE address = :url" + ) async with DBLOCK: with create_connection(db_file) as conn: cur = conn.cursor() - cur.execute(sql, {"validity": valid, "url": source}) + cur.execute(sql, { + "validity": valid, + "url": source + }) async def add_entry(cur, entry): """ - Add a new entry into the entries table. + Add a new entry row into the entries table. - :param cur: Cursor object. - :param entry: + Parameters + ---------- + cur : object + Cursor object. + entry : str + Entry properties. """ - sql = """ INSERT INTO entries(title, summary, link, source, read) - VALUES(?, ?, ?, ?, ?) """ + sql = ( + "INSERT " + "INTO entries(" + "title, " + "summary, " + "link, " + "entry_id, " + "source, " + "timestamp, " + "read" + ") " + "VALUES(?, ?, ?, ?, ?, ?, ?)" + ) cur.execute(sql, entry) -# This function doesn't work as expected with bbs and wiki feeds +# NOTE See remove_nonexistent_entries +# NOTE This function doesn't work as expected with bbs and docuwiki feeds async def remove_entry(db_file, source, length): """ Maintain list of entries equal to feed. Check the number returned by feed and delete existing entries up to the same returned amount. - - :param db_file: Database filename. - :param source: - :param length: - :return: + + Parameters + ---------- + db_file : str + Path to database file. + source : str + Feed URL. + length : str + Number. 
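+
+    Examples
+    --------
+    A sketch with a hypothetical URL and count, trimming the
+    stored entries of a feed down to the number of entries the
+    feed itself currently reports::
+
+        await remove_entry(db_file, "http://example.org/feed.xml", 20)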
""" # FIXED # Dino empty titles are not counted https://dino.im/index.xml @@ -431,108 +772,242 @@ async def remove_entry(db_file, source, length): async with DBLOCK: with create_connection(db_file) as conn: cur = conn.cursor() - sql = "SELECT count(id) FROM entries WHERE source = ?" + sql = ( + "SELECT count(id) " + "FROM entries " + "WHERE source = ?" + ) count = cur.execute(sql, (source,)).fetchone()[0] limit = count - length if limit: limit = limit; - sql = """DELETE FROM entries WHERE id IN ( - SELECT id FROM entries - WHERE source = :source - ORDER BY id - ASC LIMIT :limit)""" - cur.execute(sql, {"source": source, "limit": limit}) + sql = ( + "DELETE FROM entries " + "WHERE id " + "IN (SELECT id " + "FROM entries " + "WHERE source = :source " + "ORDER BY id " + "ASC LIMIT :limit)" + ) + cur.execute(sql, { + "source": source, + "limit": limit + }) +# TODO Move entries that don't exist into table archive. +# NOTE Entries that are read from archive are deleted. +# NOTE Unlike entries from table entries, entries from +# table archive are not marked as read. async def remove_nonexistent_entries(db_file, feed, source): """ Remove entries that don't exist in a given parsed feed. Check the entries returned from feed and delete non existing entries - :param db_file: Database filename. - :param feed: URL of parsed feed. - :param source: URL of associated feed. - """ - async with DBLOCK: - with create_connection(db_file) as conn: - cur = conn.cursor() - sql = "SELECT id, title, link FROM entries WHERE source = ?" - entries_db = cur.execute(sql, (source,)).fetchall() - for entry_db in entries_db: - exist = False - for entry_feed in feed.entries: - # TODO better check and don't repeat code - if entry_feed.has_key("title"): - title = entry_feed.title - else: - title = feed["feed"]["title"] - - if entry_feed.has_key("link"): - link = entry_feed.link - else: - link = source - # TODO better check and don't repeat code - if entry_db[1] == title and entry_db[2] == link: - exist = True - break - if not exist: - # TODO Send to table archive - # TODO Also make a regular/routine check for sources that have been changed (though that can only happen when manually editing) - sql = "DELETE FROM entries WHERE id = ?" - cur.execute(sql, (entry_db[0],)) - - -async def get_subscriptions(db_file): - """ - Query table feeds. - - :param db_file: Database filename. - :return: List of feeds. + Parameters + ---------- + db_file : str + Path to database file. + feed : list + Parsed feed document. + source : str + Feed URL. URL of associated feed. """ with create_connection(db_file) as conn: cur = conn.cursor() - sql = "SELECT address FROM feeds WHERE enabled = 1" + sql = ( + "SELECT id, title, link, entry_id, timestamp, read " + "FROM entries " + "WHERE source = ?" 
+            )
+        items = cur.execute(sql, (source,)).fetchall()
+        entries = feed.entries
+        for item in items:
+            valid = False
+            for entry in entries:
+                # TODO better check and don't repeat code
+                if entry.has_key("id") and item[3]:
+                    if entry.id == item[3]:
+                        valid = True
+                        break
+                else:
+                    if entry.has_key("title"):
+                        title = entry.title
+                    else:
+                        title = feed["feed"]["title"]
+                    if entry.has_key("link"):
+                        link = await datahandler.join_url(source, entry.link)
+                    else:
+                        link = source
+                    if entry.has_key("published") and item[4]:
+                        time = await datetimehandler.rfc2822_to_iso8601(entry.published)
+                        if (item[1] == title and
+                            item[2] == link and
+                            item[4] == time):
+                            valid = True
+                            break
+                    else:
+                        if (item[1] == title and
+                            item[2] == link):
+                            valid = True
+                            break
+            # TODO better check and don't repeat code
+            if not valid:
+                async with DBLOCK:
+                    # TODO Send to table archive
+                    # TODO Also make a regular/routine check for sources that
+                    #      have been changed (though that can only happen when
+                    #      manually editing)
+                    ix = item[0]
+                    if item[5] == 1:
+                        sql = (
+                            "DELETE "
+                            "FROM entries "
+                            "WHERE id = :ix"
+                            )
+                        cur.execute(sql, (ix,))
+                    else:
+                        print(">>> ARCHIVING:")
+                        print("title:", item[1])
+                        print("link :", item[2])
+                        print("id   :", item[3])
+                        sql = (
+                            "INSERT "
+                            "INTO archive "
+                            "SELECT * "
+                            # "SELECT title, summary, "
+                            # "link, source, timestamp "
+                            "FROM entries "
+                            "WHERE entries.id = :ix"
+                            )
+                        cur.execute(sql, (ix,))
+                        sql = (
+                            "DELETE "
+                            "FROM entries "
+                            "WHERE id = :ix"
+                            )
+                        cur.execute(sql, (ix,))
+
+
+async def get_feeds(db_file):
+    """
+    Query table feeds for Title, URL, Type, Categories and Tags.
+
+    Parameters
+    ----------
+    db_file : str
+        Path to database file.
+
+    Returns
+    -------
+    result : list
+        Title, URL, Type, Categories and Tags of feeds.
+    """
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = (
+            "SELECT name, address, type, categories, tags "
+            "FROM feeds"
+            )
         result = cur.execute(sql).fetchall()
         return result


-async def list_subscriptions(db_file):
+async def get_feeds_url(db_file):
+    """
+    Query active feeds for URLs.
+
+    Parameters
+    ----------
+    db_file : str
+        Path to database file.
+
+    Returns
+    -------
+    result : list
+        URLs of active feeds.
+    """
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = (
+            "SELECT address "
+            "FROM feeds "
+            "WHERE enabled = 1"
+            )
+        result = cur.execute(sql).fetchall()
+        return result
+
+
+async def list_feeds(db_file):
     """
     Query table feeds and list items.

-    :param db_file: Database filename.
-    :return: List of feeds.
+    Parameters
+    ----------
+    db_file : str
+        Path to database file.
+
+    Returns
+    -------
+    msg : str
+        URLs of feeds as message.
""" cur = get_cursor(db_file) - sql = "SELECT name, address, updated, id, enabled FROM feeds" + sql = ( + "SELECT name, address, updated, enabled, id " + "FROM feeds" + ) results = cur.execute(sql) - - feeds_list = "List of subscriptions: \n" + feeds_list = "\nList of subscriptions:\n```" counter = 0 for result in results: counter += 1 - feeds_list += """\n{} \n{} \nLast updated: {} \nID: {} [{}] - """.format(str(result[0]), str(result[1]), str(result[2]), - str(result[3]), str(result[4])) + feeds_list += ( + "Name : {}\n" + "Address : {}\n" + "Updated : {}\n" + "Status : {}\n" + "ID : {}\n" + "\n" + ).format( + str(result[0]), + str(result[1]), + str(result[2]), + str(result[3]), + str(result[4]) + ) if counter: - return feeds_list + "\n Total of {} subscriptions".format(counter) + return feeds_list + ( + "```\nTotal of {} subscriptions.\n" + ).format(counter) else: - msg = ("List of subscriptions is empty. \n" - "To add feed, send a message as follows: \n" - "feed add URL \n" - "Example: \n" - "add https://reclaimthenet.org/feed/") + msg = ( + "List of subscriptions is empty.\n" + "To add feed, send a URL\n" + "Try these:\n" + # TODO Pick random from featured/recommended + "https://reclaimthenet.org/feed/" + ) return msg async def last_entries(db_file, num): """ Query entries - - :param db_file: Database filename. - :param num: Number - :return: List of recent N entries + + Parameters + ---------- + db_file : str + Path to database file. + num : str + Number. + + Returns + ------- + titles_list : str + List of recent N entries as message. """ num = int(num) if num > 50: @@ -540,81 +1015,194 @@ async def last_entries(db_file, num): elif num < 1: num = 1 cur = get_cursor(db_file) - sql = "SELECT title, link FROM entries ORDER BY ROWID DESC LIMIT :num" + # sql = "SELECT title, link FROM entries ORDER BY ROWID DESC LIMIT :num" + sql = ( + "SELECT title, link " + "FROM entries " + "WHERE read = 0 " + "ORDER BY timestamp " + "DESC LIMIT :num " + ) results = cur.execute(sql, (num,)) - - - titles_list = "Recent {} titles: \n".format(num) + titles_list = "Recent {} titles:\n".format(num) for result in results: - titles_list += "\n{} \n{}".format(str(result[0]), str(result[1])) + titles_list += ( + "\n{}\n{}\n" + ).format( + str(result[0]), + str(result[1]) + ) return titles_list +async def search_feeds(db_file, query): + """ + Query feeds. + + Parameters + ---------- + db_file : str + Path to database file. + query : str + Search query. + + Returns + ------- + titles_list : str + Feeds of specified keywords as message. + """ + cur = get_cursor(db_file) + sql = ( + "SELECT name, id, address " + "FROM feeds " + "WHERE name LIKE ? " + "LIMIT 50" + ) + results = cur.execute(sql, [f'%{query}%']) + results_list = ( + "Feeds containing '{}':\n```" + ).format(query) + counter = 0 + for result in results: + counter += 1 + results_list += ( + "\n{} [{}]\n{}\n" + ).format( + str(result[0]), + str(result[1]), + str(result[2]) + ) + if counter: + return results_list + "\n```\nTotal of {} feeds".format(counter) + else: + return "No feeds found for: {}".format(query) + + async def search_entries(db_file, query): """ - Query entries - - :param db_file: Database filename. - :param query: Search query - :return: Entries with specified keywords + Query entries. + + Parameters + ---------- + db_file : str + Path to database file. + query : str + Search query. + + Returns + ------- + titles_list : str + Entries of specified keywords as message. 
""" - if len(query) < 2: - return "Please enter at least 2 characters to search" - cur = get_cursor(db_file) - sql = "SELECT title, link FROM entries WHERE title LIKE ? LIMIT 50" + sql = ( + "SELECT title, link " + "FROM entries " + "WHERE title LIKE ? " + "LIMIT 50" + ) results = cur.execute(sql, [f'%{query}%']) - - results_list = "Search results for '{}': \n".format(query) + results_list = ( + "Search results for '{}':\n```" + ).format(query) counter = 0 for result in results: counter += 1 - results_list += """\n{} \n{} - """.format(str(result[0]), str(result[1])) + results_list += ( + "\n{}\n{}\n" + ).format( + str(result[0]), + str(result[1]) + ) if counter: - return results_list + "\n Total of {} results".format(counter) + return results_list + "```\nTotal of {} results".format(counter) else: return "No results found for: {}".format(query) -async def check_entry_exist(db_file, title, link): +async def check_entry_exist(db_file, source, eid=None, + title=None, link=None, date=None): """ Check whether an entry exists. - Query entries by title and link. + If entry has an ID, check by ID. + If entry has timestamp, check by title, link and date. + Otherwise, check by title and link. - :param db_file: Database filename. - :param link: Entry URL. - :param title: Entry title. - :return: Index ID or None. + Parameters + ---------- + db_file : str + Path to database file. + source : str + Feed URL. URL of associated feed. + eid : str, optional + Entry ID. The default is None. + title : str, optional + Entry title. The default is None. + link : str, optional + Entry URL. The default is None. + date : str, optional + Entry Timestamp. The default is None. + + Returns + ------- + bool + True or None. """ cur = get_cursor(db_file) - sql = "SELECT id FROM entries WHERE title = :title and link = :link" - result = cur.execute(sql, {"title": title, "link": link}).fetchone() - return result + if eid: + sql = ( + "SELECT id " + "FROM entries " + "WHERE entry_id = :eid and source = :source" + ) + result = cur.execute(sql, { + "eid": eid, + "source": source + }).fetchone() + elif date: + sql = ( + "SELECT id " + "FROM entries " + "WHERE " + "title = :title and " + "link = :link and " + "timestamp = :date" + ) + result = cur.execute(sql, { + "title": title, + "link": link, + "timestamp": date + }).fetchone() + else: + sql = ( + "SELECT id " + "FROM entries " + "WHERE title = :title and link = :link" + ) + result = cur.execute(sql, { + "title": title, + "link": link + }).fetchone() + if result: + return True + else: + None -# TODO dictionary -# settings = { -# "enabled" : { -# "message": "Updates are {}".format(status), -# "value": val -# }, -# "interval" : { -# "message": "Updates will be sent every {} minutes".format(val), -# "value": val -# }, -# "quantom" : { -# "message": "Every updates will contain {} news items".format(val), -# "value": val -# } -# } async def set_settings_value(db_file, key_value): """ Set settings value. - :param db_file: Database filename. - :param key_value: List of key ("enabled", "interval", "quantum") and value (Integer). - :return: Message. + Parameters + ---------- + db_file : str + Path to database file. + key_value : list + key : str + enabled, filter-allow, filter-deny, + interval, master, quantum, random. + value : int + Numeric value. 
""" # if isinstance(key_value, list): # key = key_value[0] @@ -631,22 +1219,34 @@ async def set_settings_value(db_file, key_value): with create_connection(db_file) as conn: cur = conn.cursor() await set_settings_value_default(cur, key) - sql = "UPDATE settings SET value = :value WHERE key = :key" - cur.execute(sql, {"key": key, "value": val}) - if key == 'quantum': - msg = "Each update will contain {} news items.".format(val) - elif key == 'interval': - msg = "Updates will be sent every {} minutes.".format(val) - else: - if val: - status = "disabled" - else: - status = "enabled" - msg = "Updates are {}.".format(status) - return msg + sql = ( + "UPDATE settings " + "SET value = :value " + "WHERE key = :key" + ) + cur.execute(sql, { + "key": key, + "value": val + }) +# TODO Place settings also in a file async def set_settings_value_default(cur, key): + """ + Set default settings value. + + Parameters + ---------- + cur : object + Cursor object. + key : str + Key: enabled, interval, master, quantum, random. + + Returns + ------- + val : str + Numeric value. + """ # async def set_settings_value_default(cur): # keys = ["enabled", "interval", "quantum"] # for i in keys: @@ -656,11 +1256,19 @@ async def set_settings_value_default(cur, key): # val = await settings.get_value_default(i) # sql = "INSERT INTO settings(key,value) VALUES(?,?)" # cur.execute(sql, (i, val)) - sql = "SELECT id FROM settings WHERE key = ?" + sql = ( + "SELECT id " + "FROM settings " + "WHERE key = ?" + ) cur.execute(sql, (key,)) if not cur.fetchone(): val = await confighandler.get_value_default(key) - sql = "INSERT INTO settings(key,value) VALUES(?,?)" + sql = ( + "INSERT " + "INTO settings(key,value) " + "VALUES(?,?)" + ) cur.execute(sql, (key, val)) return val @@ -669,8 +1277,17 @@ async def get_settings_value(db_file, key): """ Get settings value. - :param db_file: Database filename. - :param key: "enabled", "interval", "quantum". + Parameters + ---------- + db_file : str + Path to database file. + key : str + Key: "enabled", "interval", "master", "quantum", "random". + + Returns + ------- + val : str + Numeric value. """ # try: # with create_connection(db_file) as conn: @@ -686,10 +1303,14 @@ async def get_settings_value(db_file, key): with create_connection(db_file) as conn: try: cur = conn.cursor() - sql = "SELECT value FROM settings WHERE key = ?" - result = cur.execute(sql, (key,)).fetchone()[0] + sql = ( + "SELECT value " + "FROM settings " + "WHERE key = ?" + ) + val = cur.execute(sql, (key,)).fetchone()[0] except: - result = await set_settings_value_default(cur, key) - if not result: - result = await set_settings_value_default(cur, key) - return result + val = await set_settings_value_default(cur, key) + if not val: + val = await set_settings_value_default(cur, key) + return val diff --git a/slixfeed/xmpphandler.py b/slixfeed/xmpphandler.py index 92d5dd6..815c44b 100644 --- a/slixfeed/xmpphandler.py +++ b/slixfeed/xmpphandler.py @@ -1,38 +1,56 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from datetime import datetime +""" + +TODO + +1) Deprecate "add" (see above) and make it interactive. + Slixfeed: Do you still want to add this URL to subscription list? 
+        See: case _ if message_lowercase.startswith("add"):
+
+2) Use loop (with gather) instead of TaskGroup
+
+"""

 import asyncio
 import os
 import slixmpp

+from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
+
 import confighandler
 import datahandler
+import datetimehandler
+import filterhandler
 import sqlitehandler

+main_task = []
 jid_tasker = {}
 task_manager = {}
+loop = asyncio.get_event_loop()
+# asyncio.set_event_loop(loop)

-time_now = datetime.now()
+# time_now = datetime.now()
 # time_now = time_now.strftime("%H:%M:%S")

-def print_time():
-    # return datetime.now().strftime("%H:%M:%S")
-    now = datetime.now()
-    current_time = now.strftime("%H:%M:%S")
-    return current_time
+# def print_time():
+#     # return datetime.now().strftime("%H:%M:%S")
+#     now = datetime.now()
+#     current_time = now.strftime("%H:%M:%S")
+#     return current_time
+
+
+async def handle_event():
+    print("Event handled!")


 class Slixfeed(slixmpp.ClientXMPP):
     """
-    Slixmpp news bot that will send updates
-    from feeds it receives.
+    Slixmpp
+    -------
+    News bot that sends updates from RSS feeds.
     """
-
-    print("slixmpp.ClientXMPP")
-    print(repr(slixmpp.ClientXMPP))
-
     def __init__(self, jid, password):
         slixmpp.ClientXMPP.__init__(self, jid, password)

@@ -52,7 +70,7 @@ class Slixfeed(slixmpp.ClientXMPP):
         self.add_event_handler("message", self.message)
         self.add_event_handler("disconnected", self.reconnect)
         # Initialize event loop
-        self.loop = asyncio.get_event_loop()
+        # self.loop = asyncio.get_event_loop()


     async def start(self, event):
@@ -70,116 +88,316 @@ class Slixfeed(slixmpp.ClientXMPP):
         """
         self.send_presence()
         await self.get_roster()
-        await self.select_file()
-        self.send_presence(
-            pshow="away",
-            pstatus="Slixmpp has been restarted.",
-            pto="sch@pimux.de"
-        )
+        # for task in main_task:
+        #     task.cancel()
+        if not main_task:
+            await self.select_file()


     async def message(self, msg):
         """
         Process incoming message stanzas. Be aware that this also
         includes MUC messages and error messages. It is usually
-        a good idea to check the messages's type before processing
-        or sending replies.
+        a good practice to check the message's type before
+        processing or sending replies.

-        Arguments:
-            msg -- The received message stanza. See the documentation
-                   for stanza objects and the Message stanza to see
-                   how it may be used.
+        Parameters
+        ----------
+        self : ?
+            Self.
+        msg : str
+            The received message stanza. See the documentation
+            for stanza objects and the Message stanza to see
+            how it may be used.
""" if msg["type"] in ("chat", "normal"): action = 0 jid = msg["from"].bare + + db_dir = confighandler.get_default_dbdir() + os.chdir(db_dir) + if jid + ".db" not in os.listdir(): + await self.task_jid(jid) + message = " ".join(msg["body"].split()) - message = message.lower() - if message.startswith("help"): - action = print_help() - # NOTE: Might not need it - # elif message.startswith("add "): - # url = message[4:] - elif message.startswith("http"): - url = message - action = await initdb(jid, datahandler.add_feed, url) - # action = "> " + message + "\n" + action - elif message.startswith("quantum "): - key = message[:7] - val = message[8:] - # action = "Every update will contain {} news items.".format(action) - action = await initdb(jid, sqlitehandler.set_settings_value, [key, val]) - await self.refresh_task(jid, key, val) - elif message.startswith("interval "): - key = message[:8] - val = message[9:] - # action = "Updates will be sent every {} minutes.".format(action) - action = await initdb(jid, sqlitehandler.set_settings_value, [key, val]) - await self.refresh_task(jid, key, val) - elif message.startswith("list"): - action = await initdb(jid, sqlitehandler.list_subscriptions) - elif message.startswith("recent "): - num = message[7:] - action = await initdb(jid, sqlitehandler.last_entries, num) - elif message.startswith("remove "): - ix = message[7:] - action = await initdb(jid, sqlitehandler.remove_feed, ix) - elif message.startswith("search "): - query = message[7:] - action = await initdb(jid, sqlitehandler.search_entries, query) - elif message.startswith("start"): - # action = "Updates are enabled." - key = "enabled" - val = 1 - actiona = await initdb(jid, sqlitehandler.set_settings_value, [key, val]) - asyncio.create_task(self.task_jid(jid)) - # print(print_time(), "task_manager[jid]") - # print(task_manager[jid]) - elif message.startswith("stats"): - action = await initdb(jid, sqlitehandler.statistics) - elif message.startswith("status "): - ix = message[7:] - action = await initdb(jid, sqlitehandler.toggle_status, ix) - elif message.startswith("stop"): + message_lowercase = message.lower() + + print(await datetimehandler.current_time(), "ACCOUNT: " + str(msg["from"])) + print(await datetimehandler.current_time(), "COMMAND:", message) + + match message_lowercase: + case "help": + action = print_help() + case _ if message_lowercase in ["greetings", "hello", "hey"]: + action = ( + "Greeting! I'm Slixfeed The News Bot!" + "\n" + "Send a URL of a news website to start." + ) + case _ if message_lowercase.startswith("add"): + message = message[4:] + url = message.split(" ")[0] + title = " ".join(message.split(" ")[1:]) + if url.startswith("http"): + action = await datahandler.initdb( + jid, + datahandler.add_feed_no_check, + [url, title] + ) + await self.send_status(jid) + else: + action = "Missing URL." + case _ if message_lowercase.startswith("allow"): + key = "filter-" + message[:5] + val = message[6:] + if val: + keywords = await datahandler.initdb( + jid, + sqlitehandler.get_settings_value, + key + ) + val = await filterhandler.set_filter( + val, + keywords + ) + await datahandler.initdb( + jid, + sqlitehandler.set_settings_value, + [key, val] + ) + action = ( + "Approved keywords\n" + "```\n{}\n```" + ).format(val) + else: + action = "Missing keywords." 
+ case _ if message_lowercase.startswith("deny"): + key = "filter-" + message[:4] + val = message[5:] + if val: + keywords = await datahandler.initdb( + jid, + sqlitehandler.get_settings_value, + key + ) + val = await filterhandler.set_filter( + val, + keywords + ) + await datahandler.initdb( + jid, + sqlitehandler.set_settings_value, + [key, val] + ) + action = ( + "Rejected keywords\n" + "```\n{}\n```" + ).format(val) + else: + action = "Missing keywords." + case _ if message_lowercase.startswith("http"): + url = message + action = await datahandler.initdb( + jid, + datahandler.add_feed, + url + ) + # action = "> " + message + "\n" + action + await self.send_status(jid) + case _ if message_lowercase.startswith("feeds"): + query = message[6:] + if query: + if len(query) > 3: + action = await datahandler.initdb( + jid, + sqlitehandler.search_feeds, + query + ) + else: + action = ( + "Enter at least 4 characters to search" + ) + else: + action = await datahandler.initdb( + jid, + sqlitehandler.list_feeds + ) + case _ if message_lowercase.startswith("interval"): + # FIXME + # The following error occurs only upon first attempt to set interval. + # /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited + # self._args = None + # RuntimeWarning: Enable tracemalloc to get the object allocation traceback + key = message[:8] + val = message[9:] + if val: + # action = ( + # "Updates will be sent every {} minutes." + # ).format(action) + await datahandler.initdb( + jid, + sqlitehandler.set_settings_value, + [key, val] + ) + await self.refresh_task( + jid, + self.send_update, + key, + val + ) + action = ( + "Updates will be sent every {} minutes." + ).format(val) + else: + action = "Missing value." + case _ if message_lowercase.startswith("next"): + num = message[5:] + await self.send_update(jid, num) + await self.send_status(jid) + # await self.refresh_task(jid, key, val) + case _ if message_lowercase.startswith("quantum"): + key = message[:7] + val = message[8:] + if val: + # action = ( + # "Every update will contain {} news items." + # ).format(action) + await datahandler.initdb( + jid, + sqlitehandler.set_settings_value, + [key, val] + ) + action = ( + "Next update will contain {} news items." + ).format(val) + else: + action = "Missing value." + case _ if message_lowercase.startswith("random"): + action = "Updates will be sent randomly." + case _ if message_lowercase.startswith("recent"): + num = message[7:] + if num: + action = await datahandler.initdb( + jid, + sqlitehandler.last_entries, + num + ) + else: + action = "Missing value." + case _ if message_lowercase.startswith("remove"): + ix = message[7:] + if ix: + action = await datahandler.initdb( + jid, + sqlitehandler.remove_feed, + ix + ) + await self.send_status(jid) + else: + action = "Missing feed ID." + case _ if message_lowercase.startswith("search"): + query = message[7:] + if query: + if len(query) > 1: + action = await datahandler.initdb( + jid, + sqlitehandler.search_entries, + query + ) + else: + action = ( + "Enter at least 2 characters to search" + ) + else: + action = "Missing search query." + case "start": + # action = "Updates are enabled." + key = "enabled" + val = 1 + await datahandler.initdb( + jid, + sqlitehandler.set_settings_value, + [key, val] + ) + asyncio.create_task(self.task_jid(jid)) + action = "Updates are enabled." 
+ # print(await datetimehandler.current_time(), "task_manager[jid]") + # print(task_manager[jid]) + case "stats": + action = await datahandler.initdb( + jid, + sqlitehandler.statistics + ) + case _ if message_lowercase.startswith("status "): + ix = message[7:] + action = await datahandler.initdb( + jid, + sqlitehandler.toggle_status, + ix + ) + case "stop": + # FIXME + # The following error occurs only upon first attempt to stop. + # /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited + # self._args = None + # RuntimeWarning: Enable tracemalloc to get the object allocation traceback # action = "Updates are disabled." - try: - task_manager[jid]["check"].cancel() - # task_manager[jid]["status"].cancel() - task_manager[jid]["interval"].cancel() + # try: + # # task_manager[jid]["check"].cancel() + # # task_manager[jid]["status"].cancel() + # task_manager[jid]["interval"].cancel() + # key = "enabled" + # val = 0 + # action = await datahandler.initdb( + # jid, + # sqlitehandler.set_settings_value, + # [key, val] + # ) + # except: + # action = "Updates are already disabled." + # # print("Updates are already disabled. Nothing to do.") + # # await self.send_status(jid) key = "enabled" val = 0 - actiona = await initdb(jid, sqlitehandler.set_settings_value, [key, val]) - await self.send_status(jid) - print(print_time(), "task_manager[jid]") - print(task_manager[jid]) - except: - # action = "Updates are already disabled." - await self.send_status(jid) - else: - action = "Unknown command. Press \"help\" for list of commands" + await datahandler.initdb( + jid, + sqlitehandler.set_settings_value, + [key, val] + ) + await self.task_jid(jid) + action = "Updates are disabled." + case "support": + # TODO Send an invitation. + action = "xmpp:slixmpp@muc.poez.io?join" + case _: + action = ( + "Unknown command. " + "Press \"help\" for list of commands" + ) if action: msg.reply(action).send() - print(print_time(), "COMMAND ACCOUNT") - print("COMMAND:", message) - print("ACCOUNT: " + str(msg["from"])) - async def select_file(self): """ Initiate actions by JID (Jabber ID). - :param self: Self + Parameters + ---------- + self : ? + Self. """ while True: db_dir = confighandler.get_default_dbdir() if not os.path.isdir(db_dir): - msg = ("Slixfeed can not work without a database. 
\n" - "To create a database, follow these steps: \n" - "Add Slixfeed contact to your roster \n" - "Send a feed to the bot by: \n" - "add https://reclaimthenet.org/feed/") - print(print_time(), msg) + msg = ( + "Slixfeed can not work without a database.\n" + "To create a database, follow these steps:\n" + "Add Slixfeed contact to your roster.\n" + "Send a feed to the bot by URL:\n" + "https://reclaimthenet.org/feed/" + ) + # print(await datetimehandler.current_time(), msg) print(msg) else: os.chdir(db_dir) @@ -191,114 +409,165 @@ class Slixfeed(slixmpp.ClientXMPP): # jid_tasker[jid] = asyncio.create_task(self.task_jid(jid)) # await jid_tasker[jid] async with asyncio.TaskGroup() as tg: - print("main task") - print(print_time(), "repr(tg)") - print(repr(tg)) # for file in files: if file.endswith(".db") and not file.endswith(".db-jour.db"): jid = file[:-3] - tg.create_task(self.task_jid(jid)) + main_task.extend([tg.create_task(self.task_jid(jid))]) + # main_task = [tg.create_task(self.task_jid(jid))] # task_manager.update({jid: tg}) - # print(task_manager) # {} - print(print_time(), "repr(tg) id(tg)") - print(jid, repr(tg)) # sch@pimux.de - print(jid, id(tg)) # sch@pimux.de 139879835500624 - # - # 139879835500624 async def task_jid(self, jid): """ JID (Jabber ID) task manager. - :param self: Self - :param jid: Jabber ID + Parameters + ---------- + self : ? + Self. + jid : str + Jabber ID. """ - enabled = await initdb( + enabled = await datahandler.initdb( jid, sqlitehandler.get_settings_value, "enabled" ) - print(print_time(), "enabled", enabled, jid) + # print(await datetimehandler.current_time(), "enabled", enabled, jid) if enabled: - print("sub task") - print(print_time(), "repr(self) id(self)") - print(repr(self)) - print(id(self)) task_manager[jid] = {} - task_manager[jid]["check"] = asyncio.create_task(check_updates(jid)) - task_manager[jid]["status"] = asyncio.create_task(self.send_status(jid)) - task_manager[jid]["interval"] = asyncio.create_task(self.send_update(jid)) + task_manager[jid]["check"] = asyncio.create_task( + check_updates(jid) + ) + task_manager[jid]["status"] = asyncio.create_task( + self.send_status(jid) + ) + task_manager[jid]["interval"] = asyncio.create_task( + self.send_update(jid) + ) await task_manager[jid]["check"] await task_manager[jid]["status"] await task_manager[jid]["interval"] - print(print_time(), "task_manager[jid].items()") - print(task_manager[jid].items()) - print(print_time(), "task_manager[jid]") - print(task_manager[jid]) - print(print_time(), "task_manager") - print(task_manager) else: + # FIXME + # The following error occurs only upon first attempt to stop. + # /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited + # self._args = None + # RuntimeWarning: Enable tracemalloc to get the object allocation traceback + try: + task_manager[jid]["interval"].cancel() + except: + None await self.send_status(jid) - async def send_update(self, jid): + + async def send_update(self, jid, num=None): """ Send news items as messages. - :param self: Self - :param jid: Jabber ID + Parameters + ---------- + self : ? + Self. + jid : str + Jabber ID. + num : str, optional + Number. The default is None. 
""" - new = await initdb( + # print("Starting send_update()") + # print(jid) + new = await datahandler.initdb( jid, - sqlitehandler.get_entry_unread + sqlitehandler.get_entry_unread, + num ) if new: - print(print_time(), "> SEND UPDATE",jid) + print(await datetimehandler.current_time(), "> SEND UPDATE",jid) self.send_message( mto=jid, mbody=new, mtype="chat" ) - interval = await initdb( + await self.refresh_task( jid, - sqlitehandler.get_settings_value, + self.send_update, "interval" - ) + ) + # interval = await datahandler.initdb( + # jid, + # sqlitehandler.get_settings_value, + # "interval" + # ) + # task_manager[jid]["interval"] = loop.call_at( + # loop.time() + 60 * interval, + # loop.create_task, + # self.send_update(jid) + # ) + + # print(await datetimehandler.current_time(), "asyncio.get_event_loop().time()") + # print(await datetimehandler.current_time(), asyncio.get_event_loop().time()) # await asyncio.sleep(60 * interval) - self.loop.call_at( - self.loop.time() + 60 * interval, - self.loop.create_task, - self.send_update(jid) - ) + + # loop.call_later( + # 60 * interval, + # loop.create_task, + # self.send_update(jid) + # ) + + # print + # await handle_event() + async def send_status(self, jid): """ Send status message. - :param self: Self - :param jid: Jabber ID + Parameters + ---------- + self : ? + Self. + jid : str + Jabber ID. """ - print(print_time(), "> SEND STATUS",jid) - unread = await initdb( - jid, - sqlitehandler.get_number_of_entries_unread - ) - - if unread: - status_text = "📰 News items: {}".format(str(unread)) - status_mode = "chat" - else: - status_text = "🗞 No News" - status_mode = "available" - - enabled = await initdb( + print(await datetimehandler.current_time(), "> SEND STATUS",jid) + enabled = await datahandler.initdb( jid, sqlitehandler.get_settings_value, "enabled" ) - if not enabled: status_mode = "xa" + status_text = "Send \"Start\" to receive news." + else: + feeds = await datahandler.initdb( + jid, + sqlitehandler.get_number_of_items, + "feeds" + ) + if not feeds: + status_mode = "available" + status_text = ( + "📂️ Send a URL from a blog or a news website." + ) + else: + unread = await datahandler.initdb( + jid, + sqlitehandler.get_number_of_entries_unread + ) + if unread: + status_mode = "chat" + status_text = ( + "📰 You have {} news items to read." + ).format(str(unread)) + # status_text = ( + # "📰 News items: {}" + # ).format(str(unread)) + # status_text = ( + # "📰 You have {} news items" + # ).format(str(unread)) + else: + status_mode = "available" + status_text = "🗞 No news" # print(status_text, "for", jid) self.send_presence( @@ -306,37 +575,55 @@ class Slixfeed(slixmpp.ClientXMPP): pstatus=status_text, pto=jid, #pfrom=None - ) - - await asyncio.sleep(60 * 20) - - # self.loop.call_at( - # self.loop.time() + 60 * 20, - # self.loop.create_task, + ) + # await asyncio.sleep(60 * 20) + await self.refresh_task( + jid, + self.send_status, + "status", + "20" + ) + # loop.call_at( + # loop.time() + 60 * 20, + # loop.create_task, # self.send_status(jid) # ) - async def refresh_task(self, jid, key, val): + async def refresh_task(self, jid, callback, key, val=None): """ - Apply settings on runtime. + Apply new setting at runtime. - :param self: Self - :param jid: Jabber ID - :param key: Key - :param val: Value + Parameters + ---------- + self : ? + Self. + jid : str + Jabber ID. + key : str + Key. + val : str, optional + Value. The default is None. 
""" + if not val: + val = await datahandler.initdb( + jid, + sqlitehandler.get_settings_value, + key + ) if jid in task_manager: task_manager[jid][key].cancel() - loop = asyncio.get_event_loop() - print(print_time(), "loop") - print(loop) - print(print_time(), "loop") task_manager[jid][key] = loop.call_at( loop.time() + 60 * float(val), loop.create_task, - self.send_update(jid) + callback(jid) + # self.send_update(jid) ) + # task_manager[jid][key] = loop.call_later( + # 60 * float(val), + # loop.create_task, + # self.send_update(jid) + # ) # task_manager[jid][key] = self.send_update.loop.call_at( # self.send_update.loop.time() + 60 * val, # self.send_update.loop.create_task, @@ -350,16 +637,19 @@ async def check_updates(jid): """ Start calling for update check up. - :param jid: Jabber ID + Parameters + ---------- + jid : str + Jabber ID. """ while True: - print(print_time(), "> CHCK UPDATE",jid) - await initdb(jid, datahandler.download_updates) + print(await datetimehandler.current_time(), "> CHCK UPDATE",jid) + await datahandler.initdb(jid, datahandler.download_updates) await asyncio.sleep(60 * 90) # Schedule to call this function again in 90 minutes - # self.loop.call_at( - # self.loop.time() + 60 * 90, - # self.loop.create_task, + # loop.call_at( + # loop.time() + 60 * 90, + # loop.create_task, # self.check_updates(jid) # ) @@ -367,84 +657,123 @@ async def check_updates(jid): def print_help(): """ Print help manual. + + Returns + ------- + msg : str + Message. """ - msg = ("Slixfeed - News syndication bot for Jabber/XMPP \n" - "\n" - "DESCRIPTION: \n" - " Slixfeed is a news aggregator bot for online news feeds. \n" - " Supported filetypes: Atom, RDF and RSS. \n" - "\n" - "BASIC USAGE: \n" - " start \n" - " Enable bot and send updates. \n" - " Stop \n" - " Disable bot and stop updates. \n" - " batch N \n" - " Send N updates for each interval. \n" - " interval N \n" - " Send an update every N minutes. \n" - " feed list \n" - " List subscriptions. \n" - "\n" - "EDIT OPTIONS: \n" - " add URL \n" - " Add URL to subscription list. \n" - " remove ID \n" - " Remove feed from subscription list. \n" - " status ID \n" - " Toggle update status of feed. \n" - "\n" - "SEARCH OPTIONS: \n" - " search TEXT \n" - " Search news items by given keywords. \n" - " recent N \n" - " List recent N news items (up to 50 items). \n" - "\n" - "STATISTICS OPTIONS: \n" - " analyses \n" - " Show report and statistics of feeds. \n" - " obsolete \n" - " List feeds that are not available. \n" - " unread \n" - " Print number of unread news items. \n" - "\n" - "BACKUP OPTIONS: \n" - " export opml \n" - " Send an OPML file with your feeds. \n" - " backup news html\n" - " Send an HTML formatted file of your news items. \n" - " backup news md \n" - " Send a Markdown file of your news items. \n" - " backup news text \n" - " Send a Plain Text file of your news items. 
\n" - "\n" - "DOCUMENTATION: \n" - " Slixfeed \n" - " https://gitgud.io/sjehuda/slixfeed \n" - " Slixmpp \n" - " https://slixmpp.readthedocs.io/ \n" - " feedparser \n" - " https://pythonhosted.org/feedparser") + msg = ( + "```\n" + "NAME\n" + "Slixfeed - News syndication bot for Jabber/XMPP\n" + "\n" + "DESCRIPTION\n" + " Slixfeed is a news aggregator bot for online news feeds.\n" + " This program is primarily designed for XMPP.\n" + " For more information, visit https://xmpp.org/software/\n" + "\n" + "BASIC USAGE\n" + " start\n" + " Enable bot and send updates.\n" + " stop\n" + " Disable bot and stop updates.\n" + " feeds\n" + " List subscriptions.\n" + " interval N\n" + " Set interval update to every N minutes.\n" + " next N\n" + " Send N next updates.\n" + " quantum N\n" + " Set N updates for each interval.\n" + "\n" + "FILTER OPTIONS\n" + " allow\n" + " Keywords to allow (comma separates).\n" + " deny\n" + " Keywords to block (comma separates).\n" + # " filter clear allow\n" + # " Reset allow list.\n" + # " filter clear deny\n" + # " Reset deny list.\n" + "\n" + "EDIT OPTIONS\n" + " URL\n" + " Add URL to subscription list.\n" + " add URL TITLE\n" + " Add URL to subscription list (without validity check).\n" + " remove ID\n" + " Remove feed from subscription list.\n" + " status ID\n" + " Toggle update status of feed.\n" + "\n" + "SEARCH OPTIONS\n" + " feeds TEXT\n" + " Search subscriptions by given keywords.\n" + " search TEXT\n" + " Search news items by given keywords.\n" + " recent N\n" + " List recent N news items (up to 50 items).\n" + "\n" + # "STATISTICS OPTIONS\n" + # " analyses\n" + # " Show report and statistics of feeds.\n" + # " obsolete\n" + # " List feeds that are not available.\n" + # " unread\n" + # " Print number of unread news items.\n" + # "\n" + # "BACKUP OPTIONS\n" + # " export opml\n" + # " Send an OPML file with your feeds.\n" + # " backup news html\n" + # " Send an HTML formatted file of your news items.\n" + # " backup news md\n" + # " Send a Markdown file of your news items.\n" + # " backup news text\n" + # " Send a Plain Text file of your news items.\n" + # "\n" + "SUPPORT\n" + " support" + " Join xmpp:slixmpp@muc.poez.io?join\n" + "\n" + # "PROTOCOLS\n" + # " Supported prootcols are IRC, Matrix and XMPP.\n" + # " For the best experience, we recommend you to use XMPP.\n" + # "\n" + "FILETYPES\n" + " Supported filetypes are Atom, RDF and RSS.\n" + "\n" + "AUTHORS\n" + " Laura Harbinger, Schimon Zackary.\n" + "\n" + "COPYRIGHT\n" + " Slixfeed is free software; you can redistribute it and/or\n" + " modify it under the terms of the GNU General Public License\n" + " as published by the Free Software Foundation; version 3 only\n" + "\n" + " Slixfeed is distributed in the hope that it will be useful,\n" + " but WITHOUT ANY WARRANTY; without even the implied warranty of\n" + " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n" + " GNU General Public License for more details.\n" + "\n" + "NOTE\n" + " Make Slixfeed your own.\n" + "\n" + " You can run Slixfeed on your own computer, server, and\n" + " even on a Linux phone (i.e. Droidian, Mobian NixOS,\n" + " postmarketOS). 
You can also use Termux.\n" + "\n" + " All you need is one of the above and an XMPP account to\n" + " connect Slixfeed to.\n" + "\n" + "DOCUMENTATION\n" + " Slixfeed\n" + " https://gitgud.io/sjehuda/slixfeed\n" + " Slixmpp\n" + " https://slixmpp.readthedocs.io/\n" + " feedparser\n" + " https://pythonhosted.org/feedparser\n" + "\n```" + ) return msg - - -# TODO Perhaps this needs to be executed -# just once per program execution -async def initdb(jid, callback, message=None): - """ - Callback function to instantiate action on database. - - :param jid: JID (Jabber ID). - :param callback: Function name. - :param massage: Optional kwarg when a message is a part or required argument. - """ - db_dir = confighandler.get_default_dbdir() - if not os.path.isdir(db_dir): - os.mkdir(db_dir) - db_file = os.path.join(db_dir, r"{}.db".format(jid)) - sqlitehandler.create_tables(db_file) - # await sqlitehandler.set_default_values(db_file) - if message: - return await callback(db_file, message) - else: - return await callback(db_file)
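
A minimal, self-contained sketch (not part of the patch) of the dispatch
pattern that xmpphandler standardizes on above: every command funnels
through datahandler.initdb, which resolves the per-JID database file
<jid>.db and runs the given sqlitehandler callback against it. The JID,
key and value below are illustrative:

    import asyncio

    import datahandler
    import sqlitehandler

    async def demo():
        jid = "user@example.org"  # illustrative bare JID
        # Third argument is the optional message payload; for
        # set_settings_value it is a [key, value] pair.
        await datahandler.initdb(
            jid,
            sqlitehandler.set_settings_value,
            ["interval", 30]
            )
        # Without a payload, only the database file is passed on.
        msg = await datahandler.initdb(jid, sqlitehandler.list_feeds)
        print(msg)

    asyncio.run(demo())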