From dbe9ec3073045003f43ac470b12c4dbe9dc5bcae Mon Sep 17 00:00:00 2001 From: Schimon Jehudah Date: Sat, 6 Jan 2024 22:03:08 +0000 Subject: [PATCH] Restructure modules and database. Add OPML import functionality. Minor improvements. --- slixfeed.py | 83 +-- slixfeed/__main__.py | 5 +- slixfeed/action.py | 559 ++++++++++++++++++-- slixfeed/config.py | 17 +- slixfeed/crawl.py | 9 +- slixfeed/datetime.py | 7 + slixfeed/export.py | 18 - slixfeed/fetch.py | 15 +- slixfeed/filter.py | 152 ------ slixfeed/log.py | 33 -- slixfeed/opml.py | 75 --- slixfeed/sqlite.py | 1051 ++++++++++++++++++++++++-------------- slixfeed/task.py | 19 +- slixfeed/url.py | 41 +- slixfeed/xmpp/compose.py | 194 ------- slixfeed/xmpp/process.py | 150 +++--- 16 files changed, 1352 insertions(+), 1076 deletions(-) delete mode 100644 slixfeed/export.py delete mode 100644 slixfeed/filter.py delete mode 100644 slixfeed/log.py delete mode 100644 slixfeed/opml.py delete mode 100644 slixfeed/xmpp/compose.py diff --git a/slixfeed.py b/slixfeed.py index 91a35b4..9f58311 100644 --- a/slixfeed.py +++ b/slixfeed.py @@ -18,18 +18,20 @@ # # SPDX-License-Identifier: MIT -from slixfeed.__main__ import Jabber -from slixfeed.xmpp.client import Slixfeed -from slixfeed.config import get_default_confdir from argparse import ArgumentParser -import configparser -# import filehandler -# from filehandler import get_default_confdir from getpass import getpass import logging -import os +from slixfeed.__main__ import Jabber +from slixfeed.config import get_value +from slixfeed.xmpp.client import Slixfeed import sys +# import socks +# import socket +# # socks.set_default_proxy(socks.SOCKS5, values[0], values[1]) +# socks.set_default_proxy(socks.SOCKS5, 'localhost', 9050) +# socket.socket = socks.socksocket + if __name__ == '__main__': # Setup the command line arguments. @@ -37,65 +39,34 @@ if __name__ == '__main__': # Output verbosity options. parser.add_argument( - "-q", - "--quiet", - help="set logging to ERROR", - action="store_const", - dest="loglevel", - const=logging.ERROR, - default=logging.INFO - ) + "-q", "--quiet", help="set logging to ERROR", + action="store_const", dest="loglevel", + const=logging.ERROR, default=logging.INFO) parser.add_argument( - "-d", - "--debug", - help="set logging to DEBUG", - action="store_const", - dest="loglevel", - const=logging.DEBUG, - default=logging.INFO - ) + "-d", "--debug", help="set logging to DEBUG", + action="store_const", dest="loglevel", + const=logging.DEBUG, default=logging.INFO) # JID and password options. parser.add_argument( - "-j", - "--jid", - dest="jid", - help="Jabber ID" - ) + "-j", "--jid", dest="jid", help="Jabber ID") parser.add_argument( - "-p", - "--password", - dest="password", - help="Password of JID" - ) + "-p", "--password", dest="password", help="Password of JID") parser.add_argument( - "-n", - "--nickname", - dest="nickname", - help="Display name" - ) + "-n", "--nickname", dest="nickname", help="Display name") args = parser.parse_args() # Setup logging. 
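The commented-out socks/socket lines added at the top of slixfeed.py hint at routing every outgoing connection through a SOCKS5 proxy such as Tor (default port 9050). A minimal sketch of what enabling them would look like, assuming the PySocks package is available (it is not declared as a dependency by this patch):

```
import socket

import socks  # provided by the PySocks package (assumed to be installed)

# Route every new socket through a local SOCKS5 proxy; 9050 is Tor's default port.
socks.set_default_proxy(socks.SOCKS5, "localhost", 9050)
socket.socket = socks.socksocket
```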
logging.basicConfig( - level=args.loglevel, - format='%(levelname)-8s %(message)s' - ) + level=args.loglevel, format='%(levelname)-8s %(message)s') # Try configuration file - config = configparser.RawConfigParser() - config_dir = get_default_confdir() - if not os.path.isdir(config_dir): - os.mkdir(config_dir) - # TODO Copy file from /etc/slixfeed/ or /usr/share/slixfeed/ - config_file = os.path.join(config_dir, r"accounts.ini") - config.read(config_file) - if config.has_section("XMPP"): - xmpp = config["XMPP"] - nickname = xmpp["nickname"] - username = xmpp["username"] - password = xmpp["password"] + values = get_value( + "accounts", "XMPP", ["nickname", "username", "password"]) + nickname = values[0] + username = values[1] + password = values[2] # Use arguments if were given if args.jid: @@ -106,11 +77,11 @@ if __name__ == '__main__': nickname = args.nickname # Prompt for credentials if none were given - if username is None: + if not username: username = input("Username: ") - if password is None: + if not password: password = getpass("Password: ") - if nickname is None: + if not nickname: nickname = input("Nickname: ") Jabber(username, password, nickname) diff --git a/slixfeed/__main__.py b/slixfeed/__main__.py index 8275b24..cb134c8 100644 --- a/slixfeed/__main__.py +++ b/slixfeed/__main__.py @@ -39,7 +39,10 @@ TODO 9) Readability See project /buriy/python-readability -10) Download and upload/send article (xHTML, HTMLZ, Markdown, MHTML, TXT). +9.1) IDEA: Bot to display Title and Excerpt + (including sending a PDF version of it) of posted link + +10) Download and upload/send article (xHTML, HTMLZ, Markdown, MHTML, PDF, TXT). 11) Fetch summary from URL, instead of storing summary, or Store 5 upcoming summaries. diff --git a/slixfeed/action.py b/slixfeed/action.py index f01ebba..e1fe7fd 100644 --- a/slixfeed/action.py +++ b/slixfeed/action.py @@ -1,41 +1,391 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +""" + +TODO + +1) Call sqlite function from function statistics. + Returning a list of values doesn't' seem to be a good practice. + +""" + from asyncio.exceptions import IncompleteReadError from bs4 import BeautifulSoup from http.client import IncompleteRead from feedparser import parse +import logging import slixfeed.config as config import slixfeed.crawl as crawl -from slixfeed.datetime import now, rfc2822_to_iso8601 +from slixfeed.datetime import ( + current_date, current_time, now, + convert_struct_time_to_iso8601, + rfc2822_to_iso8601 + ) import slixfeed.fetch as fetch import slixfeed.sqlite as sqlite -import slixfeed.read as read -import slixfeed.task as task -from slixfeed.url import complete_url, join_url, trim_url +from slixfeed.url import ( + # complete_url, + join_url, + remove_tracking_parameters, + replace_hostname, + trim_url + ) +import slixfeed.xmpp.bookmark as bookmark from urllib import error from urllib.parse import urlsplit +import xml.etree.ElementTree as ET + + +def log_to_markdown(timestamp, filename, jid, message): + """ + Log message to file. + + Parameters + ---------- + timestamp : str + Time stamp. + filename : str + Jabber ID as name of file. + jid : str + Jabber ID. + message : str + Message content. + + Returns + ------- + None. + + """ + with open(filename + '.md', 'a') as file: + # entry = "{} {}:\n{}\n\n".format(timestamp, jid, message) + entry = ( + "## {}\n" + "### {}\n\n" + "{}\n\n").format(jid, timestamp, message) + file.write(entry) + + +def is_feed(feed): + """ + Determine whether document is feed or not. 
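The credential lookup in slixfeed.py now goes through config.get_value("accounts", "XMPP", [...]) instead of reading accounts.ini directly with configparser. A short sketch of the accounts.ini section that call expects; the values below are placeholders, not part of the patch:

```
import configparser

accounts = configparser.RawConfigParser()
accounts["XMPP"] = {
    "nickname": "Slixfeed",         # placeholder
    "username": "bot@example.org",  # placeholder
    "password": "secret",           # placeholder
}

# Written to accounts.ini in the default configuration directory,
# get_value("accounts", "XMPP", ["nickname", "username", "password"])
# returns the three values in the order the keys are listed.
with open("accounts.ini", "w") as file:
    accounts.write(file)
```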
+ + Parameters + ---------- + feed : dict + Parsed feed. + + Returns + ------- + val : boolean + True or False. + """ + print("Check function action.is_feed") + breakpoint() + value = False + message = None + if not feed.entries: + if "version" in feed.keys(): + feed["version"] + if feed.version: + value = True + # message = ( + # "Empty feed for {}" + # ).format(url) + elif "title" in feed["feed"].keys(): + value = True + # message = ( + # "Empty feed for {}" + # ).format(url) + else: + value = False + # message = ( + # "No entries nor title for {}" + # ).format(url) + elif feed.bozo: + value = False + # message = ( + # "Bozo detected for {}" + # ).format(url) + else: + value = True + # message = ( + # "Good feed for {}" + # ).format(url) + print(message) + return value + + +def list_unread_entries(result, feed_title): + # TODO Add filtering + # TODO Do this when entry is added to list and mark it as read + # DONE! + # results = [] + # if get_settings_value(db_file, "filter-deny"): + # while len(results) < num: + # result = cur.execute(sql).fetchone() + # blacklist = await get_settings_value(db_file, "filter-deny").split(",") + # for i in blacklist: + # if i in result[1]: + # continue + # print("rejected:", result[1]) + # print("accepted:", result[1]) + # results.extend([result]) + + # news_list = "You've got {} news items:\n".format(num) + # NOTE Why doesn't this work without list? + # i.e. for result in results + # for result in results.fetchall(): + ix = result[0] + title = result[1] + # # TODO Retrieve summary from feed + # # See fetch.view_entry + # summary = result[2] + # # Remove HTML tags + # try: + # summary = BeautifulSoup(summary, "lxml").text + # except: + # print(result[2]) + # breakpoint() + # # TODO Limit text length + # summary = summary.replace("\n\n\n", "\n\n") + # length = await get_settings_value(db_file, "length") + # summary = summary[:length] + " […]" + # summary = summary.strip().split('\n') + # summary = ["> " + line for line in summary] + # summary = "\n".join(summary) + link = result[2] + link = remove_tracking_parameters(link) + link = (replace_hostname(link, "link")) or link + news_item = ( + "\n{}\n{}\n{}\n" + ).format(str(title), str(link), str(feed_title)) + return news_item + + +def list_search_results(query, results): + results_list = ( + "Search results for '{}':\n\n```" + ).format(query) + for result in results: + results_list += ( + "\n{}\n{}\n" + ).format(str(result[0]), str(result[1])) + if len(results): + return results_list + "```\nTotal of {} results".format(len(results)) + else: + return "No results were found for: {}".format(query) + + +def list_feeds_by_query(query, results): + results_list = ( + "Feeds containing '{}':\n\n```" + ).format(query) + for result in results: + results_list += ( + "\nName : {} [{}]" + "\nURL : {}" + "\n" + ).format( + str(result[0]), str(result[1]), str(result[2])) + if len(results): + return results_list + "\n```\nTotal of {} feeds".format(len(results)) + else: + return "No feeds were found for: {}".format(query) + + +def list_statistics(values): + """ + Return table statistics. + + Parameters + ---------- + db_file : str + Path to database file. + + Returns + ------- + msg : str + Statistics as message. 
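is_feed() above decides from a parsed document: a document with entries passes unless feedparser flagged it as bozo, and a document without entries still passes if it carries a version or a feed-level title. A condensed, self-contained sketch of those checks (the sample RSS string is illustrative):

```
from feedparser import parse

document = (
    '<?xml version="1.0"?>'
    '<rss version="2.0"><channel>'
    '<title>Example feed</title>'
    '<item><title>First post</title><link>http://example.org/1</link></item>'
    '</channel></rss>')
feed = parse(document)

if not feed.entries:
    # No entries: accept only if a version or a feed-level title is present.
    looks_like_feed = bool(feed.get("version")) or "title" in feed["feed"]
elif feed.bozo:
    looks_like_feed = False
else:
    looks_like_feed = True

print(looks_like_feed)  # True for the sample document above
```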
+ """ + msg = ( + "```" + "\nSTATISTICS\n" + "News items : {}/{}\n" + "News sources : {}/{}\n" + "\nOPTIONS\n" + "Items to archive : {}\n" + "Update interval : {}\n" + "Items per update : {}\n" + "Operation status : {}\n" + "```" + ).format(values[0], values[1], values[2], values[3], + values[4], values[5], values[6], values[7]) + return msg + + +# FIXME Replace counter by len +def list_last_entries(results, num): + titles_list = "Recent {} titles:\n\n```".format(num) + for result in results: + titles_list += ( + "\n{}\n{}\n" + ).format( + str(result[0]), str(result[1])) + if len(results): + titles_list += "```\n" + return titles_list + else: + return "There are no news at the moment." + + +def list_feeds(results): + feeds_list = "\nList of subscriptions:\n\n```\n" + for result in results: + feeds_list += ( + "Name : {}\n" + "URL : {}\n" + # "Updated : {}\n" + # "Status : {}\n" + "ID : {}\n" + "\n" + ).format( + str(result[0]), str(result[1]), str(result[2])) + if len(results): + return feeds_list + ( + "```\nTotal of {} subscriptions.\n" + ).format(len(results)) + else: + msg = ( + "List of subscriptions is empty.\n" + "To add feed, send a URL\n" + "Try these:\n" + # TODO Pick random from featured/recommended + "https://reclaimthenet.org/feed/" + ) + return msg + + +async def list_bookmarks(self): + conferences = await bookmark.get(self) + groupchat_list = "\nList of groupchats:\n\n```\n" + counter = 0 + for conference in conferences: + counter += 1 + groupchat_list += ( + "{}\n" + "\n" + ).format( + conference["jid"] + ) + groupchat_list += ( + "```\nTotal of {} groupchats.\n" + ).format(counter) + return groupchat_list + + +def export_to_markdown(jid, filename, results): + with open(filename, 'w') as file: + file.write( + '# Subscriptions for {}\n'.format(jid)) + file.write( + '## Set of feeds exported with Slixfeed\n') + for result in results: + file.write( + '- [{}]({})\n'.format(result[0], result[1])) + file.write( + '\n\n* * *\n\nThis list was saved on {} from xmpp:{} using ' + '[Slixfeed](https://gitgud.io/sjehuda/slixfeed)\n'.format( + current_date(), jid)) + + +def export_to_opml(jid, filename, results): + root = ET.Element("opml") + root.set("version", "1.0") + head = ET.SubElement(root, "head") + ET.SubElement(head, "title").text = "Subscriptions for {}".format(jid) + ET.SubElement(head, "description").text = ( + "Set of feeds exported with Slixfeed") + ET.SubElement(head, "generator").text = "Slixfeed" + ET.SubElement(head, "urlPublic").text = ( + "https://gitgud.io/sjehuda/slixfeed") + time_stamp = current_time() + ET.SubElement(head, "dateCreated").text = time_stamp + ET.SubElement(head, "dateModified").text = time_stamp + body = ET.SubElement(root, "body") + for result in results: + outline = ET.SubElement(body, "outline") + outline.set("text", result[0]) + outline.set("xmlUrl", result[1]) + # outline.set("type", result[2]) + tree = ET.ElementTree(root) + tree.write(filename) + + +async def import_opml(db_file, url): + result = await fetch.download_feed(url) + document = result[0] + if document: + root = ET.fromstring(document) + before = await sqlite.get_number_of_items( + db_file, 'feeds') + feeds = [] + for child in root.findall(".//outline"): + url = child.get("xmlUrl") + title = child.get("text") + # feed = (url, title) + # feeds.extend([feed]) + feeds.extend([(url, title)]) + await sqlite.import_feeds( + db_file, feeds) + after = await sqlite.get_number_of_items( + db_file, 'feeds') + difference = int(after) - int(before) + return difference async def 
add_feed(db_file, url): while True: - exist = await sqlite.is_feed_exist(db_file, url) + exist = await sqlite.get_feed_id_and_name(db_file, url) if not exist: result = await fetch.download_feed(url) document = result[0] - status = result[1] + status_code = result[1] if document: feed = parse(document) - # if read.is_feed(url, feed): - if read.is_feed(feed): - try: + # if is_feed(url, feed): + if is_feed(feed): + if "title" in feed["feed"].keys(): title = feed["feed"]["title"] - except: + else: title = urlsplit(url).netloc + if "language" in feed["feed"].keys(): + language = feed["feed"]["language"] + else: + language = '' + if "encoding" in feed.keys(): + encoding = feed["encoding"] + else: + encoding = '' + if "updated_parsed" in feed["feed"].keys(): + updated = feed["feed"]["updated_parsed"] + updated = convert_struct_time_to_iso8601(updated) + else: + updated = '' + version = feed["version"] + entries = len(feed["entries"]) await sqlite.insert_feed( - db_file, url, title, status) - await organize_items( - db_file, [url]) + db_file, url, + title=title, + entries=entries, + version=version, + encoding=encoding, + language=language, + status_code=status_code, + updated=updated + ) + await scan( + db_file, url) old = ( await sqlite.get_settings_value( db_file, "old") @@ -44,7 +394,7 @@ async def add_feed(db_file, url): "settings", "Settings", "old") ) if not old: - await sqlite.mark_source_as_read( + await sqlite.mark_feed_as_read( db_file, url) response = ( "> {}\nNews source {} has been " @@ -66,7 +416,7 @@ async def add_feed(db_file, url): else: response = ( "> {}\nFailed to load URL. Reason: {}" - ).format(url, status) + ).format(url, status_code) break else: ix = exist[0] @@ -87,11 +437,11 @@ async def view_feed(url): status = result[1] if document: feed = parse(document) - # if read.is_feed(url, feed): - if read.is_feed(feed): - try: + # if is_feed(url, feed): + if is_feed(feed): + if "title" in feed["feed"].keys(): title = feed["feed"]["title"] - except: + else: title = urlsplit(url).netloc entries = feed.entries response = "Preview of {}:\n\n```\n".format(title) @@ -156,11 +506,11 @@ async def view_entry(url, num): status = result[1] if document: feed = parse(document) - # if read.is_feed(url, feed): - if read.is_feed(feed): - try: + # if is_feed(url, feed): + if is_feed(feed): + if "title" in feed["feed"].keys(): title = feed["feed"]["title"] - except: + else: title = urlsplit(url).netloc entries = feed.entries num = int(num) - 1 @@ -222,6 +572,113 @@ async def view_entry(url, num): return response +async def scan(db_file, url): + """ + Check feeds for new entries. + + Parameters + ---------- + db_file : str + Path to database file. + url : str, optional + URL. The default is None. 
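import_opml(), added above, walks every outline element of the downloaded OPML document and collects its xmlUrl and text attributes before handing the pairs to sqlite.import_feeds(). A short sketch of the structure it expects, parsed with ElementTree; the sample document reuses the listparser URLs from the removed opml.py docstring:

```
import xml.etree.ElementTree as ET

opml_document = (
    '<opml version="1.0">'
    '<head><title>listparser project feeds</title></head>'
    '<body>'
    '<outline text="listparser blog"'
    ' xmlUrl="https://kurtmckee.org/tag/listparser/feed"/>'
    '<outline text="listparser changelog"'
    ' xmlUrl="https://github.com/kurtmckee/listparser/commits/develop.atom"/>'
    '</body></opml>')

root = ET.fromstring(opml_document)
feeds = [(outline.get("xmlUrl"), outline.get("text"))
         for outline in root.findall(".//outline")]
print(feeds)
# [('https://kurtmckee.org/tag/listparser/feed', 'listparser blog'),
#  ('https://github.com/kurtmckee/listparser/commits/develop.atom', 'listparser changelog')]
```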
+ """ + if isinstance(url, tuple): url = url[0] + try: + result = await fetch.download_feed(url) + except: + breakpoint() + # if not result: + # return + try: + document = result[0] + status = result[1] + except: + return + if document and status == 200: + feed = parse(document) + entries = feed.entries + # length = len(entries) + await remove_nonexistent_entries( + db_file, feed, url) + try: + if feed.bozo: + # bozo = ( + # "WARNING: Bozo detected for feed: {}\n" + # "For more information, visit " + # "https://pythonhosted.org/feedparser/bozo.html" + # ).format(url) + # print(bozo) + valid = 0 + else: + valid = 1 + await sqlite.update_feed_validity( + db_file, url, valid) + if "updated_parsed" in feed["feed"].keys(): + updated = feed["feed"]["updated_parsed"] + updated = convert_struct_time_to_iso8601(updated) + else: + updated = '' + await sqlite.update_feed_properties( + db_file, url, len(feed["entries"]), updated) + # await update_feed_status + except ( + IncompleteReadError, + IncompleteRead, + error.URLError + ) as e: + print("Error:", e) + return + # new_entry = 0 + for entry in entries: + if entry.has_key("published"): + date = entry.published + date = rfc2822_to_iso8601(date) + elif entry.has_key("updated"): + date = entry.updated + date = rfc2822_to_iso8601(date) + else: + date = now() + if entry.has_key("link"): + # link = complete_url(source, entry.link) + link = join_url(url, entry.link) + link = trim_url(link) + else: + link = url + # title = feed["feed"]["title"] + # title = "{}: *{}*".format(feed["feed"]["title"], entry.title) + title = entry.title if entry.has_key("title") else date + entry_id = entry.id if entry.has_key("id") else link + summary = entry.summary if entry.has_key("summary") else '' + exist = await sqlite.check_entry_exist( + db_file, url, entry_id=entry_id, + title=title, link=link, date=date) + if not exist: + if entry.has_key("summary"): + summary = entry.summary + read_status = 0 + pathname = urlsplit(link).path + string = ("{} {} {}" + ).format( + title, summary, pathname + ) + allow_list = await config.is_include_keyword( + db_file, "filter-allow", string) + if not allow_list: + reject_list = await config.is_include_keyword( + db_file, "filter-deny", string) + if reject_list: + read_status = 1 + if isinstance(date, int): + logging.error("Variable 'date' is int:", date) + await sqlite.add_entry( + db_file, title, link, entry_id, + url, date, read_status) + await sqlite.set_date(db_file, url) + + + + # NOTE Why (if res[0]) and (if res[1] == 200)? 
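scan() above converts the struct_time value behind updated_parsed with convert_struct_time_to_iso8601(), the helper this patch adds to slixfeed/datetime.py. A minimal demonstration of that conversion:

```
import time
from datetime import datetime

# feedparser exposes dates such as feed["feed"]["updated_parsed"] as time.struct_time.
struct_time = time.strptime("Sat, 06 Jan 2024 22:03:08", "%a, %d %b %Y %H:%M:%S")

# Same conversion as slixfeed.datetime.convert_struct_time_to_iso8601().
date = datetime(*struct_time[:6]).isoformat()
print(date)  # 2024-01-06T22:03:08
```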
async def organize_items(db_file, urls): """ @@ -236,16 +693,17 @@ async def organize_items(db_file, urls): """ for url in urls: # print(os.path.basename(db_file), url[0]) - source = url[0] - res = await fetch.download_feed(source) + url = url[0] + res = await fetch.download_feed(url) # TypeError: 'NoneType' object is not subscriptable if res is None: # Skip to next feed # urls.next() # next(urls) continue - await sqlite.update_source_status( - db_file, res[1], source) + status = res[1] + await sqlite.update_feed_status( + db_file, url, status) if res[0]: try: feed = parse(res[0]) @@ -254,28 +712,36 @@ async def organize_items(db_file, urls): # "WARNING: Bozo detected for feed: {}\n" # "For more information, visit " # "https://pythonhosted.org/feedparser/bozo.html" - # ).format(source) + # ).format(url) # print(bozo) valid = 0 else: valid = 1 - await sqlite.update_source_validity( - db_file, source, valid) + await sqlite.update_feed_validity( + db_file, url, valid) + if "updated_parsed" in feed["feed"].keys(): + updated = feed["feed"]["updated_parsed"] + updated = convert_struct_time_to_iso8601(updated) + else: + updated = '' + entries = len(feed["entries"]) + await sqlite.update_feed_properties( + db_file, url, entries, updated) except ( IncompleteReadError, IncompleteRead, error.URLError ) as e: - # print(e) + print(e) # TODO Print error to log - None + # None # NOTE I don't think there should be "return" # because then we might stop scanning next URLs # return # TODO Place these couple of lines back down # NOTE Need to correct the SQL statement to do so # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW - if res[1] == 200: + if status == 200: # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW # TODO Place these couple of lines back down # NOTE Need to correct the SQL statement to do so @@ -283,7 +749,7 @@ async def organize_items(db_file, urls): # length = len(entries) # await remove_entry(db_file, source, length) await remove_nonexistent_entries( - db_file, feed, source) + db_file, feed, url) # new_entry = 0 for entry in entries: # TODO Pass date too for comparion check @@ -308,19 +774,18 @@ async def organize_items(db_file, urls): # title = feed["feed"]["title"] if entry.has_key("link"): # link = complete_url(source, entry.link) - link = join_url(source, entry.link) + link = join_url(url, entry.link) link = trim_url(link) else: - link = source + link = url if entry.has_key("id"): eid = entry.id else: eid = link exist = await sqlite.check_entry_exist( - db_file, source, eid=eid, + db_file, url, eid=eid, title=title, link=link, date=date) if not exist: - print(url) # new_entry = new_entry + 1 # TODO Enhance summary if entry.has_key("summary"): @@ -356,7 +821,7 @@ async def organize_items(db_file, urls): # summary = "" read_status = 1 entry = ( - title, link, eid, source, date, read_status) + title, link, eid, url, date, read_status) if isinstance(date, int): print("PROBLEM: date is int") print(date) @@ -364,13 +829,13 @@ async def organize_items(db_file, urls): # print(source) # print(date) await sqlite.add_entry_and_set_date( - db_file, source, entry) + db_file, url, entry) # print(current_time(), entry, title) # else: # print(current_time(), exist, title) -async def remove_nonexistent_entries(db_file, feed, source): +async def remove_nonexistent_entries(db_file, feed, url): """ Remove entries that don't exist in a given parsed feed. 
Check the entries returned from feed and delete read non @@ -382,10 +847,10 @@ async def remove_nonexistent_entries(db_file, feed, source): Path to database file. feed : list Parsed feed document. - source : str + url : str Feed URL. URL of associated feed. """ - items = await sqlite.get_entries_of_source(db_file, feed, source) + items = await sqlite.get_entries_of_feed(db_file, feed, url) entries = feed.entries # breakpoint() for item in items: @@ -409,9 +874,9 @@ async def remove_nonexistent_entries(db_file, feed, source): else: title = feed["feed"]["title"] if entry.has_key("link"): - link = join_url(source, entry.link) + link = join_url(url, entry.link) else: - link = source + link = url if entry.has_key("published") and item[4]: # print("compare11:", title, link, time) # print("compare22:", item[1], item[2], item[4]) @@ -459,11 +924,11 @@ async def remove_nonexistent_entries(db_file, feed, source): # print("link :", item[2]) # print("id :", item[3]) if item[5] == 1: - sqlite.delete_entry_by_id(db_file, ix) + await sqlite.delete_entry_by_id(db_file, ix) # print(">>> DELETING:", item[1]) else: # print(">>> ARCHIVING:", item[1]) - sqlite.archive_entry(db_file, ix) + await sqlite.archive_entry(db_file, ix) limit = ( await sqlite.get_settings_value(db_file, "archive") ) or ( diff --git a/slixfeed/config.py b/slixfeed/config.py index 1732c9f..753307b 100644 --- a/slixfeed/config.py +++ b/slixfeed/config.py @@ -25,6 +25,7 @@ import sys import yaml import logging + def get_value(filename, section, keys): """ Get setting value. @@ -45,7 +46,7 @@ def get_value(filename, section, keys): """ result = None config_res = configparser.RawConfigParser() - config_dir = get_default_confdir() + config_dir = get_default_config_directory() # if not os.path.isdir(config_dir): # config_dir = '/usr/share/slixfeed/' if not os.path.isdir(config_dir): @@ -105,7 +106,7 @@ def get_value_default(filename, section, key): Value. """ config_res = configparser.RawConfigParser() - config_dir = get_default_confdir() + config_dir = get_default_config_directory() if not os.path.isdir(config_dir): config_dir = '/usr/share/slixfeed/' config_file = os.path.join(config_dir, filename + ".ini") @@ -131,7 +132,7 @@ def get_list(filename, key): result : list List of pathnames or keywords. """ - config_dir = get_default_confdir() + config_dir = get_default_config_directory() if not os.path.isdir(config_dir): config_dir = '/usr/share/slixfeed/' config_file = os.path.join(config_dir, filename) @@ -143,7 +144,7 @@ def get_list(filename, key): return result -def get_default_dbdir(): +def get_default_data_directory(): """ Determine the directory path where dbfile will be stored. @@ -183,7 +184,7 @@ def get_default_dbdir(): return os.path.join(data_home, 'slixfeed') -def get_default_confdir(): +def get_default_config_directory(): """ Determine the directory path where configuration will be stored. @@ -233,10 +234,12 @@ def get_pathname_to_database(jid): object Coroutine object. 
""" - db_dir = get_default_dbdir() + db_dir = get_default_data_directory() if not os.path.isdir(db_dir): os.mkdir(db_dir) - db_file = os.path.join(db_dir, r"{}.db".format(jid)) + if not os.path.isdir(db_dir + "/sqlite"): + os.mkdir(db_dir + "/sqlite") + db_file = os.path.join(db_dir, "sqlite", r"{}.db".format(jid)) sqlite.create_tables(db_file) return db_file # await set_default_values(db_file) diff --git a/slixfeed/crawl.py b/slixfeed/crawl.py index dcfe562..df595ed 100644 --- a/slixfeed/crawl.py +++ b/slixfeed/crawl.py @@ -6,10 +6,14 @@ TODO 1.1) Do not compose messages. + Only return results. + See: # TODO return feeds 1.2) Return URLs, nothing else other (e.g. processed messages). -1.3) Correction of URLs is aceptable. +1.3) NOTE: Correction of URLs is aceptable. + +2) Consider merging with module fetch.py """ @@ -161,6 +165,7 @@ async def feed_mode_request(url, tree): # print(feeds) except: continue + # TODO return feeds if len(feeds) > 1: counter = 0 msg = ( @@ -275,6 +280,7 @@ async def feed_mode_scan(url, tree): # print(feeds) except: continue + # TODO return feeds if len(feeds) > 1: # print(feeds) # breakpoint() @@ -352,6 +358,7 @@ async def feed_mode_auto_discovery(url, tree): # xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href""" # xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href" feeds = tree.xpath(xpath_query) + # TODO return feeds if len(feeds) > 1: msg = ( "RSS Auto-Discovery has found {} feeds:\n\n```\n" diff --git a/slixfeed/datetime.py b/slixfeed/datetime.py index 47a2012..f0332c6 100644 --- a/slixfeed/datetime.py +++ b/slixfeed/datetime.py @@ -9,6 +9,7 @@ from datetime import datetime from dateutil.parser import parse from email.utils import parsedate, parsedate_to_datetime + def now(): """ ISO 8601 Timestamp. @@ -22,6 +23,12 @@ def now(): return date +def convert_struct_time_to_iso8601(struct_time): + date = datetime(*struct_time[:6]) + date = date.isoformat() + return date + + def current_date(): """ Print MM DD, YYYY (Weekday Time) timestamp. 
diff --git a/slixfeed/export.py b/slixfeed/export.py deleted file mode 100644 index 33c7aba..0000000 --- a/slixfeed/export.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -from slixfeed.datetime import current_date - -def markdown(jid, filename, results): - with open(filename, 'w') as file: - file.write( - '# Subscriptions for {}\n'.format(jid)) - file.write( - '## Set of feeds exported with Slixfeed\n') - for result in results: - file.write( - '- [{}]({})\n'.format(result[0], result[1])) - file.write( - '\n\n* * *\n\nThis list was saved on {} from xmpp:{} using ' - '[Slixfeed](https://gitgud.io/sjehuda/slixfeed)\n'.format( - current_date(), jid)) diff --git a/slixfeed/fetch.py b/slixfeed/fetch.py index 383f5f7..a024667 100644 --- a/slixfeed/fetch.py +++ b/slixfeed/fetch.py @@ -25,19 +25,12 @@ TODO from aiohttp import ClientError, ClientSession, ClientTimeout from asyncio import TimeoutError -from asyncio.exceptions import IncompleteReadError -from bs4 import BeautifulSoup -from email.utils import parseaddr -from feedparser import parse -from http.client import IncompleteRead -from lxml import html +# from asyncio.exceptions import IncompleteReadError +# from bs4 import BeautifulSoup +# from http.client import IncompleteRead +# from lxml import html import slixfeed.config as config -from slixfeed.datetime import now, rfc2822_to_iso8601 -import slixfeed.sqlite as sqlite -from slixfeed.url import complete_url, join_url, trim_url -from urllib import error # from xml.etree.ElementTree import ElementTree, ParseError -from urllib.parse import urlsplit, urlunsplit # async def dat(): diff --git a/slixfeed/filter.py b/slixfeed/filter.py deleted file mode 100644 index 9b80620..0000000 --- a/slixfeed/filter.py +++ /dev/null @@ -1,152 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" - -TODO - -1) Website-specific filter (i.e. audiobookbay). - -2) Exclude websites from filtering (e.g. metapedia). - -3) Filter phrases: - Refer to sqlitehandler.search_entries for implementation. - It is expected to be more complex than function search_entries. - -""" - -import slixfeed.config as config -import slixfeed.sqlite as sqlite - - -async def add_to_list(newwords, keywords): - """ - Append new keywords to list. - - Parameters - ---------- - newwords : str - List of new keywords. - keywords : str - List of current keywords. - - Returns - ------- - val : str - List of current keywords and new keywords. - """ - if isinstance(keywords, str) or keywords is None: - try: - keywords = keywords.split(",") - except: - keywords = [] - newwords = newwords.lower().split(",") - for word in newwords: - word = word.strip() - if len(word) and word not in keywords: - keywords.extend([word]) - keywords.sort() - val = ",".join(keywords) - return val - - -async def remove_from_list(newwords, keywords): - """ - Remove given keywords from list. - - Parameters - ---------- - newwords : str - List of new keywords. - keywords : str - List of current keywords. - - Returns - ------- - val : str - List of new keywords. - """ - if isinstance(keywords, str) or keywords is None: - try: - keywords = keywords.split(",") - except: - keywords = [] - newwords = newwords.lower().split(",") - for word in newwords: - word = word.strip() - if len(word) and word in keywords: - keywords.remove(word) - keywords.sort() - val = ",".join(keywords) - return val - - -async def is_include_keyword(db_file, key, string): - """ - Check keyword match. 
- - Parameters - ---------- - db_file : str - Path to database file. - type : str - "allow" or "deny". - string : str - String. - - Returns - ------- - Matched keyword or None. - - """ -# async def reject(db_file, string): -# async def is_blacklisted(db_file, string): - keywords = (await sqlite.get_filters_value(db_file, key)) or '' - keywords = keywords.split(",") - keywords = keywords + (config.get_list("lists.yaml", key)) - for keyword in keywords: - if not keyword or len(keyword) < 2: - continue - if keyword in string.lower(): - # print(">>> ACTIVATE", i) - # return 1 - return keyword - -""" - -This code was tested at module datahandler - -reject = 0 -blacklist = await get_settings_value( - db_file, - "filter-deny" - ) -# print(">>> blacklist:") -# print(blacklist) -# breakpoint() -if blacklist: - blacklist = blacklist.split(",") - # print(">>> blacklist.split") - # print(blacklist) - # breakpoint() - for i in blacklist: - # print(">>> length", len(i)) - # breakpoint() - # if len(i): - if not i or len(i) < 2: - print(">>> continue due to length", len(i)) - # breakpoint() - continue - # print(title) - # print(">>> blacklisted word:", i) - # breakpoint() - test = (title + " " + summary + " " + link) - if i in test.lower(): - reject = 1 - break - -if reject: - print("rejected:",title) - entry = (title, '', link, source, date, 1); - -""" \ No newline at end of file diff --git a/slixfeed/log.py b/slixfeed/log.py deleted file mode 100644 index 0a1032a..0000000 --- a/slixfeed/log.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -def markdown(timestamp, filename, jid, message): - """ - Log message to file. - - Parameters - ---------- - timestamp : str - Time stamp. - filename : str - Jabber ID as name of file. - jid : str - Jabber ID. - message : str - Message content. - - Returns - ------- - None. 
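is_include_keyword() above is removed here from filter.py; scan() now calls config.is_include_keyword() for the filter-allow and filter-deny checks. The stored value is a comma-separated keyword list and matching is a case-insensitive substring test. A stand-alone sketch of that matching, with illustrative sample data:

```
def match_keyword(keywords, string):
    # keywords is a comma-separated list, e.g. the value stored for "filter-deny"
    for keyword in keywords.split(","):
        keyword = keyword.strip()
        if not keyword or len(keyword) < 2:
            continue
        if keyword in string.lower():
            return keyword
    return None

print(match_keyword("sport,chat", "Weekly chat about news feeds"))  # "chat"
print(match_keyword("sport,chat", "Release notes"))                 # None
```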
- - """ - with open(filename + '.md', 'a') as file: - # entry = "{} {}:\n{}\n\n".format(timestamp, jid, message) - entry = ( - "## {}\n" - "### {}\n\n" - "{}\n\n").format(jid, timestamp, message) - file.write(entry) - - diff --git a/slixfeed/opml.py b/slixfeed/opml.py deleted file mode 100644 index 64ff1f9..0000000 --- a/slixfeed/opml.py +++ /dev/null @@ -1,75 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" - -{ - 'bozo': False, - 'bozo_exception': None, - 'feeds': [ - { - 'url': 'https://kurtmckee.org/tag/listparser/feed', - 'title': 'listparser blog', - 'categories': [], - 'tags': [] - }, - { - 'url': 'https://github.com/kurtmckee/listparser/commits/develop.atom', - 'title': 'listparser changelog', - 'categories': [], - 'tags': [] - } - ], - 'lists': [], - 'opportunities': [], - 'meta': { - 'title': 'listparser project feeds', - 'author': { - 'name': 'Kurt McKee', - 'email': 'contactme@kurtmckee.org', - 'url': 'https://kurtmckee.org/' - } - }, - 'version': 'opml2' - } - -import listparser -import lxml - - -async def import_from_file(db_file, opml_doc): - feeds = listparser.parse(opml_doc)['feeds'] - for feed in feeds: - url = feed['url'] - title = feed['title'] - # categories = feed['categories'] - # tags = feed['tags'] - # await datahandler.add_feed_no_check(db_file, [url, title]) - -""" - -from slixfeed.datetime import current_time -import xml.etree.ElementTree as ET - -# NOTE Use OPyML or LXML -def export_to_file(jid, filename, results): - root = ET.Element("opml") - root.set("version", "1.0") - head = ET.SubElement(root, "head") - ET.SubElement(head, "title").text = "Subscriptions for {}".format(jid) - ET.SubElement(head, "description").text = ( - "Set of feeds exported with Slixfeed") - ET.SubElement(head, "generator").text = "Slixfeed" - ET.SubElement(head, "urlPublic").text = ( - "https://gitgud.io/sjehuda/slixfeed") - time_stamp = current_time() - ET.SubElement(head, "dateCreated").text = time_stamp - ET.SubElement(head, "dateModified").text = time_stamp - body = ET.SubElement(root, "body") - for result in results: - outline = ET.SubElement(body, "outline") - outline.set("text", result[0]) - outline.set("xmlUrl", result[1]) - # outline.set("type", result[2]) - tree = ET.ElementTree(root) - tree.write(filename) diff --git a/slixfeed/sqlite.py b/slixfeed/sqlite.py index adebfe5..19c2eb2 100644 --- a/slixfeed/sqlite.py +++ b/slixfeed/sqlite.py @@ -24,7 +24,7 @@ from slixfeed.datetime import ( current_time, rfc2822_to_iso8601 ) -from sqlite3 import connect, Error +from sqlite3 import connect, Error, IntegrityError from slixfeed.url import join_url # from eliot import start_action, to_file @@ -58,6 +58,7 @@ def create_connection(db_file): conn = None try: conn = connect(db_file) + conn.execute("PRAGMA foreign_keys = ON") return conn except Error as e: print(e) @@ -74,64 +75,120 @@ def create_tables(db_file): Path to database file. 
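create_connection() above now turns on foreign key enforcement, and the rebuilt schema that follows links the status, properties, entries and archive tables to feeds(id) with ON UPDATE/ON DELETE CASCADE. A reduced two-table sketch (column names as in the patch) of the effect: deleting a feed row removes its dependent entries automatically.

```
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("PRAGMA foreign_keys = ON")  # must be enabled per connection
conn.execute(
    "CREATE TABLE feeds ("
    "id INTEGER NOT NULL, url TEXT NOT NULL UNIQUE, PRIMARY KEY (id))")
conn.execute(
    "CREATE TABLE entries ("
    "id INTEGER NOT NULL, title TEXT NOT NULL, feed_id INTEGER NOT NULL, "
    "FOREIGN KEY (feed_id) REFERENCES feeds (id) "
    "ON UPDATE CASCADE ON DELETE CASCADE, PRIMARY KEY (id))")
conn.execute("INSERT INTO feeds (url) VALUES ('https://reclaimthenet.org/feed/')")
feed_id = conn.execute(
    "SELECT id FROM feeds WHERE url = ?",
    ("https://reclaimthenet.org/feed/",)).fetchone()[0]
conn.execute("INSERT INTO entries (title, feed_id) VALUES ('First post', ?)", (feed_id,))
conn.execute("DELETE FROM feeds WHERE id = ?", (feed_id,))
print(conn.execute("SELECT count(id) FROM entries").fetchone()[0])  # 0 -- cascaded
```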
""" with create_connection(db_file) as conn: - feeds_table_sql =( - "CREATE TABLE IF NOT EXISTS feeds (" - "id INTEGER PRIMARY KEY," - "name TEXT," - "address TEXT NOT NULL," - "enabled INTEGER NOT NULL," - "scanned TEXT," - "updated TEXT," - "status INTEGER," - "valid INTEGER" - ");" + feeds_table_sql = ( + """ + CREATE TABLE IF NOT EXISTS feeds ( + id INTEGER NOT NULL, + name TEXT, + url TEXT NOT NULL UNIQUE, + PRIMARY KEY ("id") + ); + """ + ) + properties_table_sql = ( + """ + CREATE TABLE IF NOT EXISTS properties ( + id INTEGER NOT NULL, + feed_id INTEGER NOT NULL, + type TEXT, + encoding TEXT, + language TEXT, + entries INTEGER, + FOREIGN KEY ("feed_id") REFERENCES "feeds" ("id") + ON UPDATE CASCADE + ON DELETE CASCADE, + PRIMARY KEY (id) + ); + """ + ) + status_table_sql = ( + """ + CREATE TABLE IF NOT EXISTS status ( + id INTEGER NOT NULL, + feed_id INTEGER NOT NULL, + enabled INTEGER NOT NULL DEFAULT 1, + updated TEXT, + scanned TEXT, + renewed TEXT, + status_code INTEGER, + valid INTEGER, + filter INTEGER NOT NULL DEFAULT 1, + FOREIGN KEY ("feed_id") REFERENCES "feeds" ("id") + ON UPDATE CASCADE + ON DELETE CASCADE, + PRIMARY KEY ("id") + ); + """ ) entries_table_sql = ( - "CREATE TABLE IF NOT EXISTS entries (" - "id INTEGER PRIMARY KEY," - "title TEXT NOT NULL," - "link TEXT NOT NULL," - "entry_id TEXT," - "source TEXT NOT NULL," - "timestamp TEXT," - "read INTEGER" - ");" + """ + CREATE TABLE IF NOT EXISTS entries ( + id INTEGER NOT NULL, + title TEXT NOT NULL, + link TEXT NOT NULL, + entry_id TEXT NOT NULL, + feed_id INTEGER NOT NULL, + timestamp TEXT, + read INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY ("feed_id") REFERENCES "feeds" ("id") + ON UPDATE CASCADE + ON DELETE CASCADE, + PRIMARY KEY ("id") + ); + """ ) archive_table_sql = ( - "CREATE TABLE IF NOT EXISTS archive (" - "id INTEGER PRIMARY KEY," - "title TEXT NOT NULL," - "link TEXT NOT NULL," - "entry_id TEXT," - "source TEXT NOT NULL," - "timestamp TEXT," - "read INTEGER" - ");" + """ + CREATE TABLE IF NOT EXISTS archive ( + id INTEGER NOT NULL, + title TEXT NOT NULL, + link TEXT NOT NULL, + entry_id TEXT NOT NULL, + feed_id INTEGER NOT NULL, + timestamp TEXT, + read INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY ("feed_id") REFERENCES "feeds" ("id") + ON UPDATE CASCADE + ON DELETE CASCADE, + PRIMARY KEY ("id") + ); + """ ) # statistics_table_sql = ( - # "CREATE TABLE IF NOT EXISTS statistics (" - # "id INTEGER PRIMARY KEY," - # "title TEXT NOT NULL," - # "number INTEGER" - # ");" + # """ + # CREATE TABLE IF NOT EXISTS statistics ( + # id INTEGER NOT NULL, + # title TEXT NOT NULL, + # number INTEGER, + # PRIMARY KEY ("id") + # ); + # """ # ) settings_table_sql = ( - "CREATE TABLE IF NOT EXISTS settings (" - "id INTEGER PRIMARY KEY," - "key TEXT NOT NULL," - "value INTEGER" - ");" + """ + CREATE TABLE IF NOT EXISTS settings ( + id INTEGER NOT NULL, + key TEXT NOT NULL, + value INTEGER, + PRIMARY KEY ("id") + ); + """ ) filters_table_sql = ( - "CREATE TABLE IF NOT EXISTS filters (" - "id INTEGER PRIMARY KEY," - "key TEXT NOT NULL," - "value TEXT" - ");" + """ + CREATE TABLE IF NOT EXISTS filters ( + id INTEGER NOT NULL, + key TEXT NOT NULL, + value TEXT, + PRIMARY KEY ("id") + ); + """ ) cur = conn.cursor() # cur = get_cursor(db_file) cur.execute(feeds_table_sql) + cur.execute(status_table_sql) + cur.execute(properties_table_sql) cur.execute(entries_table_sql) cur.execute(archive_table_sql) # cur.execute(statistics_table_sql) @@ -162,7 +219,45 @@ def get_cursor(db_file): return CURSORS[db_file] -async def insert_feed(db_file, 
url, title=None, status=None): +async def import_feeds(db_file, feeds): + """ + Insert a new feed into the feeds table. + + Parameters + ---------- + db_file : str + Path to database file. + feeds : list + Set of feeds (Title and URL). + """ + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + for feed in feeds: + url = feed[0] + title = feed[1] + feed = ( + title, url + ) + sql = ( + """ + INSERT + INTO feeds( + name, url) + VALUES( + ?, ?) + """ + ) + try: + cur.execute(sql, feed) + except IntegrityError as e: + logging.warning("Skipping: " + url) + logging.error(e) + + +async def insert_feed( + db_file, url, title=None, entries=None, version=None, + encoding=None, language=None, status_code=None, updated=None): """ Insert a new feed into the feeds table. @@ -173,36 +268,96 @@ async def insert_feed(db_file, url, title=None, status=None): url : str URL. title : str, optional - Feed Title. The default is None. + Feed title. The default is None. + entries : int, optional + Number of entries. The default is None. + version : str, optional + Type of feed. The default is None. + encoding : str, optional + Encoding of feed. The default is None. + language : str, optional + Language code of feed. The default is None. status : str, optional HTTP status code. The default is None. + updated : ???, optional + Date feed was last updated. The default is None. """ - #TODO consider async with DBLOCK - #conn = create_connection(db_file) - - # with create_connection(db_file) as conn: - # #exist = await is_feed_exist(conn, url) - # exist = await is_feed_exist(db_file, url) - - # if not exist: - # status = await main.download_feed(url) - # else: - # return "News source is already listed in the subscription list" async with DBLOCK: with create_connection(db_file) as conn: cur = conn.cursor() - # title = feed["feed"]["title"] - feed = (title, url, 1, status, 1) + feed = ( + title, url + ) sql = ( - "INSERT INTO feeds(" - "name, address, enabled, status, valid" - ")" - "VALUES(?, ?, ?, ?, ?) " + """ + INSERT + INTO feeds( + name, url) + VALUES( + ?, ?) + """ ) cur.execute(sql, feed) + sql = ( + """ + SELECT id + FROM feeds + WHERE url = :url + """ + ) + feed_id = cur.execute(sql, (url,)).fetchone()[0] + status = ( + feed_id, 1, updated, status_code, 1 + ) + sql = ( + """ + INSERT + INTO status( + feed_id, enabled, updated, status_code, valid) + VALUES( + ?, ?, ?, ?, ?) + """ + ) + cur.execute(sql, status) + properties = ( + feed_id, entries, version, encoding, language + ) + sql = ( + """ + INSERT + INTO properties( + feed_id, entries, type, encoding, language) + VALUES( + ?, ?, ?, ?, ?) + """ + ) + cur.execute(sql, properties) -async def remove_feed(db_file, ix): +async def remove_feed_by_url(db_file, url): + """ + Delete a feed by feed URL. + + Parameters + ---------- + db_file : str + Path to database file. + url : str + URL of feed. + """ + with create_connection(db_file) as conn: + async with DBLOCK: + cur = conn.cursor() + sql = ( + """ + DELETE FROM feeds + WHERE url = ? + """ + ) + cur.execute(sql, (url,)) + + +async def remove_feed_by_index(db_file, ix): """ Delete a feed by feed ID. @@ -217,33 +372,37 @@ async def remove_feed(db_file, ix): async with DBLOCK: cur = conn.cursor() sql = ( - "SELECT address " - "FROM feeds " - "WHERE id = ?" + """ + SELECT url + FROM feeds + WHERE id = ? + """ ) url = cur.execute(sql, (ix,)).fetchone()[0] - # NOTE Should we move DBLOCK to this line? 2022-12-23 + # # NOTE Should we move DBLOCK to this line? 
2022-12-23 + # sql = ( + # "DELETE " + # "FROM entries " + # "WHERE feed_id = ?" + # ) + # cur.execute(sql, (url,)) # Error? 2024-01-05 + # sql = ( + # "DELETE " + # "FROM archive " + # "WHERE feed_id = ?" + # ) + # cur.execute(sql, (url,)) sql = ( - "DELETE " - "FROM entries " - "WHERE source = ?" - ) - cur.execute(sql, (url,)) - sql = ( - "DELETE " - "FROM archive " - "WHERE source = ?" - ) - cur.execute(sql, (url,)) - sql = ( - "DELETE FROM feeds " - "WHERE id = ?" + """ + DELETE FROM feeds + WHERE id = ? + """ ) cur.execute(sql, (ix,)) + return url -# TODO Rename function name -async def is_feed_exist(db_file, url): +async def get_feed_id_and_name(db_file, url): """ Get Id and Name of feed. Check whether a feed exists. @@ -263,9 +422,11 @@ async def is_feed_exist(db_file, url): """ cur = get_cursor(db_file) sql = ( - "SELECT id, name " - "FROM feeds " - "WHERE address = ?" + """ + SELECT id, name + FROM feeds + WHERE url = ? + """ ) result = cur.execute(sql, (url,)).fetchone() return result @@ -290,8 +451,10 @@ async def get_number_of_items(db_file, table): with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "SELECT count(id) " - "FROM {}" + """ + SELECT count(id) + FROM {} + """ ).format(table) count = cur.execute(sql).fetchone()[0] return count @@ -314,9 +477,11 @@ async def get_number_of_feeds_active(db_file): with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "SELECT count(id) " - "FROM feeds " - "WHERE enabled = 1" + """ + SELECT count(id) + FROM status + WHERE enabled = 1 + """ ) count = cur.execute(sql).fetchone()[0] return count @@ -339,24 +504,23 @@ async def get_number_of_entries_unread(db_file): with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "SELECT " - "(" - "SELECT count(id) " - "FROM entries " - "WHERE read = 0" - ") " - "+ " - "(" - "SELECT count(id) " - "FROM archive" - ") " - "AS total_count" + """ + SELECT + ( + SELECT count(id) + FROM entries + WHERE read = 0 + ) + ( + SELECT count(id) + FROM archive + ) + AS total_count + """ ) count = cur.execute(sql).fetchone()[0] return count -# TODO Read from entries and archives async def get_unread_entries(db_file, num): """ Extract information from unread entries. @@ -375,40 +539,19 @@ async def get_unread_entries(db_file, num): """ with create_connection(db_file) as conn: cur = conn.cursor() - # sql = ( - # "SELECT id " - # "FROM entries " - # "WHERE read = 0 " - # "LIMIT 1" - # ) - # sql = ("SELECT id " - # "FROM entries " - # "WHERE read = 0 " - # "ORDER BY timestamp DESC " - # "LIMIT 1" - # ) - # sql = ( - # "SELECT id, title, summary, link " - # "FROM entries " - # "WHERE read = 0 " - # "ORDER BY timestamp " - # "DESC LIMIT :num" - # ) sql = ( - "SELECT id, title, link, source, timestamp " - "FROM entries " - "WHERE read = 0 " - "UNION ALL " - "SELECT id, title, link, source, timestamp " - "FROM archive " - "ORDER BY timestamp " - "DESC LIMIT :num" + """ + SELECT id, title, link, feed_id, timestamp + FROM entries + WHERE read = 0 + UNION ALL + SELECT id, title, link, feed_id, timestamp + FROM archive + ORDER BY timestamp + DESC LIMIT :num + """ ) - results = cur.execute(sql, (num,)) - results = results.fetchall() - # print("### sqlite.get_unread_entries ###") - # print(results) - # breakpoint() + results = cur.execute(sql, (num,)).fetchall() return results @@ -424,33 +567,37 @@ async def mark_entry_as_read(cur, ix): Index of entry. """ sql = ( - "UPDATE entries " - "SET read = 1 " - "WHERE id = ?" + """ + UPDATE entries + SET read = 1 + WHERE id = ? 
+ """ ) cur.execute(sql, (ix,)) -async def mark_source_as_read(db_file, source): +async def mark_feed_as_read(db_file, url): """ - Set read status of entries of given source as read. + Set read status of entries of given feed as read. Parameters ---------- db_file : str Path to database file. - source : str + url : str URL. """ async with DBLOCK: with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "UPDATE entries " - "SET read = 1 " - "WHERE source = ?" + """ + UPDATE entries + SET read = 1 + WHERE feed_id = ? + """ ) - cur.execute(sql, (source,)) + cur.execute(sql, (url,)) async def delete_entry_by_id(db_file, ix): @@ -468,9 +615,11 @@ async def delete_entry_by_id(db_file, ix): with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "DELETE " - "FROM entries " - "WHERE id = :ix" + """ + DELETE + FROM entries + WHERE id = :ix + """ ) cur.execute(sql, (ix,)) @@ -490,11 +639,13 @@ async def archive_entry(db_file, ix): with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "INSERT " - "INTO archive " - "SELECT * " - "FROM entries " - "WHERE entries.id = :ix" + """ + INSERT + INTO archive + SELECT * + FROM entries + WHERE entries.id = :ix + """ ) try: cur.execute(sql, (ix,)) @@ -504,9 +655,11 @@ async def archive_entry(db_file, ix): "into archive at index", ix ) sql = ( - "DELETE " - "FROM entries " - "WHERE id = :ix" + """ + DELETE + FROM entries + WHERE id = :ix + """ ) try: cur.execute(sql, (ix,)) @@ -517,17 +670,18 @@ async def archive_entry(db_file, ix): ) -def get_feed_title(db_file, source): +def get_feed_title(db_file, ix): with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "SELECT name " - "FROM feeds " - "WHERE address = :source " + """ + SELECT name + FROM feeds + WHERE id = :ix + """ ) - feed_title = cur.execute(sql, (source,)) - feed_title = feed_title.fetchone()[0] - return feed_title + title = cur.execute(sql, (ix,)).fetchone()[0] + return title async def mark_as_read(db_file, ix): @@ -557,12 +711,17 @@ async def mark_all_as_read(db_file): with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "UPDATE entries " - "SET read = 1 " + """ + UPDATE entries + SET read = 1 + """ ) cur.execute(sql) sql = ( - "DELETE FROM archive" + """ + DELETE + FROM archive + """ ) cur.execute(sql) @@ -579,8 +738,10 @@ async def delete_archived_entry(cur, ix): Index of entry. """ sql = ( - "DELETE FROM archive " - "WHERE id = ?" + """ + DELETE FROM archive + WHERE id = ? + """ ) cur.execute(sql, (ix,)) @@ -621,7 +782,7 @@ async def statistics(db_file): value = cur.execute(sql, (key,)).fetchone()[0] except: print("Error for key:", key) - value = "none" + value = "Default" values.extend([value]) return values @@ -687,9 +848,11 @@ async def set_enabled_status(db_file, ix, status): with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "UPDATE feeds " - "SET enabled = :status " - "WHERE id = :id" + """ + UPDATE feeds + SET enabled = :status + WHERE id = :id + """ ) cur.execute(sql, { "status": status, @@ -697,150 +860,225 @@ async def set_enabled_status(db_file, ix, status): }) -async def set_date(cur, url): - """ - Set last update date of feed. - - Parameters - ---------- - cur : object - Cursor object. - url : str - URL. 
- """ - sql = ( - "UPDATE feeds " - "SET updated = :today " - "WHERE address = :url" - ) - # cur = conn.cursor() - cur.execute(sql, { - "today": date.today(), - "url": url - }) - - -async def add_entry_and_set_date(db_file, source, entry): - """ - Add an entry to table entries and set date of source in table feeds. - - Parameters - ---------- - db_file : str - Path to database file. - source : str - Feed URL. - entry : list - Entry properties. - """ - # TODO While `async with DBLOCK` does work well from - # outside of functions, it would be better practice - # to place it within the functions. - async with DBLOCK: - with create_connection(db_file) as conn: - cur = conn.cursor() - await add_entry(cur, entry) - await set_date(cur, source) - - -async def update_source_status(db_file, status, source): - """ - Set HTTP status of source in table feeds. - - Parameters - ---------- - db_file : str - Path to database file. - source : str - Feed URL. - status : str - Status ID or message. - """ - sql = ( - "UPDATE feeds " - "SET status = :status, scanned = :scanned " - "WHERE address = :url" - ) - async with DBLOCK: - with create_connection(db_file) as conn: - cur = conn.cursor() - cur.execute(sql, { - "status" : status, - "scanned" : date.today(), - "url" : source - }) - - -async def update_source_validity(db_file, source, valid): - """ - Set validity status of source in table feeds. - - Parameters - ---------- - db_file : str - Path to database file. - source : str - Feed URL. - valid : boolean - 0 or 1. - """ - sql = ( - "UPDATE feeds " - "SET valid = :validity " - "WHERE address = :url" - ) - async with DBLOCK: - with create_connection(db_file) as conn: - cur = conn.cursor() - cur.execute(sql, { - "validity": valid, - "url": source - }) - """ TODO -Investigate why causes entry[6] (date) to be int 0 +Investigate what causes date to be int 0 + +NOTE + +When time functions of slixfeed.timedate +were async, there were errors of coroutines """ -async def add_entry(cur, entry): +async def add_entry(db_file, title, link, + entry_id, url, date, read_status): """ Add a new entry row into the entries table. Parameters ---------- - cur : object - Cursor object. + db_file : str + Path to database file. entry : str Entry properties. 
""" - sql = ( - "INSERT " - "INTO entries(" - "title, " - "link, " - "entry_id, " - "source, " - "timestamp, " - "read" - ") " - "VALUES(?, ?, ?, ?, ?, ?)" - ) - try: - cur.execute(sql, entry) - except: - # None - print("Unknown error for sqlite.add_entry") - print(entry) - # - # print(current_time(), "COROUTINE OBJECT NOW") - # for i in entry: - # print(type(i)) - # print(i) - # print(type(entry)) - # print(entry) - # print(current_time(), "COROUTINE OBJECT NOW") - # breakpoint() + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = ( + """ + SELECT id + FROM feeds + WHERE url = :url + """ + ) + feed_id = cur.execute(sql, (url,)).fetchone()[0] + sql = ( + """ + INSERT + INTO entries( + title, link, entry_id, feed_id, timestamp, read) + VALUES( + :title, :link, :entry_id, :feed_id, :timestamp, :read) + """ + ) + cur.execute(sql, { + "title": title, + "link": link, + "entry_id": entry_id, + "feed_id": feed_id, + "timestamp": date, + "read": read_status + }) + # try: + # cur.execute(sql, entry) + # except: + # # None + # print("Unknown error for sqlite.add_entry") + # print(entry) + # # + # # print(current_time(), "COROUTINE OBJECT NOW") + # # for i in entry: + # # print(type(i)) + # # print(i) + # # print(type(entry)) + # # print(entry) + # # print(current_time(), "COROUTINE OBJECT NOW") + # # breakpoint() + + +async def set_date(db_file, url): + """ + Set renewed date of given feed. + + Parameters + ---------- + db_file : str + Path to database file. + url : str + URL. + """ + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = ( + """ + SELECT id + FROM feeds + WHERE url = :url + """ + ) + feed_id = cur.execute(sql, (url,)).fetchone()[0] + sql = ( + """ + UPDATE status + SET renewed = :today + WHERE feed_id = :feed_id + """ + ) + # cur = conn.cursor() + cur.execute(sql, { + "today": date.today(), + "feed_id": feed_id + }) + + +async def update_feed_status(db_file, url, status_code): + """ + Set status_code of feed_id in table status. + + Parameters + ---------- + db_file : str + Path to database file. + url : str + Feed URL. + status : str + Status ID or message. + """ + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = ( + """ + SELECT id + FROM feeds + WHERE url = :url + """ + ) + try: + feed_id = cur.execute(sql, (url,)).fetchone()[0] + except: + breakpoint() + sql = ( + """ + UPDATE status + SET status_code = :status_code, scanned = :scanned + WHERE feed_id = :feed_id + """ + ) + cur.execute(sql, { + "status_code" : status_code, + "scanned" : date.today(), + "feed_id" : feed_id + }) + + +async def update_feed_validity(db_file, url, valid): + """ + Set validity status of feed_id in table status. + + Parameters + ---------- + db_file : str + Path to database file. + url : str + Feed URL. + valid : boolean + 0 or 1. + """ + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = ( + """ + SELECT id + FROM feeds + WHERE url = :url + """ + ) + feed_id = cur.execute(sql, (url,)).fetchone()[0] + sql = ( + """ + UPDATE status + SET valid = :valid + WHERE feed_id = :feed_id + """ + ) + cur.execute(sql, { + "valid": valid, + "feed_id": feed_id + }) + + +async def update_feed_properties(db_file, url, entries, updated): + """ + Update properties of url in table feeds. + + Parameters + ---------- + db_file : str + Path to database file. + url : str + Feed URL. + entries : int + Number of entries. + updated : ??? + Date feed was last updated. 
+ """ + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = ( + """ + SELECT id + FROM feeds + WHERE url = :url + """ + ) + feed_id = cur.execute(sql, (url,)).fetchone()[0] + sql = ( + """ + UPDATE properties + SET entries = :entries + WHERE feed_id = :feed_id + """ + ) + cur.execute(sql, { + "entries" : entries, + "feed_id": feed_id + }) async def maintain_archive(db_file, limit): @@ -851,13 +1089,17 @@ async def maintain_archive(db_file, limit): ---------- db_file : str Path to database file. + limit : str + Number of maximum entries to store. """ async with DBLOCK: with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "SELECT count(id) " - "FROM archive" + """ + SELECT count(id) + FROM archive + """ ) count = cur.execute(sql).fetchone()[0] # FIXME Upon first time joining to a groupchat @@ -869,12 +1111,14 @@ async def maintain_archive(db_file, limit): difference = count - int(limit) if difference > 0: sql = ( - "DELETE FROM archive " - "WHERE id " - "IN (SELECT id " - "FROM archive " - "ORDER BY timestamp ASC " - "LIMIT :difference)" + """ + DELETE FROM archive + WHERE id + IN (SELECT id + FROM archive + ORDER BY timestamp ASC + LIMIT :difference) + """ ) cur.execute(sql, { "difference": difference @@ -885,7 +1129,7 @@ async def maintain_archive(db_file, limit): # NOTE Entries that are read from archive are deleted. # NOTE Unlike entries from table entries, entries from # table archive are not marked as read. -async def get_entries_of_source(db_file, feed, source): +async def get_entries_of_feed(db_file, feed, url): """ Remove entries that don't exist in a given parsed feed. Check the entries returned from feed and delete read non @@ -897,17 +1141,19 @@ async def get_entries_of_source(db_file, feed, source): Path to database file. feed : list Parsed feed document. - source : str + url : str Feed URL. URL of associated feed. """ with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "SELECT id, title, link, entry_id, timestamp, read " - "FROM entries " - "WHERE source = ?" + """ + SELECT id, title, link, entry_id, timestamp, read + FROM entries + WHERE feed_id = ? + """ ) - items = cur.execute(sql, (source,)).fetchall() + items = cur.execute(sql, (url,)).fetchall() return items @@ -935,7 +1181,8 @@ async def get_entries_of_source(db_file, feed, source): # result = cur.execute(sql).fetchall() # return result - +# TODO select by "feed_id" (of table "status") from +# "feed" urls that are enabled in table "status" async def get_feeds_url(db_file): """ Query active feeds for URLs. @@ -953,9 +1200,10 @@ async def get_feeds_url(db_file): with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "SELECT address " - "FROM feeds " - "WHERE enabled = 1" + """ + SELECT url + FROM feeds + """ ) result = cur.execute(sql).fetchall() return result @@ -975,12 +1223,19 @@ async def get_feeds(db_file): results : ??? URLs of feeds. """ + # TODO + # 1) Select id from table feeds + # Select name, url (feeds) updated, enabled, feed_id (status) + # 2) Sort feeds by id. 
Sort status by feed_id + # results += cur.execute(sql).fetchall() cur = get_cursor(db_file) sql = ( - "SELECT name, address, updated, enabled, id " - "FROM feeds" + """ + SELECT name, url, id + FROM feeds + """ ) - results = cur.execute(sql) + results = cur.execute(sql).fetchall() return results @@ -1008,17 +1263,20 @@ async def last_entries(db_file, num): # "LIMIT :num" # ) sql = ( - "SELECT title, link, timestamp " - "FROM entries " - "WHERE read = 0 " - "UNION ALL " - "SELECT title, link, timestamp " - "FROM archive " - "WHERE read = 0 " - "ORDER BY timestamp DESC " - "LIMIT :num " + """ + SELECT title, link, timestamp + FROM entries + WHERE read = 0 + UNION ALL + SELECT title, link, timestamp + FROM archive + WHERE read = 0 + ORDER BY timestamp DESC + LIMIT :num + """ ) - results = cur.execute(sql, (num,)) + results = cur.execute( + sql, (num,)).fetchall() return results @@ -1040,13 +1298,16 @@ async def search_feeds(db_file, query): """ cur = get_cursor(db_file) sql = ( - "SELECT name, address, id, enabled " - "FROM feeds " - "WHERE name LIKE ? " - "OR address LIKE ? " - "LIMIT 50" + """ + SELECT name, id, url + FROM feeds + WHERE name LIKE ? + OR url LIKE ? + LIMIT 50 + """ ) - results = cur.execute(sql, [f'%{query}%', f'%{query}%']) + results = cur.execute( + sql, [f'%{query}%', f'%{query}%']).fetchall() return results @@ -1068,19 +1329,19 @@ async def search_entries(db_file, query): """ cur = get_cursor(db_file) sql = ( - "SELECT title, link " - "FROM entries " - "WHERE title LIKE ? " - "UNION ALL " - "SELECT title, link " - "FROM archive " - "WHERE title LIKE ? " - "LIMIT 50" + """ + SELECT title, link + FROM entries + WHERE title LIKE ? + UNION ALL + SELECT title, link + FROM archive + WHERE title LIKE ? + LIMIT 50 + """ ) - results = cur.execute(sql, ( - f'%{query}%', - f'%{query}%' - )) + results = cur.execute( + sql, (f'%{query}%', f'%{query}%')).fetchall() return results """ @@ -1105,8 +1366,8 @@ ERROR DATE: result = https://blog.heckel.io/feed/ 19:32:06 ERROR DATE: result = https://mwl.io/feed """ -async def check_entry_exist(db_file, source, eid=None, - title=None, link=None, date=None): +async def check_entry_exist( + db_file, url, entry_id=None, title=None, link=None, date=None): """ Check whether an entry exists. If entry has an ID, check by ID. @@ -1119,7 +1380,7 @@ async def check_entry_exist(db_file, source, eid=None, Path to database file. source : str Feed URL. URL of associated feed. - eid : str, optional + entry_id : str, optional Entry ID. The default is None. title : str, optional Entry title. The default is None. @@ -1134,24 +1395,28 @@ async def check_entry_exist(db_file, source, eid=None, True or None. 
""" cur = get_cursor(db_file) - if eid: + if entry_id: sql = ( - "SELECT id " - "FROM entries " - "WHERE entry_id = :eid and source = :source" + """ + SELECT id + FROM entries + WHERE entry_id = :entry_id and feed_id = :feed_id + """ ) result = cur.execute(sql, { - "eid": eid, - "source": source + "entry_id": entry_id, + "feed_id": url }).fetchone() elif date: sql = ( - "SELECT id " - "FROM entries " - "WHERE " - "title = :title and " - "link = :link and " - "timestamp = :date" + """ + SELECT id + FROM entries + WHERE + title = :title and + link = :link and + timestamp = :date + """ ) try: result = cur.execute(sql, { @@ -1160,13 +1425,15 @@ async def check_entry_exist(db_file, source, eid=None, "timestamp": date }).fetchone() except: - print(current_time(), "ERROR DATE: source =", source) + print(current_time(), "ERROR DATE: source =", url) print(current_time(), "ERROR DATE: date =", date) else: sql = ( - "SELECT id " - "FROM entries " - "WHERE title = :title and link = :link" + """ + SELECT id + FROM entries + WHERE title = :title and link = :link + """ ) result = cur.execute(sql, { "title": title, @@ -1176,9 +1443,9 @@ async def check_entry_exist(db_file, source, eid=None, if result: return True else: - None + return None except: - print(current_time(), "ERROR DATE: result =", source) + print(current_time(), "ERROR DATE: result =", url) async def set_settings_value(db_file, key_value): @@ -1211,9 +1478,11 @@ async def set_settings_value(db_file, key_value): cur = conn.cursor() # try: sql = ( - "UPDATE settings " - "SET value = :value " - "WHERE key = :key" + """ + UPDATE settings + SET value = :value + WHERE key = :key + """ ) cur.execute(sql, { "key": key, @@ -1246,9 +1515,11 @@ async def get_settings_value(db_file, key): cur = conn.cursor() try: sql = ( - "SELECT value " - "FROM settings " - "WHERE key = ?" + """ + SELECT value + FROM settings + WHERE key = ? + """ ) value = cur.execute(sql, (key,)).fetchone()[0] return value @@ -1287,9 +1558,11 @@ async def set_filters_value(db_file, key_value): with create_connection(db_file) as conn: cur = conn.cursor() sql = ( - "UPDATE filters " - "SET value = :value " - "WHERE key = :key" + """ + UPDATE filters + SET value = :value + WHERE key = :key + """ ) cur.execute(sql, { "key": key, @@ -1317,9 +1590,11 @@ async def get_filters_value(db_file, key): cur = conn.cursor() try: sql = ( - "SELECT value " - "FROM filters " - "WHERE key = ?" + """ + SELECT value + FROM filters + WHERE key = ? 
+ """ ) value = cur.execute(sql, (key,)).fetchone()[0] return value diff --git a/slixfeed/task.py b/slixfeed/task.py index 7ad7e0d..14a8f0b 100644 --- a/slixfeed/task.py +++ b/slixfeed/task.py @@ -44,12 +44,12 @@ import logging import os import slixmpp +import slixfeed.action as action from slixfeed.config import ( get_pathname_to_database, - get_default_dbdir, + get_default_data_directory, get_value_default) from slixfeed.datetime import current_time -from slixfeed.action import organize_items from slixfeed.sqlite import ( get_feed_title, get_feeds_url, @@ -63,7 +63,6 @@ from slixfeed.sqlite import ( ) # from xmpp import Slixfeed import slixfeed.xmpp.client as xmpp -from slixfeed.xmpp.compose import list_unread_entries import slixfeed.xmpp.utility as utility main_task = [] @@ -229,8 +228,13 @@ async def send_update(self, jid, num=None): news_digest = [] results = await get_unread_entries(db_file, num) for result in results: - title = get_feed_title(db_file, result[3]) - news_item = list_unread_entries(result, title) + ix = result[0] + title_e = result[1] + url = result[2] + feed_id = result[3] + date = result[4] + title_f = get_feed_title(db_file, feed_id) + news_item = action.list_unread_entries(result, title_f) news_digest.extend([news_item]) # print(db_file) # print(result[0]) @@ -423,7 +427,8 @@ async def check_updates(jid): while True: db_file = get_pathname_to_database(jid) urls = await get_feeds_url(db_file) - await organize_items(db_file, urls) + for url in urls: + await action.scan(db_file, url) val = get_value_default( "settings", "Settings", "check") await asyncio.sleep(60 * float(val)) @@ -504,7 +509,7 @@ async def select_file(self): Initiate actions by JID (Jabber ID). """ while True: - db_dir = get_default_dbdir() + db_dir = get_default_data_directory() if not os.path.isdir(db_dir): msg = ( "Slixfeed can not work without a database.\n" diff --git a/slixfeed/url.py b/slixfeed/url.py index 5549144..9bbf36b 100644 --- a/slixfeed/url.py +++ b/slixfeed/url.py @@ -7,16 +7,11 @@ TODO 1) ActivityPub URL revealer activitypub_to_http. -2) Remove tracking queries. - -3) Redirect to Invidious, Librarian, Nitter, ProxiTok etc. - because they provide RSS. - """ -import slixfeed.config as config from email.utils import parseaddr import random +import slixfeed.config as config from urllib.parse import ( parse_qs, urlencode, @@ -131,22 +126,6 @@ def feed_to_http(url): return new_url -def activitypub_to_http(namespace): - """ - Replace ActivityPub namespace by HTTP. - - Parameters - ---------- - namespace : str - Namespace. - - Returns - ------- - new_url : str - URL. - """ - - def check_xmpp_uri(uri): """ Check validity of XMPP URI. @@ -318,4 +297,20 @@ def trim_url(url): queries, fragment ]) - return url \ No newline at end of file + return url + + +def activitypub_to_http(namespace): + """ + Replace ActivityPub namespace by HTTP. + + Parameters + ---------- + namespace : str + Namespace. + + Returns + ------- + new_url : str + URL. + """ diff --git a/slixfeed/xmpp/compose.py b/slixfeed/xmpp/compose.py deleted file mode 100644 index 6e5e178..0000000 --- a/slixfeed/xmpp/compose.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -""" - -TODO - -1) Port functions insert_feed, remove_feed, get_entry_unread. - -2) Merge with action.py - -3) Call sqlite function from function statistics. - Returning a list of values doesn't' seem to be a good practice. 
- -""" - -import slixfeed.xmpp.bookmark as bookmark -from slixfeed.url import remove_tracking_parameters, replace_hostname - - -def list_unread_entries(result, feed_title): - # TODO Add filtering - # TODO Do this when entry is added to list and mark it as read - # DONE! - # results = [] - # if get_settings_value(db_file, "filter-deny"): - # while len(results) < num: - # result = cur.execute(sql).fetchone() - # blacklist = await get_settings_value(db_file, "filter-deny").split(",") - # for i in blacklist: - # if i in result[1]: - # continue - # print("rejected:", result[1]) - # print("accepted:", result[1]) - # results.extend([result]) - - # news_list = "You've got {} news items:\n".format(num) - # NOTE Why doesn't this work without list? - # i.e. for result in results - # for result in results.fetchall(): - ix = result[0] - title = result[1] - # # TODO Retrieve summary from feed - # # See fetch.view_entry - # summary = result[2] - # # Remove HTML tags - # try: - # summary = BeautifulSoup(summary, "lxml").text - # except: - # print(result[2]) - # breakpoint() - # # TODO Limit text length - # summary = summary.replace("\n\n\n", "\n\n") - # length = await get_settings_value(db_file, "length") - # summary = summary[:length] + " […]" - # summary = summary.strip().split('\n') - # summary = ["> " + line for line in summary] - # summary = "\n".join(summary) - link = result[2] - link = remove_tracking_parameters(link) - link = (replace_hostname(link, "link")) or link - news_item = ( - "\n{}\n{}\n{}\n" - ).format(str(title), str(link), str(feed_title)) - return news_item - - -def list_search_results(query, results): - results_list = ( - "Search results for '{}':\n\n```" - ).format(query) - counter = 0 - for result in results: - counter += 1 - results_list += ( - "\n{}\n{}\n" - ).format(str(result[0]), str(result[1])) - if counter: - return results_list + "```\nTotal of {} results".format(counter) - else: - return "No results were found for: {}".format(query) - - -def list_feeds_by_query(query, results): - results_list = ( - "Feeds containing '{}':\n\n```" - ).format(query) - counter = 0 - for result in results: - counter += 1 - results_list += ( - "\nName : {}" - "\nURL : {}" - "\nIndex : {}" - "\nMode : {}" - "\n" - ).format(str(result[0]), str(result[1]), - str(result[2]), str(result[3])) - if counter: - return results_list + "\n```\nTotal of {} feeds".format(counter) - else: - return "No feeds were found for: {}".format(query) - - -def list_statistics(values): - """ - Return table statistics. - - Parameters - ---------- - db_file : str - Path to database file. - - Returns - ------- - msg : str - Statistics as message. - """ - msg = ( - "```" - "\nSTATISTICS\n" - "News items : {}/{}\n" - "News sources : {}/{}\n" - "\nOPTIONS\n" - "Items to archive : {}\n" - "Update interval : {}\n" - "Items per update : {}\n" - "Operation status : {}\n" - "```" - ).format(values[0], values[1], values[2], values[3], - values[4], values[5], values[6], values[7]) - return msg - - -def list_last_entries(results, num): - titles_list = "Recent {} titles:\n\n```".format(num) - counter = 0 - for result in results: - counter += 1 - titles_list += ( - "\n{}\n{}\n" - ).format(str(result[0]), str(result[1])) - if counter: - titles_list += "```\n" - return titles_list - else: - return "There are no news at the moment." 
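The TODO at the top of this removed module flags that sqlite.statistics() returning a positional list of values is fragile: a formatter such as list_statistics() above has to know what each index means. A minimal sketch of the alternative, in which the query helper returns a mapping so fields are addressed by name; the helper name, keys and queries are illustrative assumptions, not the project's API:

import sqlite3


def statistics_as_dict(db_file):
    # Illustrative sketch, not part of the patch: return counts keyed by
    # name instead of by position in a list.
    with sqlite3.connect(db_file) as conn:
        cur = conn.cursor()
        return {
            "entries_unread": cur.execute(
                "SELECT count(id) FROM entries WHERE read = 0"
                ).fetchone()[0],
            "entries_total": cur.execute(
                "SELECT count(id) FROM entries").fetchone()[0],
            "feeds_total": cur.execute(
                "SELECT count(id) FROM feeds").fetchone()[0],
        }


def format_statistics(stats):
    # Reordering the queries above no longer changes the meaning of the
    # report, because fields are looked up by key.
    return ("News items   : {entries_unread}/{entries_total}\n"
            "News sources : {feeds_total}").format(**stats)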
- - -def list_feeds(results): - feeds_list = "\nList of subscriptions:\n\n```\n" - counter = 0 - for result in results: - counter += 1 - feeds_list += ( - "Name : {}\n" - "Address : {}\n" - "Updated : {}\n" - "Status : {}\n" - "ID : {}\n" - "\n" - ).format(str(result[0]), str(result[1]), str(result[2]), - str(result[3]), str(result[4])) - if counter: - return feeds_list + ( - "```\nTotal of {} subscriptions.\n" - ).format(counter) - else: - msg = ( - "List of subscriptions is empty.\n" - "To add feed, send a URL\n" - "Try these:\n" - # TODO Pick random from featured/recommended - "https://reclaimthenet.org/feed/" - ) - return msg - - -async def list_bookmarks(self): - conferences = await bookmark.get(self) - groupchat_list = "\nList of groupchats:\n\n```\n" - counter = 0 - for conference in conferences: - counter += 1 - groupchat_list += ( - "{}\n" - "\n" - ).format( - conference["jid"] - ) - groupchat_list += ( - "```\nTotal of {} groupchats.\n" - ).format(counter) - return groupchat_list \ No newline at end of file diff --git a/slixfeed/xmpp/process.py b/slixfeed/xmpp/process.py index 92d75a6..c8f3197 100644 --- a/slixfeed/xmpp/process.py +++ b/slixfeed/xmpp/process.py @@ -22,29 +22,20 @@ import os import slixfeed.action as action from slixfeed.config import ( add_to_list, - get_default_dbdir, + get_default_data_directory, get_value_default, get_value, get_pathname_to_database, remove_from_list) -import slixfeed.crawl as crawl from slixfeed.datetime import current_time, timestamp -import slixfeed.export as export -import slixfeed.fetch as fetch -import slixfeed.opml as opml import slixfeed.sqlite as sqlite import slixfeed.task as task -import slixfeed.log as log -import slixfeed.read as read import slixfeed.url as uri import slixfeed.xmpp.bookmark as bookmark -import slixfeed.xmpp.compose as compose import slixfeed.xmpp.muc as groupchat -import slixfeed.xmpp.status as status import slixfeed.xmpp.text as text import slixfeed.xmpp.upload as upload from slixfeed.xmpp.utility import jid_type -from urllib.parse import urlsplit, urlunsplit async def event(self, event): @@ -137,7 +128,7 @@ async def message(self, message): # # Begin processing new JID # # Deprecated in favour of event "presence_available" - # db_dir = get_default_dbdir() + # db_dir = get_default_data_directory() # os.chdir(db_dir) # if jid + ".db" not in os.listdir(): # await task_jid(jid) @@ -221,10 +212,12 @@ async def message(self, message): title = " ".join(message_text.split(" ")[1:]) if url.startswith("http"): db_file = get_pathname_to_database(jid) - exist = await sqlite.is_feed_exist(db_file, url) + exist = await sqlite.get_feed_id_and_name( + db_file, url) if not exist: - await sqlite.insert_feed(db_file, url, title) - await action.organize_items(db_file, [url]) + await sqlite.insert_feed( + db_file, url, title) + await action.scan(db_file, url) old = ( await sqlite.get_settings_value(db_file, "old") ) or ( @@ -237,7 +230,7 @@ async def message(self, message): await task.start_tasks_xmpp( self, jid, ["status"]) else: - await sqlite.mark_source_as_read( + await sqlite.mark_feed_as_read( db_file, url) response = ( "> {}\nNews source has been " @@ -325,7 +318,7 @@ async def message(self, message): case "bookmarks": if jid == get_value( "accounts", "XMPP", "operator"): - response = await compose.list_bookmarks(self) + response = await action.list_bookmarks(self) else: response = ( "This action is restricted. " @@ -368,13 +361,6 @@ async def message(self, message): else: response = "Missing keywords." 
                     send_reply_message(self, message, response)
-                case _ if message_lowercase.startswith("import "):
-                    status_type = "dnd"
-                    status_message = (
-                        "📥️ Procesing request to import feeds ..."
-                    )
-                    send_status_message(
-                        self, jid, status_type, status_message)
                 case _ if message_lowercase.startswith("export "):
                     key = message_text[7:]
                     if key in ("opml", "html", "md", "xbel"):
@@ -384,7 +370,7 @@ async def message(self, message):
                         ).format(key)
                         send_status_message(
                             self, jid, status_type, status_message)
-                        data_dir = get_default_dbdir()
+                        data_dir = get_default_data_directory()
                         if not os.path.isdir(data_dir):
                             os.mkdir(data_dir)
                         if not os.path.isdir(data_dir + '/' + key):
@@ -397,10 +383,10 @@ async def message(self, message):
                             case "html":
                                 response = "Not yet implemented."
                             case "md":
-                                export.markdown(
+                                action.export_to_markdown(
                                     jid, filename, results)
                             case "opml":
-                                opml.export_to_file(
+                                action.export_to_opml(
                                     jid, filename, results)
                             case "xbel":
                                 response = "Not yet implemented."
@@ -409,24 +395,54 @@ async def message(self, message):
                             "Feeds exported successfully to {}.\n{}"
                         ).format(key, url)
                         # send_oob_reply_message(message, url, response)
-                        await send_oob_message(self, jid, url)
-                        await task.start_tasks_xmpp(self, jid, ["status"])
+                        await send_oob_message(
+                            self, jid, url)
+                        await task.start_tasks_xmpp(
+                            self, jid, ["status"])
                     else:
                         response = "Unsupported filetype."
-                    send_reply_message(self, message, response)
+                        send_reply_message(self, message, response)
                 case _ if (message_lowercase.startswith("gemini:") or
                            message_lowercase.startswith("gopher:")):
                     response = "Gemini and Gopher are not supported yet."
                     send_reply_message(self, message, response)
+                case _ if (message_lowercase.startswith("http")) and (
+                        message_lowercase.endswith(".opml")):
+                    url = message_text
+                    await task.clean_tasks_xmpp(
+                        jid, ["status"])
+                    status_type = "dnd"
+                    status_message = (
+                        "📥️ Processing request to import feeds ..."
+                    )
+                    send_status_message(
+                        self, jid, status_type, status_message)
+                    db_file = get_pathname_to_database(jid)
+                    count = await action.import_opml(db_file, url)
+                    if count:
+                        response = (
+                            "Successfully imported {} feeds"
+                        ).format(count)
+                    else:
+                        response = (
+                            "OPML file was not imported."
+ ) + await task.clean_tasks_xmpp( + jid, ["status"]) + await task.start_tasks_xmpp( + self, jid, ["status"]) + send_reply_message(self, message, response) case _ if (message_lowercase.startswith("http") or message_lowercase.startswith("feed:")): url = message_text - await task.clean_tasks_xmpp(jid, ["status"]) + await task.clean_tasks_xmpp( + jid, ["status"]) status_type = "dnd" status_message = ( "📫️ Processing request to fetch data from {}" ).format(url) - send_status_message(self, jid, status_type, status_message) + send_status_message( + self, jid, status_type, status_message) if url.startswith("feed:"): url = uri.feed_to_http(url) url = (uri.replace_hostname(url, "feed")) or url @@ -443,7 +459,7 @@ async def message(self, message): if len(query) > 3: db_file = get_pathname_to_database(jid) result = await sqlite.search_feeds(db_file, query) - response = compose.list_feeds_by_query(query, result) + response = action.list_feeds_by_query(query, result) else: response = ( "Enter at least 4 characters to search" @@ -451,7 +467,7 @@ async def message(self, message): else: db_file = get_pathname_to_database(jid) result = await sqlite.get_feeds(db_file) - response = compose.list_feeds(result) + response = action.list_feeds(result) send_reply_message(self, message, response) case "goodbye": if message["type"] == "groupchat": @@ -616,7 +632,8 @@ async def message(self, message): status_message = ( "📫️ Processing request to fetch data from {}" ).format(url) - send_status_message(self, jid, status_type, status_message) + send_status_message( + self, jid, status_type, status_message) if url.startswith("feed:"): url = uri.feed_to_http(url) url = (uri.replace_hostname(url, "feed")) or url @@ -651,52 +668,59 @@ async def message(self, message): else: db_file = get_pathname_to_database(jid) result = await sqlite.last_entries(db_file, num) - response = compose.list_last_entries(result, num) + response = action.list_last_entries(result, num) except: response = "Enter a numeric value only." else: response = "Missing value." send_reply_message(self, message, response) - # NOTE Should people be asked for numeric value? case _ if message_lowercase.startswith("remove "): - ix = message_text[7:] - if ix: + ix_url = message_text[7:] + if ix_url: db_file = get_pathname_to_database(jid) try: - await sqlite.remove_feed(db_file, ix) - response = ( - "News source {} has been removed " - "from subscription list.").format(ix) - # await refresh_task( - # self, - # jid, - # send_status, - # "status", - # 20 - # ) - await task.clean_tasks_xmpp( - jid, ["status"]) - await task.start_tasks_xmpp( - self, jid, ["status"]) + ix = int(ix_url) + try: + url = await sqlite.remove_feed_by_index( + db_file, ix) + response = ( + "> {}\nNews source {} has been removed " + "from subscription list.").format(url, ix) + except: + response = ( + "No news source with ID {}.".format(ix)) except: + url = ix_url + await sqlite.remove_feed_by_url(db_file, url) response = ( - "No news source with ID {}.".format(ix)) + "> {}\nNews source has been removed " + "from subscription list.").format(url) + # await refresh_task( + # self, + # jid, + # send_status, + # "status", + # 20 + # ) + await task.clean_tasks_xmpp(jid, ["status"]) + await task.start_tasks_xmpp(self, jid, ["status"]) else: response = "Missing feed ID." 
send_reply_message(self, message, response) case _ if message_lowercase.startswith("reset"): - source = message_text[6:] + url = message_text[6:] await task.clean_tasks_xmpp(jid, ["status"]) status_type = "dnd" status_message = "📫️ Marking entries as read..." send_status_message( self, jid, status_type, status_message) - if source: + if url: db_file = get_pathname_to_database(jid) - await sqlite.mark_source_as_read(db_file, source) + await sqlite.mark_feed_as_read( + db_file, url) response = ( "All entries of {} have been " - "marked as read.".format(source) + "marked as read.".format(url) ) else: db_file = get_pathname_to_database(jid) @@ -712,7 +736,7 @@ async def message(self, message): db_file = get_pathname_to_database(jid) results = await sqlite.search_entries( db_file, query) - response = compose.list_search_results( + response = action.list_search_results( query, results) else: response = ( @@ -738,7 +762,7 @@ async def message(self, message): case "stats": db_file = get_pathname_to_database(jid) result = await sqlite.statistics(db_file) - response = compose.list_statistics(result) + response = action.list_statistics(result) send_reply_message(self, message, response) case _ if message_lowercase.startswith("disable "): ix = message_text[8:] @@ -829,15 +853,15 @@ async def message(self, message): # if response: message.reply(response).send() if not response: response = "EMPTY MESSAGE - ACTION ONLY" - data_dir = get_default_dbdir() + data_dir = get_default_data_directory() if not os.path.isdir(data_dir): os.mkdir(data_dir) if not os.path.isdir(data_dir + '/logs/'): os.mkdir(data_dir + '/logs/') - log.markdown( + action.log_to_markdown( current_time(), os.path.join(data_dir, "logs", jid), jid, message_text) - log.markdown( + action.log_to_markdown( current_time(), os.path.join(data_dir, "logs", jid), self.boundjid.bare, response)
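The rewritten sqlite.py functions add_entry, set_date, update_feed_status, update_feed_validity and update_feed_properties each repeat the same SELECT id FROM feeds WHERE url = :url lookup before writing, and update_feed_status still falls back to a bare except with a breakpoint when the URL is unknown. A minimal sketch of a shared lookup helper that would also make the missing-feed case explicit; the helper name is an assumption, not part of the patch:

def get_feed_id(cur, url):
    # Illustrative helper, not part of the patch: resolve a feed URL to
    # its row id once, instead of repeating the query in every writer.
    sql = (
        """
        SELECT id
        FROM feeds
        WHERE url = :url
        """
    )
    result = cur.execute(sql, {"url": url}).fetchone()
    return result[0] if result else None

Callers such as update_feed_status could then branch on None instead of relying on an exception.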
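maintain_archive caps table archive at a configured number of rows by deleting the oldest ones. The same pruning query, shown standalone so it can be exercised against a copy of a database; the synchronous sqlite3 usage and function name are assumptions for the sketch:

import sqlite3


def prune_archive(db_file, limit):
    # Keep at most `limit` rows in table archive by deleting the oldest
    # entries (ascending timestamp), as maintain_archive does.
    with sqlite3.connect(db_file) as conn:
        cur = conn.cursor()
        count = cur.execute(
            "SELECT count(id) FROM archive").fetchone()[0]
        difference = count - int(limit)
        if difference > 0:
            cur.execute(
                """
                DELETE FROM archive
                WHERE id IN (
                    SELECT id
                    FROM archive
                    ORDER BY timestamp ASC
                    LIMIT :difference)
                """,
                {"difference": difference})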
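process.py now treats a message that starts with "http" and ends with ".opml" as an import request and hands it to action.import_opml, whose body lies outside this excerpt. Assuming the OPML document is parsed with the standard library, collecting the subscription URLs could look roughly like this; the helper name and return shape are hypothetical:

import xml.etree.ElementTree as ET


def read_opml_urls(opml_document):
    # Hypothetical sketch: return (title, url) pairs for every outline
    # element that carries an xmlUrl attribute. The real import_opml
    # presumably also inserts the URLs into table feeds and reports how
    # many were added.
    root = ET.fromstring(opml_document)
    feeds = []
    for outline in root.iter("outline"):
        url = outline.get("xmlUrl")
        if url:
            feeds.append((outline.get("text", ""), url))
    return feeds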
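The export handler likewise delegates to action.export_to_opml(jid, filename, results), which is also not shown here; the rows it receives presumably look like the (name, url, id) tuples that sqlite.get_feeds() now returns. A rough sketch of such a writer; the function name and document title are assumptions:

import xml.etree.ElementTree as ET


def write_opml(filename, feeds):
    # Hypothetical sketch: serialize (name, url, id) rows to an OPML file.
    root = ET.Element("opml", version="1.0")
    head = ET.SubElement(root, "head")
    ET.SubElement(head, "title").text = "Subscriptions"
    body = ET.SubElement(root, "body")
    for name, url, _ix in feeds:
        ET.SubElement(body, "outline", text=name or url, xmlUrl=url)
    ET.ElementTree(root).write(
        filename, encoding="utf-8", xml_declaration=True)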