From de200b3b0376c26e86df1a93c37740dc61c4ad00 Mon Sep 17 00:00:00 2001
From: Schimon Jehudah
Date: Tue, 2 Jan 2024 18:11:36 +0000
Subject: [PATCH] Split more functions into smaller functions

---
 slixfeed/fetch.py        |  86 ++++++---------------------
 slixfeed/format.py       |   9 +++
 slixfeed/sqlite.py       | 125 +++++++++++++--------------------
 slixfeed/task.py         |  28 ++++++++-
 slixfeed/xmpp/compose.py |  51 +++++++++++++++-
 slixfeed/xmpp/process.py |   4 +-
 6 files changed, 143 insertions(+), 160 deletions(-)

diff --git a/slixfeed/fetch.py b/slixfeed/fetch.py
index 89ff2d9..b315117 100644
--- a/slixfeed/fetch.py
+++ b/slixfeed/fetch.py
@@ -18,6 +18,9 @@ TODO
 4) Refactor view_entry and view_feed
    - Why "if" twice?
 
+5) Replace sqlite.remove_nonexistent_entries with sqlite.check_entry_exist
+   Same check, just reversed.
+
 """
 
 from aiohttp import ClientError, ClientSession, ClientTimeout
@@ -64,10 +67,7 @@ async def download_updates(db_file, url=None):
             # next(urls)
             continue
         await sqlite.update_source_status(
-            db_file,
-            res[1],
-            source
-            )
+            db_file, res[1], source)
         if res[0]:
             try:
                 feed = parse(res[0])
@@ -82,9 +82,7 @@
                 else:
                     valid = 1
                 await sqlite.update_source_validity(
-                    db_file,
-                    source,
-                    valid)
+                    db_file, source, valid)
             except (
                     IncompleteReadError,
                     IncompleteRead,
@@ -107,10 +105,7 @@
             # length = len(entries)
             # await remove_entry(db_file, source, length)
             await sqlite.remove_nonexistent_entries(
-                db_file,
-                feed,
-                source
-                )
+                db_file, feed, source)
             # new_entry = 0
             for entry in entries:
                 # TODO Pass date too for comparison check
@@ -144,13 +139,8 @@
                 else:
                     eid = link
                 exist = await sqlite.check_entry_exist(
-                    db_file,
-                    source,
-                    eid=eid,
-                    title=title,
-                    link=link,
-                    date=date
-                    )
+                    db_file, source, eid=eid,
+                    title=title, link=link, date=date)
                 if not exist:
                     # new_entry = new_entry + 1
                     # TODO Enhance summary
@@ -176,16 +166,10 @@
                         pathname
                         )
                     allow_list = await config.is_listed(
-                        db_file,
-                        "filter-allow",
-                        string
-                        )
+                        db_file, "filter-allow", string)
                     if not allow_list:
                         reject_list = await config.is_listed(
-                            db_file,
-                            "filter-deny",
-                            string
-                            )
+                            db_file, "filter-deny", string)
                         if reject_list:
                             # print(">>> REJECTED", title)
                             summary = (
@@ -196,13 +180,7 @@
                             # summary = ""
                             read_status = 1
                 entry = (
-                    title,
-                    link,
-                    eid,
-                    source,
-                    date,
-                    read_status
-                    )
+                    title, link, eid, source, date, read_status)
                 if isinstance(date, int):
                     print("PROBLEM: date is int")
                     print(date)
@@ -210,10 +188,7 @@
                 # print(source)
                 # print(date)
                 await sqlite.add_entry_and_set_date(
-                    db_file,
-                    source,
-                    entry
-                    )
+                    db_file, source, entry)
                 # print(current_time(), entry, title)
             # else:
             #     print(current_time(), exist, title)
@@ -293,12 +268,7 @@
                 "Link : {}\n"
                 "Count : {}\n"
                 "\n"
-                ).format(
-                    title,
-                    date,
-                    link,
-                    counter
-                    )
+                ).format(title, date, link, counter)
             if counter > 4:
                 break
         msg += (
@@ -377,11 +347,7 @@
             "\n"
             "{}\n"
             "\n"
-            ).format(
-                title,
-                summary,
-                link
-                )
+            ).format(title, summary, link)
     else:
         msg = (
             ">{}\n"
@@ -452,19 +418,11 @@
             if utility.is_feed(url, feed):
                 status = res[1]
                 msg = await sqlite.insert_feed(
-                    db_file,
-                    url,
-                    title,
-                    status
-                    )
+                    db_file, url, title, status)
                 await download_updates(db_file, [url])
             else:
                 msg = await probe_page(
-                    add_feed,
-                    url,
-                    res[0],
-                    db_file=db_file
-                    )
+                    add_feed, url, res[0], db_file=db_file)
         else:
             status = res[1]
             msg = (
@@ -684,11 +642,7 @@
                 "Link : {}\n"
                 "Items: {}\n"
                 "\n"
-                ).format(
-                    feed_name,
-                    feed_addr,
-                    feed_amnt
-                    )
+                ).format(feed_name, feed_addr, feed_amnt)
     if counter > 1:
         msg += (
             "```\nThe above feeds were extracted from\n{}"
@@ -805,11 +759,7 @@
                 "Link : {}\n"
                 "Count : {}\n"
                 "\n"
-                ).format(
-                    feed_name,
-                    feed_addr,
-                    feed_amnt
-                    )
+                ).format(feed_name, feed_addr, feed_amnt)
     if counter > 1:
         msg += (
             "```\nThe above feeds were extracted from\n{}"
diff --git a/slixfeed/format.py b/slixfeed/format.py
index f783d91..61e3983 100644
--- a/slixfeed/format.py
+++ b/slixfeed/format.py
@@ -7,4 +7,13 @@ TODO
 
 Move code from sqlite.get_entry_unread
 
+if num > 1:
+    news_list += (
+        "\n{}\n{}\n{}\n"
+        ).format(str(title), str(link), str(feed_title))
+else:
+    news_list = (
+        "{}\n{}\n{}"
+        ).format(str(title), str(link), str(feed_title))
+
 """
diff --git a/slixfeed/sqlite.py b/slixfeed/sqlite.py
index 302fb39..270e15c 100644
--- a/slixfeed/sqlite.py
+++ b/slixfeed/sqlite.py
@@ -26,11 +26,7 @@ from slixfeed.datetime import (
     rfc2822_to_iso8601
     )
 from sqlite3 import connect, Error
-from slixfeed.url import (
-    join_url,
-    remove_tracking_parameters,
-    replace_hostname
-    )
+from slixfeed.url import join_url
 # from eliot import start_action, to_file
 # # with start_action(action_type="list_feeds()", db=db_file):
 
@@ -397,7 +393,7 @@ async def get_number_of_entries_unread(db_file):
 
 
 # TODO Read from entries and archives
-async def get_entry_unread(db_file, num=None):
+async def get_unread_entries(db_file, num):
     """
     Extract information from unread entries.
 
@@ -413,10 +409,6 @@
     entry : str
         News item message.
     """
-    if not num:
-        num = await get_settings_value(db_file, "quantum")
-    else:
-        num = int(num)
     with create_connection(db_file) as conn:
         cur = conn.cursor()
         # sql = (
@@ -450,78 +442,13 @@
             )
         results = cur.execute(sql, (num,))
         results = results.fetchall()
-
-        # TODO Add filtering
-        # TODO Do this when entry is added to list and mark it as read
-        # DONE!
-        # results = []
-        # if get_settings_value(db_file, "filter-deny"):
-        #     while len(results) < num:
-        #         result = cur.execute(sql).fetchone()
-        #         blacklist = await get_settings_value(db_file, "filter-deny").split(",")
-        #         for i in blacklist:
-        #             if i in result[1]:
-        #                 continue
-        #             print("rejected:", result[1])
-        #             print("accepted:", result[1])
-        #         results.extend([result])
-
-        # news_list = "You've got {} news items:\n".format(num)
-        news_list = ""
-        # NOTE Why doesn't this work without list?
-        # i.e. for result in results
-        # for result in results.fetchall():
-        for result in results:
-            ix = result[0]
-            title = result[1]
-            # # TODO Retrieve summary from feed
-            # # See fetch.view_entry
-            # summary = result[2]
-            # # Remove HTML tags
-            # try:
-            #     summary = BeautifulSoup(summary, "lxml").text
-            # except:
-            #     print(result[2])
-            #     breakpoint()
-            # # TODO Limit text length
-            # summary = summary.replace("\n\n\n", "\n\n")
-            # length = await get_settings_value(db_file, "length")
-            # summary = summary[:length] + " […]"
-            # summary = summary.strip().split('\n')
-            # summary = ["> " + line for line in summary]
-            # summary = "\n".join(summary)
-            link = result[2]
-            link = remove_tracking_parameters(link)
-            link = (replace_hostname(link, "link")) or link
-            sql = (
-                "SELECT name "
-                "FROM feeds "
-                "WHERE address = :source "
-                )
-            source = result[3]
-            feed = cur.execute(sql, (source,))
-            feed = feed.fetchone()[0]
-            if num > 1:
-                news_list += (
-                    "\n{}\n{}\n{}\n"
-                    ).format(str(title), str(link), str(feed))
-            else:
-                news_list = (
-                    "{}\n{}\n{}"
-                    ).format(str(title), str(link), str(feed))
-            # TODO While `async with DBLOCK` does work well from
-            # outside of functions, it would be better practice
-            # to place it within the functions.
-            async with DBLOCK:
-                # NOTE: We can use DBLOCK once for both
-                # functions, because, due to exclusive
-                # ID, only one can ever occur.
-                await mark_entry_as_read(cur, ix)
-                await delete_entry(cur, ix)
-        return news_list
+        # print("### sqlite.get_unread_entries ###")
+        # print(results)
+        # breakpoint()
+        return results
 
 
-async def mark_entry_as_read(cur, ix):
+def mark_entry_as_read(cur, ix):
     """
     Set read status of entry as read.
 
@@ -562,6 +489,32 @@
         cur.execute(sql, (source,))
 
 
+def get_feed_title(db_file, source):
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = (
+            "SELECT name "
+            "FROM feeds "
+            "WHERE address = :source "
+            )
+        feed_title = cur.execute(sql, (source,))
+        feed_title = feed_title.fetchone()[0]
+        return feed_title
+
+
+async def mark_as_read(db_file, ix):
+    async with DBLOCK:
+        with create_connection(db_file) as conn:
+            cur = conn.cursor()
+            # TODO While `async with DBLOCK` does work well from
+            # outside of functions, it would be better practice
+            # to place it within the functions.
+            # NOTE: We can use DBLOCK once for both
+            # functions, because, due to exclusive
+            # ID, only one can ever occur.
+            mark_entry_as_read(cur, ix)
+            delete_archived_entry(cur, ix)
+
 async def mark_all_as_read(db_file):
     """
     Set read status of all entries as read.
@@ -585,7 +538,7 @@
         cur.execute(sql)
 
 
-async def delete_entry(cur, ix):
+def delete_archived_entry(cur, ix):
     """
     Delete entry from table archive.
 
@@ -614,8 +567,8 @@
 
     Returns
     -------
-    msg : str
-        Statistics as message.
+    values : list
+        List of values.
""" values = [] values.extend([await get_number_of_entries_unread(db_file)]) @@ -883,13 +836,15 @@ async def add_entry(cur, entry): try: cur.execute(sql, entry) except: - print("Unknown error for sqlite.add_entry") + print("") + # print("Unknown error for sqlite.add_entry") + # print(entry) # print(current_time(), "COROUTINE OBJECT NOW") # for i in entry: # print(type(i)) # print(i) # print(type(entry)) - print(entry) + # print(entry) # print(current_time(), "COROUTINE OBJECT NOW") # breakpoint() diff --git a/slixfeed/task.py b/slixfeed/task.py index 3c697f9..c66f711 100644 --- a/slixfeed/task.py +++ b/slixfeed/task.py @@ -51,13 +51,18 @@ from slixfeed.config import ( from slixfeed.datetime import current_time from slixfeed.fetch import download_updates from slixfeed.sqlite import ( - get_entry_unread, + get_unread_entries, + get_feed_title, get_settings_value, get_number_of_items, - get_number_of_entries_unread + get_number_of_entries_unread, + mark_as_read, + mark_entry_as_read, + delete_archived_entry ) # from xmpp import Slixfeed import slixfeed.xmpp.client as xmpp +from slixfeed.xmpp.compose import list_unread_entries import slixfeed.xmpp.utility as utility main_task = [] @@ -199,8 +204,25 @@ async def send_update(self, jid, num=None): db_file = get_pathname_to_database(jid) enabled = await get_settings_value(db_file, "enabled") if enabled: - new = await get_entry_unread(db_file, num) + if not num: + num = await get_settings_value(db_file, "quantum") + else: + num = int(num) + news_digest = [] + results = await get_unread_entries(db_file, num) + for result in results: + title = get_feed_title(db_file, result[3]) + news_item = list_unread_entries(result, title) + news_digest.extend([news_item]) + # print(db_file) + # print(result[0]) + # breakpoint() + await mark_as_read(db_file, result[0]) + new = " ".join(news_digest) + # breakpoint() if new: + # print("if new") + # breakpoint() # TODO Add while loop to assure delivery. # print(await current_time(), ">>> ACT send_message",jid) chat_type = await utility.jid_type(self, jid) diff --git a/slixfeed/xmpp/compose.py b/slixfeed/xmpp/compose.py index 29c0fce..8bd3c46 100644 --- a/slixfeed/xmpp/compose.py +++ b/slixfeed/xmpp/compose.py @@ -9,6 +9,53 @@ import slixfeed.xmpp.bookmark as bookmark from slixfeed.url import remove_tracking_parameters, replace_hostname +def list_unread_entries(result, feed_title): + # TODO Add filtering + # TODO Do this when entry is added to list and mark it as read + # DONE! + # results = [] + # if get_settings_value(db_file, "filter-deny"): + # while len(results) < num: + # result = cur.execute(sql).fetchone() + # blacklist = await get_settings_value(db_file, "filter-deny").split(",") + # for i in blacklist: + # if i in result[1]: + # continue + # print("rejected:", result[1]) + # print("accepted:", result[1]) + # results.extend([result]) + + # news_list = "You've got {} news items:\n".format(num) + # NOTE Why doesn't this work without list? + # i.e. 
+    # for result in results.fetchall():
+    ix = result[0]
+    title = result[1]
+    # # TODO Retrieve summary from feed
+    # # See fetch.view_entry
+    # summary = result[2]
+    # # Remove HTML tags
+    # try:
+    #     summary = BeautifulSoup(summary, "lxml").text
+    # except:
+    #     print(result[2])
+    #     breakpoint()
+    # # TODO Limit text length
+    # summary = summary.replace("\n\n\n", "\n\n")
+    # length = await get_settings_value(db_file, "length")
+    # summary = summary[:length] + " […]"
+    # summary = summary.strip().split('\n')
+    # summary = ["> " + line for line in summary]
+    # summary = "\n".join(summary)
+    link = result[2]
+    link = remove_tracking_parameters(link)
+    link = (replace_hostname(link, "link")) or link
+    news_item = (
+        "\n{}\n{}\n{}\n"
+        ).format(str(title), str(link), str(feed_title))
+    return news_item
+
+
 def list_search_results(query, results):
     results_list = (
         "Search results for '{}':\n\n```"
@@ -76,7 +123,7 @@
     return msg
 
 
-async def list_last_entries(results, num):
+def list_last_entries(results, num):
     titles_list = "Recent {} titles:\n\n```".format(num)
     counter = 0
     for result in results:
@@ -91,7 +138,7 @@
     return "There are no news at the moment."
 
 
-async def list_feeds(results):
+def list_feeds(results):
     feeds_list = "\nList of subscriptions:\n\n```\n"
     counter = 0
     for result in results:
diff --git a/slixfeed/xmpp/process.py b/slixfeed/xmpp/process.py
index 0e00e8c..fbfa435 100644
--- a/slixfeed/xmpp/process.py
+++ b/slixfeed/xmpp/process.py
@@ -144,7 +144,7 @@ async def message(self, message):
             print(current_time(), "ACCOUNT: " + str(message["from"]))
             print(current_time(), "COMMAND:", message_text)
 
-
+        response = 0
         match message_lowercase:
             # case "breakpoint":
             #     if jid == get_value("accounts", "XMPP", "operator"):
@@ -712,7 +712,7 @@
         # commands are sent one close to the next
         # if response:
         message.reply(response).send()
-        response = "EMPTY MESSAGE - ACTION ONLY"
+        if not response: response = "EMPTY MESSAGE - ACTION ONLY"
         log_dir = get_default_dbdir()
         if not os.path.isdir(log_dir):
             os.mkdir(log_dir)
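
Editor's note (not part of the patch): after this split, the unread-news digest is assembled from three small pieces — sqlite.get_unread_entries() fetches the raw rows, xmpp.compose.list_unread_entries() formats a single row, and sqlite.mark_as_read() marks the entry read and deletes its archived copy under a single DBLOCK acquisition. Below is a minimal sketch of the resulting flow, mirroring the new body of task.send_update(); the wrapper name build_digest is hypothetical, and each row is shaped (ix, title, link, source) as in the patch.

    # Sketch only: mirrors the flow of task.send_update() after this patch.
    # build_digest is a hypothetical helper; db_file is a per-JID database path.
    from slixfeed.sqlite import get_unread_entries, get_feed_title, mark_as_read
    from slixfeed.xmpp.compose import list_unread_entries

    async def build_digest(db_file, num):
        news_digest = []
        results = await get_unread_entries(db_file, num)  # raw unread rows
        for result in results:
            # result is a row shaped (ix, title, link, source)
            title = get_feed_title(db_file, result[3])    # feed name for the source
            news_digest.append(list_unread_entries(result, title))
            await mark_as_read(db_file, result[0])        # mark read + drop archive row
        return " ".join(news_digest)

Keeping the SQL, the formatting, and the read-marking in separate functions means compose.py no longer touches a database cursor, which is what allowed mark_entry_as_read() and delete_archived_entry() to become plain synchronous helpers wrapped once by mark_as_read().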