From 956ce69fcb9aa8604f735ff60297ff5e510bda41 Mon Sep 17 00:00:00 2001 From: Schimon Jehudah Date: Sun, 7 Jan 2024 09:57:54 +0000 Subject: [PATCH] Fix entry duplication check --- slixfeed/action.py | 67 +++++++++++++--------------- slixfeed/sqlite.py | 94 ++++++++++++++++++++++++++++++++-------- slixfeed/xmpp/utility.py | 4 +- 3 files changed, 108 insertions(+), 57 deletions(-) diff --git a/slixfeed/action.py b/slixfeed/action.py index 50287a3..fa2d05e 100644 --- a/slixfeed/action.py +++ b/slixfeed/action.py @@ -162,34 +162,36 @@ def list_unread_entries(result, feed_title): def list_search_results(query, results): - results_list = ( + message = ( "Search results for '{}':\n\n```" ).format(query) for result in results: - results_list += ( + message += ( "\n{}\n{}\n" ).format(str(result[0]), str(result[1])) if len(results): - return results_list + "```\nTotal of {} results".format(len(results)) + message += "```\nTotal of {} results".format(len(results)) else: - return "No results were found for: {}".format(query) + message = "No results were found for: {}".format(query) + return message def list_feeds_by_query(query, results): - results_list = ( + message = ( "Feeds containing '{}':\n\n```" ).format(query) for result in results: - results_list += ( + message += ( "\nName : {} [{}]" "\nURL : {}" "\n" ).format( str(result[0]), str(result[1]), str(result[2])) if len(results): - return results_list + "\n```\nTotal of {} feeds".format(len(results)) + message += "\n```\nTotal of {} feeds".format(len(results)) else: - return "No feeds were found for: {}".format(query) + message = "No feeds were found for: {}".format(query) + return message def list_statistics(values): @@ -206,7 +208,7 @@ def list_statistics(values): msg : str Statistics as message. """ - msg = ( + message = ( "```" "\nSTATISTICS\n" "News items : {}/{}\n" @@ -219,28 +221,28 @@ def list_statistics(values): "```" ).format(values[0], values[1], values[2], values[3], values[4], values[5], values[6], values[7]) - return msg + return message # FIXME Replace counter by len def list_last_entries(results, num): - titles_list = "Recent {} titles:\n\n```".format(num) + message = "Recent {} titles:\n\n```".format(num) for result in results: - titles_list += ( + message += ( "\n{}\n{}\n" ).format( str(result[0]), str(result[1])) if len(results): - titles_list += "```\n" - return titles_list + message += "```\n" else: - return "There are no news at the moment." + message = "There are no news at the moment." + return message def list_feeds(results): - feeds_list = "\nList of subscriptions:\n\n```\n" + message = "\nList of subscriptions:\n\n```\n" for result in results: - feeds_list += ( + message += ( "Name : {}\n" "URL : {}\n" # "Updated : {}\n" @@ -250,36 +252,34 @@ def list_feeds(results): ).format( str(result[0]), str(result[1]), str(result[2])) if len(results): - return feeds_list + ( + message += ( "```\nTotal of {} subscriptions.\n" ).format(len(results)) else: - msg = ( + message = ( "List of subscriptions is empty.\n" "To add feed, send a URL\n" "Try these:\n" # TODO Pick random from featured/recommended "https://reclaimthenet.org/feed/" ) - return msg + return message async def list_bookmarks(self): conferences = await bookmark.get(self) - groupchat_list = "\nList of groupchats:\n\n```\n" - counter = 0 + message = "\nList of groupchats:\n\n```\n" for conference in conferences: - counter += 1 - groupchat_list += ( + message += ( "{}\n" "\n" ).format( conference["jid"] ) - groupchat_list += ( + message += ( "```\nTotal of {} groupchats.\n" - ).format(counter) - return groupchat_list + ).format(len(conferences)) + return message def export_to_markdown(jid, filename, results): @@ -582,12 +582,7 @@ async def scan(db_file, url): URL. The default is None. """ if isinstance(url, tuple): url = url[0] - try: - result = await fetch.download_feed(url) - except: - breakpoint() - # if not result: - # return + result = await fetch.download_feed(url) try: document = result[0] status = result[1] @@ -625,7 +620,7 @@ async def scan(db_file, url): IncompleteRead, error.URLError ) as e: - print("Error:", e) + logging.error(e) return # new_entry = 0 for entry in entries: @@ -647,13 +642,11 @@ async def scan(db_file, url): # title = "{}: *{}*".format(feed["feed"]["title"], entry.title) title = entry.title if entry.has_key("title") else date entry_id = entry.id if entry.has_key("id") else link - summary = entry.summary if entry.has_key("summary") else '' exist = await sqlite.check_entry_exist( db_file, url, entry_id=entry_id, title=title, link=link, date=date) if not exist: - if entry.has_key("summary"): - summary = entry.summary + summary = entry.summary if entry.has_key("summary") else '' read_status = 0 pathname = urlsplit(link).path string = ("{} {} {}" diff --git a/slixfeed/sqlite.py b/slixfeed/sqlite.py index d3ffb8b..f634306 100644 --- a/slixfeed/sqlite.py +++ b/slixfeed/sqlite.py @@ -120,6 +120,11 @@ def create_tables(db_file): ); """ ) + # TODO + # Consider parameter unique: + # entry_id TEXT NOT NULL UNIQUE, + # Will eliminate function: + # check_entry_exist entries_table_sql = ( """ CREATE TABLE IF NOT EXISTS entries ( @@ -555,6 +560,33 @@ async def get_unread_entries(db_file, num): return results +async def get_feed_id(cur, url): + """ + Get index of given feed. + + Parameters + ---------- + cur : object + Cursor object. + url : str + URL. + + Returns + ------- + feed_id : str + Feed index. + """ + sql = ( + """ + SELECT id + FROM feeds + WHERE url = :url + """ + ) + feed_id = cur.execute(sql, (url,)).fetchone()[0] + return feed_id + + async def mark_entry_as_read(cur, ix): """ Set read status of entry as read. @@ -871,8 +903,8 @@ When time functions of slixfeed.timedate were async, there were errors of coroutines """ -async def add_entry(db_file, title, link, - entry_id, url, date, read_status): +async def add_entry( + db_file, title, link, entry_id, url, date, read_status): """ Add a new entry row into the entries table. @@ -880,8 +912,18 @@ async def add_entry(db_file, title, link, ---------- db_file : str Path to database file. - entry : str - Entry properties. + title : str + Title. + link : str + Link. + entry_id : str + Entry index. + url : str + URL. + date : str + Date. + read_status : str + 0 or 1. """ async with DBLOCK: with create_connection(db_file) as conn: @@ -999,9 +1041,9 @@ async def update_feed_status(db_file, url, status_code): """ ) cur.execute(sql, { - "status_code" : status_code, - "scanned" : date.today(), - "feed_id" : feed_id + "status_code": status_code, + "scanned": date.today(), + "feed_id": feed_id }) @@ -1344,6 +1386,7 @@ async def search_entries(db_file, query): sql, (f'%{query}%', f'%{query}%')).fetchall() return results + """ FIXME @@ -1395,18 +1438,30 @@ async def check_entry_exist( True or None. """ cur = get_cursor(db_file) + exist = False if entry_id: + sql = ( + """ + SELECT id + FROM feeds + WHERE url = :url + """ + ) + feed_id = cur.execute(sql, (url,)).fetchone()[0] sql = ( """ SELECT id FROM entries - WHERE entry_id = :entry_id and feed_id = :feed_id + WHERE + entry_id = :entry_id and + feed_id = :feed_id """ ) result = cur.execute(sql, { "entry_id": entry_id, - "feed_id": url + "feed_id": feed_id }).fetchone() + if result: exist = True elif date: sql = ( """ @@ -1424,6 +1479,7 @@ async def check_entry_exist( "link": link, "timestamp": date }).fetchone() + if result: exist = True except: print(current_time(), "ERROR DATE: source =", url) print(current_time(), "ERROR DATE: date =", date) @@ -1432,20 +1488,24 @@ async def check_entry_exist( """ SELECT id FROM entries - WHERE title = :title and link = :link + WHERE + title = :title and + link = :link """ ) result = cur.execute(sql, { "title": title, "link": link }).fetchone() - try: - if result: - return True - else: - return None - except: - print(current_time(), "ERROR DATE: result =", url) + if result: exist = True + # try: + # if result: + # return True + # else: + # return None + # except: + # print(current_time(), "ERROR DATE: result =", url) + return exist async def set_settings_value(db_file, key_value): diff --git a/slixfeed/xmpp/utility.py b/slixfeed/xmpp/utility.py index f784e43..ae992d8 100644 --- a/slixfeed/xmpp/utility.py +++ b/slixfeed/xmpp/utility.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from slixfeed.datetime import current_time from slixmpp.exceptions import IqTimeout import logging @@ -39,5 +38,4 @@ async def jid_type(self, jid): ("Jabber ID:", jid) ] for message in messages: - print(current_time(), message) - logging.error(current_time(), message) \ No newline at end of file + logging.error(message) \ No newline at end of file