Fix entry duplication check

This commit is contained in:
Schimon Jehudah 2024-01-07 09:57:54 +00:00
parent fa4c85cd61
commit 956ce69fcb
3 changed files with 108 additions and 57 deletions

View file

@ -162,34 +162,36 @@ def list_unread_entries(result, feed_title):
def list_search_results(query, results):
    """
    Format search results as a message.

    Parameters
    ----------
    query : str
        Search query.
    results : list
        Matching rows. The first two items of each row are printed,
        one per line.

    Returns
    -------
    message : str
        Search results as message, or a notice when nothing was found.
    """
    # Nothing found: skip building the header and the code fence.
    if not results:
        return "No results were found for: {}".format(query)
    entries = "".join(
        "\n{}\n{}\n".format(str(result[0]), str(result[1]))
        for result in results)
    return (
        "Search results for '{}':\n\n```{}```\nTotal of {} results"
        ).format(query, entries, len(results))
def list_feeds_by_query(query, results):
    """
    Format feeds matching a query as a message.

    Parameters
    ----------
    query : str
        Search query.
    results : list
        Matching rows. The first three items of each row are printed
        as name, a bracketed value and URL, in that order.

    Returns
    -------
    message : str
        Matching feeds as message, or a notice when nothing was found.
    """
    # Nothing found: skip building the header and the code fence.
    if not results:
        return "No feeds were found for: {}".format(query)
    feeds = "".join(
        "\nName : {} [{}]\nURL : {}\n".format(
            str(result[0]), str(result[1]), str(result[2]))
        for result in results)
    return (
        "Feeds containing '{}':\n\n```{}\n```\nTotal of {} feeds"
        ).format(query, feeds, len(results))
def list_statistics(values): def list_statistics(values):
@ -206,7 +208,7 @@ def list_statistics(values):
msg : str msg : str
Statistics as message. Statistics as message.
""" """
msg = ( message = (
"```" "```"
"\nSTATISTICS\n" "\nSTATISTICS\n"
"News items : {}/{}\n" "News items : {}/{}\n"
@ -219,28 +221,28 @@ def list_statistics(values):
"```" "```"
).format(values[0], values[1], values[2], values[3], ).format(values[0], values[1], values[2], values[3],
values[4], values[5], values[6], values[7]) values[4], values[5], values[6], values[7])
return msg return message
# FIXME Replace counter by len # FIXME Replace counter by len
def list_last_entries(results, num):
    """
    Format recent entries as a message.

    Parameters
    ----------
    results : list
        Entry rows. The first two items of each row are printed,
        one per line.
    num : int or str
        Number shown in the heading. It is only interpolated into the
        text and does not limit how many rows are printed.

    Returns
    -------
    message : str
        Recent titles as message, or a notice when there are none.
    """
    # No entries: skip building the heading and the code fence.
    if not results:
        return "There are no news at the moment."
    titles = "".join(
        "\n{}\n{}\n".format(str(result[0]), str(result[1]))
        for result in results)
    return "Recent {} titles:\n\n```{}```\n".format(num, titles)
def list_feeds(results): def list_feeds(results):
feeds_list = "\nList of subscriptions:\n\n```\n" message = "\nList of subscriptions:\n\n```\n"
for result in results: for result in results:
feeds_list += ( message += (
"Name : {}\n" "Name : {}\n"
"URL : {}\n" "URL : {}\n"
# "Updated : {}\n" # "Updated : {}\n"
@ -250,36 +252,34 @@ def list_feeds(results):
).format( ).format(
str(result[0]), str(result[1]), str(result[2])) str(result[0]), str(result[1]), str(result[2]))
if len(results): if len(results):
return feeds_list + ( message += (
"```\nTotal of {} subscriptions.\n" "```\nTotal of {} subscriptions.\n"
).format(len(results)) ).format(len(results))
else: else:
msg = ( message = (
"List of subscriptions is empty.\n" "List of subscriptions is empty.\n"
"To add feed, send a URL\n" "To add feed, send a URL\n"
"Try these:\n" "Try these:\n"
# TODO Pick random from featured/recommended # TODO Pick random from featured/recommended
"https://reclaimthenet.org/feed/" "https://reclaimthenet.org/feed/"
) )
return msg return message
async def list_bookmarks(self):
    """
    Format bookmarked groupchats as a message.

    Parameters
    ----------
    self : object
        Object handed to bookmark.get (presumably the XMPP client
        instance; confirm against callers).

    Returns
    -------
    message : str
        List of groupchat Jabber IDs followed by their total count.
    """
    conferences = await bookmark.get(self)
    # Each conference contributes its "jid" followed by a blank line.
    groupchats = "".join(
        "{}\n\n".format(conference["jid"]) for conference in conferences)
    return (
        "\nList of groupchats:\n\n```\n{}```\nTotal of {} groupchats.\n"
        ).format(groupchats, len(conferences))
def export_to_markdown(jid, filename, results): def export_to_markdown(jid, filename, results):
@ -582,12 +582,7 @@ async def scan(db_file, url):
URL. The default is None. URL. The default is None.
""" """
if isinstance(url, tuple): url = url[0] if isinstance(url, tuple): url = url[0]
try: result = await fetch.download_feed(url)
result = await fetch.download_feed(url)
except:
breakpoint()
# if not result:
# return
try: try:
document = result[0] document = result[0]
status = result[1] status = result[1]
@ -625,7 +620,7 @@ async def scan(db_file, url):
IncompleteRead, IncompleteRead,
error.URLError error.URLError
) as e: ) as e:
print("Error:", e) logging.error(e)
return return
# new_entry = 0 # new_entry = 0
for entry in entries: for entry in entries:
@ -647,13 +642,11 @@ async def scan(db_file, url):
# title = "{}: *{}*".format(feed["feed"]["title"], entry.title) # title = "{}: *{}*".format(feed["feed"]["title"], entry.title)
title = entry.title if entry.has_key("title") else date title = entry.title if entry.has_key("title") else date
entry_id = entry.id if entry.has_key("id") else link entry_id = entry.id if entry.has_key("id") else link
summary = entry.summary if entry.has_key("summary") else ''
exist = await sqlite.check_entry_exist( exist = await sqlite.check_entry_exist(
db_file, url, entry_id=entry_id, db_file, url, entry_id=entry_id,
title=title, link=link, date=date) title=title, link=link, date=date)
if not exist: if not exist:
if entry.has_key("summary"): summary = entry.summary if entry.has_key("summary") else ''
summary = entry.summary
read_status = 0 read_status = 0
pathname = urlsplit(link).path pathname = urlsplit(link).path
string = ("{} {} {}" string = ("{} {} {}"

View file

@ -120,6 +120,11 @@ def create_tables(db_file):
); );
""" """
) )
# TODO
# Consider parameter unique:
# entry_id TEXT NOT NULL UNIQUE,
# Will eliminate function:
# check_entry_exist
entries_table_sql = ( entries_table_sql = (
""" """
CREATE TABLE IF NOT EXISTS entries ( CREATE TABLE IF NOT EXISTS entries (
@ -555,6 +560,33 @@ async def get_unread_entries(db_file, num):
return results return results
async def get_feed_id(cur, url):
    """
    Get index of given feed.

    Parameters
    ----------
    cur : object
        Cursor object.
    url : str
        URL.

    Returns
    -------
    feed_id : object or None
        Value of column id of the feed, or None when no feed with
        the given URL is stored.
    """
    sql = (
        """
        SELECT id
        FROM feeds
        WHERE url = :url
        """
        )
    # Named placeholders must be bound with a mapping. Binding them
    # with a sequence is deprecated since Python 3.12 and is an error
    # from Python 3.14.
    row = cur.execute(sql, {"url": url}).fetchone()
    # fetchone() yields None when no row matches; report that as None
    # instead of failing on None[0].
    return row[0] if row else None
async def mark_entry_as_read(cur, ix): async def mark_entry_as_read(cur, ix):
""" """
Set read status of entry as read. Set read status of entry as read.
@ -871,8 +903,8 @@ When time functions of slixfeed.timedate
were async, there were errors of coroutines were async, there were errors of coroutines
""" """
async def add_entry(db_file, title, link, async def add_entry(
entry_id, url, date, read_status): db_file, title, link, entry_id, url, date, read_status):
""" """
Add a new entry row into the entries table. Add a new entry row into the entries table.
@ -880,8 +912,18 @@ async def add_entry(db_file, title, link,
---------- ----------
db_file : str db_file : str
Path to database file. Path to database file.
entry : str title : str
Entry properties. Title.
link : str
Link.
entry_id : str
Entry index.
url : str
URL.
date : str
Date.
read_status : str
0 or 1.
""" """
async with DBLOCK: async with DBLOCK:
with create_connection(db_file) as conn: with create_connection(db_file) as conn:
@ -999,9 +1041,9 @@ async def update_feed_status(db_file, url, status_code):
""" """
) )
cur.execute(sql, { cur.execute(sql, {
"status_code" : status_code, "status_code": status_code,
"scanned" : date.today(), "scanned": date.today(),
"feed_id" : feed_id "feed_id": feed_id
}) })
@ -1344,6 +1386,7 @@ async def search_entries(db_file, query):
sql, (f'%{query}%', f'%{query}%')).fetchall() sql, (f'%{query}%', f'%{query}%')).fetchall()
return results return results
""" """
FIXME FIXME
@ -1395,18 +1438,30 @@ async def check_entry_exist(
True or None. True or None.
""" """
cur = get_cursor(db_file) cur = get_cursor(db_file)
exist = False
if entry_id: if entry_id:
sql = (
"""
SELECT id
FROM feeds
WHERE url = :url
"""
)
feed_id = cur.execute(sql, (url,)).fetchone()[0]
sql = ( sql = (
""" """
SELECT id SELECT id
FROM entries FROM entries
WHERE entry_id = :entry_id and feed_id = :feed_id WHERE
entry_id = :entry_id and
feed_id = :feed_id
""" """
) )
result = cur.execute(sql, { result = cur.execute(sql, {
"entry_id": entry_id, "entry_id": entry_id,
"feed_id": url "feed_id": feed_id
}).fetchone() }).fetchone()
if result: exist = True
elif date: elif date:
sql = ( sql = (
""" """
@ -1424,6 +1479,7 @@ async def check_entry_exist(
"link": link, "link": link,
"timestamp": date "timestamp": date
}).fetchone() }).fetchone()
if result: exist = True
except: except:
print(current_time(), "ERROR DATE: source =", url) print(current_time(), "ERROR DATE: source =", url)
print(current_time(), "ERROR DATE: date =", date) print(current_time(), "ERROR DATE: date =", date)
@ -1432,20 +1488,24 @@ async def check_entry_exist(
""" """
SELECT id SELECT id
FROM entries FROM entries
WHERE title = :title and link = :link WHERE
title = :title and
link = :link
""" """
) )
result = cur.execute(sql, { result = cur.execute(sql, {
"title": title, "title": title,
"link": link "link": link
}).fetchone() }).fetchone()
try: if result: exist = True
if result: # try:
return True # if result:
else: # return True
return None # else:
except: # return None
print(current_time(), "ERROR DATE: result =", url) # except:
# print(current_time(), "ERROR DATE: result =", url)
return exist
async def set_settings_value(db_file, key_value): async def set_settings_value(db_file, key_value):

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from slixfeed.datetime import current_time
from slixmpp.exceptions import IqTimeout from slixmpp.exceptions import IqTimeout
import logging import logging
@ -39,5 +38,4 @@ async def jid_type(self, jid):
("Jabber ID:", jid) ("Jabber ID:", jid)
] ]
for message in messages: for message in messages:
print(current_time(), message) logging.error(message)
logging.error(current_time(), message)