Fix entry duplication check

This commit is contained in:
Schimon Jehudah 2024-01-07 09:57:54 +00:00
parent fa4c85cd61
commit 956ce69fcb
3 changed files with 108 additions and 57 deletions

View file

@ -162,34 +162,36 @@ def list_unread_entries(result, feed_title):
def list_search_results(query, results):
    """
    Compose a message listing the results of an entry search.

    Parameters
    ----------
    query : str
        Search query, echoed back in the message.
    results : list
        Rows to list. The first two fields of each row are printed on
        consecutive lines (presumably title and link; confirm against
        the query that produces the rows).

    Returns
    -------
    message : str
        Fenced list of results followed by their total, or a notice
        that nothing was found when ``results`` is empty.
    """
    # Nothing found: the notice replaces the whole message, header included.
    if not results:
        return "No results were found for: {}".format(query)
    parts = ["Search results for '{}':\n\n```".format(query)]
    parts.extend(
        "\n{}\n{}\n".format(str(result[0]), str(result[1]))
        for result in results)
    parts.append("```\nTotal of {} results".format(len(results)))
    return "".join(parts)
def list_feeds_by_query(query, results):
    """
    Compose a message listing the feeds that match a query.

    Parameters
    ----------
    query : str
        Search query, echoed back in the message.
    results : list
        Rows to list. Field 0 is printed as the name, field 1 in
        square brackets next to it and field 2 as the URL (field 1 is
        presumably the feed index; confirm against the query that
        produces the rows).

    Returns
    -------
    message : str
        Fenced list of feeds followed by their total, or a notice
        that nothing was found when ``results`` is empty.
    """
    # Nothing found: the notice replaces the whole message, header included.
    if not results:
        return "No feeds were found for: {}".format(query)
    parts = ["Feeds containing '{}':\n\n```".format(query)]
    parts.extend(
        "\nName : {} [{}]\nURL : {}\n".format(
            str(result[0]), str(result[1]), str(result[2]))
        for result in results)
    parts.append("\n```\nTotal of {} feeds".format(len(results)))
    return "".join(parts)
def list_statistics(values):
@ -206,7 +208,7 @@ def list_statistics(values):
msg : str
Statistics as message.
"""
msg = (
message = (
"```"
"\nSTATISTICS\n"
"News items : {}/{}\n"
@ -219,28 +221,28 @@ def list_statistics(values):
"```"
).format(values[0], values[1], values[2], values[3],
values[4], values[5], values[6], values[7])
return msg
return message
# FIXME Replace counter by len
def list_last_entries(results, num):
    """
    Compose a message listing recent entries.

    Parameters
    ----------
    results : list
        Rows to list. The first two fields of each row are printed on
        consecutive lines (presumably title and link; confirm against
        the query that produces the rows).
    num : int or str
        Number shown in the header. It is only formatted into the
        text; the amount of rows listed is determined by ``results``.

    Returns
    -------
    message : str
        Fenced list of entries, or a notice that there are no news
        when ``results`` is empty.
    """
    # No entries: the notice replaces the whole message, header included.
    if not results:
        return "There are no news at the moment."
    parts = ["Recent {} titles:\n\n```".format(num)]
    parts.extend(
        "\n{}\n{}\n".format(str(result[0]), str(result[1]))
        for result in results)
    parts.append("```\n")
    return "".join(parts)
def list_feeds(results):
feeds_list = "\nList of subscriptions:\n\n```\n"
message = "\nList of subscriptions:\n\n```\n"
for result in results:
feeds_list += (
message += (
"Name : {}\n"
"URL : {}\n"
# "Updated : {}\n"
@ -250,36 +252,34 @@ def list_feeds(results):
).format(
str(result[0]), str(result[1]), str(result[2]))
if len(results):
return feeds_list + (
message += (
"```\nTotal of {} subscriptions.\n"
).format(len(results))
else:
msg = (
message = (
"List of subscriptions is empty.\n"
"To add feed, send a URL\n"
"Try these:\n"
# TODO Pick random from featured/recommended
"https://reclaimthenet.org/feed/"
)
return msg
return message
async def list_bookmarks(self):
    """
    Compose a message listing the bookmarked groupchats.

    Parameters
    ----------
    self : object
        Passed through unchanged to ``bookmark.get``.

    Returns
    -------
    message : str
        Fenced list with the "jid" of every conference returned by
        ``bookmark.get``, followed by their total.
    """
    conferences = await bookmark.get(self)
    parts = ["\nList of groupchats:\n\n```\n"]
    parts.extend(
        "{}\n\n".format(conference["jid"]) for conference in conferences)
    # The total is taken from the collection itself, not a manual counter.
    parts.append("```\nTotal of {} groupchats.\n".format(len(conferences)))
    return "".join(parts)
def export_to_markdown(jid, filename, results):
@ -582,12 +582,7 @@ async def scan(db_file, url):
URL. The default is None.
"""
if isinstance(url, tuple): url = url[0]
try:
result = await fetch.download_feed(url)
except:
breakpoint()
# if not result:
# return
result = await fetch.download_feed(url)
try:
document = result[0]
status = result[1]
@ -625,7 +620,7 @@ async def scan(db_file, url):
IncompleteRead,
error.URLError
) as e:
print("Error:", e)
logging.error(e)
return
# new_entry = 0
for entry in entries:
@ -647,13 +642,11 @@ async def scan(db_file, url):
# title = "{}: *{}*".format(feed["feed"]["title"], entry.title)
title = entry.title if entry.has_key("title") else date
entry_id = entry.id if entry.has_key("id") else link
summary = entry.summary if entry.has_key("summary") else ''
exist = await sqlite.check_entry_exist(
db_file, url, entry_id=entry_id,
title=title, link=link, date=date)
if not exist:
if entry.has_key("summary"):
summary = entry.summary
summary = entry.summary if entry.has_key("summary") else ''
read_status = 0
pathname = urlsplit(link).path
string = ("{} {} {}"

View file

@ -120,6 +120,11 @@ def create_tables(db_file):
);
"""
)
# TODO
# Consider parameter unique:
# entry_id TEXT NOT NULL UNIQUE,
# Will eliminate function:
# check_entry_exist
entries_table_sql = (
"""
CREATE TABLE IF NOT EXISTS entries (
@ -555,6 +560,33 @@ async def get_unread_entries(db_file, num):
return results
async def get_feed_id(cur, url):
    """
    Get index of given feed.

    Parameters
    ----------
    cur : object
        Cursor object (presumably an sqlite3 cursor; confirm against
        the callers).
    url : str
        URL.

    Returns
    -------
    feed_id
        Value of column ``id`` of the first row of table ``feeds``
        whose ``url`` equals the given URL.

    Raises
    ------
    TypeError
        If no row matches, because ``fetchone()`` then yields None
        and it is subscripted.
    """
    sql = (
        """
        SELECT id
        FROM feeds
        WHERE url = :url
        """
        )
    # A named placeholder (:url) has to be bound from a mapping. Binding
    # it from a tuple is deprecated by sqlite3 since Python 3.12 and is
    # rejected from Python 3.14 on.
    row = cur.execute(sql, {"url": url}).fetchone()
    feed_id = row[0]
    return feed_id
async def mark_entry_as_read(cur, ix):
"""
Set read status of entry as read.
@ -871,8 +903,8 @@ When time functions of slixfeed.timedate
were async, there were errors of coroutines
"""
async def add_entry(db_file, title, link,
entry_id, url, date, read_status):
async def add_entry(
db_file, title, link, entry_id, url, date, read_status):
"""
Add a new entry row into the entries table.
@ -880,8 +912,18 @@ async def add_entry(db_file, title, link,
----------
db_file : str
Path to database file.
entry : str
Entry properties.
title : str
Title.
link : str
Link.
entry_id : str
Entry index.
url : str
URL.
date : str
Date.
read_status : str
0 or 1.
"""
async with DBLOCK:
with create_connection(db_file) as conn:
@ -999,9 +1041,9 @@ async def update_feed_status(db_file, url, status_code):
"""
)
cur.execute(sql, {
"status_code" : status_code,
"scanned" : date.today(),
"feed_id" : feed_id
"status_code": status_code,
"scanned": date.today(),
"feed_id": feed_id
})
@ -1344,6 +1386,7 @@ async def search_entries(db_file, query):
sql, (f'%{query}%', f'%{query}%')).fetchall()
return results
"""
FIXME
@ -1395,18 +1438,30 @@ async def check_entry_exist(
True or None.
"""
cur = get_cursor(db_file)
exist = False
if entry_id:
sql = (
"""
SELECT id
FROM feeds
WHERE url = :url
"""
)
feed_id = cur.execute(sql, (url,)).fetchone()[0]
sql = (
"""
SELECT id
FROM entries
WHERE entry_id = :entry_id and feed_id = :feed_id
WHERE
entry_id = :entry_id and
feed_id = :feed_id
"""
)
result = cur.execute(sql, {
"entry_id": entry_id,
"feed_id": url
"feed_id": feed_id
}).fetchone()
if result: exist = True
elif date:
sql = (
"""
@ -1424,6 +1479,7 @@ async def check_entry_exist(
"link": link,
"timestamp": date
}).fetchone()
if result: exist = True
except:
print(current_time(), "ERROR DATE: source =", url)
print(current_time(), "ERROR DATE: date =", date)
@ -1432,20 +1488,24 @@ async def check_entry_exist(
"""
SELECT id
FROM entries
WHERE title = :title and link = :link
WHERE
title = :title and
link = :link
"""
)
result = cur.execute(sql, {
"title": title,
"link": link
}).fetchone()
try:
if result:
return True
else:
return None
except:
print(current_time(), "ERROR DATE: result =", url)
if result: exist = True
# try:
# if result:
# return True
# else:
# return None
# except:
# print(current_time(), "ERROR DATE: result =", url)
return exist
async def set_settings_value(db_file, key_value):

View file

@ -1,7 +1,6 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from slixfeed.datetime import current_time
from slixmpp.exceptions import IqTimeout
import logging
@ -39,5 +38,4 @@ async def jid_type(self, jid):
("Jabber ID:", jid)
]
for message in messages:
print(current_time(), message)
logging.error(current_time(), message)
logging.error(message)