Fix entry duplication check
This commit is contained in:
parent
fa4c85cd61
commit
956ce69fcb
3 changed files with 108 additions and 57 deletions
|
@ -162,34 +162,36 @@ def list_unread_entries(result, feed_title):
|
||||||
|
|
||||||
|
|
||||||
def list_search_results(query, results):
    """
    Format search results as a message.

    Parameters
    ----------
    query : str
        Search query, echoed in the message.
    results : list
        Rows of which the first two items are printed, each on a line
        of its own (presumably title and link; verify against caller).

    Returns
    -------
    message : str
        Search results as message, or a notice that nothing was found.
    """
    # Guard first, so the header is not built only to be thrown away.
    if not results:
        return "No results were found for: {}".format(query)
    header = "Search results for '{}':\n\n```".format(query)
    # Join once instead of growing the string on every iteration.
    items = "".join(
        "\n{}\n{}\n".format(str(result[0]), str(result[1]))
        for result in results)
    footer = "```\nTotal of {} results".format(len(results))
    return header + items + footer
|
||||||
|
|
||||||
|
|
||||||
def list_feeds_by_query(query, results):
    """
    Format feeds that match a query as a message.

    Parameters
    ----------
    query : str
        Search query, echoed in the message.
    results : list
        Rows of which the first three items are printed as
        name, bracketed value and URL, in that order.

    Returns
    -------
    message : str
        Matching feeds as message, or a notice that nothing was found.
    """
    # Guard first, so the header is not built only to be thrown away.
    if not results:
        return "No feeds were found for: {}".format(query)
    header = "Feeds containing '{}':\n\n```".format(query)
    # Join once instead of growing the string on every iteration.
    items = "".join(
        (
            "\nName : {} [{}]"
            "\nURL : {}"
            "\n"
        ).format(str(result[0]), str(result[1]), str(result[2]))
        for result in results)
    footer = "\n```\nTotal of {} feeds".format(len(results))
    return header + items + footer
|
||||||
|
|
||||||
|
|
||||||
def list_statistics(values):
|
def list_statistics(values):
|
||||||
|
@ -206,7 +208,7 @@ def list_statistics(values):
|
||||||
msg : str
|
msg : str
|
||||||
Statistics as message.
|
Statistics as message.
|
||||||
"""
|
"""
|
||||||
msg = (
|
message = (
|
||||||
"```"
|
"```"
|
||||||
"\nSTATISTICS\n"
|
"\nSTATISTICS\n"
|
||||||
"News items : {}/{}\n"
|
"News items : {}/{}\n"
|
||||||
|
@ -219,28 +221,28 @@ def list_statistics(values):
|
||||||
"```"
|
"```"
|
||||||
).format(values[0], values[1], values[2], values[3],
|
).format(values[0], values[1], values[2], values[3],
|
||||||
values[4], values[5], values[6], values[7])
|
values[4], values[5], values[6], values[7])
|
||||||
return msg
|
return message
|
||||||
|
|
||||||
|
|
||||||
# FIXME Replace counter by len
|
# FIXME Replace counter by len
|
||||||
def list_last_entries(results, num):
    """
    Format the most recent entries as a message.

    Parameters
    ----------
    results : list
        Rows of which the first two items are printed, each on a line
        of its own (presumably title and link; verify against caller).
    num : int or str
        Number echoed in the heading; it is not used to limit the rows.

    Returns
    -------
    message : str
        Recent titles as message, or a notice that there are no news.
    """
    # Guard first, so the heading is not built only to be thrown away.
    if not results:
        return "There are no news at the moment."
    heading = "Recent {} titles:\n\n```".format(num)
    # Join once instead of growing the string on every iteration.
    items = "".join(
        "\n{}\n{}\n".format(str(result[0]), str(result[1]))
        for result in results)
    return heading + items + "```\n"
|
||||||
|
|
||||||
|
|
||||||
def list_feeds(results):
|
def list_feeds(results):
|
||||||
feeds_list = "\nList of subscriptions:\n\n```\n"
|
message = "\nList of subscriptions:\n\n```\n"
|
||||||
for result in results:
|
for result in results:
|
||||||
feeds_list += (
|
message += (
|
||||||
"Name : {}\n"
|
"Name : {}\n"
|
||||||
"URL : {}\n"
|
"URL : {}\n"
|
||||||
# "Updated : {}\n"
|
# "Updated : {}\n"
|
||||||
|
@ -250,36 +252,34 @@ def list_feeds(results):
|
||||||
).format(
|
).format(
|
||||||
str(result[0]), str(result[1]), str(result[2]))
|
str(result[0]), str(result[1]), str(result[2]))
|
||||||
if len(results):
|
if len(results):
|
||||||
return feeds_list + (
|
message += (
|
||||||
"```\nTotal of {} subscriptions.\n"
|
"```\nTotal of {} subscriptions.\n"
|
||||||
).format(len(results))
|
).format(len(results))
|
||||||
else:
|
else:
|
||||||
msg = (
|
message = (
|
||||||
"List of subscriptions is empty.\n"
|
"List of subscriptions is empty.\n"
|
||||||
"To add feed, send a URL\n"
|
"To add feed, send a URL\n"
|
||||||
"Try these:\n"
|
"Try these:\n"
|
||||||
# TODO Pick random from featured/recommended
|
# TODO Pick random from featured/recommended
|
||||||
"https://reclaimthenet.org/feed/"
|
"https://reclaimthenet.org/feed/"
|
||||||
)
|
)
|
||||||
return msg
|
return message
|
||||||
|
|
||||||
|
|
||||||
async def list_bookmarks(self):
    """
    List bookmarked groupchats as a message.

    The bookmarks are awaited from ``bookmark.get(self)``; of each
    returned item only the value under key "jid" is printed.

    Returns
    -------
    message : str
        Groupchats as message, closed by their total number.
    """
    conferences = await bookmark.get(self)
    # Join once instead of growing the string on every iteration.
    entries = "".join(
        (
            "{}\n"
            "\n"
        ).format(conference["jid"])
        for conference in conferences)
    # The total is taken with len(), no manual counter is needed.
    footer = "```\nTotal of {} groupchats.\n".format(len(conferences))
    return "\nList of groupchats:\n\n```\n" + entries + footer
|
||||||
|
|
||||||
|
|
||||||
def export_to_markdown(jid, filename, results):
|
def export_to_markdown(jid, filename, results):
|
||||||
|
@ -582,12 +582,7 @@ async def scan(db_file, url):
|
||||||
URL. The default is None.
|
URL. The default is None.
|
||||||
"""
|
"""
|
||||||
if isinstance(url, tuple): url = url[0]
|
if isinstance(url, tuple): url = url[0]
|
||||||
try:
|
result = await fetch.download_feed(url)
|
||||||
result = await fetch.download_feed(url)
|
|
||||||
except:
|
|
||||||
breakpoint()
|
|
||||||
# if not result:
|
|
||||||
# return
|
|
||||||
try:
|
try:
|
||||||
document = result[0]
|
document = result[0]
|
||||||
status = result[1]
|
status = result[1]
|
||||||
|
@ -625,7 +620,7 @@ async def scan(db_file, url):
|
||||||
IncompleteRead,
|
IncompleteRead,
|
||||||
error.URLError
|
error.URLError
|
||||||
) as e:
|
) as e:
|
||||||
print("Error:", e)
|
logging.error(e)
|
||||||
return
|
return
|
||||||
# new_entry = 0
|
# new_entry = 0
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
|
@ -647,13 +642,11 @@ async def scan(db_file, url):
|
||||||
# title = "{}: *{}*".format(feed["feed"]["title"], entry.title)
|
# title = "{}: *{}*".format(feed["feed"]["title"], entry.title)
|
||||||
title = entry.title if entry.has_key("title") else date
|
title = entry.title if entry.has_key("title") else date
|
||||||
entry_id = entry.id if entry.has_key("id") else link
|
entry_id = entry.id if entry.has_key("id") else link
|
||||||
summary = entry.summary if entry.has_key("summary") else ''
|
|
||||||
exist = await sqlite.check_entry_exist(
|
exist = await sqlite.check_entry_exist(
|
||||||
db_file, url, entry_id=entry_id,
|
db_file, url, entry_id=entry_id,
|
||||||
title=title, link=link, date=date)
|
title=title, link=link, date=date)
|
||||||
if not exist:
|
if not exist:
|
||||||
if entry.has_key("summary"):
|
summary = entry.summary if entry.has_key("summary") else ''
|
||||||
summary = entry.summary
|
|
||||||
read_status = 0
|
read_status = 0
|
||||||
pathname = urlsplit(link).path
|
pathname = urlsplit(link).path
|
||||||
string = ("{} {} {}"
|
string = ("{} {} {}"
|
||||||
|
|
|
@ -120,6 +120,11 @@ def create_tables(db_file):
|
||||||
);
|
);
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
# TODO
|
||||||
|
# Consider parameter unique:
|
||||||
|
# entry_id TEXT NOT NULL UNIQUE,
|
||||||
|
# Will eliminate function:
|
||||||
|
# check_entry_exist
|
||||||
entries_table_sql = (
|
entries_table_sql = (
|
||||||
"""
|
"""
|
||||||
CREATE TABLE IF NOT EXISTS entries (
|
CREATE TABLE IF NOT EXISTS entries (
|
||||||
|
@ -555,6 +560,33 @@ async def get_unread_entries(db_file, num):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
async def get_feed_id(cur, url):
    """
    Get index of given feed.

    Parameters
    ----------
    cur : object
        Cursor object.
    url : str
        URL.

    Returns
    -------
    feed_id : object or None
        Value of column ``id`` of the first row of table ``feeds``
        of which column ``url`` equals the given URL, or None when
        there is no such row.
    """
    sql = (
        """
        SELECT id
        FROM feeds
        WHERE url = :url
        """
        )
    # A named placeholder (:url) is to be bound by a mapping; binding
    # it by a sequence is deprecated by module sqlite3.
    row = cur.execute(sql, {"url": url}).fetchone()
    # fetchone() yields None when no row matches; do not subscript it.
    return row[0] if row else None
|
||||||
|
|
||||||
|
|
||||||
async def mark_entry_as_read(cur, ix):
|
async def mark_entry_as_read(cur, ix):
|
||||||
"""
|
"""
|
||||||
Set read status of entry as read.
|
Set read status of entry as read.
|
||||||
|
@ -871,8 +903,8 @@ When time functions of slixfeed.timedate
|
||||||
were async, there were errors of coroutines
|
were async, there were errors of coroutines
|
||||||
|
|
||||||
"""
|
"""
|
||||||
async def add_entry(db_file, title, link,
|
async def add_entry(
|
||||||
entry_id, url, date, read_status):
|
db_file, title, link, entry_id, url, date, read_status):
|
||||||
"""
|
"""
|
||||||
Add a new entry row into the entries table.
|
Add a new entry row into the entries table.
|
||||||
|
|
||||||
|
@ -880,8 +912,18 @@ async def add_entry(db_file, title, link,
|
||||||
----------
|
----------
|
||||||
db_file : str
|
db_file : str
|
||||||
Path to database file.
|
Path to database file.
|
||||||
entry : str
|
title : str
|
||||||
Entry properties.
|
Title.
|
||||||
|
link : str
|
||||||
|
Link.
|
||||||
|
entry_id : str
|
||||||
|
Entry index.
|
||||||
|
url : str
|
||||||
|
URL.
|
||||||
|
date : str
|
||||||
|
Date.
|
||||||
|
read_status : str
|
||||||
|
0 or 1.
|
||||||
"""
|
"""
|
||||||
async with DBLOCK:
|
async with DBLOCK:
|
||||||
with create_connection(db_file) as conn:
|
with create_connection(db_file) as conn:
|
||||||
|
@ -999,9 +1041,9 @@ async def update_feed_status(db_file, url, status_code):
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
cur.execute(sql, {
|
cur.execute(sql, {
|
||||||
"status_code" : status_code,
|
"status_code": status_code,
|
||||||
"scanned" : date.today(),
|
"scanned": date.today(),
|
||||||
"feed_id" : feed_id
|
"feed_id": feed_id
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@ -1344,6 +1386,7 @@ async def search_entries(db_file, query):
|
||||||
sql, (f'%{query}%', f'%{query}%')).fetchall()
|
sql, (f'%{query}%', f'%{query}%')).fetchall()
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
FIXME
|
FIXME
|
||||||
|
|
||||||
|
@ -1395,18 +1438,30 @@ async def check_entry_exist(
|
||||||
True or None.
|
True or None.
|
||||||
"""
|
"""
|
||||||
cur = get_cursor(db_file)
|
cur = get_cursor(db_file)
|
||||||
|
exist = False
|
||||||
if entry_id:
|
if entry_id:
|
||||||
|
sql = (
|
||||||
|
"""
|
||||||
|
SELECT id
|
||||||
|
FROM feeds
|
||||||
|
WHERE url = :url
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
feed_id = cur.execute(sql, (url,)).fetchone()[0]
|
||||||
sql = (
|
sql = (
|
||||||
"""
|
"""
|
||||||
SELECT id
|
SELECT id
|
||||||
FROM entries
|
FROM entries
|
||||||
WHERE entry_id = :entry_id and feed_id = :feed_id
|
WHERE
|
||||||
|
entry_id = :entry_id and
|
||||||
|
feed_id = :feed_id
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
result = cur.execute(sql, {
|
result = cur.execute(sql, {
|
||||||
"entry_id": entry_id,
|
"entry_id": entry_id,
|
||||||
"feed_id": url
|
"feed_id": feed_id
|
||||||
}).fetchone()
|
}).fetchone()
|
||||||
|
if result: exist = True
|
||||||
elif date:
|
elif date:
|
||||||
sql = (
|
sql = (
|
||||||
"""
|
"""
|
||||||
|
@ -1424,6 +1479,7 @@ async def check_entry_exist(
|
||||||
"link": link,
|
"link": link,
|
||||||
"timestamp": date
|
"timestamp": date
|
||||||
}).fetchone()
|
}).fetchone()
|
||||||
|
if result: exist = True
|
||||||
except:
|
except:
|
||||||
print(current_time(), "ERROR DATE: source =", url)
|
print(current_time(), "ERROR DATE: source =", url)
|
||||||
print(current_time(), "ERROR DATE: date =", date)
|
print(current_time(), "ERROR DATE: date =", date)
|
||||||
|
@ -1432,20 +1488,24 @@ async def check_entry_exist(
|
||||||
"""
|
"""
|
||||||
SELECT id
|
SELECT id
|
||||||
FROM entries
|
FROM entries
|
||||||
WHERE title = :title and link = :link
|
WHERE
|
||||||
|
title = :title and
|
||||||
|
link = :link
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
result = cur.execute(sql, {
|
result = cur.execute(sql, {
|
||||||
"title": title,
|
"title": title,
|
||||||
"link": link
|
"link": link
|
||||||
}).fetchone()
|
}).fetchone()
|
||||||
try:
|
if result: exist = True
|
||||||
if result:
|
# try:
|
||||||
return True
|
# if result:
|
||||||
else:
|
# return True
|
||||||
return None
|
# else:
|
||||||
except:
|
# return None
|
||||||
print(current_time(), "ERROR DATE: result =", url)
|
# except:
|
||||||
|
# print(current_time(), "ERROR DATE: result =", url)
|
||||||
|
return exist
|
||||||
|
|
||||||
|
|
||||||
async def set_settings_value(db_file, key_value):
|
async def set_settings_value(db_file, key_value):
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from slixfeed.datetime import current_time
|
|
||||||
from slixmpp.exceptions import IqTimeout
|
from slixmpp.exceptions import IqTimeout
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
@ -39,5 +38,4 @@ async def jid_type(self, jid):
|
||||||
("Jabber ID:", jid)
|
("Jabber ID:", jid)
|
||||||
]
|
]
|
||||||
for message in messages:
|
for message in messages:
|
||||||
print(current_time(), message)
|
logging.error(message)
|
||||||
logging.error(current_time(), message)
|
|
Loading…
Reference in a new issue