Update 2 files

- /slixfeed/__main__.py
- /slixfeed/database.py
Schimon Jehudah 2023-09-29 11:49:24 +00:00
parent 8907c5bdb9
commit cf44241698
2 changed files with 720 additions and 460 deletions

slixfeed/__main__.py

@@ -27,14 +27,18 @@ import time
 import aiohttp
 from bs4 import BeautifulSoup
+from xml.etree.ElementTree import ElementTree, ParseError
+from urllib.parse import urlparse
+from lxml import html
 import feedparser
 import slixmpp
-from eliot import start_action, to_file
-from . import database
-to_file(open("slixfeed.log", "w"))
+# from eliot import start_action, to_file
+# # to_file(open("slixfeed.log", "w"))
+# # with start_action(action_type="set_date()", jid=jid):
+# # with start_action(action_type="message()", msg=msg):
+import database

 class Slixfeed(slixmpp.ClientXMPP):
@@ -51,7 +55,8 @@ class Slixfeed(slixmpp.ClientXMPP):
         # listen for this event so that we can initialize
         # our roster.
         self.add_event_handler("session_start", self.start)
-        self.add_event_handler("session_start", self.send_updates)
+        self.add_event_handler("session_start", self.send_update)
+        self.add_event_handler("session_start", self.send_status)
         self.add_event_handler("session_start", self.check_updates)

         # The message event is triggered whenever a message
@@ -91,7 +96,6 @@ class Slixfeed(slixmpp.ClientXMPP):
         for stanza objects and the Message stanza to see
         how it may be used.
         """
-        with start_action(action_type="message()", msg=msg):
         if msg['type'] in ('chat', 'normal'):
             message = " ".join(msg['body'].split())
             if message.lower().startswith('help'):
@@ -114,7 +118,7 @@
             elif message.lower().startswith('feed add '):
                 print("COMMAND: feed add")
                 print("ACCOUNT: " + str(msg['from']))
-                action = await initdb(msg['from'].bare, is_feed_exist, message[9:])
+                action = await initdb(msg['from'].bare, add_feed, message[9:])
             elif message.lower().startswith('feed remove '):
                 print("COMMAND: feed remove")
                 print("ACCOUNT: " + str(msg['from']))
@@ -138,7 +142,6 @@
     async def check_updates(self, event):
         # print("check_updates")
         # time.sleep(1)
-        with start_action(action_type="check_updates()", event=event):
         while True:
             print("Checking update")
             db_dir = get_default_dbdir()
@@ -154,12 +157,12 @@
             for file in files:
                 jid = file[:-3]
                 await initdb(jid, download_updates)
-            await asyncio.sleep(9)
+            # await asyncio.sleep(9)
+            await asyncio.sleep(90)

-    async def send_updates(self, event):
-        # print("send_updates")
+    async def send_update(self, event):
+        # print("send_update")
         # time.sleep(1)
-        with start_action(action_type="send_updates()", event=event):
         while True:
             db_dir = get_default_dbdir()
             if not os.path.isdir(db_dir):
@@ -175,18 +178,63 @@
             for file in files:
                 if not file.endswith('.db-jour.db'):
                     jid = file[:-3]
                     new = await initdb(
                         jid,
                         database.get_unread
                     )
                     if new:
+                        # NOTE Consider send_message
                         msg = self.make_message(
                             mto=jid,
                             mbody=new,
                             mtype='chat'
                         )
                         msg.send()
-            await asyncio.sleep(15)
+            # await asyncio.sleep(15)
+            await asyncio.sleep(60 * 3)
+
+    async def send_status(self, event):
+        while True:
+            db_dir = get_default_dbdir()
+            if not os.path.isdir(db_dir):
+                msg = ("Slixfeed can not work without a database. \n"
+                       "To create a database, follow these steps: \n"
+                       "Add Slixfeed contact to your roster \n"
+                       "Send a feed to the bot by: \n"
+                       "feed add https://reclaimthenet.org/feed/")
+                print(msg)
+            else:
+                files = os.listdir(db_dir)
+                for file in files:
+                    jid = file[:-3]
+                    unread = await initdb(
+                        jid,
+                        database.get_unread_entries_number
+                    )
+                    if unread:
+                        msg_status = ('News', str(unread))
+                        msg_status = ' '.join(msg_status)
+                    else:
+                        msg_status = 'No News'
+                    print(msg_status, 'for', jid)
+                    # NOTE Consider send_presence
+                    sts = self.make_presence(
+                        pstatus=msg_status,
+                        pto=jid,
+                        pfrom=jid,
+                        pnick='Slixfeed'
+                    )
+                    sts.send()
+            await asyncio.sleep(60)

 def print_help():
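
For reference, the new send_status() loop reduces to a single slixmpp call: publish the unread count as a directed presence status line. A minimal standalone sketch of that step, assuming a connected slixmpp.ClientXMPP instance (the helper name and the jid/count values are hypothetical, not part of this commit):

import slixmpp

def publish_unread_status(bot: slixmpp.ClientXMPP, jid: str, unread: int):
    # Mirror the status-text logic of send_status() above.
    status_text = 'News {}'.format(unread) if unread else 'No News'
    presence = bot.make_presence(
        pstatus=status_text,  # shown as the contact's status message
        pto=jid,              # directed presence to the subscriber
        pfrom=jid,
        pnick='Slixfeed'
    )
    presence.send()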
@@ -278,7 +326,6 @@ def get_default_dbdir():
 async def initdb(jid, callback, message=None):
     # print("initdb")
     # time.sleep(1)
-    with start_action(action_type="initdb()", jid=jid):
     db_dir = get_default_dbdir()
     if not os.path.isdir(db_dir):
         os.mkdir(db_dir)
@@ -297,11 +344,9 @@ async def download_updates(db_file):
     # print("db_file")
     # print(db_file)
     # time.sleep(1)
-    with start_action(action_type="download_updates()", db=db_file):
     urls = await database.get_subscriptions(db_file)
     for url in urls:
-        with start_action(action_type="download_updates()", url=url):
         # print("for url in urls")
         source = url[0]
         # print("source: ", source)
@@ -342,17 +387,44 @@ async def download_updates(db_file):
         # NOTE Need to correct the SQL statement to do so
         entries = feed.entries
         length = len(entries)
-        await database.remove_entry(db_file, source, length)
+        # breakpoint()
+        # await database.remove_entry(db_file, source, length)
+        await database.remove_nonexistent_entries(db_file, feed, source)
+        # breakpoint()
+        new_entry = 0
         for entry in entries:
             if entry.has_key("title"):
                 title = entry.title
             else:
                 title = feed["feed"]["title"]
-            link = source if not entry.link else entry.link
-            exist = await database.check_entry(db_file, title, link)
+            if entry.has_key("link"):
+                link = entry.link
+            else:
+                link = source
+            # print('source:', source)
+            exist = await database.check_entry_exist(db_file, title, link)
+            # breakpoint()
+            # if exist:
+            #     print("//////// OLD ////////")
+            #     print(source)
+            #     print('ex:', exist)
+            #     if entry.has_key("id"):
+            #         print('id:', entry.id)
             if not exist:
+                # breakpoint()
+                new_entry = new_entry + 1
+                # print("******** NEW ********")
+                # print('T', title)
+                # if entry.has_key("date"):
+                #     print('D', entry.date)
+                # print('L', link)
+                # print('ex', exist)
+                # TODO Enhance summary
                 if entry.has_key("summary"):
                     summary = entry.summary
                     # Remove HTML tags
@@ -364,20 +436,21 @@ async def download_updates(db_file):
                 #print('~~~~~~summary not in entry')
                 entry = (title, summary, link, source, 0);
                 await database.add_entry_and_set_date(db_file, source, entry)
+    # print("### added", new_entry, "entries")

 async def download_feed(url):
-    with start_action(action_type="download_feed()", url=url):
     # print("download_feed")
     # time.sleep(1)
     timeout = aiohttp.ClientTimeout(total=10)
     async with aiohttp.ClientSession() as session:
         # async with aiohttp.ClientSession(trust_env=True) as session:
         try:
             async with session.get(url, timeout=timeout) as response:
                 status = response.status
                 if response.status == 200:
                     doc = await response.text()
+                    # print(response.content_type)
                     return [doc, status]
                 else:
                     return [False, status]
@@ -389,7 +462,7 @@ async def download_feed(url):
         return [False, "timeout"]

-async def is_feed_exist(db_file, url):
+async def add_feed(db_file, url):
     # print("add_feed")
     # print("db_file")
     # print(db_file)
@@ -400,13 +473,157 @@ async def is_feed_exist(db_file, url):
     :param url:
     :return: string
     """
-    exist = await database.check_feed(db_file, url)
+    exist = await database.check_feed_exist(db_file, url)
     if not exist:
         res = await download_feed(url)
-        await database.add_feed(db_file, url, res)
+        if res[0]:
+            feed = feedparser.parse(res[0])
+            if feed.bozo:
+                bozo = ("WARNING: Bozo detected. Failed to load <{}>.".format(url))
+                print(bozo)
+                try:
+                    # tree = etree.fromstring(res[0]) # etree -> html
+                    tree = html.fromstring(res[0])
+                except:
+                    return "Failed to parse {} as feed".format(url)
+                print("RSS Auto-Discovery Engaged")
+                xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]"""
+                # xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
+                # xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
+                feeds = tree.xpath(xpath_query)
+                if len(feeds) > 1:
+                    msg = "RSS Auto-Discovery has found {} feeds:\n\n".format(len(feeds))
+                    for feed in feeds:
+                        # # The following code works;
+                        # # The following code will catch
+                        # # only valid resources (i.e. not 404);
+                        # # The following code requires more bandwidth.
+                        # res = await download_feed(feed)
+                        # if res[0]:
+                        #     disco = feedparser.parse(res[0])
+                        #     title = disco["feed"]["title"]
+                        #     msg += "{} \n {} \n\n".format(title, feed)
+                        feed_name = feed.xpath('@title')[0]
+                        feed_addr = feed.xpath('@href')[0]
+                        msg += "{}\n{}\n\n".format(feed_name, feed_addr)
+                    msg += "The above feeds were extracted from\n{}".format(url)
+                    return msg
+                elif feeds:
+                    url = feeds[0].xpath('@href')[0]
+                    # Why wouldn't add_feed return a message
+                    # upon success unless return is explicitly
+                    # mentioned, yet upon failure it wouldn't?
+                    return await add_feed(db_file, url)
+                # Search for feeds by file extension and path
+                paths = ["/atom",
+                         "/atom.php",
+                         "/atom.xml",
+                         "/rdf",
+                         "/rdf.php",
+                         "/rdf.xml",
+                         "/rss",
+                         "/rss.php",
+                         "/rss.xml",
+                         "/feed",
+                         "/feed.atom",
+                         "/feed.rdf",
+                         "/feed.rss",
+                         "/feed.xml",
+                         "/news",
+                         "/news/feed",
+                         "?format=rss",
+                         "/feeds/news_feed",
+                         "/content-feeds/",
+                         "/app.php/feed",  # phpBB
+                         "/posts.rss"  # Discourse
+                         ]  # More paths "rss.json", "feed.json"
+                print("RSS Scan Mode Engaged")
+                feeds = {}
+                for path in paths:
+                    # xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
+                    xpath_query = "//a[contains(@href,'{}')]".format(path)
+                    addresses = tree.xpath(xpath_query)
+                    parted_url = urlparse(url)
+                    for address in addresses:
+                        address = address.xpath('@href')[0]
+                        if address.startswith('/'):
+                            address = parted_url.netloc + address
+                        res = await download_feed(address)
+                        # print(address)
+                        if res[1] == 200:
+                            # print(address)
+                            try:
+                                feeds[address] = feedparser.parse(res[0])["feed"]["title"]
+                                # print(feeds)
+                            except:
+                                # print('Not a feed')
+                                continue
+                if len(feeds) > 1:
+                    msg = "RSS URL scan has found {} feeds:\n\n".format(len(feeds))
+                    for feed in feeds:
+                        # try:
+                        #     res = await download_feed(feed)
+                        # except:
+                        #     continue
+                        feed_name = feeds[feed]
+                        feed_addr = feed
+                        msg += "{}\n{}\n\n".format(feed_name, feed_addr)
+                    msg += "The above feeds were extracted from\n{}".format(url)
+                    return msg
+                elif feeds:
+                    url = list(feeds)[0]
+                    return await add_feed(db_file, url)
+                # (HTTP) Request(s) Paths
+                print("RSS Arbitrary Mode Engaged")
+                feeds = {}
+                parted_url = urlparse(url)
+                for path in paths:
+                    # print(path)
+                    if parted_url.path.split('/')[1]:
+                        paths.extend([".atom", ".feed", ".rdf", ".rss"]) if '.rss' not in paths else -1
+                        # if paths.index('.rss'):
+                        #     paths.extend([".atom", ".feed", ".rdf", ".rss"])
+                        address = parted_url.scheme + '://' + parted_url.netloc + '/' + parted_url.path.split('/')[1] + path
+                        res = await download_feed(address)
+                        if res[1] == 200:
+                            # print('2res[1]')
+                            # print(res[1])
+                            # print(feedparser.parse(res[0])["feed"]["title"])
+                            feeds[address] = feedparser.parse(res[0])["feed"]["title"]
+                            # print(feeds)
+                    else:
+                        address = parted_url.scheme + '://' + parted_url.netloc + path
+                        res = await download_feed(address)
+                        if res[1] == 200:
+                            # print('1res[1]')
+                            # print(res[1])
+                            # print(feedparser.parse(res[0])["feed"]["title"])
+                            feeds[address] = feedparser.parse(res[0])["feed"]["title"]
+                            # print(feeds)
+                if len(feeds) > 1:
+                    msg = "RSS URL discovery has found {} feeds:\n\n".format(len(feeds))
+                    for feed in feeds:
+                        feed_name = feeds[feed]
+                        feed_addr = feed
+                        msg += "{}\n{}\n\n".format(feed_name, feed_addr)
+                    msg += "The above feeds were extracted from\n{}".format(url)
+                    return msg
+                elif feeds:
+                    url = list(feeds)[0]
+                    return await add_feed(db_file, url)
+                else:
+                    return "No news feeds were found for URL <{}>.".format(url)
+            else:
+                return await database.add_feed(db_file, feed, url, res)
+        else:
+            return "Failed to get URL <{}>. HTTP Error {}".format(url, res[1])
-    else:
-        return "News source is already listed in the subscription list"
+    else:
+        return "News source <{}> is already listed in the subscription list".format(url)

 def toggle_state(jid, state):
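
The auto-discovery branch above hinges on a single XPath query over the fetched page. A hedged, self-contained sketch of just that step, reusing the commit's query string against a made-up HTML page (the page content and addresses here are hypothetical):

from lxml import html

page = """<html><head>
<link rel="alternate" type="application/rss+xml"
      title="Example Feed" href="https://example.org/feed.xml"/>
</head><body></body></html>"""

tree = html.fromstring(page)
xpath_query = ('//link[(@rel="alternate") and '
               '(@type="application/atom+xml" or '
               '@type="application/rdf+xml" or '
               '@type="application/rss+xml")]')
for link in tree.xpath(xpath_query):
    # Each match carries the advertised feed title and address.
    print(link.xpath('@title')[0], link.xpath('@href')[0])
# Expected output: Example Feed https://example.org/feed.xml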
@@ -418,7 +635,6 @@ def toggle_state(jid, state):
     :param state: boolean
     :return:
     """
-    with start_action(action_type="set_date()", jid=jid):
     db_dir = get_default_dbdir()
     db_file = os.path.join(db_dir, r"{}.db".format(jid))
     bk_file = os.path.join(db_dir, r"{}.db.bak".format(jid))

slixfeed/database.py

@@ -9,7 +9,13 @@ import asyncio
 from datetime import date
 import feedparser
-from eliot import start_action, to_file
+# from eliot import start_action, to_file
+# # with start_action(action_type="list_subscriptions()", db=db_file):
+# # with start_action(action_type="last_entries()", num=num):
+# # with start_action(action_type="get_subscriptions()"):
+# # with start_action(action_type="remove_entry()", source=source):
+# # with start_action(action_type="search_entries()", query=query):
+# # with start_action(action_type="check_entry()", link=link):
 # aiosqlite

 DBLOCK = asyncio.Lock()
@@ -27,7 +33,6 @@ def create_connection(db_file):
     :param db_file: database file
     :return: Connection object or None
     """
-    with start_action(action_type="create_connection()", db=db_file):
     conn = None
     try:
         conn = sqlite3.connect(db_file)
@@ -42,7 +47,6 @@ def create_tables(db_file):
     # print("db_file")
     # print(db_file)
     # time.sleep(1)
-    with start_action(action_type="create_tables()", db=db_file):
     with create_connection(db_file) as conn:
         feeds_table_sql = """
             CREATE TABLE IF NOT EXISTS feeds (
@@ -77,7 +81,6 @@ def get_cursor(db_file):
     :param db_file: database file
     :return: Cursor
     """
-    with start_action(action_type="get_cursor()", db=db_file):
     if db_file in CURSORS:
         return CURSORS[db_file]
     else:

@@ -87,7 +90,7 @@
         return CURSORS[db_file]
-async def add_feed(db_file, url, res):
+async def add_feed(db_file, feed, url, res):
     # print("add_feed")
     # print("db_file")
     # print(db_file)

@@ -98,13 +101,12 @@ async def add_feed(db_file, url, res):
     :param feed:
     :return: string
     """
-    with start_action(action_type="add_feed()", url=url):
     #TODO consider async with DBLOCK
     #conn = create_connection(db_file)
     # with create_connection(db_file) as conn:
-    # #exist = await check_feed(conn, url)
-    # exist = await check_feed(db_file, url)
+    # #exist = await check_feed_exist(conn, url)
+    # exist = await check_feed_exist(db_file, url)
     # if not exist:
     #     res = await main.download_feed(url)
@@ -114,27 +116,11 @@ async def add_feed(db_file, url, res):
     async with DBLOCK:
         with create_connection(db_file) as conn:
             cur = conn.cursor()
-            if res[0]:
-                feed = feedparser.parse(res[0])
-                if feed.bozo:
-                    feed = (url, 1, res[1], 0)
-                    #sql = """INSERT INTO feeds(address,enabled,status,valid)
-                    #         VALUES(?,?,?,?) """
-                    #cur.execute(sql, feed)
-                    bozo = ("WARNING: Bozo detected. Failed to load URL.")
-                    print(bozo)
-                    return "Failed to parse URL as feed"
-                else:
-                    title = feed["feed"]["title"]
-                    feed = (title, url, 1, res[1], 1)
-                    sql = """INSERT INTO feeds(name,address,enabled,status,valid)
-                             VALUES(?,?,?,?,?) """
-                    cur.execute(sql, feed)
-            else:
-                feed = (url, 1, res[1], 0)
-                #sql = "INSERT INTO feeds(address,enabled,status,valid) VALUES(?,?,?,?) "
-                #cur.execute(sql, feed)
-                return "Failed to get URL. HTTP Error {}".format(res[1])
+            title = feed["feed"]["title"]
+            feed = (title, url, 1, res[1], 1)
+            sql = """INSERT INTO feeds(name,address,enabled,status,valid)
+                     VALUES(?,?,?,?,?) """
+            cur.execute(sql, feed)

     source = title if title else '<' + url + '>'
     msg = 'News source "{}" has been added to subscription list'.format(source)
@@ -152,10 +138,10 @@ async def remove_feed(db_file, ix):
     :param id: id of the feed
     :return: string
     """
-    with start_action(action_type="remove_feed()", id=ix):
     with create_connection(db_file) as conn:
-        with DBLOCK:
+        async with DBLOCK:
             cur = conn.cursor()
+            try:
                 sql = "SELECT address FROM feeds WHERE id = ?"
                 url = cur.execute(sql, (ix,))
                 for i in url:

@@ -167,10 +153,12 @@ async def remove_feed(db_file, ix):
                 cur.execute(sql, (ix,))
                 return """News source <{}> has been removed from subscription list
                        """.format(url)
+            except:
+                return """No news source with ID {}""".format(ix)
-async def check_feed(db_file, url):
-    # print("check_feed")
+async def check_feed_exist(db_file, url):
+    # print("is_feed_exist")
     # print("db_file")
     # print(db_file)
     # time.sleep(1)

@@ -181,13 +169,27 @@ async def check_feed(db_file, url):
     :param url:
     :return: row
     """
-    with start_action(action_type="check_feed()", url=url):
     cur = get_cursor(db_file)
     sql = "SELECT id FROM feeds WHERE address = ?"
     cur.execute(sql, (url,))
     return cur.fetchone()

+async def get_unread_entries_number(db_file):
+    """
+    Check number of unread items.
+    :param db_file:
+    :return: string
+    """
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = "SELECT count(id) FROM entries WHERE read = 0"
+        count = cur.execute(sql)
+        count = cur.fetchone()[0]
+        return count

 async def get_unread(db_file):
     # print("get_unread")
     # print("db_file")
@@ -199,7 +201,6 @@ async def get_unread(db_file):
     :param id: id of the entry
     :return: string
     """
-    with start_action(action_type="get_unread()", db=db_file):
     with create_connection(db_file) as conn:
         entry = []
         cur = conn.cursor()
@@ -236,7 +237,6 @@ async def mark_as_read(cur, ix):
     :param cur:
     :param ix: index of the entry
     """
-    with start_action(action_type="mark_as_read()", id=ix):
     sql = "UPDATE entries SET summary = '', read = 1 WHERE id = ?"
     cur.execute(sql, (ix,))
@@ -253,7 +253,6 @@ async def toggle_status(db_file, ix):
     :param id: id of the feed
     :return: string
     """
-    with start_action(action_type="toggle_status()", db=db_file):
     async with DBLOCK:
         with create_connection(db_file) as conn:
             cur = conn.cursor()

@@ -270,13 +269,13 @@ async def toggle_status(db_file, ix):
             # TODO Reset feed table before further testing
             if status == 1:
                 status = 0
-                notice = "News updates for '{}' are now disabled".format(title)
+                state = "disabled"
             else:
                 status = 1
-                notice = "News updates for '{}' are now enabled".format(title)
+                state = "enabled"
             sql = "UPDATE feeds SET enabled = :status WHERE id = :id"
             cur.execute(sql, {"status": status, "id": ix})
-            return notice
+            return "Updates for '{}' are now {}".format(title, state)
 async def set_date(cur, url):

@@ -287,7 +286,6 @@ async def set_date(cur, url):
     :param url: url of the feed
     :return:
     """
-    with start_action(action_type="set_date()", url=url):
     today = date.today()
     sql = "UPDATE feeds SET updated = :today WHERE address = :url"
     # cur = conn.cursor()
@@ -327,13 +325,13 @@ async def add_entry(cur, entry):
     :param entry:
     :return:
     """
-    with start_action(action_type="add_entry()", entry=entry):
     sql = """ INSERT INTO entries(title,summary,link,source,read)
               VALUES(?,?,?,?,?) """
     # cur = conn.cursor()
     cur.execute(sql, entry)

+# This function doesn't work as expected with bbs and wiki feeds
 async def remove_entry(db_file, source, length):
     # print("remove_entry")
     # time.sleep(1)
@@ -346,7 +344,6 @@ async def remove_entry(db_file, source, length):
     :param length:
     :return:
     """
-    with start_action(action_type="remove_entry()", source=source):
     # FIXED
     # Dino empty titles are not counted https://dino.im/index.xml
     # SOLVED

@@ -367,6 +364,59 @@ async def remove_entry(db_file, source, length):
                   ORDER BY id
                   ASC LIMIT :limit)"""
         cur.execute(sql, {"source": source, "limit": limit})
+        print('### removed', limit, 'from', source)

+async def remove_nonexistent_entries(db_file, feed, source):
+    """
+    Remove entries that don't exist in the feed.
+    Check the entries returned from the feed and delete
+    non-existing entries.
+    :param conn:
+    :param source:
+    :param length:
+    :return:
+    """
+    async with DBLOCK:
+        with create_connection(db_file) as conn:
+            cur = conn.cursor()
+            sql = "SELECT id, title, link FROM entries WHERE source = ?"
+            cur.execute(sql, (source,))
+            entries_db = cur.fetchall()
+            # print('entries_db')
+            # print(entries_db)
+            for entry_db in entries_db:
+                # entry_db[0] = id
+                # entry_db[1] = title
+                # entry_db[2] = link
+                exist = False
+                # print("check-db")
+                for entry_feed in feed.entries:
+                    # print("check-feed")
+                    # TODO better check and don't repeat code
+                    if entry_feed.has_key("title"):
+                        title = entry_feed.title
+                    else:
+                        title = feed["feed"]["title"]
+                    if entry_feed.has_key("link"):
+                        link = entry_feed.link
+                    else:
+                        link = source
+                    # TODO better check and don't repeat code
+                    if entry_db[1] == title and entry_db[2] == link:
+                        # print('exist')
+                        # print(title)
+                        exist = True
+                        break
+                if not exist:
+                    # print('>>> not exist')
+                    # print(entry_db[1])
+                    # TODO Send to table archive
+                    # TODO Also make a regular/routine check for sources that
+                    # have been changed (though that can only happen when
+                    # manually editing)
+                    sql = "DELETE FROM entries WHERE id = ?"
+                    cur.execute(sql, (entry_db[0],))
+                    # breakpoint()

 async def get_subscriptions(db_file):
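
The pruning rule that remove_nonexistent_entries() implements can be stated compactly: a stored row survives only if some entry in the freshly fetched feed matches its (title, link) pair, with the same title/link fallbacks used above. A hedged sketch of that rule using a set lookup in place of the nested loop (surviving_ids is a hypothetical helper, not part of this commit):

def surviving_ids(entries_db, feed, source):
    # Build the set of (title, link) keys present in the fetched feed,
    # applying the same fallbacks as remove_nonexistent_entries().
    keys = set()
    for e in feed.entries:
        title = e.title if e.has_key("title") else feed["feed"]["title"]
        link = e.link if e.has_key("link") else source
        keys.add((title, link))
    # entries_db rows are (id, title, link); keep rows still in the feed.
    return [row[0] for row in entries_db if (row[1], row[2]) in keys]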
@@ -377,7 +427,6 @@ async def get_subscriptions(db_file):
     :param conn:
     :return: rows (tuple)
     """
-    with start_action(action_type="get_subscriptions()"):
     with create_connection(db_file) as conn:
         cur = conn.cursor()
         sql = "SELECT address FROM feeds WHERE enabled = 1"
@@ -395,7 +444,6 @@ async def list_subscriptions(db_file):
     :param conn:
     :return: rows (string)
     """
-    with start_action(action_type="list_subscriptions()", db=db_file):
     with create_connection(db_file) as conn:
         # cur = conn.cursor()
         cur = get_cursor(db_file)
@@ -431,7 +479,6 @@ async def last_entries(db_file, num):
     :param num: integer
     :return: rows (string)
     """
-    with start_action(action_type="last_entries()", num=num):
     num = int(num)
     if num > 50:
         num = 50
@@ -461,7 +508,6 @@ async def search_entries(db_file, query):
     :param query: string
     :return: rows (string)
     """
-    with start_action(action_type="search_entries()", query=query):
     if len(query) < 2:
         return "Please enter at least 2 characters to search"

@@ -483,7 +529,7 @@ async def search_entries(db_file, query):
     return "No results found for: {}".format(query)
-async def check_entry(db_file, title, link):
+async def check_entry_exist(db_file, title, link):
     # print("check_entry")
     # time.sleep(1)
     """

@@ -494,9 +540,7 @@ async def check_entry(db_file, title, link):
     :param title:
     :return: row
     """
-    with start_action(action_type="check_entry()", link=link):
-        with create_connection(db_file) as conn:
-            cur = conn.cursor()
+    cur = get_cursor(db_file)
     sql = "SELECT id FROM entries WHERE title = :title and link = :link"
     cur.execute(sql, {"title": title, "link": link})
     return cur.fetchone()