From cf44241698a6fd093af3b055e58d7b2d9d9483c6 Mon Sep 17 00:00:00 2001 From: Schimon Jehudah Date: Fri, 29 Sep 2023 11:49:24 +0000 Subject: [PATCH] Update 2 files - /slixfeed/__main__.py - /slixfeed/database.py --- slixfeed/__main__.py | 634 +++++++++++++++++++++++++++++-------------- slixfeed/database.py | 546 ++++++++++++++++++++----------------- 2 files changed, 720 insertions(+), 460 deletions(-) diff --git a/slixfeed/__main__.py b/slixfeed/__main__.py index 4e29625..f9e7fe7 100644 --- a/slixfeed/__main__.py +++ b/slixfeed/__main__.py @@ -27,14 +27,18 @@ import time import aiohttp from bs4 import BeautifulSoup +from xml.etree.ElementTree import ElementTree, ParseError +from urllib.parse import urlparse +from lxml import html import feedparser import slixmpp -from eliot import start_action, to_file +# from eliot import start_action, to_file +# # to_file(open("slixfeed.log", "w")) +# # with start_action(action_type="set_date()", jid=jid): +# # with start_action(action_type="message()", msg=msg): -from . import database - -to_file(open("slixfeed.log", "w")) +import database class Slixfeed(slixmpp.ClientXMPP): @@ -51,7 +55,8 @@ class Slixfeed(slixmpp.ClientXMPP): # listen for this event so that we we can initialize # our roster. self.add_event_handler("session_start", self.start) - self.add_event_handler("session_start", self.send_updates) + self.add_event_handler("session_start", self.send_update) + self.add_event_handler("session_start", self.send_status) self.add_event_handler("session_start", self.check_updates) # The message event is triggered whenever a message @@ -91,102 +96,145 @@ class Slixfeed(slixmpp.ClientXMPP): for stanza objects and the Message stanza to see how it may be used. """ - with start_action(action_type="message()", msg=msg): - if msg['type'] in ('chat', 'normal'): - message = " ".join(msg['body'].split()) - if message.lower().startswith('help'): - print("COMMAND: help") - print("ACCOUNT: " + str(msg['from'])) - action = print_help() - # NOTE: Might not need it - elif message.lower().startswith('feed recent '): - print("COMMAND: feed recent") - print("ACCOUNT: " + str(msg['from'])) - action = await initdb(msg['from'].bare, database.last_entries, message[12:]) - elif message.lower().startswith('feed search '): - print("COMMAND: feed search") - print("ACCOUNT: " + str(msg['from'])) - action = await initdb( msg['from'].bare, database.search_entries, message[12:]) - elif message.lower().startswith('feed list'): - print("COMMAND: feed list") - print("ACCOUNT: " + str(msg['from'])) - action = await initdb(msg['from'].bare, database.list_subscriptions) - elif message.lower().startswith('feed add '): - print("COMMAND: feed add") - print("ACCOUNT: " + str(msg['from'])) - action = await initdb(msg['from'].bare, is_feed_exist, message[9:]) - elif message.lower().startswith('feed remove '): - print("COMMAND: feed remove") - print("ACCOUNT: " + str(msg['from'])) - action = await initdb(msg['from'].bare, database.remove_feed, message[12:]) - elif message.lower().startswith('feed status '): - print("COMMAND: feed status") - print("ACCOUNT: " + str(msg['from'])) - action = await initdb(msg['from'].bare, database.toggle_status, message[12:]) - elif message.lower().startswith('enable'): - print("COMMAND: enable") - print("ACCOUNT: " + str(msg['from'])) - action = toggle_state(msg['from'].bare, True) - elif message.lower().startswith('disable'): - print("COMMAND: disable") - print("ACCOUNT: " + str(msg['from'])) - action = toggle_state(msg['from'].bare, False) - else: - action = 'Unknown command. Press "help" for list of commands' - msg.reply(action).send() + if msg['type'] in ('chat', 'normal'): + message = " ".join(msg['body'].split()) + if message.lower().startswith('help'): + print("COMMAND: help") + print("ACCOUNT: " + str(msg['from'])) + action = print_help() + # NOTE: Might not need it + elif message.lower().startswith('feed recent '): + print("COMMAND: feed recent") + print("ACCOUNT: " + str(msg['from'])) + action = await initdb(msg['from'].bare, database.last_entries, message[12:]) + elif message.lower().startswith('feed search '): + print("COMMAND: feed search") + print("ACCOUNT: " + str(msg['from'])) + action = await initdb( msg['from'].bare, database.search_entries, message[12:]) + elif message.lower().startswith('feed list'): + print("COMMAND: feed list") + print("ACCOUNT: " + str(msg['from'])) + action = await initdb(msg['from'].bare, database.list_subscriptions) + elif message.lower().startswith('feed add '): + print("COMMAND: feed add") + print("ACCOUNT: " + str(msg['from'])) + action = await initdb(msg['from'].bare, add_feed, message[9:]) + elif message.lower().startswith('feed remove '): + print("COMMAND: feed remove") + print("ACCOUNT: " + str(msg['from'])) + action = await initdb(msg['from'].bare, database.remove_feed, message[12:]) + elif message.lower().startswith('feed status '): + print("COMMAND: feed status") + print("ACCOUNT: " + str(msg['from'])) + action = await initdb(msg['from'].bare, database.toggle_status, message[12:]) + elif message.lower().startswith('enable'): + print("COMMAND: enable") + print("ACCOUNT: " + str(msg['from'])) + action = toggle_state(msg['from'].bare, True) + elif message.lower().startswith('disable'): + print("COMMAND: disable") + print("ACCOUNT: " + str(msg['from'])) + action = toggle_state(msg['from'].bare, False) + else: + action = 'Unknown command. Press "help" for list of commands' + msg.reply(action).send() async def check_updates(self, event): # print("check_updates") # time.sleep(1) - with start_action(action_type="check_updates()", event=event): - while True: - print("Checking update") - db_dir = get_default_dbdir() - if not os.path.isdir(db_dir): - msg = ("Slixfeed can not work without a database. \n" - "To create a database, follow these steps: \n" - "Add Slixfeed contact to your roster \n" - "Send a feed to the bot by: \n" - "feed add https://reclaimthenet.org/feed/") - print(msg) - else: - files = os.listdir(db_dir) - for file in files: - jid = file[:-3] - await initdb(jid, download_updates) - await asyncio.sleep(9) + while True: + print("Checking update") + db_dir = get_default_dbdir() + if not os.path.isdir(db_dir): + msg = ("Slixfeed can not work without a database. \n" + "To create a database, follow these steps: \n" + "Add Slixfeed contact to your roster \n" + "Send a feed to the bot by: \n" + "feed add https://reclaimthenet.org/feed/") + print(msg) + else: + files = os.listdir(db_dir) + for file in files: + jid = file[:-3] + await initdb(jid, download_updates) + # await asyncio.sleep(9) + await asyncio.sleep(90) - async def send_updates(self, event): - # print("send_updates") + async def send_update(self, event): + # print("send_update") # time.sleep(1) - with start_action(action_type="send_updates()", event=event): - while True: - db_dir = get_default_dbdir() - if not os.path.isdir(db_dir): - msg = ("Slixfeed can not work without a database. \n" - "To create a database, follow these steps: \n" - "Add Slixfeed contact to your roster \n" - "Send a feed to the bot by: \n" - "feed add https://reclaimthenet.org/feed/") - print(msg) - else: - os.chdir(db_dir) - files = os.listdir() - for file in files: - if not file.endswith('.db-jour.db'): - jid = file[:-3] - new = await initdb( - jid, - database.get_unread + while True: + db_dir = get_default_dbdir() + if not os.path.isdir(db_dir): + msg = ("Slixfeed can not work without a database. \n" + "To create a database, follow these steps: \n" + "Add Slixfeed contact to your roster \n" + "Send a feed to the bot by: \n" + "feed add https://reclaimthenet.org/feed/") + print(msg) + else: + os.chdir(db_dir) + files = os.listdir() + for file in files: + if not file.endswith('.db-jour.db'): + jid = file[:-3] + + new = await initdb( + jid, + database.get_unread + ) + + if new: + # NOTE Consider send_message + msg = self.make_message( + mto=jid, + mbody=new, + mtype='chat' ) - if new: - msg = self.make_message( - mto=jid, - mbody=new, - mtype='chat' - ) - msg.send() - await asyncio.sleep(15) + + msg.send() + + # await asyncio.sleep(15) + await asyncio.sleep(60 * 3) + + async def send_status(self, event): + while True: + db_dir = get_default_dbdir() + if not os.path.isdir(db_dir): + msg = ("Slixfeed can not work without a database. \n" + "To create a database, follow these steps: \n" + "Add Slixfeed contact to your roster \n" + "Send a feed to the bot by: \n" + "feed add https://reclaimthenet.org/feed/") + print(msg) + else: + files = os.listdir(db_dir) + for file in files: + jid = file[:-3] + + unread = await initdb( + jid, + database.get_unread_entries_number + ) + + if unread: + msg_status = ('News', str(unread)) + msg_status = ' '.join(msg_status) + else: + msg_status = 'No News' + print(msg_status, 'for', jid) + + # NOTE Consider send_presence + sts = self.make_presence( + pstatus=msg_status, + pto=jid, + pfrom=jid, + pnick='Slixfeed' + ) + + sts.send() + + await asyncio.sleep(60) def print_help(): @@ -278,17 +326,16 @@ def get_default_dbdir(): async def initdb(jid, callback, message=None): # print("initdb") # time.sleep(1) - with start_action(action_type="initdb()", jid=jid): - db_dir = get_default_dbdir() - if not os.path.isdir(db_dir): - os.mkdir(db_dir) - db_file = os.path.join(db_dir, r"{}.db".format(jid)) - database.create_tables(db_file) - - if message: - return await callback(db_file, message) - else: - return await callback(db_file) + db_dir = get_default_dbdir() + if not os.path.isdir(db_dir): + os.mkdir(db_dir) + db_file = os.path.join(db_dir, r"{}.db".format(jid)) + database.create_tables(db_file) + + if message: + return await callback(db_file, message) + else: + return await callback(db_file) # NOTE I don't think there should be "return" # because then we might stop scanning next URLs @@ -297,99 +344,125 @@ async def download_updates(db_file): # print("db_file") # print(db_file) # time.sleep(1) - with start_action(action_type="download_updates()", db=db_file): - urls = await database.get_subscriptions(db_file) + urls = await database.get_subscriptions(db_file) for url in urls: - with start_action(action_type="download_updates()", url=url): - # print("for url in urls") - source = url[0] - # print("source: ", source) - res = await download_feed(source) - # TypeError: 'NoneType' object is not subscriptable - if res is None: - # Skip to next feed - # urls.next() - # next(urls) - continue - - await database.update_source_status(db_file, res[1], source) - - if res[0]: - try: - feed = feedparser.parse(res[0]) - if feed.bozo: - bozo = ("WARNING: Bozo detected for feed <{}>. " - "For more information, visit " - "https://pythonhosted.org/feedparser/bozo.html" - .format(source)) - print(bozo) - valid = 0 - else: - valid = 1 - - await database.update_source_validity(db_file, source, valid) - except (IncompleteReadError, IncompleteRead, error.URLError) as e: - print(e) - # return - # TODO Place these couple of lines back down - # NOTE Need to correct the SQL statement to do so - # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW - - if res[1] == 200: - # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW - # TODO Place these couple of lines back down - # NOTE Need to correct the SQL statement to do so - entries = feed.entries - length = len(entries) - await database.remove_entry(db_file, source, length) - - for entry in entries: - if entry.has_key("title"): - title = entry.title - else: - title = feed["feed"]["title"] - link = source if not entry.link else entry.link - exist = await database.check_entry(db_file, title, link) + # print("for url in urls") + source = url[0] + # print("source: ", source) + res = await download_feed(source) + # TypeError: 'NoneType' object is not subscriptable + if res is None: + # Skip to next feed + # urls.next() + # next(urls) + continue - if not exist: - if entry.has_key("summary"): - summary = entry.summary - # Remove HTML tags - summary = BeautifulSoup(summary, "lxml").text - # TODO Limit text length - summary = summary.replace("\n\n", "\n")[:300] + " ‍⃨" - else: - summary = '*** No summary ***' - #print('~~~~~~summary not in entry') - entry = (title, summary, link, source, 0); - await database.add_entry_and_set_date(db_file, source, entry) + await database.update_source_status(db_file, res[1], source) + + if res[0]: + try: + feed = feedparser.parse(res[0]) + if feed.bozo: + bozo = ("WARNING: Bozo detected for feed <{}>. " + "For more information, visit " + "https://pythonhosted.org/feedparser/bozo.html" + .format(source)) + print(bozo) + valid = 0 + else: + valid = 1 + + await database.update_source_validity(db_file, source, valid) + except (IncompleteReadError, IncompleteRead, error.URLError) as e: + print(e) + # return + # TODO Place these couple of lines back down + # NOTE Need to correct the SQL statement to do so + # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW + + if res[1] == 200: + # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW + # TODO Place these couple of lines back down + # NOTE Need to correct the SQL statement to do so + entries = feed.entries + length = len(entries) + # breakpoint() + # await database.remove_entry(db_file, source, length) + await database.remove_nonexistent_entries(db_file, feed, source) + # breakpoint() + + new_entry = 0 + for entry in entries: + + if entry.has_key("title"): + title = entry.title + else: + title = feed["feed"]["title"] + + if entry.has_key("link"): + link = entry.link + else: + link = source + # print('source:', source) + + exist = await database.check_entry_exist(db_file, title, link) + # breakpoint() + # if exist: + # print("//////// OLD ////////") + # print(source) + # print('ex:',exist) + # if entry.has_key("id"): + # print('id:',entry.id) + + if not exist: + # breakpoint() + new_entry = new_entry + 1 + # print("******** NEW ********") + # print('T',title) + # if entry.has_key("date"): + # print('D',entry.date) + # print('L',link) + # print('ex',exist) + # TODO Enhance summary + if entry.has_key("summary"): + summary = entry.summary + # Remove HTML tags + summary = BeautifulSoup(summary, "lxml").text + # TODO Limit text length + summary = summary.replace("\n\n", "\n")[:300] + " ‍⃨" + else: + summary = '*** No summary ***' + #print('~~~~~~summary not in entry') + entry = (title, summary, link, source, 0); + await database.add_entry_and_set_date(db_file, source, entry) + # print("### added", new_entry, "entries") async def download_feed(url): - with start_action(action_type="download_feed()", url=url): # print("download_feed") - # time.sleep(1) - timeout = aiohttp.ClientTimeout(total=10) - async with aiohttp.ClientSession() as session: - # async with aiohttp.ClientSession(trust_env=True) as session: - try: - async with session.get(url, timeout=timeout) as response: - status = response.status - if response.status == 200: - doc = await response.text() - return [doc, status] - else: - return [False, status] - except aiohttp.ClientError as e: - print('Error', str(e)) - return [False, "error"] - except asyncio.TimeoutError as e: - print('Timeout', str(e)) - return [False, "timeout"] + # time.sleep(1) + timeout = aiohttp.ClientTimeout(total=10) + async with aiohttp.ClientSession() as session: +# async with aiohttp.ClientSession(trust_env=True) as session: + try: + async with session.get(url, timeout=timeout) as response: + status = response.status + if response.status == 200: + doc = await response.text() + # print (response.content_type) + return [doc, status] + else: + return [False, status] + except aiohttp.ClientError as e: + print('Error', str(e)) + return [False, "error"] + except asyncio.TimeoutError as e: + print('Timeout', str(e)) + return [False, "timeout"] -async def is_feed_exist(db_file, url): +async def add_feed(db_file, url): # print("add_feed") # print("db_file") # print(db_file) @@ -400,13 +473,157 @@ async def is_feed_exist(db_file, url): :param url: :return: string """ - exist = await database.check_feed(db_file, url) + exist = await database.check_feed_exist(db_file, url) if not exist: res = await download_feed(url) - await database.add_feed(db_file, url, res) + if res[0]: + feed = feedparser.parse(res[0]) + if feed.bozo: + bozo = ("WARNING: Bozo detected. Failed to load <{}>.".format(url)) + print(bozo) + try: + # tree = etree.fromstring(res[0]) # etree -> html + tree = html.fromstring(res[0]) + except: + return "Failed to parse {} as feed".format(url) + + print("RSS Auto-Discovery Engaged") + xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]""" + # xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href""" + # xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href" + feeds = tree.xpath(xpath_query) + if len(feeds) > 1: + msg = "RSS Auto-Discovery has found {} feeds:\n\n".format(len(feeds)) + for feed in feeds: + # # The following code works; + # # The following code will catch + # # only valid resources (i.e. not 404); + # # The following code requires more bandwidth. + # res = await download_feed(feed) + # if res[0]: + # disco = feedparser.parse(res[0]) + # title = disco["feed"]["title"] + # msg += "{} \n {} \n\n".format(title, feed) + feed_name = feed.xpath('@title')[0] + feed_addr = feed.xpath('@href')[0] + msg += "{}\n{}\n\n".format(feed_name, feed_addr) + msg += "The above feeds were extracted from\n{}".format(url) + return msg + elif feeds: + url = feeds[0].xpath('@href')[0] + # Why wouldn't add_feed return a message + # upon success unless return is explicitly + # mentioned, yet upon failure it wouldn't? + return await add_feed(db_file, url) + + # Search for feeds by file extension and path + paths = ["/atom", + "/atom.php", + "/atom.xml", + "/rdf", + "/rdf.php", + "/rdf.xml", + "/rss", + "/rss.php", + "/rss.xml", + "/feed", + "/feed.atom", + "/feed.rdf", + "/feed.rss", + "/feed.xml", + "/news", + "/news/feed", + "?format=rss", + "/feeds/news_feed", + "/content-feeds/", + "/app.php/feed", # phpBB + "/posts.rss" # Discourse + ] # More paths "rss.json", "feed.json" + + print("RSS Scan Mode Engaged") + feeds = {} + for path in paths: + # xpath_query = "//*[@*[contains(.,'{}')]]".format(path) + xpath_query = "//a[contains(@href,'{}')]".format(path) + addresses = tree.xpath(xpath_query) + parted_url = urlparse(url) + for address in addresses: + address = address.xpath('@href')[0] + if address.startswith('/'): + address = parted_url.netloc + address + res = await download_feed(address) + # print(address) + if res[1] == 200: + # print(address) + try: + feeds[address] = feedparser.parse(res[0])["feed"]["title"] + # print(feeds) + except: + # print('Not a feed') + continue + if len(feeds) > 1: + msg = "RSS URL scan has found {} feeds:\n\n".format(len(feeds)) + for feed in feeds: + # try: + # res = await download_feed(feed) + # except: + # continue + feed_name = feeds[feed] + feed_addr = feed + msg += "{}\n{}\n\n".format(feed_name, feed_addr) + msg += "The above feeds were extracted from\n{}".format(url) + return msg + elif feeds: + url = list(feeds)[0] + return await add_feed(db_file, url) + + # (HTTP) Request(s) Paths + print("RSS Arbitrary Mode Engaged") + feeds = {} + parted_url = urlparse(url) + for path in paths: + # print(path) + if parted_url.path.split('/')[1]: + paths.extend([".atom", ".feed", ".rdf", ".rss"]) if '.rss' not in paths else -1 + # if paths.index('.rss'): + # paths.extend([".atom", ".feed", ".rdf", ".rss"]) + address = parted_url.scheme + '://' + parted_url.netloc + '/' + parted_url.path.split('/')[1] + path + res = await download_feed(address) + if res[1] == 200: + # print('2res[1]') + # print(res[1]) + # print(feedparser.parse(res[0])["feed"]["title"]) + feeds[address] = feedparser.parse(res[0])["feed"]["title"] + # print(feeds) + else: + address = parted_url.scheme + '://' + parted_url.netloc + path + res = await download_feed(address) + if res[1] == 200: + # print('1res[1]') + # print(res[1]) + # print(feedparser.parse(res[0])["feed"]["title"]) + feeds[address] = feedparser.parse(res[0])["feed"]["title"] + # print(feeds) + if len(feeds) > 1: + msg = "RSS URL discovery has found {} feeds:\n\n".format(len(feeds)) + for feed in feeds: + feed_name = feeds[feed] + feed_addr = feed + msg += "{}\n{}\n\n".format(feed_name, feed_addr) + msg += "The above feeds were extracted from\n{}".format(url) + return msg + elif feeds: + url = list(feeds)[0] + return await add_feed(db_file, url) + else: + return "No news feeds were found for URL <{}>.".format(url) + else: + return await database.add_feed(db_file, feed, url, res) + else: + return "Failed to get URL <{}>. HTTP Error {}".format(url, res[1]) else: - return "News source is already listed in the subscription list" + return "News source <{}> is already listed in the subscription list".format(url) def toggle_state(jid, state): @@ -418,23 +635,22 @@ def toggle_state(jid, state): :param state: boolean :return: """ - with start_action(action_type="set_date()", jid=jid): - db_dir = get_default_dbdir() - db_file = os.path.join(db_dir, r"{}.db".format(jid)) - bk_file = os.path.join(db_dir, r"{}.db.bak".format(jid)) - - if state: - if os.path.exists(db_file): - return "Updates are already enabled" - elif os.path.exists(bk_file): - os.renames(bk_file, db_file) - return "Updates are now enabled" - else: - if os.path.exists(bk_file): - return "Updates are already disabled" - elif os.path.exists(db_file): - os.renames(db_file, bk_file) - return "Updates are now disabled" + db_dir = get_default_dbdir() + db_file = os.path.join(db_dir, r"{}.db".format(jid)) + bk_file = os.path.join(db_dir, r"{}.db.bak".format(jid)) + + if state: + if os.path.exists(db_file): + return "Updates are already enabled" + elif os.path.exists(bk_file): + os.renames(bk_file, db_file) + return "Updates are now enabled" + else: + if os.path.exists(bk_file): + return "Updates are already disabled" + elif os.path.exists(db_file): + os.renames(db_file, bk_file) + return "Updates are now disabled" if __name__ == '__main__': diff --git a/slixfeed/database.py b/slixfeed/database.py index 2d1dcbf..568100e 100644 --- a/slixfeed/database.py +++ b/slixfeed/database.py @@ -9,7 +9,13 @@ import asyncio from datetime import date import feedparser -from eliot import start_action, to_file +# from eliot import start_action, to_file +# # with start_action(action_type="list_subscriptions()", db=db_file): +# # with start_action(action_type="last_entries()", num=num): +# # with start_action(action_type="get_subscriptions()"): +# # with start_action(action_type="remove_entry()", source=source): +# # with start_action(action_type="search_entries()", query=query): +# # with start_action(action_type="check_entry()", link=link): # aiosqlite DBLOCK = asyncio.Lock() @@ -27,14 +33,13 @@ def create_connection(db_file): :param db_file: database file :return: Connection object or None """ - with start_action(action_type="create_connection()", db=db_file): - conn = None - try: - conn = sqlite3.connect(db_file) - return conn - except Error as e: - print(e) + conn = None + try: + conn = sqlite3.connect(db_file) return conn + except Error as e: + print(e) + return conn def create_tables(db_file): @@ -42,33 +47,32 @@ def create_tables(db_file): # print("db_file") # print(db_file) # time.sleep(1) - with start_action(action_type="create_tables()", db=db_file): - with create_connection(db_file) as conn: - feeds_table_sql = """ - CREATE TABLE IF NOT EXISTS feeds ( - id integer PRIMARY KEY, - name text, - address text NOT NULL, - enabled integer NOT NULL, - scanned text, - updated text, - status integer, - valid integer - ); """ - entries_table_sql = """ - CREATE TABLE IF NOT EXISTS entries ( - id integer PRIMARY KEY, - title text NOT NULL, - summary text NOT NULL, - link text NOT NULL, - source text, - read integer - ); """ - - c = conn.cursor() - # c = get_cursor(db_file) - c.execute(feeds_table_sql) - c.execute(entries_table_sql) + with create_connection(db_file) as conn: + feeds_table_sql = """ + CREATE TABLE IF NOT EXISTS feeds ( + id integer PRIMARY KEY, + name text, + address text NOT NULL, + enabled integer NOT NULL, + scanned text, + updated text, + status integer, + valid integer + ); """ + entries_table_sql = """ + CREATE TABLE IF NOT EXISTS entries ( + id integer PRIMARY KEY, + title text NOT NULL, + summary text NOT NULL, + link text NOT NULL, + source text, + read integer + ); """ + + c = conn.cursor() + # c = get_cursor(db_file) + c.execute(feeds_table_sql) + c.execute(entries_table_sql) def get_cursor(db_file): @@ -77,17 +81,16 @@ def get_cursor(db_file): :param db_file: database file :return: Cursor """ - with start_action(action_type="get_cursor()", db=db_file): - if db_file in CURSORS: - return CURSORS[db_file] - else: - with create_connection(db_file) as conn: - cur = conn.cursor() - CURSORS[db_file] = cur + if db_file in CURSORS: return CURSORS[db_file] + else: + with create_connection(db_file) as conn: + cur = conn.cursor() + CURSORS[db_file] = cur + return CURSORS[db_file] -async def add_feed(db_file, url, res): +async def add_feed(db_file, feed, url, res): # print("add_feed") # print("db_file") # print(db_file) @@ -98,47 +101,30 @@ async def add_feed(db_file, url, res): :param feed: :return: string """ - with start_action(action_type="add_feed()", url=url): - #TODO consider async with DBLOCK - #conn = create_connection(db_file) + #TODO consider async with DBLOCK + #conn = create_connection(db_file) - # with create_connection(db_file) as conn: - # #exist = await check_feed(conn, url) - # exist = await check_feed(db_file, url) - - # if not exist: - # res = await main.download_feed(url) - # else: - # return "News source is already listed in the subscription list" - - async with DBLOCK: - with create_connection(db_file) as conn: - cur = conn.cursor() - if res[0]: - feed = feedparser.parse(res[0]) - if feed.bozo: - feed = (url, 1, res[1], 0) - #sql = """INSERT INTO feeds(address,enabled,status,valid) - # VALUES(?,?,?,?) """ - #cur.execute(sql, feed) - bozo = ("WARNING: Bozo detected. Failed to load URL.") - print(bozo) - return "Failed to parse URL as feed" - else: - title = feed["feed"]["title"] - feed = (title, url, 1, res[1], 1) - sql = """INSERT INTO feeds(name,address,enabled,status,valid) - VALUES(?,?,?,?,?) """ - cur.execute(sql, feed) - else: - feed = (url, 1, res[1], 0) - #sql = "INSERT INTO feeds(address,enabled,status,valid) VALUES(?,?,?,?) " - #cur.execute(sql, feed) - return "Failed to get URL. HTTP Error {}".format(res[1]) - - source = title if title else '<' + url + '>' - msg = 'News source "{}" has been added to subscription list'.format(source) - return msg + # with create_connection(db_file) as conn: + # #exist = await check_feed_exist(conn, url) + # exist = await check_feed_exist(db_file, url) + + # if not exist: + # res = await main.download_feed(url) + # else: + # return "News source is already listed in the subscription list" + + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + title = feed["feed"]["title"] + feed = (title, url, 1, res[1], 1) + sql = """INSERT INTO feeds(name,address,enabled,status,valid) + VALUES(?,?,?,?,?) """ + cur.execute(sql, feed) + + source = title if title else '<' + url + '>' + msg = 'News source "{}" has been added to subscription list'.format(source) + return msg async def remove_feed(db_file, ix): @@ -152,10 +138,10 @@ async def remove_feed(db_file, ix): :param id: id of the feed :return: string """ - with start_action(action_type="remove_feed()", id=ix): - with create_connection(db_file) as conn: - with DBLOCK: - cur = conn.cursor() + with create_connection(db_file) as conn: + async with DBLOCK: + cur = conn.cursor() + try: sql = "SELECT address FROM feeds WHERE id = ?" url = cur.execute(sql, (ix,)) for i in url: @@ -167,10 +153,12 @@ async def remove_feed(db_file, ix): cur.execute(sql, (ix,)) return """News source <{}> has been removed from subscription list """.format(url) + except: + return """No news source with ID {}""".format(ix) -async def check_feed(db_file, url): - # print("check_feed") +async def check_feed_exist(db_file, url): + # print("is_feed_exist") # print("db_file") # print(db_file) # time.sleep(1) @@ -181,11 +169,25 @@ async def check_feed(db_file, url): :param url: :return: row """ - with start_action(action_type="check_feed()", url=url): - cur = get_cursor(db_file) - sql = "SELECT id FROM feeds WHERE address = ?" - cur.execute(sql, (url,)) - return cur.fetchone() + cur = get_cursor(db_file) + sql = "SELECT id FROM feeds WHERE address = ?" + cur.execute(sql, (url,)) + return cur.fetchone() + + +async def get_unread_entries_number(db_file): + """ + Check number of unread items + :param db_file + :return: string + """ + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = "SELECT count(id) FROM entries WHERE read = 0" + count = cur.execute(sql) + count = cur.fetchone()[0] + return count + async def get_unread(db_file): @@ -199,33 +201,32 @@ async def get_unread(db_file): :param id: id of the entry :return: string """ - with start_action(action_type="get_unread()", db=db_file): - with create_connection(db_file) as conn: - entry = [] - cur = conn.cursor() - # cur = get_cursor(db_file) - sql = "SELECT id FROM entries WHERE read = 0" - ix = cur.execute(sql).fetchone() - if ix is None: - return False - ix = ix[0] - sql = "SELECT title FROM entries WHERE id = :id" - cur.execute(sql, (ix,)) - title = cur.fetchone()[0] - entry.append(title) - sql = "SELECT summary FROM entries WHERE id = :id" - cur.execute(sql, (ix,)) - summary = cur.fetchone()[0] - entry.append(summary) - sql = "SELECT link FROM entries WHERE id = :id" - cur.execute(sql, (ix,)) - link = cur.fetchone()[0] - entry.append(link) - entry = "{}\n\n{}\n\nLink to article:\n{}".format(entry[0], entry[1], entry[2]) - # print(entry) - async with DBLOCK: - await mark_as_read(cur, ix) - return entry + with create_connection(db_file) as conn: + entry = [] + cur = conn.cursor() + # cur = get_cursor(db_file) + sql = "SELECT id FROM entries WHERE read = 0" + ix = cur.execute(sql).fetchone() + if ix is None: + return False + ix = ix[0] + sql = "SELECT title FROM entries WHERE id = :id" + cur.execute(sql, (ix,)) + title = cur.fetchone()[0] + entry.append(title) + sql = "SELECT summary FROM entries WHERE id = :id" + cur.execute(sql, (ix,)) + summary = cur.fetchone()[0] + entry.append(summary) + sql = "SELECT link FROM entries WHERE id = :id" + cur.execute(sql, (ix,)) + link = cur.fetchone()[0] + entry.append(link) + entry = "{}\n\n{}\n\nLink to article:\n{}".format(entry[0], entry[1], entry[2]) + # print(entry) + async with DBLOCK: + await mark_as_read(cur, ix) + return entry async def mark_as_read(cur, ix): @@ -236,9 +237,8 @@ async def mark_as_read(cur, ix): :param cur: :param ix: index of the entry """ - with start_action(action_type="mark_as_read()", id=ix): - sql = "UPDATE entries SET summary = '', read = 1 WHERE id = ?" - cur.execute(sql, (ix,)) + sql = "UPDATE entries SET summary = '', read = 1 WHERE id = ?" + cur.execute(sql, (ix,)) # TODO mark_all_read for entries of feed @@ -253,30 +253,29 @@ async def toggle_status(db_file, ix): :param id: id of the feed :return: string """ - with start_action(action_type="toggle_status()", db=db_file): - async with DBLOCK: - with create_connection(db_file) as conn: - cur = conn.cursor() - #cur = get_cursor(db_file) - sql = "SELECT name FROM feeds WHERE id = :id" - cur.execute(sql, (ix,)) - title = cur.fetchone()[0] - sql = "SELECT enabled FROM feeds WHERE id = ?" - # NOTE [0][1][2] - cur.execute(sql, (ix,)) - status = cur.fetchone()[0] - # FIXME always set to 1 - # NOTE Maybe because is not integer - # TODO Reset feed table before further testing - if status == 1: - status = 0 - notice = "News updates for '{}' are now disabled".format(title) - else: - status = 1 - notice = "News updates for '{}' are now enabled".format(title) - sql = "UPDATE feeds SET enabled = :status WHERE id = :id" - cur.execute(sql, {"status": status, "id": ix}) - return notice + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + #cur = get_cursor(db_file) + sql = "SELECT name FROM feeds WHERE id = :id" + cur.execute(sql, (ix,)) + title = cur.fetchone()[0] + sql = "SELECT enabled FROM feeds WHERE id = ?" + # NOTE [0][1][2] + cur.execute(sql, (ix,)) + status = cur.fetchone()[0] + # FIXME always set to 1 + # NOTE Maybe because is not integer + # TODO Reset feed table before further testing + if status == 1: + status = 0 + state = "disabled" + else: + status = 1 + state = "enabled" + sql = "UPDATE feeds SET enabled = :status WHERE id = :id" + cur.execute(sql, {"status": status, "id": ix}) + return "Updates for '{}' are now {}".format(title, state) async def set_date(cur, url): @@ -287,11 +286,10 @@ async def set_date(cur, url): :param url: url of the feed :return: """ - with start_action(action_type="set_date()", url=url): - today = date.today() - sql = "UPDATE feeds SET updated = :today WHERE address = :url" - # cur = conn.cursor() - cur.execute(sql, {"today": today, "url": url}) + today = date.today() + sql = "UPDATE feeds SET updated = :today WHERE address = :url" + # cur = conn.cursor() + cur.execute(sql, {"today": today, "url": url}) async def add_entry_and_set_date(db_file, source, entry): @@ -327,13 +325,13 @@ async def add_entry(cur, entry): :param entry: :return: """ - with start_action(action_type="add_entry()", entry=entry): - sql = """ INSERT INTO entries(title,summary,link,source,read) - VALUES(?,?,?,?,?) """ - # cur = conn.cursor() - cur.execute(sql, entry) + sql = """ INSERT INTO entries(title,summary,link,source,read) + VALUES(?,?,?,?,?) """ + # cur = conn.cursor() + cur.execute(sql, entry) +# This function doesn't work as expected with bbs and wiki feeds async def remove_entry(db_file, source, length): # print("remove_entry") # time.sleep(1) @@ -346,27 +344,79 @@ async def remove_entry(db_file, source, length): :param length: :return: """ - with start_action(action_type="remove_entry()", source=source): - # FIXED - # Dino empty titles are not counted https://dino.im/index.xml - # SOLVED - # Add text if is empty - # title = '*** No title ***' if not entry.title else entry.title - async with DBLOCK: - with create_connection(db_file) as conn: - cur = conn.cursor() - sql = "SELECT count(id) FROM entries WHERE source = ?" - count = cur.execute(sql, (source,)) - count = cur.fetchone()[0] - limit = count - length - if limit: - limit = limit; - sql = """DELETE FROM entries WHERE id IN ( - SELECT id FROM entries - WHERE source = :source - ORDER BY id - ASC LIMIT :limit)""" - cur.execute(sql, {"source": source, "limit": limit}) + # FIXED + # Dino empty titles are not counted https://dino.im/index.xml + # SOLVED + # Add text if is empty + # title = '*** No title ***' if not entry.title else entry.title + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = "SELECT count(id) FROM entries WHERE source = ?" + count = cur.execute(sql, (source,)) + count = cur.fetchone()[0] + limit = count - length + if limit: + limit = limit; + sql = """DELETE FROM entries WHERE id IN ( + SELECT id FROM entries + WHERE source = :source + ORDER BY id + ASC LIMIT :limit)""" + cur.execute(sql, {"source": source, "limit": limit}) + print('### removed', limit, 'from', source) + + +async def remove_nonexistent_entries(db_file, feed, source): + """ + Remove entries that don't exist in feed' + Check the entries returned from feed and delete + non existing entries + :param conn: + :param source: + :param length: + :return: + """ + async with DBLOCK: + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = "SELECT id, title, link FROM entries WHERE source = ?" + cur.execute(sql, (source,)) + entries_db = cur.fetchall() + # print('entries_db') + # print(entries_db) + for entry_db in entries_db: + # entry_db[1] = id + # entry_db[2] = title + # entry_db[3] = link + exist = False + # print("check-db") + for entry_feed in feed.entries: + # print("check-feed") + # TODO better check and don't repeat code + if entry_feed.has_key("title"): + title = entry_feed.title + else: + title = feed["feed"]["title"] + + if entry_feed.has_key("link"): + link = entry_feed.link + else: + link = source + # TODO better check and don't repeat code + if entry_db[1] == title and entry_db[2] == link: + # print('exist') + # print(title) + exist = True + break + if not exist: + # print('>>> not exist') + # print(entry_db[1]) + # TODO Send to table archive + # TODO Also make a regular/routine check for sources that have been changed (though that can only happen when manually editing) + sql = "DELETE FROM entries WHERE id = ?" + cur.execute(sql, (entry_db[0],)) + # breakpoint() async def get_subscriptions(db_file): @@ -377,12 +427,11 @@ async def get_subscriptions(db_file): :param conn: :return: rows (tuple) """ - with start_action(action_type="get_subscriptions()"): - with create_connection(db_file) as conn: - cur = conn.cursor() - sql = "SELECT address FROM feeds WHERE enabled = 1" - cur.execute(sql) - return cur.fetchall() + with create_connection(db_file) as conn: + cur = conn.cursor() + sql = "SELECT address FROM feeds WHERE enabled = 1" + cur.execute(sql) + return cur.fetchall() async def list_subscriptions(db_file): @@ -395,29 +444,28 @@ async def list_subscriptions(db_file): :param conn: :return: rows (string) """ - with start_action(action_type="list_subscriptions()", db=db_file): - with create_connection(db_file) as conn: - # cur = conn.cursor() - cur = get_cursor(db_file) - sql = "SELECT name, address, updated, id, enabled FROM feeds" - results = cur.execute(sql) - - feeds_list = "List of subscriptions: \n" - counter = 0 - for result in results: - counter += 1 - feeds_list += """\n{} \n{} \nLast updated: {} \nID: {} [{}] - """.format(str(result[0]), str(result[1]), str(result[2]), - str(result[3]), str(result[4])) - if counter: - return feeds_list + "\n Total of {} subscriptions".format(counter) - else: - msg = ("List of subscriptions is empty. \n" - "To add feed, send a message as follows: \n" - "feed add URL \n" - "Example: \n" - "feed add https://reclaimthenet.org/feed/") - return msg + with create_connection(db_file) as conn: + # cur = conn.cursor() + cur = get_cursor(db_file) + sql = "SELECT name, address, updated, id, enabled FROM feeds" + results = cur.execute(sql) + + feeds_list = "List of subscriptions: \n" + counter = 0 + for result in results: + counter += 1 + feeds_list += """\n{} \n{} \nLast updated: {} \nID: {} [{}] + """.format(str(result[0]), str(result[1]), str(result[2]), + str(result[3]), str(result[4])) + if counter: + return feeds_list + "\n Total of {} subscriptions".format(counter) + else: + msg = ("List of subscriptions is empty. \n" + "To add feed, send a message as follows: \n" + "feed add URL \n" + "Example: \n" + "feed add https://reclaimthenet.org/feed/") + return msg async def last_entries(db_file, num): @@ -431,23 +479,22 @@ async def last_entries(db_file, num): :param num: integer :return: rows (string) """ - with start_action(action_type="last_entries()", num=num): - num = int(num) - if num > 50: - num = 50 - elif num < 1: - num = 1 - with create_connection(db_file) as conn: - # cur = conn.cursor() - cur = get_cursor(db_file) - sql = "SELECT title, link FROM entries ORDER BY ROWID DESC LIMIT :num" - results = cur.execute(sql, (num,)) - - - titles_list = "Recent {} titles: \n".format(num) - for result in results: - titles_list += "\n{} \n{}".format(str(result[0]), str(result[1])) - return titles_list + num = int(num) + if num > 50: + num = 50 + elif num < 1: + num = 1 + with create_connection(db_file) as conn: + # cur = conn.cursor() + cur = get_cursor(db_file) + sql = "SELECT title, link FROM entries ORDER BY ROWID DESC LIMIT :num" + results = cur.execute(sql, (num,)) + + + titles_list = "Recent {} titles: \n".format(num) + for result in results: + titles_list += "\n{} \n{}".format(str(result[0]), str(result[1])) + return titles_list async def search_entries(db_file, query): @@ -461,29 +508,28 @@ async def search_entries(db_file, query): :param query: string :return: rows (string) """ - with start_action(action_type="search_entries()", query=query): - if len(query) < 2: - return "Please enter at least 2 characters to search" + if len(query) < 2: + return "Please enter at least 2 characters to search" - with create_connection(db_file) as conn: - # cur = conn.cursor() - cur = get_cursor(db_file) - sql = "SELECT title, link FROM entries WHERE title LIKE ? LIMIT 50" - results = cur.execute(sql, [f'%{query}%']) - - results_list = "Search results for '{}': \n".format(query) - counter = 0 - for result in results: - counter += 1 - results_list += """\n{} \n{} - """.format(str(result[0]), str(result[1])) - if counter: - return results_list + "\n Total of {} results".format(counter) - else: - return "No results found for: {}".format(query) + with create_connection(db_file) as conn: + # cur = conn.cursor() + cur = get_cursor(db_file) + sql = "SELECT title, link FROM entries WHERE title LIKE ? LIMIT 50" + results = cur.execute(sql, [f'%{query}%']) + + results_list = "Search results for '{}': \n".format(query) + counter = 0 + for result in results: + counter += 1 + results_list += """\n{} \n{} + """.format(str(result[0]), str(result[1])) + if counter: + return results_list + "\n Total of {} results".format(counter) + else: + return "No results found for: {}".format(query) -async def check_entry(db_file, title, link): +async def check_entry_exist(db_file, title, link): # print("check_entry") # time.sleep(1) """ @@ -494,9 +540,7 @@ async def check_entry(db_file, title, link): :param title: :return: row """ - with start_action(action_type="check_entry()", link=link): - with create_connection(db_file) as conn: - cur = conn.cursor() - sql = "SELECT id FROM entries WHERE title = :title and link = :link" - cur.execute(sql, {"title": title, "link": link}) - return cur.fetchone() + cur = get_cursor(db_file) + sql = "SELECT id FROM entries WHERE title = :title and link = :link" + cur.execute(sql, {"title": title, "link": link}) + return cur.fetchone()