Update 2 files

- /slixfeed/__main__.py
- /slixfeed/database.py
Schimon Jehudah 2023-09-29 11:49:24 +00:00
parent 8907c5bdb9
commit cf44241698
2 changed files with 720 additions and 460 deletions

/slixfeed/__main__.py

@@ -27,14 +27,18 @@ import time
import aiohttp
from bs4 import BeautifulSoup
+from xml.etree.ElementTree import ElementTree, ParseError
+from urllib.parse import urlparse
+from lxml import html
import feedparser
import slixmpp
-from eliot import start_action, to_file
+# from eliot import start_action, to_file
+# # to_file(open("slixfeed.log", "w"))
+# # with start_action(action_type="set_date()", jid=jid):
+# # with start_action(action_type="message()", msg=msg):
-from . import database
+import database
-to_file(open("slixfeed.log", "w"))

class Slixfeed(slixmpp.ClientXMPP):
@@ -51,7 +55,8 @@ class Slixfeed(slixmpp.ClientXMPP):
        # listen for this event so that we we can initialize
        # our roster.
        self.add_event_handler("session_start", self.start)
-        self.add_event_handler("session_start", self.send_updates)
+        self.add_event_handler("session_start", self.send_update)
+        self.add_event_handler("session_start", self.send_status)
        self.add_event_handler("session_start", self.check_updates)

        # The message event is triggered whenever a message
@@ -91,102 +96,145 @@ class Slixfeed(slixmpp.ClientXMPP):
        for stanza objects and the Message stanza to see
        how it may be used.
        """
-        with start_action(action_type="message()", msg=msg):
        if msg['type'] in ('chat', 'normal'):
            message = " ".join(msg['body'].split())
            if message.lower().startswith('help'):
                print("COMMAND: help")
                print("ACCOUNT: " + str(msg['from']))
                action = print_help()
            # NOTE: Might not need it
            elif message.lower().startswith('feed recent '):
                print("COMMAND: feed recent")
                print("ACCOUNT: " + str(msg['from']))
                action = await initdb(msg['from'].bare, database.last_entries, message[12:])
            elif message.lower().startswith('feed search '):
                print("COMMAND: feed search")
                print("ACCOUNT: " + str(msg['from']))
                action = await initdb( msg['from'].bare, database.search_entries, message[12:])
            elif message.lower().startswith('feed list'):
                print("COMMAND: feed list")
                print("ACCOUNT: " + str(msg['from']))
                action = await initdb(msg['from'].bare, database.list_subscriptions)
            elif message.lower().startswith('feed add '):
                print("COMMAND: feed add")
                print("ACCOUNT: " + str(msg['from']))
-                    action = await initdb(msg['from'].bare, is_feed_exist, message[9:])
+                action = await initdb(msg['from'].bare, add_feed, message[9:])
            elif message.lower().startswith('feed remove '):
                print("COMMAND: feed remove")
                print("ACCOUNT: " + str(msg['from']))
                action = await initdb(msg['from'].bare, database.remove_feed, message[12:])
            elif message.lower().startswith('feed status '):
                print("COMMAND: feed status")
                print("ACCOUNT: " + str(msg['from']))
                action = await initdb(msg['from'].bare, database.toggle_status, message[12:])
            elif message.lower().startswith('enable'):
                print("COMMAND: enable")
                print("ACCOUNT: " + str(msg['from']))
                action = toggle_state(msg['from'].bare, True)
            elif message.lower().startswith('disable'):
                print("COMMAND: disable")
                print("ACCOUNT: " + str(msg['from']))
                action = toggle_state(msg['from'].bare, False)
            else:
                action = 'Unknown command. Press "help" for list of commands'
            msg.reply(action).send()

    async def check_updates(self, event):
        # print("check_updates")
        # time.sleep(1)
-        with start_action(action_type="check_updates()", event=event):
        while True:
            print("Checking update")
            db_dir = get_default_dbdir()
            if not os.path.isdir(db_dir):
                msg = ("Slixfeed can not work without a database. \n"
                       "To create a database, follow these steps: \n"
                       "Add Slixfeed contact to your roster \n"
                       "Send a feed to the bot by: \n"
                       "feed add https://reclaimthenet.org/feed/")
                print(msg)
            else:
                files = os.listdir(db_dir)
                for file in files:
                    jid = file[:-3]
                    await initdb(jid, download_updates)
-                await asyncio.sleep(9)
+            # await asyncio.sleep(9)
+            await asyncio.sleep(90)

-    async def send_updates(self, event):
+    async def send_update(self, event):
-        # print("send_updates")
+        # print("send_update")
        # time.sleep(1)
-        with start_action(action_type="send_updates()", event=event):
        while True:
            db_dir = get_default_dbdir()
            if not os.path.isdir(db_dir):
                msg = ("Slixfeed can not work without a database. \n"
                       "To create a database, follow these steps: \n"
                       "Add Slixfeed contact to your roster \n"
                       "Send a feed to the bot by: \n"
                       "feed add https://reclaimthenet.org/feed/")
                print(msg)
            else:
                os.chdir(db_dir)
                files = os.listdir()
                for file in files:
                    if not file.endswith('.db-jour.db'):
                        jid = file[:-3]
                        new = await initdb(
                            jid,
                            database.get_unread
                            )
                        if new:
+                            # NOTE Consider send_message
                            msg = self.make_message(
                                mto=jid,
                                mbody=new,
                                mtype='chat'
                                )
                            msg.send()
-                await asyncio.sleep(15)
+            # await asyncio.sleep(15)
+            await asyncio.sleep(60 * 3)

+    async def send_status(self, event):
+        while True:
+            db_dir = get_default_dbdir()
+            if not os.path.isdir(db_dir):
+                msg = ("Slixfeed can not work without a database. \n"
+                       "To create a database, follow these steps: \n"
+                       "Add Slixfeed contact to your roster \n"
+                       "Send a feed to the bot by: \n"
+                       "feed add https://reclaimthenet.org/feed/")
+                print(msg)
+            else:
+                files = os.listdir(db_dir)
+                for file in files:
+                    jid = file[:-3]
+                    unread = await initdb(
+                        jid,
+                        database.get_unread_entries_number
+                        )
+                    if unread:
+                        msg_status = ('News', str(unread))
+                        msg_status = ' '.join(msg_status)
+                    else:
+                        msg_status = 'No News'
+                    print(msg_status, 'for', jid)
+                    # NOTE Consider send_presence
+                    sts = self.make_presence(
+                        pstatus=msg_status,
+                        pto=jid,
+                        pfrom=jid,
+                        pnick='Slixfeed'
+                        )
+                    sts.send()
+            await asyncio.sleep(60)
def print_help():

@@ -278,17 +326,16 @@ def get_default_dbdir():

async def initdb(jid, callback, message=None):
    # print("initdb")
    # time.sleep(1)
-    with start_action(action_type="initdb()", jid=jid):
    db_dir = get_default_dbdir()
    if not os.path.isdir(db_dir):
        os.mkdir(db_dir)
    db_file = os.path.join(db_dir, r"{}.db".format(jid))
    database.create_tables(db_file)
    if message:
        return await callback(db_file, message)
    else:
        return await callback(db_file)

# NOTE I don't think there should be "return"
# because then we might stop scanning next URLs
@@ -297,99 +344,125 @@ async def download_updates(db_file):
    # print("db_file")
    # print(db_file)
    # time.sleep(1)
-    with start_action(action_type="download_updates()", db=db_file):
    urls = await database.get_subscriptions(db_file)
    for url in urls:
-        with start_action(action_type="download_updates()", url=url):
        # print("for url in urls")
        source = url[0]
        # print("source: ", source)
        res = await download_feed(source)
        # TypeError: 'NoneType' object is not subscriptable
        if res is None:
            # Skip to next feed
            # urls.next()
            # next(urls)
            continue
        await database.update_source_status(db_file, res[1], source)
        if res[0]:
            try:
                feed = feedparser.parse(res[0])
                if feed.bozo:
                    bozo = ("WARNING: Bozo detected for feed <{}>. "
                            "For more information, visit "
                            "https://pythonhosted.org/feedparser/bozo.html"
                            .format(source))
                    print(bozo)
                    valid = 0
                else:
                    valid = 1
                await database.update_source_validity(db_file, source, valid)
            except (IncompleteReadError, IncompleteRead, error.URLError) as e:
                print(e)
                # return
        # TODO Place these couple of lines back down
        # NOTE Need to correct the SQL statement to do so
        # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
        if res[1] == 200:
            # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
            # TODO Place these couple of lines back down
            # NOTE Need to correct the SQL statement to do so
            entries = feed.entries
            length = len(entries)
+            # breakpoint()
-            await database.remove_entry(db_file, source, length)
+            # await database.remove_entry(db_file, source, length)
+            await database.remove_nonexistent_entries(db_file, feed, source)
+            # breakpoint()
+            new_entry = 0
            for entry in entries:
                if entry.has_key("title"):
                    title = entry.title
                else:
                    title = feed["feed"]["title"]
-                link = source if not entry.link else entry.link
+                if entry.has_key("link"):
+                    link = entry.link
+                else:
+                    link = source
+                # print('source:', source)
-                exist = await database.check_entry(db_file, title, link)
+                exist = await database.check_entry_exist(db_file, title, link)
+                # breakpoint()
+                # if exist:
+                #     print("//////// OLD ////////")
+                #     print(source)
+                #     print('ex:',exist)
+                #     if entry.has_key("id"):
+                #         print('id:',entry.id)
                if not exist:
+                    # breakpoint()
+                    new_entry = new_entry + 1
+                    # print("******** NEW ********")
+                    # print('T',title)
+                    # if entry.has_key("date"):
+                    #     print('D',entry.date)
+                    # print('L',link)
+                    # print('ex',exist)
+                    # TODO Enhance summary
                    if entry.has_key("summary"):
                        summary = entry.summary
                        # Remove HTML tags
                        summary = BeautifulSoup(summary, "lxml").text
                        # TODO Limit text length
                        summary = summary.replace("\n\n", "\n")[:300] + " ‍⃨"
                    else:
                        summary = '*** No summary ***'
                        #print('~~~~~~summary not in entry')
                    entry = (title, summary, link, source, 0);
                    await database.add_entry_and_set_date(db_file, source, entry)
+            # print("### added", new_entry, "entries")
async def download_feed(url):
-    with start_action(action_type="download_feed()", url=url):
    # print("download_feed")
    # time.sleep(1)
    timeout = aiohttp.ClientTimeout(total=10)
    async with aiohttp.ClientSession() as session:
    # async with aiohttp.ClientSession(trust_env=True) as session:
        try:
            async with session.get(url, timeout=timeout) as response:
                status = response.status
                if response.status == 200:
                    doc = await response.text()
+                    # print (response.content_type)
                    return [doc, status]
                else:
                    return [False, status]
        except aiohttp.ClientError as e:
            print('Error', str(e))
            return [False, "error"]
        except asyncio.TimeoutError as e:
            print('Timeout', str(e))
            return [False, "timeout"]
-async def is_feed_exist(db_file, url):
+async def add_feed(db_file, url):
    # print("add_feed")
    # print("db_file")
    # print(db_file)

@@ -400,13 +473,157 @@ async def is_feed_exist(db_file, url):
    :param url:
    :return: string
    """
-    exist = await database.check_feed(db_file, url)
+    exist = await database.check_feed_exist(db_file, url)
    if not exist:
        res = await download_feed(url)
-        await database.add_feed(db_file, url, res)
+        if res[0]:
+            feed = feedparser.parse(res[0])
+            if feed.bozo:
+                bozo = ("WARNING: Bozo detected. Failed to load <{}>.".format(url))
+                print(bozo)
+                try:
+                    # tree = etree.fromstring(res[0]) # etree -> html
+                    tree = html.fromstring(res[0])
+                except:
+                    return "Failed to parse {} as feed".format(url)
+                print("RSS Auto-Discovery Engaged")
+                xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]"""
+                # xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
+                # xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
+                feeds = tree.xpath(xpath_query)
+                if len(feeds) > 1:
+                    msg = "RSS Auto-Discovery has found {} feeds:\n\n".format(len(feeds))
+                    for feed in feeds:
+                        # # The following code works;
+                        # # The following code will catch
+                        # # only valid resources (i.e. not 404);
+                        # # The following code requires more bandwidth.
+                        # res = await download_feed(feed)
+                        # if res[0]:
+                        #     disco = feedparser.parse(res[0])
+                        #     title = disco["feed"]["title"]
+                        #     msg += "{} \n {} \n\n".format(title, feed)
+                        feed_name = feed.xpath('@title')[0]
+                        feed_addr = feed.xpath('@href')[0]
+                        msg += "{}\n{}\n\n".format(feed_name, feed_addr)
+                    msg += "The above feeds were extracted from\n{}".format(url)
+                    return msg
+                elif feeds:
+                    url = feeds[0].xpath('@href')[0]
+                    # Why wouldn't add_feed return a message
+                    # upon success unless return is explicitly
+                    # mentioned, yet upon failure it wouldn't?
+                    return await add_feed(db_file, url)
+                # Search for feeds by file extension and path
+                paths = ["/atom",
+                         "/atom.php",
+                         "/atom.xml",
+                         "/rdf",
+                         "/rdf.php",
+                         "/rdf.xml",
+                         "/rss",
+                         "/rss.php",
+                         "/rss.xml",
+                         "/feed",
+                         "/feed.atom",
+                         "/feed.rdf",
+                         "/feed.rss",
+                         "/feed.xml",
+                         "/news",
+                         "/news/feed",
+                         "?format=rss",
+                         "/feeds/news_feed",
+                         "/content-feeds/",
+                         "/app.php/feed", # phpBB
+                         "/posts.rss" # Discourse
+                         ] # More paths "rss.json", "feed.json"
+                print("RSS Scan Mode Engaged")
+                feeds = {}
+                for path in paths:
+                    # xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
+                    xpath_query = "//a[contains(@href,'{}')]".format(path)
+                    addresses = tree.xpath(xpath_query)
+                    parted_url = urlparse(url)
+                    for address in addresses:
+                        address = address.xpath('@href')[0]
+                        if address.startswith('/'):
+                            address = parted_url.netloc + address
+                        res = await download_feed(address)
+                        # print(address)
+                        if res[1] == 200:
+                            # print(address)
+                            try:
+                                feeds[address] = feedparser.parse(res[0])["feed"]["title"]
+                                # print(feeds)
+                            except:
+                                # print('Not a feed')
+                                continue
+                if len(feeds) > 1:
+                    msg = "RSS URL scan has found {} feeds:\n\n".format(len(feeds))
+                    for feed in feeds:
+                        # try:
+                        #     res = await download_feed(feed)
+                        # except:
+                        #     continue
+                        feed_name = feeds[feed]
+                        feed_addr = feed
+                        msg += "{}\n{}\n\n".format(feed_name, feed_addr)
+                    msg += "The above feeds were extracted from\n{}".format(url)
+                    return msg
+                elif feeds:
+                    url = list(feeds)[0]
+                    return await add_feed(db_file, url)
+                # (HTTP) Request(s) Paths
+                print("RSS Arbitrary Mode Engaged")
+                feeds = {}
+                parted_url = urlparse(url)
+                for path in paths:
+                    # print(path)
+                    if parted_url.path.split('/')[1]:
+                        paths.extend([".atom", ".feed", ".rdf", ".rss"]) if '.rss' not in paths else -1
+                        # if paths.index('.rss'):
+                        #     paths.extend([".atom", ".feed", ".rdf", ".rss"])
+                        address = parted_url.scheme + '://' + parted_url.netloc + '/' + parted_url.path.split('/')[1] + path
+                        res = await download_feed(address)
+                        if res[1] == 200:
+                            # print('2res[1]')
+                            # print(res[1])
+                            # print(feedparser.parse(res[0])["feed"]["title"])
+                            feeds[address] = feedparser.parse(res[0])["feed"]["title"]
+                            # print(feeds)
+                    else:
+                        address = parted_url.scheme + '://' + parted_url.netloc + path
+                        res = await download_feed(address)
+                        if res[1] == 200:
+                            # print('1res[1]')
+                            # print(res[1])
+                            # print(feedparser.parse(res[0])["feed"]["title"])
+                            feeds[address] = feedparser.parse(res[0])["feed"]["title"]
+                            # print(feeds)
+                if len(feeds) > 1:
+                    msg = "RSS URL discovery has found {} feeds:\n\n".format(len(feeds))
+                    for feed in feeds:
+                        feed_name = feeds[feed]
+                        feed_addr = feed
+                        msg += "{}\n{}\n\n".format(feed_name, feed_addr)
+                    msg += "The above feeds were extracted from\n{}".format(url)
+                    return msg
+                elif feeds:
+                    url = list(feeds)[0]
+                    return await add_feed(db_file, url)
+                else:
+                    return "No news feeds were found for URL <{}>.".format(url)
+            else:
+                return await database.add_feed(db_file, feed, url, res)
+        else:
+            return "Failed to get URL <{}>. HTTP Error {}".format(url, res[1])
    else:
-        return "News source is already listed in the subscription list"
+        return "News source <{}> is already listed in the subscription list".format(url)
def toggle_state(jid, state):

@@ -418,23 +635,22 @@ def toggle_state(jid, state):
    :param state: boolean
    :return:
    """
-    with start_action(action_type="set_date()", jid=jid):
    db_dir = get_default_dbdir()
    db_file = os.path.join(db_dir, r"{}.db".format(jid))
    bk_file = os.path.join(db_dir, r"{}.db.bak".format(jid))
    if state:
        if os.path.exists(db_file):
            return "Updates are already enabled"
        elif os.path.exists(bk_file):
            os.renames(bk_file, db_file)
            return "Updates are now enabled"
    else:
        if os.path.exists(bk_file):
            return "Updates are already disabled"
        elif os.path.exists(db_file):
            os.renames(db_file, bk_file)
            return "Updates are now disabled"

if __name__ == '__main__':

/slixfeed/database.py

@@ -9,7 +9,13 @@ import asyncio
from datetime import date
import feedparser
-from eliot import start_action, to_file
+# from eliot import start_action, to_file
+# # with start_action(action_type="list_subscriptions()", db=db_file):
+# # with start_action(action_type="last_entries()", num=num):
+# # with start_action(action_type="get_subscriptions()"):
+# # with start_action(action_type="remove_entry()", source=source):
+# # with start_action(action_type="search_entries()", query=query):
+# # with start_action(action_type="check_entry()", link=link):
# aiosqlite

DBLOCK = asyncio.Lock()
@@ -27,14 +33,13 @@ def create_connection(db_file):
    :param db_file: database file
    :return: Connection object or None
    """
-    with start_action(action_type="create_connection()", db=db_file):
    conn = None
    try:
        conn = sqlite3.connect(db_file)
        return conn
    except Error as e:
        print(e)
    return conn
def create_tables(db_file):

@@ -42,33 +47,32 @@ def create_tables(db_file):
    # print("db_file")
    # print(db_file)
    # time.sleep(1)
-    with start_action(action_type="create_tables()", db=db_file):
    with create_connection(db_file) as conn:
        feeds_table_sql = """
            CREATE TABLE IF NOT EXISTS feeds (
                id integer PRIMARY KEY,
                name text,
                address text NOT NULL,
                enabled integer NOT NULL,
                scanned text,
                updated text,
                status integer,
                valid integer
            ); """
        entries_table_sql = """
            CREATE TABLE IF NOT EXISTS entries (
                id integer PRIMARY KEY,
                title text NOT NULL,
                summary text NOT NULL,
                link text NOT NULL,
                source text,
                read integer
            ); """
        c = conn.cursor()
        # c = get_cursor(db_file)
        c.execute(feeds_table_sql)
        c.execute(entries_table_sql)
def get_cursor(db_file):

@@ -77,17 +81,16 @@ def get_cursor(db_file):
    :param db_file: database file
    :return: Cursor
    """
-    with start_action(action_type="get_cursor()", db=db_file):
    if db_file in CURSORS:
        return CURSORS[db_file]
    else:
        with create_connection(db_file) as conn:
            cur = conn.cursor()
            CURSORS[db_file] = cur
            return CURSORS[db_file]
-async def add_feed(db_file, url, res):
+async def add_feed(db_file, feed, url, res):
    # print("add_feed")
    # print("db_file")
    # print(db_file)

@@ -98,47 +101,30 @@ async def add_feed(db_file, url, res):
    :param feed:
    :return: string
    """
-    with start_action(action_type="add_feed()", url=url):
    #TODO consider async with DBLOCK
    #conn = create_connection(db_file)
    # with create_connection(db_file) as conn:
-    #     #exist = await check_feed(conn, url)
-    #     exist = await check_feed(db_file, url)
+    #     #exist = await check_feed_exist(conn, url)
+    #     exist = await check_feed_exist(db_file, url)
    #     if not exist:
    #         res = await main.download_feed(url)
    #     else:
    #         return "News source is already listed in the subscription list"
    async with DBLOCK:
        with create_connection(db_file) as conn:
            cur = conn.cursor()
-            if res[0]:
-                feed = feedparser.parse(res[0])
-                if feed.bozo:
-                    feed = (url, 1, res[1], 0)
-                    #sql = """INSERT INTO feeds(address,enabled,status,valid)
-                    #         VALUES(?,?,?,?) """
-                    #cur.execute(sql, feed)
-                    bozo = ("WARNING: Bozo detected. Failed to load URL.")
-                    print(bozo)
-                    return "Failed to parse URL as feed"
-                else:
-                    title = feed["feed"]["title"]
-                    feed = (title, url, 1, res[1], 1)
-                    sql = """INSERT INTO feeds(name,address,enabled,status,valid)
-                             VALUES(?,?,?,?,?) """
-                    cur.execute(sql, feed)
-            else:
-                feed = (url, 1, res[1], 0)
-                #sql = "INSERT INTO feeds(address,enabled,status,valid) VALUES(?,?,?,?) "
-                #cur.execute(sql, feed)
-                return "Failed to get URL. HTTP Error {}".format(res[1])
+            title = feed["feed"]["title"]
+            feed = (title, url, 1, res[1], 1)
+            sql = """INSERT INTO feeds(name,address,enabled,status,valid)
+                     VALUES(?,?,?,?,?) """
+            cur.execute(sql, feed)
    source = title if title else '<' + url + '>'
    msg = 'News source "{}" has been added to subscription list'.format(source)
    return msg
async def remove_feed(db_file, ix):

@@ -152,10 +138,10 @@ async def remove_feed(db_file, ix):
    :param id: id of the feed
    :return: string
    """
-    with start_action(action_type="remove_feed()", id=ix):
    with create_connection(db_file) as conn:
-        with DBLOCK:
+        async with DBLOCK:
            cur = conn.cursor()
+            try:
                sql = "SELECT address FROM feeds WHERE id = ?"
                url = cur.execute(sql, (ix,))
                for i in url:

@@ -167,10 +153,12 @@ async def remove_feed(db_file, ix):
                cur.execute(sql, (ix,))
                return """News source <{}> has been removed from subscription list
                    """.format(url)
+            except:
+                return """No news source with ID {}""".format(ix)
-async def check_feed(db_file, url):
+async def check_feed_exist(db_file, url):
-    # print("check_feed")
+    # print("is_feed_exist")
    # print("db_file")
    # print(db_file)
    # time.sleep(1)

@@ -181,11 +169,25 @@ async def check_feed(db_file, url):
    :param url:
    :return: row
    """
-    with start_action(action_type="check_feed()", url=url):
    cur = get_cursor(db_file)
    sql = "SELECT id FROM feeds WHERE address = ?"
    cur.execute(sql, (url,))
    return cur.fetchone()

+async def get_unread_entries_number(db_file):
+    """
+    Check number of unread items
+    :param db_file
+    :return: string
+    """
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = "SELECT count(id) FROM entries WHERE read = 0"
+        count = cur.execute(sql)
+        count = cur.fetchone()[0]
+        return count
async def get_unread(db_file):

@@ -199,33 +201,32 @@ async def get_unread(db_file):
    :param id: id of the entry
    :return: string
    """
-    with start_action(action_type="get_unread()", db=db_file):
    with create_connection(db_file) as conn:
        entry = []
        cur = conn.cursor()
        # cur = get_cursor(db_file)
        sql = "SELECT id FROM entries WHERE read = 0"
        ix = cur.execute(sql).fetchone()
        if ix is None:
            return False
        ix = ix[0]
        sql = "SELECT title FROM entries WHERE id = :id"
        cur.execute(sql, (ix,))
        title = cur.fetchone()[0]
        entry.append(title)
        sql = "SELECT summary FROM entries WHERE id = :id"
        cur.execute(sql, (ix,))
        summary = cur.fetchone()[0]
        entry.append(summary)
        sql = "SELECT link FROM entries WHERE id = :id"
        cur.execute(sql, (ix,))
        link = cur.fetchone()[0]
        entry.append(link)
        entry = "{}\n\n{}\n\nLink to article:\n{}".format(entry[0], entry[1], entry[2])
        # print(entry)
        async with DBLOCK:
            await mark_as_read(cur, ix)
        return entry
async def mark_as_read(cur, ix):

@@ -236,9 +237,8 @@ async def mark_as_read(cur, ix):
    :param cur:
    :param ix: index of the entry
    """
-    with start_action(action_type="mark_as_read()", id=ix):
    sql = "UPDATE entries SET summary = '', read = 1 WHERE id = ?"
    cur.execute(sql, (ix,))

# TODO mark_all_read for entries of feed
@@ -253,30 +253,29 @@ async def toggle_status(db_file, ix):
    :param id: id of the feed
    :return: string
    """
-    with start_action(action_type="toggle_status()", db=db_file):
    async with DBLOCK:
        with create_connection(db_file) as conn:
            cur = conn.cursor()
            #cur = get_cursor(db_file)
            sql = "SELECT name FROM feeds WHERE id = :id"
            cur.execute(sql, (ix,))
            title = cur.fetchone()[0]
            sql = "SELECT enabled FROM feeds WHERE id = ?"
            # NOTE [0][1][2]
            cur.execute(sql, (ix,))
            status = cur.fetchone()[0]
            # FIXME always set to 1
            # NOTE Maybe because is not integer
            # TODO Reset feed table before further testing
            if status == 1:
                status = 0
-                notice = "News updates for '{}' are now disabled".format(title)
+                state = "disabled"
            else:
                status = 1
-                notice = "News updates for '{}' are now enabled".format(title)
+                state = "enabled"
            sql = "UPDATE feeds SET enabled = :status WHERE id = :id"
            cur.execute(sql, {"status": status, "id": ix})
-            return notice
+            return "Updates for '{}' are now {}".format(title, state)
async def set_date(cur, url):

@@ -287,11 +286,10 @@ async def set_date(cur, url):
    :param url: url of the feed
    :return:
    """
-    with start_action(action_type="set_date()", url=url):
    today = date.today()
    sql = "UPDATE feeds SET updated = :today WHERE address = :url"
    # cur = conn.cursor()
    cur.execute(sql, {"today": today, "url": url})

async def add_entry_and_set_date(db_file, source, entry):
@@ -327,13 +325,13 @@ async def add_entry(cur, entry):
    :param entry:
    :return:
    """
-    with start_action(action_type="add_entry()", entry=entry):
    sql = """ INSERT INTO entries(title,summary,link,source,read)
              VALUES(?,?,?,?,?) """
    # cur = conn.cursor()
    cur.execute(sql, entry)

+# This function doesn't work as expected with bbs and wiki feeds
async def remove_entry(db_file, source, length):
    # print("remove_entry")
    # time.sleep(1)
@@ -346,27 +344,79 @@ async def remove_entry(db_file, source, length):
    :param length:
    :return:
    """
-    with start_action(action_type="remove_entry()", source=source):
    # FIXED
    # Dino empty titles are not counted https://dino.im/index.xml
    # SOLVED
    # Add text if is empty
    # title = '*** No title ***' if not entry.title else entry.title
    async with DBLOCK:
        with create_connection(db_file) as conn:
            cur = conn.cursor()
            sql = "SELECT count(id) FROM entries WHERE source = ?"
            count = cur.execute(sql, (source,))
            count = cur.fetchone()[0]
            limit = count - length
            if limit:
                limit = limit;
                sql = """DELETE FROM entries WHERE id IN (
                         SELECT id FROM entries
                         WHERE source = :source
                         ORDER BY id
                         ASC LIMIT :limit)"""
                cur.execute(sql, {"source": source, "limit": limit})
+                print('### removed', limit, 'from', source)

+async def remove_nonexistent_entries(db_file, feed, source):
+    """
+    Remove entries that don't exist in feed'
+    Check the entries returned from feed and delete
+    non existing entries
+    :param conn:
+    :param source:
+    :param length:
+    :return:
+    """
+    async with DBLOCK:
+        with create_connection(db_file) as conn:
+            cur = conn.cursor()
+            sql = "SELECT id, title, link FROM entries WHERE source = ?"
+            cur.execute(sql, (source,))
+            entries_db = cur.fetchall()
+            # print('entries_db')
+            # print(entries_db)
+            for entry_db in entries_db:
+                # entry_db[1] = id
+                # entry_db[2] = title
+                # entry_db[3] = link
+                exist = False
+                # print("check-db")
+                for entry_feed in feed.entries:
+                    # print("check-feed")
+                    # TODO better check and don't repeat code
+                    if entry_feed.has_key("title"):
+                        title = entry_feed.title
+                    else:
+                        title = feed["feed"]["title"]
+                    if entry_feed.has_key("link"):
+                        link = entry_feed.link
+                    else:
+                        link = source
+                    # TODO better check and don't repeat code
+                    if entry_db[1] == title and entry_db[2] == link:
+                        # print('exist')
+                        # print(title)
+                        exist = True
+                        break
+                if not exist:
+                    # print('>>> not exist')
+                    # print(entry_db[1])
+                    # TODO Send to table archive
+                    # TODO Also make a regular/routine check for sources that have been changed (though that can only happen when manually editing)
+                    sql = "DELETE FROM entries WHERE id = ?"
+                    cur.execute(sql, (entry_db[0],))
+            # breakpoint()
async def get_subscriptions(db_file):

@@ -377,12 +427,11 @@ async def get_subscriptions(db_file):
    :param conn:
    :return: rows (tuple)
    """
-    with start_action(action_type="get_subscriptions()"):
    with create_connection(db_file) as conn:
        cur = conn.cursor()
        sql = "SELECT address FROM feeds WHERE enabled = 1"
        cur.execute(sql)
        return cur.fetchall()
async def list_subscriptions(db_file):

@@ -395,29 +444,28 @@ async def list_subscriptions(db_file):
    :param conn:
    :return: rows (string)
    """
-    with start_action(action_type="list_subscriptions()", db=db_file):
    with create_connection(db_file) as conn:
        # cur = conn.cursor()
        cur = get_cursor(db_file)
        sql = "SELECT name, address, updated, id, enabled FROM feeds"
        results = cur.execute(sql)
        feeds_list = "List of subscriptions: \n"
        counter = 0
        for result in results:
            counter += 1
            feeds_list += """\n{} \n{} \nLast updated: {} \nID: {} [{}]
            """.format(str(result[0]), str(result[1]), str(result[2]),
                       str(result[3]), str(result[4]))
        if counter:
            return feeds_list + "\n Total of {} subscriptions".format(counter)
        else:
            msg = ("List of subscriptions is empty. \n"
                   "To add feed, send a message as follows: \n"
                   "feed add URL \n"
                   "Example: \n"
                   "feed add https://reclaimthenet.org/feed/")
            return msg
async def last_entries(db_file, num):

@@ -431,23 +479,22 @@ async def last_entries(db_file, num):
    :param num: integer
    :return: rows (string)
    """
-    with start_action(action_type="last_entries()", num=num):
    num = int(num)
    if num > 50:
        num = 50
    elif num < 1:
        num = 1
    with create_connection(db_file) as conn:
        # cur = conn.cursor()
        cur = get_cursor(db_file)
        sql = "SELECT title, link FROM entries ORDER BY ROWID DESC LIMIT :num"
        results = cur.execute(sql, (num,))
        titles_list = "Recent {} titles: \n".format(num)
        for result in results:
            titles_list += "\n{} \n{}".format(str(result[0]), str(result[1]))
        return titles_list
async def search_entries(db_file, query):

@@ -461,29 +508,28 @@ async def search_entries(db_file, query):
    :param query: string
    :return: rows (string)
    """
-    with start_action(action_type="search_entries()", query=query):
    if len(query) < 2:
        return "Please enter at least 2 characters to search"
    with create_connection(db_file) as conn:
        # cur = conn.cursor()
        cur = get_cursor(db_file)
        sql = "SELECT title, link FROM entries WHERE title LIKE ? LIMIT 50"
        results = cur.execute(sql, [f'%{query}%'])
        results_list = "Search results for '{}': \n".format(query)
        counter = 0
        for result in results:
            counter += 1
            results_list += """\n{} \n{}
            """.format(str(result[0]), str(result[1]))
        if counter:
            return results_list + "\n Total of {} results".format(counter)
        else:
            return "No results found for: {}".format(query)
-async def check_entry(db_file, title, link):
+async def check_entry_exist(db_file, title, link):
    # print("check_entry")
    # time.sleep(1)
    """

@@ -494,9 +540,7 @@ async def check_entry(db_file, title, link):
    :param title:
    :return: row
    """
-    with start_action(action_type="check_entry()", link=link):
-        with create_connection(db_file) as conn:
-            cur = conn.cursor()
+    cur = get_cursor(db_file)
    sql = "SELECT id FROM entries WHERE title = :title and link = :link"
    cur.execute(sql, {"title": title, "link": link})
    return cur.fetchone()