Update 8 files

- /slixfeed/sqlitehandler.py
- /slixfeed/xmpphandler.py
- /slixfeed/opmlhandler.py
- /slixfeed/datahandler.py
- /slixfeed/datetimehandler.py
- /slixfeed/__main__.py
- /slixfeed/confighandler.py
- /slixfeed/filterhandler.py
This commit is contained in:
Schimon Jehudah 2023-11-13 13:45:10 +00:00
parent 9d6a211d36
commit 031eb6ce53
8 changed files with 2535 additions and 751 deletions

View file

@ -1,13 +1,42 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# TODO """
#
# 0) sql prepared statements FIXME
# 1) Autodetect feed:
# if page is not feed (or HTML) and contains <link rel="alternate"> 1) Check feed duplication on runtime.
# 2) OPML import/export When feed is valid and is not yet in the database it is
# 3) 2022-12-30 reduce async to (maybe) prevent inner lock. async on task: commands, downloader, updater possible to send a batch which would result in duplication.
Consequently, it might result in database lock error upon
feed removal attempt
TODO
1) SQL prepared statements
2) Machine Learning for scraping Title, Link, Summary and Timestamp
3) Support MUC
4) Support categories
5) Default prepackaged list of feeds
6) XMPP commands
7) Bot as transport
8) OMEMO
9) Logging
10) Default feeds (e.g. Blacklisted News, TBOT etc.)
11) Download and upload/send article (xHTML, xHTMLZ, Markdown, MHTML, TXT)
Use Readability
"""
# vars and their meanings: # vars and their meanings:
# jid = Jabber ID (XMPP) # jid = Jabber ID (XMPP)

View file

@ -1,6 +1,15 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
"""
TODO
1) Use file settings.csv and pathnames.txt instead:
See get_value_default and get_default_list
"""
import os import os
import sys import sys
@ -8,16 +17,21 @@ def get_default_dbdir():
""" """
Determine the directory path where dbfile will be stored. Determine the directory path where dbfile will be stored.
If $XDG_DATA_HOME is defined, use it * If $XDG_DATA_HOME is defined, use it;
else if $HOME exists, use it * else if $HOME exists, use it;
else if the platform is Windows, use %APPDATA% * else if the platform is Windows, use %APPDATA%;
else use the current directory. * else use the current directory.
:return: Path to database file. Returns
-------
str
Path to database file.
Note Note
---- ----
This code was taken from the buku project. This function was taken from project buku.
See https://github.com/jarun/buku
* Arun Prakash Jana (jarun) * Arun Prakash Jana (jarun)
* Dmitry Marakasov (AMDmi3) * Dmitry Marakasov (AMDmi3)
@ -41,12 +55,15 @@ def get_default_confdir():
""" """
Determine the directory path where configuration will be stored. Determine the directory path where configuration will be stored.
If $XDG_CONFIG_HOME is defined, use it * If $XDG_CONFIG_HOME is defined, use it;
else if $HOME exists, use it * else if $HOME exists, use it;
else if the platform is Windows, use %APPDATA% * else if the platform is Windows, use %APPDATA%;
else use the current directory. * else use the current directory.
:return: Path to configuration directory. Returns
-------
str
Path to configuration directory.
""" """
# config_home = xdg.BaseDirectory.xdg_config_home # config_home = xdg.BaseDirectory.xdg_config_home
config_home = os.environ.get('XDG_CONFIG_HOME') config_home = os.environ.get('XDG_CONFIG_HOME')
@ -67,24 +84,69 @@ async def get_value_default(key):
""" """
Get settings default value. Get settings default value.
:param key: "enabled", "interval", "quantum". Parameters
:return: Integer. ----------
key : str
Key: enabled, filter-allow, filter-deny,
interval, quantum, random.
Returns
-------
result : int or str
Value.
""" """
if key == "enabled": match key:
result = 1 case "enabled":
elif key == "quantum": result = 1
result = 4 case "filter-allow":
elif key == "interval": result = "hitler,sadam,saddam"
result = 30 case "filter-deny":
result = "crim,dead,death,disaster,holocaust,murder,war"
case "interval":
result = 30
case "quantum":
result = 4
case "random":
result = 0
return result return result
def get_list():
    """
    Get dictionary file.

    Reads pathnames from the url_paths.txt configuration file,
    creating it from the built-in defaults on first use.

    Returns
    -------
    paths : list
        Dictionary of pathnames.
    """
    cfg_dir = get_default_confdir()
    if not os.path.isdir(cfg_dir):
        os.mkdir(cfg_dir)
    cfg_file = os.path.join(cfg_dir, r"url_paths.txt")
    if not os.path.isfile(cfg_file):
        # Seed the file with the prepackaged default pathnames.
        # FIX: use context managers so the file handles are always
        # closed (they previously leaked on error), and avoid
        # shadowing the builtin `list`.
        with open(cfg_file, "w") as config:
            config.writelines("\n".join(get_default_list()))
    with open(cfg_file, "r") as config:
        paths = [line.strip() for line in config]
    return paths
# async def generate_dictionary(): # async def generate_dictionary():
def get_default_list(): def get_default_list():
""" """
Generate a dictionary file. Generate a dictionary file.
:return: List. Returns
-------
paths : list
Dictionary of pathnames.
""" """
paths = [ paths = [
".atom", ".atom",
@ -139,6 +201,8 @@ def get_default_list():
# "/rss.json", # "/rss.json",
"/rss.php", "/rss.php",
"/rss.xml", "/rss.xml",
"/syndication.php?type=atom1.0", #mybb
"/syndication.php?type=rss2.0",
"/timeline.rss", "/timeline.rss",
"/videos.atom", "/videos.atom",
# "/videos.json", # "/videos.json",

View file

@ -1,29 +1,75 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import feedparser
import aiohttp import aiohttp
import asyncio import asyncio
import feedparser
import os import os
import sqlitehandler import sqlitehandler
import confighandler import confighandler
import datetimehandler
import filterhandler
from http.client import IncompleteRead
from asyncio.exceptions import IncompleteReadError from asyncio.exceptions import IncompleteReadError
from http.client import IncompleteRead
from urllib import error from urllib import error
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
# from xml.etree.ElementTree import ElementTree, ParseError # from xml.etree.ElementTree import ElementTree, ParseError
from urllib.parse import urlparse from urllib.parse import urljoin
from urllib.parse import urlsplit
from urllib.parse import urlunsplit
from lxml import html from lxml import html
async def download_updates(db_file):
# NOTE Perhaps this needs to be executed
# just once per program execution
async def initdb(jid, callback, message=None):
    """
    Callback function to instantiate action on database.

    Creates the data directory and the per-JID database file when
    missing, then invokes the given callback against that database.

    Parameters
    ----------
    jid : str
        Jabber ID (used as the database filename).
    callback : function
        Coroutine function to invoke against the database.
    message : str, optional
        Extra argument forwarded to callback when a message is a
        required argument. The default is None.

    Returns
    -------
    object
        Coroutine object (result of the callback).
    """
    data_dir = confighandler.get_default_dbdir()
    if not os.path.isdir(data_dir):
        os.mkdir(data_dir)
    db_file = os.path.join(data_dir, r"{}.db".format(jid))
    sqlitehandler.create_tables(db_file)
    # await sqlitehandler.set_default_values(db_file)
    args = (db_file, message) if message else (db_file,)
    return await callback(*args)
async def download_updates(db_file, url=None):
""" """
Check feeds for new entries. Check feeds for new entries.
:param db_file: Database filename. Parameters
----------
db_file : str
Path to database file.
url : str, optional
URL. The default is None.
""" """
urls = await sqlitehandler.get_subscriptions(db_file) if url:
urls = [url] # Valid [url] and [url,] and (url,)
else:
urls = await sqlitehandler.get_feeds_url(db_file)
for url in urls: for url in urls:
# print(os.path.basename(db_file), url[0]) # print(os.path.basename(db_file), url[0])
source = url[0] source = url[0]
@ -34,31 +80,42 @@ async def download_updates(db_file):
# urls.next() # urls.next()
# next(urls) # next(urls)
continue continue
await sqlitehandler.update_source_status(
await sqlitehandler.update_source_status(db_file, res[1], source) db_file,
res[1],
source
)
if res[0]: if res[0]:
try: try:
feed = feedparser.parse(res[0]) feed = feedparser.parse(res[0])
if feed.bozo: if feed.bozo:
# bozo = ("WARNING: Bozo detected for feed <{}>. " bozo = (
# "For more information, visit " "WARNING: Bozo detected for feed: {}\n"
# "https://pythonhosted.org/feedparser/bozo.html" "For more information, visit "
# .format(source)) "https://pythonhosted.org/feedparser/bozo.html"
# print(bozo) ).format(source)
print(bozo)
valid = 0 valid = 0
else: else:
valid = 1 valid = 1
await sqlitehandler.update_source_validity(db_file, source, valid) await sqlitehandler.update_source_validity(
except (IncompleteReadError, IncompleteRead, error.URLError) as e: db_file,
print(e) source,
valid)
except (
IncompleteReadError,
IncompleteRead,
error.URLError
) as e:
# print(e)
# TODO Print error to log
None
# NOTE I don't think there should be "return" # NOTE I don't think there should be "return"
# because then we might stop scanning next URLs # because then we might stop scanning next URLs
# return # return
# TODO Place these couple of lines back down # TODO Place these couple of lines back down
# NOTE Need to correct the SQL statement to do so # NOTE Need to correct the SQL statement to do so
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
if res[1] == 200: if res[1] == 200:
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW # NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
# TODO Place these couple of lines back down # TODO Place these couple of lines back down
@ -66,25 +123,60 @@ async def download_updates(db_file):
entries = feed.entries entries = feed.entries
# length = len(entries) # length = len(entries)
# await sqlitehandler.remove_entry(db_file, source, length) # await sqlitehandler.remove_entry(db_file, source, length)
await sqlitehandler.remove_nonexistent_entries(db_file, feed, source) await sqlitehandler.remove_nonexistent_entries(
db_file,
new_entry = 0 feed,
source
)
# new_entry = 0
for entry in entries: for entry in entries:
if entry.has_key("id"):
eid = entry.id
if entry.has_key("title"): if entry.has_key("title"):
title = entry.title title = entry.title
else: else:
title = feed["feed"]["title"] title = feed["feed"]["title"]
if entry.has_key("link"): if entry.has_key("link"):
link = entry.link # link = complete_url(source, entry.link)
link = await join_url(source, entry.link)
link = await trim_url(link)
else: else:
link = source link = source
# TODO Pass date too for comparison check
exist = await sqlitehandler.check_entry_exist(db_file, title, link) if entry.has_key("published"):
date = entry.published
date = await datetimehandler.rfc2822_to_iso8601(date)
else:
date = None
exist = await sqlitehandler.check_entry_exist(
db_file,
source,
eid=eid,
title=title,
link=link,
date=date
)
if not exist: if not exist:
new_entry = new_entry + 1 # new_entry = new_entry + 1
if entry.has_key("published"):
date = entry.published
date = await datetimehandler.rfc2822_to_iso8601(date)
# try:
# date = datetime.strptime(date, "%a, %d %b %Y %H:%M:%S %z")
# except:
# date = datetime.strptime(date, '%a, %d %b %Y %H:%M:%S %Z')
# finally:
# date = date.isoformat()
# if parsedate(date): # Is RFC 2822 format
# date = parsedate_to_datetime(date) # Process timestamp
# date = date.isoformat() # Convert to ISO 8601
else:
# TODO Just set date = "*** No date ***"
# date = datetime.now().isoformat()
date = await datetimehandler.now()
# NOTE Would seconds result in better database performance
# date = datetime.datetime(date)
# date = (date-datetime.datetime(1970,1,1)).total_seconds()
# TODO Enhance summary # TODO Enhance summary
if entry.has_key("summary"): if entry.has_key("summary"):
summary = entry.summary summary = entry.summary
@ -93,164 +185,156 @@ async def download_updates(db_file):
# TODO Limit text length # TODO Limit text length
summary = summary.replace("\n\n", "\n")[:300] + " ‍⃨" summary = summary.replace("\n\n", "\n")[:300] + " ‍⃨"
else: else:
summary = '*** No summary ***' summary = "*** No summary ***"
entry = (title, summary, link, source, 0); read_status = 0
await sqlitehandler.add_entry_and_set_date(db_file, source, entry) pathname = urlsplit(link).path
string = (
"{} {} {}"
).format(
title,
summary,
pathname
)
allow_list = await filterhandler.is_listed(
db_file,
"allow",
string
)
if not allow_list:
reject_list = await filterhandler.is_listed(
db_file,
"deny",
string
)
if reject_list:
print(">>> REJECTED", title)
summary = "REJECTED"
# summary = ""
read_status = 1
entry = (
title,
summary,
link,
eid,
source,
date,
read_status
)
await sqlitehandler.add_entry_and_set_date(
db_file,
source,
entry
)
# print(await datetimehandler.current_time(), entry, title)
# else:
# print(await datetimehandler.current_time(), exist, title)
async def add_feed_no_check(db_file, data):
    """
    Add given feed without validity check.

    Parameters
    ----------
    db_file : str
        Path to database file.
    data : tuple
        URL and Title.

    Returns
    -------
    msg : str
        Status message.
    """
    url, title = data[0], data[1]
    url = await trim_url(url)
    exist = await sqlitehandler.check_feed_exist(db_file, url)
    if exist:
        # Feed is already subscribed; report its index and name.
        ix, name = exist[0], exist[1]
        msg = (
            "> {}\nNews source \"{}\" is already "
            "listed in the subscription list at "
            "index {}".format(url, name, ix)
        )
    else:
        msg = await sqlitehandler.add_feed(db_file, url, title)
        # Fetch entries for the newly added feed right away.
        await download_updates(db_file, [url])
    return msg
async def add_feed(db_file, url): async def add_feed(db_file, url):
""" """
Check whether feed exist, otherwise process it. Check whether feed exist, otherwise process it.
:param db_file: Database filename. Parameters
:param url: URL. ----------
:return: Status message. db_file : str
Path to database file.
url : str
URL.
Returns
-------
msg : str
Status message.
""" """
msg = None
url = await trim_url(url)
exist = await sqlitehandler.check_feed_exist(db_file, url) exist = await sqlitehandler.check_feed_exist(db_file, url)
if not exist: if not exist:
res = await download_feed(url) res = await download_feed(url)
if res[0]: if res[0]:
feed = feedparser.parse(res[0]) feed = feedparser.parse(res[0])
title = await get_title(url, feed) title = await get_title(url, feed)
if feed.bozo: if feed.bozo:
bozo = ("WARNING: Bozo detected. Failed to load <{}>.".format(url)) bozo = (
"Bozo detected. Failed to load: {}."
).format(url)
print(bozo) print(bozo)
try: try:
# tree = etree.fromstring(res[0]) # etree is for xml # tree = etree.fromstring(res[0]) # etree is for xml
tree = html.fromstring(res[0]) tree = html.fromstring(res[0])
except: except:
return "Failed to parse URL <{}> as feed".format(url) msg = (
"> {}\nFailed to parse URL as feed."
print("RSS Auto-Discovery Engaged") ).format(url)
xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]""" if not msg:
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href""" print("RSS Auto-Discovery Engaged")
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href" msg = await feed_mode_auto_discovery(db_file, url, tree)
feeds = tree.xpath(xpath_query) if not msg:
if len(feeds) > 1: print("RSS Scan Mode Engaged")
msg = "RSS Auto-Discovery has found {} feeds:\n\n".format(len(feeds)) msg = await feed_mode_scan(db_file, url, tree)
for feed in feeds: if not msg:
# # The following code works; print("RSS Arbitrary Mode Engaged")
# # The following code will catch msg = await feed_mode_request(db_file, url, tree)
# # only valid resources (i.e. not 404); if not msg:
# # The following code requires more bandwidth. msg = (
# res = await download_feed(feed) "> {}\nNo news feeds were found for URL."
# if res[0]: ).format(url)
# disco = feedparser.parse(res[0])
# title = disco["feed"]["title"]
# msg += "{} \n {} \n\n".format(title, feed)
feed_name = feed.xpath('@title')[0]
feed_addr = feed.xpath('@href')[0]
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
msg += "The above feeds were extracted from\n{}".format(url)
return msg
elif feeds:
url = feeds[0].xpath('@href')[0]
# Why wouldn't add_feed return a message
# upon success unless return is explicitly
# mentioned, yet upon failure it wouldn't?
return await add_feed(db_file, url)
print("RSS Scan Mode Engaged")
feeds = {}
paths = []
# TODO Test
cfg_dir = confighandler.get_default_confdir()
if not os.path.isdir(cfg_dir):
os.mkdir(cfg_dir)
cfg_file = os.path.join(cfg_dir, r"url_paths.txt")
if not os.path.isfile(cfg_file):
# confighandler.generate_dictionary()
list = confighandler.get_default_list()
file = open(cfg_file, "w")
file.writelines("\n".join(list))
file.close()
file = open(cfg_file, "r")
lines = file.readlines()
for line in lines:
paths.extend([line.strip()])
for path in paths:
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
xpath_query = "//a[contains(@href,'{}')]".format(path)
addresses = tree.xpath(xpath_query)
parted_url = urlparse(url)
# NOTE Should number of addresses be limited or
# perhaps be N from the start and N from the end
for address in addresses:
address = address.xpath('@href')[0]
if address.startswith('/'):
address = parted_url.scheme + '://' + parted_url.netloc + address
res = await download_feed(address)
if res[1] == 200:
try:
feeds[address] = feedparser.parse(res[0])["feed"]["title"]
except:
continue
if len(feeds) > 1:
msg = "RSS URL scan has found {} feeds:\n\n".format(len(feeds))
for feed in feeds:
# try:
# res = await download_feed(feed)
# except:
# continue
feed_name = feeds[feed]
feed_addr = feed
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
msg += "The above feeds were extracted from\n{}".format(url)
return msg
elif feeds:
url = list(feeds)[0]
return await add_feed(db_file, url)
# (HTTP) Request(s) Paths
print("RSS Arbitrary Mode Engaged")
feeds = {}
parted_url = urlparse(url)
for path in paths:
address = parted_url.scheme + '://' + parted_url.netloc + path
res = await download_feed(address)
if res[1] == 200:
# print(feedparser.parse(res[0])["feed"]["title"])
# feeds[address] = feedparser.parse(res[0])["feed"]["title"]
try:
title = feedparser.parse(res[0])["feed"]["title"]
except:
title = '*** No Title ***'
feeds[address] = title
# Check whether URL has path (i.e. not root)
if parted_url.path.split('/')[1]:
paths.extend([".atom", ".feed", ".rdf", ".rss"]) if '.rss' not in paths else -1
# if paths.index('.rss'):
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
address = parted_url.scheme + '://' + parted_url.netloc + '/' + parted_url.path.split('/')[1] + path
res = await download_feed(address)
if res[1] == 200:
try:
title = feedparser.parse(res[0])["feed"]["title"]
except:
title = '*** No Title ***'
feeds[address] = title
if len(feeds) > 1:
msg = "RSS URL discovery has found {} feeds:\n\n".format(len(feeds))
for feed in feeds:
feed_name = feeds[feed]
feed_addr = feed
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
msg += "The above feeds were extracted from\n{}".format(url)
elif feeds:
url = list(feeds)[0]
msg = await add_feed(db_file, url)
else:
msg = "No news feeds were found for URL <{}>.".format(url)
else: else:
msg = await sqlitehandler.add_feed(db_file, title, url, res) status = res[1]
msg = await sqlitehandler.add_feed(
db_file,
url,
title,
status
)
await download_updates(db_file, [url])
else: else:
msg = "Failed to get URL <{}>. Reason: {}".format(url, res[1]) status = res[1]
msg = (
"> {}\nFailed to get URL. Reason: {}"
).format(url, status)
else: else:
ix = exist[0] ix = exist[0]
name = exist[1] name = exist[1]
msg = "> {}\nNews source \"{}\" is already listed in the subscription list at index {}".format(url, name, ix) msg = (
"> {}\nNews source \"{}\" is already "
"listed in the subscription list at "
"index {}".format(url, name, ix)
)
return msg return msg
@ -258,8 +342,15 @@ async def download_feed(url):
""" """
Download content of given URL. Download content of given URL.
:param url: URL. Parameters
:return: Document or error message. ----------
url : str
URL.
Returns
-------
msg: list or str
Document or error message.
""" """
timeout = aiohttp.ClientTimeout(total=10) timeout = aiohttp.ClientTimeout(total=10)
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
@ -271,30 +362,438 @@ async def download_feed(url):
try: try:
doc = await response.text() doc = await response.text()
# print (response.content_type) # print (response.content_type)
return [doc, status] msg = [
doc,
status
]
except: except:
# return [False, "The content of this document doesn't appear to be textual."] # msg = [
return [False, "Document is too large or is not textual."] # False,
# ("The content of this document "
# "doesn't appear to be textual."
# )
# ]
msg = [
False,
"Document is too large or is not textual."
]
else: else:
return [False, "HTTP Error: " + str(status)] msg = [
False,
"HTTP Error: " + str(status)
]
except aiohttp.ClientError as e: except aiohttp.ClientError as e:
print('Error', str(e)) # print('Error', str(e))
return [False, "Error: " + str(e)] msg = [
False,
"Error: " + str(e)
]
except asyncio.TimeoutError as e: except asyncio.TimeoutError as e:
# print('Timeout:', str(e)) # print('Timeout:', str(e))
return [False, "Timeout: " + str(e)] msg = [
False,
"Timeout: " + str(e)
]
return msg
async def get_title(url, feed): async def get_title(url, feed):
""" """
Get title of feed. Get title of feed.
:param url: URL Parameters
:param feed: Parsed feed ----------
:return: Title or URL hostname. url : str
URL.
feed : dict
Parsed feed document.
Returns
-------
title : str
Title or URL hostname.
""" """
try: try:
title = feed["feed"]["title"] title = feed["feed"]["title"]
except: except:
title = urlparse(url).netloc title = urlsplit(url).netloc
return title return title
# NOTE Read the documentation
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
def complete_url(source, link):
    """
    Check if URL is pathname and complete it into URL.

    Parameters
    ----------
    source : str
        Feed URL.
    link : str
        Link URL or pathname.

    Returns
    -------
    str
        Completed URL.
    """
    if link.startswith("www."):
        # Schemeless www link: assume plain HTTP.
        return "http://" + link
    parted_link = urlsplit(link)
    parted_feed = urlsplit(source)
    if parted_link.scheme == "magnet" and parted_link.query:
        # Magnet links are already complete.
        return link
    if parted_link.scheme and parted_link.netloc:
        # Absolute URL: nothing to complete.
        return link
    if link.startswith("//"):
        # Protocol-relative link: reuse the feed's scheme.
        # FIX: the previous code also required a non-empty path and
        # otherwise left the result unbound, raising UnboundLocalError
        # for links such as "//example.com"; build it unconditionally.
        new_link = urlunsplit([
            parted_feed.scheme,
            parted_link.netloc,
            parted_link.path,
            parted_link.query,
            parted_link.fragment
        ])
    elif link.startswith("/"):
        # Root-relative pathname: attach it to the feed's host.
        new_link = urlunsplit([
            parted_feed.scheme,
            parted_feed.netloc,
            parted_link.path,
            parted_link.query,
            parted_link.fragment
        ])
    elif link.startswith("../"):
        # Parent-relative pathname.
        # NOTE(review): each leading ".." pops only one trailing
        # segment of the feed path, so "../x" against "/a/b/c.html"
        # yields "/a/b/x" rather than "/a/x" — confirm intent;
        # join_url (urljoin) is the maintained code path.
        pathlink = parted_link.path.split("/")
        pathfeed = parted_feed.path.split("/")
        for i in pathlink:
            if i == "..":
                if pathlink.index("..") == 0:
                    pathfeed.pop()
                else:
                    break
        while pathlink.count(".."):
            if pathlink.index("..") == 0:
                pathlink.remove("..")
            else:
                break
        pathlink = "/".join(pathlink)
        pathfeed.extend([pathlink])
        new_link = urlunsplit([
            parted_feed.scheme,
            parted_feed.netloc,
            "/".join(pathfeed),
            parted_link.query,
            parted_link.fragment
        ])
    else:
        # Sibling-relative pathname ("./x" or "x").
        pathlink = parted_link.path.split("/")
        pathfeed = parted_feed.path.split("/")
        if link.startswith("./"):
            pathlink.remove(".")
        if not source.endswith("/"):
            # Drop the feed's filename to obtain its directory.
            pathfeed.pop()
        pathlink = "/".join(pathlink)
        pathfeed.extend([pathlink])
        new_link = urlunsplit([
            parted_feed.scheme,
            parted_feed.netloc,
            "/".join(pathfeed),
            parted_link.query,
            parted_link.fragment
        ])
    return new_link
"""
TODO
Feed https://www.ocaml.org/feed.xml
Link %20https://frama-c.com/fc-versions/cobalt.html%20
FIXME
Feed https://cyber.dabamos.de/blog/feed.rss
Link https://cyber.dabamos.de/blog/#article-2022-07-15
"""
async def join_url(source, link):
    """
    Join base URL with given pathname.

    Parameters
    ----------
    source : str
        Feed URL.
    link : str
        Link URL or pathname.

    Returns
    -------
    str
        URL.
    """
    if link.startswith("www."):
        # Schemeless www link: assume plain HTTP.
        return "http://" + link
    if link.startswith("%20") and link.endswith("%20"):
        # Link wrapped in (and possibly littered with) encoded
        # spaces: drop every "%20" token.
        return link.replace("%20", "")
    # Regular case: resolve relative to the feed URL.
    return urljoin(source, link)
async def trim_url(url):
    """
    Check URL pathname for double slash.

    Collapses any "//" runs within the path component only; scheme,
    host, query and fragment are preserved as-is.

    Parameters
    ----------
    url : str
        URL.

    Returns
    -------
    url : str
        URL.
    """
    parts = urlsplit(url)
    path = parts.path
    # Repeat until no "//" remains (handles "///" and longer runs).
    while "//" in path:
        path = path.replace("//", "/")
    return urlunsplit([
        parts.scheme,
        parts.netloc,
        path,
        parts.query,
        parts.fragment
    ])
# TODO Improve scan by gradual decreasing of path
async def feed_mode_request(db_file, url, tree):
    """
    Lookup for feeds by pathname using HTTP Requests.

    Parameters
    ----------
    db_file : str
        Path to database file.
    url : str
        URL.
    tree : lxml.html.HtmlElement
        Parsed document tree (unused here; kept for signature
        parity with the other feed_mode_* functions).

    Returns
    -------
    msg : str or None
        Message with URLs, or None when no feed was found.
    """
    msg = None  # FIX: was unbound (UnboundLocalError) when no feed responded
    feeds = {}
    parted_url = urlsplit(url)
    paths = confighandler.get_list()
    for path in paths:
        address = urlunsplit([
            parted_url.scheme,
            parted_url.netloc,
            path,
            None,
            None
        ])
        res = await download_feed(address)
        if res[1] == 200:
            try:
                title = feedparser.parse(res[0])["feed"]["title"]
            except:
                title = '*** No Title ***'
            feeds[address] = title
        # Check whether URL has path (i.e. not root)
        segments = parted_url.path.split('/')
        # FIX: guard against IndexError when the URL path is empty
        if len(segments) > 1 and segments[1]:
            if '.rss' not in paths:
                # Also probe common feed suffixes on the first segment
                # (extends once; later iterations see '.rss' present).
                paths.extend([".atom", ".feed", ".rdf", ".rss"])
            # if paths.index('.rss'):
            #     paths.extend([".atom", ".feed", ".rdf", ".rss"])
            address = urlunsplit([
                parted_url.scheme,
                parted_url.netloc,
                segments[1] + path,
                None,
                None
            ])
            res = await download_feed(address)
            if res[1] == 200:
                try:
                    title = feedparser.parse(res[0])["feed"]["title"]
                except:
                    title = '*** No Title ***'
                feeds[address] = title
    if len(feeds) > 1:
        msg = (
            "RSS URL discovery has found {} feeds:\n```\n"
        ).format(len(feeds))
        for feed in feeds:
            feed_name = feeds[feed]
            feed_addr = feed
            msg += "{}\n{}\n\n".format(feed_name, feed_addr)
        msg += (
            "```\nThe above feeds were extracted from\n{}"
        ).format(url)
    elif feeds:
        feed_addr = list(feeds)[0]
        msg = await add_feed(db_file, feed_addr)
    return msg
async def feed_mode_scan(db_file, url, tree):
    """
    Scan page for potential feeds by pathname.

    Parameters
    ----------
    db_file : str
        Path to database file.
    url : str
        URL.
    tree : lxml.html.HtmlElement
        Parsed document tree whose anchors are scanned.

    Returns
    -------
    msg : str or None
        Message with URLs, or None when no feed was found.
    """
    msg = None  # FIX: explicit result; leftover debug prints removed
    feeds = {}
    paths = confighandler.get_list()
    # Loop-invariant: parse the page URL once (was inside the loop).
    parted_url = urlsplit(url)
    for path in paths:
        # xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
        xpath_query = "//a[contains(@href,'{}')]".format(path)
        addresses = tree.xpath(xpath_query)
        # NOTE Should number of addresses be limited or
        # perhaps be N from the start and N from the end
        for address in addresses:
            address = address.xpath('@href')[0]
            if "/" not in address:
                # Bare filename: attach it to the site root.
                address = urlunsplit([
                    parted_url.scheme,
                    parted_url.netloc,
                    address,
                    None,
                    None
                ])
            if address.startswith('/'):
                # Root-relative pathname: attach it to the site root.
                address = urlunsplit([
                    parted_url.scheme,
                    parted_url.netloc,
                    address,
                    None,
                    None
                ])
            res = await download_feed(address)
            if res[1] == 200:
                try:
                    feeds[address] = feedparser.parse(res[0])["feed"]["title"]
                except:
                    continue
    if len(feeds) > 1:
        msg = (
            "RSS URL scan has found {} feeds:\n```\n"
        ).format(len(feeds))
        for feed in feeds:
            # try:
            #     res = await download_feed(feed)
            # except:
            #     continue
            feed_name = feeds[feed]
            feed_addr = feed
            msg += "{}\n{}\n\n".format(feed_name, feed_addr)
        msg += (
            "```\nThe above feeds were extracted from\n{}"
        ).format(url)
    elif feeds:
        feed_addr = list(feeds)[0]
        msg = await add_feed(db_file, feed_addr)
    return msg
async def feed_mode_auto_discovery(db_file, url, tree):
    """
    Lookup for feeds using RSS autodiscovery technique.

    See: https://www.rssboard.org/rss-autodiscovery

    Parameters
    ----------
    db_file : str
        Path to database file.
    url : str
        URL.
    tree : lxml.html.HtmlElement
        Parsed document tree holding <link rel="alternate"> elements.

    Returns
    -------
    msg : str or None
        Message with URLs, or None when no feed was discovered.
    """
    xpath_query = (
        '//link[(@rel="alternate") and '
        '(@type="application/atom+xml" or '
        '@type="application/rdf+xml" or '
        '@type="application/rss+xml")]'
    )
    feeds = tree.xpath(xpath_query)
    if not feeds:
        return None
    if len(feeds) == 1:
        # Single candidate: subscribe to it directly.
        feed_addr = await join_url(url, feeds[0].xpath('@href')[0])
        return await add_feed(db_file, feed_addr)
    # Multiple candidates: list them for the user to choose from.
    entries = []
    for feed in feeds:
        feed_name = feed.xpath('@title')[0]
        feed_addr = await join_url(url, feed.xpath('@href')[0])
        entries.append("{}\n{}\n\n".format(feed_name, feed_addr))
    msg = (
        "RSS Auto-Discovery has found {} feeds:\n```\n"
    ).format(len(feeds))
    msg += "".join(entries)
    msg += (
        "```\nThe above feeds were extracted from\n{}"
    ).format(url)
    return msg

View file

@ -0,0 +1,81 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
https://feedparser.readthedocs.io/en/latest/date-parsing.html
"""
from datetime import datetime
from dateutil.parser import parse
from email.utils import parsedate
from email.utils import parsedate_to_datetime
async def now():
    """
    ISO 8601 Timestamp.

    Returns
    -------
    str
        ISO 8601 timestamp for the current moment.
    """
    return datetime.now().isoformat()
async def current_time():
    """
    Print HH:MM:SS timestamp.

    Returns
    -------
    str
        HH:MM:SS timestamp for the current moment.
    """
    return datetime.now().strftime("%H:%M:%S")
async def validate(date):
    """
    Validate date format.

    Parameters
    ----------
    date : str
        Timestamp.

    Returns
    -------
    date : str
        The given timestamp, or the current time in ISO 8601
        format when the given timestamp cannot be parsed.
    """
    try:
        parse(date)
    except:
        # FIX: was `date = now()`, which stored an un-awaited
        # coroutine object instead of a timestamp string.
        date = await now()
    return date
async def rfc2822_to_iso8601(date):
    """
    Convert RFC 2822 into ISO 8601.

    Parameters
    ----------
    date : str
        RFC 2822 Timestamp.

    Returns
    -------
    date : str
        ISO 8601 timestamp; the input is returned unchanged when
        it is not a valid RFC 2822 date.
    """
    if parsedate(date):
        try:
            date = parsedate_to_datetime(date).isoformat()
        except:
            # FIX: was `date = now()`, which stored an un-awaited
            # coroutine object instead of a timestamp string.
            date = await now()
    return date

105
slixfeed/filterhandler.py Normal file
View file

@ -0,0 +1,105 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TODO
1) Website-specific filter (i.e. audiobookbay).
2) Exclude websites from filtering (e.g. metapedia).
3) Filter phrases:
Refer to sqlitehandler.search_entries for implementation.
It is expected to be more complex than function search_entries.
"""
import sqlitehandler
async def set_filter(newwords, keywords):
    """
    Append new keywords to filter.

    Parameters
    ----------
    newwords : str
        Comma-separated list of new keywords.
    keywords : str or None
        Comma-separated list of current keywords; may be None (or
        any non-string) when no filter has been stored yet.

    Returns
    -------
    val : str
        Sorted, comma-separated list of current and new keywords.
    """
    try:
        keywords = keywords.split(",")
    except AttributeError:
        # FIX: narrowed from a bare `except:`; only a missing/non-string
        # stored value (no .split) should reset the list.
        keywords = []
    for word in newwords.lower().split(","):
        word = word.strip()
        # Skip empty tokens and duplicates.
        if word and word not in keywords:
            keywords.extend([word])
    keywords.sort()
    val = ",".join(keywords)
    return val
async def is_listed(db_file, type, string):
    """
    Check whether any keyword from a stored filter list occurs in a string.

    Parameters
    ----------
    db_file : str
        Path to the SQLite database file.
    type : str
        Filter type suffix (e.g. "allow" or "deny"); looked up as
        "filter-" + type in the settings table.
        NOTE: the name shadows the builtin but is kept for
        backward compatibility with keyword callers.
    string : str
        Text to scan for filter keywords.

    Returns
    -------
    int or None
        1 when a stored keyword (of length >= 2) is found in the
        string, otherwise None.
    """
    keywords = await sqlitehandler.get_settings_value(
        db_file,
        "filter-" + type
    )
    if not keywords:
        return None
    # Hoist the invariant lowercasing out of the loop (the original
    # recomputed string.lower() per keyword) and avoid shadowing the
    # builtin "list".
    haystack = string.lower()
    for keyword in keywords.split(","):
        # Skip empty or one-character artifacts in the stored list.
        if not keyword or len(keyword) < 2:
            continue
        if keyword in haystack:
            print(">>> ACTIVATE", keyword)
            return 1
    return None
"""
This code was tested at module datahandler
reject = 0
blacklist = await sqlitehandler.get_settings_value(
db_file,
"filter-deny"
)
# print(">>> blacklist:")
# print(blacklist)
# breakpoint()
if blacklist:
blacklist = blacklist.split(",")
# print(">>> blacklist.split")
# print(blacklist)
# breakpoint()
for i in blacklist:
# print(">>> length", len(i))
# breakpoint()
# if len(i):
if not i or len(i) < 2:
print(">>> continue due to length", len(i))
# breakpoint()
continue
# print(title)
# print(">>> blacklisted word:", i)
# breakpoint()
test = (title + " " + summary + " " + link)
if i in test.lower():
reject = 1
break
if reject:
print("rejected:",title)
entry = (title, '', link, source, date, 1);
"""

56
slixfeed/opmlhandler.py Normal file
View file

@ -0,0 +1,56 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
{
'bozo': False,
'bozo_exception': None,
'feeds': [
{
'url': 'https://kurtmckee.org/tag/listparser/feed',
'title': 'listparser blog',
'categories': [],
'tags': []
},
{
'url': 'https://github.com/kurtmckee/listparser/commits/develop.atom',
'title': 'listparser changelog',
'categories': [],
'tags': []
}
],
'lists': [],
'opportunities': [],
'meta': {
'title': 'listparser project feeds',
'author': {
'name': 'Kurt McKee',
'email': 'contactme@kurtmckee.org',
'url': 'https://kurtmckee.org/'
}
},
'version': 'opml2'
}
"""
import listparser
import lxml
import sqlitehandler
import datahandler
async def import_opml(db_file, opml_doc):
    """
    Import feeds from an OPML document into the subscription database.

    Parameters
    ----------
    db_file : str
        Path to the SQLite database file.
    opml_doc : str
        OPML document (or any input accepted by listparser.parse).
    """
    parsed = listparser.parse(opml_doc)
    for entry in parsed['feeds']:
        # Each entry also carries 'categories' and 'tags', which are
        # not stored yet.
        await datahandler.add_feed_no_check(
            db_file,
            [entry['url'], entry['title']]
        )
# NOTE Use OPyML or LXML
async def export_opml():
    """
    Collect all feeds for OPML export.

    Returns
    -------
    ?
        Whatever sqlitehandler.get_feeds() yields — presumably the
        subscription rows; TODO confirm and serialize into an OPML
        document (e.g. with lxml).
    """
    # BUG FIX: the original fetched the feeds and discarded them
    # (implicit return of None), making the function useless to
    # callers.
    result = await sqlitehandler.get_feeds()
    return result

File diff suppressed because it is too large Load diff

View file

@ -1,38 +1,56 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from datetime import datetime """
TODO
1) Deprecate "add" (see above) and make it interactive.
Slixfeed: Do you still want to add this URL to subscription list?
See: case _ if message_lowercase.startswith("add"):
2) Use loop (with gather) instead of TaskGroup
"""
import asyncio import asyncio
import os import os
import slixmpp import slixmpp
from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
import confighandler import confighandler
import datahandler import datahandler
import datetimehandler
import filterhandler
import sqlitehandler import sqlitehandler
main_task = []
jid_tasker = {} jid_tasker = {}
task_manager = {} task_manager = {}
loop = asyncio.get_event_loop()
# asyncio.set_event_loop(loop)
time_now = datetime.now() # time_now = datetime.now()
# time_now = time_now.strftime("%H:%M:%S") # time_now = time_now.strftime("%H:%M:%S")
def print_time(): # def print_time():
# return datetime.now().strftime("%H:%M:%S") # # return datetime.now().strftime("%H:%M:%S")
now = datetime.now() # now = datetime.now()
current_time = now.strftime("%H:%M:%S") # current_time = now.strftime("%H:%M:%S")
return current_time # return current_time
async def handle_event():
print("Event handled!")
class Slixfeed(slixmpp.ClientXMPP): class Slixfeed(slixmpp.ClientXMPP):
""" """
Slixmpp news bot that will send updates Slixmpp
from feeds it receives. -------
News bot that sends updates from RSS feeds.
""" """
print("slixmpp.ClientXMPP")
print(repr(slixmpp.ClientXMPP))
def __init__(self, jid, password): def __init__(self, jid, password):
slixmpp.ClientXMPP.__init__(self, jid, password) slixmpp.ClientXMPP.__init__(self, jid, password)
@ -52,7 +70,7 @@ class Slixfeed(slixmpp.ClientXMPP):
self.add_event_handler("message", self.message) self.add_event_handler("message", self.message)
self.add_event_handler("disconnected", self.reconnect) self.add_event_handler("disconnected", self.reconnect)
# Initialize event loop # Initialize event loop
self.loop = asyncio.get_event_loop() # self.loop = asyncio.get_event_loop()
async def start(self, event): async def start(self, event):
@ -70,116 +88,316 @@ class Slixfeed(slixmpp.ClientXMPP):
""" """
self.send_presence() self.send_presence()
await self.get_roster() await self.get_roster()
await self.select_file()
self.send_presence( # for task in main_task:
pshow="away", # task.cancel()
pstatus="Slixmpp has been restarted.", if not main_task:
pto="sch@pimux.de" await self.select_file()
)
async def message(self, msg): async def message(self, msg):
""" """
Process incoming message stanzas. Be aware that this also Process incoming message stanzas. Be aware that this also
includes MUC messages and error messages. It is usually includes MUC messages and error messages. It is usually
a good idea to check the messages's type before processing a good practice to check the messages's type before
or sending replies. processing or sending replies.
Arguments: Parameters
msg -- The received message stanza. See the documentation ----------
for stanza objects and the Message stanza to see self : ?
how it may be used. Self.
msg : str
The received message stanza. See the documentation
for stanza objects and the Message stanza to see
how it may be used.
""" """
if msg["type"] in ("chat", "normal"): if msg["type"] in ("chat", "normal"):
action = 0 action = 0
jid = msg["from"].bare jid = msg["from"].bare
db_dir = confighandler.get_default_dbdir()
os.chdir(db_dir)
if jid + ".db" not in os.listdir():
await self.task_jid(jid)
message = " ".join(msg["body"].split()) message = " ".join(msg["body"].split())
message = message.lower() message_lowercase = message.lower()
if message.startswith("help"):
action = print_help() print(await datetimehandler.current_time(), "ACCOUNT: " + str(msg["from"]))
# NOTE: Might not need it print(await datetimehandler.current_time(), "COMMAND:", message)
# elif message.startswith("add "):
# url = message[4:] match message_lowercase:
elif message.startswith("http"): case "help":
url = message action = print_help()
action = await initdb(jid, datahandler.add_feed, url) case _ if message_lowercase in ["greetings", "hello", "hey"]:
# action = "> " + message + "\n" + action action = (
elif message.startswith("quantum "): "Greeting! I'm Slixfeed The News Bot!"
key = message[:7] "\n"
val = message[8:] "Send a URL of a news website to start."
# action = "Every update will contain {} news items.".format(action) )
action = await initdb(jid, sqlitehandler.set_settings_value, [key, val]) case _ if message_lowercase.startswith("add"):
await self.refresh_task(jid, key, val) message = message[4:]
elif message.startswith("interval "): url = message.split(" ")[0]
key = message[:8] title = " ".join(message.split(" ")[1:])
val = message[9:] if url.startswith("http"):
# action = "Updates will be sent every {} minutes.".format(action) action = await datahandler.initdb(
action = await initdb(jid, sqlitehandler.set_settings_value, [key, val]) jid,
await self.refresh_task(jid, key, val) datahandler.add_feed_no_check,
elif message.startswith("list"): [url, title]
action = await initdb(jid, sqlitehandler.list_subscriptions) )
elif message.startswith("recent "): await self.send_status(jid)
num = message[7:] else:
action = await initdb(jid, sqlitehandler.last_entries, num) action = "Missing URL."
elif message.startswith("remove "): case _ if message_lowercase.startswith("allow"):
ix = message[7:] key = "filter-" + message[:5]
action = await initdb(jid, sqlitehandler.remove_feed, ix) val = message[6:]
elif message.startswith("search "): if val:
query = message[7:] keywords = await datahandler.initdb(
action = await initdb(jid, sqlitehandler.search_entries, query) jid,
elif message.startswith("start"): sqlitehandler.get_settings_value,
# action = "Updates are enabled." key
key = "enabled" )
val = 1 val = await filterhandler.set_filter(
actiona = await initdb(jid, sqlitehandler.set_settings_value, [key, val]) val,
asyncio.create_task(self.task_jid(jid)) keywords
# print(print_time(), "task_manager[jid]") )
# print(task_manager[jid]) await datahandler.initdb(
elif message.startswith("stats"): jid,
action = await initdb(jid, sqlitehandler.statistics) sqlitehandler.set_settings_value,
elif message.startswith("status "): [key, val]
ix = message[7:] )
action = await initdb(jid, sqlitehandler.toggle_status, ix) action = (
elif message.startswith("stop"): "Approved keywords\n"
"```\n{}\n```"
).format(val)
else:
action = "Missing keywords."
case _ if message_lowercase.startswith("deny"):
key = "filter-" + message[:4]
val = message[5:]
if val:
keywords = await datahandler.initdb(
jid,
sqlitehandler.get_settings_value,
key
)
val = await filterhandler.set_filter(
val,
keywords
)
await datahandler.initdb(
jid,
sqlitehandler.set_settings_value,
[key, val]
)
action = (
"Rejected keywords\n"
"```\n{}\n```"
).format(val)
else:
action = "Missing keywords."
case _ if message_lowercase.startswith("http"):
url = message
action = await datahandler.initdb(
jid,
datahandler.add_feed,
url
)
# action = "> " + message + "\n" + action
await self.send_status(jid)
case _ if message_lowercase.startswith("feeds"):
query = message[6:]
if query:
if len(query) > 3:
action = await datahandler.initdb(
jid,
sqlitehandler.search_feeds,
query
)
else:
action = (
"Enter at least 4 characters to search"
)
else:
action = await datahandler.initdb(
jid,
sqlitehandler.list_feeds
)
case _ if message_lowercase.startswith("interval"):
# FIXME
# The following error occurs only upon first attempt to set interval.
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
# self._args = None
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
key = message[:8]
val = message[9:]
if val:
# action = (
# "Updates will be sent every {} minutes."
# ).format(action)
await datahandler.initdb(
jid,
sqlitehandler.set_settings_value,
[key, val]
)
await self.refresh_task(
jid,
self.send_update,
key,
val
)
action = (
"Updates will be sent every {} minutes."
).format(val)
else:
action = "Missing value."
case _ if message_lowercase.startswith("next"):
num = message[5:]
await self.send_update(jid, num)
await self.send_status(jid)
# await self.refresh_task(jid, key, val)
case _ if message_lowercase.startswith("quantum"):
key = message[:7]
val = message[8:]
if val:
# action = (
# "Every update will contain {} news items."
# ).format(action)
await datahandler.initdb(
jid,
sqlitehandler.set_settings_value,
[key, val]
)
action = (
"Next update will contain {} news items."
).format(val)
else:
action = "Missing value."
case _ if message_lowercase.startswith("random"):
action = "Updates will be sent randomly."
case _ if message_lowercase.startswith("recent"):
num = message[7:]
if num:
action = await datahandler.initdb(
jid,
sqlitehandler.last_entries,
num
)
else:
action = "Missing value."
case _ if message_lowercase.startswith("remove"):
ix = message[7:]
if ix:
action = await datahandler.initdb(
jid,
sqlitehandler.remove_feed,
ix
)
await self.send_status(jid)
else:
action = "Missing feed ID."
case _ if message_lowercase.startswith("search"):
query = message[7:]
if query:
if len(query) > 1:
action = await datahandler.initdb(
jid,
sqlitehandler.search_entries,
query
)
else:
action = (
"Enter at least 2 characters to search"
)
else:
action = "Missing search query."
case "start":
# action = "Updates are enabled."
key = "enabled"
val = 1
await datahandler.initdb(
jid,
sqlitehandler.set_settings_value,
[key, val]
)
asyncio.create_task(self.task_jid(jid))
action = "Updates are enabled."
# print(await datetimehandler.current_time(), "task_manager[jid]")
# print(task_manager[jid])
case "stats":
action = await datahandler.initdb(
jid,
sqlitehandler.statistics
)
case _ if message_lowercase.startswith("status "):
ix = message[7:]
action = await datahandler.initdb(
jid,
sqlitehandler.toggle_status,
ix
)
case "stop":
# FIXME
# The following error occurs only upon first attempt to stop.
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
# self._args = None
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
# action = "Updates are disabled." # action = "Updates are disabled."
try: # try:
task_manager[jid]["check"].cancel() # # task_manager[jid]["check"].cancel()
# task_manager[jid]["status"].cancel() # # task_manager[jid]["status"].cancel()
task_manager[jid]["interval"].cancel() # task_manager[jid]["interval"].cancel()
# key = "enabled"
# val = 0
# action = await datahandler.initdb(
# jid,
# sqlitehandler.set_settings_value,
# [key, val]
# )
# except:
# action = "Updates are already disabled."
# # print("Updates are already disabled. Nothing to do.")
# # await self.send_status(jid)
key = "enabled" key = "enabled"
val = 0 val = 0
actiona = await initdb(jid, sqlitehandler.set_settings_value, [key, val]) await datahandler.initdb(
await self.send_status(jid) jid,
print(print_time(), "task_manager[jid]") sqlitehandler.set_settings_value,
print(task_manager[jid]) [key, val]
except: )
# action = "Updates are already disabled." await self.task_jid(jid)
await self.send_status(jid) action = "Updates are disabled."
else: case "support":
action = "Unknown command. Press \"help\" for list of commands" # TODO Send an invitation.
action = "xmpp:slixmpp@muc.poez.io?join"
case _:
action = (
"Unknown command. "
"Press \"help\" for list of commands"
)
if action: msg.reply(action).send() if action: msg.reply(action).send()
print(print_time(), "COMMAND ACCOUNT")
print("COMMAND:", message)
print("ACCOUNT: " + str(msg["from"]))
async def select_file(self): async def select_file(self):
""" """
Initiate actions by JID (Jabber ID). Initiate actions by JID (Jabber ID).
:param self: Self Parameters
----------
self : ?
Self.
""" """
while True: while True:
db_dir = confighandler.get_default_dbdir() db_dir = confighandler.get_default_dbdir()
if not os.path.isdir(db_dir): if not os.path.isdir(db_dir):
msg = ("Slixfeed can not work without a database. \n" msg = (
"To create a database, follow these steps: \n" "Slixfeed can not work without a database.\n"
"Add Slixfeed contact to your roster \n" "To create a database, follow these steps:\n"
"Send a feed to the bot by: \n" "Add Slixfeed contact to your roster.\n"
"add https://reclaimthenet.org/feed/") "Send a feed to the bot by URL:\n"
print(print_time(), msg) "https://reclaimthenet.org/feed/"
)
# print(await datetimehandler.current_time(), msg)
print(msg) print(msg)
else: else:
os.chdir(db_dir) os.chdir(db_dir)
@ -191,114 +409,165 @@ class Slixfeed(slixmpp.ClientXMPP):
# jid_tasker[jid] = asyncio.create_task(self.task_jid(jid)) # jid_tasker[jid] = asyncio.create_task(self.task_jid(jid))
# await jid_tasker[jid] # await jid_tasker[jid]
async with asyncio.TaskGroup() as tg: async with asyncio.TaskGroup() as tg:
print("main task")
print(print_time(), "repr(tg)")
print(repr(tg)) # <TaskGroup entered>
for file in files: for file in files:
if file.endswith(".db") and not file.endswith(".db-jour.db"): if file.endswith(".db") and not file.endswith(".db-jour.db"):
jid = file[:-3] jid = file[:-3]
tg.create_task(self.task_jid(jid)) main_task.extend([tg.create_task(self.task_jid(jid))])
# main_task = [tg.create_task(self.task_jid(jid))]
# task_manager.update({jid: tg}) # task_manager.update({jid: tg})
# print(task_manager) # {}
print(print_time(), "repr(tg) id(tg)")
print(jid, repr(tg)) # sch@pimux.de <TaskGroup tasks=1 entered>
print(jid, id(tg)) # sch@pimux.de 139879835500624
# <xmpphandler.Slixfeed object at 0x7f24922124d0> <TaskGroup tasks=2 entered>
# <xmpphandler.Slixfeed object at 0x7f24922124d0> 139879835500624
async def task_jid(self, jid): async def task_jid(self, jid):
""" """
JID (Jabber ID) task manager. JID (Jabber ID) task manager.
:param self: Self Parameters
:param jid: Jabber ID ----------
self : ?
Self.
jid : str
Jabber ID.
""" """
enabled = await initdb( enabled = await datahandler.initdb(
jid, jid,
sqlitehandler.get_settings_value, sqlitehandler.get_settings_value,
"enabled" "enabled"
) )
print(print_time(), "enabled", enabled, jid) # print(await datetimehandler.current_time(), "enabled", enabled, jid)
if enabled: if enabled:
print("sub task")
print(print_time(), "repr(self) id(self)")
print(repr(self))
print(id(self))
task_manager[jid] = {} task_manager[jid] = {}
task_manager[jid]["check"] = asyncio.create_task(check_updates(jid)) task_manager[jid]["check"] = asyncio.create_task(
task_manager[jid]["status"] = asyncio.create_task(self.send_status(jid)) check_updates(jid)
task_manager[jid]["interval"] = asyncio.create_task(self.send_update(jid)) )
task_manager[jid]["status"] = asyncio.create_task(
self.send_status(jid)
)
task_manager[jid]["interval"] = asyncio.create_task(
self.send_update(jid)
)
await task_manager[jid]["check"] await task_manager[jid]["check"]
await task_manager[jid]["status"] await task_manager[jid]["status"]
await task_manager[jid]["interval"] await task_manager[jid]["interval"]
print(print_time(), "task_manager[jid].items()")
print(task_manager[jid].items())
print(print_time(), "task_manager[jid]")
print(task_manager[jid])
print(print_time(), "task_manager")
print(task_manager)
else: else:
# FIXME
# The following error occurs only upon first attempt to stop.
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
# self._args = None
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
try:
task_manager[jid]["interval"].cancel()
except:
None
await self.send_status(jid) await self.send_status(jid)
async def send_update(self, jid):
async def send_update(self, jid, num=None):
""" """
Send news items as messages. Send news items as messages.
:param self: Self Parameters
:param jid: Jabber ID ----------
self : ?
Self.
jid : str
Jabber ID.
num : str, optional
Number. The default is None.
""" """
new = await initdb( # print("Starting send_update()")
# print(jid)
new = await datahandler.initdb(
jid, jid,
sqlitehandler.get_entry_unread sqlitehandler.get_entry_unread,
num
) )
if new: if new:
print(print_time(), "> SEND UPDATE",jid) print(await datetimehandler.current_time(), "> SEND UPDATE",jid)
self.send_message( self.send_message(
mto=jid, mto=jid,
mbody=new, mbody=new,
mtype="chat" mtype="chat"
) )
interval = await initdb( await self.refresh_task(
jid, jid,
sqlitehandler.get_settings_value, self.send_update,
"interval" "interval"
) )
# interval = await datahandler.initdb(
# jid,
# sqlitehandler.get_settings_value,
# "interval"
# )
# task_manager[jid]["interval"] = loop.call_at(
# loop.time() + 60 * interval,
# loop.create_task,
# self.send_update(jid)
# )
# print(await datetimehandler.current_time(), "asyncio.get_event_loop().time()")
# print(await datetimehandler.current_time(), asyncio.get_event_loop().time())
# await asyncio.sleep(60 * interval) # await asyncio.sleep(60 * interval)
self.loop.call_at(
self.loop.time() + 60 * interval, # loop.call_later(
self.loop.create_task, # 60 * interval,
self.send_update(jid) # loop.create_task,
) # self.send_update(jid)
# )
# print
# await handle_event()
async def send_status(self, jid): async def send_status(self, jid):
""" """
Send status message. Send status message.
:param self: Self Parameters
:param jid: Jabber ID ----------
self : ?
Self.
jid : str
Jabber ID.
""" """
print(print_time(), "> SEND STATUS",jid) print(await datetimehandler.current_time(), "> SEND STATUS",jid)
unread = await initdb( enabled = await datahandler.initdb(
jid,
sqlitehandler.get_number_of_entries_unread
)
if unread:
status_text = "📰 News items: {}".format(str(unread))
status_mode = "chat"
else:
status_text = "🗞 No News"
status_mode = "available"
enabled = await initdb(
jid, jid,
sqlitehandler.get_settings_value, sqlitehandler.get_settings_value,
"enabled" "enabled"
) )
if not enabled: if not enabled:
status_mode = "xa" status_mode = "xa"
status_text = "Send \"Start\" to receive news."
else:
feeds = await datahandler.initdb(
jid,
sqlitehandler.get_number_of_items,
"feeds"
)
if not feeds:
status_mode = "available"
status_text = (
"📂️ Send a URL from a blog or a news website."
)
else:
unread = await datahandler.initdb(
jid,
sqlitehandler.get_number_of_entries_unread
)
if unread:
status_mode = "chat"
status_text = (
"📰 You have {} news items to read."
).format(str(unread))
# status_text = (
# "📰 News items: {}"
# ).format(str(unread))
# status_text = (
# "📰 You have {} news items"
# ).format(str(unread))
else:
status_mode = "available"
status_text = "🗞 No news"
# print(status_text, "for", jid) # print(status_text, "for", jid)
self.send_presence( self.send_presence(
@ -306,37 +575,55 @@ class Slixfeed(slixmpp.ClientXMPP):
pstatus=status_text, pstatus=status_text,
pto=jid, pto=jid,
#pfrom=None #pfrom=None
) )
# await asyncio.sleep(60 * 20)
await asyncio.sleep(60 * 20) await self.refresh_task(
jid,
# self.loop.call_at( self.send_status,
# self.loop.time() + 60 * 20, "status",
# self.loop.create_task, "20"
)
# loop.call_at(
# loop.time() + 60 * 20,
# loop.create_task,
# self.send_status(jid) # self.send_status(jid)
# ) # )
async def refresh_task(self, jid, key, val): async def refresh_task(self, jid, callback, key, val=None):
""" """
Apply settings on runtime. Apply new setting at runtime.
:param self: Self Parameters
:param jid: Jabber ID ----------
:param key: Key self : ?
:param val: Value Self.
jid : str
Jabber ID.
key : str
Key.
val : str, optional
Value. The default is None.
""" """
if not val:
val = await datahandler.initdb(
jid,
sqlitehandler.get_settings_value,
key
)
if jid in task_manager: if jid in task_manager:
task_manager[jid][key].cancel() task_manager[jid][key].cancel()
loop = asyncio.get_event_loop()
print(print_time(), "loop")
print(loop)
print(print_time(), "loop")
task_manager[jid][key] = loop.call_at( task_manager[jid][key] = loop.call_at(
loop.time() + 60 * float(val), loop.time() + 60 * float(val),
loop.create_task, loop.create_task,
self.send_update(jid) callback(jid)
# self.send_update(jid)
) )
# task_manager[jid][key] = loop.call_later(
# 60 * float(val),
# loop.create_task,
# self.send_update(jid)
# )
# task_manager[jid][key] = self.send_update.loop.call_at( # task_manager[jid][key] = self.send_update.loop.call_at(
# self.send_update.loop.time() + 60 * val, # self.send_update.loop.time() + 60 * val,
# self.send_update.loop.create_task, # self.send_update.loop.create_task,
@ -350,16 +637,19 @@ async def check_updates(jid):
""" """
Start calling for update check up. Start calling for update check up.
:param jid: Jabber ID Parameters
----------
jid : str
Jabber ID.
""" """
while True: while True:
print(print_time(), "> CHCK UPDATE",jid) print(await datetimehandler.current_time(), "> CHCK UPDATE",jid)
await initdb(jid, datahandler.download_updates) await datahandler.initdb(jid, datahandler.download_updates)
await asyncio.sleep(60 * 90) await asyncio.sleep(60 * 90)
# Schedule to call this function again in 90 minutes # Schedule to call this function again in 90 minutes
# self.loop.call_at( # loop.call_at(
# self.loop.time() + 60 * 90, # loop.time() + 60 * 90,
# self.loop.create_task, # loop.create_task,
# self.check_updates(jid) # self.check_updates(jid)
# ) # )
@ -367,84 +657,123 @@ async def check_updates(jid):
def print_help(): def print_help():
""" """
Print help manual. Print help manual.
Returns
-------
msg : str
Message.
""" """
msg = ("Slixfeed - News syndication bot for Jabber/XMPP \n" msg = (
"\n" "```\n"
"DESCRIPTION: \n" "NAME\n"
" Slixfeed is a news aggregator bot for online news feeds. \n" "Slixfeed - News syndication bot for Jabber/XMPP\n"
" Supported filetypes: Atom, RDF and RSS. \n" "\n"
"\n" "DESCRIPTION\n"
"BASIC USAGE: \n" " Slixfeed is a news aggregator bot for online news feeds.\n"
" start \n" " This program is primarily designed for XMPP.\n"
" Enable bot and send updates. \n" " For more information, visit https://xmpp.org/software/\n"
" Stop \n" "\n"
" Disable bot and stop updates. \n" "BASIC USAGE\n"
" batch N \n" " start\n"
" Send N updates for each interval. \n" " Enable bot and send updates.\n"
" interval N \n" " stop\n"
" Send an update every N minutes. \n" " Disable bot and stop updates.\n"
" feed list \n" " feeds\n"
" List subscriptions. \n" " List subscriptions.\n"
"\n" " interval N\n"
"EDIT OPTIONS: \n" " Set interval update to every N minutes.\n"
" add URL \n" " next N\n"
" Add URL to subscription list. \n" " Send N next updates.\n"
" remove ID \n" " quantum N\n"
" Remove feed from subscription list. \n" " Set N updates for each interval.\n"
" status ID \n" "\n"
" Toggle update status of feed. \n" "FILTER OPTIONS\n"
"\n" " allow\n"
"SEARCH OPTIONS: \n" " Keywords to allow (comma separates).\n"
" search TEXT \n" " deny\n"
" Search news items by given keywords. \n" " Keywords to block (comma separates).\n"
" recent N \n" # " filter clear allow\n"
" List recent N news items (up to 50 items). \n" # " Reset allow list.\n"
"\n" # " filter clear deny\n"
"STATISTICS OPTIONS: \n" # " Reset deny list.\n"
" analyses \n" "\n"
" Show report and statistics of feeds. \n" "EDIT OPTIONS\n"
" obsolete \n" " URL\n"
" List feeds that are not available. \n" " Add URL to subscription list.\n"
" unread \n" " add URL TITLE\n"
" Print number of unread news items. \n" " Add URL to subscription list (without validity check).\n"
"\n" " remove ID\n"
"BACKUP OPTIONS: \n" " Remove feed from subscription list.\n"
" export opml \n" " status ID\n"
" Send an OPML file with your feeds. \n" " Toggle update status of feed.\n"
" backup news html\n" "\n"
" Send an HTML formatted file of your news items. \n" "SEARCH OPTIONS\n"
" backup news md \n" " feeds TEXT\n"
" Send a Markdown file of your news items. \n" " Search subscriptions by given keywords.\n"
" backup news text \n" " search TEXT\n"
" Send a Plain Text file of your news items. \n" " Search news items by given keywords.\n"
"\n" " recent N\n"
"DOCUMENTATION: \n" " List recent N news items (up to 50 items).\n"
" Slixfeed \n" "\n"
" https://gitgud.io/sjehuda/slixfeed \n" # "STATISTICS OPTIONS\n"
" Slixmpp \n" # " analyses\n"
" https://slixmpp.readthedocs.io/ \n" # " Show report and statistics of feeds.\n"
" feedparser \n" # " obsolete\n"
" https://pythonhosted.org/feedparser") # " List feeds that are not available.\n"
# " unread\n"
# " Print number of unread news items.\n"
# "\n"
# "BACKUP OPTIONS\n"
# " export opml\n"
# " Send an OPML file with your feeds.\n"
# " backup news html\n"
# " Send an HTML formatted file of your news items.\n"
# " backup news md\n"
# " Send a Markdown file of your news items.\n"
# " backup news text\n"
# " Send a Plain Text file of your news items.\n"
# "\n"
"SUPPORT\n"
" support"
" Join xmpp:slixmpp@muc.poez.io?join\n"
"\n"
# "PROTOCOLS\n"
# " Supported prootcols are IRC, Matrix and XMPP.\n"
# " For the best experience, we recommend you to use XMPP.\n"
# "\n"
"FILETYPES\n"
" Supported filetypes are Atom, RDF and RSS.\n"
"\n"
"AUTHORS\n"
" Laura Harbinger, Schimon Zackary.\n"
"\n"
"COPYRIGHT\n"
" Slixfeed is free software; you can redistribute it and/or\n"
" modify it under the terms of the GNU General Public License\n"
" as published by the Free Software Foundation; version 3 only\n"
"\n"
" Slixfeed is distributed in the hope that it will be useful,\n"
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
" GNU General Public License for more details.\n"
"\n"
"NOTE\n"
" Make Slixfeed your own.\n"
"\n"
" You can run Slixfeed on your own computer, server, and\n"
" even on a Linux phone (i.e. Droidian, Mobian NixOS,\n"
" postmarketOS). You can also use Termux.\n"
"\n"
" All you need is one of the above and an XMPP account to\n"
" connect Slixfeed to.\n"
"\n"
"DOCUMENTATION\n"
" Slixfeed\n"
" https://gitgud.io/sjehuda/slixfeed\n"
" Slixmpp\n"
" https://slixmpp.readthedocs.io/\n"
" feedparser\n"
" https://pythonhosted.org/feedparser\n"
"\n```"
)
return msg return msg
# TODO Perhaps this needs to be executed
# just once per program execution
async def initdb(jid, callback, message=None):
"""
Callback function to instantiate action on database.
:param jid: JID (Jabber ID).
:param callback: Function name.
:param massage: Optional kwarg when a message is a part or required argument.
"""
db_dir = confighandler.get_default_dbdir()
if not os.path.isdir(db_dir):
os.mkdir(db_dir)
db_file = os.path.join(db_dir, r"{}.db".format(jid))
sqlitehandler.create_tables(db_file)
# await sqlitehandler.set_default_values(db_file)
if message:
return await callback(db_file, message)
else:
return await callback(db_file)