Update 8 files

- /slixfeed/sqlitehandler.py
- /slixfeed/xmpphandler.py
- /slixfeed/opmlhandler.py
- /slixfeed/datahandler.py
- /slixfeed/datetimehandler.py
- /slixfeed/__main__.py
- /slixfeed/confighandler.py
- /slixfeed/filterhandler.py
Schimon Jehudah 2023-11-13 13:45:10 +00:00
parent 9d6a211d36
commit 031eb6ce53
8 changed files with 2535 additions and 751 deletions

slixfeed/__main__.py

@@ -1,13 +1,42 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# TODO
#
# 0) sql prepared statements
# 1) Autodetect feed:
# if page is not feed (or HTML) and contains <link rel="alternate">
# 2) OPML import/export
# 3) 2022-12-30 reduce async to (maybe) prevent inner lock. async on task: commands, downloader, updater
"""
FIXME
1) Check feed duplication on runtime.
When a feed is valid and is not yet in the database, it is
possible to send a batch which would result in duplication.
Consequently, it might result in a database lock error upon
a feed removal attempt.
TODO
1) SQL prepared statements (see the sketch below)
2) Machine Learning for scraping Title, Link, Summary and Timestamp
3) Support MUC
4) Support categories
5) Default prepackaged list of feeds
6) XMPP commands
7) Bot as transport
8) OMEMO
9) Logging
10) Default feeds (e.g. Blacklisted News, TBOT etc.)
11) Download and upload/send article (xHTML, xHTMLZ, Markdown, MHTML, TXT)
Use Readability
"""
# vars and their meanings:
# jid = Jabber ID (XMPP)
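
As a note on TODO item 1: a minimal sketch of prepared (parameterized) statements with Python's sqlite3 module. The table and column names here are illustrative, not the project's actual schema.

```
import sqlite3

# Hypothetical table/column names; the real schema lives in sqlitehandler.py.
def get_feed_by_url(db_file, url):
    with sqlite3.connect(db_file) as conn:
        cur = conn.cursor()
        # "?" placeholders bind values safely instead of
        # interpolating them into the SQL string.
        cur.execute(
            "SELECT id, name FROM feeds WHERE address = ?",
            (url,))
        return cur.fetchone()
```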

slixfeed/confighandler.py

@@ -1,6 +1,15 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TODO
1) Use file settings.csv and pathnames.txt instead:
See get_value_default and get_default_list
"""
import os
import sys
@@ -8,16 +17,21 @@ def get_default_dbdir():
"""
Determine the directory path where dbfile will be stored.
If $XDG_DATA_HOME is defined, use it
else if $HOME exists, use it
else if the platform is Windows, use %APPDATA%
else use the current directory.
* If $XDG_DATA_HOME is defined, use it;
* else if $HOME exists, use it;
* else if the platform is Windows, use %APPDATA%;
* else use the current directory.
:return: Path to database file.
Returns
-------
str
Path to database file.
Note
----
This code was taken from the buku project.
This function was taken from the buku project.
See https://github.com/jarun/buku
* Arun Prakash Jana (jarun)
* Dmitry Marakasov (AMDmi3)
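
For reference, the fallback chain documented above condenses to roughly the following; the function name and exact body are illustrative, since the hunk truncates the real implementation.

```
import os
import sys

def resolve_data_home():
    # Resolution order documented above: XDG, then HOME,
    # then %APPDATA% on Windows, then the current directory.
    data_home = os.environ.get("XDG_DATA_HOME")
    if data_home:
        return data_home
    if os.environ.get("HOME"):
        return os.path.join(os.environ["HOME"], ".local", "share")
    if sys.platform == "win32" and os.environ.get("APPDATA"):
        return os.environ["APPDATA"]
    return os.path.abspath(".")
```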
@@ -41,12 +55,15 @@ def get_default_confdir():
"""
Determine the directory path where configuration will be stored.
If $XDG_CONFIG_HOME is defined, use it
else if $HOME exists, use it
else if the platform is Windows, use %APPDATA%
else use the current directory.
* If $XDG_CONFIG_HOME is defined, use it;
* else if $HOME exists, use it;
* else if the platform is Windows, use %APPDATA%;
* else use the current directory.
:return: Path to configuration directory.
Returns
-------
str
Path to configuration directory.
"""
# config_home = xdg.BaseDirectory.xdg_config_home
config_home = os.environ.get('XDG_CONFIG_HOME')
@@ -67,24 +84,69 @@ async def get_value_default(key):
"""
Get settings default value.
:param key: "enabled", "interval", "quantum".
:return: Integer.
Parameters
----------
key : str
Key: enabled, filter-allow, filter-deny,
interval, quantum, random.
Returns
-------
result : int or str
Value.
"""
if key == "enabled":
result = 1
elif key == "quantum":
result = 4
elif key == "interval":
result = 30
match key:
case "enabled":
result = 1
case "filter-allow":
result = "hitler,sadam,saddam"
case "filter-deny":
result = "crim,dead,death,disaster,holocaust,murder,war"
case "interval":
result = 30
case "quantum":
result = 4
case "random":
result = 0
return result
def get_list():
"""
Get list of pathnames.
Returns
-------
paths : list
List of pathnames.
"""
paths = []
cfg_dir = get_default_confdir()
if not os.path.isdir(cfg_dir):
os.mkdir(cfg_dir)
cfg_file = os.path.join(cfg_dir, r"url_paths.txt")
if not os.path.isfile(cfg_file):
# confighandler.generate_dictionary()
list = get_default_list()
file = open(cfg_file, "w")
file.writelines("\n".join(list))
file.close()
file = open(cfg_file, "r")
lines = file.readlines()
for line in lines:
paths.extend([line.strip()])
return paths
# async def generate_dictionary():
def get_default_list():
"""
Generate a list of pathnames.
:return: List.
Returns
-------
paths : list
List of pathnames.
"""
paths = [
".atom",
@@ -139,6 +201,8 @@ def get_default_list():
# "/rss.json",
"/rss.php",
"/rss.xml",
"/syndication.php?type=atom1.0", #mybb
"/syndication.php?type=rss2.0",
"/timeline.rss",
"/videos.atom",
# "/videos.json",

slixfeed/datahandler.py

@@ -1,29 +1,75 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import feedparser
import aiohttp
import asyncio
import feedparser
import os
import sqlitehandler
import confighandler
import datetimehandler
import filterhandler
from http.client import IncompleteRead
from asyncio.exceptions import IncompleteReadError
from http.client import IncompleteRead
from urllib import error
from bs4 import BeautifulSoup
# from xml.etree.ElementTree import ElementTree, ParseError
from urllib.parse import urlparse
from urllib.parse import urljoin
from urllib.parse import urlsplit
from urllib.parse import urlunsplit
from lxml import html
async def download_updates(db_file):
# NOTE Perhaps this needs to be executed
# just once per program execution
async def initdb(jid, callback, message=None):
"""
Callback function to instantiate action on database.
Parameters
----------
jid : str
Jabber ID.
callback : function
Function to call with the resolved db_file.
message : str, optional
Optional argument that is passed to the callback
when it requires one. The default is None.
Returns
-------
object
Coroutine object.
"""
db_dir = confighandler.get_default_dbdir()
if not os.path.isdir(db_dir):
os.mkdir(db_dir)
db_file = os.path.join(db_dir, r"{}.db".format(jid))
sqlitehandler.create_tables(db_file)
# await sqlitehandler.set_default_values(db_file)
if message:
return await callback(db_file, message)
else:
return await callback(db_file)
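
Under this callback convention, a call site reads roughly like the sketch below (the JID is a placeholder; the snippet assumes this module's own imports).

```
import asyncio

async def demo():
    # "interval" is forwarded to the callback as its
    # second argument, after the resolved db_file.
    interval = await initdb(
        "user@example.org",
        sqlitehandler.get_settings_value,
        "interval")
    print(interval)

# asyncio.run(demo())
```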
async def download_updates(db_file, url=None):
"""
Check feeds for new entries.
:param db_file: Database filename.
Parameters
----------
db_file : str
Path to database file.
url : str, optional
URL. The default is None.
"""
urls = await sqlitehandler.get_subscriptions(db_file)
if url:
urls = [url] # Valid [url] and [url,] and (url,)
else:
urls = await sqlitehandler.get_feeds_url(db_file)
for url in urls:
# print(os.path.basename(db_file), url[0])
source = url[0]
@@ -34,31 +80,42 @@ async def download_updates(db_file):
# urls.next()
# next(urls)
continue
await sqlitehandler.update_source_status(db_file, res[1], source)
await sqlitehandler.update_source_status(
db_file,
res[1],
source
)
if res[0]:
try:
feed = feedparser.parse(res[0])
if feed.bozo:
# bozo = ("WARNING: Bozo detected for feed <{}>. "
# "For more information, visit "
# "https://pythonhosted.org/feedparser/bozo.html"
# .format(source))
# print(bozo)
bozo = (
"WARNING: Bozo detected for feed: {}\n"
"For more information, visit "
"https://pythonhosted.org/feedparser/bozo.html"
).format(source)
print(bozo)
valid = 0
else:
valid = 1
await sqlitehandler.update_source_validity(db_file, source, valid)
except (IncompleteReadError, IncompleteRead, error.URLError) as e:
print(e)
await sqlitehandler.update_source_validity(
db_file,
source,
valid)
except (
IncompleteReadError,
IncompleteRead,
error.URLError
) as e:
# print(e)
# TODO Print error to log
None
# NOTE I don't think there should be "return"
# because then we might stop scanning next URLs
# return
# TODO Place these couple of lines back down
# NOTE Need to correct the SQL statement to do so
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
if res[1] == 200:
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
# TODO Place these couple of lines back down
@@ -66,25 +123,60 @@ async def download_updates(db_file):
entries = feed.entries
# length = len(entries)
# await sqlitehandler.remove_entry(db_file, source, length)
await sqlitehandler.remove_nonexistent_entries(db_file, feed, source)
new_entry = 0
await sqlitehandler.remove_nonexistent_entries(
db_file,
feed,
source
)
# new_entry = 0
for entry in entries:
if entry.has_key("id"):
eid = entry.id
if entry.has_key("title"):
title = entry.title
else:
title = feed["feed"]["title"]
if entry.has_key("link"):
link = entry.link
# link = complete_url(source, entry.link)
link = await join_url(source, entry.link)
link = await trim_url(link)
else:
link = source
exist = await sqlitehandler.check_entry_exist(db_file, title, link)
# TODO Pass date too for comparison check
if entry.has_key("published"):
date = entry.published
date = await datetimehandler.rfc2822_to_iso8601(date)
else:
date = None
exist = await sqlitehandler.check_entry_exist(
db_file,
source,
eid=eid,
title=title,
link=link,
date=date
)
if not exist:
new_entry = new_entry + 1
# new_entry = new_entry + 1
if entry.has_key("published"):
date = entry.published
date = await datetimehandler.rfc2822_to_iso8601(date)
# try:
# date = datetime.strptime(date, "%a, %d %b %Y %H:%M:%S %z")
# except:
# date = datetime.strptime(date, '%a, %d %b %Y %H:%M:%S %Z')
# finally:
# date = date.isoformat()
# if parsedate(date): # Is RFC 2822 format
# date = parsedate_to_datetime(date) # Process timestamp
# date = date.isoformat() # Convert to ISO 8601
else:
# TODO Just set date = "*** No date ***"
# date = datetime.now().isoformat()
date = await datetimehandler.now()
# NOTE Would seconds result in better database performance
# date = datetime.datetime(date)
# date = (date-datetime.datetime(1970,1,1)).total_seconds()
# TODO Enhance summary
if entry.has_key("summary"):
summary = entry.summary
@@ -93,164 +185,156 @@ async def download_updates(db_file):
# TODO Limit text length
summary = summary.replace("\n\n", "\n")[:300] + " ‍⃨"
else:
summary = '*** No summary ***'
entry = (title, summary, link, source, 0);
await sqlitehandler.add_entry_and_set_date(db_file, source, entry)
summary = "*** No summary ***"
read_status = 0
pathname = urlsplit(link).path
string = (
"{} {} {}"
).format(
title,
summary,
pathname
)
allow_list = await filterhandler.is_listed(
db_file,
"allow",
string
)
if not allow_list:
reject_list = await filterhandler.is_listed(
db_file,
"deny",
string
)
if reject_list:
print(">>> REJECTED", title)
summary = "REJECTED"
# summary = ""
read_status = 1
entry = (
title,
summary,
link,
eid,
source,
date,
read_status
)
await sqlitehandler.add_entry_and_set_date(
db_file,
source,
entry
)
# print(await datetimehandler.current_time(), entry, title)
# else:
# print(await datetimehandler.current_time(), exist, title)
async def add_feed_no_check(db_file, data):
"""
Add given feed without validity check.
Parameters
----------
db_file : str
Path to database file.
data : str
URL or URL and Title.
Returns
-------
msg : str
Status message.
"""
url = data[0]
title = data[1]
url = await trim_url(url)
exist = await sqlitehandler.check_feed_exist(db_file, url)
if not exist:
msg = await sqlitehandler.add_feed(db_file, url, title)
await download_updates(db_file, [url])
else:
ix = exist[0]
name = exist[1]
msg = (
"> {}\nNews source \"{}\" is already "
"listed in the subscription list at "
"index {}".format(url, name, ix)
)
return msg
async def add_feed(db_file, url):
"""
Check whether feed exists, otherwise process it.
:param db_file: Database filename.
:param url: URL.
:return: Status message.
Parameters
----------
db_file : str
Path to database file.
url : str
URL.
Returns
-------
msg : str
Status message.
"""
msg = None
url = await trim_url(url)
exist = await sqlitehandler.check_feed_exist(db_file, url)
if not exist:
res = await download_feed(url)
if res[0]:
feed = feedparser.parse(res[0])
title = await get_title(url, feed)
if feed.bozo:
bozo = ("WARNING: Bozo detected. Failed to load <{}>.".format(url))
bozo = (
"Bozo detected. Failed to load: {}."
).format(url)
print(bozo)
try:
# tree = etree.fromstring(res[0]) # etree is for xml
tree = html.fromstring(res[0])
except:
return "Failed to parse URL <{}> as feed".format(url)
print("RSS Auto-Discovery Engaged")
xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]"""
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
feeds = tree.xpath(xpath_query)
if len(feeds) > 1:
msg = "RSS Auto-Discovery has found {} feeds:\n\n".format(len(feeds))
for feed in feeds:
# # The following code works;
# # The following code will catch
# # only valid resources (i.e. not 404);
# # The following code requires more bandwidth.
# res = await download_feed(feed)
# if res[0]:
# disco = feedparser.parse(res[0])
# title = disco["feed"]["title"]
# msg += "{} \n {} \n\n".format(title, feed)
feed_name = feed.xpath('@title')[0]
feed_addr = feed.xpath('@href')[0]
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
msg += "The above feeds were extracted from\n{}".format(url)
return msg
elif feeds:
url = feeds[0].xpath('@href')[0]
# Why wouldn't add_feed return a message
# upon success unless return is explicitly
# mentioned, yet upon failure it wouldn't?
return await add_feed(db_file, url)
print("RSS Scan Mode Engaged")
feeds = {}
paths = []
# TODO Test
cfg_dir = confighandler.get_default_confdir()
if not os.path.isdir(cfg_dir):
os.mkdir(cfg_dir)
cfg_file = os.path.join(cfg_dir, r"url_paths.txt")
if not os.path.isfile(cfg_file):
# confighandler.generate_dictionary()
list = confighandler.get_default_list()
file = open(cfg_file, "w")
file.writelines("\n".join(list))
file.close()
file = open(cfg_file, "r")
lines = file.readlines()
for line in lines:
paths.extend([line.strip()])
for path in paths:
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
xpath_query = "//a[contains(@href,'{}')]".format(path)
addresses = tree.xpath(xpath_query)
parted_url = urlparse(url)
# NOTE Should number of addresses be limited or
# perhaps be N from the start and N from the end
for address in addresses:
address = address.xpath('@href')[0]
if address.startswith('/'):
address = parted_url.scheme + '://' + parted_url.netloc + address
res = await download_feed(address)
if res[1] == 200:
try:
feeds[address] = feedparser.parse(res[0])["feed"]["title"]
except:
continue
if len(feeds) > 1:
msg = "RSS URL scan has found {} feeds:\n\n".format(len(feeds))
for feed in feeds:
# try:
# res = await download_feed(feed)
# except:
# continue
feed_name = feeds[feed]
feed_addr = feed
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
msg += "The above feeds were extracted from\n{}".format(url)
return msg
elif feeds:
url = list(feeds)[0]
return await add_feed(db_file, url)
# (HTTP) Request(s) Paths
print("RSS Arbitrary Mode Engaged")
feeds = {}
parted_url = urlparse(url)
for path in paths:
address = parted_url.scheme + '://' + parted_url.netloc + path
res = await download_feed(address)
if res[1] == 200:
# print(feedparser.parse(res[0])["feed"]["title"])
# feeds[address] = feedparser.parse(res[0])["feed"]["title"]
try:
title = feedparser.parse(res[0])["feed"]["title"]
except:
title = '*** No Title ***'
feeds[address] = title
# Check whether URL has path (i.e. not root)
if parted_url.path.split('/')[1]:
paths.extend([".atom", ".feed", ".rdf", ".rss"]) if '.rss' not in paths else -1
# if paths.index('.rss'):
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
address = parted_url.scheme + '://' + parted_url.netloc + '/' + parted_url.path.split('/')[1] + path
res = await download_feed(address)
if res[1] == 200:
try:
title = feedparser.parse(res[0])["feed"]["title"]
except:
title = '*** No Title ***'
feeds[address] = title
if len(feeds) > 1:
msg = "RSS URL discovery has found {} feeds:\n\n".format(len(feeds))
for feed in feeds:
feed_name = feeds[feed]
feed_addr = feed
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
msg += "The above feeds were extracted from\n{}".format(url)
elif feeds:
url = list(feeds)[0]
msg = await add_feed(db_file, url)
else:
msg = "No news feeds were found for URL <{}>.".format(url)
msg = (
"> {}\nFailed to parse URL as feed."
).format(url)
if not msg:
print("RSS Auto-Discovery Engaged")
msg = await feed_mode_auto_discovery(db_file, url, tree)
if not msg:
print("RSS Scan Mode Engaged")
msg = await feed_mode_scan(db_file, url, tree)
if not msg:
print("RSS Arbitrary Mode Engaged")
msg = await feed_mode_request(db_file, url, tree)
if not msg:
msg = (
"> {}\nNo news feeds were found for URL."
).format(url)
else:
msg = await sqlitehandler.add_feed(db_file, title, url, res)
status = res[1]
msg = await sqlitehandler.add_feed(
db_file,
url,
title,
status
)
await download_updates(db_file, [url])
else:
msg = "Failed to get URL <{}>. Reason: {}".format(url, res[1])
status = res[1]
msg = (
"> {}\nFailed to get URL. Reason: {}"
).format(url, status)
else:
ix = exist[0]
name = exist[1]
msg = "> {}\nNews source \"{}\" is already listed in the subscription list at index {}".format(url, name, ix)
msg = (
"> {}\nNews source \"{}\" is already "
"listed in the subscription list at "
"index {}".format(url, name, ix)
)
return msg
@@ -258,8 +342,15 @@ async def download_feed(url):
"""
Download content of given URL.
:param url: URL.
:return: Document or error message.
Parameters
----------
url : str
URL.
Returns
-------
msg : list or str
Document or error message.
"""
timeout = aiohttp.ClientTimeout(total=10)
async with aiohttp.ClientSession() as session:
@@ -271,30 +362,438 @@ async def download_feed(url):
try:
doc = await response.text()
# print (response.content_type)
return [doc, status]
msg = [
doc,
status
]
except:
# return [False, "The content of this document doesn't appear to be textual."]
return [False, "Document is too large or is not textual."]
# msg = [
# False,
# ("The content of this document "
# "doesn't appear to be textual."
# )
# ]
msg = [
False,
"Document is too large or is not textual."
]
else:
return [False, "HTTP Error: " + str(status)]
msg = [
False,
"HTTP Error: " + str(status)
]
except aiohttp.ClientError as e:
print('Error', str(e))
return [False, "Error: " + str(e)]
# print('Error', str(e))
msg = [
False,
"Error: " + str(e)
]
except asyncio.TimeoutError as e:
# print('Timeout:', str(e))
return [False, "Timeout: " + str(e)]
msg = [
False,
"Timeout: " + str(e)
]
return msg
async def get_title(url, feed):
"""
Get title of feed.
:param url: URL
:param feed: Parsed feed
:return: Title or URL hostname.
Parameters
----------
url : str
URL.
feed : dict
Parsed feed document.
Returns
-------
title : str
Title or URL hostname.
"""
try:
title = feed["feed"]["title"]
except:
title = urlparse(url).netloc
title = urlsplit(url).netloc
return title
# NOTE Read the documentation
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
def complete_url(source, link):
"""
Check if URL is pathname and complete it into URL.
Parameters
----------
source : str
Feed URL.
link : str
Link URL or pathname.
Returns
-------
str
URL.
"""
if link.startswith("www."):
return "http://" + link
parted_link = urlsplit(link)
parted_feed = urlsplit(source)
if parted_link.scheme == "magnet" and parted_link.query:
return link
if parted_link.scheme and parted_link.netloc:
return link
if link.startswith("//"):
if parted_link.netloc and parted_link.path:
new_link = urlunsplit([
parted_feed.scheme,
parted_link.netloc,
parted_link.path,
parted_link.query,
parted_link.fragment
])
elif link.startswith("/"):
new_link = urlunsplit([
parted_feed.scheme,
parted_feed.netloc,
parted_link.path,
parted_link.query,
parted_link.fragment
])
elif link.startswith("../"):
pathlink = parted_link.path.split("/")
pathfeed = parted_feed.path.split("/")
for i in pathlink:
if i == "..":
if pathlink.index("..") == 0:
pathfeed.pop()
else:
break
while pathlink.count(".."):
if pathlink.index("..") == 0:
pathlink.remove("..")
else:
break
pathlink = "/".join(pathlink)
pathfeed.extend([pathlink])
new_link = urlunsplit([
parted_feed.scheme,
parted_feed.netloc,
"/".join(pathfeed),
parted_link.query,
parted_link.fragment
])
else:
pathlink = parted_link.path.split("/")
pathfeed = parted_feed.path.split("/")
if link.startswith("./"):
pathlink.remove(".")
if not source.endswith("/"):
pathfeed.pop()
pathlink = "/".join(pathlink)
pathfeed.extend([pathlink])
new_link = urlunsplit([
parted_feed.scheme,
parted_feed.netloc,
"/".join(pathfeed),
parted_link.query,
parted_link.fragment
])
return new_link
"""
TODO
Feed https://www.ocaml.org/feed.xml
Link %20https://frama-c.com/fc-versions/cobalt.html%20
FIXME
Feed https://cyber.dabamos.de/blog/feed.rss
Link https://cyber.dabamos.de/blog/#article-2022-07-15
"""
async def join_url(source, link):
"""
Join base URL with given pathname.
Parameters
----------
source : str
Feed URL.
link : str
Link URL or pathname.
Returns
-------
str
URL.
"""
if link.startswith("www."):
new_link = "http://" + link
elif link.startswith("%20") and link.endswith("%20"):
old_link = link.split("%20")
del old_link[0]
old_link.pop()
new_link = "".join(old_link)
else:
new_link = urljoin(source, link)
return new_link
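
To make the two problem links from the docstring above concrete (values taken from the FIXME notes; behavior as expected from urllib):

```
from urllib.parse import urljoin

# An already-absolute link is returned unchanged by urljoin:
print(urljoin(
    "https://cyber.dabamos.de/blog/feed.rss",
    "https://cyber.dabamos.de/blog/#article-2022-07-15"))

# The "%20"-wrapped case is stripped before urljoin is reached;
# the split/del/pop sequence above is equivalent to:
link = "%20https://frama-c.com/fc-versions/cobalt.html%20"
print("".join(link.split("%20")[1:-1]))
# -> https://frama-c.com/fc-versions/cobalt.html
```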
async def trim_url(url):
"""
Check URL pathname for double slash.
Parameters
----------
url : str
URL.
Returns
-------
url : str
URL.
"""
parted_url = urlsplit(url)
protocol = parted_url.scheme
hostname = parted_url.netloc
pathname = parted_url.path
queries = parted_url.query
fragment = parted_url.fragment
while "//" in pathname:
pathname = pathname.replace("//", "/")
url = urlunsplit([
protocol,
hostname,
pathname,
queries,
fragment
])
return url
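
A quick check of the collapse performed above; only the path component is touched, and the query passes through (illustrative URL):

```
import asyncio

# Doubled slashes in the path are reduced to single slashes.
url = asyncio.run(trim_url("https://example.org//news//feed.xml?x=1"))
print(url)  # https://example.org/news/feed.xml?x=1
```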
# TODO Improve scan by gradually shortening the path
async def feed_mode_request(db_file, url, tree):
"""
Look up feeds by pathname using HTTP requests.
Parameters
----------
db_file : str
Path to database file.
url : str
URL.
tree : lxml.html.HtmlElement
Parsed HTML document.
Returns
-------
msg : str
Message with URLs.
"""
feeds = {}
parted_url = urlsplit(url)
paths = confighandler.get_list()
for path in paths:
address = urlunsplit([
parted_url.scheme,
parted_url.netloc,
path,
None,
None
])
res = await download_feed(address)
if res[1] == 200:
# print(feedparser.parse(res[0])["feed"]["title"])
# feeds[address] = feedparser.parse(res[0])["feed"]["title"]
try:
title = feedparser.parse(res[0])["feed"]["title"]
except:
title = '*** No Title ***'
feeds[address] = title
# Check whether URL has path (i.e. not root)
if parted_url.path.split('/')[1]:
paths.extend(
[".atom", ".feed", ".rdf", ".rss"]
) if '.rss' not in paths else -1
# if paths.index('.rss'):
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
address = urlunsplit([
parted_url.scheme,
parted_url.netloc,
parted_url.path.split('/')[1] + path,
None,
None
])
res = await download_feed(address)
if res[1] == 200:
try:
title = feedparser.parse(res[0])["feed"]["title"]
except:
title = '*** No Title ***'
feeds[address] = title
if len(feeds) > 1:
msg = (
"RSS URL discovery has found {} feeds:\n```\n"
).format(len(feeds))
for feed in feeds:
feed_name = feeds[feed]
feed_addr = feed
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
msg += (
"```\nThe above feeds were extracted from\n{}"
).format(url)
elif feeds:
feed_addr = list(feeds)[0]
msg = await add_feed(db_file, feed_addr)
return msg
async def feed_mode_scan(db_file, url, tree):
"""
Scan page for potential feeds by pathname.
Parameters
----------
db_file : str
Path to database file.
url : str
URL.
tree : lxml.html.HtmlElement
Parsed HTML document.
Returns
-------
msg : str
Message with URLs.
"""
feeds = {}
# paths = []
# TODO Test
paths = confighandler.get_list()
for path in paths:
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
xpath_query = "//a[contains(@href,'{}')]".format(path)
addresses = tree.xpath(xpath_query)
parted_url = urlsplit(url)
# NOTE Should number of addresses be limited or
# perhaps be N from the start and N from the end
for address in addresses:
print(address.xpath('@href')[0])
print(addresses)
address = address.xpath('@href')[0]
if "/" not in address:
protocol = parted_url.scheme
hostname = parted_url.netloc
pathname = address
address = urlunsplit([
protocol,
hostname,
pathname,
None,
None
])
if address.startswith('/'):
protocol = parted_url.scheme
hostname = parted_url.netloc
pathname = address
address = urlunsplit([
protocol,
hostname,
pathname,
None,
None
])
res = await download_feed(address)
if res[1] == 200:
try:
feeds[address] = feedparser.parse(res[0])["feed"]["title"]
print(feeds)
except:
continue
if len(feeds) > 1:
msg = (
"RSS URL scan has found {} feeds:\n```\n"
).format(len(feeds))
for feed in feeds:
# try:
# res = await download_feed(feed)
# except:
# continue
feed_name = feeds[feed]
feed_addr = feed
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
msg += (
"```\nThe above feeds were extracted from\n{}"
).format(url)
return msg
elif feeds:
feed_addr = list(feeds)[0]
msg = await add_feed(db_file, feed_addr)
return msg
async def feed_mode_auto_discovery(db_file, url, tree):
"""
Look up feeds using the RSS autodiscovery technique.
See: https://www.rssboard.org/rss-autodiscovery
Parameters
----------
db_file : str
Path to database file.
url : str
URL.
tree : lxml.html.HtmlElement
Parsed HTML document.
Returns
-------
msg : str
Message with URLs.
"""
xpath_query = (
'//link[(@rel="alternate") and '
'(@type="application/atom+xml" or '
'@type="application/rdf+xml" or '
'@type="application/rss+xml")]'
)
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
feeds = tree.xpath(xpath_query)
if len(feeds) > 1:
msg = (
"RSS Auto-Discovery has found {} feeds:\n```\n"
).format(len(feeds))
for feed in feeds:
# # The following code works;
# # The following code will catch
# # only valid resources (i.e. not 404);
# # The following code requires more bandwidth.
# res = await download_feed(feed)
# if res[0]:
# disco = feedparser.parse(res[0])
# title = disco["feed"]["title"]
# msg += "{} \n {} \n\n".format(title, feed)
feed_name = feed.xpath('@title')[0]
feed_addr = await join_url(url, feed.xpath('@href')[0])
# if feed_addr.startswith("/"):
# feed_addr = url + feed_addr
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
msg += (
"```\nThe above feeds were extracted from\n{}"
).format(url)
return msg
elif feeds:
feed_addr = await join_url(url, feeds[0].xpath('@href')[0])
# if feed_addr.startswith("/"):
# feed_addr = url + feed_addr
# NOTE Why wouldn't add_feed return a message
# upon success unless return is explicitly
# mentioned, yet upon failure it wouldn't?
# return await add_feed(db_file, feed_addr)
msg = await add_feed(db_file, feed_addr)
return msg
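
For illustration, the XPath above matches autodiscovery markup of the following shape (sample HTML, not from a real site):

```
from lxml import html

page = """
<html><head>
<link rel="alternate" type="application/rss+xml"
      title="Example feed" href="/feed.xml">
</head><body></body></html>
"""
tree = html.fromstring(page)
links = tree.xpath(
    '//link[(@rel="alternate") and '
    '(@type="application/atom+xml" or '
    '@type="application/rdf+xml" or '
    '@type="application/rss+xml")]')
print(links[0].xpath('@title')[0],
      links[0].xpath('@href')[0])
# -> Example feed /feed.xml
```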

slixfeed/datetimehandler.py

@@ -0,0 +1,81 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
https://feedparser.readthedocs.io/en/latest/date-parsing.html
"""
from datetime import datetime
from dateutil.parser import parse
from email.utils import parsedate
from email.utils import parsedate_to_datetime
async def now():
"""
ISO 8601 Timestamp.
Returns
-------
date : str
ISO 8601 timestamp.
"""
date = datetime.now().isoformat()
return date
async def current_time():
"""
Return HH:MM:SS timestamp.
Returns
-------
time : str
HH:MM:SS timestamp.
"""
now = datetime.now()
time = now.strftime("%H:%M:%S")
return time
async def validate(date):
"""
Validate date format.
Parameters
----------
date : str
Timestamp.
Returns
-------
date : str
Timestamp.
"""
try:
parse(date)
except:
date = await now()
return date
async def rfc2822_to_iso8601(date):
"""
Convert RFC 2822 into ISO 8601.
Parameters
----------
date : str
RFC 2822 Timestamp.
Returns
-------
date : str
ISO 8601 Timestamp.
"""
if parsedate(date):
try:
date = parsedate_to_datetime(date)
date = date.isoformat()
except:
date = await now()
return date
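
For reference, the standard-library conversion used above, applied to this commit's own timestamp:

```
from email.utils import parsedate_to_datetime

date = parsedate_to_datetime("Mon, 13 Nov 2023 13:45:10 +0000")
print(date.isoformat())  # 2023-11-13T13:45:10+00:00
```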

slixfeed/filterhandler.py (new file, 105 lines)

@@ -0,0 +1,105 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TODO
1) Website-specific filter (e.g. audiobookbay).
2) Exclude websites from filtering (e.g. metapedia).
3) Filter phrases:
Refer to sqlitehandler.search_entries for implementation.
It is expected to be more complex than function search_entries.
"""
import sqlitehandler
async def set_filter(newwords, keywords):
"""
Append new keywords to filter.
Parameters
----------
newwords : str
Comma-separated keywords to add.
keywords : str
Comma-separated keywords currently stored.
Returns
-------
val : str
Comma-separated list of current and new keywords, sorted.
"""
try:
keywords = keywords.split(",")
except:
keywords = []
newwords = newwords.lower().split(",")
for word in newwords:
word = word.strip()
if len(word) and word not in keywords:
keywords.extend([word])
keywords.sort()
val = ",".join(keywords)
return val
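
A quick sketch of the merge performed by set_filter (lower-cased, deduplicated, sorted; the keyword values are illustrative):

```
import asyncio

# "war" is already stored, so only "piracy" is appended.
val = asyncio.run(set_filter("War, piracy", "crim,dead,war"))
print(val)  # crim,dead,piracy,war
```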
async def is_listed(db_file, type, string):
"""
Check whether given string contains any keyword
of the given filter type.
Parameters
----------
db_file : str
Path to database file.
type : str
Filter type: allow or deny.
string : str
Text to check against the keyword list.
Returns
-------
int or None
1 if a keyword matches, otherwise None.
"""
# async def reject(db_file, string):
# async def is_blacklisted(db_file, string):
filter_type = "filter-" + type
list = await sqlitehandler.get_settings_value(
db_file,
filter_type
)
if list:
list = list.split(",")
for i in list:
if not i or len(i) < 2:
continue
if i in string.lower():
print(">>> ACTIVATE", i)
return 1
else:
return None
"""
This code was tested at module datahandler
reject = 0
blacklist = await sqlitehandler.get_settings_value(
db_file,
"filter-deny"
)
# print(">>> blacklist:")
# print(blacklist)
# breakpoint()
if blacklist:
blacklist = blacklist.split(",")
# print(">>> blacklist.split")
# print(blacklist)
# breakpoint()
for i in blacklist:
# print(">>> length", len(i))
# breakpoint()
# if len(i):
if not i or len(i) < 2:
print(">>> continue due to length", len(i))
# breakpoint()
continue
# print(title)
# print(">>> blacklisted word:", i)
# breakpoint()
test = (title + " " + summary + " " + link)
if i in test.lower():
reject = 1
break
if reject:
print("rejected:",title)
entry = (title, '', link, source, date, 1);
"""

slixfeed/opmlhandler.py (new file, 56 lines)

@@ -0,0 +1,56 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
{
'bozo': False,
'bozo_exception': None,
'feeds': [
{
'url': 'https://kurtmckee.org/tag/listparser/feed',
'title': 'listparser blog',
'categories': [],
'tags': []
},
{
'url': 'https://github.com/kurtmckee/listparser/commits/develop.atom',
'title': 'listparser changelog',
'categories': [],
'tags': []
}
],
'lists': [],
'opportunities': [],
'meta': {
'title': 'listparser project feeds',
'author': {
'name': 'Kurt McKee',
'email': 'contactme@kurtmckee.org',
'url': 'https://kurtmckee.org/'
}
},
'version': 'opml2'
}
"""
import listparser
import lxml
import sqlitehandler
import datahandler
async def import_opml(db_file, opml_doc):
feeds = listparser.parse(opml_doc)['feeds']
for feed in feeds:
url = feed['url']
title = feed['title']
# categories = feed['categories']
# tags = feed['tags']
await datahandler.add_feed_no_check(db_file, [url, title])
# NOTE Use OPyML or LXML
async def export_opml():
result = await sqlitehandler.get_feeds()
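
export_opml is still a stub; per the NOTE above, one possible lxml-based shape is sketched below. It assumes the feeds query eventually yields (title, url) pairs, which this commit does not settle.

```
from lxml import etree

def feeds_to_opml(feeds):
    # feeds: iterable of (title, url) pairs (assumed shape).
    root = etree.Element("opml", version="1.0")
    head = etree.SubElement(root, "head")
    etree.SubElement(head, "title").text = "Slixfeed subscriptions"
    body = etree.SubElement(root, "body")
    for title, url in feeds:
        etree.SubElement(
            body, "outline", text=title, type="rss", xmlUrl=url)
    return etree.tostring(
        root, xml_declaration=True, encoding="UTF-8",
        pretty_print=True)
```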

slixfeed/sqlitehandler.py: file diff suppressed because it is too large.

slixfeed/xmpphandler.py

@@ -1,38 +1,56 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from datetime import datetime
"""
TODO
1) Deprecate "add" (see above) and make it interactive.
Slixfeed: Do you still want to add this URL to subscription list?
See: case _ if message_lowercase.startswith("add"):
2) Use loop (with gather) instead of TaskGroup
"""
import asyncio
import os
import slixmpp
from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
import confighandler
import datahandler
import datetimehandler
import filterhandler
import sqlitehandler
main_task = []
jid_tasker = {}
task_manager = {}
loop = asyncio.get_event_loop()
# asyncio.set_event_loop(loop)
time_now = datetime.now()
# time_now = datetime.now()
# time_now = time_now.strftime("%H:%M:%S")
def print_time():
# return datetime.now().strftime("%H:%M:%S")
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
return current_time
# def print_time():
# # return datetime.now().strftime("%H:%M:%S")
# now = datetime.now()
# current_time = now.strftime("%H:%M:%S")
# return current_time
async def handle_event():
print("Event handled!")
class Slixfeed(slixmpp.ClientXMPP):
"""
Slixmpp news bot that will send updates
from feeds it receives.
Slixmpp
-------
News bot that sends updates from RSS feeds.
"""
print("slixmpp.ClientXMPP")
print(repr(slixmpp.ClientXMPP))
def __init__(self, jid, password):
slixmpp.ClientXMPP.__init__(self, jid, password)
@@ -52,7 +70,7 @@ class Slixfeed(slixmpp.ClientXMPP):
self.add_event_handler("message", self.message)
self.add_event_handler("disconnected", self.reconnect)
# Initialize event loop
self.loop = asyncio.get_event_loop()
# self.loop = asyncio.get_event_loop()
async def start(self, event):
@@ -70,116 +88,316 @@ class Slixfeed(slixmpp.ClientXMPP):
"""
self.send_presence()
await self.get_roster()
await self.select_file()
self.send_presence(
pshow="away",
pstatus="Slixmpp has been restarted.",
pto="sch@pimux.de"
)
# for task in main_task:
# task.cancel()
if not main_task:
await self.select_file()
async def message(self, msg):
"""
Process incoming message stanzas. Be aware that this also
includes MUC messages and error messages. It is usually
a good idea to check the message's type before processing
or sending replies.
a good practice to check the message's type before
processing or sending replies.
Arguments:
msg -- The received message stanza. See the documentation
for stanza objects and the Message stanza to see
how it may be used.
Parameters
----------
self : ?
Self.
msg : str
The received message stanza. See the documentation
for stanza objects and the Message stanza to see
how it may be used.
"""
if msg["type"] in ("chat", "normal"):
action = 0
jid = msg["from"].bare
db_dir = confighandler.get_default_dbdir()
os.chdir(db_dir)
if jid + ".db" not in os.listdir():
await self.task_jid(jid)
message = " ".join(msg["body"].split())
message = message.lower()
if message.startswith("help"):
action = print_help()
# NOTE: Might not need it
# elif message.startswith("add "):
# url = message[4:]
elif message.startswith("http"):
url = message
action = await initdb(jid, datahandler.add_feed, url)
# action = "> " + message + "\n" + action
elif message.startswith("quantum "):
key = message[:7]
val = message[8:]
# action = "Every update will contain {} news items.".format(action)
action = await initdb(jid, sqlitehandler.set_settings_value, [key, val])
await self.refresh_task(jid, key, val)
elif message.startswith("interval "):
key = message[:8]
val = message[9:]
# action = "Updates will be sent every {} minutes.".format(action)
action = await initdb(jid, sqlitehandler.set_settings_value, [key, val])
await self.refresh_task(jid, key, val)
elif message.startswith("list"):
action = await initdb(jid, sqlitehandler.list_subscriptions)
elif message.startswith("recent "):
num = message[7:]
action = await initdb(jid, sqlitehandler.last_entries, num)
elif message.startswith("remove "):
ix = message[7:]
action = await initdb(jid, sqlitehandler.remove_feed, ix)
elif message.startswith("search "):
query = message[7:]
action = await initdb(jid, sqlitehandler.search_entries, query)
elif message.startswith("start"):
# action = "Updates are enabled."
key = "enabled"
val = 1
actiona = await initdb(jid, sqlitehandler.set_settings_value, [key, val])
asyncio.create_task(self.task_jid(jid))
# print(print_time(), "task_manager[jid]")
# print(task_manager[jid])
elif message.startswith("stats"):
action = await initdb(jid, sqlitehandler.statistics)
elif message.startswith("status "):
ix = message[7:]
action = await initdb(jid, sqlitehandler.toggle_status, ix)
elif message.startswith("stop"):
message_lowercase = message.lower()
print(await datetimehandler.current_time(), "ACCOUNT: " + str(msg["from"]))
print(await datetimehandler.current_time(), "COMMAND:", message)
match message_lowercase:
case "help":
action = print_help()
case _ if message_lowercase in ["greetings", "hello", "hey"]:
action = (
"Greeting! I'm Slixfeed The News Bot!"
"\n"
"Send a URL of a news website to start."
)
case _ if message_lowercase.startswith("add"):
message = message[4:]
url = message.split(" ")[0]
title = " ".join(message.split(" ")[1:])
if url.startswith("http"):
action = await datahandler.initdb(
jid,
datahandler.add_feed_no_check,
[url, title]
)
await self.send_status(jid)
else:
action = "Missing URL."
case _ if message_lowercase.startswith("allow"):
key = "filter-" + message[:5]
val = message[6:]
if val:
keywords = await datahandler.initdb(
jid,
sqlitehandler.get_settings_value,
key
)
val = await filterhandler.set_filter(
val,
keywords
)
await datahandler.initdb(
jid,
sqlitehandler.set_settings_value,
[key, val]
)
action = (
"Approved keywords\n"
"```\n{}\n```"
).format(val)
else:
action = "Missing keywords."
case _ if message_lowercase.startswith("deny"):
key = "filter-" + message[:4]
val = message[5:]
if val:
keywords = await datahandler.initdb(
jid,
sqlitehandler.get_settings_value,
key
)
val = await filterhandler.set_filter(
val,
keywords
)
await datahandler.initdb(
jid,
sqlitehandler.set_settings_value,
[key, val]
)
action = (
"Rejected keywords\n"
"```\n{}\n```"
).format(val)
else:
action = "Missing keywords."
case _ if message_lowercase.startswith("http"):
url = message
action = await datahandler.initdb(
jid,
datahandler.add_feed,
url
)
# action = "> " + message + "\n" + action
await self.send_status(jid)
case _ if message_lowercase.startswith("feeds"):
query = message[6:]
if query:
if len(query) > 3:
action = await datahandler.initdb(
jid,
sqlitehandler.search_feeds,
query
)
else:
action = (
"Enter at least 4 characters to search"
)
else:
action = await datahandler.initdb(
jid,
sqlitehandler.list_feeds
)
case _ if message_lowercase.startswith("interval"):
# FIXME
# The following error occurs only upon first attempt to set interval.
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
# self._args = None
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
key = message[:8]
val = message[9:]
if val:
# action = (
# "Updates will be sent every {} minutes."
# ).format(action)
await datahandler.initdb(
jid,
sqlitehandler.set_settings_value,
[key, val]
)
await self.refresh_task(
jid,
self.send_update,
key,
val
)
action = (
"Updates will be sent every {} minutes."
).format(val)
else:
action = "Missing value."
case _ if message_lowercase.startswith("next"):
num = message[5:]
await self.send_update(jid, num)
await self.send_status(jid)
# await self.refresh_task(jid, key, val)
case _ if message_lowercase.startswith("quantum"):
key = message[:7]
val = message[8:]
if val:
# action = (
# "Every update will contain {} news items."
# ).format(action)
await datahandler.initdb(
jid,
sqlitehandler.set_settings_value,
[key, val]
)
action = (
"Next update will contain {} news items."
).format(val)
else:
action = "Missing value."
case _ if message_lowercase.startswith("random"):
action = "Updates will be sent randomly."
case _ if message_lowercase.startswith("recent"):
num = message[7:]
if num:
action = await datahandler.initdb(
jid,
sqlitehandler.last_entries,
num
)
else:
action = "Missing value."
case _ if message_lowercase.startswith("remove"):
ix = message[7:]
if ix:
action = await datahandler.initdb(
jid,
sqlitehandler.remove_feed,
ix
)
await self.send_status(jid)
else:
action = "Missing feed ID."
case _ if message_lowercase.startswith("search"):
query = message[7:]
if query:
if len(query) > 1:
action = await datahandler.initdb(
jid,
sqlitehandler.search_entries,
query
)
else:
action = (
"Enter at least 2 characters to search"
)
else:
action = "Missing search query."
case "start":
# action = "Updates are enabled."
key = "enabled"
val = 1
await datahandler.initdb(
jid,
sqlitehandler.set_settings_value,
[key, val]
)
asyncio.create_task(self.task_jid(jid))
action = "Updates are enabled."
# print(await datetimehandler.current_time(), "task_manager[jid]")
# print(task_manager[jid])
case "stats":
action = await datahandler.initdb(
jid,
sqlitehandler.statistics
)
case _ if message_lowercase.startswith("status "):
ix = message[7:]
action = await datahandler.initdb(
jid,
sqlitehandler.toggle_status,
ix
)
case "stop":
# FIXME
# The following error occurs only upon first attempt to stop.
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
# self._args = None
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
# action = "Updates are disabled."
try:
task_manager[jid]["check"].cancel()
# task_manager[jid]["status"].cancel()
task_manager[jid]["interval"].cancel()
# try:
# # task_manager[jid]["check"].cancel()
# # task_manager[jid]["status"].cancel()
# task_manager[jid]["interval"].cancel()
# key = "enabled"
# val = 0
# action = await datahandler.initdb(
# jid,
# sqlitehandler.set_settings_value,
# [key, val]
# )
# except:
# action = "Updates are already disabled."
# # print("Updates are already disabled. Nothing to do.")
# # await self.send_status(jid)
key = "enabled"
val = 0
actiona = await initdb(jid, sqlitehandler.set_settings_value, [key, val])
await self.send_status(jid)
print(print_time(), "task_manager[jid]")
print(task_manager[jid])
except:
# action = "Updates are already disabled."
await self.send_status(jid)
else:
action = "Unknown command. Press \"help\" for list of commands"
await datahandler.initdb(
jid,
sqlitehandler.set_settings_value,
[key, val]
)
await self.task_jid(jid)
action = "Updates are disabled."
case "support":
# TODO Send an invitation.
action = "xmpp:slixmpp@muc.poez.io?join"
case _:
action = (
"Unknown command. "
"Press \"help\" for list of commands"
)
if action: msg.reply(action).send()
print(print_time(), "COMMAND ACCOUNT")
print("COMMAND:", message)
print("ACCOUNT: " + str(msg["from"]))
async def select_file(self):
"""
Initiate actions by JID (Jabber ID).
:param self: Self
Parameters
----------
self : ?
Self.
"""
while True:
db_dir = confighandler.get_default_dbdir()
if not os.path.isdir(db_dir):
msg = ("Slixfeed can not work without a database. \n"
"To create a database, follow these steps: \n"
"Add Slixfeed contact to your roster \n"
"Send a feed to the bot by: \n"
"add https://reclaimthenet.org/feed/")
print(print_time(), msg)
msg = (
"Slixfeed can not work without a database.\n"
"To create a database, follow these steps:\n"
"Add Slixfeed contact to your roster.\n"
"Send a feed to the bot by URL:\n"
"https://reclaimthenet.org/feed/"
)
# print(await datetimehandler.current_time(), msg)
print(msg)
else:
os.chdir(db_dir)
@@ -191,114 +409,165 @@ class Slixfeed(slixmpp.ClientXMPP):
# jid_tasker[jid] = asyncio.create_task(self.task_jid(jid))
# await jid_tasker[jid]
async with asyncio.TaskGroup() as tg:
print("main task")
print(print_time(), "repr(tg)")
print(repr(tg)) # <TaskGroup entered>
for file in files:
if file.endswith(".db") and not file.endswith(".db-jour.db"):
jid = file[:-3]
tg.create_task(self.task_jid(jid))
main_task.extend([tg.create_task(self.task_jid(jid))])
# main_task = [tg.create_task(self.task_jid(jid))]
# task_manager.update({jid: tg})
# print(task_manager) # {}
print(print_time(), "repr(tg) id(tg)")
print(jid, repr(tg)) # sch@pimux.de <TaskGroup tasks=1 entered>
print(jid, id(tg)) # sch@pimux.de 139879835500624
# <xmpphandler.Slixfeed object at 0x7f24922124d0> <TaskGroup tasks=2 entered>
# <xmpphandler.Slixfeed object at 0x7f24922124d0> 139879835500624
async def task_jid(self, jid):
"""
JID (Jabber ID) task manager.
:param self: Self
:param jid: Jabber ID
Parameters
----------
self : ?
Self.
jid : str
Jabber ID.
"""
enabled = await initdb(
enabled = await datahandler.initdb(
jid,
sqlitehandler.get_settings_value,
"enabled"
)
print(print_time(), "enabled", enabled, jid)
# print(await datetimehandler.current_time(), "enabled", enabled, jid)
if enabled:
print("sub task")
print(print_time(), "repr(self) id(self)")
print(repr(self))
print(id(self))
task_manager[jid] = {}
task_manager[jid]["check"] = asyncio.create_task(check_updates(jid))
task_manager[jid]["status"] = asyncio.create_task(self.send_status(jid))
task_manager[jid]["interval"] = asyncio.create_task(self.send_update(jid))
task_manager[jid]["check"] = asyncio.create_task(
check_updates(jid)
)
task_manager[jid]["status"] = asyncio.create_task(
self.send_status(jid)
)
task_manager[jid]["interval"] = asyncio.create_task(
self.send_update(jid)
)
await task_manager[jid]["check"]
await task_manager[jid]["status"]
await task_manager[jid]["interval"]
print(print_time(), "task_manager[jid].items()")
print(task_manager[jid].items())
print(print_time(), "task_manager[jid]")
print(task_manager[jid])
print(print_time(), "task_manager")
print(task_manager)
else:
# FIXME
# The following error occurs only upon first attempt to stop.
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
# self._args = None
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
try:
task_manager[jid]["interval"].cancel()
except:
None
await self.send_status(jid)
async def send_update(self, jid):
async def send_update(self, jid, num=None):
"""
Send news items as messages.
:param self: Self
:param jid: Jabber ID
Parameters
----------
self : ?
Self.
jid : str
Jabber ID.
num : str, optional
Number of news items to send. The default is None.
"""
new = await initdb(
# print("Starting send_update()")
# print(jid)
new = await datahandler.initdb(
jid,
sqlitehandler.get_entry_unread
sqlitehandler.get_entry_unread,
num
)
if new:
print(print_time(), "> SEND UPDATE",jid)
print(await datetimehandler.current_time(), "> SEND UPDATE",jid)
self.send_message(
mto=jid,
mbody=new,
mtype="chat"
)
interval = await initdb(
await self.refresh_task(
jid,
sqlitehandler.get_settings_value,
self.send_update,
"interval"
)
)
# interval = await datahandler.initdb(
# jid,
# sqlitehandler.get_settings_value,
# "interval"
# )
# task_manager[jid]["interval"] = loop.call_at(
# loop.time() + 60 * interval,
# loop.create_task,
# self.send_update(jid)
# )
# print(await datetimehandler.current_time(), "asyncio.get_event_loop().time()")
# print(await datetimehandler.current_time(), asyncio.get_event_loop().time())
# await asyncio.sleep(60 * interval)
self.loop.call_at(
self.loop.time() + 60 * interval,
self.loop.create_task,
self.send_update(jid)
)
# loop.call_later(
# 60 * interval,
# loop.create_task,
# self.send_update(jid)
# )
# print
# await handle_event()
async def send_status(self, jid):
"""
Send status message.
:param self: Self
:param jid: Jabber ID
Parameters
----------
self : ?
Self.
jid : str
Jabber ID.
"""
print(print_time(), "> SEND STATUS",jid)
unread = await initdb(
jid,
sqlitehandler.get_number_of_entries_unread
)
if unread:
status_text = "📰 News items: {}".format(str(unread))
status_mode = "chat"
else:
status_text = "🗞 No News"
status_mode = "available"
enabled = await initdb(
print(await datetimehandler.current_time(), "> SEND STATUS",jid)
enabled = await datahandler.initdb(
jid,
sqlitehandler.get_settings_value,
"enabled"
)
if not enabled:
status_mode = "xa"
status_text = "Send \"Start\" to receive news."
else:
feeds = await datahandler.initdb(
jid,
sqlitehandler.get_number_of_items,
"feeds"
)
if not feeds:
status_mode = "available"
status_text = (
"📂️ Send a URL from a blog or a news website."
)
else:
unread = await datahandler.initdb(
jid,
sqlitehandler.get_number_of_entries_unread
)
if unread:
status_mode = "chat"
status_text = (
"📰 You have {} news items to read."
).format(str(unread))
# status_text = (
# "📰 News items: {}"
# ).format(str(unread))
# status_text = (
# "📰 You have {} news items"
# ).format(str(unread))
else:
status_mode = "available"
status_text = "🗞 No news"
# print(status_text, "for", jid)
self.send_presence(
@@ -306,37 +575,55 @@ class Slixfeed(slixmpp.ClientXMPP):
pstatus=status_text,
pto=jid,
#pfrom=None
)
await asyncio.sleep(60 * 20)
# self.loop.call_at(
# self.loop.time() + 60 * 20,
# self.loop.create_task,
)
# await asyncio.sleep(60 * 20)
await self.refresh_task(
jid,
self.send_status,
"status",
"20"
)
# loop.call_at(
# loop.time() + 60 * 20,
# loop.create_task,
# self.send_status(jid)
# )
async def refresh_task(self, jid, key, val):
async def refresh_task(self, jid, callback, key, val=None):
"""
Apply settings on runtime.
Apply new setting at runtime.
:param self: Self
:param jid: Jabber ID
:param key: Key
:param val: Value
Parameters
----------
self : ?
Self.
jid : str
Jabber ID.
key : str
Key.
val : str, optional
Value. The default is None.
"""
if not val:
val = await datahandler.initdb(
jid,
sqlitehandler.get_settings_value,
key
)
if jid in task_manager:
task_manager[jid][key].cancel()
loop = asyncio.get_event_loop()
print(print_time(), "loop")
print(loop)
print(print_time(), "loop")
task_manager[jid][key] = loop.call_at(
loop.time() + 60 * float(val),
loop.create_task,
self.send_update(jid)
callback(jid)
# self.send_update(jid)
)
# task_manager[jid][key] = loop.call_later(
# 60 * float(val),
# loop.create_task,
# self.send_update(jid)
# )
# task_manager[jid][key] = self.send_update.loop.call_at(
# self.send_update.loop.time() + 60 * val,
# self.send_update.loop.create_task,
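
The call_at variants above, live and commented, all revolve around one asyncio pattern; reduced to a standalone sketch:

```
import asyncio

async def tick(name):
    print("fired:", name)

async def main():
    loop = asyncio.get_running_loop()
    # Create the coroutine now; schedule create_task to wrap it
    # two seconds from now, as refresh_task does with 60 * val.
    loop.call_at(loop.time() + 2, loop.create_task, tick("demo"))
    await asyncio.sleep(3)

asyncio.run(main())
```

If the timer is cancelled before it fires, the pre-created coroutine is never awaited, which is most likely the source of the RuntimeWarning noted in the FIXME comments above.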
@@ -350,16 +637,19 @@ async def check_updates(jid):
"""
Start periodically checking for updates.
:param jid: Jabber ID
Parameters
----------
jid : str
Jabber ID.
"""
while True:
print(print_time(), "> CHCK UPDATE",jid)
await initdb(jid, datahandler.download_updates)
print(await datetimehandler.current_time(), "> CHCK UPDATE",jid)
await datahandler.initdb(jid, datahandler.download_updates)
await asyncio.sleep(60 * 90)
# Schedule to call this function again in 90 minutes
# self.loop.call_at(
# self.loop.time() + 60 * 90,
# self.loop.create_task,
# loop.call_at(
# loop.time() + 60 * 90,
# loop.create_task,
# self.check_updates(jid)
# )
@@ -367,84 +657,123 @@ async def check_updates(jid):
def print_help():
"""
Print help manual.
Returns
-------
msg : str
Message.
"""
msg = ("Slixfeed - News syndication bot for Jabber/XMPP \n"
"\n"
"DESCRIPTION: \n"
" Slixfeed is a news aggregator bot for online news feeds. \n"
" Supported filetypes: Atom, RDF and RSS. \n"
"\n"
"BASIC USAGE: \n"
" start \n"
" Enable bot and send updates. \n"
" Stop \n"
" Disable bot and stop updates. \n"
" batch N \n"
" Send N updates for each interval. \n"
" interval N \n"
" Send an update every N minutes. \n"
" feed list \n"
" List subscriptions. \n"
"\n"
"EDIT OPTIONS: \n"
" add URL \n"
" Add URL to subscription list. \n"
" remove ID \n"
" Remove feed from subscription list. \n"
" status ID \n"
" Toggle update status of feed. \n"
"\n"
"SEARCH OPTIONS: \n"
" search TEXT \n"
" Search news items by given keywords. \n"
" recent N \n"
" List recent N news items (up to 50 items). \n"
"\n"
"STATISTICS OPTIONS: \n"
" analyses \n"
" Show report and statistics of feeds. \n"
" obsolete \n"
" List feeds that are not available. \n"
" unread \n"
" Print number of unread news items. \n"
"\n"
"BACKUP OPTIONS: \n"
" export opml \n"
" Send an OPML file with your feeds. \n"
" backup news html\n"
" Send an HTML formatted file of your news items. \n"
" backup news md \n"
" Send a Markdown file of your news items. \n"
" backup news text \n"
" Send a Plain Text file of your news items. \n"
"\n"
"DOCUMENTATION: \n"
" Slixfeed \n"
" https://gitgud.io/sjehuda/slixfeed \n"
" Slixmpp \n"
" https://slixmpp.readthedocs.io/ \n"
" feedparser \n"
" https://pythonhosted.org/feedparser")
msg = (
"```\n"
"NAME\n"
"Slixfeed - News syndication bot for Jabber/XMPP\n"
"\n"
"DESCRIPTION\n"
" Slixfeed is a news aggregator bot for online news feeds.\n"
" This program is primarily designed for XMPP.\n"
" For more information, visit https://xmpp.org/software/\n"
"\n"
"BASIC USAGE\n"
" start\n"
" Enable bot and send updates.\n"
" stop\n"
" Disable bot and stop updates.\n"
" feeds\n"
" List subscriptions.\n"
" interval N\n"
" Set interval update to every N minutes.\n"
" next N\n"
" Send N next updates.\n"
" quantum N\n"
" Set N updates for each interval.\n"
"\n"
"FILTER OPTIONS\n"
" allow\n"
" Keywords to allow (comma separates).\n"
" deny\n"
" Keywords to block (comma separates).\n"
# " filter clear allow\n"
# " Reset allow list.\n"
# " filter clear deny\n"
# " Reset deny list.\n"
"\n"
"EDIT OPTIONS\n"
" URL\n"
" Add URL to subscription list.\n"
" add URL TITLE\n"
" Add URL to subscription list (without validity check).\n"
" remove ID\n"
" Remove feed from subscription list.\n"
" status ID\n"
" Toggle update status of feed.\n"
"\n"
"SEARCH OPTIONS\n"
" feeds TEXT\n"
" Search subscriptions by given keywords.\n"
" search TEXT\n"
" Search news items by given keywords.\n"
" recent N\n"
" List recent N news items (up to 50 items).\n"
"\n"
# "STATISTICS OPTIONS\n"
# " analyses\n"
# " Show report and statistics of feeds.\n"
# " obsolete\n"
# " List feeds that are not available.\n"
# " unread\n"
# " Print number of unread news items.\n"
# "\n"
# "BACKUP OPTIONS\n"
# " export opml\n"
# " Send an OPML file with your feeds.\n"
# " backup news html\n"
# " Send an HTML formatted file of your news items.\n"
# " backup news md\n"
# " Send a Markdown file of your news items.\n"
# " backup news text\n"
# " Send a Plain Text file of your news items.\n"
# "\n"
"SUPPORT\n"
" support"
" Join xmpp:slixmpp@muc.poez.io?join\n"
"\n"
# "PROTOCOLS\n"
# " Supported prootcols are IRC, Matrix and XMPP.\n"
# " For the best experience, we recommend you to use XMPP.\n"
# "\n"
"FILETYPES\n"
" Supported filetypes are Atom, RDF and RSS.\n"
"\n"
"AUTHORS\n"
" Laura Harbinger, Schimon Zackary.\n"
"\n"
"COPYRIGHT\n"
" Slixfeed is free software; you can redistribute it and/or\n"
" modify it under the terms of the GNU General Public License\n"
" as published by the Free Software Foundation; version 3 only\n"
"\n"
" Slixfeed is distributed in the hope that it will be useful,\n"
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
" GNU General Public License for more details.\n"
"\n"
"NOTE\n"
" Make Slixfeed your own.\n"
"\n"
" You can run Slixfeed on your own computer, server, and\n"
" even on a Linux phone (i.e. Droidian, Mobian NixOS,\n"
" postmarketOS). You can also use Termux.\n"
"\n"
" All you need is one of the above and an XMPP account to\n"
" connect Slixfeed to.\n"
"\n"
"DOCUMENTATION\n"
" Slixfeed\n"
" https://gitgud.io/sjehuda/slixfeed\n"
" Slixmpp\n"
" https://slixmpp.readthedocs.io/\n"
" feedparser\n"
" https://pythonhosted.org/feedparser\n"
"\n```"
)
return msg
# TODO Perhaps this needs to be executed
# just once per program execution
async def initdb(jid, callback, message=None):
"""
Callback function to instantiate action on database.
:param jid: JID (Jabber ID).
:param callback: Function name.
:param message: Optional kwarg when a message is a part or required argument.
"""
db_dir = confighandler.get_default_dbdir()
if not os.path.isdir(db_dir):
os.mkdir(db_dir)
db_file = os.path.join(db_dir, r"{}.db".format(jid))
sqlitehandler.create_tables(db_file)
# await sqlitehandler.set_default_values(db_file)
if message:
return await callback(db_file, message)
else:
return await callback(db_file)