forked from sch/Slixfeed
Update 8 files
- /slixfeed/sqlitehandler.py - /slixfeed/xmpphandler.py - /slixfeed/opmlhandler.py - /slixfeed/datahandler.py - /slixfeed/datetimehandler.py - /slixfeed/__main__.py - /slixfeed/confighandler.py - /slixfeed/filterhandler.py
This commit is contained in:
parent
9d6a211d36
commit
031eb6ce53
8 changed files with 2535 additions and 751 deletions
|
@ -1,13 +1,42 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# TODO
|
||||
#
|
||||
# 0) sql prepared statements
|
||||
# 1) Autodetect feed:
|
||||
# if page is not feed (or HTML) and contains <link rel="alternate">
|
||||
# 2) OPML import/export
|
||||
# 3) 2022-12-30 reduce async to (maybe) prevent inner lock. async on task: commands, downloader, updater
|
||||
"""
|
||||
|
||||
FIXME
|
||||
|
||||
1) Check feed duplication on runtime.
|
||||
When feed is valid and is not yet in the database it is
|
||||
possible to send a batch which would result in duplication.
|
||||
Consequently, it might result in database lock error upon
|
||||
feed removal attempt
|
||||
|
||||
TODO
|
||||
|
||||
1) SQL prepared statements
|
||||
|
||||
2) Machine Learning for scraping Title, Link, Summary and Timestamp
|
||||
|
||||
3) Support MUC
|
||||
|
||||
4) Support categories
|
||||
|
||||
5) Default prepackaged list of feeds
|
||||
|
||||
6) XMPP commands
|
||||
|
||||
7) Bot as transport
|
||||
|
||||
8) OMEMO
|
||||
|
||||
9) Logging
|
||||
|
||||
10) Default feeds (e.g. Blacklisted News, TBOT etc.)
|
||||
|
||||
11) Download and upload/send article (xHTML, xHTMLZ, Markdown, MHTML, TXT)
|
||||
Use Readability
|
||||
|
||||
"""
|
||||
|
||||
# vars and their meanings:
|
||||
# jid = Jabber ID (XMPP)
|
||||
|
|
|
@ -1,6 +1,15 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
1) Use file settings.csv and pathnames.txt instead:
|
||||
See get_value_default and get_default_list
|
||||
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
@ -8,16 +17,21 @@ def get_default_dbdir():
|
|||
"""
|
||||
Determine the directory path where dbfile will be stored.
|
||||
|
||||
If $XDG_DATA_HOME is defined, use it
|
||||
else if $HOME exists, use it
|
||||
else if the platform is Windows, use %APPDATA%
|
||||
else use the current directory.
|
||||
* If $XDG_DATA_HOME is defined, use it;
|
||||
* else if $HOME exists, use it;
|
||||
* else if the platform is Windows, use %APPDATA%;
|
||||
* else use the current directory.
|
||||
|
||||
:return: Path to database file.
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Path to database file.
|
||||
|
||||
Note
|
||||
----
|
||||
This code was taken from the buku project.
|
||||
This function was taken from project buku.
|
||||
|
||||
See https://github.com/jarun/buku
|
||||
|
||||
* Arun Prakash Jana (jarun)
|
||||
* Dmitry Marakasov (AMDmi3)
|
||||
|
@ -41,12 +55,15 @@ def get_default_confdir():
|
|||
"""
|
||||
Determine the directory path where configuration will be stored.
|
||||
|
||||
If $XDG_CONFIG_HOME is defined, use it
|
||||
else if $HOME exists, use it
|
||||
else if the platform is Windows, use %APPDATA%
|
||||
else use the current directory.
|
||||
* If $XDG_CONFIG_HOME is defined, use it;
|
||||
* else if $HOME exists, use it;
|
||||
* else if the platform is Windows, use %APPDATA%;
|
||||
* else use the current directory.
|
||||
|
||||
:return: Path to configuration directory.
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Path to configuration directory.
|
||||
"""
|
||||
# config_home = xdg.BaseDirectory.xdg_config_home
|
||||
config_home = os.environ.get('XDG_CONFIG_HOME')
|
||||
|
@ -67,24 +84,69 @@ async def get_value_default(key):
|
|||
"""
|
||||
Get settings default value.
|
||||
|
||||
:param key: "enabled", "interval", "quantum".
|
||||
:return: Integer.
|
||||
Parameters
|
||||
----------
|
||||
key : str
|
||||
Key: enabled, filter-allow, filter-deny,
|
||||
interval, quantum, random.
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : int or str
|
||||
Value.
|
||||
"""
|
||||
if key == "enabled":
|
||||
result = 1
|
||||
elif key == "quantum":
|
||||
result = 4
|
||||
elif key == "interval":
|
||||
result = 30
|
||||
match key:
|
||||
case "enabled":
|
||||
result = 1
|
||||
case "filter-allow":
|
||||
result = "hitler,sadam,saddam"
|
||||
case "filter-deny":
|
||||
result = "crim,dead,death,disaster,holocaust,murder,war"
|
||||
case "interval":
|
||||
result = 30
|
||||
case "quantum":
|
||||
result = 4
|
||||
case "random":
|
||||
result = 0
|
||||
return result
|
||||
|
||||
|
||||
def get_list():
    """
    Get the list of candidate feed pathnames.

    The pathnames are read from the file url_paths.txt inside the
    configuration directory.  When the directory or the file is
    missing it is created, and the file is populated with the
    defaults returned by get_default_list.

    Returns
    -------
    paths : list
        One entry per line of the file, stripped of surrounding
        whitespace.
    """
    cfg_dir = get_default_confdir()
    # makedirs also creates missing parent directories and does not
    # fail when the directory already exists.
    os.makedirs(cfg_dir, exist_ok=True)
    cfg_file = os.path.join(cfg_dir, r"url_paths.txt")
    if not os.path.isfile(cfg_file):
        default_paths = get_default_list()
        with open(cfg_file, "w") as file:
            file.write("\n".join(default_paths))
    # The context manager guarantees the handle is closed after reading.
    with open(cfg_file, "r") as file:
        paths = [line.strip() for line in file]
    return paths
|
||||
|
||||
|
||||
# async def generate_dictionary():
|
||||
def get_default_list():
|
||||
"""
|
||||
Generate a dictionary file.
|
||||
|
||||
:return: List.
|
||||
Returns
|
||||
-------
|
||||
paths : list
|
||||
Dictionary of pathnames.
|
||||
"""
|
||||
paths = [
|
||||
".atom",
|
||||
|
@ -139,6 +201,8 @@ def get_default_list():
|
|||
# "/rss.json",
|
||||
"/rss.php",
|
||||
"/rss.xml",
|
||||
"/syndication.php?type=atom1.0", #mybb
|
||||
"/syndication.php?type=rss2.0",
|
||||
"/timeline.rss",
|
||||
"/videos.atom",
|
||||
# "/videos.json",
|
||||
|
|
|
@ -1,29 +1,75 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import feedparser
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import feedparser
|
||||
import os
|
||||
|
||||
import sqlitehandler
|
||||
import confighandler
|
||||
import datetimehandler
|
||||
import filterhandler
|
||||
|
||||
from http.client import IncompleteRead
|
||||
from asyncio.exceptions import IncompleteReadError
|
||||
from http.client import IncompleteRead
|
||||
from urllib import error
|
||||
from bs4 import BeautifulSoup
|
||||
# from xml.etree.ElementTree import ElementTree, ParseError
|
||||
from urllib.parse import urlparse
|
||||
from urllib.parse import urljoin
|
||||
from urllib.parse import urlsplit
|
||||
from urllib.parse import urlunsplit
|
||||
from lxml import html
|
||||
|
||||
async def download_updates(db_file):
|
||||
|
||||
# NOTE Perhaps this needs to be executed
|
||||
# just once per program execution
|
||||
async def initdb(jid, callback, message=None):
    """
    Prepare the database of a given JID and run an action on it.

    Parameters
    ----------
    jid : str
        Jabber ID. Used as the basename of the database file.
    callback : coroutine function
        Awaited as callback(db_file) or, when message is truthy,
        as callback(db_file, message).
    message : str, optional
        Extra argument forwarded to callback. A falsy value
        (None or an empty string) is not forwarded.
        The default is None.

    Returns
    -------
    object
        Whatever the awaited callback returns.
    """
    db_dir = confighandler.get_default_dbdir()
    # makedirs creates missing parent directories too and tolerates the
    # directory appearing between a check and the creation.
    os.makedirs(db_dir, exist_ok=True)
    db_file = os.path.join(db_dir, r"{}.db".format(jid))
    sqlitehandler.create_tables(db_file)
    # await sqlitehandler.set_default_values(db_file)
    if message:
        return await callback(db_file, message)
    return await callback(db_file)
|
||||
|
||||
|
||||
async def download_updates(db_file, url=None):
|
||||
"""
|
||||
Check feeds for new entries.
|
||||
|
||||
:param db_file: Database filename.
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str, optional
|
||||
URL. The default is None.
|
||||
"""
|
||||
urls = await sqlitehandler.get_subscriptions(db_file)
|
||||
|
||||
if url:
|
||||
urls = [url] # Valid [url] and [url,] and (url,)
|
||||
else:
|
||||
urls = await sqlitehandler.get_feeds_url(db_file)
|
||||
for url in urls:
|
||||
# print(os.path.basename(db_file), url[0])
|
||||
source = url[0]
|
||||
|
@ -34,31 +80,42 @@ async def download_updates(db_file):
|
|||
# urls.next()
|
||||
# next(urls)
|
||||
continue
|
||||
|
||||
await sqlitehandler.update_source_status(db_file, res[1], source)
|
||||
|
||||
await sqlitehandler.update_source_status(
|
||||
db_file,
|
||||
res[1],
|
||||
source
|
||||
)
|
||||
if res[0]:
|
||||
try:
|
||||
feed = feedparser.parse(res[0])
|
||||
if feed.bozo:
|
||||
# bozo = ("WARNING: Bozo detected for feed <{}>. "
|
||||
# "For more information, visit "
|
||||
# "https://pythonhosted.org/feedparser/bozo.html"
|
||||
# .format(source))
|
||||
# print(bozo)
|
||||
bozo = (
|
||||
"WARNING: Bozo detected for feed: {}\n"
|
||||
"For more information, visit "
|
||||
"https://pythonhosted.org/feedparser/bozo.html"
|
||||
).format(source)
|
||||
print(bozo)
|
||||
valid = 0
|
||||
else:
|
||||
valid = 1
|
||||
await sqlitehandler.update_source_validity(db_file, source, valid)
|
||||
except (IncompleteReadError, IncompleteRead, error.URLError) as e:
|
||||
print(e)
|
||||
await sqlitehandler.update_source_validity(
|
||||
db_file,
|
||||
source,
|
||||
valid)
|
||||
except (
|
||||
IncompleteReadError,
|
||||
IncompleteRead,
|
||||
error.URLError
|
||||
) as e:
|
||||
# print(e)
|
||||
# TODO Print error to log
|
||||
None
|
||||
# NOTE I don't think there should be "return"
|
||||
# because then we might stop scanning next URLs
|
||||
# return
|
||||
# TODO Place these couple of lines back down
|
||||
# NOTE Need to correct the SQL statement to do so
|
||||
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
||||
|
||||
if res[1] == 200:
|
||||
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
||||
# TODO Place these couple of lines back down
|
||||
|
@ -66,25 +123,60 @@ async def download_updates(db_file):
|
|||
entries = feed.entries
|
||||
# length = len(entries)
|
||||
# await sqlitehandler.remove_entry(db_file, source, length)
|
||||
await sqlitehandler.remove_nonexistent_entries(db_file, feed, source)
|
||||
|
||||
new_entry = 0
|
||||
await sqlitehandler.remove_nonexistent_entries(
|
||||
db_file,
|
||||
feed,
|
||||
source
|
||||
)
|
||||
# new_entry = 0
|
||||
for entry in entries:
|
||||
|
||||
if entry.has_key("id"):
|
||||
eid = entry.id
|
||||
if entry.has_key("title"):
|
||||
title = entry.title
|
||||
else:
|
||||
title = feed["feed"]["title"]
|
||||
|
||||
if entry.has_key("link"):
|
||||
link = entry.link
|
||||
# link = complete_url(source, entry.link)
|
||||
link = await join_url(source, entry.link)
|
||||
link = await trim_url(link)
|
||||
else:
|
||||
link = source
|
||||
|
||||
exist = await sqlitehandler.check_entry_exist(db_file, title, link)
|
||||
|
||||
# TODO Pass date too for comparison check
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = await datetimehandler.rfc2822_to_iso8601(date)
|
||||
else:
|
||||
date = None
|
||||
exist = await sqlitehandler.check_entry_exist(
|
||||
db_file,
|
||||
source,
|
||||
eid=eid,
|
||||
title=title,
|
||||
link=link,
|
||||
date=date
|
||||
)
|
||||
if not exist:
|
||||
new_entry = new_entry + 1
|
||||
# new_entry = new_entry + 1
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = await datetimehandler.rfc2822_to_iso8601(date)
|
||||
# try:
|
||||
# date = datetime.strptime(date, "%a, %d %b %Y %H:%M:%S %z")
|
||||
# except:
|
||||
# date = datetime.strptime(date, '%a, %d %b %Y %H:%M:%S %Z')
|
||||
# finally:
|
||||
# date = date.isoformat()
|
||||
# if parsedate(date): # Is RFC 2822 format
|
||||
# date = parsedate_to_datetime(date) # Process timestamp
|
||||
# date = date.isoformat() # Convert to ISO 8601
|
||||
else:
|
||||
# TODO Just set date = "*** No date ***"
|
||||
# date = datetime.now().isoformat()
|
||||
date = await datetimehandler.now()
|
||||
# NOTE Would seconds result in better database performance
|
||||
# date = datetime.datetime(date)
|
||||
# date = (date-datetime.datetime(1970,1,1)).total_seconds()
|
||||
# TODO Enhance summary
|
||||
if entry.has_key("summary"):
|
||||
summary = entry.summary
|
||||
|
@ -93,164 +185,156 @@ async def download_updates(db_file):
|
|||
# TODO Limit text length
|
||||
summary = summary.replace("\n\n", "\n")[:300] + " ⃨"
|
||||
else:
|
||||
summary = '*** No summary ***'
|
||||
entry = (title, summary, link, source, 0);
|
||||
await sqlitehandler.add_entry_and_set_date(db_file, source, entry)
|
||||
summary = "*** No summary ***"
|
||||
read_status = 0
|
||||
pathname = urlsplit(link).path
|
||||
string = (
|
||||
"{} {} {}"
|
||||
).format(
|
||||
title,
|
||||
summary,
|
||||
pathname
|
||||
)
|
||||
allow_list = await filterhandler.is_listed(
|
||||
db_file,
|
||||
"allow",
|
||||
string
|
||||
)
|
||||
if not allow_list:
|
||||
reject_list = await filterhandler.is_listed(
|
||||
db_file,
|
||||
"deny",
|
||||
string
|
||||
)
|
||||
if reject_list:
|
||||
print(">>> REJECTED", title)
|
||||
summary = "REJECTED"
|
||||
# summary = ""
|
||||
read_status = 1
|
||||
entry = (
|
||||
title,
|
||||
summary,
|
||||
link,
|
||||
eid,
|
||||
source,
|
||||
date,
|
||||
read_status
|
||||
)
|
||||
await sqlitehandler.add_entry_and_set_date(
|
||||
db_file,
|
||||
source,
|
||||
entry
|
||||
)
|
||||
# print(await datetimehandler.current_time(), entry, title)
|
||||
# else:
|
||||
# print(await datetimehandler.current_time(), exist, title)
|
||||
|
||||
|
||||
async def add_feed_no_check(db_file, data):
    """
    Subscribe to a feed without validating its content first.

    Parameters
    ----------
    db_file : str
        Path to database file.
    data : sequence
        URL at index 0 and title at index 1.

    Returns
    -------
    msg : str
        Status message.
    """
    title = data[1]
    url = await trim_url(data[0])
    exist = await sqlitehandler.check_feed_exist(db_file, url)
    if exist:
        # Already subscribed: report the stored name and index.
        return (
            "> {}\nNews source \"{}\" is already "
            "listed in the subscription list at "
            "index {}"
        ).format(url, exist[1], exist[0])
    msg = await sqlitehandler.add_feed(db_file, url, title)
    await download_updates(db_file, [url])
    return msg
|
||||
|
||||
|
||||
async def add_feed(db_file, url):
|
||||
"""
|
||||
Check whether feed exist, otherwise process it.
|
||||
|
||||
:param db_file: Database filename.
|
||||
:param url: URL.
|
||||
:return: Status message.
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Status message.
|
||||
"""
|
||||
msg = None
|
||||
url = await trim_url(url)
|
||||
exist = await sqlitehandler.check_feed_exist(db_file, url)
|
||||
|
||||
if not exist:
|
||||
res = await download_feed(url)
|
||||
if res[0]:
|
||||
feed = feedparser.parse(res[0])
|
||||
title = await get_title(url, feed)
|
||||
if feed.bozo:
|
||||
bozo = ("WARNING: Bozo detected. Failed to load <{}>.".format(url))
|
||||
bozo = (
|
||||
"Bozo detected. Failed to load: {}."
|
||||
).format(url)
|
||||
print(bozo)
|
||||
try:
|
||||
# tree = etree.fromstring(res[0]) # etree is for xml
|
||||
tree = html.fromstring(res[0])
|
||||
except:
|
||||
return "Failed to parse URL <{}> as feed".format(url)
|
||||
|
||||
print("RSS Auto-Discovery Engaged")
|
||||
xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]"""
|
||||
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
|
||||
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
|
||||
feeds = tree.xpath(xpath_query)
|
||||
if len(feeds) > 1:
|
||||
msg = "RSS Auto-Discovery has found {} feeds:\n\n".format(len(feeds))
|
||||
for feed in feeds:
|
||||
# # The following code works;
|
||||
# # The following code will catch
|
||||
# # only valid resources (i.e. not 404);
|
||||
# # The following code requires more bandwidth.
|
||||
# res = await download_feed(feed)
|
||||
# if res[0]:
|
||||
# disco = feedparser.parse(res[0])
|
||||
# title = disco["feed"]["title"]
|
||||
# msg += "{} \n {} \n\n".format(title, feed)
|
||||
feed_name = feed.xpath('@title')[0]
|
||||
feed_addr = feed.xpath('@href')[0]
|
||||
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
|
||||
msg += "The above feeds were extracted from\n{}".format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
url = feeds[0].xpath('@href')[0]
|
||||
# Why wouldn't add_feed return a message
|
||||
# upon success unless return is explicitly
|
||||
# mentioned, yet upon failure it wouldn't?
|
||||
return await add_feed(db_file, url)
|
||||
|
||||
print("RSS Scan Mode Engaged")
|
||||
feeds = {}
|
||||
paths = []
|
||||
# TODO Test
|
||||
cfg_dir = confighandler.get_default_confdir()
|
||||
if not os.path.isdir(cfg_dir):
|
||||
os.mkdir(cfg_dir)
|
||||
cfg_file = os.path.join(cfg_dir, r"url_paths.txt")
|
||||
if not os.path.isfile(cfg_file):
|
||||
# confighandler.generate_dictionary()
|
||||
list = confighandler.get_default_list()
|
||||
file = open(cfg_file, "w")
|
||||
file.writelines("\n".join(list))
|
||||
file.close()
|
||||
file = open(cfg_file, "r")
|
||||
lines = file.readlines()
|
||||
for line in lines:
|
||||
paths.extend([line.strip()])
|
||||
for path in paths:
|
||||
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
|
||||
xpath_query = "//a[contains(@href,'{}')]".format(path)
|
||||
addresses = tree.xpath(xpath_query)
|
||||
parted_url = urlparse(url)
|
||||
# NOTE Should number of addresses be limited or
|
||||
# perhaps be N from the start and N from the end
|
||||
for address in addresses:
|
||||
address = address.xpath('@href')[0]
|
||||
if address.startswith('/'):
|
||||
address = parted_url.scheme + '://' + parted_url.netloc + address
|
||||
res = await download_feed(address)
|
||||
if res[1] == 200:
|
||||
try:
|
||||
feeds[address] = feedparser.parse(res[0])["feed"]["title"]
|
||||
except:
|
||||
continue
|
||||
if len(feeds) > 1:
|
||||
msg = "RSS URL scan has found {} feeds:\n\n".format(len(feeds))
|
||||
for feed in feeds:
|
||||
# try:
|
||||
# res = await download_feed(feed)
|
||||
# except:
|
||||
# continue
|
||||
feed_name = feeds[feed]
|
||||
feed_addr = feed
|
||||
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
|
||||
msg += "The above feeds were extracted from\n{}".format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
url = list(feeds)[0]
|
||||
return await add_feed(db_file, url)
|
||||
|
||||
# (HTTP) Request(s) Paths
|
||||
print("RSS Arbitrary Mode Engaged")
|
||||
feeds = {}
|
||||
parted_url = urlparse(url)
|
||||
for path in paths:
|
||||
address = parted_url.scheme + '://' + parted_url.netloc + path
|
||||
res = await download_feed(address)
|
||||
if res[1] == 200:
|
||||
# print(feedparser.parse(res[0])["feed"]["title"])
|
||||
# feeds[address] = feedparser.parse(res[0])["feed"]["title"]
|
||||
try:
|
||||
title = feedparser.parse(res[0])["feed"]["title"]
|
||||
except:
|
||||
title = '*** No Title ***'
|
||||
feeds[address] = title
|
||||
|
||||
# Check whether URL has path (i.e. not root)
|
||||
if parted_url.path.split('/')[1]:
|
||||
paths.extend([".atom", ".feed", ".rdf", ".rss"]) if '.rss' not in paths else -1
|
||||
# if paths.index('.rss'):
|
||||
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
|
||||
address = parted_url.scheme + '://' + parted_url.netloc + '/' + parted_url.path.split('/')[1] + path
|
||||
res = await download_feed(address)
|
||||
if res[1] == 200:
|
||||
try:
|
||||
title = feedparser.parse(res[0])["feed"]["title"]
|
||||
except:
|
||||
title = '*** No Title ***'
|
||||
feeds[address] = title
|
||||
if len(feeds) > 1:
|
||||
msg = "RSS URL discovery has found {} feeds:\n\n".format(len(feeds))
|
||||
for feed in feeds:
|
||||
feed_name = feeds[feed]
|
||||
feed_addr = feed
|
||||
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
|
||||
msg += "The above feeds were extracted from\n{}".format(url)
|
||||
elif feeds:
|
||||
url = list(feeds)[0]
|
||||
msg = await add_feed(db_file, url)
|
||||
else:
|
||||
msg = "No news feeds were found for URL <{}>.".format(url)
|
||||
msg = (
|
||||
"> {}\nFailed to parse URL as feed."
|
||||
).format(url)
|
||||
if not msg:
|
||||
print("RSS Auto-Discovery Engaged")
|
||||
msg = await feed_mode_auto_discovery(db_file, url, tree)
|
||||
if not msg:
|
||||
print("RSS Scan Mode Engaged")
|
||||
msg = await feed_mode_scan(db_file, url, tree)
|
||||
if not msg:
|
||||
print("RSS Arbitrary Mode Engaged")
|
||||
msg = await feed_mode_request(db_file, url, tree)
|
||||
if not msg:
|
||||
msg = (
|
||||
"> {}\nNo news feeds were found for URL."
|
||||
).format(url)
|
||||
else:
|
||||
msg = await sqlitehandler.add_feed(db_file, title, url, res)
|
||||
status = res[1]
|
||||
msg = await sqlitehandler.add_feed(
|
||||
db_file,
|
||||
url,
|
||||
title,
|
||||
status
|
||||
)
|
||||
await download_updates(db_file, [url])
|
||||
else:
|
||||
msg = "Failed to get URL <{}>. Reason: {}".format(url, res[1])
|
||||
status = res[1]
|
||||
msg = (
|
||||
"> {}\nFailed to get URL. Reason: {}"
|
||||
).format(url, status)
|
||||
else:
|
||||
ix = exist[0]
|
||||
name = exist[1]
|
||||
msg = "> {}\nNews source \"{}\" is already listed in the subscription list at index {}".format(url, name, ix)
|
||||
msg = (
|
||||
"> {}\nNews source \"{}\" is already "
|
||||
"listed in the subscription list at "
|
||||
"index {}".format(url, name, ix)
|
||||
)
|
||||
return msg
|
||||
|
||||
|
||||
|
@ -258,8 +342,15 @@ async def download_feed(url):
|
|||
"""
|
||||
Download content of given URL.
|
||||
|
||||
:param url: URL.
|
||||
:return: Document or error message.
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg: list or str
|
||||
Document or error message.
|
||||
"""
|
||||
timeout = aiohttp.ClientTimeout(total=10)
|
||||
async with aiohttp.ClientSession() as session:
|
||||
|
@ -271,30 +362,438 @@ async def download_feed(url):
|
|||
try:
|
||||
doc = await response.text()
|
||||
# print (response.content_type)
|
||||
return [doc, status]
|
||||
msg = [
|
||||
doc,
|
||||
status
|
||||
]
|
||||
except:
|
||||
# return [False, "The content of this document doesn't appear to be textual."]
|
||||
return [False, "Document is too large or is not textual."]
|
||||
# msg = [
|
||||
# False,
|
||||
# ("The content of this document "
|
||||
# "doesn't appear to be textual."
|
||||
# )
|
||||
# ]
|
||||
msg = [
|
||||
False,
|
||||
"Document is too large or is not textual."
|
||||
]
|
||||
else:
|
||||
return [False, "HTTP Error: " + str(status)]
|
||||
msg = [
|
||||
False,
|
||||
"HTTP Error: " + str(status)
|
||||
]
|
||||
except aiohttp.ClientError as e:
|
||||
print('Error', str(e))
|
||||
return [False, "Error: " + str(e)]
|
||||
# print('Error', str(e))
|
||||
msg = [
|
||||
False,
|
||||
"Error: " + str(e)
|
||||
]
|
||||
except asyncio.TimeoutError as e:
|
||||
# print('Timeout:', str(e))
|
||||
return [False, "Timeout: " + str(e)]
|
||||
msg = [
|
||||
False,
|
||||
"Timeout: " + str(e)
|
||||
]
|
||||
return msg
|
||||
|
||||
|
||||
async def get_title(url, feed):
    """
    Get title of feed.

    Parameters
    ----------
    url : str
        URL.
    feed : dict
        Parsed feed document.

    Returns
    -------
    title : str
        Title or URL hostname.
    """
    try:
        title = feed["feed"]["title"]
    except (KeyError, TypeError):
        # The parsed document carries no title (or is not subscriptable):
        # fall back to the hostname of the feed URL.
        title = urlsplit(url).netloc
    return title
|
||||
|
||||
|
||||
# NOTE Read the documentation
|
||||
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
|
||||
def complete_url(source, link):
    """
    Check if URL is pathname and complete it into URL.

    Links that start with "www." get an "http://" prefix, magnet
    links and links that already carry a scheme and a hostname are
    returned unchanged, and every other link is resolved against
    the feed URL.

    Parameters
    ----------
    source : str
        Feed URL.
    link : str
        Link URL or pathname.

    Returns
    -------
    str
        URL.
    """
    if link.startswith("www."):
        return "http://" + link
    parted_link = urlsplit(link)
    if parted_link.scheme == "magnet" and parted_link.query:
        return link
    if parted_link.scheme and parted_link.netloc:
        return link
    # urljoin performs standard relative reference resolution: it covers
    # "//host", "/path", "../path", "./path" and bare pathnames, and keeps
    # the query and fragment of the link.
    return urljoin(source, link)
|
||||
|
||||
|
||||
"""
|
||||
TODO
|
||||
Feed https://www.ocaml.org/feed.xml
|
||||
Link %20https://frama-c.com/fc-versions/cobalt.html%20
|
||||
|
||||
FIXME
|
||||
Feed https://cyber.dabamos.de/blog/feed.rss
|
||||
Link https://cyber.dabamos.de/blog/#article-2022-07-15
|
||||
"""
|
||||
async def join_url(source, link):
    """
    Join base URL with given pathname.

    Percent-encoded spaces ("%20") wrapped around the link are
    removed first; occurrences inside the link are kept.  A link
    that starts with "www." gets an "http://" prefix, and any other
    link is resolved against the feed URL.

    Parameters
    ----------
    source : str
        Feed URL.
    link : str
        Link URL or pathname.

    Returns
    -------
    str
        URL.
    """
    # Strip only the surrounding "%20"; an interior "%20" is a legitimate
    # part of the address and must survive.
    while link.startswith("%20"):
        link = link[3:]
    while link.endswith("%20"):
        link = link[:-3]
    if link.startswith("www."):
        return "http://" + link
    return urljoin(source, link)
|
||||
|
||||
|
||||
async def trim_url(url):
    """
    Collapse repeated slashes in the pathname of a URL.

    Parameters
    ----------
    url : str
        URL.

    Returns
    -------
    url : str
        URL whose pathname contains no "//"; the scheme, hostname,
        query and fragment are carried over as parsed.
    """
    parts = urlsplit(url)
    path = parts.path
    # Repeat until stable: a run such as "///" needs more than one pass.
    while "//" in path:
        path = path.replace("//", "/")
    return urlunsplit(
        (parts.scheme, parts.netloc, path, parts.query, parts.fragment)
    )
|
||||
|
||||
|
||||
# TODO Improve scan by gradual decreasing of path
|
||||
async def _probe_feed(address, feeds):
    """Download address and, on HTTP 200, record its feed title in feeds."""
    res = await download_feed(address)
    if res[1] == 200:
        try:
            title = feedparser.parse(res[0])["feed"]["title"]
        except Exception:
            title = '*** No Title ***'
        feeds[address] = title


async def feed_mode_request(db_file, url, tree):
    """
    Look up feeds by pathname using HTTP Requests.

    Every known pathname is requested at the root of the host and,
    when the URL has a first path segment, appended to that segment
    as well.

    Parameters
    ----------
    db_file : str
        Path to database file.
    url : str
        URL.
    tree : object
        Parsed page. Not used by this mode; kept so that all
        feed_mode_* functions share one signature.

    Returns
    -------
    msg : str or None
        Message with URLs when several feeds were found, the
        result of add_feed when exactly one was found, and None
        when no feed was found.
    """
    feeds = {}
    msg = None
    parted_url = urlsplit(url)
    # First path segment, or "" when the URL points at the root.
    # An empty path splits into a single item, hence the length check.
    segments = parted_url.path.split('/')
    first_segment = segments[1] if len(segments) > 1 else ""
    paths = confighandler.get_list()
    # Extend before the loop instead of mutating the list while iterating
    # over it (an empty list stays empty).
    if paths and first_segment and '.rss' not in paths:
        paths.extend([".atom", ".feed", ".rdf", ".rss"])
    for path in paths:
        address = urlunsplit([
            parted_url.scheme,
            parted_url.netloc,
            path,
            "",
            ""
            ])
        await _probe_feed(address, feeds)
        # Check whether URL has path (i.e. not root)
        if first_segment:
            address = urlunsplit([
                parted_url.scheme,
                parted_url.netloc,
                first_segment + path,
                "",
                ""
                ])
            await _probe_feed(address, feeds)
    if len(feeds) > 1:
        listing = "".join(
            "{}\n{}\n\n".format(feed_name, feed_addr)
            for feed_addr, feed_name in feeds.items()
            )
        msg = (
            "RSS URL discovery has found {} feeds:\n```\n"
            ).format(len(feeds))
        msg += listing
        msg += (
            "```\nThe above feeds were extracted from\n{}"
            ).format(url)
    elif feeds:
        feed_addr = next(iter(feeds))
        msg = await add_feed(db_file, feed_addr)
    return msg
|
||||
|
||||
|
||||
async def feed_mode_scan(db_file, url, tree):
    """
    Scan page for potential feeds by pathname.

    Anchors whose href contains one of the known pathnames are
    resolved against the page URL, downloaded, and kept when the
    response parses as a feed with a title.

    Parameters
    ----------
    db_file : str
        Path to database file.
    url : str
        URL.
    tree : object
        Parsed page; must provide an xpath method.

    Returns
    -------
    msg : str or None
        Message with URLs when several feeds were found, the
        result of add_feed when exactly one was found, and None
        when no feed was found.
    """
    feeds = {}
    # Addresses already requested, so that a link matching several
    # pathnames is downloaded only once.
    seen = set()
    paths = confighandler.get_list()
    for path in paths:
        # xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
        xpath_query = "//a[contains(@href,'{}')]".format(path)
        addresses = tree.xpath(xpath_query)
        # NOTE Should number of addresses be limited or
        # perhaps be N from the start and N from the end
        for address in addresses:
            # Resolve relative and root-relative hrefs against the page.
            address = await join_url(url, address.xpath('@href')[0])
            if address in seen:
                continue
            seen.add(address)
            res = await download_feed(address)
            if res[1] == 200:
                try:
                    feeds[address] = feedparser.parse(res[0])["feed"]["title"]
                except Exception:
                    continue
    if len(feeds) > 1:
        msg = (
            "RSS URL scan has found {} feeds:\n```\n"
            ).format(len(feeds))
        for feed_addr, feed_name in feeds.items():
            msg += "{}\n{}\n\n".format(feed_name, feed_addr)
        msg += (
            "```\nThe above feeds were extracted from\n{}"
            ).format(url)
        return msg
    if feeds:
        feed_addr = next(iter(feeds))
        return await add_feed(db_file, feed_addr)
    return None
|
||||
|
||||
|
||||
async def feed_mode_auto_discovery(db_file, url, tree):
    """
    Lookup for feeds using RSS autodiscovery technique.

    See: https://www.rssboard.org/rss-autodiscovery

    Parameters
    ----------
    db_file : str
        Path to database file.
    url : str
        URL of the scanned page; feed links are joined against it.
    tree : object
        Parsed document exposing an ``xpath`` method
        (presumably an lxml tree - confirm against caller).

    Returns
    -------
    msg : str or None
        When several feeds are advertised, a message listing them.
        When exactly one feed is advertised, the result of ``add_feed``.
        None when the page advertises no usable feed.
    """
    xpath_query = (
        '//link[(@rel="alternate") and '
        '(@type="application/atom+xml" or '
        '@type="application/rdf+xml" or '
        '@type="application/rss+xml")]'
        )
    # Collect (title, href) pairs up front so that the reported count
    # always matches the number of feeds that are actually listed.
    links = []
    for node in tree.xpath(xpath_query):
        hrefs = node.xpath('@href')
        if not hrefs:
            # A <link> without href can not be subscribed to.
            continue
        titles = node.xpath('@title')
        # The title attribute is optional; do not fail when it is absent.
        links.append((titles[0] if titles else None, hrefs[0]))
    if len(links) > 1:
        msg = (
            "RSS Auto-Discovery has found {} feeds:\n```\n"
            ).format(len(links))
        for title, href in links:
            feed_addr = await join_url(url, href)
            # Fall back to the address when the feed has no title.
            feed_name = title or feed_addr
            msg += "{}\n{}\n\n".format(feed_name, feed_addr)
        msg += (
            "```\nThe above feeds were extracted from\n{}"
            ).format(url)
        return msg
    elif links:
        feed_addr = await join_url(url, links[0][1])
        msg = await add_feed(db_file, feed_addr)
        return msg
    return None
|
81
slixfeed/datetimehandler.py
Normal file
81
slixfeed/datetimehandler.py
Normal file
|
@ -0,0 +1,81 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
https://feedparser.readthedocs.io/en/latest/date-parsing.html
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from dateutil.parser import parse
|
||||
from email.utils import parsedate
|
||||
from email.utils import parsedate_to_datetime
|
||||
|
||||
async def now():
    """
    Produce an ISO 8601 timestamp of the current local time.

    Returns
    -------
    str
        Output of ``datetime.now().isoformat()``.
    """
    return datetime.now().isoformat()
|
||||
|
||||
|
||||
async def current_time():
    """
    Produce the current local time of day.

    Returns
    -------
    str
        HH:MM:SS timestamp.
    """
    return datetime.now().strftime("%H:%M:%S")
|
||||
|
||||
|
||||
async def validate(date):
    """
    Validate date format.

    Parameters
    ----------
    date : str
        Timestamp.

    Returns
    -------
    date : str
        The given timestamp when dateutil can parse it,
        otherwise the current ISO 8601 timestamp.
    """
    try:
        parse(date)
    except Exception:
        # Best effort: any parsing failure falls back to the current time.
        # now() is a coroutine function and must be awaited, otherwise
        # a coroutine object would be returned instead of a timestamp.
        date = await now()
    return date
|
||||
|
||||
|
||||
async def rfc2822_to_iso8601(date):
    """
    Convert RFC 2822 into ISO 8601.

    Parameters
    ----------
    date : str
        RFC 2822 Timestamp.

    Returns
    -------
    date : str
        ISO 8601 Timestamp when the input is recognized as RFC 2822.
        The input is returned unchanged when it is not recognized.
    """
    if parsedate(date):
        try:
            date = parsedate_to_datetime(date).isoformat()
        except (TypeError, ValueError, OverflowError):
            # The fields were recognized but do not form a valid date
            # (e.g. day 32). now() is a coroutine function and must be
            # awaited to obtain the fallback timestamp.
            date = await now()
    return date
|
105
slixfeed/filterhandler.py
Normal file
105
slixfeed/filterhandler.py
Normal file
|
@ -0,0 +1,105 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
1) Website-specific filter (e.g. audiobookbay).
|
||||
|
||||
2) Exclude websites from filtering (e.g. metapedia).
|
||||
|
||||
3) Filter phrases:
|
||||
Refer to sqlitehandler.search_entries for implementation.
|
||||
It is expected to be more complex than function search_entries.
|
||||
|
||||
"""
|
||||
|
||||
import sqlitehandler
|
||||
|
||||
async def set_filter(newwords, keywords):
    """
    Append new keywords to filter.

    Parameters
    ----------
    newwords : str
        Comma-separated list of new keywords.
    keywords : str or None
        Comma-separated list of current keywords.
        Anything that is not a string is treated as an empty list.

    Returns
    -------
    val : str
        Sorted, comma-separated list of current keywords and new keywords.
    """
    if isinstance(keywords, str):
        # Drop empty entries; splitting "" yields [""] which would
        # otherwise produce a leading comma in the result.
        merged = [word for word in keywords.split(",") if word]
    else:
        merged = []
    for word in newwords.lower().split(","):
        word = word.strip()
        if word and word not in merged:
            merged.append(word)
    merged.sort()
    val = ",".join(merged)
    return val
|
||||
|
||||
async def is_listed(db_file, type, string):
    """
    Check whether a string contains a keyword of the given filter.

    Parameters
    ----------
    db_file : str
        Path to database file.
    type : str
        Filter name; the settings key looked up is "filter-" + type.
    string : str
        Text to look for keywords in (compared in lowercase).

    Returns
    -------
    int or None
        1 when a stored keyword of at least 2 characters occurs
        in the string, otherwise None.
    """
    filter_type = "filter-" + type
    keywords = await sqlitehandler.get_settings_value(
        db_file,
        filter_type
        )
    if not keywords:
        return None
    # Lowercase once instead of once per keyword.
    haystack = string.lower()
    for keyword in keywords.split(","):
        # Skip empty and single-character keywords.
        if len(keyword) < 2:
            continue
        if keyword in haystack:
            print(">>> ACTIVATE", keyword)
            return 1
    return None
|
||||
|
||||
"""
|
||||
|
||||
This code was tested at module datahandler
|
||||
|
||||
reject = 0
|
||||
blacklist = await sqlitehandler.get_settings_value(
|
||||
db_file,
|
||||
"filter-deny"
|
||||
)
|
||||
# print(">>> blacklist:")
|
||||
# print(blacklist)
|
||||
# breakpoint()
|
||||
if blacklist:
|
||||
blacklist = blacklist.split(",")
|
||||
# print(">>> blacklist.split")
|
||||
# print(blacklist)
|
||||
# breakpoint()
|
||||
for i in blacklist:
|
||||
# print(">>> length", len(i))
|
||||
# breakpoint()
|
||||
# if len(i):
|
||||
if not i or len(i) < 2:
|
||||
print(">>> continue due to length", len(i))
|
||||
# breakpoint()
|
||||
continue
|
||||
# print(title)
|
||||
# print(">>> blacklisted word:", i)
|
||||
# breakpoint()
|
||||
test = (title + " " + summary + " " + link)
|
||||
if i in test.lower():
|
||||
reject = 1
|
||||
break
|
||||
|
||||
if reject:
|
||||
print("rejected:",title)
|
||||
entry = (title, '', link, source, date, 1);
|
||||
|
||||
"""
|
56
slixfeed/opmlhandler.py
Normal file
56
slixfeed/opmlhandler.py
Normal file
|
@ -0,0 +1,56 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
{
|
||||
'bozo': False,
|
||||
'bozo_exception': None,
|
||||
'feeds': [
|
||||
{
|
||||
'url': 'https://kurtmckee.org/tag/listparser/feed',
|
||||
'title': 'listparser blog',
|
||||
'categories': [],
|
||||
'tags': []
|
||||
},
|
||||
{
|
||||
'url': 'https://github.com/kurtmckee/listparser/commits/develop.atom',
|
||||
'title': 'listparser changelog',
|
||||
'categories': [],
|
||||
'tags': []
|
||||
}
|
||||
],
|
||||
'lists': [],
|
||||
'opportunities': [],
|
||||
'meta': {
|
||||
'title': 'listparser project feeds',
|
||||
'author': {
|
||||
'name': 'Kurt McKee',
|
||||
'email': 'contactme@kurtmckee.org',
|
||||
'url': 'https://kurtmckee.org/'
|
||||
}
|
||||
},
|
||||
'version': 'opml2'
|
||||
}
|
||||
|
||||
"""
|
||||
|
||||
import listparser
|
||||
import lxml
|
||||
|
||||
import sqlitehandler
|
||||
import datahandler
|
||||
|
||||
async def import_opml(db_file, opml_doc):
    """
    Add every feed of an OPML document to the database.

    Parameters
    ----------
    db_file : str
        Path to database file.
    opml_doc : object
        OPML document handed to ``listparser.parse``.
    """
    parsed = listparser.parse(opml_doc)
    for item in parsed['feeds']:
        # Categories and tags of the item are not stored yet.
        subscription = [item['url'], item['title']]
        await datahandler.add_feed_no_check(db_file, subscription)
|
||||
|
||||
|
||||
# NOTE Use OPyML or LXML
|
||||
async def export_opml():
    """
    Fetch feeds for an OPML export.

    NOTE(review): the fetched feeds are currently discarded and
    nothing is returned - the export itself looks unimplemented.
    """
    feeds = await sqlitehandler.get_feeds()
|
File diff suppressed because it is too large
Load diff
|
@ -1,38 +1,56 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from datetime import datetime
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
1) Deprecate "add" (see above) and make it interactive.
|
||||
Slixfeed: Do you still want to add this URL to subscription list?
|
||||
See: case _ if message_lowercase.startswith("add"):
|
||||
|
||||
2) Use loop (with gather) instead of TaskGroup
|
||||
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import os
|
||||
import slixmpp
|
||||
|
||||
from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
|
||||
|
||||
import confighandler
|
||||
import datahandler
|
||||
import datetimehandler
|
||||
import filterhandler
|
||||
import sqlitehandler
|
||||
|
||||
main_task = []
|
||||
jid_tasker = {}
|
||||
task_manager = {}
|
||||
loop = asyncio.get_event_loop()
|
||||
# asyncio.set_event_loop(loop)
|
||||
|
||||
time_now = datetime.now()
|
||||
# time_now = datetime.now()
|
||||
# time_now = time_now.strftime("%H:%M:%S")
|
||||
|
||||
def print_time():
|
||||
# return datetime.now().strftime("%H:%M:%S")
|
||||
now = datetime.now()
|
||||
current_time = now.strftime("%H:%M:%S")
|
||||
return current_time
|
||||
# def print_time():
|
||||
# # return datetime.now().strftime("%H:%M:%S")
|
||||
# now = datetime.now()
|
||||
# current_time = now.strftime("%H:%M:%S")
|
||||
# return current_time
|
||||
|
||||
|
||||
async def handle_event():
    """
    Report on standard output that an event was handled.
    """
    notice = "Event handled!"
    print(notice)
|
||||
|
||||
|
||||
class Slixfeed(slixmpp.ClientXMPP):
|
||||
"""
|
||||
Slixmpp news bot that will send updates
|
||||
from feeds it receives.
|
||||
Slixmpp
|
||||
-------
|
||||
News bot that sends updates from RSS feeds.
|
||||
"""
|
||||
|
||||
print("slixmpp.ClientXMPP")
|
||||
print(repr(slixmpp.ClientXMPP))
|
||||
|
||||
def __init__(self, jid, password):
|
||||
slixmpp.ClientXMPP.__init__(self, jid, password)
|
||||
|
||||
|
@ -52,7 +70,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
self.add_event_handler("message", self.message)
|
||||
self.add_event_handler("disconnected", self.reconnect)
|
||||
# Initialize event loop
|
||||
self.loop = asyncio.get_event_loop()
|
||||
# self.loop = asyncio.get_event_loop()
|
||||
|
||||
|
||||
async def start(self, event):
|
||||
|
@ -70,116 +88,316 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
"""
|
||||
self.send_presence()
|
||||
await self.get_roster()
|
||||
await self.select_file()
|
||||
|
||||
self.send_presence(
|
||||
pshow="away",
|
||||
pstatus="Slixmpp has been restarted.",
|
||||
pto="sch@pimux.de"
|
||||
)
|
||||
# for task in main_task:
|
||||
# task.cancel()
|
||||
if not main_task:
|
||||
await self.select_file()
|
||||
|
||||
|
||||
async def message(self, msg):
|
||||
"""
|
||||
Process incoming message stanzas. Be aware that this also
|
||||
includes MUC messages and error messages. It is usually
|
||||
a good idea to check the messages's type before processing
|
||||
or sending replies.
|
||||
a good practice to check the messages's type before
|
||||
processing or sending replies.
|
||||
|
||||
Arguments:
|
||||
msg -- The received message stanza. See the documentation
|
||||
for stanza objects and the Message stanza to see
|
||||
how it may be used.
|
||||
Parameters
|
||||
----------
|
||||
self : ?
|
||||
Self.
|
||||
msg : str
|
||||
The received message stanza. See the documentation
|
||||
for stanza objects and the Message stanza to see
|
||||
how it may be used.
|
||||
"""
|
||||
if msg["type"] in ("chat", "normal"):
|
||||
action = 0
|
||||
jid = msg["from"].bare
|
||||
|
||||
db_dir = confighandler.get_default_dbdir()
|
||||
os.chdir(db_dir)
|
||||
if jid + ".db" not in os.listdir():
|
||||
await self.task_jid(jid)
|
||||
|
||||
message = " ".join(msg["body"].split())
|
||||
message = message.lower()
|
||||
if message.startswith("help"):
|
||||
action = print_help()
|
||||
# NOTE: Might not need it
|
||||
# elif message.startswith("add "):
|
||||
# url = message[4:]
|
||||
elif message.startswith("http"):
|
||||
url = message
|
||||
action = await initdb(jid, datahandler.add_feed, url)
|
||||
# action = "> " + message + "\n" + action
|
||||
elif message.startswith("quantum "):
|
||||
key = message[:7]
|
||||
val = message[8:]
|
||||
# action = "Every update will contain {} news items.".format(action)
|
||||
action = await initdb(jid, sqlitehandler.set_settings_value, [key, val])
|
||||
await self.refresh_task(jid, key, val)
|
||||
elif message.startswith("interval "):
|
||||
key = message[:8]
|
||||
val = message[9:]
|
||||
# action = "Updates will be sent every {} minutes.".format(action)
|
||||
action = await initdb(jid, sqlitehandler.set_settings_value, [key, val])
|
||||
await self.refresh_task(jid, key, val)
|
||||
elif message.startswith("list"):
|
||||
action = await initdb(jid, sqlitehandler.list_subscriptions)
|
||||
elif message.startswith("recent "):
|
||||
num = message[7:]
|
||||
action = await initdb(jid, sqlitehandler.last_entries, num)
|
||||
elif message.startswith("remove "):
|
||||
ix = message[7:]
|
||||
action = await initdb(jid, sqlitehandler.remove_feed, ix)
|
||||
elif message.startswith("search "):
|
||||
query = message[7:]
|
||||
action = await initdb(jid, sqlitehandler.search_entries, query)
|
||||
elif message.startswith("start"):
|
||||
# action = "Updates are enabled."
|
||||
key = "enabled"
|
||||
val = 1
|
||||
actiona = await initdb(jid, sqlitehandler.set_settings_value, [key, val])
|
||||
asyncio.create_task(self.task_jid(jid))
|
||||
# print(print_time(), "task_manager[jid]")
|
||||
# print(task_manager[jid])
|
||||
elif message.startswith("stats"):
|
||||
action = await initdb(jid, sqlitehandler.statistics)
|
||||
elif message.startswith("status "):
|
||||
ix = message[7:]
|
||||
action = await initdb(jid, sqlitehandler.toggle_status, ix)
|
||||
elif message.startswith("stop"):
|
||||
message_lowercase = message.lower()
|
||||
|
||||
print(await datetimehandler.current_time(), "ACCOUNT: " + str(msg["from"]))
|
||||
print(await datetimehandler.current_time(), "COMMAND:", message)
|
||||
|
||||
match message_lowercase:
|
||||
case "help":
|
||||
action = print_help()
|
||||
case _ if message_lowercase in ["greetings", "hello", "hey"]:
|
||||
action = (
|
||||
"Greeting! I'm Slixfeed The News Bot!"
|
||||
"\n"
|
||||
"Send a URL of a news website to start."
|
||||
)
|
||||
case _ if message_lowercase.startswith("add"):
|
||||
message = message[4:]
|
||||
url = message.split(" ")[0]
|
||||
title = " ".join(message.split(" ")[1:])
|
||||
if url.startswith("http"):
|
||||
action = await datahandler.initdb(
|
||||
jid,
|
||||
datahandler.add_feed_no_check,
|
||||
[url, title]
|
||||
)
|
||||
await self.send_status(jid)
|
||||
else:
|
||||
action = "Missing URL."
|
||||
case _ if message_lowercase.startswith("allow"):
|
||||
key = "filter-" + message[:5]
|
||||
val = message[6:]
|
||||
if val:
|
||||
keywords = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.get_settings_value,
|
||||
key
|
||||
)
|
||||
val = await filterhandler.set_filter(
|
||||
val,
|
||||
keywords
|
||||
)
|
||||
await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.set_settings_value,
|
||||
[key, val]
|
||||
)
|
||||
action = (
|
||||
"Approved keywords\n"
|
||||
"```\n{}\n```"
|
||||
).format(val)
|
||||
else:
|
||||
action = "Missing keywords."
|
||||
case _ if message_lowercase.startswith("deny"):
|
||||
key = "filter-" + message[:4]
|
||||
val = message[5:]
|
||||
if val:
|
||||
keywords = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.get_settings_value,
|
||||
key
|
||||
)
|
||||
val = await filterhandler.set_filter(
|
||||
val,
|
||||
keywords
|
||||
)
|
||||
await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.set_settings_value,
|
||||
[key, val]
|
||||
)
|
||||
action = (
|
||||
"Rejected keywords\n"
|
||||
"```\n{}\n```"
|
||||
).format(val)
|
||||
else:
|
||||
action = "Missing keywords."
|
||||
case _ if message_lowercase.startswith("http"):
|
||||
url = message
|
||||
action = await datahandler.initdb(
|
||||
jid,
|
||||
datahandler.add_feed,
|
||||
url
|
||||
)
|
||||
# action = "> " + message + "\n" + action
|
||||
await self.send_status(jid)
|
||||
case _ if message_lowercase.startswith("feeds"):
|
||||
query = message[6:]
|
||||
if query:
|
||||
if len(query) > 3:
|
||||
action = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.search_feeds,
|
||||
query
|
||||
)
|
||||
else:
|
||||
action = (
|
||||
"Enter at least 4 characters to search"
|
||||
)
|
||||
else:
|
||||
action = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.list_feeds
|
||||
)
|
||||
case _ if message_lowercase.startswith("interval"):
|
||||
# FIXME
|
||||
# The following error occurs only upon first attempt to set interval.
|
||||
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
|
||||
# self._args = None
|
||||
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
|
||||
key = message[:8]
|
||||
val = message[9:]
|
||||
if val:
|
||||
# action = (
|
||||
# "Updates will be sent every {} minutes."
|
||||
# ).format(action)
|
||||
await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.set_settings_value,
|
||||
[key, val]
|
||||
)
|
||||
await self.refresh_task(
|
||||
jid,
|
||||
self.send_update,
|
||||
key,
|
||||
val
|
||||
)
|
||||
action = (
|
||||
"Updates will be sent every {} minutes."
|
||||
).format(val)
|
||||
else:
|
||||
action = "Missing value."
|
||||
case _ if message_lowercase.startswith("next"):
|
||||
num = message[5:]
|
||||
await self.send_update(jid, num)
|
||||
await self.send_status(jid)
|
||||
# await self.refresh_task(jid, key, val)
|
||||
case _ if message_lowercase.startswith("quantum"):
|
||||
key = message[:7]
|
||||
val = message[8:]
|
||||
if val:
|
||||
# action = (
|
||||
# "Every update will contain {} news items."
|
||||
# ).format(action)
|
||||
await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.set_settings_value,
|
||||
[key, val]
|
||||
)
|
||||
action = (
|
||||
"Next update will contain {} news items."
|
||||
).format(val)
|
||||
else:
|
||||
action = "Missing value."
|
||||
case _ if message_lowercase.startswith("random"):
|
||||
action = "Updates will be sent randomly."
|
||||
case _ if message_lowercase.startswith("recent"):
|
||||
num = message[7:]
|
||||
if num:
|
||||
action = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.last_entries,
|
||||
num
|
||||
)
|
||||
else:
|
||||
action = "Missing value."
|
||||
case _ if message_lowercase.startswith("remove"):
|
||||
ix = message[7:]
|
||||
if ix:
|
||||
action = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.remove_feed,
|
||||
ix
|
||||
)
|
||||
await self.send_status(jid)
|
||||
else:
|
||||
action = "Missing feed ID."
|
||||
case _ if message_lowercase.startswith("search"):
|
||||
query = message[7:]
|
||||
if query:
|
||||
if len(query) > 1:
|
||||
action = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.search_entries,
|
||||
query
|
||||
)
|
||||
else:
|
||||
action = (
|
||||
"Enter at least 2 characters to search"
|
||||
)
|
||||
else:
|
||||
action = "Missing search query."
|
||||
case "start":
|
||||
# action = "Updates are enabled."
|
||||
key = "enabled"
|
||||
val = 1
|
||||
await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.set_settings_value,
|
||||
[key, val]
|
||||
)
|
||||
asyncio.create_task(self.task_jid(jid))
|
||||
action = "Updates are enabled."
|
||||
# print(await datetimehandler.current_time(), "task_manager[jid]")
|
||||
# print(task_manager[jid])
|
||||
case "stats":
|
||||
action = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.statistics
|
||||
)
|
||||
case _ if message_lowercase.startswith("status "):
|
||||
ix = message[7:]
|
||||
action = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.toggle_status,
|
||||
ix
|
||||
)
|
||||
case "stop":
|
||||
# FIXME
|
||||
# The following error occurs only upon first attempt to stop.
|
||||
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
|
||||
# self._args = None
|
||||
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
|
||||
# action = "Updates are disabled."
|
||||
try:
|
||||
task_manager[jid]["check"].cancel()
|
||||
# task_manager[jid]["status"].cancel()
|
||||
task_manager[jid]["interval"].cancel()
|
||||
# try:
|
||||
# # task_manager[jid]["check"].cancel()
|
||||
# # task_manager[jid]["status"].cancel()
|
||||
# task_manager[jid]["interval"].cancel()
|
||||
# key = "enabled"
|
||||
# val = 0
|
||||
# action = await datahandler.initdb(
|
||||
# jid,
|
||||
# sqlitehandler.set_settings_value,
|
||||
# [key, val]
|
||||
# )
|
||||
# except:
|
||||
# action = "Updates are already disabled."
|
||||
# # print("Updates are already disabled. Nothing to do.")
|
||||
# # await self.send_status(jid)
|
||||
key = "enabled"
|
||||
val = 0
|
||||
actiona = await initdb(jid, sqlitehandler.set_settings_value, [key, val])
|
||||
await self.send_status(jid)
|
||||
print(print_time(), "task_manager[jid]")
|
||||
print(task_manager[jid])
|
||||
except:
|
||||
# action = "Updates are already disabled."
|
||||
await self.send_status(jid)
|
||||
else:
|
||||
action = "Unknown command. Press \"help\" for list of commands"
|
||||
await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.set_settings_value,
|
||||
[key, val]
|
||||
)
|
||||
await self.task_jid(jid)
|
||||
action = "Updates are disabled."
|
||||
case "support":
|
||||
# TODO Send an invitation.
|
||||
action = "xmpp:slixmpp@muc.poez.io?join"
|
||||
case _:
|
||||
action = (
|
||||
"Unknown command. "
|
||||
"Press \"help\" for list of commands"
|
||||
)
|
||||
if action: msg.reply(action).send()
|
||||
|
||||
print(print_time(), "COMMAND ACCOUNT")
|
||||
print("COMMAND:", message)
|
||||
print("ACCOUNT: " + str(msg["from"]))
|
||||
|
||||
|
||||
async def select_file(self):
|
||||
"""
|
||||
Initiate actions by JID (Jabber ID).
|
||||
|
||||
:param self: Self
|
||||
Parameters
|
||||
----------
|
||||
self : ?
|
||||
Self.
|
||||
"""
|
||||
while True:
|
||||
db_dir = confighandler.get_default_dbdir()
|
||||
if not os.path.isdir(db_dir):
|
||||
msg = ("Slixfeed can not work without a database. \n"
|
||||
"To create a database, follow these steps: \n"
|
||||
"Add Slixfeed contact to your roster \n"
|
||||
"Send a feed to the bot by: \n"
|
||||
"add https://reclaimthenet.org/feed/")
|
||||
print(print_time(), msg)
|
||||
msg = (
|
||||
"Slixfeed can not work without a database.\n"
|
||||
"To create a database, follow these steps:\n"
|
||||
"Add Slixfeed contact to your roster.\n"
|
||||
"Send a feed to the bot by URL:\n"
|
||||
"https://reclaimthenet.org/feed/"
|
||||
)
|
||||
# print(await datetimehandler.current_time(), msg)
|
||||
print(msg)
|
||||
else:
|
||||
os.chdir(db_dir)
|
||||
|
@ -191,114 +409,165 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
# jid_tasker[jid] = asyncio.create_task(self.task_jid(jid))
|
||||
# await jid_tasker[jid]
|
||||
async with asyncio.TaskGroup() as tg:
|
||||
print("main task")
|
||||
print(print_time(), "repr(tg)")
|
||||
print(repr(tg)) # <TaskGroup entered>
|
||||
for file in files:
|
||||
if file.endswith(".db") and not file.endswith(".db-jour.db"):
|
||||
jid = file[:-3]
|
||||
tg.create_task(self.task_jid(jid))
|
||||
main_task.extend([tg.create_task(self.task_jid(jid))])
|
||||
# main_task = [tg.create_task(self.task_jid(jid))]
|
||||
# task_manager.update({jid: tg})
|
||||
# print(task_manager) # {}
|
||||
print(print_time(), "repr(tg) id(tg)")
|
||||
print(jid, repr(tg)) # sch@pimux.de <TaskGroup tasks=1 entered>
|
||||
print(jid, id(tg)) # sch@pimux.de 139879835500624
|
||||
# <xmpphandler.Slixfeed object at 0x7f24922124d0> <TaskGroup tasks=2 entered>
|
||||
# <xmpphandler.Slixfeed object at 0x7f24922124d0> 139879835500624
|
||||
|
||||
|
||||
async def task_jid(self, jid):
|
||||
"""
|
||||
JID (Jabber ID) task manager.
|
||||
|
||||
:param self: Self
|
||||
:param jid: Jabber ID
|
||||
Parameters
|
||||
----------
|
||||
self : ?
|
||||
Self.
|
||||
jid : str
|
||||
Jabber ID.
|
||||
"""
|
||||
enabled = await initdb(
|
||||
enabled = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.get_settings_value,
|
||||
"enabled"
|
||||
)
|
||||
print(print_time(), "enabled", enabled, jid)
|
||||
# print(await datetimehandler.current_time(), "enabled", enabled, jid)
|
||||
if enabled:
|
||||
print("sub task")
|
||||
print(print_time(), "repr(self) id(self)")
|
||||
print(repr(self))
|
||||
print(id(self))
|
||||
task_manager[jid] = {}
|
||||
task_manager[jid]["check"] = asyncio.create_task(check_updates(jid))
|
||||
task_manager[jid]["status"] = asyncio.create_task(self.send_status(jid))
|
||||
task_manager[jid]["interval"] = asyncio.create_task(self.send_update(jid))
|
||||
task_manager[jid]["check"] = asyncio.create_task(
|
||||
check_updates(jid)
|
||||
)
|
||||
task_manager[jid]["status"] = asyncio.create_task(
|
||||
self.send_status(jid)
|
||||
)
|
||||
task_manager[jid]["interval"] = asyncio.create_task(
|
||||
self.send_update(jid)
|
||||
)
|
||||
await task_manager[jid]["check"]
|
||||
await task_manager[jid]["status"]
|
||||
await task_manager[jid]["interval"]
|
||||
print(print_time(), "task_manager[jid].items()")
|
||||
print(task_manager[jid].items())
|
||||
print(print_time(), "task_manager[jid]")
|
||||
print(task_manager[jid])
|
||||
print(print_time(), "task_manager")
|
||||
print(task_manager)
|
||||
else:
|
||||
# FIXME
|
||||
# The following error occurs only upon first attempt to stop.
|
||||
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
|
||||
# self._args = None
|
||||
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
|
||||
try:
|
||||
task_manager[jid]["interval"].cancel()
|
||||
except:
|
||||
None
|
||||
await self.send_status(jid)
|
||||
|
||||
async def send_update(self, jid):
|
||||
|
||||
async def send_update(self, jid, num=None):
|
||||
"""
|
||||
Send news items as messages.
|
||||
|
||||
:param self: Self
|
||||
:param jid: Jabber ID
|
||||
Parameters
|
||||
----------
|
||||
self : ?
|
||||
Self.
|
||||
jid : str
|
||||
Jabber ID.
|
||||
num : str, optional
|
||||
Number. The default is None.
|
||||
"""
|
||||
new = await initdb(
|
||||
# print("Starting send_update()")
|
||||
# print(jid)
|
||||
new = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.get_entry_unread
|
||||
sqlitehandler.get_entry_unread,
|
||||
num
|
||||
)
|
||||
if new:
|
||||
print(print_time(), "> SEND UPDATE",jid)
|
||||
print(await datetimehandler.current_time(), "> SEND UPDATE",jid)
|
||||
self.send_message(
|
||||
mto=jid,
|
||||
mbody=new,
|
||||
mtype="chat"
|
||||
)
|
||||
interval = await initdb(
|
||||
await self.refresh_task(
|
||||
jid,
|
||||
sqlitehandler.get_settings_value,
|
||||
self.send_update,
|
||||
"interval"
|
||||
)
|
||||
)
|
||||
# interval = await datahandler.initdb(
|
||||
# jid,
|
||||
# sqlitehandler.get_settings_value,
|
||||
# "interval"
|
||||
# )
|
||||
# task_manager[jid]["interval"] = loop.call_at(
|
||||
# loop.time() + 60 * interval,
|
||||
# loop.create_task,
|
||||
# self.send_update(jid)
|
||||
# )
|
||||
|
||||
# print(await datetimehandler.current_time(), "asyncio.get_event_loop().time()")
|
||||
# print(await datetimehandler.current_time(), asyncio.get_event_loop().time())
|
||||
# await asyncio.sleep(60 * interval)
|
||||
self.loop.call_at(
|
||||
self.loop.time() + 60 * interval,
|
||||
self.loop.create_task,
|
||||
self.send_update(jid)
|
||||
)
|
||||
|
||||
# loop.call_later(
|
||||
# 60 * interval,
|
||||
# loop.create_task,
|
||||
# self.send_update(jid)
|
||||
# )
|
||||
|
||||
# print
|
||||
# await handle_event()
|
||||
|
||||
|
||||
async def send_status(self, jid):
|
||||
"""
|
||||
Send status message.
|
||||
|
||||
:param self: Self
|
||||
:param jid: Jabber ID
|
||||
Parameters
|
||||
----------
|
||||
self : ?
|
||||
Self.
|
||||
jid : str
|
||||
Jabber ID.
|
||||
"""
|
||||
print(print_time(), "> SEND STATUS",jid)
|
||||
unread = await initdb(
|
||||
jid,
|
||||
sqlitehandler.get_number_of_entries_unread
|
||||
)
|
||||
|
||||
if unread:
|
||||
status_text = "📰 News items: {}".format(str(unread))
|
||||
status_mode = "chat"
|
||||
else:
|
||||
status_text = "🗞 No News"
|
||||
status_mode = "available"
|
||||
|
||||
enabled = await initdb(
|
||||
print(await datetimehandler.current_time(), "> SEND STATUS",jid)
|
||||
enabled = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.get_settings_value,
|
||||
"enabled"
|
||||
)
|
||||
|
||||
if not enabled:
|
||||
status_mode = "xa"
|
||||
status_text = "Send \"Start\" to receive news."
|
||||
else:
|
||||
feeds = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.get_number_of_items,
|
||||
"feeds"
|
||||
)
|
||||
if not feeds:
|
||||
status_mode = "available"
|
||||
status_text = (
|
||||
"📂️ Send a URL from a blog or a news website."
|
||||
)
|
||||
else:
|
||||
unread = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.get_number_of_entries_unread
|
||||
)
|
||||
if unread:
|
||||
status_mode = "chat"
|
||||
status_text = (
|
||||
"📰 You have {} news items to read."
|
||||
).format(str(unread))
|
||||
# status_text = (
|
||||
# "📰 News items: {}"
|
||||
# ).format(str(unread))
|
||||
# status_text = (
|
||||
# "📰 You have {} news items"
|
||||
# ).format(str(unread))
|
||||
else:
|
||||
status_mode = "available"
|
||||
status_text = "🗞 No news"
|
||||
|
||||
# print(status_text, "for", jid)
|
||||
self.send_presence(
|
||||
|
@ -306,37 +575,55 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
pstatus=status_text,
|
||||
pto=jid,
|
||||
#pfrom=None
|
||||
)
|
||||
|
||||
await asyncio.sleep(60 * 20)
|
||||
|
||||
# self.loop.call_at(
|
||||
# self.loop.time() + 60 * 20,
|
||||
# self.loop.create_task,
|
||||
)
|
||||
# await asyncio.sleep(60 * 20)
|
||||
await self.refresh_task(
|
||||
jid,
|
||||
self.send_status,
|
||||
"status",
|
||||
"20"
|
||||
)
|
||||
# loop.call_at(
|
||||
# loop.time() + 60 * 20,
|
||||
# loop.create_task,
|
||||
# self.send_status(jid)
|
||||
# )
|
||||
|
||||
|
||||
async def refresh_task(self, jid, key, val):
|
||||
async def refresh_task(self, jid, callback, key, val=None):
|
||||
"""
|
||||
Apply settings on runtime.
|
||||
Apply new setting at runtime.
|
||||
|
||||
:param self: Self
|
||||
:param jid: Jabber ID
|
||||
:param key: Key
|
||||
:param val: Value
|
||||
Parameters
|
||||
----------
|
||||
self : ?
|
||||
Self.
|
||||
jid : str
|
||||
Jabber ID.
|
||||
key : str
|
||||
Key.
|
||||
val : str, optional
|
||||
Value. The default is None.
|
||||
"""
|
||||
if not val:
|
||||
val = await datahandler.initdb(
|
||||
jid,
|
||||
sqlitehandler.get_settings_value,
|
||||
key
|
||||
)
|
||||
if jid in task_manager:
|
||||
task_manager[jid][key].cancel()
|
||||
loop = asyncio.get_event_loop()
|
||||
print(print_time(), "loop")
|
||||
print(loop)
|
||||
print(print_time(), "loop")
|
||||
task_manager[jid][key] = loop.call_at(
|
||||
loop.time() + 60 * float(val),
|
||||
loop.create_task,
|
||||
self.send_update(jid)
|
||||
callback(jid)
|
||||
# self.send_update(jid)
|
||||
)
|
||||
# task_manager[jid][key] = loop.call_later(
|
||||
# 60 * float(val),
|
||||
# loop.create_task,
|
||||
# self.send_update(jid)
|
||||
# )
|
||||
# task_manager[jid][key] = self.send_update.loop.call_at(
|
||||
# self.send_update.loop.time() + 60 * val,
|
||||
# self.send_update.loop.create_task,
|
||||
|
@ -350,16 +637,19 @@ async def check_updates(jid):
|
|||
"""
|
||||
Check for feed updates repeatedly, once every 90 minutes.
|
||||
|
||||
:param jid: Jabber ID
|
||||
Parameters
|
||||
----------
|
||||
jid : str
|
||||
Jabber ID.
|
||||
"""
|
||||
while True:
|
||||
print(print_time(), "> CHCK UPDATE",jid)
|
||||
await initdb(jid, datahandler.download_updates)
|
||||
print(await datetimehandler.current_time(), "> CHCK UPDATE",jid)
|
||||
await datahandler.initdb(jid, datahandler.download_updates)
|
||||
await asyncio.sleep(60 * 90)
|
||||
# Schedule to call this function again in 90 minutes
|
||||
# self.loop.call_at(
|
||||
# self.loop.time() + 60 * 90,
|
||||
# self.loop.create_task,
|
||||
# loop.call_at(
|
||||
# loop.time() + 60 * 90,
|
||||
# loop.create_task,
|
||||
# self.check_updates(jid)
|
||||
# )
|
||||
|
||||
|
@ -367,84 +657,123 @@ async def check_updates(jid):
|
|||
def print_help():
|
||||
"""
|
||||
Print help manual.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message.
|
||||
"""
|
||||
msg = ("Slixfeed - News syndication bot for Jabber/XMPP \n"
|
||||
"\n"
|
||||
"DESCRIPTION: \n"
|
||||
" Slixfeed is a news aggregator bot for online news feeds. \n"
|
||||
" Supported filetypes: Atom, RDF and RSS. \n"
|
||||
"\n"
|
||||
"BASIC USAGE: \n"
|
||||
" start \n"
|
||||
" Enable bot and send updates. \n"
|
||||
" Stop \n"
|
||||
" Disable bot and stop updates. \n"
|
||||
" batch N \n"
|
||||
" Send N updates for each interval. \n"
|
||||
" interval N \n"
|
||||
" Send an update every N minutes. \n"
|
||||
" feed list \n"
|
||||
" List subscriptions. \n"
|
||||
"\n"
|
||||
"EDIT OPTIONS: \n"
|
||||
" add URL \n"
|
||||
" Add URL to subscription list. \n"
|
||||
" remove ID \n"
|
||||
" Remove feed from subscription list. \n"
|
||||
" status ID \n"
|
||||
" Toggle update status of feed. \n"
|
||||
"\n"
|
||||
"SEARCH OPTIONS: \n"
|
||||
" search TEXT \n"
|
||||
" Search news items by given keywords. \n"
|
||||
" recent N \n"
|
||||
" List recent N news items (up to 50 items). \n"
|
||||
"\n"
|
||||
"STATISTICS OPTIONS: \n"
|
||||
" analyses \n"
|
||||
" Show report and statistics of feeds. \n"
|
||||
" obsolete \n"
|
||||
" List feeds that are not available. \n"
|
||||
" unread \n"
|
||||
" Print number of unread news items. \n"
|
||||
"\n"
|
||||
"BACKUP OPTIONS: \n"
|
||||
" export opml \n"
|
||||
" Send an OPML file with your feeds. \n"
|
||||
" backup news html\n"
|
||||
" Send an HTML formatted file of your news items. \n"
|
||||
" backup news md \n"
|
||||
" Send a Markdown file of your news items. \n"
|
||||
" backup news text \n"
|
||||
" Send a Plain Text file of your news items. \n"
|
||||
"\n"
|
||||
"DOCUMENTATION: \n"
|
||||
" Slixfeed \n"
|
||||
" https://gitgud.io/sjehuda/slixfeed \n"
|
||||
" Slixmpp \n"
|
||||
" https://slixmpp.readthedocs.io/ \n"
|
||||
" feedparser \n"
|
||||
" https://pythonhosted.org/feedparser")
|
||||
msg = (
|
||||
"```\n"
|
||||
"NAME\n"
|
||||
"Slixfeed - News syndication bot for Jabber/XMPP\n"
|
||||
"\n"
|
||||
"DESCRIPTION\n"
|
||||
" Slixfeed is a news aggregator bot for online news feeds.\n"
|
||||
" This program is primarily designed for XMPP.\n"
|
||||
" For more information, visit https://xmpp.org/software/\n"
|
||||
"\n"
|
||||
"BASIC USAGE\n"
|
||||
" start\n"
|
||||
" Enable bot and send updates.\n"
|
||||
" stop\n"
|
||||
" Disable bot and stop updates.\n"
|
||||
" feeds\n"
|
||||
" List subscriptions.\n"
|
||||
" interval N\n"
|
||||
" Set interval update to every N minutes.\n"
|
||||
" next N\n"
|
||||
" Send N next updates.\n"
|
||||
" quantum N\n"
|
||||
" Set N updates for each interval.\n"
|
||||
"\n"
|
||||
"FILTER OPTIONS\n"
|
||||
" allow\n"
|
||||
" Keywords to allow (comma separates).\n"
|
||||
" deny\n"
|
||||
" Keywords to block (comma separates).\n"
|
||||
# " filter clear allow\n"
|
||||
# " Reset allow list.\n"
|
||||
# " filter clear deny\n"
|
||||
# " Reset deny list.\n"
|
||||
"\n"
|
||||
"EDIT OPTIONS\n"
|
||||
" URL\n"
|
||||
" Add URL to subscription list.\n"
|
||||
" add URL TITLE\n"
|
||||
" Add URL to subscription list (without validity check).\n"
|
||||
" remove ID\n"
|
||||
" Remove feed from subscription list.\n"
|
||||
" status ID\n"
|
||||
" Toggle update status of feed.\n"
|
||||
"\n"
|
||||
"SEARCH OPTIONS\n"
|
||||
" feeds TEXT\n"
|
||||
" Search subscriptions by given keywords.\n"
|
||||
" search TEXT\n"
|
||||
" Search news items by given keywords.\n"
|
||||
" recent N\n"
|
||||
" List recent N news items (up to 50 items).\n"
|
||||
"\n"
|
||||
# "STATISTICS OPTIONS\n"
|
||||
# " analyses\n"
|
||||
# " Show report and statistics of feeds.\n"
|
||||
# " obsolete\n"
|
||||
# " List feeds that are not available.\n"
|
||||
# " unread\n"
|
||||
# " Print number of unread news items.\n"
|
||||
# "\n"
|
||||
# "BACKUP OPTIONS\n"
|
||||
# " export opml\n"
|
||||
# " Send an OPML file with your feeds.\n"
|
||||
# " backup news html\n"
|
||||
# " Send an HTML formatted file of your news items.\n"
|
||||
# " backup news md\n"
|
||||
# " Send a Markdown file of your news items.\n"
|
||||
# " backup news text\n"
|
||||
# " Send a Plain Text file of your news items.\n"
|
||||
# "\n"
|
||||
"SUPPORT\n"
|
||||
" support"
|
||||
" Join xmpp:slixmpp@muc.poez.io?join\n"
|
||||
"\n"
|
||||
# "PROTOCOLS\n"
|
||||
# " Supported protocols are IRC, Matrix and XMPP.\n"
|
||||
# " For the best experience, we recommend you to use XMPP.\n"
|
||||
# "\n"
|
||||
"FILETYPES\n"
|
||||
" Supported filetypes are Atom, RDF and RSS.\n"
|
||||
"\n"
|
||||
"AUTHORS\n"
|
||||
" Laura Harbinger, Schimon Zackary.\n"
|
||||
"\n"
|
||||
"COPYRIGHT\n"
|
||||
" Slixfeed is free software; you can redistribute it and/or\n"
|
||||
" modify it under the terms of the GNU General Public License\n"
|
||||
" as published by the Free Software Foundation; version 3 only\n"
|
||||
"\n"
|
||||
" Slixfeed is distributed in the hope that it will be useful,\n"
|
||||
" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
|
||||
" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
|
||||
" GNU General Public License for more details.\n"
|
||||
"\n"
|
||||
"NOTE\n"
|
||||
" Make Slixfeed your own.\n"
|
||||
"\n"
|
||||
" You can run Slixfeed on your own computer, server, and\n"
|
||||
" even on a Linux phone (i.e. Droidian, Mobian NixOS,\n"
|
||||
" postmarketOS). You can also use Termux.\n"
|
||||
"\n"
|
||||
" All you need is one of the above and an XMPP account to\n"
|
||||
" connect Slixfeed to.\n"
|
||||
"\n"
|
||||
"DOCUMENTATION\n"
|
||||
" Slixfeed\n"
|
||||
" https://gitgud.io/sjehuda/slixfeed\n"
|
||||
" Slixmpp\n"
|
||||
" https://slixmpp.readthedocs.io/\n"
|
||||
" feedparser\n"
|
||||
" https://pythonhosted.org/feedparser\n"
|
||||
"\n```"
|
||||
)
|
||||
return msg
|
||||
|
||||
|
||||
# TODO Perhaps this needs to be executed
|
||||
# just once per program execution
|
||||
async def initdb(jid, callback, message=None):
|
||||
"""
|
||||
Callback function to instantiate action on database.
|
||||
|
||||
:param jid: JID (Jabber ID).
|
||||
:param callback: Function name.
|
||||
:param message: Optional argument; when given, it is passed to the callback after the database file.
|
||||
"""
|
||||
db_dir = confighandler.get_default_dbdir()
|
||||
if not os.path.isdir(db_dir):
|
||||
os.mkdir(db_dir)
|
||||
db_file = os.path.join(db_dir, r"{}.db".format(jid))
|
||||
sqlitehandler.create_tables(db_file)
|
||||
# await sqlitehandler.set_default_values(db_file)
|
||||
if message:
|
||||
return await callback(db_file, message)
|
||||
else:
|
||||
return await callback(db_file)
|
||||
|
|
Loading…
Reference in a new issue