Do not send updates when updates are not enabled. Add a new module for handling URLs. Remove awaitables from functions of mere runtime actions.
This commit is contained in:
parent
d8203abb20
commit
d4b0d08480
7 changed files with 393 additions and 337 deletions
|
@ -25,8 +25,8 @@ async def get_value_default(key):
|
|||
Parameters
|
||||
----------
|
||||
key : str
|
||||
Key: archive, enabled, allow, deny,interval,
|
||||
length, old, quantum, random, replace.
|
||||
Key: archive, enabled, interval,
|
||||
length, old, quantum, random.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
|
|
@ -23,6 +23,7 @@ from feedparser import parse
|
|||
from http.client import IncompleteRead
|
||||
from lxml import html
|
||||
from datetimehandler import now, rfc2822_to_iso8601
|
||||
from urlhandler import complete_url, join_url, trim_url
|
||||
from confighandler import get_list
|
||||
from listhandler import is_listed
|
||||
import sqlitehandler as sqlite
|
||||
|
@ -109,14 +110,14 @@ async def download_updates(db_file, url=None):
|
|||
# TODO Pass date too for comparison check
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = await rfc2822_to_iso8601(date)
|
||||
date = rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = await rfc2822_to_iso8601(date)
|
||||
date = rfc2822_to_iso8601(date)
|
||||
else:
|
||||
# TODO Just set date = "*** No date ***"
|
||||
# date = await datetime.now().isoformat()
|
||||
date = await now()
|
||||
date = now()
|
||||
# NOTE Would seconds result in better database performance
|
||||
# date = datetime.datetime(date)
|
||||
# date = (date-datetime.datetime(1970,1,1)).total_seconds()
|
||||
|
@ -128,8 +129,8 @@ async def download_updates(db_file, url=None):
|
|||
# title = feed["feed"]["title"]
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = await join_url(source, entry.link)
|
||||
link = await trim_url(link)
|
||||
link = join_url(source, entry.link)
|
||||
link = trim_url(link)
|
||||
else:
|
||||
link = source
|
||||
if entry.has_key("id"):
|
||||
|
@ -208,9 +209,9 @@ async def download_updates(db_file, url=None):
|
|||
source,
|
||||
entry
|
||||
)
|
||||
# print(await current_time(), entry, title)
|
||||
# print(current_time(), entry, title)
|
||||
# else:
|
||||
# print(await current_time(), exist, title)
|
||||
# print(current_time(), exist, title)
|
||||
|
||||
|
||||
# NOTE Why (if result[0]) and (if result[1] == 200)?
|
||||
|
@ -256,7 +257,8 @@ async def view_feed(url):
|
|||
).format(url, e)
|
||||
# breakpoint()
|
||||
if result[1] == 200:
|
||||
title = await get_title(url, result[0])
|
||||
feed = parse(result[0])
|
||||
title = get_title(url, feed)
|
||||
entries = feed.entries
|
||||
msg = "Preview of {}:\n```\n".format(title)
|
||||
count = 0
|
||||
|
@ -268,16 +270,16 @@ async def view_feed(url):
|
|||
title = "*** No title ***"
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = await join_url(url, entry.link)
|
||||
link = await trim_url(link)
|
||||
link = join_url(url, entry.link)
|
||||
link = trim_url(link)
|
||||
else:
|
||||
link = "*** No link ***"
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = await rfc2822_to_iso8601(date)
|
||||
date = rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = await rfc2822_to_iso8601(date)
|
||||
date = rfc2822_to_iso8601(date)
|
||||
else:
|
||||
date = "*** No date ***"
|
||||
msg += (
|
||||
|
@ -333,7 +335,7 @@ async def view_entry(url, num):
|
|||
# breakpoint()
|
||||
if result[1] == 200:
|
||||
feed = parse(result[0])
|
||||
title = await get_title(url, result[0])
|
||||
title = get_title(url, result[0])
|
||||
entries = feed.entries
|
||||
num = int(num) - 1
|
||||
entry = entries[num]
|
||||
|
@ -343,10 +345,10 @@ async def view_entry(url, num):
|
|||
title = "*** No title ***"
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = await rfc2822_to_iso8601(date)
|
||||
date = rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = await rfc2822_to_iso8601(date)
|
||||
date = rfc2822_to_iso8601(date)
|
||||
else:
|
||||
date = "*** No date ***"
|
||||
if entry.has_key("summary"):
|
||||
|
@ -359,8 +361,8 @@ async def view_entry(url, num):
|
|||
summary = "*** No summary ***"
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = await join_url(url, entry.link)
|
||||
link = await trim_url(link)
|
||||
link = join_url(url, entry.link)
|
||||
link = trim_url(link)
|
||||
else:
|
||||
link = "*** No link ***"
|
||||
msg = (
|
||||
|
@ -402,7 +404,7 @@ async def add_feed_no_check(db_file, data):
|
|||
"""
|
||||
url = data[0]
|
||||
title = data[1]
|
||||
url = await trim_url(url)
|
||||
url = trim_url(url)
|
||||
exist = await sqlite.check_feed_exist(db_file, url)
|
||||
if not exist:
|
||||
msg = await sqlite.insert_feed(db_file, url, title)
|
||||
|
@ -435,13 +437,13 @@ async def add_feed(db_file, url):
|
|||
Status message.
|
||||
"""
|
||||
msg = None
|
||||
url = await trim_url(url)
|
||||
url = trim_url(url)
|
||||
exist = await sqlite.check_feed_exist(db_file, url)
|
||||
if not exist:
|
||||
res = await download_feed(url)
|
||||
if res[0]:
|
||||
feed = parse(res[0])
|
||||
title = await get_title(url, feed)
|
||||
title = get_title(url, feed)
|
||||
if feed.bozo:
|
||||
bozo = (
|
||||
"Bozo detected. Failed to load: {}."
|
||||
|
@ -570,7 +572,7 @@ async def download_feed(url):
|
|||
return msg
|
||||
|
||||
|
||||
async def get_title(url, feed):
|
||||
def get_title(url, feed):
|
||||
"""
|
||||
Get title of feed.
|
||||
|
||||
|
@ -593,160 +595,6 @@ async def get_title(url, feed):
|
|||
return title
|
||||
|
||||
|
||||
# NOTE Read the documentation
|
||||
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
|
||||
def complete_url(source, link):
|
||||
"""
|
||||
Check if URL is pathname and complete it into URL.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str
|
||||
Feed URL.
|
||||
link : str
|
||||
Link URL or pathname.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
URL.
|
||||
"""
|
||||
if link.startswith("www."):
|
||||
return "http://" + link
|
||||
parted_link = urlsplit(link)
|
||||
parted_feed = urlsplit(source)
|
||||
if parted_link.scheme == "magnet" and parted_link.query:
|
||||
return link
|
||||
if parted_link.scheme and parted_link.netloc:
|
||||
return link
|
||||
if link.startswith("//"):
|
||||
if parted_link.netloc and parted_link.path:
|
||||
new_link = urlunsplit([
|
||||
parted_feed.scheme,
|
||||
parted_link.netloc,
|
||||
parted_link.path,
|
||||
parted_link.query,
|
||||
parted_link.fragment
|
||||
])
|
||||
elif link.startswith("/"):
|
||||
new_link = urlunsplit([
|
||||
parted_feed.scheme,
|
||||
parted_feed.netloc,
|
||||
parted_link.path,
|
||||
parted_link.query,
|
||||
parted_link.fragment
|
||||
])
|
||||
elif link.startswith("../"):
|
||||
pathlink = parted_link.path.split("/")
|
||||
pathfeed = parted_feed.path.split("/")
|
||||
for i in pathlink:
|
||||
if i == "..":
|
||||
if pathlink.index("..") == 0:
|
||||
pathfeed.pop()
|
||||
else:
|
||||
break
|
||||
while pathlink.count(".."):
|
||||
if pathlink.index("..") == 0:
|
||||
pathlink.remove("..")
|
||||
else:
|
||||
break
|
||||
pathlink = "/".join(pathlink)
|
||||
pathfeed.extend([pathlink])
|
||||
new_link = urlunsplit([
|
||||
parted_feed.scheme,
|
||||
parted_feed.netloc,
|
||||
"/".join(pathfeed),
|
||||
parted_link.query,
|
||||
parted_link.fragment
|
||||
])
|
||||
else:
|
||||
pathlink = parted_link.path.split("/")
|
||||
pathfeed = parted_feed.path.split("/")
|
||||
if link.startswith("./"):
|
||||
pathlink.remove(".")
|
||||
if not source.endswith("/"):
|
||||
pathfeed.pop()
|
||||
pathlink = "/".join(pathlink)
|
||||
pathfeed.extend([pathlink])
|
||||
new_link = urlunsplit([
|
||||
parted_feed.scheme,
|
||||
parted_feed.netloc,
|
||||
"/".join(pathfeed),
|
||||
parted_link.query,
|
||||
parted_link.fragment
|
||||
])
|
||||
return new_link
|
||||
|
||||
|
||||
"""
|
||||
TODO
|
||||
Feed https://www.ocaml.org/feed.xml
|
||||
Link %20https://frama-c.com/fc-versions/cobalt.html%20
|
||||
|
||||
FIXME
|
||||
Feed https://cyber.dabamos.de/blog/feed.rss
|
||||
Link https://cyber.dabamos.de/blog/#article-2022-07-15
|
||||
"""
|
||||
async def join_url(source, link):
|
||||
"""
|
||||
Join base URL with given pathname.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str
|
||||
Feed URL.
|
||||
link : str
|
||||
Link URL or pathname.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
URL.
|
||||
"""
|
||||
if link.startswith("www."):
|
||||
new_link = "http://" + link
|
||||
elif link.startswith("%20") and link.endswith("%20"):
|
||||
old_link = link.split("%20")
|
||||
del old_link[0]
|
||||
old_link.pop()
|
||||
new_link = "".join(old_link)
|
||||
else:
|
||||
new_link = urljoin(source, link)
|
||||
return new_link
|
||||
|
||||
|
||||
async def trim_url(url):
|
||||
"""
|
||||
Check URL pathname for double slash.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
url : str
|
||||
URL.
|
||||
"""
|
||||
parted_url = urlsplit(url)
|
||||
protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
pathname = parted_url.path
|
||||
queries = parted_url.query
|
||||
fragment = parted_url.fragment
|
||||
while "//" in pathname:
|
||||
pathname = pathname.replace("//", "/")
|
||||
url = urlunsplit([
|
||||
protocol,
|
||||
hostname,
|
||||
pathname,
|
||||
queries,
|
||||
fragment
|
||||
])
|
||||
return url
|
||||
|
||||
|
||||
# TODO Improve scan by gradual decreasing of path
|
||||
async def feed_mode_request(url, tree):
|
||||
"""
|
||||
|
@ -993,7 +841,7 @@ async def feed_mode_auto_discovery(url, tree):
|
|||
# title = disco["feed"]["title"]
|
||||
# msg += "{} \n {} \n\n".format(title, feed)
|
||||
feed_name = feed.xpath('@title')[0]
|
||||
feed_addr = await join_url(url, feed.xpath('@href')[0])
|
||||
feed_addr = join_url(url, feed.xpath('@href')[0])
|
||||
# if feed_addr.startswith("/"):
|
||||
# feed_addr = url + feed_addr
|
||||
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
|
||||
|
@ -1002,76 +850,5 @@ async def feed_mode_auto_discovery(url, tree):
|
|||
).format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
feed_addr = await join_url(url, feeds[0].xpath('@href')[0])
|
||||
feed_addr = join_url(url, feeds[0].xpath('@href')[0])
|
||||
return [feed_addr]
|
||||
|
||||
|
||||
async def feed_to_http(url):
|
||||
"""
|
||||
Replace scheme FEED by HTTP.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
new_url : str
|
||||
URL.
|
||||
"""
|
||||
par_url = urlsplit(url)
|
||||
new_url = urlunsplit([
|
||||
"http",
|
||||
par_url.netloc,
|
||||
par_url.path,
|
||||
par_url.query,
|
||||
par_url.fragment
|
||||
])
|
||||
return new_url
|
||||
|
||||
|
||||
"""TODO"""
|
||||
async def activitypub_to_http(namespace):
|
||||
"""
|
||||
Replace ActivityPub namespace by HTTP.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : str
|
||||
Namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
new_url : str
|
||||
URL.
|
||||
"""
|
||||
par_url = urlsplit(namespace)
|
||||
new_url = urlunsplit([
|
||||
"http",
|
||||
par_url.netloc,
|
||||
par_url.path,
|
||||
par_url.query,
|
||||
par_url.fragment
|
||||
])
|
||||
return new_url
|
||||
|
||||
|
||||
async def check_xmpp_uri(uri):
|
||||
"""
|
||||
Check validity of XMPP URI.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
uri : str
|
||||
URI.
|
||||
|
||||
Returns
|
||||
-------
|
||||
jid : str
|
||||
JID or None.
|
||||
"""
|
||||
jid = urlsplit(uri).path
|
||||
if parseaddr(jid)[1] != jid:
|
||||
jid = False
|
||||
return jid
|
||||
|
|
|
@ -9,7 +9,7 @@ from datetime import datetime
|
|||
from dateutil.parser import parse
|
||||
from email.utils import parsedate, parsedate_to_datetime
|
||||
|
||||
async def now():
|
||||
def now():
|
||||
"""
|
||||
ISO 8601 Timestamp.
|
||||
|
||||
|
@ -22,7 +22,7 @@ async def now():
|
|||
return date
|
||||
|
||||
|
||||
async def current_time():
|
||||
def current_time():
|
||||
"""
|
||||
Print HH:MM:SS timestamp.
|
||||
|
||||
|
@ -36,7 +36,7 @@ async def current_time():
|
|||
return time
|
||||
|
||||
|
||||
async def validate(date):
|
||||
def validate(date):
|
||||
"""
|
||||
Validate date format.
|
||||
|
||||
|
@ -57,7 +57,7 @@ async def validate(date):
|
|||
return date
|
||||
|
||||
|
||||
async def rfc2822_to_iso8601(date):
|
||||
def rfc2822_to_iso8601(date):
|
||||
"""
|
||||
Convert RFC 2822 into ISO 8601.
|
||||
|
||||
|
|
|
@ -896,12 +896,12 @@ async def add_entry(cur, entry):
|
|||
try:
|
||||
cur.execute(sql, entry)
|
||||
except:
|
||||
print(await current_time(), "COROUTINE OBJECT NOW")
|
||||
print(current_time(), "COROUTINE OBJECT NOW")
|
||||
print(entry[6])
|
||||
print(type(entry[6]))
|
||||
print(entry)
|
||||
print(type(entry))
|
||||
print(await current_time(), "COROUTINE OBJECT NOW")
|
||||
print(current_time(), "COROUTINE OBJECT NOW")
|
||||
# breakpoint()
|
||||
|
||||
|
||||
|
@ -990,14 +990,14 @@ async def remove_nonexistent_entries(db_file, feed, source):
|
|||
else:
|
||||
title = feed["feed"]["title"]
|
||||
if entry.has_key("link"):
|
||||
link = await datahandler.join_url(source, entry.link)
|
||||
link = datahandler.join_url(source, entry.link)
|
||||
else:
|
||||
link = source
|
||||
if entry.has_key("published") and item[4]:
|
||||
# print("compare11:", title, link, time)
|
||||
# print("compare22:", item[1], item[2], item[4])
|
||||
# print("============")
|
||||
time = await rfc2822_to_iso8601(entry.published)
|
||||
time = rfc2822_to_iso8601(entry.published)
|
||||
if (item[1] == title and
|
||||
item[2] == link and
|
||||
item[4] == time):
|
||||
|
@ -1413,8 +1413,8 @@ async def check_entry_exist(db_file, source, eid=None,
|
|||
"timestamp": date
|
||||
}).fetchone()
|
||||
except:
|
||||
print(await current_time(), "ERROR DATE: source =", source)
|
||||
print(await current_time(), "ERROR DATE: date =", date)
|
||||
print(current_time(), "ERROR DATE: source =", source)
|
||||
print(current_time(), "ERROR DATE: date =", date)
|
||||
else:
|
||||
sql = (
|
||||
"SELECT id "
|
||||
|
@ -1431,7 +1431,7 @@ async def check_entry_exist(db_file, source, eid=None,
|
|||
else:
|
||||
None
|
||||
except:
|
||||
print(await current_time(), "ERROR DATE: result =", source)
|
||||
print(current_time(), "ERROR DATE: result =", source)
|
||||
|
||||
|
||||
async def set_settings_value(db_file, key_value):
|
||||
|
@ -1444,8 +1444,7 @@ async def set_settings_value(db_file, key_value):
|
|||
Path to database file.
|
||||
key_value : list
|
||||
key : str
|
||||
enabled, filter-allow, filter-deny,
|
||||
interval, masters, quantum, random.
|
||||
enabled, interval, masters, quantum, random.
|
||||
value : int
|
||||
Numeric value.
|
||||
"""
|
||||
|
|
|
@ -77,6 +77,7 @@ await taskhandler.start_tasks(
|
|||
|
||||
"""
|
||||
async def start_tasks_xmpp(self, jid, tasks):
|
||||
print("start_tasks_xmpp", jid, tasks)
|
||||
task_manager[jid] = {}
|
||||
for task in tasks:
|
||||
# print("task:", task)
|
||||
|
@ -107,6 +108,7 @@ async def start_tasks_xmpp(self, jid, tasks):
|
|||
# await task
|
||||
|
||||
async def clean_tasks_xmpp(jid, tasks):
|
||||
print("clean_tasks_xmpp", jid, tasks)
|
||||
for task in tasks:
|
||||
# if task_manager[jid][task]:
|
||||
try:
|
||||
|
@ -129,6 +131,7 @@ Consider callback e.g. Slixfeed.send_status.
|
|||
Or taskhandler for each protocol or specific taskhandler function.
|
||||
"""
|
||||
async def task_jid(self, jid):
|
||||
print("task_jid", jid)
|
||||
"""
|
||||
JID (Jabber ID) task manager.
|
||||
|
||||
|
@ -183,6 +186,7 @@ async def task_jid(self, jid):
|
|||
|
||||
|
||||
async def send_update(self, jid, num=None):
|
||||
print("send_update", jid)
|
||||
# print(await current_time(), jid, "def send_update")
|
||||
"""
|
||||
Send news items as messages.
|
||||
|
@ -196,6 +200,12 @@ async def send_update(self, jid, num=None):
|
|||
"""
|
||||
# print("Starting send_update()")
|
||||
# print(jid)
|
||||
enabled = await initdb(
|
||||
jid,
|
||||
get_settings_value,
|
||||
"enabled"
|
||||
)
|
||||
if enabled:
|
||||
new = await initdb(
|
||||
jid,
|
||||
get_entry_unread,
|
||||
|
@ -204,10 +214,9 @@ async def send_update(self, jid, num=None):
|
|||
if new:
|
||||
# TODO Add while loop to assure delivery.
|
||||
# print(await current_time(), ">>> ACT send_message",jid)
|
||||
if await xmpphandler.Slixfeed.is_muc(self, jid):
|
||||
chat_type = "groupchat"
|
||||
else:
|
||||
chat_type = "chat"
|
||||
chat_type = await xmpphandler.Slixfeed.is_muc(self, jid)
|
||||
# NOTE Do we need "if statement"? See NOTE at is_muc.
|
||||
if chat_type in ("chat", "groupchat"):
|
||||
xmpphandler.Slixfeed.send_message(
|
||||
self,
|
||||
mto=jid,
|
||||
|
@ -248,6 +257,7 @@ async def send_update(self, jid, num=None):
|
|||
|
||||
|
||||
async def send_status(self, jid):
|
||||
print("send_status", jid)
|
||||
# print(await current_time(), jid, "def send_status")
|
||||
"""
|
||||
Send status message.
|
||||
|
@ -325,6 +335,7 @@ async def send_status(self, jid):
|
|||
|
||||
|
||||
async def refresh_task(self, jid, callback, key, val=None):
|
||||
print("refresh_task", jid, key)
|
||||
"""
|
||||
Apply new setting at runtime.
|
||||
|
||||
|
@ -370,6 +381,7 @@ async def refresh_task(self, jid, callback, key, val=None):
|
|||
# TODO Take this function out of
|
||||
# <class 'slixmpp.clientxmpp.ClientXMPP'>
|
||||
async def check_updates(jid):
|
||||
print("check_updates", jid)
|
||||
# print(await current_time(), jid, "def check_updates")
|
||||
"""
|
||||
Start calling for update check up.
|
||||
|
|
228
slixfeed/urlhandler.py
Normal file
228
slixfeed/urlhandler.py
Normal file
|
@ -0,0 +1,228 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
1) ActivityPub URL revealer activitypub_to_http
|
||||
|
||||
"""
|
||||
|
||||
from email.utils import parseaddr
|
||||
from urllib.parse import urljoin, urlsplit, urlunsplit
|
||||
|
||||
|
||||
def feed_to_http(url):
    """
    Replace scheme FEED by HTTP.

    Two spellings of the FEED scheme are handled:

    * ``feed://example.org/rss.xml`` - the scheme is swapped for
      ``http`` and every other component is kept.
    * ``feed:https://example.org/rss.xml`` - the scheme merely prefixes
      a complete HTTP(S) URL, which is returned without the prefix.

    Parameters
    ----------
    url : str
        URL.

    Returns
    -------
    new_url : str
        URL.
    """
    par_url = urlsplit(url)
    # In the "feed:https://..." form the real URL is parsed as the path
    # and the network location is empty; forcing the scheme to "http"
    # would produce a malformed result such as "http:https://...".
    if (not par_url.netloc and
            par_url.path.startswith(("http://", "https://"))):
        return url.split(":", 1)[1]
    new_url = urlunsplit([
        "http",
        par_url.netloc,
        par_url.path,
        par_url.query,
        par_url.fragment
        ])
    return new_url
|
||||
|
||||
|
||||
def activitypub_to_http(namespace):
    """
    Replace ActivityPub namespace by HTTP.

    NOTE: Not implemented yet. The body of this function consists of
    this docstring only, so every call currently returns None and the
    argument is ignored.

    Parameters
    ----------
    namespace : str
        Namespace.

    Returns
    -------
    None
        Placeholder result. The intended result is presumably an HTTP
        URL (str) once the conversion is implemented -- TODO confirm.
    """
|
||||
|
||||
|
||||
def check_xmpp_uri(uri):
    """
    Extract the JID of an XMPP URI and verify that it is well formed.

    The path component of the URI is taken as the JID candidate. The
    candidate is accepted only when the address parser of the email
    package returns it unchanged.

    Parameters
    ----------
    uri : str
        URI.

    Returns
    -------
    jid : str or bool
        JID when the candidate is accepted, otherwise False.
    """
    candidate = urlsplit(uri).path
    _, address = parseaddr(candidate)
    return candidate if address == candidate else False
|
||||
|
||||
|
||||
# NOTE Read the documentation
|
||||
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
|
||||
def complete_url(source, link):
    """
    Check if URL is pathname and complete it into URL.

    Links that are already usable are returned untouched; anything else
    is resolved against the feed URL with ``urllib.parse.urljoin``. This
    covers scheme-relative (``//host/path``), root-relative (``/path``),
    dot-relative (``./x``, ``../x``) and plain relative (``x/y``)
    references, as well as links of other schemes (e.g. ``mailto:``),
    which are returned as they are.

    Parameters
    ----------
    source : str
        Feed URL.
    link : str
        Link URL or pathname.

    Returns
    -------
    str
        URL.
    """
    # A bare hostname is not a relative path; assume plain HTTP.
    if link.startswith("www."):
        return "http://" + link
    parted_link = urlsplit(link)
    # Magnet links carry all of their data in the query; keep verbatim.
    if parted_link.scheme == "magnet" and parted_link.query:
        return link
    # The link is already absolute.
    if parted_link.scheme and parted_link.netloc:
        return link
    # The standard resolver drops the file name of the feed before
    # applying "..", does not emit a double slash when the feed URL
    # ends with "/", and accepts "//host" without a path, a case which
    # formerly left the result unassigned and raised UnboundLocalError.
    return urljoin(source, link)
|
||||
|
||||
|
||||
"""
|
||||
TODO
|
||||
Feed https://www.ocaml.org/feed.xml
|
||||
Link %20https://frama-c.com/fc-versions/cobalt.html%20
|
||||
|
||||
FIXME
|
||||
Feed https://cyber.dabamos.de/blog/feed.rss
|
||||
Link https://cyber.dabamos.de/blog/#article-2022-07-15
|
||||
"""
|
||||
def join_url(source, link):
    """
    Join base URL with given pathname.

    Some feeds wrap their links in encoded spaces, e.g.
    ``%20https://frama-c.com/fc-versions/cobalt.html%20``. Such padding
    is removed from both ends only; any ``%20`` inside of the link is
    part of the address and is preserved. The cleaned link is then
    handled like any other link.

    Parameters
    ----------
    source : str
        Feed URL.
    link : str
        Link URL or pathname.

    Returns
    -------
    str
        URL.
    """
    padding = "%20"
    if link.startswith(padding) and link.endswith(padding):
        # Strip the padding from the ends only. Splitting on "%20" and
        # joining the pieces with "" would also delete encoded spaces
        # inside of the link and thereby corrupt it.
        while link.startswith(padding):
            link = link[len(padding):]
        while link.endswith(padding):
            link = link[:-len(padding)]
    if link.startswith("www."):
        # A bare hostname is not a relative path; assume plain HTTP.
        new_link = "http://" + link
    else:
        new_link = urljoin(source, link)
    return new_link
|
||||
|
||||
|
||||
def trim_url(url):
    """
    Collapse repeated slashes in the pathname of a URL.

    Only the path component is altered; scheme, network location,
    query and fragment are carried over as parsed.

    Parameters
    ----------
    url : str
        URL.

    Returns
    -------
    url : str
        URL.
    """
    parts = urlsplit(url)
    path = parts.path
    # A single pass may leave new pairs behind (e.g. "///" -> "//"),
    # hence repeat until none is left.
    while "//" in path:
        path = path.replace("//", "/")
    return parts._replace(path=path).geturl()
|
|
@ -55,7 +55,7 @@ import asyncio
|
|||
import logging
|
||||
# import os
|
||||
import slixmpp
|
||||
from slixmpp.exceptions import IqError
|
||||
from slixmpp.exceptions import IqError, IqTimeout
|
||||
from random import randrange
|
||||
import datahandler as fetcher
|
||||
from datetimehandler import current_time
|
||||
|
@ -63,6 +63,7 @@ from filehandler import initdb
|
|||
import listhandler as lister
|
||||
import sqlitehandler as sqlite
|
||||
import taskhandler as tasker
|
||||
import urlhandler as urlfixer
|
||||
|
||||
from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
|
||||
# from slixmpp.plugins.xep_0402 import BookmarkStorage, Conference
|
||||
|
@ -97,6 +98,9 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
def __init__(self, jid, password, nick):
|
||||
slixmpp.ClientXMPP.__init__(self, jid, password)
|
||||
|
||||
# NOTE
|
||||
# The bot works fine when the nickname is hardcoded; or
|
||||
# The bot won't join some MUCs when its nickname has brackets
|
||||
self.nick = nick
|
||||
|
||||
# The session_start event will be triggered when
|
||||
|
@ -170,8 +174,9 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
"""
|
||||
async def presence_available(self, presence):
|
||||
# print("def presence_available", presence["from"].bare)
|
||||
if presence["from"].bare not in self.boundjid.bare:
|
||||
jid = presence["from"].bare
|
||||
print("presence_available", jid)
|
||||
if jid not in self.boundjid.bare:
|
||||
await tasker.clean_tasks_xmpp(
|
||||
jid,
|
||||
["interval", "status", "check"]
|
||||
|
@ -258,17 +263,27 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
# If a room password is needed, use:
|
||||
# password=the_room_password,
|
||||
)
|
||||
await self.add_muc_to_bookmarks(muc_jid)
|
||||
messages = [
|
||||
"Greetings!",
|
||||
"I'm {}, the news anchor.".format(self.nick),
|
||||
"My job is to bring you the latest news "
|
||||
"from sources you provide me with.",
|
||||
"You may always reach me via "
|
||||
"xmpp:{}?message".format(self.boundjid.bare)
|
||||
]
|
||||
for message in messages:
|
||||
self.send_message(
|
||||
mto=muc_jid,
|
||||
mbody=message,
|
||||
mtype="groupchat"
|
||||
)
|
||||
|
||||
|
||||
async def add_muc_to_bookmarks(self, muc_jid):
|
||||
result = await self.plugin['xep_0048'].get_bookmarks()
|
||||
bookmarks = result["private"]["bookmarks"]
|
||||
conferences = bookmarks["conferences"]
|
||||
print("RESULT")
|
||||
print(result)
|
||||
print("BOOKMARKS")
|
||||
print(bookmarks)
|
||||
print("CONFERENCES")
|
||||
print(conferences)
|
||||
# breakpoint()
|
||||
mucs = []
|
||||
for conference in conferences:
|
||||
jid = conference["jid"]
|
||||
|
@ -292,14 +307,29 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
# await self['xep_0402'].publish(bm)
|
||||
|
||||
|
||||
async def remove_and_leave_muc(self, muc_jid):
|
||||
async def close_muc(self, muc_jid):
|
||||
messages = [
|
||||
"Whenever you need an RSS service again, "
|
||||
"please don’t hesitate to contact me.",
|
||||
"My personal contact is xmpp:{}?message".format(self.boundjid.bare),
|
||||
"Farewell, and take care."
|
||||
]
|
||||
for message in messages:
|
||||
self.send_message(
|
||||
mto=muc_jid,
|
||||
mbody=(
|
||||
"If you need me again, contact me directly at {}\n"
|
||||
"Goodbye!"
|
||||
).format(self.boundjid.bare)
|
||||
mbody=message,
|
||||
mtype="groupchat"
|
||||
)
|
||||
await self.remove_muc_from_bookmarks(muc_jid)
|
||||
self.plugin['xep_0045'].leave_muc(
|
||||
muc_jid,
|
||||
self.nick,
|
||||
"Goodbye!",
|
||||
self.boundjid.bare
|
||||
)
|
||||
|
||||
|
||||
async def remove_muc_from_bookmarks(self, muc_jid):
|
||||
result = await self.plugin['xep_0048'].get_bookmarks()
|
||||
bookmarks = result["private"]["bookmarks"]
|
||||
conferences = bookmarks["conferences"]
|
||||
|
@ -317,12 +347,6 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
autojoin=True
|
||||
)
|
||||
await self.plugin['xep_0048'].set_bookmarks(bookmarks)
|
||||
self.plugin['xep_0045'].leave_muc(
|
||||
muc_jid,
|
||||
self.nick,
|
||||
"Goodbye!",
|
||||
self.boundjid.bare
|
||||
)
|
||||
|
||||
|
||||
async def autojoin_muc(self, event):
|
||||
|
@ -332,7 +356,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
for conference in conferences:
|
||||
if conference["autojoin"]:
|
||||
muc = conference["jid"]
|
||||
print(muc)
|
||||
print(current_time(), "Autojoining groupchat", muc)
|
||||
self.plugin['xep_0045'].join_muc(
|
||||
muc,
|
||||
self.nick,
|
||||
|
@ -342,14 +366,14 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
|
||||
|
||||
async def on_session_end(self, event):
|
||||
print(await current_time(), "Session ended. Attempting to reconnect.")
|
||||
print(current_time(), "Session ended. Attempting to reconnect.")
|
||||
print(event)
|
||||
logging.warning("Session ended. Attempting to reconnect.")
|
||||
await self.recover_connection(event)
|
||||
|
||||
|
||||
async def on_connection_failed(self, event):
|
||||
print(await current_time(), "Connection failed. Attempting to reconnect.")
|
||||
print(current_time(), "Connection failed. Attempting to reconnect.")
|
||||
print(event)
|
||||
logging.warning("Connection failed. Attempting to reconnect.")
|
||||
await self.recover_connection(event)
|
||||
|
@ -360,10 +384,13 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
# if self.connection_attempts <= self.max_connection_attempts:
|
||||
# self.reconnect(wait=5.0) # wait a bit before attempting to reconnect
|
||||
# else:
|
||||
# print(await current_time(),"Maximum connection attempts exceeded.")
|
||||
# print(current_time(),"Maximum connection attempts exceeded.")
|
||||
# logging.error("Maximum connection attempts exceeded.")
|
||||
print("Attempt:", self.connection_attempts)
|
||||
print(current_time(), "Attempt number", self.connection_attempts)
|
||||
self.reconnect(wait=5.0)
|
||||
seconds = 5
|
||||
print(current_time(), "Next attempt within", seconds, "seconds")
|
||||
await asyncio.sleep(seconds)
|
||||
|
||||
|
||||
async def inspect_connection(self, event):
|
||||
|
@ -481,18 +508,31 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
|
||||
Returns
|
||||
-------
|
||||
boolean
|
||||
True or False.
|
||||
str
|
||||
"chat" or "groupchat".
|
||||
"""
|
||||
try:
|
||||
iqresult = await self["xep_0030"].get_info(jid=jid)
|
||||
features = iqresult["disco_info"]["features"]
|
||||
# identity = iqresult['disco_info']['identities']
|
||||
# if 'account' in indentity:
|
||||
# if 'conference' in indentity:
|
||||
if 'http://jabber.org/protocol/muc' in features:
|
||||
return True
|
||||
return "groupchat"
|
||||
# TODO elif <feature var='jabber:iq:gateway'/>
|
||||
# NOTE Is it needed? We do not interact with gateways or services
|
||||
else:
|
||||
return False
|
||||
return "chat"
|
||||
# TODO Test whether this exception is realized
|
||||
except IqTimeout as e:
|
||||
messages = [
|
||||
("Timeout IQ"),
|
||||
("IQ Stanza:", e),
|
||||
("Jabber ID:", jid)
|
||||
]
|
||||
for message in messages:
|
||||
print(current_time(), message)
|
||||
logging.error(current_time(), message)
|
||||
|
||||
|
||||
async def settle(self, msg):
|
||||
|
@ -538,7 +578,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
# mtype="headline",
|
||||
msubject="RSS News Bot",
|
||||
mbody=(
|
||||
"Accept subscription request to receive updates"
|
||||
"Accept subscription request to receive updates."
|
||||
),
|
||||
mfrom=self.boundjid.bare,
|
||||
mnick=self.nick
|
||||
|
@ -548,7 +588,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
pfrom=self.boundjid.bare,
|
||||
# Accept symbol 🉑️ 👍️ ✍
|
||||
pstatus=(
|
||||
"✒️ Accept subscription request to receive updates"
|
||||
"✒️ Accept subscription request to receive updates."
|
||||
),
|
||||
# ptype="subscribe",
|
||||
pnick=self.nick
|
||||
|
@ -656,8 +696,8 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
message = message[1:]
|
||||
message_lowercase = message.lower()
|
||||
|
||||
print(await current_time(), "ACCOUNT: " + str(msg["from"]))
|
||||
print(await current_time(), "COMMAND:", message)
|
||||
print(current_time(), "ACCOUNT: " + str(msg["from"]))
|
||||
print(current_time(), "COMMAND:", message)
|
||||
|
||||
match message_lowercase:
|
||||
case "commands":
|
||||
|
@ -863,7 +903,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
message_lowercase.startswith("feed:")):
|
||||
url = message
|
||||
if url.startswith("feed:"):
|
||||
url = await fetcher.feed_to_http(url)
|
||||
url = urlfixer.feed_to_http(url)
|
||||
await tasker.clean_tasks_xmpp(
|
||||
jid,
|
||||
["status"]
|
||||
|
@ -934,7 +974,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
)
|
||||
case "goodbye":
|
||||
if msg["type"] == "groupchat":
|
||||
await self.remove_and_leave_muc(jid)
|
||||
await self.close_muc(jid)
|
||||
else:
|
||||
action = "This command is valid for groupchat only."
|
||||
case _ if message_lowercase.startswith("interval"):
|
||||
|
@ -969,7 +1009,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
else:
|
||||
action = "Missing value."
|
||||
case _ if message_lowercase.startswith("join"):
|
||||
muc = await fetcher.check_xmpp_uri(message[5:])
|
||||
muc = urlfixer.check_xmpp_uri(message[5:])
|
||||
if muc:
|
||||
"TODO probe JID and confirm it's a groupchat"
|
||||
await self.join_muc(jid, muc)
|
||||
|
@ -1100,7 +1140,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
["status"]
|
||||
)
|
||||
if url.startswith("feed:"):
|
||||
url = await fetcher.feed_to_http(url)
|
||||
url = urlfixer.feed_to_http(url)
|
||||
match len(data):
|
||||
case 1:
|
||||
if url.startswith("http"):
|
||||
|
@ -1116,7 +1156,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
case _:
|
||||
action = (
|
||||
"Enter command as follows:\n"
|
||||
"`read URL` or `read URL NUMBER`\n"
|
||||
"`read <url>` or `read <url> <number>`\n"
|
||||
"URL must not contain white space."
|
||||
)
|
||||
await tasker.start_tasks_xmpp(
|
||||
|
@ -1166,7 +1206,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
if len(query) > 1:
|
||||
action = await initdb(
|
||||
jid,
|
||||
search_entries,
|
||||
sqlite.search_entries,
|
||||
query
|
||||
)
|
||||
else:
|
||||
|
@ -1191,7 +1231,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
["interval", "status", "check"]
|
||||
)
|
||||
action = "Updates are enabled."
|
||||
# print(await current_time(), "task_manager[jid]")
|
||||
# print(current_time(), "task_manager[jid]")
|
||||
# print(task_manager[jid])
|
||||
case "stats":
|
||||
action = await initdb(
|
||||
|
@ -1248,7 +1288,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
# TODO Send an invitation.
|
||||
action = "Join xmpp:slixmpp@muc.poez.io?join"
|
||||
case _ if message_lowercase.startswith("xmpp:"):
|
||||
muc = await fetcher.check_xmpp_uri(message)
|
||||
muc = urlfixer.check_xmpp_uri(message)
|
||||
if muc:
|
||||
"TODO probe JID and confirm it's a groupchat"
|
||||
await self.join_muc(jid, muc)
|
||||
|
|
Loading…
Reference in a new issue