forked from sch/Slixfeed
Improve connectivity recovery
This commit is contained in:
parent
0566589a9d
commit
56d0da9a76
5 changed files with 97 additions and 62 deletions
|
@ -13,42 +13,44 @@ FIXME
|
|||
|
||||
TODO
|
||||
|
||||
0) from slixfeed.FILENAME import XYZ
|
||||
See project feed2toot
|
||||
1) from slixfeed.FILENAME import XYZ
|
||||
See project /chaica/feed2toot
|
||||
|
||||
1) SQL prepared statements.
|
||||
2) SQL prepared statements;
|
||||
|
||||
2) Machine Learning for scraping Title, Link, Summary and Timestamp.
|
||||
3) Machine Learning for scraping Title, Link, Summary and Timestamp;
|
||||
Scrape element </article> (example: Liferea)
|
||||
http://intertwingly.net/blog/
|
||||
https://www.brandenburg.de/
|
||||
|
||||
3) Set MUC subject
|
||||
4) Set MUC subject
|
||||
Feeds whose entries are to be set as groupchat subject.
|
||||
Perhaps not, as it would require checking every feed for this setting.
|
||||
Maybe a separate bot.
|
||||
Maybe a separate bot;
|
||||
|
||||
4) Support categories.
|
||||
5) Support categories;
|
||||
|
||||
5) Default prepackaged list of feeds.
|
||||
6) XMPP commands;
|
||||
|
||||
6) XMPP commands.
|
||||
7) Bot as transport;
|
||||
|
||||
7) Bot as transport.
|
||||
8) OMEMO;
|
||||
|
||||
8) OMEMO.
|
||||
9) Logging;
|
||||
https://docs.python.org/3/howto/logging.html
|
||||
|
||||
9) Logging.
|
||||
10) Readability
|
||||
See project /buriy/python-readability
|
||||
|
||||
10) Default feeds (e.g. Blacklisted News, TBOT etc.)
|
||||
|
||||
11) Download and upload/send article (xHTML, xHTMLZ, Markdown, MHTML, TXT).
|
||||
Use Readability.
|
||||
11) Download and upload/send article (xHTML, HTMLZ, Markdown, MHTML, TXT).
|
||||
|
||||
12) Fetch summary from URL, instead of storing summary, or
|
||||
Store 5 upcoming summaries.
|
||||
This would help make the database files smaller.
|
||||
|
||||
13) Support protocol Gopher
|
||||
https://github.com/michael-lazar/pygopherd
|
||||
https://github.com/gopherball/gb
|
||||
See project /michael-lazar/pygopherd
|
||||
See project /gopherball/gb
|
||||
|
||||
14) Support ActivityPub @person@domain (see Tip Of The Day).
|
||||
|
||||
|
@ -60,7 +62,11 @@ TODO
|
|||
|
||||
16) Brand: News Broker, Newsman, Newsdealer, Laura Harbinger
|
||||
|
||||
17) See project offpunk/offblocklist.py
|
||||
17) See project /offpunk/offblocklist.py
|
||||
|
||||
18) Search messages of government regulated publishers, and promote other sources.
|
||||
Dear reader, we couldn't get news from XYZ as they don't provide RSS feeds.
|
||||
However, you might want to get news from (1) (2) and (3) instead!
|
||||
|
||||
"""
|
||||
|
||||
|
|
|
@ -19,14 +19,14 @@ from asyncio import TimeoutError
|
|||
from asyncio.exceptions import IncompleteReadError
|
||||
from bs4 import BeautifulSoup
|
||||
from confighandler import get_list, get_value_default
|
||||
from datetimehandler import now, rfc2822_to_iso8601
|
||||
from email.utils import parseaddr
|
||||
from feedparser import parse
|
||||
from http.client import IncompleteRead
|
||||
from lxml import html
|
||||
from datetimehandler import now, rfc2822_to_iso8601
|
||||
from urlhandler import complete_url, join_url, trim_url
|
||||
from listhandler import is_listed
|
||||
from lxml import html
|
||||
import sqlitehandler as sqlite
|
||||
from urlhandler import complete_url, join_url, trim_url
|
||||
from urllib import error
|
||||
# from xml.etree.ElementTree import ElementTree, ParseError
|
||||
from urllib.parse import urljoin, urlsplit, urlunsplit
|
||||
|
@ -202,8 +202,8 @@ async def download_updates(db_file, url=None):
|
|||
print("PROBLEM: date is int")
|
||||
print(date)
|
||||
# breakpoint()
|
||||
print(source)
|
||||
print(date)
|
||||
# print(source)
|
||||
# print(date)
|
||||
await sqlite.add_entry_and_set_date(
|
||||
db_file,
|
||||
source,
|
||||
|
@ -261,9 +261,9 @@ async def view_feed(url):
|
|||
title = get_title(url, feed)
|
||||
entries = feed.entries
|
||||
msg = "Preview of {}:\n```\n".format(title)
|
||||
count = 0
|
||||
counter = 0
|
||||
for entry in entries:
|
||||
count += 1
|
||||
counter += 1
|
||||
if entry.has_key("title"):
|
||||
title = entry.title
|
||||
else:
|
||||
|
@ -292,9 +292,9 @@ async def view_feed(url):
|
|||
title,
|
||||
date,
|
||||
link,
|
||||
count
|
||||
counter
|
||||
)
|
||||
if count > 4:
|
||||
if counter > 4:
|
||||
break
|
||||
msg += (
|
||||
"```\nSource: {}"
|
||||
|
@ -446,7 +446,7 @@ async def add_feed(db_file, url):
|
|||
title = get_title(url, feed)
|
||||
if feed.bozo:
|
||||
bozo = (
|
||||
"Bozo detected. Failed to load: {}."
|
||||
"Bozo detected. Failed to load: {}"
|
||||
).format(url)
|
||||
print(bozo)
|
||||
msg = await probe_page(add_feed, url, res[0], db_file=db_file)
|
||||
|
@ -505,7 +505,7 @@ async def probe_page(callback, url, doc, num=None, db_file=None):
|
|||
elif isinstance(msg, list):
|
||||
url = msg[0]
|
||||
if db_file:
|
||||
print("if db_file", db_file)
|
||||
# print("if db_file", db_file)
|
||||
return await callback(db_file, url)
|
||||
elif num:
|
||||
return await callback(url, num)
|
||||
|
@ -531,6 +531,8 @@ async def download_feed(url):
|
|||
user_agent = await get_value_default("user-agent", "Network")
|
||||
except:
|
||||
user_agent = "Slixfeed/0.1"
|
||||
if not len(user_agent):
|
||||
user_agent = "Slixfeed/0.1"
|
||||
timeout = ClientTimeout(total=10)
|
||||
headers = {'User-Agent': user_agent}
|
||||
async with ClientSession(headers=headers) as session:
|
||||
|
@ -597,6 +599,8 @@ def get_title(url, feed):
|
|||
title = feed["feed"]["title"]
|
||||
except:
|
||||
title = urlsplit(url).netloc
|
||||
if not title:
|
||||
title = urlsplit(url).netloc
|
||||
return title
|
||||
|
||||
|
||||
|
@ -621,7 +625,7 @@ async def feed_mode_request(url, tree):
|
|||
"""
|
||||
feeds = {}
|
||||
parted_url = urlsplit(url)
|
||||
paths = await get_list("pathnames")
|
||||
paths = await get_list("pathnames", "lists.yaml")
|
||||
for path in paths:
|
||||
address = urlunsplit([
|
||||
parted_url.scheme,
|
||||
|
@ -693,7 +697,7 @@ async def feed_mode_request(url, tree):
|
|||
).format(url)
|
||||
if not positive:
|
||||
msg = (
|
||||
"No feeds were found for {}."
|
||||
"No feeds were found for {}"
|
||||
).format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
|
@ -721,17 +725,21 @@ async def feed_mode_scan(url, tree):
|
|||
feeds = {}
|
||||
# paths = []
|
||||
# TODO Test
|
||||
paths = await get_list("pathnames")
|
||||
paths = await get_list("pathnames", "lists.yaml")
|
||||
for path in paths:
|
||||
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
|
||||
xpath_query = "//a[contains(@href,'{}')]".format(path)
|
||||
# xpath_query = "//a[contains(@href,'{}')]".format(path)
|
||||
num = 5
|
||||
xpath_query = "(//a[contains(@href,'{}')])[position()<={}]".format(path, num)
|
||||
addresses = tree.xpath(xpath_query)
|
||||
xpath_query = "(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num)
|
||||
addresses += tree.xpath(xpath_query)
|
||||
parted_url = urlsplit(url)
|
||||
# NOTE Should number of addresses be limited or
|
||||
# perhaps be N from the start and N from the end
|
||||
for address in addresses:
|
||||
print(address.xpath('@href')[0])
|
||||
print(addresses)
|
||||
# print(address.xpath('@href')[0])
|
||||
# print(addresses)
|
||||
address = address.xpath('@href')[0]
|
||||
if "/" not in address:
|
||||
protocol = parted_url.scheme
|
||||
|
@ -759,11 +767,15 @@ async def feed_mode_scan(url, tree):
|
|||
if res[1] == 200:
|
||||
try:
|
||||
feeds[address] = parse(res[0])
|
||||
# print(feeds[address])
|
||||
# breakpoint()
|
||||
# print(feeds)
|
||||
except:
|
||||
continue
|
||||
if len(feeds) > 1:
|
||||
positive = 0
|
||||
# print(feeds)
|
||||
# breakpoint()
|
||||
counter = 0
|
||||
msg = (
|
||||
"RSS URL scan has found {} feeds:\n```\n"
|
||||
).format(len(feeds))
|
||||
|
@ -779,23 +791,32 @@ async def feed_mode_scan(url, tree):
|
|||
feed_addr = feed
|
||||
feed_amnt = len(feeds[feed].entries)
|
||||
if feed_amnt:
|
||||
positive = 1
|
||||
# NOTE Because there could be many false positives
|
||||
# which are revealed in second phase of scan, we
|
||||
# could end with a single feed, which would be
|
||||
# listed instead of fetched, so feed_mark is
|
||||
# utilized in order to make fetch possible.
|
||||
feed_mark = [feed_addr]
|
||||
counter += 1
|
||||
msg += (
|
||||
"Title: {}\n"
|
||||
" Link: {}\n"
|
||||
"Count: {}\n"
|
||||
"Title : {}\n"
|
||||
"Link : {}\n"
|
||||
"Count : {}\n"
|
||||
"\n"
|
||||
).format(
|
||||
feed_name,
|
||||
feed_addr,
|
||||
feed_amnt
|
||||
)
|
||||
msg += (
|
||||
"```\nThe above feeds were extracted from\n{}"
|
||||
).format(url)
|
||||
if not positive:
|
||||
if counter > 1:
|
||||
msg += (
|
||||
"```\nThe above feeds were extracted from\n{}"
|
||||
).format(url)
|
||||
elif feed_mark:
|
||||
return feed_mark
|
||||
else:
|
||||
msg = (
|
||||
"No feeds were found for {}."
|
||||
"No feeds were found for {}"
|
||||
).format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
|
|
|
@ -471,7 +471,10 @@ async def get_entry_unread(db_file, num=None):
|
|||
title = result[1]
|
||||
summary = result[2]
|
||||
# Remove HTML tags
|
||||
summary = BeautifulSoup(summary, "lxml").text
|
||||
try:
|
||||
summary = BeautifulSoup(summary, "lxml").text
|
||||
except:
|
||||
print(result[2])
|
||||
# TODO Limit text length
|
||||
summary = summary.replace("\n\n\n", "\n\n")
|
||||
length = await get_settings_value(db_file, "length")
|
||||
|
|
|
@ -78,7 +78,7 @@ await taskhandler.start_tasks(
|
|||
|
||||
"""
|
||||
async def start_tasks_xmpp(self, jid, tasks):
|
||||
print("start_tasks_xmpp", jid, tasks)
|
||||
# print("start_tasks_xmpp", jid, tasks)
|
||||
task_manager[jid] = {}
|
||||
for task in tasks:
|
||||
# print("task:", task)
|
||||
|
@ -109,7 +109,7 @@ async def start_tasks_xmpp(self, jid, tasks):
|
|||
# await task
|
||||
|
||||
async def clean_tasks_xmpp(jid, tasks):
|
||||
print("clean_tasks_xmpp", jid, tasks)
|
||||
# print("clean_tasks_xmpp", jid, tasks)
|
||||
for task in tasks:
|
||||
# if task_manager[jid][task]:
|
||||
try:
|
||||
|
@ -132,7 +132,7 @@ Consider callback e.g. Slixfeed.send_status.
|
|||
Or taskhandler for each protocol or specific taskhandler function.
|
||||
"""
|
||||
async def task_jid(self, jid):
|
||||
print("task_jid", jid)
|
||||
# print("task_jid", jid)
|
||||
"""
|
||||
JID (Jabber ID) task manager.
|
||||
|
||||
|
@ -258,7 +258,7 @@ async def send_update(self, jid, num=None):
|
|||
|
||||
|
||||
async def send_status(self, jid):
|
||||
print("send_status", jid)
|
||||
# print("send_status", jid)
|
||||
# print(await current_time(), jid, "def send_status")
|
||||
"""
|
||||
Send status message.
|
||||
|
@ -336,7 +336,7 @@ async def send_status(self, jid):
|
|||
|
||||
|
||||
async def refresh_task(self, jid, callback, key, val=None):
|
||||
print("refresh_task", jid, key)
|
||||
# print("refresh_task", jid, key)
|
||||
"""
|
||||
Apply new setting at runtime.
|
||||
|
||||
|
@ -382,7 +382,7 @@ async def refresh_task(self, jid, callback, key, val=None):
|
|||
# TODO Take this function out of
|
||||
# <class 'slixmpp.clientxmpp.ClientXMPP'>
|
||||
async def check_updates(jid):
|
||||
print("check_updates", jid)
|
||||
# print("check_updates", jid)
|
||||
# print(await current_time(), jid, "def check_updates")
|
||||
"""
|
||||
Start calling for update check up.
|
||||
|
|
|
@ -64,6 +64,7 @@ import listhandler as lister
|
|||
import sqlitehandler as sqlite
|
||||
import taskhandler as tasker
|
||||
import urlhandler as urlfixer
|
||||
from time import sleep
|
||||
|
||||
from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
|
||||
# from slixmpp.plugins.xep_0402 import BookmarkStorage, Conference
|
||||
|
@ -102,7 +103,6 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
# The bot works fine when the nickname is hardcoded; or
|
||||
# The bot won't join some MUCs when its nickname has brackets
|
||||
self.nick = nick
|
||||
|
||||
# The session_start event will be triggered when
|
||||
# the bot establishes its connection with the server
|
||||
# and the XML streams are ready for use. We want to
|
||||
|
@ -387,10 +387,12 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
# print(current_time(),"Maximum connection attempts exceeded.")
|
||||
# logging.error("Maximum connection attempts exceeded.")
|
||||
print(current_time(), "Attempt number", self.connection_attempts)
|
||||
self.reconnect(wait=5.0)
|
||||
seconds = 5
|
||||
seconds = 30
|
||||
print(current_time(), "Next attempt within", seconds, "seconds")
|
||||
await asyncio.sleep(seconds)
|
||||
# NOTE asyncio.sleep does not appear to wait for the expected interval
|
||||
# await asyncio.sleep(seconds)
|
||||
sleep(seconds)
|
||||
self.reconnect(wait=5.0)
|
||||
|
||||
|
||||
async def inspect_connection(self, event):
|
||||
|
@ -912,7 +914,7 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
["status"]
|
||||
)
|
||||
task = (
|
||||
"📫️ Processing request to fetch data from {} ..."
|
||||
"📫️ Processing request to fetch data from {}"
|
||||
).format(url)
|
||||
process_task_message(self, jid, task)
|
||||
action = await initdb(
|
||||
|
@ -1080,8 +1082,9 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
action = (
|
||||
"Only new items of newly added feeds will be sent."
|
||||
)
|
||||
case _ if message_lowercase.startswith("next"):
|
||||
num = message[5:]
|
||||
# TODO Will you add support for number of messages?
|
||||
case "next":
|
||||
# num = message[5:]
|
||||
await tasker.clean_tasks_xmpp(
|
||||
jid,
|
||||
["interval", "status"]
|
||||
|
@ -1137,13 +1140,15 @@ class Slixfeed(slixmpp.ClientXMPP):
|
|||
else:
|
||||
action = "Missing value."
|
||||
case "random":
|
||||
action = "Updates will be sent randomly."
|
||||
# TODO /questions/2279706/select-random-row-from-a-sqlite-table
|
||||
# NOTE sqlitehandler.get_entry_unread
|
||||
action = "Updates will be sent by random order."
|
||||
case _ if message_lowercase.startswith("read"):
|
||||
data = message[5:]
|
||||
data = data.split()
|
||||
url = data[0]
|
||||
task = (
|
||||
"📫️ Processing request to fetch data from {} ..."
|
||||
"📫️ Processing request to fetch data from {}"
|
||||
).format(url)
|
||||
process_task_message(self, jid, task)
|
||||
await tasker.clean_tasks_xmpp(
|
||||
|
|
Loading…
Reference in a new issue