forked from sch/Slixfeed

Improve connectivity recovery

parent 0566589a9d
commit 56d0da9a76

5 changed files with 97 additions and 62 deletions
```diff
@@ -13,42 +13,44 @@ FIXME
 
 TODO
 
-0) from slixfeed.FILENAME import XYZ
-See project feed2toot
+1) from slixfeed.FILENAME import XYZ
+See project /chaica/feed2toot
 
-1) SQL prepared statements.
+2) SQL prepared statements;
 
-2) Machine Learning for scrapping Title, Link, Summary and Timstamp.
+3) Machine Learning for scraping Title, Link, Summary and Timestamp;
+Scrape element </article> (example: Liferea)
+http://intertwingly.net/blog/
+https://www.brandenburg.de/
 
-3) Set MUC subject
+4) Set MUC subject
 Feeds which entries are to be set as groupchat subject.
 Perhaps not, as it would require to check every feed for this setting.
-Maybe a separate bot.
+Maybe a separate bot;
 
-4) Support categories.
+5) Support categories;
 
-5) Default prepackaged list of feeds.
+6) XMPP commands;
 
-6) XMPP commands.
+7) Bot as transport;
 
-7) Bot as transport.
+8) OMEMO;
 
-8) OMEMO.
+9) Logging;
+https://docs.python.org/3/howto/logging.html
 
-9) Logging.
+10) Readability
+See project /buriy/python-readability
 
-10) Default feeds (e.g. Blacklisted News, TBOT etc.)
+11) Download and upload/send article (xHTML, HTMLZ, Markdown, MHTML, TXT).
 
-11) Download and upload/send article (xHTML, xHTMLZ, Markdown, MHTML, TXT).
-Use Readability.
 
 12) Fetch summary from URL, instead of storing summary, or
 Store 5 upcoming summaries.
 This would help making the database files smaller.
 
 13) Support protocol Gopher
-https://github.com/michael-lazar/pygopherd
-https://github.com/gopherball/gb
+See project /michael-lazar/pygopherd
+See project /gopherball/gb
 
 14) Support ActivityPub @person@domain (see Tip Of The Day).
 
```
```diff
@@ -60,7 +62,11 @@ TODO
 
 16) Brand: News Broker, Newsman, Newsdealer, Laura Harbinger
 
-17) See project offpunk/offblocklist.py
+17) See project /offpunk/offblocklist.py
 
+18) Search messages of government regulated publishers, and promote other sources.
+Dear reader, we couldn't get news from XYZ as they don't provide RSS feeds.
+However, you might want to get news from (1) (2) and (3) instead!
+
 """
 
```
```diff
@@ -19,14 +19,14 @@ from asyncio import TimeoutError
 from asyncio.exceptions import IncompleteReadError
 from bs4 import BeautifulSoup
 from confighandler import get_list, get_value_default
+from datetimehandler import now, rfc2822_to_iso8601
 from email.utils import parseaddr
 from feedparser import parse
 from http.client import IncompleteRead
-from lxml import html
-from datetimehandler import now, rfc2822_to_iso8601
-from urlhandler import complete_url, join_url, trim_url
 from listhandler import is_listed
+from lxml import html
 import sqlitehandler as sqlite
+from urlhandler import complete_url, join_url, trim_url
 from urllib import error
 # from xml.etree.ElementTree import ElementTree, ParseError
 from urllib.parse import urljoin, urlsplit, urlunsplit
```
```diff
@@ -202,8 +202,8 @@ async def download_updates(db_file, url=None):
             print("PROBLEM: date is int")
             print(date)
             # breakpoint()
-            print(source)
-            print(date)
+            # print(source)
+            # print(date)
             await sqlite.add_entry_and_set_date(
                 db_file,
                 source,
```
```diff
@@ -261,9 +261,9 @@ async def view_feed(url):
         title = get_title(url, feed)
         entries = feed.entries
         msg = "Preview of {}:\n```\n".format(title)
-        count = 0
+        counter = 0
         for entry in entries:
-            count += 1
+            counter += 1
             if entry.has_key("title"):
                 title = entry.title
             else:
```
```diff
@@ -292,9 +292,9 @@ async def view_feed(url):
                 title,
                 date,
                 link,
-                count
+                counter
             )
-            if count > 4:
+            if counter > 4:
                 break
         msg += (
             "```\nSource: {}"
```
```diff
@@ -446,7 +446,7 @@ async def add_feed(db_file, url):
         title = get_title(url, feed)
         if feed.bozo:
             bozo = (
-                "Bozo detected. Failed to load: {}."
+                "Bozo detected. Failed to load: {}"
             ).format(url)
             print(bozo)
             msg = await probe_page(add_feed, url, res[0], db_file=db_file)
```
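The string trimmed here reports feedparser's "bozo" bit, which is set whenever the fetched document is not a well-formed feed. A minimal sketch of how that flag behaves (the URL is illustrative):

```python
# feedparser sets feed.bozo when the document is malformed and stores
# the parser error in feed.bozo_exception.
from feedparser import parse

feed = parse("https://example.org/feed.xml")  # illustrative URL
if feed.bozo:
    print("Bozo detected. Failed to load: {}".format(feed.bozo_exception))
else:
    print(feed.feed.get("title"))
```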
```diff
@@ -505,7 +505,7 @@ async def probe_page(callback, url, doc, num=None, db_file=None):
     elif isinstance(msg, list):
         url = msg[0]
         if db_file:
-            print("if db_file", db_file)
+            # print("if db_file", db_file)
             return await callback(db_file, url)
         elif num:
             return await callback(url, num)
```
```diff
@@ -531,6 +531,8 @@ async def download_feed(url):
         user_agent = await get_value_default("user-agent", "Network")
     except:
         user_agent = "Slixfeed/0.1"
+    if not len(user_agent):
+        user_agent = "Slixfeed/0.1"
     timeout = ClientTimeout(total=10)
     headers = {'User-Agent': user_agent}
     async with ClientSession(headers=headers) as session:
```
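The added `if not len(user_agent)` guard covers a value that is configured but empty. A self-contained sketch of the resulting download setup, assuming aiohttp and illustrative names; the real code reads the value through confighandler:

```python
# Sketch: fall back to a default User-Agent when the configured value
# is missing or empty, then fetch with a 10-second total timeout.
from aiohttp import ClientSession, ClientTimeout

async def fetch_document(url, user_agent=None):  # hypothetical helper
    if not user_agent:  # None and "" both trigger the fallback
        user_agent = "Slixfeed/0.1"
    timeout = ClientTimeout(total=10)
    headers = {'User-Agent': user_agent}
    async with ClientSession(headers=headers) as session:
        async with session.get(url, timeout=timeout) as response:
            return await response.text()
```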
```diff
@@ -597,6 +599,8 @@ def get_title(url, feed):
         title = feed["feed"]["title"]
     except:
         title = urlsplit(url).netloc
+    if not title:
+        title = urlsplit(url).netloc
     return title
 
 
```
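The added `if not title` check handles feeds whose title element exists but is empty; `urlsplit` then supplies the host name, same as the except branch:

```python
# Illustrative fallback value for a feed without a usable title:
from urllib.parse import urlsplit

print(urlsplit("https://example.org/feed.xml").netloc)  # example.org
```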
```diff
@@ -621,7 +625,7 @@ async def feed_mode_request(url, tree):
     """
     feeds = {}
     parted_url = urlsplit(url)
-    paths = await get_list("pathnames")
+    paths = await get_list("pathnames", "lists.yaml")
     for path in paths:
         address = urlunsplit([
             parted_url.scheme,
```
```diff
@@ -693,7 +697,7 @@ async def feed_mode_request(url, tree):
         ).format(url)
         if not positive:
             msg = (
-                "No feeds were found for {}."
+                "No feeds were found for {}"
             ).format(url)
         return msg
     elif feeds:
```
```diff
@@ -721,17 +725,21 @@ async def feed_mode_scan(url, tree):
     feeds = {}
     # paths = []
     # TODO Test
-    paths = await get_list("pathnames")
+    paths = await get_list("pathnames", "lists.yaml")
     for path in paths:
         # xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
-        xpath_query = "//a[contains(@href,'{}')]".format(path)
+        # xpath_query = "//a[contains(@href,'{}')]".format(path)
+        num = 5
+        xpath_query = "(//a[contains(@href,'{}')])[position()<={}]".format(path, num)
         addresses = tree.xpath(xpath_query)
+        xpath_query = "(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num)
+        addresses += tree.xpath(xpath_query)
         parted_url = urlsplit(url)
         # NOTE Should number of addresses be limited or
         # perhaps be N from the start and N from the end
         for address in addresses:
-            print(address.xpath('@href')[0])
-            print(addresses)
+            # print(address.xpath('@href')[0])
+            # print(addresses)
             address = address.xpath('@href')[0]
             if "/" not in address:
                 protocol = parted_url.scheme
```
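The parenthesized queries answer the NOTE kept in this hunk: wrapping the anchor search in parentheses makes position() index the whole result set, so the scan now inspects only the first `num` and last `num` matches per path instead of every link on the page. A standalone sketch of the technique:

```python
# Sketch of XPath result-set slicing with lxml (toy document).
from lxml import html

tree = html.fromstring(
    "<body>"
    "<a href='/feed/a'>a</a><a href='/feed/b'>b</a><a href='/feed/c'>c</a>"
    "<a href='/feed/d'>d</a><a href='/feed/e'>e</a><a href='/feed/f'>f</a>"
    "</body>")
num = 2
path = "/feed"
# First `num` matches, then last `num` matches, of the same anchor set.
first = tree.xpath("(//a[contains(@href,'{}')])[position()<={}]".format(path, num))
last = tree.xpath("(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num))
print([a.xpath('@href')[0] for a in first + last])
# ['/feed/a', '/feed/b', '/feed/e', '/feed/f']
```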
```diff
@@ -759,11 +767,15 @@ async def feed_mode_scan(url, tree):
         if res[1] == 200:
             try:
                 feeds[address] = parse(res[0])
+                # print(feeds[address])
+                # breakpoint()
                 # print(feeds)
             except:
                 continue
     if len(feeds) > 1:
-        positive = 0
+        # print(feeds)
+        # breakpoint()
+        counter = 0
         msg = (
             "RSS URL scan has found {} feeds:\n```\n"
         ).format(len(feeds))
```
```diff
@@ -779,23 +791,32 @@ async def feed_mode_scan(url, tree):
             feed_addr = feed
             feed_amnt = len(feeds[feed].entries)
             if feed_amnt:
-                positive = 1
+                # NOTE Because there could be many false positives
+                # which are revealed in second phase of scan, we
+                # could end with a single feed, which would be
+                # listed instead of fetched, so feed_mark is
+                # utilized in order to make fetch possible.
+                feed_mark = [feed_addr]
+                counter += 1
             msg += (
-                "Title: {}\n"
-                " Link: {}\n"
-                "Count: {}\n"
+                "Title : {}\n"
+                "Link : {}\n"
+                "Count : {}\n"
                 "\n"
             ).format(
                 feed_name,
                 feed_addr,
                 feed_amnt
             )
-        msg += (
-            "```\nThe above feeds were extracted from\n{}"
-        ).format(url)
-        if not positive:
+        if counter > 1:
+            msg += (
+                "```\nThe above feeds were extracted from\n{}"
+            ).format(url)
+        elif feed_mark:
+            return feed_mark
+        else:
             msg = (
-                "No feeds were found for {}."
+                "No feeds were found for {}"
             ).format(url)
         return msg
     elif feeds:
```
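Condensed, the rewritten tail distinguishes three outcomes where the old `positive` flag knew only two. A sketch of the decision, assuming `counter` counts only feeds that actually contain entries:

```python
# Hypothetical condensation of the new control flow at the end of
# feed_mode_scan: several hits -> textual listing; exactly one hit ->
# return [address] so the caller fetches that feed directly; none ->
# a "not found" message.
def scan_result(counter, feed_mark, msg, url):
    if counter > 1:
        return msg + "```\nThe above feeds were extracted from\n{}".format(url)
    elif feed_mark:
        return feed_mark  # single-item list signals "fetch this one"
    else:
        return "No feeds were found for {}".format(url)
```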
```diff
@@ -471,7 +471,10 @@ async def get_entry_unread(db_file, num=None):
         title = result[1]
         summary = result[2]
         # Remove HTML tags
-        summary = BeautifulSoup(summary, "lxml").text
+        try:
+            summary = BeautifulSoup(summary, "lxml").text
+        except:
+            print(result[2])
         # TODO Limit text length
         summary = summary.replace("\n\n\n", "\n\n")
         length = await get_settings_value(db_file, "length")
```
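The new try/except keeps one malformed or empty summary from aborting the whole query; on failure the raw value is printed and left as-is. A sketch of the guarded tag stripping:

```python
# Sketch: strip HTML tags, but tolerate input that makes the parser
# raise (e.g. a None summary).
from bs4 import BeautifulSoup

def strip_tags(summary):
    try:
        return BeautifulSoup(summary, "lxml").text
    except Exception:
        return summary  # keep the raw value rather than crash
```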
```diff
@@ -78,7 +78,7 @@ await taskhandler.start_tasks(
 
 """
 async def start_tasks_xmpp(self, jid, tasks):
-    print("start_tasks_xmpp", jid, tasks)
+    # print("start_tasks_xmpp", jid, tasks)
     task_manager[jid] = {}
     for task in tasks:
         # print("task:", task)
```
```diff
@@ -109,7 +109,7 @@ async def start_tasks_xmpp(self, jid, tasks):
         # await task
 
 async def clean_tasks_xmpp(jid, tasks):
-    print("clean_tasks_xmpp", jid, tasks)
+    # print("clean_tasks_xmpp", jid, tasks)
     for task in tasks:
         # if task_manager[jid][task]:
         try:
```
```diff
@@ -132,7 +132,7 @@ Consider callback e.g. Slixfeed.send_status.
 Or taskhandler for each protocol or specific taskhandler function.
 """
 async def task_jid(self, jid):
-    print("task_jid", jid)
+    # print("task_jid", jid)
     """
     JID (Jabber ID) task manager.
 
```
```diff
@@ -258,7 +258,7 @@ async def send_update(self, jid, num=None):
 
 
 async def send_status(self, jid):
-    print("send_status", jid)
+    # print("send_status", jid)
     # print(await current_time(), jid, "def send_status")
     """
     Send status message.
```
```diff
@@ -336,7 +336,7 @@ async def send_status(self, jid):
 
 
 async def refresh_task(self, jid, callback, key, val=None):
-    print("refresh_task", jid, key)
+    # print("refresh_task", jid, key)
     """
     Apply new setting at runtime.
 
```
```diff
@@ -382,7 +382,7 @@ async def refresh_task(self, jid, callback, key, val=None):
 # TODO Take this function out of
 # <class 'slixmpp.clientxmpp.ClientXMPP'>
 async def check_updates(jid):
-    print("check_updates", jid)
+    # print("check_updates", jid)
     # print(await current_time(), jid, "def check_updates")
     """
     Start calling for update check up.
```
```diff
@@ -64,6 +64,7 @@ import listhandler as lister
 import sqlitehandler as sqlite
 import taskhandler as tasker
 import urlhandler as urlfixer
+from time import sleep
 
 from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
 # from slixmpp.plugins.xep_0402 import BookmarkStorage, Conference
```
```diff
@@ -102,7 +103,6 @@ class Slixfeed(slixmpp.ClientXMPP):
         # The bot works fine when the nickname is hardcoded; or
         # The bot won't join some MUCs when its nickname has brackets
         self.nick = nick
-
         # The session_start event will be triggered when
         # the bot establishes its connection with the server
         # and the XML streams are ready for use. We want to
```
```diff
@@ -387,10 +387,12 @@ class Slixfeed(slixmpp.ClientXMPP):
         # print(current_time(),"Maximum connection attempts exceeded.")
         # logging.error("Maximum connection attempts exceeded.")
         print(current_time(), "Attempt number", self.connection_attempts)
-        self.reconnect(wait=5.0)
-        seconds = 5
+        seconds = 30
         print(current_time(), "Next attempt within", seconds, "seconds")
-        await asyncio.sleep(seconds)
+        # NOTE asyncio.sleep doesn't interval as expected
+        # await asyncio.sleep(seconds)
+        sleep(seconds)
+        self.reconnect(wait=5.0)
 
 
     async def inspect_connection(self, event):
```
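This hunk is the heart of the commit: count the attempt, wait, and only then reconnect. Per the NOTE, asyncio.sleep did not pace retries as expected, so the handler now calls blocking time.sleep, which stalls the event loop and thereby forcibly throttles back-to-back attempts. A minimal sketch of the pattern (handler name and counter are illustrative):

```python
# Sketch of a slixmpp disconnect handler after this change. The
# blocking sleep is deliberate: nothing else should run until the
# next reconnect attempt.
from time import sleep

async def on_disconnect(self, event):  # hypothetical handler name
    self.connection_attempts += 1      # counter kept by the bot
    seconds = 30
    print("Next attempt within", seconds, "seconds")
    sleep(seconds)                     # blocks the whole event loop
    self.reconnect(wait=5.0)           # slixmpp re-establishes the stream
```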
```diff
@@ -912,7 +914,7 @@ class Slixfeed(slixmpp.ClientXMPP):
                 ["status"]
             )
             task = (
-                "📫️ Processing request to fetch data from {} ..."
+                "📫️ Processing request to fetch data from {}"
             ).format(url)
             process_task_message(self, jid, task)
             action = await initdb(
```
```diff
@@ -1080,8 +1082,9 @@ class Slixfeed(slixmpp.ClientXMPP):
                     action = (
                         "Only new items of newly added feeds will be sent."
                     )
-                case _ if message_lowercase.startswith("next"):
-                    num = message[5:]
+                # TODO Will you add support for number of messages?
+                case "next":
+                    # num = message[5:]
                     await tasker.clean_tasks_xmpp(
                         jid,
                         ["interval", "status"]
```
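The switch arm now matches the literal word instead of a guarded prefix, so input such as "next 5" no longer routes here; the new TODO asks whether a message count should come back. A toy example of the difference between the two patterns:

```python
# Literal case vs. guarded wildcard in a match statement.
def route(message_lowercase):
    match message_lowercase:
        case "next":                                     # exact word only
            return "send next update"
        case _ if message_lowercase.startswith("next"):  # old style
            return "send next updates: " + message_lowercase[5:]
        case _:
            return "unknown command"

print(route("next"))    # send next update
print(route("next 5"))  # send next updates: 5
```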
```diff
@@ -1137,13 +1140,15 @@ class Slixfeed(slixmpp.ClientXMPP):
                     else:
                         action = "Missing value."
                 case "random":
-                    action = "Updates will be sent randomly."
+                    # TODO /questions/2279706/select-random-row-from-a-sqlite-table
+                    # NOTE sqlitehandler.get_entry_unread
+                    action = "Updates will be sent by random order."
                 case _ if message_lowercase.startswith("read"):
                     data = message[5:]
                     data = data.split()
                     url = data[0]
                     task = (
-                        "📫️ Processing request to fetch data from {} ..."
+                        "📫️ Processing request to fetch data from {}"
                     ).format(url)
                     process_task_message(self, jid, task)
                     await tasker.clean_tasks_xmpp(
```
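The TODO added under `case "random":` points at the usual SQLite idiom for random selection, ORDER BY RANDOM() LIMIT 1. A sketch against an assumed entries table (the real schema lives in sqlitehandler):

```python
# Sketch: pick one random unread row; the table layout is hypothetical.
import sqlite3

con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE entries (id INTEGER PRIMARY KEY, title TEXT, read INTEGER)")
con.executemany("INSERT INTO entries (title, read) VALUES (?, 0)",
                [("first",), ("second",), ("third",)])
row = con.execute(
    "SELECT id, title FROM entries WHERE read = 0 "
    "ORDER BY RANDOM() LIMIT 1").fetchone()
print(row)  # e.g. (2, 'second')
```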