forked from sch/Slixfeed
Segregate code into more particular functions
This commit is contained in:
parent
96f3369539
commit
7135994888
13 changed files with 995 additions and 937 deletions
369  slixfeed/action.py  Normal file
@@ -0,0 +1,369 @@
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from asyncio.exceptions import IncompleteReadError
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from http.client import IncompleteRead
|
||||||
|
from feedparser import parse
|
||||||
|
import slixfeed.config as config
|
||||||
|
import slixfeed.crawl as crawl
|
||||||
|
from slixfeed.datetime import now, rfc2822_to_iso8601
|
||||||
|
import slixfeed.fetch as fetch
|
||||||
|
import slixfeed.sqlite as sqlite
|
||||||
|
import slixfeed.read as read
|
||||||
|
import slixfeed.task as task
|
||||||
|
from slixfeed.url import complete_url, join_url, trim_url
|
||||||
|
from urllib import error
|
||||||
|
from urllib.parse import urlsplit
|
||||||
|
|
||||||
|
|
||||||
|
async def add_feed(db_file, url):
|
||||||
|
while True:
|
||||||
|
exist = await sqlite.is_feed_exist(db_file, url)
|
||||||
|
if not exist:
|
||||||
|
result = await fetch.download_feed([url])
|
||||||
|
document = result[0]
|
||||||
|
status = result[1]
|
||||||
|
if document:
|
||||||
|
feed = parse(document)
|
||||||
|
# if read.is_feed(url, feed):
|
||||||
|
if read.is_feed(feed):
|
||||||
|
try:
|
||||||
|
title = feed["feed"]["title"]
|
||||||
|
except:
|
||||||
|
title = urlsplit(url).netloc
|
||||||
|
await sqlite.insert_feed(
|
||||||
|
db_file, url, title, status)
|
||||||
|
await organize_items(
|
||||||
|
db_file, [url])
|
||||||
|
old = await sqlite.get_settings_value(
|
||||||
|
db_file, "old")
|
||||||
|
if not old:
|
||||||
|
await sqlite.mark_source_as_read(
|
||||||
|
db_file, url)
|
||||||
|
response = (
|
||||||
|
"> {}\nNews source {} has been "
|
||||||
|
"added to subscription list."
|
||||||
|
).format(url, title)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
result = await crawl.probe_page(
|
||||||
|
url, document)
|
||||||
|
# TODO Check length and write a
# unified message for a set of feeds.
# Use logging if you so choose to
# distinguish between the methods
|
||||||
|
if isinstance(result, list):
|
||||||
|
url = result[0]
|
||||||
|
elif isinstance(result, str):
|
||||||
|
response = result
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
response = (
|
||||||
|
"> {}\nFailed to load URL. Reason: {}"
|
||||||
|
).format(url, status)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
ix = exist[0]
|
||||||
|
name = exist[1]
|
||||||
|
response = (
|
||||||
|
"> {}\nNews source \"{}\" is already "
|
||||||
|
"listed in the subscription list at "
|
||||||
|
"index {}".format(url, name, ix)
|
||||||
|
)
|
||||||
|
break
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
async def view_feed(url):
|
||||||
|
while True:
|
||||||
|
result = await fetch.download_feed([url])
|
||||||
|
document = result[0]
|
||||||
|
status = result[1]
|
||||||
|
if document:
|
||||||
|
feed = parse(document)
|
||||||
|
# if read.is_feed(url, feed):
|
||||||
|
if read.is_feed(feed):
|
||||||
|
try:
|
||||||
|
title = feed["feed"]["title"]
|
||||||
|
except:
|
||||||
|
title = urlsplit(url).netloc
|
||||||
|
entries = feed.entries
|
||||||
|
response = "Preview of {}:\n\n```\n".format(title)
|
||||||
|
counter = 0
|
||||||
|
for entry in entries:
|
||||||
|
counter += 1
|
||||||
|
if entry.has_key("title"):
|
||||||
|
title = entry.title
|
||||||
|
else:
|
||||||
|
title = "*** No title ***"
|
||||||
|
if entry.has_key("link"):
|
||||||
|
# link = complete_url(source, entry.link)
|
||||||
|
link = join_url(url, entry.link)
|
||||||
|
link = trim_url(link)
|
||||||
|
else:
|
||||||
|
link = "*** No link ***"
|
||||||
|
if entry.has_key("published"):
|
||||||
|
date = entry.published
|
||||||
|
date = rfc2822_to_iso8601(date)
|
||||||
|
elif entry.has_key("updated"):
|
||||||
|
date = entry.updated
|
||||||
|
date = rfc2822_to_iso8601(date)
|
||||||
|
else:
|
||||||
|
date = "*** No date ***"
|
||||||
|
response += (
|
||||||
|
"Title : {}\n"
|
||||||
|
"Date : {}\n"
|
||||||
|
"Link : {}\n"
|
||||||
|
"Count : {}\n"
|
||||||
|
"\n"
|
||||||
|
).format(title, date, link, counter)
|
||||||
|
if counter > 4:
|
||||||
|
break
|
||||||
|
response += (
|
||||||
|
"```\nSource: {}"
|
||||||
|
).format(url)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
result = await crawl.probe_page(
|
||||||
|
url, document)
|
||||||
|
# TODO Check length and write a
# unified message for a set of feeds.
# Use logging if you so choose to
# distinguish between the methods
|
||||||
|
if isinstance(result, list):
|
||||||
|
url = result[0]
|
||||||
|
elif isinstance(result, str):
|
||||||
|
response = result
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
response = (
|
||||||
|
"> {}\nFailed to load URL. Reason: {}"
|
||||||
|
).format(url, status)
|
||||||
|
break
|
||||||
|
return response
|
||||||
|
|
||||||
|
|
||||||
|
async def view_entry(url, num):
|
||||||
|
while True:
|
||||||
|
result = await fetch.download_feed([url])
|
||||||
|
document = result[0]
|
||||||
|
status = result[1]
|
||||||
|
if document:
|
||||||
|
feed = parse(document)
|
||||||
|
# if read.is_feed(url, feed):
|
||||||
|
if read.is_feed(feed):
|
||||||
|
try:
|
||||||
|
title = feed["feed"]["title"]
|
||||||
|
except:
|
||||||
|
title = urlsplit(url).netloc
|
||||||
|
entries = feed.entries
|
||||||
|
num = int(num) - 1
|
||||||
|
entry = entries[num]
|
||||||
|
response = "Preview of {}:\n\n```\n".format(title)
|
||||||
|
if entry.has_key("title"):
|
||||||
|
title = entry.title
|
||||||
|
else:
|
||||||
|
title = "*** No title ***"
|
||||||
|
if entry.has_key("published"):
|
||||||
|
date = entry.published
|
||||||
|
date = rfc2822_to_iso8601(date)
|
||||||
|
elif entry.has_key("updated"):
|
||||||
|
date = entry.updated
|
||||||
|
date = rfc2822_to_iso8601(date)
|
||||||
|
else:
|
||||||
|
date = "*** No date ***"
|
||||||
|
if entry.has_key("summary"):
|
||||||
|
summary = entry.summary
|
||||||
|
# Remove HTML tags
|
||||||
|
summary = BeautifulSoup(summary, "lxml").text
|
||||||
|
# TODO Limit text length
|
||||||
|
summary = summary.replace("\n\n\n", "\n\n")
|
||||||
|
else:
|
||||||
|
summary = "*** No summary ***"
|
||||||
|
if entry.has_key("link"):
|
||||||
|
# link = complete_url(source, entry.link)
|
||||||
|
link = join_url(url, entry.link)
|
||||||
|
link = trim_url(link)
|
||||||
|
else:
|
||||||
|
link = "*** No link ***"
|
||||||
|
response = (
|
||||||
|
"{}\n"
|
||||||
|
"\n"
|
||||||
|
# "> {}\n"
|
||||||
|
"{}\n"
|
||||||
|
"\n"
|
||||||
|
"{}\n"
|
||||||
|
"\n"
|
||||||
|
).format(title, summary, link)
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
result = await crawl.probe_page(
|
||||||
|
url, document)
|
||||||
|
# TODO Check length and write a
# unified message for a set of feeds.
# Use logging if you so choose to
# distinguish between the methods
|
||||||
|
if isinstance(result, list):
|
||||||
|
url = result[0]
|
||||||
|
elif isinstance(result, str):
|
||||||
|
response = result
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
response = (
|
||||||
|
"> {}\nFailed to load URL. Reason: {}"
|
||||||
|
).format(url, status)
|
||||||
|
break
|
||||||
|
return response
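Both view_feed and view_entry fall back from entry.published to entry.updated and normalize the RFC 2822 timestamp through rfc2822_to_iso8601. As a rough, standalone sketch of what such a conversion involves (not necessarily the project's own implementation), the standard library handles it like this:

```python
# Hypothetical illustration of an RFC 2822 -> ISO 8601 conversion,
# similar in spirit to slixfeed.datetime.rfc2822_to_iso8601.
from email.utils import parsedate_to_datetime

rfc2822 = "Sat, 23 Dec 2023 10:00:00 +0000"
print(parsedate_to_datetime(rfc2822).isoformat())
# 2023-12-23T10:00:00+00:00
```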
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE Why (if res[0]) and (if res[1] == 200)?
|
||||||
|
async def organize_items(db_file, urls):
|
||||||
|
"""
|
||||||
|
Check feeds for new entries.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
db_file : str
|
||||||
|
Path to database file.
|
||||||
|
url : str, optional
|
||||||
|
URL. The default is None.
|
||||||
|
"""
|
||||||
|
for url in urls:
|
||||||
|
# print(os.path.basename(db_file), url[0])
|
||||||
|
source = url[0]
|
||||||
|
res = await fetch.download_feed(source)
|
||||||
|
# TypeError: 'NoneType' object is not subscriptable
|
||||||
|
if res is None:
|
||||||
|
# Skip to next feed
|
||||||
|
# urls.next()
|
||||||
|
# next(urls)
|
||||||
|
continue
|
||||||
|
await sqlite.update_source_status(
|
||||||
|
db_file, res[1], source)
|
||||||
|
if res[0]:
|
||||||
|
try:
|
||||||
|
feed = parse(res[0])
|
||||||
|
if feed.bozo:
|
||||||
|
# bozo = (
|
||||||
|
# "WARNING: Bozo detected for feed: {}\n"
|
||||||
|
# "For more information, visit "
|
||||||
|
# "https://pythonhosted.org/feedparser/bozo.html"
|
||||||
|
# ).format(source)
|
||||||
|
# print(bozo)
|
||||||
|
valid = 0
|
||||||
|
else:
|
||||||
|
valid = 1
|
||||||
|
await sqlite.update_source_validity(
|
||||||
|
db_file, source, valid)
|
||||||
|
except (
|
||||||
|
IncompleteReadError,
|
||||||
|
IncompleteRead,
|
||||||
|
error.URLError
|
||||||
|
) as e:
|
||||||
|
# print(e)
|
||||||
|
# TODO Print error to log
|
||||||
|
None
|
||||||
|
# NOTE I don't think there should be "return"
|
||||||
|
# because then we might stop scanning next URLs
|
||||||
|
# return
|
||||||
|
# TODO Place these couple of lines back down
|
||||||
|
# NOTE Need to correct the SQL statement to do so
|
||||||
|
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
||||||
|
if res[1] == 200:
|
||||||
|
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
||||||
|
# TODO Place these couple of lines back down
|
||||||
|
# NOTE Need to correct the SQL statement to do so
|
||||||
|
entries = feed.entries
|
||||||
|
# length = len(entries)
|
||||||
|
# await remove_entry(db_file, source, length)
|
||||||
|
await sqlite.remove_nonexistent_entries(
|
||||||
|
db_file, feed, source)
|
||||||
|
# new_entry = 0
|
||||||
|
for entry in entries:
|
||||||
|
# TODO Pass date too for comparison check
|
||||||
|
if entry.has_key("published"):
|
||||||
|
date = entry.published
|
||||||
|
date = rfc2822_to_iso8601(date)
|
||||||
|
elif entry.has_key("updated"):
|
||||||
|
date = entry.updated
|
||||||
|
date = rfc2822_to_iso8601(date)
|
||||||
|
else:
|
||||||
|
# TODO Just set date = "*** No date ***"
|
||||||
|
# date = await datetime.now().isoformat()
|
||||||
|
date = now()
|
||||||
|
# NOTE Would seconds result in better database performance
|
||||||
|
# date = datetime.datetime(date)
|
||||||
|
# date = (date-datetime.datetime(1970,1,1)).total_seconds()
|
||||||
|
if entry.has_key("title"):
|
||||||
|
title = entry.title
|
||||||
|
# title = "{}: *{}*".format(feed["feed"]["title"], entry.title)
|
||||||
|
else:
|
||||||
|
title = date
|
||||||
|
# title = feed["feed"]["title"]
|
||||||
|
if entry.has_key("link"):
|
||||||
|
# link = complete_url(source, entry.link)
|
||||||
|
link = join_url(source, entry.link)
|
||||||
|
link = trim_url(link)
|
||||||
|
else:
|
||||||
|
link = source
|
||||||
|
if entry.has_key("id"):
|
||||||
|
eid = entry.id
|
||||||
|
else:
|
||||||
|
eid = link
|
||||||
|
exist = await sqlite.check_entry_exist(
|
||||||
|
db_file, source, eid=eid,
|
||||||
|
title=title, link=link, date=date)
|
||||||
|
if not exist:
|
||||||
|
# new_entry = new_entry + 1
|
||||||
|
# TODO Enhance summary
|
||||||
|
if entry.has_key("summary"):
|
||||||
|
summary = entry.summary
|
||||||
|
# # Remove HTML tags
|
||||||
|
# summary = BeautifulSoup(summary, "lxml").text
|
||||||
|
# # TODO Limit text length
|
||||||
|
# summary = summary.replace("\n\n\n", "\n\n")
|
||||||
|
# summary = summary[:300] + " […]⃨"
|
||||||
|
# summary = summary.strip().split('\n')
|
||||||
|
# summary = ["> " + line for line in summary]
|
||||||
|
# summary = "\n".join(summary)
|
||||||
|
else:
|
||||||
|
summary = "> *** No summary ***"
|
||||||
|
read_status = 0
|
||||||
|
pathname = urlsplit(link).path
|
||||||
|
string = (
|
||||||
|
"{} {} {}"
|
||||||
|
).format(
|
||||||
|
title,
|
||||||
|
summary,
|
||||||
|
pathname
|
||||||
|
)
|
||||||
|
allow_list = await config.is_listed(
|
||||||
|
db_file, "filter-allow", string)
|
||||||
|
if not allow_list:
|
||||||
|
reject_list = await config.is_listed(
|
||||||
|
db_file, "filter-deny", string)
|
||||||
|
if reject_list:
|
||||||
|
# print(">>> REJECTED", title)
|
||||||
|
summary = (
|
||||||
|
"REJECTED {}".format(
|
||||||
|
reject_list.upper()
|
||||||
|
)
|
||||||
|
)
|
||||||
|
# summary = ""
|
||||||
|
read_status = 1
|
||||||
|
entry = (
|
||||||
|
title, link, eid, source, date, read_status)
|
||||||
|
if isinstance(date, int):
|
||||||
|
print("PROBLEM: date is int")
|
||||||
|
print(date)
|
||||||
|
# breakpoint()
|
||||||
|
# print(source)
|
||||||
|
# print(date)
|
||||||
|
await sqlite.add_entry_and_set_date(
|
||||||
|
db_file, source, entry)
|
||||||
|
# print(current_time(), entry, title)
|
||||||
|
# else:
|
||||||
|
# print(current_time(), exist, title)
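A minimal driver sketch for the coroutines added in this file, based only on the signatures above; the database path is a placeholder and the snippet assumes the slixfeed package and a prepared database are available:

```python
# Hypothetical driver for the coroutines defined above; the database
# path below is a placeholder, not a path used by the project.
import asyncio

import slixfeed.action as action

async def main():
    db_file = "/tmp/example.db"  # placeholder database file
    # Subscribe to a feed and report the outcome.
    print(await action.add_feed(db_file, "https://example.org/feed.xml"))
    # Preview the first few entries of a feed without subscribing.
    print(await action.view_feed("https://example.org/feed.xml"))
    # Show entry number 1 of that feed.
    print(await action.view_entry("https://example.org/feed.xml", 1))

asyncio.run(main())
```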
@@ -59,8 +59,9 @@ def get_value(filename, section, keys):
|
||||||
for key in keys:
|
for key in keys:
|
||||||
try:
|
try:
|
||||||
value = section_res[key]
|
value = section_res[key]
|
||||||
logging.debug("Found value {} for key {}".format(
|
logging.debug(
|
||||||
value, key))
|
"Found value {} for key {}".format(value, key)
|
||||||
|
)
|
||||||
except:
|
except:
|
||||||
value = ''
|
value = ''
|
||||||
logging.error("Missing key:", key)
|
logging.error("Missing key:", key)
|
||||||
|
@@ -70,7 +71,8 @@ def get_value(filename, section, keys):
|
||||||
try:
|
try:
|
||||||
result = section_res[key]
|
result = section_res[key]
|
||||||
logging.debug(
|
logging.debug(
|
||||||
"Found value {} for key {}".format(result, key))
|
"Found value {} for key {}".format(result, key)
|
||||||
|
)
|
||||||
except:
|
except:
|
||||||
result = ''
|
result = ''
|
||||||
# logging.error("Missing key:", key)
|
# logging.error("Missing key:", key)
|
||||||
|
@@ -78,7 +80,8 @@ def get_value(filename, section, keys):
|
||||||
logging.error(
|
logging.error(
|
||||||
"Check configuration file {}.ini for "
|
"Check configuration file {}.ini for "
|
||||||
"missing key(s) \"{}\" under section [{}].".format(
|
"missing key(s) \"{}\" under section [{}].".format(
|
||||||
filename, keys, section))
|
filename, keys, section)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@@ -171,7 +174,9 @@ def get_default_dbdir():
|
||||||
else:
|
else:
|
||||||
return os.path.abspath('.')
|
return os.path.abspath('.')
|
||||||
else:
|
else:
|
||||||
data_home = os.path.join(os.environ.get('HOME'), '.local', 'share')
|
data_home = os.path.join(
|
||||||
|
os.environ.get('HOME'), '.local', 'share'
|
||||||
|
)
|
||||||
return os.path.join(data_home, 'slixfeed')
|
return os.path.join(data_home, 'slixfeed')
|
||||||
|
|
||||||
|
|
||||||
|
@@ -200,7 +205,9 @@ def get_default_confdir():
|
||||||
else:
|
else:
|
||||||
return os.path.abspath('.')
|
return os.path.abspath('.')
|
||||||
else:
|
else:
|
||||||
config_home = os.path.join(os.environ.get('HOME'), '.config')
|
config_home = os.path.join(
|
||||||
|
os.environ.get('HOME'), '.config'
|
||||||
|
)
|
||||||
return os.path.join(config_home, 'slixfeed')
|
return os.path.join(config_home, 'slixfeed')
|
||||||
|
|
||||||
|
|
||||||
|
|
382  slixfeed/crawl.py  Normal file
@@ -0,0 +1,382 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
TODO
|
||||||
|
|
||||||
|
1.1) Do not compose messages.
|
||||||
|
|
||||||
|
1.2) Return URLs, nothing else (e.g. no processed messages).

1.3) Correction of URLs is acceptable.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from aiohttp import ClientError, ClientSession, ClientTimeout
|
||||||
|
from feedparser import parse
|
||||||
|
from lxml import html
|
||||||
|
import slixfeed.config as config
|
||||||
|
from slixfeed.fetch import download_feed
|
||||||
|
from slixfeed.url import complete_url, join_url, trim_url
|
||||||
|
from urllib.parse import urlsplit, urlunsplit
|
||||||
|
|
||||||
|
|
||||||
|
# TODO Use boolean as a flag to determine whether a single URL was found
|
||||||
|
# async def probe_page(
|
||||||
|
# callback, url, document, num=None, db_file=None):
|
||||||
|
# result = None
|
||||||
|
# try:
|
||||||
|
# # tree = etree.fromstring(res[0]) # etree is for xml
|
||||||
|
# tree = html.fromstring(document)
|
||||||
|
# except:
|
||||||
|
# result = (
|
||||||
|
# "> {}\nFailed to parse URL as feed."
|
||||||
|
# ).format(url)
|
||||||
|
# if not result:
|
||||||
|
# print("RSS Auto-Discovery Engaged")
|
||||||
|
# result = await feed_mode_auto_discovery(url, tree)
|
||||||
|
# if not result:
|
||||||
|
# print("RSS Scan Mode Engaged")
|
||||||
|
# result = await feed_mode_scan(url, tree)
|
||||||
|
# if not result:
|
||||||
|
# print("RSS Arbitrary Mode Engaged")
|
||||||
|
# result = await feed_mode_request(url, tree)
|
||||||
|
# if not result:
|
||||||
|
# result = (
|
||||||
|
# "> {}\nNo news feeds were found for URL."
|
||||||
|
# ).format(url)
|
||||||
|
# # elif msg:
|
||||||
|
# else:
|
||||||
|
# if isinstance(result, str):
|
||||||
|
# return result
|
||||||
|
# elif isinstance(result, list):
|
||||||
|
# url = result[0]
|
||||||
|
# if db_file:
|
||||||
|
# # print("if db_file", db_file)
|
||||||
|
# return await callback(db_file, url)
|
||||||
|
# elif num:
|
||||||
|
# return await callback(url, num)
|
||||||
|
# else:
|
||||||
|
# return await callback(url)
|
||||||
|
|
||||||
|
|
||||||
|
async def probe_page(url, document):
|
||||||
|
"""
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
url : str
|
||||||
|
URL.
|
||||||
|
document : TYPE
|
||||||
|
DESCRIPTION.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
result : list or str
|
||||||
|
A single URL as a list, or a selection of URLs as a str.
|
||||||
|
"""
|
||||||
|
result = None
|
||||||
|
try:
|
||||||
|
# tree = etree.fromstring(res[0]) # etree is for xml
|
||||||
|
tree = html.fromstring(document)
|
||||||
|
except:
|
||||||
|
result = (
|
||||||
|
"> {}\nFailed to parse URL as feed."
|
||||||
|
).format(url)
|
||||||
|
if not result:
|
||||||
|
print("RSS Auto-Discovery Engaged")
|
||||||
|
result = await feed_mode_auto_discovery(url, tree)
|
||||||
|
if not result:
|
||||||
|
print("RSS Scan Mode Engaged")
|
||||||
|
result = await feed_mode_scan(url, tree)
|
||||||
|
if not result:
|
||||||
|
print("RSS Arbitrary Mode Engaged")
|
||||||
|
result = await feed_mode_request(url, tree)
|
||||||
|
if not result:
|
||||||
|
result = (
|
||||||
|
"> {}\nNo news feeds were found for URL."
|
||||||
|
).format(url)
|
||||||
|
return result
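probe_page wraps html.fromstring in a try/except because lxml raises on input it cannot parse at all (an empty document, for instance). A small standalone illustration of the parsing step that the discovery modes below build on:

```python
# Minimal illustration of the lxml parsing step used by probe_page.
from lxml import html

document = "<html><body><a href='/blog/feed.rss'>RSS</a></body></html>"
tree = html.fromstring(document)
print(tree.xpath("//a/@href"))  # ['/blog/feed.rss']
```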
|
||||||
|
|
||||||
|
|
||||||
|
# TODO Improve scan by gradual decreasing of path
|
||||||
|
async def feed_mode_request(url, tree):
|
||||||
|
"""
|
||||||
|
Look up feeds by pathname using HTTP requests.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
db_file : str
|
||||||
|
Path to database file.
|
||||||
|
url : str
|
||||||
|
URL.
|
||||||
|
tree : TYPE
|
||||||
|
DESCRIPTION.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
msg : str
|
||||||
|
Message with URLs.
|
||||||
|
"""
|
||||||
|
feeds = {}
|
||||||
|
parted_url = urlsplit(url)
|
||||||
|
paths = config.get_list("lists.yaml")
|
||||||
|
paths = paths["pathnames"]
|
||||||
|
for path in paths:
|
||||||
|
address = urlunsplit([
|
||||||
|
parted_url.scheme,
|
||||||
|
parted_url.netloc,
|
||||||
|
path,
|
||||||
|
None,
|
||||||
|
None
|
||||||
|
])
|
||||||
|
res = await download_feed(address)
|
||||||
|
if res[1] == 200:
|
||||||
|
# print(parse(res[0])["feed"]["title"])
|
||||||
|
# feeds[address] = parse(res[0])["feed"]["title"]
|
||||||
|
try:
|
||||||
|
title = parse(res[0])["feed"]["title"]
|
||||||
|
except:
|
||||||
|
title = '*** No Title ***'
|
||||||
|
feeds[address] = title
|
||||||
|
# Check whether URL has path (i.e. not root)
|
||||||
|
# Check parted_url.path to avoid error in case root wasn't given
|
||||||
|
# TODO Make more tests
|
||||||
|
if parted_url.path and parted_url.path.split('/')[1]:
|
||||||
|
paths.extend(
|
||||||
|
[".atom", ".feed", ".rdf", ".rss"]
|
||||||
|
) if '.rss' not in paths else -1
|
||||||
|
# if paths.index('.rss'):
|
||||||
|
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
|
||||||
|
address = urlunsplit([
|
||||||
|
parted_url.scheme,
|
||||||
|
parted_url.netloc,
|
||||||
|
parted_url.path.split('/')[1] + path,
|
||||||
|
None,
|
||||||
|
None
|
||||||
|
])
|
||||||
|
res = await download_feed(address)
|
||||||
|
if res[1] == 200:
|
||||||
|
try:
|
||||||
|
feeds[address] = parse(res[0])
|
||||||
|
# print(feeds)
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
if len(feeds) > 1:
|
||||||
|
counter = 0
|
||||||
|
msg = (
|
||||||
|
"RSS URL discovery has found {} feeds:\n\n```\n"
|
||||||
|
).format(len(feeds))
|
||||||
|
feed_mark = 0
|
||||||
|
for feed in feeds:
|
||||||
|
try:
|
||||||
|
feed_name = feeds[feed]["feed"]["title"]
|
||||||
|
except:
|
||||||
|
feed_name = urlsplit(feed).netloc
|
||||||
|
feed_addr = feed
|
||||||
|
# AttributeError: 'str' object has no attribute 'entries'
|
||||||
|
try:
|
||||||
|
feed_amnt = len(feeds[feed].entries)
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
if feed_amnt:
|
||||||
|
# NOTE Because there could be many false positives
|
||||||
|
# which are revealed in second phase of scan, we
|
||||||
|
# could end with a single feed, which would be
|
||||||
|
# listed instead of fetched, so feed_mark is
|
||||||
|
# utilized in order to make fetch possible.
|
||||||
|
feed_mark = [feed_addr]
|
||||||
|
counter += 1
|
||||||
|
msg += (
|
||||||
|
"Title: {}\n"
|
||||||
|
"Link : {}\n"
|
||||||
|
"Items: {}\n"
|
||||||
|
"\n"
|
||||||
|
).format(feed_name, feed_addr, feed_amnt)
|
||||||
|
if counter > 1:
|
||||||
|
msg += (
|
||||||
|
"```\nThe above feeds were extracted from\n{}"
|
||||||
|
).format(url)
|
||||||
|
elif feed_mark:
|
||||||
|
return feed_mark
|
||||||
|
else:
|
||||||
|
msg = (
|
||||||
|
"No feeds were found for {}"
|
||||||
|
).format(url)
|
||||||
|
return msg
|
||||||
|
elif feeds:
|
||||||
|
return feeds
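feed_mode_request composes candidate addresses by combining the host with well-known feed pathnames taken from lists.yaml. A standalone sketch of that URL composition; the pathnames below are illustrative, not the project's actual list:

```python
# Illustrative composition of candidate feed URLs by pathname;
# the pathnames below are examples, not the contents of lists.yaml.
from urllib.parse import urlsplit, urlunsplit

url = "https://example.org/blog/article"
parted = urlsplit(url)
for path in ("/feed", "/rss.xml", "/atom.xml"):
    candidate = urlunsplit(
        [parted.scheme, parted.netloc, path, None, None])
    print(candidate)
# https://example.org/feed
# https://example.org/rss.xml
# https://example.org/atom.xml
```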
|
||||||
|
|
||||||
|
|
||||||
|
async def feed_mode_scan(url, tree):
|
||||||
|
"""
|
||||||
|
Scan page for potential feeds by pathname.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
db_file : str
|
||||||
|
Path to database file.
|
||||||
|
url : str
|
||||||
|
URL.
|
||||||
|
tree : TYPE
|
||||||
|
DESCRIPTION.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
msg : str
|
||||||
|
Message with URLs.
|
||||||
|
"""
|
||||||
|
feeds = {}
|
||||||
|
# paths = []
|
||||||
|
# TODO Test
|
||||||
|
paths = config.get_list("lists.yaml")
|
||||||
|
paths = paths["pathnames"]
|
||||||
|
for path in paths:
|
||||||
|
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
|
||||||
|
# xpath_query = "//a[contains(@href,'{}')]".format(path)
|
||||||
|
num = 5
|
||||||
|
xpath_query = "(//a[contains(@href,'{}')])[position()<={}]".format(path, num)
|
||||||
|
addresses = tree.xpath(xpath_query)
|
||||||
|
xpath_query = "(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num)
|
||||||
|
addresses += tree.xpath(xpath_query)
|
||||||
|
parted_url = urlsplit(url)
|
||||||
|
# NOTE Should number of addresses be limited or
|
||||||
|
# perhaps be N from the start and N from the end
|
||||||
|
for address in addresses:
|
||||||
|
# print(address.xpath('@href')[0])
|
||||||
|
# print(addresses)
|
||||||
|
address = address.xpath('@href')[0]
|
||||||
|
if "/" not in address:
|
||||||
|
protocol = parted_url.scheme
|
||||||
|
hostname = parted_url.netloc
|
||||||
|
pathname = address
|
||||||
|
address = urlunsplit([
|
||||||
|
protocol,
|
||||||
|
hostname,
|
||||||
|
pathname,
|
||||||
|
None,
|
||||||
|
None
|
||||||
|
])
|
||||||
|
if address.startswith('/'):
|
||||||
|
protocol = parted_url.scheme
|
||||||
|
hostname = parted_url.netloc
|
||||||
|
pathname = address
|
||||||
|
address = urlunsplit([
|
||||||
|
protocol,
|
||||||
|
hostname,
|
||||||
|
pathname,
|
||||||
|
None,
|
||||||
|
None
|
||||||
|
])
|
||||||
|
res = await download_feed(address)
|
||||||
|
if res[1] == 200:
|
||||||
|
try:
|
||||||
|
feeds[address] = parse(res[0])
|
||||||
|
# print(feeds[address])
|
||||||
|
# breakpoint()
|
||||||
|
# print(feeds)
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
if len(feeds) > 1:
|
||||||
|
# print(feeds)
|
||||||
|
# breakpoint()
|
||||||
|
counter = 0
|
||||||
|
msg = (
|
||||||
|
"RSS URL scan has found {} feeds:\n\n```\n"
|
||||||
|
).format(len(feeds))
|
||||||
|
feed_mark = 0
|
||||||
|
for feed in feeds:
|
||||||
|
# try:
|
||||||
|
# res = await download_feed(feed)
|
||||||
|
# except:
|
||||||
|
# continue
|
||||||
|
try:
|
||||||
|
feed_name = feeds[feed]["feed"]["title"]
|
||||||
|
except:
|
||||||
|
feed_name = urlsplit(feed).netloc
|
||||||
|
feed_addr = feed
|
||||||
|
feed_amnt = len(feeds[feed].entries)
|
||||||
|
if feed_amnt:
|
||||||
|
# NOTE Because there could be many false positives
|
||||||
|
# which are revealed in second phase of scan, we
|
||||||
|
# could end with a single feed, which would be
|
||||||
|
# listed instead of fetched, so feed_mark is
|
||||||
|
# utilized in order to make fetch possible.
|
||||||
|
feed_mark = [feed_addr]
|
||||||
|
counter += 1
|
||||||
|
msg += (
|
||||||
|
"Title : {}\n"
|
||||||
|
"Link : {}\n"
|
||||||
|
"Count : {}\n"
|
||||||
|
"\n"
|
||||||
|
).format(feed_name, feed_addr, feed_amnt)
|
||||||
|
if counter > 1:
|
||||||
|
msg += (
|
||||||
|
"```\nThe above feeds were extracted from\n{}"
|
||||||
|
).format(url)
|
||||||
|
elif feed_mark:
|
||||||
|
return feed_mark
|
||||||
|
else:
|
||||||
|
msg = (
|
||||||
|
"No feeds were found for {}"
|
||||||
|
).format(url)
|
||||||
|
return msg
|
||||||
|
elif feeds:
|
||||||
|
return feeds
|
||||||
|
|
||||||
|
|
||||||
|
async def feed_mode_auto_discovery(url, tree):
|
||||||
|
"""
|
||||||
|
Look up feeds using the RSS autodiscovery technique.
|
||||||
|
|
||||||
|
See: https://www.rssboard.org/rss-autodiscovery
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
db_file : str
|
||||||
|
Path to database file.
|
||||||
|
url : str
|
||||||
|
URL.
|
||||||
|
tree : TYPE
|
||||||
|
DESCRIPTION.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
msg : str
|
||||||
|
Message with URLs.
|
||||||
|
"""
|
||||||
|
xpath_query = (
|
||||||
|
'//link[(@rel="alternate") and '
|
||||||
|
'(@type="application/atom+xml" or '
|
||||||
|
'@type="application/rdf+xml" or '
|
||||||
|
'@type="application/rss+xml")]'
|
||||||
|
)
|
||||||
|
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
|
||||||
|
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
|
||||||
|
feeds = tree.xpath(xpath_query)
|
||||||
|
if len(feeds) > 1:
|
||||||
|
msg = (
|
||||||
|
"RSS Auto-Discovery has found {} feeds:\n\n```\n"
|
||||||
|
).format(len(feeds))
|
||||||
|
for feed in feeds:
|
||||||
|
# # The following code works;
|
||||||
|
# # The following code will catch
|
||||||
|
# # only valid resources (i.e. not 404);
|
||||||
|
# # The following code requires more bandwidth.
|
||||||
|
# res = await download_feed(feed)
|
||||||
|
# if res[0]:
|
||||||
|
# disco = parse(res[0])
|
||||||
|
# title = disco["feed"]["title"]
|
||||||
|
# msg += "{} \n {} \n\n".format(title, feed)
|
||||||
|
feed_name = feed.xpath('@title')[0]
|
||||||
|
feed_addr = join_url(url, feed.xpath('@href')[0])
|
||||||
|
# if feed_addr.startswith("/"):
|
||||||
|
# feed_addr = url + feed_addr
|
||||||
|
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
|
||||||
|
msg += (
|
||||||
|
"```\nThe above feeds were extracted from\n{}"
|
||||||
|
).format(url)
|
||||||
|
return msg
|
||||||
|
elif feeds:
|
||||||
|
feed_addr = join_url(url, feeds[0].xpath('@href')[0])
|
||||||
|
return [feed_addr]
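The XPath above targets the RSS autodiscovery markup described at rssboard.org. For reference, a page advertising its feed typically carries a link element like the one in this illustrative snippet, whose relative href is then joined against the page URL:

```python
# Example of the autodiscovery markup the XPath above matches,
# parsed here with lxml purely for illustration.
from lxml import html
from urllib.parse import urljoin

page = (
    '<html><head>'
    '<link rel="alternate" type="application/rss+xml" '
    'title="Example News" href="/feeds/news.rss"/>'
    '</head><body></body></html>'
)
tree = html.fromstring(page)
link = tree.xpath(
    '//link[@rel="alternate" and @type="application/rss+xml"]')[0]
print(link.xpath('@title')[0])                       # Example News
print(urljoin("https://example.org/", link.xpath('@href')[0]))
# https://example.org/feeds/news.rss
```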
|
|
@@ -33,454 +33,24 @@ from http.client import IncompleteRead
|
||||||
from lxml import html
|
from lxml import html
|
||||||
import slixfeed.config as config
|
import slixfeed.config as config
|
||||||
from slixfeed.datetime import now, rfc2822_to_iso8601
|
from slixfeed.datetime import now, rfc2822_to_iso8601
|
||||||
import slixfeed.utility as utility
|
|
||||||
import slixfeed.sqlite as sqlite
|
import slixfeed.sqlite as sqlite
|
||||||
from slixfeed.url import complete_url, join_url, trim_url
|
from slixfeed.url import complete_url, join_url, trim_url
|
||||||
from urllib import error
|
from urllib import error
|
||||||
# from xml.etree.ElementTree import ElementTree, ParseError
|
# from xml.etree.ElementTree import ElementTree, ParseError
|
||||||
from urllib.parse import urlsplit, urlunsplit
|
from urllib.parse import urlsplit, urlunsplit
|
||||||
|
|
||||||
# NOTE Why (if res[0]) and (if res[1] == 200)?
|
|
||||||
async def download_updates(db_file, url=None):
|
|
||||||
"""
|
|
||||||
Check feeds for new entries.
|
|
||||||
|
|
||||||
Parameters
|
# async def dat():
|
||||||
----------
|
|
||||||
db_file : str
|
|
||||||
Path to database file.
|
|
||||||
url : str, optional
|
|
||||||
URL. The default is None.
|
|
||||||
"""
|
|
||||||
if url:
|
|
||||||
urls = [url] # Valid [url] and [url,] and (url,)
|
|
||||||
else:
|
|
||||||
urls = await sqlite.get_feeds_url(db_file)
|
|
||||||
for url in urls:
|
|
||||||
# print(os.path.basename(db_file), url[0])
|
|
||||||
source = url[0]
|
|
||||||
res = await download_feed(source)
|
|
||||||
# TypeError: 'NoneType' object is not subscriptable
|
|
||||||
if res is None:
|
|
||||||
# Skip to next feed
|
|
||||||
# urls.next()
|
|
||||||
# next(urls)
|
|
||||||
continue
|
|
||||||
await sqlite.update_source_status(
|
|
||||||
db_file, res[1], source)
|
|
||||||
if res[0]:
|
|
||||||
try:
|
|
||||||
feed = parse(res[0])
|
|
||||||
if feed.bozo:
|
|
||||||
# bozo = (
|
|
||||||
# "WARNING: Bozo detected for feed: {}\n"
|
|
||||||
# "For more information, visit "
|
|
||||||
# "https://pythonhosted.org/feedparser/bozo.html"
|
|
||||||
# ).format(source)
|
|
||||||
# print(bozo)
|
|
||||||
valid = 0
|
|
||||||
else:
|
|
||||||
valid = 1
|
|
||||||
await sqlite.update_source_validity(
|
|
||||||
db_file, source, valid)
|
|
||||||
except (
|
|
||||||
IncompleteReadError,
|
|
||||||
IncompleteRead,
|
|
||||||
error.URLError
|
|
||||||
) as e:
|
|
||||||
# print(e)
|
|
||||||
# TODO Print error to log
|
|
||||||
None
|
|
||||||
# NOTE I don't think there should be "return"
|
|
||||||
# because then we might stop scanning next URLs
|
|
||||||
# return
|
|
||||||
# TODO Place these couple of lines back down
|
|
||||||
# NOTE Need to correct the SQL statement to do so
|
|
||||||
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
|
||||||
if res[1] == 200:
|
|
||||||
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
|
||||||
# TODO Place these couple of lines back down
|
|
||||||
# NOTE Need to correct the SQL statement to do so
|
|
||||||
entries = feed.entries
|
|
||||||
# length = len(entries)
|
|
||||||
# await remove_entry(db_file, source, length)
|
|
||||||
await sqlite.remove_nonexistent_entries(
|
|
||||||
db_file, feed, source)
|
|
||||||
# new_entry = 0
|
|
||||||
for entry in entries:
|
|
||||||
# TODO Pass date too for comparison check
|
|
||||||
if entry.has_key("published"):
|
|
||||||
date = entry.published
|
|
||||||
date = rfc2822_to_iso8601(date)
|
|
||||||
elif entry.has_key("updated"):
|
|
||||||
date = entry.updated
|
|
||||||
date = rfc2822_to_iso8601(date)
|
|
||||||
else:
|
|
||||||
# TODO Just set date = "*** No date ***"
|
|
||||||
# date = await datetime.now().isoformat()
|
|
||||||
date = now()
|
|
||||||
# NOTE Would seconds result in better database performance
|
|
||||||
# date = datetime.datetime(date)
|
|
||||||
# date = (date-datetime.datetime(1970,1,1)).total_seconds()
|
|
||||||
if entry.has_key("title"):
|
|
||||||
title = entry.title
|
|
||||||
# title = "{}: *{}*".format(feed["feed"]["title"], entry.title)
|
|
||||||
else:
|
|
||||||
title = date
|
|
||||||
# title = feed["feed"]["title"]
|
|
||||||
if entry.has_key("link"):
|
|
||||||
# link = complete_url(source, entry.link)
|
|
||||||
link = join_url(source, entry.link)
|
|
||||||
link = trim_url(link)
|
|
||||||
else:
|
|
||||||
link = source
|
|
||||||
if entry.has_key("id"):
|
|
||||||
eid = entry.id
|
|
||||||
else:
|
|
||||||
eid = link
|
|
||||||
exist = await sqlite.check_entry_exist(
|
|
||||||
db_file, source, eid=eid,
|
|
||||||
title=title, link=link, date=date)
|
|
||||||
if not exist:
|
|
||||||
# new_entry = new_entry + 1
|
|
||||||
# TODO Enhance summary
|
|
||||||
if entry.has_key("summary"):
|
|
||||||
summary = entry.summary
|
|
||||||
# # Remove HTML tags
|
|
||||||
# summary = BeautifulSoup(summary, "lxml").text
|
|
||||||
# # TODO Limit text length
|
|
||||||
# summary = summary.replace("\n\n\n", "\n\n")
|
|
||||||
# summary = summary[:300] + " […]⃨"
|
|
||||||
# summary = summary.strip().split('\n')
|
|
||||||
# summary = ["> " + line for line in summary]
|
|
||||||
# summary = "\n".join(summary)
|
|
||||||
else:
|
|
||||||
summary = "> *** No summary ***"
|
|
||||||
read_status = 0
|
|
||||||
pathname = urlsplit(link).path
|
|
||||||
string = (
|
|
||||||
"{} {} {}"
|
|
||||||
).format(
|
|
||||||
title,
|
|
||||||
summary,
|
|
||||||
pathname
|
|
||||||
)
|
|
||||||
allow_list = await config.is_listed(
|
|
||||||
db_file, "filter-allow", string)
|
|
||||||
if not allow_list:
|
|
||||||
reject_list = await config.is_listed(
|
|
||||||
db_file, "filter-deny", string)
|
|
||||||
if reject_list:
|
|
||||||
# print(">>> REJECTED", title)
|
|
||||||
summary = (
|
|
||||||
"REJECTED {}".format(
|
|
||||||
reject_list.upper()
|
|
||||||
)
|
|
||||||
)
|
|
||||||
# summary = ""
|
|
||||||
read_status = 1
|
|
||||||
entry = (
|
|
||||||
title, link, eid, source, date, read_status)
|
|
||||||
if isinstance(date, int):
|
|
||||||
print("PROBLEM: date is int")
|
|
||||||
print(date)
|
|
||||||
# breakpoint()
|
|
||||||
# print(source)
|
|
||||||
# print(date)
|
|
||||||
await sqlite.add_entry_and_set_date(
|
|
||||||
db_file, source, entry)
|
|
||||||
# print(current_time(), entry, title)
|
|
||||||
# else:
|
|
||||||
# print(current_time(), exist, title)
|
|
||||||
|
|
||||||
|
# async def ftp():
|
||||||
|
|
||||||
# NOTE Why (if result[0]) and (if result[1] == 200)?
|
# async def gemini():
|
||||||
async def view_feed(url):
|
|
||||||
"""
|
|
||||||
Check feeds for new entries.
|
|
||||||
|
|
||||||
Parameters
|
# async def gopher():
|
||||||
----------
|
|
||||||
db_file : str
|
|
||||||
Path to database file.
|
|
||||||
url : str, optional
|
|
||||||
URL. The default is None.
|
|
||||||
|
|
||||||
Returns
|
# async def http():
|
||||||
-------
|
|
||||||
msg : str
|
|
||||||
Feed content or error message.
|
|
||||||
"""
|
|
||||||
result = await download_feed(url)
|
|
||||||
if result[0]:
|
|
||||||
try:
|
|
||||||
feed = parse(result[0])
|
|
||||||
if feed.bozo:
|
|
||||||
# msg = (
|
|
||||||
# ">{}\n"
|
|
||||||
# "WARNING: Bozo detected!\n"
|
|
||||||
# "For more information, visit "
|
|
||||||
# "https://pythonhosted.org/feedparser/bozo.html"
|
|
||||||
# ).format(url)
|
|
||||||
msg = await probe_page(view_feed, url, result[0])
|
|
||||||
return msg
|
|
||||||
except (
|
|
||||||
IncompleteReadError,
|
|
||||||
IncompleteRead,
|
|
||||||
error.URLError
|
|
||||||
) as e:
|
|
||||||
# print(e)
|
|
||||||
# TODO Print error to log
|
|
||||||
msg = (
|
|
||||||
"> {}\n"
|
|
||||||
"Error: {}"
|
|
||||||
).format(url, e)
|
|
||||||
# breakpoint()
|
|
||||||
if result[1] == 200:
|
|
||||||
feed = parse(result[0])
|
|
||||||
title = utility.get_title(url, feed)
|
|
||||||
entries = feed.entries
|
|
||||||
msg = "Preview of {}:\n\n```\n".format(title)
|
|
||||||
counter = 0
|
|
||||||
for entry in entries:
|
|
||||||
counter += 1
|
|
||||||
if entry.has_key("title"):
|
|
||||||
title = entry.title
|
|
||||||
else:
|
|
||||||
title = "*** No title ***"
|
|
||||||
if entry.has_key("link"):
|
|
||||||
# link = complete_url(source, entry.link)
|
|
||||||
link = join_url(url, entry.link)
|
|
||||||
link = trim_url(link)
|
|
||||||
else:
|
|
||||||
link = "*** No link ***"
|
|
||||||
if entry.has_key("published"):
|
|
||||||
date = entry.published
|
|
||||||
date = rfc2822_to_iso8601(date)
|
|
||||||
elif entry.has_key("updated"):
|
|
||||||
date = entry.updated
|
|
||||||
date = rfc2822_to_iso8601(date)
|
|
||||||
else:
|
|
||||||
date = "*** No date ***"
|
|
||||||
msg += (
|
|
||||||
"Title : {}\n"
|
|
||||||
"Date : {}\n"
|
|
||||||
"Link : {}\n"
|
|
||||||
"Count : {}\n"
|
|
||||||
"\n"
|
|
||||||
).format(title, date, link, counter)
|
|
||||||
if counter > 4:
|
|
||||||
break
|
|
||||||
msg += (
|
|
||||||
"```\nSource: {}"
|
|
||||||
).format(url)
|
|
||||||
else:
|
|
||||||
msg = (
|
|
||||||
">{}\nFailed to load URL. Reason: {}"
|
|
||||||
).format(url, result[1])
|
|
||||||
return msg
|
|
||||||
|
|
||||||
|
|
||||||
# NOTE Why (if result[0]) and (if result[1] == 200)?
|
|
||||||
async def view_entry(url, num):
|
|
||||||
result = await download_feed(url)
|
|
||||||
if result[0]:
|
|
||||||
try:
|
|
||||||
feed = parse(result[0])
|
|
||||||
if feed.bozo:
|
|
||||||
# msg = (
|
|
||||||
# ">{}\n"
|
|
||||||
# "WARNING: Bozo detected!\n"
|
|
||||||
# "For more information, visit "
|
|
||||||
# "https://pythonhosted.org/feedparser/bozo.html"
|
|
||||||
# ).format(url)
|
|
||||||
msg = await probe_page(view_entry, url, result[0], num=num)
|
|
||||||
return msg
|
|
||||||
except (
|
|
||||||
IncompleteReadError,
|
|
||||||
IncompleteRead,
|
|
||||||
error.URLError
|
|
||||||
) as e:
|
|
||||||
# print(e)
|
|
||||||
# TODO Print error to log
|
|
||||||
msg = (
|
|
||||||
"> {}\n"
|
|
||||||
"Error: {}"
|
|
||||||
).format(url, e)
|
|
||||||
# breakpoint()
|
|
||||||
if result[1] == 200:
|
|
||||||
feed = parse(result[0])
|
|
||||||
title = utility.get_title(url, result[0])
|
|
||||||
entries = feed.entries
|
|
||||||
num = int(num) - 1
|
|
||||||
entry = entries[num]
|
|
||||||
if entry.has_key("title"):
|
|
||||||
title = entry.title
|
|
||||||
else:
|
|
||||||
title = "*** No title ***"
|
|
||||||
if entry.has_key("published"):
|
|
||||||
date = entry.published
|
|
||||||
date = rfc2822_to_iso8601(date)
|
|
||||||
elif entry.has_key("updated"):
|
|
||||||
date = entry.updated
|
|
||||||
date = rfc2822_to_iso8601(date)
|
|
||||||
else:
|
|
||||||
date = "*** No date ***"
|
|
||||||
if entry.has_key("summary"):
|
|
||||||
summary = entry.summary
|
|
||||||
# Remove HTML tags
|
|
||||||
summary = BeautifulSoup(summary, "lxml").text
|
|
||||||
# TODO Limit text length
|
|
||||||
summary = summary.replace("\n\n\n", "\n\n")
|
|
||||||
else:
|
|
||||||
summary = "*** No summary ***"
|
|
||||||
if entry.has_key("link"):
|
|
||||||
# link = complete_url(source, entry.link)
|
|
||||||
link = join_url(url, entry.link)
|
|
||||||
link = trim_url(link)
|
|
||||||
else:
|
|
||||||
link = "*** No link ***"
|
|
||||||
msg = (
|
|
||||||
"{}\n"
|
|
||||||
"\n"
|
|
||||||
"> {}\n"
|
|
||||||
"\n"
|
|
||||||
"{}\n"
|
|
||||||
"\n"
|
|
||||||
).format(title, summary, link)
|
|
||||||
else:
|
|
||||||
msg = (
|
|
||||||
">{}\n"
|
|
||||||
"Failed to load URL. Reason: {}\n"
|
|
||||||
"Try again momentarily."
|
|
||||||
).format(url, result[1])
|
|
||||||
return msg
|
|
||||||
|
|
||||||
|
|
||||||
async def add_feed_no_check(db_file, data):
|
|
||||||
"""
|
|
||||||
Add given feed without validity check.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
db_file : str
|
|
||||||
Path to database file.
|
|
||||||
data : str
|
|
||||||
URL or URL and Title.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
msg : str
|
|
||||||
Status message.
|
|
||||||
"""
|
|
||||||
url = data[0]
|
|
||||||
title = data[1]
|
|
||||||
url = trim_url(url)
|
|
||||||
exist = await sqlite.is_feed_exist(db_file, url)
|
|
||||||
if not exist:
|
|
||||||
msg = await sqlite.insert_feed(db_file, url, title)
|
|
||||||
await download_updates(db_file, [url])
|
|
||||||
else:
|
|
||||||
ix = exist[0]
|
|
||||||
name = exist[1]
|
|
||||||
msg = (
|
|
||||||
"> {}\nNews source \"{}\" is already "
|
|
||||||
"listed in the subscription list at "
|
|
||||||
"index {}".format(url, name, ix)
|
|
||||||
)
|
|
||||||
return msg
|
|
||||||
|
|
||||||
|
|
||||||
async def add_feed(db_file, url):
|
|
||||||
"""
|
|
||||||
Check whether feed exist, otherwise process it.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
db_file : str
|
|
||||||
Path to database file.
|
|
||||||
url : str
|
|
||||||
URL.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
msg : str
|
|
||||||
Status message.
|
|
||||||
"""
|
|
||||||
msg = None
|
|
||||||
url = trim_url(url)
|
|
||||||
exist = await sqlite.is_feed_exist(db_file, url)
|
|
||||||
if not exist:
|
|
||||||
res = await download_feed(url)
|
|
||||||
if res[0]:
|
|
||||||
feed = parse(res[0])
|
|
||||||
title = utility.get_title(url, feed)
|
|
||||||
if utility.is_feed(url, feed):
|
|
||||||
status = res[1]
|
|
||||||
await sqlite.insert_feed(
|
|
||||||
db_file, url, title, status)
|
|
||||||
await download_updates(db_file, [url])
|
|
||||||
title = title if title else url
|
|
||||||
msg = (
|
|
||||||
"> {}\nNews source \"{}\" has been added "
|
|
||||||
"to subscription list."
|
|
||||||
).format(url, title)
|
|
||||||
else:
|
|
||||||
msg = await probe_page(
|
|
||||||
add_feed, url, res[0], db_file=db_file)
|
|
||||||
else:
|
|
||||||
status = res[1]
|
|
||||||
msg = (
|
|
||||||
"> {}\nFailed to load URL. Reason: {}"
|
|
||||||
).format(url, status)
|
|
||||||
else:
|
|
||||||
ix = exist[0]
|
|
||||||
name = exist[1]
|
|
||||||
msg = (
|
|
||||||
"> {}\nNews source \"{}\" is already "
|
|
||||||
"listed in the subscription list at "
|
|
||||||
"index {}".format(url, name, ix)
|
|
||||||
)
|
|
||||||
return msg
|
|
||||||
|
|
||||||
|
|
||||||
# TODO callback for use with add_feed and view_feed
|
|
||||||
async def probe_page(callback, url, doc, num=None, db_file=None):
|
|
||||||
msg = None
|
|
||||||
try:
|
|
||||||
# tree = etree.fromstring(res[0]) # etree is for xml
|
|
||||||
tree = html.fromstring(doc)
|
|
||||||
except:
|
|
||||||
msg = (
|
|
||||||
"> {}\nFailed to parse URL as feed."
|
|
||||||
).format(url)
|
|
||||||
if not msg:
|
|
||||||
print("RSS Auto-Discovery Engaged")
|
|
||||||
msg = await feed_mode_auto_discovery(url, tree)
|
|
||||||
if not msg:
|
|
||||||
print("RSS Scan Mode Engaged")
|
|
||||||
msg = await feed_mode_scan(url, tree)
|
|
||||||
if not msg:
|
|
||||||
print("RSS Arbitrary Mode Engaged")
|
|
||||||
msg = await feed_mode_request(url, tree)
|
|
||||||
if not msg:
|
|
||||||
msg = (
|
|
||||||
"> {}\nNo news feeds were found for URL."
|
|
||||||
).format(url)
|
|
||||||
# elif msg:
|
|
||||||
else:
|
|
||||||
if isinstance(msg, str):
|
|
||||||
return msg
|
|
||||||
elif isinstance(msg, list):
|
|
||||||
url = msg[0]
|
|
||||||
if db_file:
|
|
||||||
# print("if db_file", db_file)
|
|
||||||
return await callback(db_file, url)
|
|
||||||
elif num:
|
|
||||||
return await callback(url, num)
|
|
||||||
else:
|
|
||||||
return await callback(url)
|
|
||||||
|
|
||||||
|
# async def ipfs():
|
||||||
|
|
||||||
async def download_feed(url):
|
async def download_feed(url):
|
||||||
"""
|
"""
|
||||||
|
@@ -488,7 +58,7 @@ async def download_feed(url):
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
url : str
|
url : list
|
||||||
URL.
|
URL.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
|
@@ -502,27 +72,23 @@ async def download_feed(url):
|
||||||
user_agent = "Slixfeed/0.1"
|
user_agent = "Slixfeed/0.1"
|
||||||
if not len(user_agent):
|
if not len(user_agent):
|
||||||
user_agent = "Slixfeed/0.1"
|
user_agent = "Slixfeed/0.1"
|
||||||
|
headers = {'User-Agent': user_agent}
|
||||||
|
url = url[0]
|
||||||
proxy = (config.get_value("settings", "Network", "http_proxy")) or ''
|
proxy = (config.get_value("settings", "Network", "http_proxy")) or ''
|
||||||
timeout = ClientTimeout(total=10)
|
timeout = ClientTimeout(total=10)
|
||||||
headers = {'User-Agent': user_agent}
|
|
||||||
async with ClientSession(headers=headers) as session:
|
async with ClientSession(headers=headers) as session:
|
||||||
# async with ClientSession(trust_env=True) as session:
|
# async with ClientSession(trust_env=True) as session:
|
||||||
try:
|
try:
|
||||||
async with session.get(
|
async with session.get(url, proxy=proxy,
|
||||||
url,
|
# proxy_auth=(proxy_username, proxy_password),
|
||||||
proxy=proxy,
|
timeout=timeout
|
||||||
# proxy_auth=(proxy_username, proxy_password)
|
) as response:
|
||||||
timeout=timeout
|
|
||||||
) as response:
|
|
||||||
status = response.status
|
status = response.status
|
||||||
if response.status == 200:
|
if response.status == 200:
|
||||||
try:
|
try:
|
||||||
doc = await response.text()
|
doc = await response.text()
|
||||||
# print (response.content_type)
|
# print (response.content_type)
|
||||||
msg = [
|
msg = [doc, status]
|
||||||
doc,
|
|
||||||
status
|
|
||||||
]
|
|
||||||
except:
|
except:
|
||||||
# msg = [
|
# msg = [
|
||||||
# False,
|
# False,
|
||||||
|
@@ -531,307 +97,20 @@ async def download_feed(url):
|
||||||
# )
|
# )
|
||||||
# ]
|
# ]
|
||||||
msg = [
|
msg = [
|
||||||
False,
|
False, "Document is too large or is not textual."
|
||||||
"Document is too large or is not textual."
|
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
msg = [
|
msg = [
|
||||||
False,
|
False, "HTTP Error: " + str(status)
|
||||||
"HTTP Error: " + str(status)
|
|
||||||
]
|
]
|
||||||
except ClientError as e:
|
except ClientError as e:
|
||||||
# print('Error', str(e))
|
# print('Error', str(e))
|
||||||
msg = [
|
msg = [
|
||||||
False,
|
False, "Error: " + str(e)
|
||||||
"Error: " + str(e)
|
|
||||||
]
|
]
|
||||||
except TimeoutError as e:
|
except TimeoutError as e:
|
||||||
# print('Timeout:', str(e))
|
# print('Timeout:', str(e))
|
||||||
msg = [
|
msg = [
|
||||||
False,
|
False, "Timeout: " + str(e)
|
||||||
"Timeout: " + str(e)
|
|
||||||
]
|
]
|
||||||
return msg
|
return msg
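The reworked download_feed now takes its target wrapped in a list (url = url[0]) and always returns a two-item list of payload and status, which is what action.py unpacks as result[0] and result[1]. A stripped-down sketch of the same aiohttp request pattern, with the proxy handling and most error branches omitted:

```python
# Stripped-down sketch of the aiohttp pattern used by download_feed;
# proxy support and most error branches are omitted here.
import asyncio
from aiohttp import ClientSession, ClientTimeout

async def fetch(url):
    timeout = ClientTimeout(total=10)
    headers = {'User-Agent': 'Slixfeed/0.1'}
    async with ClientSession(headers=headers) as session:
        async with session.get(url, timeout=timeout) as response:
            status = response.status
            if status == 200:
                return [await response.text(), status]
            return [False, "HTTP Error: " + str(status)]

print(asyncio.run(fetch("https://example.org/feed.xml"))[1])
```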
|
||||||
|
|
||||||
|
|
||||||
# TODO Improve scan by gradual decreasing of path
|
|
||||||
async def feed_mode_request(url, tree):
|
|
||||||
"""
|
|
||||||
Lookup for feeds by pathname using HTTP Requests.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
db_file : str
|
|
||||||
Path to database file.
|
|
||||||
url : str
|
|
||||||
URL.
|
|
||||||
tree : TYPE
|
|
||||||
DESCRIPTION.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
msg : str
|
|
||||||
Message with URLs.
|
|
||||||
"""
|
|
||||||
feeds = {}
|
|
||||||
parted_url = urlsplit(url)
|
|
||||||
paths = config.get_list("lists.yaml")
|
|
||||||
paths = paths["pathnames"]
|
|
||||||
for path in paths:
|
|
||||||
address = urlunsplit([
|
|
||||||
parted_url.scheme,
|
|
||||||
parted_url.netloc,
|
|
||||||
path,
|
|
||||||
None,
|
|
||||||
None
|
|
||||||
])
|
|
||||||
res = await download_feed(address)
|
|
||||||
if res[1] == 200:
|
|
||||||
# print(parse(res[0])["feed"]["title"])
|
|
||||||
# feeds[address] = parse(res[0])["feed"]["title"]
|
|
||||||
try:
|
|
||||||
title = parse(res[0])["feed"]["title"]
|
|
||||||
except:
|
|
||||||
title = '*** No Title ***'
|
|
||||||
feeds[address] = title
|
|
||||||
# Check whether URL has path (i.e. not root)
|
|
||||||
# Check parted_url.path to avoid error in case root wasn't given
|
|
||||||
# TODO Make more tests
|
|
||||||
if parted_url.path and parted_url.path.split('/')[1]:
|
|
||||||
paths.extend(
|
|
||||||
[".atom", ".feed", ".rdf", ".rss"]
|
|
||||||
) if '.rss' not in paths else -1
|
|
||||||
# if paths.index('.rss'):
|
|
||||||
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
|
|
||||||
address = urlunsplit([
|
|
||||||
parted_url.scheme,
|
|
||||||
parted_url.netloc,
|
|
||||||
parted_url.path.split('/')[1] + path,
|
|
||||||
None,
|
|
||||||
None
|
|
||||||
])
|
|
||||||
res = await download_feed(address)
|
|
||||||
if res[1] == 200:
|
|
||||||
try:
|
|
||||||
feeds[address] = parse(res[0])
|
|
||||||
# print(feeds)
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
if len(feeds) > 1:
|
|
||||||
counter = 0
|
|
||||||
msg = (
|
|
||||||
"RSS URL discovery has found {} feeds:\n\n```\n"
|
|
||||||
).format(len(feeds))
|
|
||||||
feed_mark = 0
|
|
||||||
for feed in feeds:
|
|
||||||
try:
|
|
||||||
feed_name = feeds[feed]["feed"]["title"]
|
|
||||||
except:
|
|
||||||
feed_name = urlsplit(feed).netloc
|
|
||||||
feed_addr = feed
|
|
||||||
# AttributeError: 'str' object has no attribute 'entries'
|
|
||||||
try:
|
|
||||||
feed_amnt = len(feeds[feed].entries)
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
if feed_amnt:
|
|
||||||
# NOTE Because there could be many false positives
|
|
||||||
# which are revealed in second phase of scan, we
|
|
||||||
# could end with a single feed, which would be
|
|
||||||
# listed instead of fetched, so feed_mark is
|
|
||||||
# utilized in order to make fetch possible.
|
|
||||||
feed_mark = [feed_addr]
|
|
||||||
counter += 1
|
|
||||||
msg += (
|
|
||||||
"Title: {}\n"
|
|
||||||
"Link : {}\n"
|
|
||||||
"Items: {}\n"
|
|
||||||
"\n"
|
|
||||||
).format(feed_name, feed_addr, feed_amnt)
|
|
||||||
if counter > 1:
|
|
||||||
msg += (
|
|
||||||
"```\nThe above feeds were extracted from\n{}"
|
|
||||||
).format(url)
|
|
||||||
elif feed_mark:
|
|
||||||
return feed_mark
|
|
||||||
else:
|
|
||||||
msg = (
|
|
||||||
"No feeds were found for {}"
|
|
||||||
).format(url)
|
|
||||||
return msg
|
|
||||||
elif feeds:
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
|
|
||||||
async def feed_mode_scan(url, tree):
|
|
||||||
"""
|
|
||||||
Scan page for potential feeds by pathname.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
db_file : str
|
|
||||||
Path to database file.
|
|
||||||
url : str
|
|
||||||
URL.
|
|
||||||
tree : TYPE
|
|
||||||
DESCRIPTION.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
msg : str
|
|
||||||
Message with URLs.
|
|
||||||
"""
|
|
||||||
feeds = {}
|
|
||||||
# paths = []
|
|
||||||
# TODO Test
|
|
||||||
paths = config.get_list("lists.yaml")
|
|
||||||
paths = paths["pathnames"]
|
|
||||||
for path in paths:
|
|
||||||
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
|
|
||||||
# xpath_query = "//a[contains(@href,'{}')]".format(path)
|
|
||||||
num = 5
|
|
||||||
xpath_query = "(//a[contains(@href,'{}')])[position()<={}]".format(path, num)
|
|
||||||
addresses = tree.xpath(xpath_query)
|
|
||||||
xpath_query = "(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num)
|
|
||||||
addresses += tree.xpath(xpath_query)
|
|
||||||
parted_url = urlsplit(url)
|
|
||||||
# NOTE Should number of addresses be limited or
|
|
||||||
# perhaps be N from the start and N from the end
|
|
||||||
for address in addresses:
|
|
||||||
# print(address.xpath('@href')[0])
|
|
||||||
# print(addresses)
|
|
||||||
address = address.xpath('@href')[0]
|
|
||||||
if "/" not in address:
|
|
||||||
protocol = parted_url.scheme
|
|
||||||
hostname = parted_url.netloc
|
|
||||||
pathname = address
|
|
||||||
address = urlunsplit([
|
|
||||||
protocol,
|
|
||||||
hostname,
|
|
||||||
pathname,
|
|
||||||
None,
|
|
||||||
None
|
|
||||||
])
|
|
||||||
if address.startswith('/'):
|
|
||||||
protocol = parted_url.scheme
|
|
||||||
hostname = parted_url.netloc
|
|
||||||
pathname = address
|
|
||||||
address = urlunsplit([
|
|
||||||
protocol,
|
|
||||||
hostname,
|
|
||||||
pathname,
|
|
||||||
None,
|
|
||||||
None
|
|
||||||
])
|
|
||||||
res = await download_feed(address)
|
|
||||||
if res[1] == 200:
|
|
||||||
try:
|
|
||||||
feeds[address] = parse(res[0])
|
|
||||||
# print(feeds[address])
|
|
||||||
# breakpoint()
|
|
||||||
# print(feeds)
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
if len(feeds) > 1:
|
|
||||||
# print(feeds)
|
|
||||||
# breakpoint()
|
|
||||||
counter = 0
|
|
||||||
msg = (
|
|
||||||
"RSS URL scan has found {} feeds:\n\n```\n"
|
|
||||||
).format(len(feeds))
|
|
||||||
feed_mark = 0
|
|
||||||
for feed in feeds:
|
|
||||||
# try:
|
|
||||||
# res = await download_feed(feed)
|
|
||||||
# except:
|
|
||||||
# continue
|
|
||||||
try:
|
|
||||||
feed_name = feeds[feed]["feed"]["title"]
|
|
||||||
except:
|
|
||||||
feed_name = urlsplit(feed).netloc
|
|
||||||
feed_addr = feed
|
|
||||||
feed_amnt = len(feeds[feed].entries)
|
|
||||||
if feed_amnt:
|
|
||||||
# NOTE Because there could be many false positives
|
|
||||||
# which are revealed in second phase of scan, we
|
|
||||||
# could end with a single feed, which would be
|
|
||||||
# listed instead of fetched, so feed_mark is
|
|
||||||
# utilized in order to make fetch possible.
|
|
||||||
feed_mark = [feed_addr]
|
|
||||||
counter += 1
|
|
||||||
msg += (
|
|
||||||
"Title : {}\n"
|
|
||||||
"Link : {}\n"
|
|
||||||
"Count : {}\n"
|
|
||||||
"\n"
|
|
||||||
).format(feed_name, feed_addr, feed_amnt)
|
|
||||||
if counter > 1:
|
|
||||||
msg += (
|
|
||||||
"```\nThe above feeds were extracted from\n{}"
|
|
||||||
).format(url)
|
|
||||||
elif feed_mark:
|
|
||||||
return feed_mark
|
|
||||||
else:
|
|
||||||
msg = (
|
|
||||||
"No feeds were found for {}"
|
|
||||||
).format(url)
|
|
||||||
return msg
|
|
||||||
elif feeds:
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
|
|
||||||
async def feed_mode_auto_discovery(url, tree):
|
|
||||||
"""
|
|
||||||
Lookup for feeds using RSS autodiscovery technique.
|
|
||||||
|
|
||||||
See: https://www.rssboard.org/rss-autodiscovery
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
db_file : str
|
|
||||||
Path to database file.
|
|
||||||
url : str
|
|
||||||
URL.
|
|
||||||
tree : TYPE
|
|
||||||
DESCRIPTION.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
msg : str
|
|
||||||
Message with URLs.
|
|
||||||
"""
|
|
||||||
xpath_query = (
|
|
||||||
'//link[(@rel="alternate") and '
|
|
||||||
'(@type="application/atom+xml" or '
|
|
||||||
'@type="application/rdf+xml" or '
|
|
||||||
'@type="application/rss+xml")]'
|
|
||||||
)
|
|
||||||
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
|
|
||||||
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
|
|
||||||
feeds = tree.xpath(xpath_query)
|
|
||||||
if len(feeds) > 1:
|
|
||||||
msg = (
|
|
||||||
"RSS Auto-Discovery has found {} feeds:\n\n```\n"
|
|
||||||
).format(len(feeds))
|
|
||||||
for feed in feeds:
|
|
||||||
# # The following code works;
|
|
||||||
# # The following code will catch
|
|
||||||
# # only valid resources (i.e. not 404);
|
|
||||||
# # The following code requires more bandwidth.
|
|
||||||
# res = await download_feed(feed)
|
|
||||||
# if res[0]:
|
|
||||||
# disco = parse(res[0])
|
|
||||||
# title = disco["feed"]["title"]
|
|
||||||
# msg += "{} \n {} \n\n".format(title, feed)
|
|
||||||
feed_name = feed.xpath('@title')[0]
|
|
||||||
feed_addr = join_url(url, feed.xpath('@href')[0])
|
|
||||||
# if feed_addr.startswith("/"):
|
|
||||||
# feed_addr = url + feed_addr
|
|
||||||
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
|
|
||||||
msg += (
|
|
||||||
"```\nThe above feeds were extracted from\n{}"
|
|
||||||
).format(url)
|
|
||||||
return msg
|
|
||||||
elif feeds:
|
|
||||||
feed_addr = join_url(url, feeds[0].xpath('@href')[0])
|
|
||||||
return [feed_addr]
33  slixfeed/log.py  Normal file
@@ -0,0 +1,33 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
|
||||||
|
def markdown(timestamp, filename, jid, message):
|
||||||
|
"""
|
||||||
|
Log message to file.
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
----------
|
||||||
|
timestamp : str
|
||||||
|
Time stamp.
|
||||||
|
filename : str
|
||||||
|
Jabber ID as name of file.
|
||||||
|
jid : str
|
||||||
|
Jabber ID.
|
||||||
|
message : str
|
||||||
|
Message content.
|
||||||
|
|
||||||
|
Returns
|
||||||
|
-------
|
||||||
|
None.
|
||||||
|
|
||||||
|
"""
|
||||||
|
with open(filename + '.md', 'a') as file:
|
||||||
|
# entry = "{} {}:\n{}\n\n".format(timestamp, jid, message)
|
||||||
|
entry = (
|
||||||
|
"## {}\n"
|
||||||
|
"### {}\n\n"
|
||||||
|
"{}\n\n").format(jid, timestamp, message)
|
||||||
|
file.write(entry)
|
74  slixfeed/read.py  Normal file
@@ -0,0 +1,74 @@
|
||||||
|
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""

TODO

1) is_feed: Look into the type ("atom", "rss2" etc.)

"""


def title(feed):
    """
    Get title of feed.

    Parameters
    ----------
    feed : dict
        Parsed feed document.

    Returns
    -------
    title : str
        Title or None.
    """
    try:
        title = feed["feed"]["title"]
    except:
        title = None
    return title


def is_feed(feed):
    """
    Determine whether a document is a feed or not.

    Parameters
    ----------
    feed : dict
        Parsed feed.

    Returns
    -------
    val : boolean
        True or False.
    """
    msg = None
    if not feed.entries:
        try:
            feed["feed"]["title"]
            val = True
            # msg = (
            #     "Empty feed for {}"
            #     ).format(url)
        except:
            val = False
            # msg = (
            #     "No entries nor title for {}"
            #     ).format(url)
    elif feed.bozo:
        val = False
        # msg = (
        #     "Bozo detected for {}"
        #     ).format(url)
    else:
        val = True
        # msg = (
        #     "Good feed for {}"
        #     ).format(url)
    if msg:
        print(msg)
    return val
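For illustration, a minimal sketch of how these helpers combine with feedparser when validating a downloaded document; the `summarize` wrapper and the hostname fallback are assumptions that mirror how the rest of the code treats missing titles:

from urllib.parse import urlsplit

from feedparser import parse
import slixfeed.read as read

def summarize(url, document):
    # document is the raw XML body fetched from url.
    feed = parse(document)
    if not read.is_feed(feed):
        return None
    # Fall back to the hostname when the feed carries no title.
    return read.title(feed) or urlsplit(url).netloc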
@@ -222,9 +222,6 @@ async def remove_feed(db_file, ix):
            "FROM feeds "
            "WHERE id = ?"
            )
-        # cur
-        # for i in url:
-        #     url = i[0]
        url = cur.execute(sql, (ix,)).fetchone()[0]
        # NOTE Should we move DBLOCK to this line? 2022-12-23
        sql = (
@@ -246,8 +243,10 @@ async def remove_feed(db_file, ix):
        cur.execute(sql, (ix,))


+# TODO Rename function name
 async def is_feed_exist(db_file, url):
    """
+    Get Id and Name of feed.
    Check whether a feed exists.
    Query for feeds by given url.

@@ -270,8 +269,7 @@ async def is_feed_exist(db_file, url):
        "WHERE address = ?"
        )
    result = cur.execute(sql, (url,)).fetchone()
-    if result:
-        return True
+    return result


 async def get_number_of_items(db_file, table):
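With this change `is_feed_exist` returns the fetched row itself (or `None`) instead of a bare `True`. A hedged sketch of the intended call pattern, assuming the selected columns are the feed's id and name, as the message handler further below indexes them (`describe_subscription` is a hypothetical helper):

import slixfeed.sqlite as sqlite

async def describe_subscription(db_file, url):
    exist = await sqlite.is_feed_exist(db_file, url)
    if not exist:
        return "Not subscribed to {}".format(url)
    # exist is the (id, name) row of the existing feed.
    ix, name = exist[0], exist[1]
    return "\"{}\" is listed at index {}".format(name, ix)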
@@ -49,13 +49,14 @@ from slixfeed.config import (
     get_default_dbdir,
     get_value_default)
 from slixfeed.datetime import current_time
-from slixfeed.fetch import download_updates
+from slixfeed.action import organize_items
 from slixfeed.sqlite import (
-    get_unread_entries,
     get_feed_title,
-    get_settings_value,
+    get_feeds_url,
     get_number_of_items,
     get_number_of_entries_unread,
+    get_settings_value,
+    get_unread_entries,
     mark_as_read,
     mark_entry_as_read,
     delete_archived_entry
@@ -329,7 +330,9 @@ async def refresh_task(self, jid, callback, key, val=None):
     val : str, optional
         Value. The default is None.
     """
-    logging.debug("Refreshing task {} for JID {}".format(callback, jid))
+    logging.debug(
+        "Refreshing task {} for JID {}".format(callback, jid)
+        )
     if not val:
         db_file = get_pathname_to_database(jid)
         val = await get_settings_value(db_file, key)
@@ -340,7 +343,8 @@ async def refresh_task(self, jid, callback, key, val=None):
     except:
         logging.debug(
             "No task of type {} to cancel for "
-            "JID {} (clean_tasks)".format(key, jid))
+            "JID {} (clean_tasks)".format(key, jid)
+            )
     # task_manager[jid][key] = loop.call_at(
     #     loop.time() + 60 * float(val),
     #     loop.create_task,
@@ -378,10 +382,13 @@ async def check_updates(jid):
     jid : str
         Jabber ID.
     """
-    logging.debug("Scanning for updates for JID {}".format(jid))
+    logging.debug(
+        "Scanning for updates for JID {}".format(jid)
+        )
     while True:
         db_file = get_pathname_to_database(jid)
-        await download_updates(db_file)
+        urls = await get_feeds_url(db_file)
+        await organize_items(db_file, urls)
         val = get_value_default("settings", "Settings", "check")
         await asyncio.sleep(60 * float(val))
         # Schedule to call this function again in 90 minutes
@@ -394,12 +401,16 @@ async def check_updates(jid):


 async def start_tasks(self, presence):
     jid = presence["from"].bare
-    logging.debug("Beginning tasks for JID {}".format(jid))
+    logging.debug(
+        "Beginning tasks for JID {}".format(jid)
+        )
     if jid not in self.boundjid.bare:
         await clean_tasks_xmpp(
-            jid, ["interval", "status", "check"])
+            jid, ["interval", "status", "check"]
+            )
         await start_tasks_xmpp(
-            self, jid, ["interval", "status", "check"])
+            self, jid, ["interval", "status", "check"]
+            )
         # await task_jid(self, jid)
         # main_task.extend([asyncio.create_task(task_jid(jid))])
         # print(main_task)
@@ -408,9 +419,12 @@ async def start_tasks(self, presence):
 async def stop_tasks(self, presence):
     if not self.boundjid.bare:
         jid = presence["from"].bare
-        logging.debug("Stopping tasks for JID {}".format(jid))
+        logging.debug(
+            "Stopping tasks for JID {}".format(jid)
+            )
         await clean_tasks_xmpp(
-            jid, ["interval", "status", "check"])
+            jid, ["interval", "status", "check"]
+            )


 async def check_readiness(self, presence):
@@ -434,7 +448,9 @@ async def check_readiness(self, presence):

     jid = presence["from"].bare
     if presence["show"] in ("away", "dnd", "xa"):
-        logging.debug("Stopping updates for JID {}".format(jid))
+        logging.debug(
+            "Stopping updates for JID {}".format(jid)
+            )
         await clean_tasks_xmpp(
             jid, ["interval"])
         await start_tasks_xmpp(
@@ -477,7 +493,9 @@ async def select_file(self):
                 if (file.endswith(".db") and
                     not file.endswith(".db-jour.db")):
                     jid = file[:-3]
-                    main_task.extend([tg.create_task(self.task_jid(jid))])
+                    main_task.extend(
+                        [tg.create_task(self.task_jid(jid))]
+                        )
                     # main_task = [tg.create_task(self.task_jid(jid))]
                     # task_manager.update({jid: tg})
@@ -21,7 +21,7 @@ from urllib.parse import (
     parse_qs,
     urlencode,
     urljoin,
-    urlparse,
+    # urlparse,
     urlsplit,
     urlunsplit
     )
@@ -1,109 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""

TODO

1) is_feed: Look into the type ("atom", "rss2" etc.)

"""

from urllib.parse import urlsplit


def log_as_markdown(timestamp, filename, jid, message):
    """
    Log message to file.

    Parameters
    ----------
    timestamp : str
        Time stamp.
    filename : str
        Jabber ID as name of file.
    jid : str
        Jabber ID.
    message : str
        Message content.

    Returns
    -------
    None.

    """
    with open(filename + '.md', 'a') as file:
        # entry = "{} {}:\n{}\n\n".format(timestamp, jid, message)
        entry = (
            "## {}\n"
            "### {}\n\n"
            "{}\n\n").format(jid, timestamp, message)
        file.write(entry)


def get_title(url, feed):
    """
    Get title of feed.

    Parameters
    ----------
    url : str
        URL.
    feed : dict
        Parsed feed document.

    Returns
    -------
    title : str
        Title or URL hostname.
    """
    try:
        title = feed["feed"]["title"]
    except:
        title = urlsplit(url).netloc
    if not title:
        title = urlsplit(url).netloc
    return title


def is_feed(url, feed):
    """
    Determine whether document is feed or not.

    Parameters
    ----------
    url : str
        URL.
    feed : dict
        Parsed feed.

    Returns
    -------
    val : boolean
        True or False.
    """
    msg = None
    if not feed.entries:
        try:
            feed["feed"]["title"]
            val = True
            msg = (
                "Empty feed for {}"
                ).format(url)
        except:
            val = False
            msg = (
                "No entries nor title for {}"
                ).format(url)
    elif feed.bozo:
        val = False
        msg = (
            "Bozo detected for {}"
            ).format(url)
    else:
        val = True
        msg = (
            "Good feed for {}"
            ).format(url)
    print(msg)
    return val
@@ -48,26 +48,20 @@ NOTE
 """

 import asyncio
-from slixfeed.config import add_to_list, get_list, remove_from_list
-import slixfeed.fetch as fetcher
-from slixfeed.datetime import current_time
 import logging
 # import os
 from random import randrange
 import slixmpp
-from slixmpp.exceptions import IqError, IqTimeout
-import slixfeed.sqlite as sqlite
 import slixfeed.task as task
-import slixfeed.url as urlfixer
 from time import sleep

 from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
 # from slixmpp.plugins.xep_0402 import BookmarkStorage, Conference
 from slixmpp.plugins.xep_0048.stanza import Bookmarks

-import xmltodict
-import xml.etree.ElementTree as ET
-from lxml import etree
+# import xmltodict
+# import xml.etree.ElementTree as ET
+# from lxml import etree

 import slixfeed.xmpp.connect as connect
 import slixfeed.xmpp.process as process
@@ -17,7 +17,8 @@ async def recover_connection(self, event, message):
     # print(current_time(),"Maximum connection attempts exceeded.")
     # logging.error("Maximum connection attempts exceeded.")
     print(current_time(), "Attempt number", self.connection_attempts)
-    seconds = (get_value("accounts", "XMPP Connect", "reconnect_timeout")) or 30
+    seconds = (get_value(
+        "accounts", "XMPP Connect", "reconnect_timeout")) or 30
     seconds = int(seconds)
     print(current_time(), "Next attempt within", seconds, "seconds")
     # NOTE asyncio.sleep doesn't interval as expected
@@ -19,19 +19,22 @@ TODO
 """

 import os
+import slixfeed.action as action
 from slixfeed.config import (
     add_to_list,
     get_default_dbdir,
     get_value,
     get_pathname_to_database,
     remove_from_list)
+import slixfeed.crawl as crawl
 from slixfeed.datetime import current_time, timestamp
 import slixfeed.export as export
-import slixfeed.fetch as fetcher
+import slixfeed.fetch as fetch
 import slixfeed.opml as opml
 import slixfeed.sqlite as sqlite
 import slixfeed.task as task
-import slixfeed.utility as utility
+import slixfeed.log as log
+import slixfeed.read as read
 import slixfeed.url as uri
 import slixfeed.xmpp.bookmark as bookmark
 import slixfeed.xmpp.compose as compose
@@ -40,6 +43,7 @@ import slixfeed.xmpp.status as status
 import slixfeed.xmpp.text as text
 import slixfeed.xmpp.upload as upload
 from slixfeed.xmpp.utility import jid_type
+from urllib.parse import urlsplit, urlunsplit


 async def event(self, event):
@@ -210,20 +214,35 @@ async def message(self, message):
             # else:
             #     response = "This command is valid for groupchat only."
             case _ if message_lowercase.startswith("add"):
+                # Add given feed without validity check.
                 message_text = message_text[4:]
                 url = message_text.split(" ")[0]
                 title = " ".join(message_text.split(" ")[1:])
                 if url.startswith("http"):
                     db_file = get_pathname_to_database(jid)
-                    response = await fetcher.add_feed_no_check(db_file, [url, title])
-                    old = await sqlite.get_settings_value(db_file, "old")
-                    if old:
-                        await task.clean_tasks_xmpp(jid, ["status"])
-                        # await send_status(jid)
-                        await task.start_tasks_xmpp(self, jid, ["status"])
+                    exist = await sqlite.is_feed_exist(db_file, url)
+                    if not exist:
+                        await sqlite.insert_feed(db_file, url, title)
+                        await action.organize_items(db_file, [url])
+                        old = await sqlite.get_settings_value(db_file, "old")
+                        if old:
+                            await task.clean_tasks_xmpp(jid, ["status"])
+                            # await send_status(jid)
+                            await task.start_tasks_xmpp(self, jid, ["status"])
+                        else:
+                            await sqlite.mark_source_as_read(db_file, url)
+                        response = (
+                            "> {}\nNews source has been "
+                            "added to subscription list."
+                            ).format(url)
                     else:
-                        db_file = get_pathname_to_database(jid)
-                        await sqlite.mark_source_as_read(db_file, url)
+                        ix = exist[0]
+                        name = exist[1]
+                        response = (
+                            "> {}\nNews source \"{}\" is already "
+                            "listed in the subscription list at "
+                            "index {}".format(url, name, ix)
+                            )
                 else:
                     response = "Missing URL."
                 send_reply_message(self, message, response)
@@ -388,31 +407,13 @@ async def message(self, message):
                     send_status_message(self, jid, status_type, status_message)
                     if url.startswith("feed:"):
                         url = uri.feed_to_http(url)
-                    # url_alt = await uri.replace_hostname(url, "feed")
-                    # if url_alt:
-                    #     url = url_alt
                     url = (uri.replace_hostname(url, "feed")) or url
                     db_file = get_pathname_to_database(jid)
-                    response = await fetcher.add_feed(db_file, url)
-                    await task.start_tasks_xmpp(self, jid, ["status"])
-                    # response = "> " + message + "\n" + response
-                    # FIXME Make the taskhandler to update status message
-                    # await refresh_task(
-                    #     self,
-                    #     jid,
-                    #     send_status,
-                    #     "status",
-                    #     20
-                    # )
-                    # NOTE This would show the number of new unread entries
-                    old = await sqlite.get_settings_value(db_file, "old")
-                    if old:
-                        await task.clean_tasks_xmpp(jid, ["status"])
-                        # await send_status(jid)
-                        await task.start_tasks_xmpp(self, jid, ["status"])
-                    else:
-                        db_file = get_pathname_to_database(jid)
-                        await sqlite.mark_source_as_read(db_file, url)
+                    response = await action.add_feed(db_file, url)
+                    await task.clean_tasks_xmpp(
+                        jid, ["status"])
+                    await task.start_tasks_xmpp(
+                        self, jid, ["status"])
                     send_reply_message(self, message, response)
             case _ if message_lowercase.startswith("feeds"):
                 query = message_text[6:]
@@ -521,7 +522,7 @@ async def message(self, message):
                 send_reply_message(self, message, response)
             case "new":
                 db_file = get_pathname_to_database(jid)
-                sqlite.set_settings_value(db_file, ["old", 0])
+                await sqlite.set_settings_value(db_file, ["old", 0])
                 response = (
                     "Only new items of newly added feeds will be sent."
                     )
@@ -581,7 +582,8 @@ async def message(self, message):
                 data = message_text[5:]
                 data = data.split()
                 url = data[0]
-                await task.clean_tasks_xmpp(jid, ["status"])
+                await task.clean_tasks_xmpp(
+                    jid, ["status"])
                 status_type = "dnd"
                 status_message = (
                     "📫️ Processing request to fetch data from {}"
@@ -593,13 +595,13 @@ async def message(self, message):
                 match len(data):
                     case 1:
                         if url.startswith("http"):
-                            response = await fetcher.view_feed(url)
+                            response = await action.view_feed(url)
                         else:
                             response = "Missing URL."
                     case 2:
                         num = data[1]
                         if url.startswith("http"):
-                            response = await fetcher.view_entry(url, num)
+                            response = await action.view_entry(url, num)
                         else:
                             response = "Missing URL."
                     case _:
@@ -627,15 +629,15 @@ async def message(self, message):
                     response = "Missing value."
                 send_reply_message(self, message, response)
             # NOTE Should people be asked for numeric value?
-            case _ if message_lowercase.startswith("remove"):
+            case _ if message_lowercase.startswith("remove "):
                 ix = message_text[7:]
                 if ix:
                     db_file = get_pathname_to_database(jid)
                     try:
                         await sqlite.remove_feed(db_file, ix)
                         response = (
-                            "> {}\nNews source has been removed "
-                            "from subscription list.").format(url)
+                            "News source {} has been removed "
+                            "from subscription list.").format(ix)
                         # await refresh_task(
                         #     self,
                         #     jid,
@@ -643,10 +645,13 @@ async def message(self, message):
                         #     "status",
                         #     20
                         # )
-                        await task.clean_tasks_xmpp(jid, ["status"])
-                        await task.start_tasks_xmpp(self, jid, ["status"])
+                        await task.clean_tasks_xmpp(
+                            jid, ["status"])
+                        await task.start_tasks_xmpp(
+                            self, jid, ["status"])
                     except:
-                        response = "No news source with ID {}.".format(ix)
+                        response = (
+                            "No news source with ID {}.".format(ix))
                 else:
                     response = "Missing feed ID."
                 send_reply_message(self, message, response)
@@ -655,7 +660,8 @@ async def message(self, message):
                 await task.clean_tasks_xmpp(jid, ["status"])
                 status_type = "dnd"
                 status_message = "📫️ Marking entries as read..."
-                send_status_message(self, jid, status_type, status_message)
+                send_status_message(
+                    self, jid, status_type, status_message)
                 if source:
                     db_file = get_pathname_to_database(jid)
                     await sqlite.mark_source_as_read(db_file, source)
|
||||||
key = "enabled"
|
key = "enabled"
|
||||||
val = 1
|
val = 1
|
||||||
db_file = get_pathname_to_database(jid)
|
db_file = get_pathname_to_database(jid)
|
||||||
await sqlite.set_settings_value(db_file, [key, val])
|
await sqlite.set_settings_value(
|
||||||
|
db_file, [key, val])
|
||||||
# asyncio.create_task(task_jid(self, jid))
|
# asyncio.create_task(task_jid(self, jid))
|
||||||
await task.start_tasks_xmpp(self, jid, ["interval", "status", "check"])
|
await task.start_tasks_xmpp(
|
||||||
|
self, jid, ["interval", "status", "check"])
|
||||||
response = "Updates are enabled."
|
response = "Updates are enabled."
|
||||||
# print(current_time(), "task_manager[jid]")
|
# print(current_time(), "task_manager[jid]")
|
||||||
# print(task_manager[jid])
|
# print(task_manager[jid])
|
||||||
|
@@ -747,13 +755,17 @@ async def message(self, message):
                     key = "enabled"
                     val = 0
                     db_file = get_pathname_to_database(jid)
-                    await sqlite.set_settings_value(db_file, [key, val])
-                    await task.clean_tasks_xmpp(jid, ["interval", "status"])
+                    await sqlite.set_settings_value(
+                        db_file, [key, val])
+                    await task.clean_tasks_xmpp(
+                        jid, ["interval", "status"])
                     response = "Updates are disabled."
                     send_reply_message(self, message, response)
                     status_type = "xa"
-                    status_message = "💡️ Send \"Start\" to receive Jabber updates"
-                    send_status_message(self, jid, status_type, status_message)
+                    status_message = (
+                        "💡️ Send \"Start\" to receive Jabber updates")
+                    send_status_message(
+                        self, jid, status_type, status_message)
                 case "support":
                     # TODO Send an invitation.
                     response = (
@@ -789,10 +801,10 @@ async def message(self, message):
                 os.mkdir(data_dir)
             if not os.path.isdir(data_dir + '/logs/'):
                 os.mkdir(data_dir + '/logs/')
-            utility.log_as_markdown(
+            log.markdown(
                 current_time(), os.path.join(data_dir, "logs", jid),
                 jid, message_text)
-            utility.log_as_markdown(
+            log.markdown(
                 current_time(), os.path.join(data_dir, "logs", jid),
                 self.boundjid.bare, response)