Forked from sch/Slixfeed

Segregate code into more particular functions

parent 96f3369539
commit 7135994888
13 changed files with 995 additions and 937 deletions

slixfeed/action.py (new file, 369 lines)
@ -0,0 +1,369 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from asyncio.exceptions import IncompleteReadError
|
||||
from bs4 import BeautifulSoup
|
||||
from http.client import IncompleteRead
|
||||
from feedparser import parse
|
||||
import slixfeed.config as config
|
||||
import slixfeed.crawl as crawl
|
||||
from slixfeed.datetime import now, rfc2822_to_iso8601
|
||||
import slixfeed.fetch as fetch
|
||||
import slixfeed.sqlite as sqlite
|
||||
import slixfeed.read as read
|
||||
import slixfeed.task as task
|
||||
from slixfeed.url import complete_url, join_url, trim_url
|
||||
from urllib import error
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
|
||||
async def add_feed(db_file, url):
|
||||
while True:
|
||||
exist = await sqlite.is_feed_exist(db_file, url)
|
||||
if not exist:
|
||||
result = await fetch.download_feed([url])
|
||||
document = result[0]
|
||||
status = result[1]
|
||||
if document:
|
||||
feed = parse(document)
|
||||
# if read.is_feed(url, feed):
|
||||
if read.is_feed(feed):
|
||||
try:
|
||||
title = feed["feed"]["title"]
|
||||
except:
|
||||
title = urlsplit(url).netloc
|
||||
await sqlite.insert_feed(
|
||||
db_file, url, title, status)
|
||||
await organize_items(
|
||||
db_file, [url])
|
||||
old = await sqlite.get_settings_value(
|
||||
db_file, "old")
|
||||
if not old:
|
||||
await sqlite.mark_source_as_read(
|
||||
db_file, url)
|
||||
response = (
|
||||
"> {}\nNews source {} has been "
|
||||
"added to subscription list."
|
||||
).format(url, title)
|
||||
break
|
||||
else:
|
||||
result = await crawl.probe_page(
|
||||
url, document)
|
||||
# TODO Check the length and write a
# unified message for a set of feeds.
# Use logging if you wish to
# distinguish between the methods
|
||||
if isinstance(result, list):
|
||||
url = result[0]
|
||||
elif isinstance(result, str):
|
||||
response = result
|
||||
break
|
||||
else:
|
||||
response = (
|
||||
"> {}\nFailed to load URL. Reason: {}"
|
||||
).format(url, status)
|
||||
break
|
||||
else:
|
||||
ix = exist[0]
|
||||
name = exist[1]
|
||||
response = (
|
||||
"> {}\nNews source \"{}\" is already "
|
||||
"listed in the subscription list at "
|
||||
"index {}".format(url, name, ix)
|
||||
)
|
||||
break
|
||||
return response
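A rough usage sketch of add_feed() as defined above; the database path and feed URL are placeholders, not values from this commit:
import asyncio
import slixfeed.action as action

async def main():
    # add_feed() probes the URL, stores the feed and returns a status message
    response = await action.add_feed('slixfeed.db', 'https://example.org/feed.xml')
    print(response)

asyncio.run(main())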
|
||||
|
||||
|
||||
async def view_feed(url):
|
||||
while True:
|
||||
result = await fetch.download_feed([url])
|
||||
document = result[0]
|
||||
status = result[1]
|
||||
if document:
|
||||
feed = parse(document)
|
||||
# if read.is_feed(url, feed):
|
||||
if read.is_feed(feed):
|
||||
try:
|
||||
title = feed["feed"]["title"]
|
||||
except:
|
||||
title = urlsplit(url).netloc
|
||||
entries = feed.entries
|
||||
response = "Preview of {}:\n\n```\n".format(title)
|
||||
counter = 0
|
||||
for entry in entries:
|
||||
counter += 1
|
||||
if entry.has_key("title"):
|
||||
title = entry.title
|
||||
else:
|
||||
title = "*** No title ***"
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = join_url(url, entry.link)
|
||||
link = trim_url(link)
|
||||
else:
|
||||
link = "*** No link ***"
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = rfc2822_to_iso8601(date)
|
||||
else:
|
||||
date = "*** No date ***"
|
||||
response += (
|
||||
"Title : {}\n"
|
||||
"Date : {}\n"
|
||||
"Link : {}\n"
|
||||
"Count : {}\n"
|
||||
"\n"
|
||||
).format(title, date, link, counter)
|
||||
if counter > 4:
|
||||
break
|
||||
response += (
|
||||
"```\nSource: {}"
|
||||
).format(url)
|
||||
break
|
||||
else:
|
||||
result = await crawl.probe_page(
|
||||
url, document)
|
||||
# TODO Check the length and write a
# unified message for a set of feeds.
# Use logging if you wish to
# distinguish between the methods
|
||||
if isinstance(result, list):
|
||||
url = result[0]
|
||||
elif isinstance(result, str):
|
||||
response = result
|
||||
break
|
||||
else:
|
||||
response = (
|
||||
"> {}\nFailed to load URL. Reason: {}"
|
||||
).format(url, status)
|
||||
break
|
||||
return response
|
||||
|
||||
|
||||
async def view_entry(url, num):
|
||||
while True:
|
||||
result = await fetch.download_feed([url])
|
||||
document = result[0]
|
||||
status = result[1]
|
||||
if document:
|
||||
feed = parse(document)
|
||||
# if read.is_feed(url, feed):
|
||||
if read.is_feed(feed):
|
||||
try:
|
||||
title = feed["feed"]["title"]
|
||||
except:
|
||||
title = urlsplit(url).netloc
|
||||
entries = feed.entries
|
||||
num = int(num) - 1
|
||||
entry = entries[num]
|
||||
response = "Preview of {}:\n\n```\n".format(title)
|
||||
if entry.has_key("title"):
|
||||
title = entry.title
|
||||
else:
|
||||
title = "*** No title ***"
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = rfc2822_to_iso8601(date)
|
||||
else:
|
||||
date = "*** No date ***"
|
||||
if entry.has_key("summary"):
|
||||
summary = entry.summary
|
||||
# Remove HTML tags
|
||||
summary = BeautifulSoup(summary, "lxml").text
|
||||
# TODO Limit text length
|
||||
summary = summary.replace("\n\n\n", "\n\n")
|
||||
else:
|
||||
summary = "*** No summary ***"
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = join_url(url, entry.link)
|
||||
link = trim_url(link)
|
||||
else:
|
||||
link = "*** No link ***"
|
||||
response = (
|
||||
"{}\n"
|
||||
"\n"
|
||||
# "> {}\n"
|
||||
"{}\n"
|
||||
"\n"
|
||||
"{}\n"
|
||||
"\n"
|
||||
).format(title, summary, link)
|
||||
break
|
||||
else:
|
||||
result = await crawl.probe_page(
|
||||
url, document)
|
||||
# TODO Check the length and write a
# unified message for a set of feeds.
# Use logging if you wish to
# distinguish between the methods
|
||||
if isinstance(result, list):
|
||||
url = result[0]
|
||||
elif isinstance(result, str):
|
||||
response = result
|
||||
break
|
||||
else:
|
||||
response = (
|
||||
"> {}\nFailed to load URL. Reason: {}"
|
||||
).format(url, status)
|
||||
break
|
||||
return response
|
||||
|
||||
|
||||
# NOTE Why (if res[0]) and (if res[1] == 200)?
|
||||
async def organize_items(db_file, urls):
|
||||
"""
|
||||
Check feeds for new entries.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
urls : list
List of feed URLs.
|
||||
"""
|
||||
for url in urls:
|
||||
# print(os.path.basename(db_file), url[0])
|
||||
source = url[0]
|
||||
res = await fetch.download_feed(source)
|
||||
# TypeError: 'NoneType' object is not subscriptable
|
||||
if res is None:
|
||||
# Skip to next feed
|
||||
# urls.next()
|
||||
# next(urls)
|
||||
continue
|
||||
await sqlite.update_source_status(
|
||||
db_file, res[1], source)
|
||||
if res[0]:
|
||||
try:
|
||||
feed = parse(res[0])
|
||||
if feed.bozo:
|
||||
# bozo = (
|
||||
# "WARNING: Bozo detected for feed: {}\n"
|
||||
# "For more information, visit "
|
||||
# "https://pythonhosted.org/feedparser/bozo.html"
|
||||
# ).format(source)
|
||||
# print(bozo)
|
||||
valid = 0
|
||||
else:
|
||||
valid = 1
|
||||
await sqlite.update_source_validity(
|
||||
db_file, source, valid)
|
||||
except (
|
||||
IncompleteReadError,
|
||||
IncompleteRead,
|
||||
error.URLError
|
||||
) as e:
|
||||
# print(e)
|
||||
# TODO Print error to log
|
||||
None
|
||||
# NOTE I don't think there should be "return"
|
||||
# because then we might stop scanning next URLs
|
||||
# return
|
||||
# TODO Place these couple of lines back down
|
||||
# NOTE Need to correct the SQL statement to do so
|
||||
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
||||
if res[1] == 200:
|
||||
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
||||
# TODO Place these couple of lines back down
|
||||
# NOTE Need to correct the SQL statement to do so
|
||||
entries = feed.entries
|
||||
# length = len(entries)
|
||||
# await remove_entry(db_file, source, length)
|
||||
await sqlite.remove_nonexistent_entries(
|
||||
db_file, feed, source)
|
||||
# new_entry = 0
|
||||
for entry in entries:
|
||||
# TODO Pass date too for comparison check
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = rfc2822_to_iso8601(date)
|
||||
else:
|
||||
# TODO Just set date = "*** No date ***"
|
||||
# date = await datetime.now().isoformat()
|
||||
date = now()
|
||||
# NOTE Would seconds result in better database performance?
|
||||
# date = datetime.datetime(date)
|
||||
# date = (date-datetime.datetime(1970,1,1)).total_seconds()
|
||||
if entry.has_key("title"):
|
||||
title = entry.title
|
||||
# title = "{}: *{}*".format(feed["feed"]["title"], entry.title)
|
||||
else:
|
||||
title = date
|
||||
# title = feed["feed"]["title"]
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = join_url(source, entry.link)
|
||||
link = trim_url(link)
|
||||
else:
|
||||
link = source
|
||||
if entry.has_key("id"):
|
||||
eid = entry.id
|
||||
else:
|
||||
eid = link
|
||||
exist = await sqlite.check_entry_exist(
|
||||
db_file, source, eid=eid,
|
||||
title=title, link=link, date=date)
|
||||
if not exist:
|
||||
# new_entry = new_entry + 1
|
||||
# TODO Enhance summary
|
||||
if entry.has_key("summary"):
|
||||
summary = entry.summary
|
||||
# # Remove HTML tags
|
||||
# summary = BeautifulSoup(summary, "lxml").text
|
||||
# # TODO Limit text length
|
||||
# summary = summary.replace("\n\n\n", "\n\n")
|
||||
# summary = summary[:300] + " […]⃨"
|
||||
# summary = summary.strip().split('\n')
|
||||
# summary = ["> " + line for line in summary]
|
||||
# summary = "\n".join(summary)
|
||||
else:
|
||||
summary = "> *** No summary ***"
|
||||
read_status = 0
|
||||
pathname = urlsplit(link).path
|
||||
string = (
|
||||
"{} {} {}"
|
||||
).format(
|
||||
title,
|
||||
summary,
|
||||
pathname
|
||||
)
|
||||
allow_list = await config.is_listed(
|
||||
db_file, "filter-allow", string)
|
||||
if not allow_list:
|
||||
reject_list = await config.is_listed(
|
||||
db_file, "filter-deny", string)
|
||||
if reject_list:
|
||||
# print(">>> REJECTED", title)
|
||||
summary = (
|
||||
"REJECTED {}".format(
|
||||
reject_list.upper()
|
||||
)
|
||||
)
|
||||
# summary = ""
|
||||
read_status = 1
|
||||
entry = (
|
||||
title, link, eid, source, date, read_status)
|
||||
if isinstance(date, int):
|
||||
print("PROBLEM: date is int")
|
||||
print(date)
|
||||
# breakpoint()
|
||||
# print(source)
|
||||
# print(date)
|
||||
await sqlite.add_entry_and_set_date(
|
||||
db_file, source, entry)
|
||||
# print(current_time(), entry, title)
|
||||
# else:
|
||||
# print(current_time(), exist, title)
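A rough driver sketch for organize_items(); it assumes a local database path, and relies on sqlite.get_feeds_url() returning rows whose first element is the feed address, as used above:
import asyncio
import slixfeed.action as action
import slixfeed.sqlite as sqlite

async def scan(db_file):
    # organize_items() reads the address from position 0 of each row
    urls = await sqlite.get_feeds_url(db_file)
    await action.organize_items(db_file, urls)

asyncio.run(scan('slixfeed.db'))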
|
||||
|
||||
|
|
@ -59,8 +59,9 @@ def get_value(filename, section, keys):
|
|||
for key in keys:
|
||||
try:
|
||||
value = section_res[key]
|
||||
logging.debug("Found value {} for key {}".format(
|
||||
value, key))
|
||||
logging.debug(
|
||||
"Found value {} for key {}".format(value, key)
|
||||
)
|
||||
except:
|
||||
value = ''
|
||||
logging.error("Missing key:", key)
|
||||
|
@ -70,7 +71,8 @@ def get_value(filename, section, keys):
|
|||
try:
|
||||
result = section_res[key]
|
||||
logging.debug(
|
||||
"Found value {} for key {}".format(result, key))
|
||||
"Found value {} for key {}".format(result, key)
|
||||
)
|
||||
except:
|
||||
result = ''
|
||||
# logging.error("Missing key:", key)
|
||||
|
@ -78,7 +80,8 @@ def get_value(filename, section, keys):
|
|||
logging.error(
|
||||
"Check configuration file {}.ini for "
|
||||
"missing key(s) \"{}\" under section [{}].".format(
|
||||
filename, keys, section))
|
||||
filename, keys, section)
|
||||
)
|
||||
else:
|
||||
return result
|
||||
|
||||
|
@ -171,7 +174,9 @@ def get_default_dbdir():
|
|||
else:
|
||||
return os.path.abspath('.')
|
||||
else:
|
||||
data_home = os.path.join(os.environ.get('HOME'), '.local', 'share')
|
||||
data_home = os.path.join(
|
||||
os.environ.get('HOME'), '.local', 'share'
|
||||
)
|
||||
return os.path.join(data_home, 'slixfeed')
|
||||
|
||||
|
||||
|
@ -200,7 +205,9 @@ def get_default_confdir():
|
|||
else:
|
||||
return os.path.abspath('.')
|
||||
else:
|
||||
config_home = os.path.join(os.environ.get('HOME'), '.config')
|
||||
config_home = os.path.join(
|
||||
os.environ.get('HOME'), '.config'
|
||||
)
|
||||
return os.path.join(config_home, 'slixfeed')
|
||||
|
||||
|
||||
|
|
slixfeed/crawl.py (new file, 382 lines)
|
@ -0,0 +1,382 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
1.1) Do not compose messages.

1.2) Return URLs and nothing else (i.e. no processed messages).

1.3) Correction of URLs is acceptable.
|
||||
|
||||
"""
|
||||
|
||||
from aiohttp import ClientError, ClientSession, ClientTimeout
|
||||
from feedparser import parse
|
||||
from lxml import html
|
||||
import slixfeed.config as config
|
||||
from slixfeed.fetch import download_feed
|
||||
from slixfeed.url import complete_url, join_url, trim_url
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
|
||||
|
||||
# TODO Use boolean as a flag to determine whether a single URL was found
|
||||
# async def probe_page(
|
||||
# callback, url, document, num=None, db_file=None):
|
||||
# result = None
|
||||
# try:
|
||||
# # tree = etree.fromstring(res[0]) # etree is for xml
|
||||
# tree = html.fromstring(document)
|
||||
# except:
|
||||
# result = (
|
||||
# "> {}\nFailed to parse URL as feed."
|
||||
# ).format(url)
|
||||
# if not result:
|
||||
# print("RSS Auto-Discovery Engaged")
|
||||
# result = await feed_mode_auto_discovery(url, tree)
|
||||
# if not result:
|
||||
# print("RSS Scan Mode Engaged")
|
||||
# result = await feed_mode_scan(url, tree)
|
||||
# if not result:
|
||||
# print("RSS Arbitrary Mode Engaged")
|
||||
# result = await feed_mode_request(url, tree)
|
||||
# if not result:
|
||||
# result = (
|
||||
# "> {}\nNo news feeds were found for URL."
|
||||
# ).format(url)
|
||||
# # elif msg:
|
||||
# else:
|
||||
# if isinstance(result, str):
|
||||
# return result
|
||||
# elif isinstance(result, list):
|
||||
# url = result[0]
|
||||
# if db_file:
|
||||
# # print("if db_file", db_file)
|
||||
# return await callback(db_file, url)
|
||||
# elif num:
|
||||
# return await callback(url, num)
|
||||
# else:
|
||||
# return await callback(url)
|
||||
|
||||
|
||||
async def probe_page(url, document):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
document : str
Downloaded document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : list or str
|
||||
Single URL as list or selection of URLs as str.
|
||||
"""
|
||||
result = None
|
||||
try:
|
||||
# tree = etree.fromstring(res[0]) # etree is for xml
|
||||
tree = html.fromstring(document)
|
||||
except:
|
||||
result = (
|
||||
"> {}\nFailed to parse URL as feed."
|
||||
).format(url)
|
||||
if not result:
|
||||
print("RSS Auto-Discovery Engaged")
|
||||
result = await feed_mode_auto_discovery(url, tree)
|
||||
if not result:
|
||||
print("RSS Scan Mode Engaged")
|
||||
result = await feed_mode_scan(url, tree)
|
||||
if not result:
|
||||
print("RSS Arbitrary Mode Engaged")
|
||||
result = await feed_mode_request(url, tree)
|
||||
if not result:
|
||||
result = (
|
||||
"> {}\nNo news feeds were found for URL."
|
||||
).format(url)
|
||||
return result
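probe_page() tries auto-discovery, then a page scan, then path guessing, and returns as soon as one mode succeeds. A rough driver sketch, using an example URL as a placeholder:
import asyncio
import slixfeed.crawl as crawl
import slixfeed.fetch as fetch

async def resolve(url):
    result = await fetch.download_feed([url])
    document = result[0]
    # A list result from probe_page() carries a single discovered feed URL;
    # a string result is a ready-made reply describing what was found.
    return await crawl.probe_page(url, document)

print(asyncio.run(resolve('https://example.org/')))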
|
||||
|
||||
|
||||
# TODO Improve scan by gradual decreasing of path
|
||||
async def feed_mode_request(url, tree):
|
||||
"""
|
||||
Look up feeds by pathname using HTTP requests.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
URL.
tree : lxml.html.HtmlElement
Parsed HTML document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
feeds = {}
|
||||
parted_url = urlsplit(url)
|
||||
paths = config.get_list("lists.yaml")
|
||||
paths = paths["pathnames"]
|
||||
for path in paths:
|
||||
address = urlunsplit([
|
||||
parted_url.scheme,
|
||||
parted_url.netloc,
|
||||
path,
|
||||
None,
|
||||
None
|
||||
])
|
||||
res = await download_feed(address)
|
||||
if res[1] == 200:
|
||||
# print(parse(res[0])["feed"]["title"])
|
||||
# feeds[address] = parse(res[0])["feed"]["title"]
|
||||
try:
|
||||
title = parse(res[0])["feed"]["title"]
|
||||
except:
|
||||
title = '*** No Title ***'
|
||||
feeds[address] = title
|
||||
# Check whether URL has path (i.e. not root)
|
||||
# Check parted_url.path to avoid error in case root wasn't given
|
||||
# TODO Make more tests
|
||||
if parted_url.path and parted_url.path.split('/')[1]:
|
||||
paths.extend(
|
||||
[".atom", ".feed", ".rdf", ".rss"]
|
||||
) if '.rss' not in paths else -1
|
||||
# if paths.index('.rss'):
|
||||
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
|
||||
address = urlunsplit([
|
||||
parted_url.scheme,
|
||||
parted_url.netloc,
|
||||
parted_url.path.split('/')[1] + path,
|
||||
None,
|
||||
None
|
||||
])
|
||||
res = await download_feed(address)
|
||||
if res[1] == 200:
|
||||
try:
|
||||
feeds[address] = parse(res[0])
|
||||
# print(feeds)
|
||||
except:
|
||||
continue
|
||||
if len(feeds) > 1:
|
||||
counter = 0
|
||||
msg = (
|
||||
"RSS URL discovery has found {} feeds:\n\n```\n"
|
||||
).format(len(feeds))
|
||||
feed_mark = 0
|
||||
for feed in feeds:
|
||||
try:
|
||||
feed_name = feeds[feed]["feed"]["title"]
|
||||
except:
|
||||
feed_name = urlsplit(feed).netloc
|
||||
feed_addr = feed
|
||||
# AttributeError: 'str' object has no attribute 'entries'
|
||||
try:
|
||||
feed_amnt = len(feeds[feed].entries)
|
||||
except:
|
||||
continue
|
||||
if feed_amnt:
|
||||
# NOTE Because there could be many false positives
|
||||
# which are revealed in second phase of scan, we
|
||||
# could end with a single feed, which would be
|
||||
# listed instead of fetched, so feed_mark is
|
||||
# utilized in order to make fetch possible.
|
||||
feed_mark = [feed_addr]
|
||||
counter += 1
|
||||
msg += (
|
||||
"Title: {}\n"
|
||||
"Link : {}\n"
|
||||
"Items: {}\n"
|
||||
"\n"
|
||||
).format(feed_name, feed_addr, feed_amnt)
|
||||
if counter > 1:
|
||||
msg += (
|
||||
"```\nThe above feeds were extracted from\n{}"
|
||||
).format(url)
|
||||
elif feed_mark:
|
||||
return feed_mark
|
||||
else:
|
||||
msg = (
|
||||
"No feeds were found for {}"
|
||||
).format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
return feeds
|
||||
|
||||
|
||||
async def feed_mode_scan(url, tree):
|
||||
"""
|
||||
Scan page for potential feeds by pathname.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
URL.
tree : lxml.html.HtmlElement
Parsed HTML document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
feeds = {}
|
||||
# paths = []
|
||||
# TODO Test
|
||||
paths = config.get_list("lists.yaml")
|
||||
paths = paths["pathnames"]
|
||||
for path in paths:
|
||||
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
|
||||
# xpath_query = "//a[contains(@href,'{}')]".format(path)
|
||||
num = 5
|
||||
xpath_query = "(//a[contains(@href,'{}')])[position()<={}]".format(path, num)
|
||||
addresses = tree.xpath(xpath_query)
|
||||
xpath_query = "(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num)
|
||||
addresses += tree.xpath(xpath_query)
|
||||
parted_url = urlsplit(url)
|
||||
# NOTE Should number of addresses be limited or
|
||||
# perhaps be N from the start and N from the end
|
||||
for address in addresses:
|
||||
# print(address.xpath('@href')[0])
|
||||
# print(addresses)
|
||||
address = address.xpath('@href')[0]
|
||||
if "/" not in address:
|
||||
protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
pathname = address
|
||||
address = urlunsplit([
|
||||
protocol,
|
||||
hostname,
|
||||
pathname,
|
||||
None,
|
||||
None
|
||||
])
|
||||
if address.startswith('/'):
|
||||
protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
pathname = address
|
||||
address = urlunsplit([
|
||||
protocol,
|
||||
hostname,
|
||||
pathname,
|
||||
None,
|
||||
None
|
||||
])
|
||||
res = await download_feed(address)
|
||||
if res[1] == 200:
|
||||
try:
|
||||
feeds[address] = parse(res[0])
|
||||
# print(feeds[address])
|
||||
# breakpoint()
|
||||
# print(feeds)
|
||||
except:
|
||||
continue
|
||||
if len(feeds) > 1:
|
||||
# print(feeds)
|
||||
# breakpoint()
|
||||
counter = 0
|
||||
msg = (
|
||||
"RSS URL scan has found {} feeds:\n\n```\n"
|
||||
).format(len(feeds))
|
||||
feed_mark = 0
|
||||
for feed in feeds:
|
||||
# try:
|
||||
# res = await download_feed(feed)
|
||||
# except:
|
||||
# continue
|
||||
try:
|
||||
feed_name = feeds[feed]["feed"]["title"]
|
||||
except:
|
||||
feed_name = urlsplit(feed).netloc
|
||||
feed_addr = feed
|
||||
feed_amnt = len(feeds[feed].entries)
|
||||
if feed_amnt:
|
||||
# NOTE Because there could be many false positives
|
||||
# which are revealed in second phase of scan, we
|
||||
# could end with a single feed, which would be
|
||||
# listed instead of fetched, so feed_mark is
|
||||
# utilized in order to make fetch possible.
|
||||
feed_mark = [feed_addr]
|
||||
counter += 1
|
||||
msg += (
|
||||
"Title : {}\n"
|
||||
"Link : {}\n"
|
||||
"Count : {}\n"
|
||||
"\n"
|
||||
).format(feed_name, feed_addr, feed_amnt)
|
||||
if counter > 1:
|
||||
msg += (
|
||||
"```\nThe above feeds were extracted from\n{}"
|
||||
).format(url)
|
||||
elif feed_mark:
|
||||
return feed_mark
|
||||
else:
|
||||
msg = (
|
||||
"No feeds were found for {}"
|
||||
).format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
return feeds
|
||||
|
||||
|
||||
async def feed_mode_auto_discovery(url, tree):
|
||||
"""
|
||||
Look up feeds using the RSS autodiscovery technique.
|
||||
|
||||
See: https://www.rssboard.org/rss-autodiscovery
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
URL.
tree : lxml.html.HtmlElement
Parsed HTML document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
xpath_query = (
|
||||
'//link[(@rel="alternate") and '
|
||||
'(@type="application/atom+xml" or '
|
||||
'@type="application/rdf+xml" or '
|
||||
'@type="application/rss+xml")]'
|
||||
)
|
||||
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
|
||||
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
|
||||
feeds = tree.xpath(xpath_query)
|
||||
if len(feeds) > 1:
|
||||
msg = (
|
||||
"RSS Auto-Discovery has found {} feeds:\n\n```\n"
|
||||
).format(len(feeds))
|
||||
for feed in feeds:
|
||||
# # The following code works;
|
||||
# # The following code will catch
|
||||
# # only valid resources (i.e. not 404);
|
||||
# # The following code requires more bandwidth.
|
||||
# res = await download_feed(feed)
|
||||
# if res[0]:
|
||||
# disco = parse(res[0])
|
||||
# title = disco["feed"]["title"]
|
||||
# msg += "{} \n {} \n\n".format(title, feed)
|
||||
feed_name = feed.xpath('@title')[0]
|
||||
feed_addr = join_url(url, feed.xpath('@href')[0])
|
||||
# if feed_addr.startswith("/"):
|
||||
# feed_addr = url + feed_addr
|
||||
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
|
||||
msg += (
|
||||
"```\nThe above feeds were extracted from\n{}"
|
||||
).format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
feed_addr = join_url(url, feeds[0].xpath('@href')[0])
|
||||
return [feed_addr]
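For reference, the autodiscovery XPath above matches markup of the following shape; the sample document is a contrived example:
from lxml import html

sample = (
    '<html><head>'
    '<link rel="alternate" type="application/rss+xml"'
    ' title="Example news" href="/feeds/rss.xml"/>'
    '</head><body></body></html>'
)
tree = html.fromstring(sample)
links = tree.xpath(
    '//link[(@rel="alternate") and (@type="application/rss+xml")]'
)
print(links[0].xpath('@title')[0], links[0].xpath('@href')[0])
# Example news /feeds/rss.xml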
|
|
@ -33,454 +33,24 @@ from http.client import IncompleteRead
|
|||
from lxml import html
|
||||
import slixfeed.config as config
|
||||
from slixfeed.datetime import now, rfc2822_to_iso8601
|
||||
import slixfeed.utility as utility
|
||||
import slixfeed.sqlite as sqlite
|
||||
from slixfeed.url import complete_url, join_url, trim_url
|
||||
from urllib import error
|
||||
# from xml.etree.ElementTree import ElementTree, ParseError
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
|
||||
# NOTE Why (if res[0]) and (if res[1] == 200)?
|
||||
async def download_updates(db_file, url=None):
|
||||
"""
|
||||
Check feeds for new entries.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str, optional
|
||||
URL. The default is None.
|
||||
"""
|
||||
if url:
|
||||
urls = [url] # Valid [url] and [url,] and (url,)
|
||||
else:
|
||||
urls = await sqlite.get_feeds_url(db_file)
|
||||
for url in urls:
|
||||
# print(os.path.basename(db_file), url[0])
|
||||
source = url[0]
|
||||
res = await download_feed(source)
|
||||
# TypeError: 'NoneType' object is not subscriptable
|
||||
if res is None:
|
||||
# Skip to next feed
|
||||
# urls.next()
|
||||
# next(urls)
|
||||
continue
|
||||
await sqlite.update_source_status(
|
||||
db_file, res[1], source)
|
||||
if res[0]:
|
||||
try:
|
||||
feed = parse(res[0])
|
||||
if feed.bozo:
|
||||
# bozo = (
|
||||
# "WARNING: Bozo detected for feed: {}\n"
|
||||
# "For more information, visit "
|
||||
# "https://pythonhosted.org/feedparser/bozo.html"
|
||||
# ).format(source)
|
||||
# print(bozo)
|
||||
valid = 0
|
||||
else:
|
||||
valid = 1
|
||||
await sqlite.update_source_validity(
|
||||
db_file, source, valid)
|
||||
except (
|
||||
IncompleteReadError,
|
||||
IncompleteRead,
|
||||
error.URLError
|
||||
) as e:
|
||||
# print(e)
|
||||
# TODO Print error to log
|
||||
None
|
||||
# NOTE I don't think there should be "return"
|
||||
# because then we might stop scanning next URLs
|
||||
# return
|
||||
# TODO Place these couple of lines back down
|
||||
# NOTE Need to correct the SQL statement to do so
|
||||
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
||||
if res[1] == 200:
|
||||
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
||||
# TODO Place these couple of lines back down
|
||||
# NOTE Need to correct the SQL statement to do so
|
||||
entries = feed.entries
|
||||
# length = len(entries)
|
||||
# await remove_entry(db_file, source, length)
|
||||
await sqlite.remove_nonexistent_entries(
|
||||
db_file, feed, source)
|
||||
# new_entry = 0
|
||||
for entry in entries:
|
||||
# TODO Pass date too for comparison check
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = rfc2822_to_iso8601(date)
|
||||
else:
|
||||
# TODO Just set date = "*** No date ***"
|
||||
# date = await datetime.now().isoformat()
|
||||
date = now()
|
||||
# NOTE Would seconds result in better database performance
|
||||
# date = datetime.datetime(date)
|
||||
# date = (date-datetime.datetime(1970,1,1)).total_seconds()
|
||||
if entry.has_key("title"):
|
||||
title = entry.title
|
||||
# title = "{}: *{}*".format(feed["feed"]["title"], entry.title)
|
||||
else:
|
||||
title = date
|
||||
# title = feed["feed"]["title"]
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = join_url(source, entry.link)
|
||||
link = trim_url(link)
|
||||
else:
|
||||
link = source
|
||||
if entry.has_key("id"):
|
||||
eid = entry.id
|
||||
else:
|
||||
eid = link
|
||||
exist = await sqlite.check_entry_exist(
|
||||
db_file, source, eid=eid,
|
||||
title=title, link=link, date=date)
|
||||
if not exist:
|
||||
# new_entry = new_entry + 1
|
||||
# TODO Enhance summary
|
||||
if entry.has_key("summary"):
|
||||
summary = entry.summary
|
||||
# # Remove HTML tags
|
||||
# summary = BeautifulSoup(summary, "lxml").text
|
||||
# # TODO Limit text length
|
||||
# summary = summary.replace("\n\n\n", "\n\n")
|
||||
# summary = summary[:300] + " […]⃨"
|
||||
# summary = summary.strip().split('\n')
|
||||
# summary = ["> " + line for line in summary]
|
||||
# summary = "\n".join(summary)
|
||||
else:
|
||||
summary = "> *** No summary ***"
|
||||
read_status = 0
|
||||
pathname = urlsplit(link).path
|
||||
string = (
|
||||
"{} {} {}"
|
||||
).format(
|
||||
title,
|
||||
summary,
|
||||
pathname
|
||||
)
|
||||
allow_list = await config.is_listed(
|
||||
db_file, "filter-allow", string)
|
||||
if not allow_list:
|
||||
reject_list = await config.is_listed(
|
||||
db_file, "filter-deny", string)
|
||||
if reject_list:
|
||||
# print(">>> REJECTED", title)
|
||||
summary = (
|
||||
"REJECTED {}".format(
|
||||
reject_list.upper()
|
||||
)
|
||||
)
|
||||
# summary = ""
|
||||
read_status = 1
|
||||
entry = (
|
||||
title, link, eid, source, date, read_status)
|
||||
if isinstance(date, int):
|
||||
print("PROBLEM: date is int")
|
||||
print(date)
|
||||
# breakpoint()
|
||||
# print(source)
|
||||
# print(date)
|
||||
await sqlite.add_entry_and_set_date(
|
||||
db_file, source, entry)
|
||||
# print(current_time(), entry, title)
|
||||
# else:
|
||||
# print(current_time(), exist, title)
|
||||
# async def dat():
|
||||
|
||||
# async def ftp():
|
||||
|
||||
# async def gemini():
|
||||
|
||||
# NOTE Why (if result[0]) and (if result[1] == 200)?
|
||||
async def view_feed(url):
|
||||
"""
|
||||
Check feeds for new entries.
|
||||
# async def gopher():
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str, optional
|
||||
URL. The default is None.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Feed content or error message.
|
||||
"""
|
||||
result = await download_feed(url)
|
||||
if result[0]:
|
||||
try:
|
||||
feed = parse(result[0])
|
||||
if feed.bozo:
|
||||
# msg = (
|
||||
# ">{}\n"
|
||||
# "WARNING: Bozo detected!\n"
|
||||
# "For more information, visit "
|
||||
# "https://pythonhosted.org/feedparser/bozo.html"
|
||||
# ).format(url)
|
||||
msg = await probe_page(view_feed, url, result[0])
|
||||
return msg
|
||||
except (
|
||||
IncompleteReadError,
|
||||
IncompleteRead,
|
||||
error.URLError
|
||||
) as e:
|
||||
# print(e)
|
||||
# TODO Print error to log
|
||||
msg = (
|
||||
"> {}\n"
|
||||
"Error: {}"
|
||||
).format(url, e)
|
||||
# breakpoint()
|
||||
if result[1] == 200:
|
||||
feed = parse(result[0])
|
||||
title = utility.get_title(url, feed)
|
||||
entries = feed.entries
|
||||
msg = "Preview of {}:\n\n```\n".format(title)
|
||||
counter = 0
|
||||
for entry in entries:
|
||||
counter += 1
|
||||
if entry.has_key("title"):
|
||||
title = entry.title
|
||||
else:
|
||||
title = "*** No title ***"
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = join_url(url, entry.link)
|
||||
link = trim_url(link)
|
||||
else:
|
||||
link = "*** No link ***"
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = rfc2822_to_iso8601(date)
|
||||
else:
|
||||
date = "*** No date ***"
|
||||
msg += (
|
||||
"Title : {}\n"
|
||||
"Date : {}\n"
|
||||
"Link : {}\n"
|
||||
"Count : {}\n"
|
||||
"\n"
|
||||
).format(title, date, link, counter)
|
||||
if counter > 4:
|
||||
break
|
||||
msg += (
|
||||
"```\nSource: {}"
|
||||
).format(url)
|
||||
else:
|
||||
msg = (
|
||||
">{}\nFailed to load URL. Reason: {}"
|
||||
).format(url, result[1])
|
||||
return msg
|
||||
|
||||
|
||||
# NOTE Why (if result[0]) and (if result[1] == 200)?
|
||||
async def view_entry(url, num):
|
||||
result = await download_feed(url)
|
||||
if result[0]:
|
||||
try:
|
||||
feed = parse(result[0])
|
||||
if feed.bozo:
|
||||
# msg = (
|
||||
# ">{}\n"
|
||||
# "WARNING: Bozo detected!\n"
|
||||
# "For more information, visit "
|
||||
# "https://pythonhosted.org/feedparser/bozo.html"
|
||||
# ).format(url)
|
||||
msg = await probe_page(view_entry, url, result[0], num=num)
|
||||
return msg
|
||||
except (
|
||||
IncompleteReadError,
|
||||
IncompleteRead,
|
||||
error.URLError
|
||||
) as e:
|
||||
# print(e)
|
||||
# TODO Print error to log
|
||||
msg = (
|
||||
"> {}\n"
|
||||
"Error: {}"
|
||||
).format(url, e)
|
||||
# breakpoint()
|
||||
if result[1] == 200:
|
||||
feed = parse(result[0])
|
||||
title = utility.get_title(url, result[0])
|
||||
entries = feed.entries
|
||||
num = int(num) - 1
|
||||
entry = entries[num]
|
||||
if entry.has_key("title"):
|
||||
title = entry.title
|
||||
else:
|
||||
title = "*** No title ***"
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = rfc2822_to_iso8601(date)
|
||||
else:
|
||||
date = "*** No date ***"
|
||||
if entry.has_key("summary"):
|
||||
summary = entry.summary
|
||||
# Remove HTML tags
|
||||
summary = BeautifulSoup(summary, "lxml").text
|
||||
# TODO Limit text length
|
||||
summary = summary.replace("\n\n\n", "\n\n")
|
||||
else:
|
||||
summary = "*** No summary ***"
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = join_url(url, entry.link)
|
||||
link = trim_url(link)
|
||||
else:
|
||||
link = "*** No link ***"
|
||||
msg = (
|
||||
"{}\n"
|
||||
"\n"
|
||||
"> {}\n"
|
||||
"\n"
|
||||
"{}\n"
|
||||
"\n"
|
||||
).format(title, summary, link)
|
||||
else:
|
||||
msg = (
|
||||
">{}\n"
|
||||
"Failed to load URL. Reason: {}\n"
|
||||
"Try again momentarily."
|
||||
).format(url, result[1])
|
||||
return msg
|
||||
|
||||
|
||||
async def add_feed_no_check(db_file, data):
|
||||
"""
|
||||
Add given feed without validity check.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
data : str
|
||||
URL or URL and Title.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Status message.
|
||||
"""
|
||||
url = data[0]
|
||||
title = data[1]
|
||||
url = trim_url(url)
|
||||
exist = await sqlite.is_feed_exist(db_file, url)
|
||||
if not exist:
|
||||
msg = await sqlite.insert_feed(db_file, url, title)
|
||||
await download_updates(db_file, [url])
|
||||
else:
|
||||
ix = exist[0]
|
||||
name = exist[1]
|
||||
msg = (
|
||||
"> {}\nNews source \"{}\" is already "
|
||||
"listed in the subscription list at "
|
||||
"index {}".format(url, name, ix)
|
||||
)
|
||||
return msg
|
||||
|
||||
|
||||
async def add_feed(db_file, url):
|
||||
"""
|
||||
Check whether feed exist, otherwise process it.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Status message.
|
||||
"""
|
||||
msg = None
|
||||
url = trim_url(url)
|
||||
exist = await sqlite.is_feed_exist(db_file, url)
|
||||
if not exist:
|
||||
res = await download_feed(url)
|
||||
if res[0]:
|
||||
feed = parse(res[0])
|
||||
title = utility.get_title(url, feed)
|
||||
if utility.is_feed(url, feed):
|
||||
status = res[1]
|
||||
await sqlite.insert_feed(
|
||||
db_file, url, title, status)
|
||||
await download_updates(db_file, [url])
|
||||
title = title if title else url
|
||||
msg = (
|
||||
"> {}\nNews source \"{}\" has been added "
|
||||
"to subscription list."
|
||||
).format(url, title)
|
||||
else:
|
||||
msg = await probe_page(
|
||||
add_feed, url, res[0], db_file=db_file)
|
||||
else:
|
||||
status = res[1]
|
||||
msg = (
|
||||
"> {}\nFailed to load URL. Reason: {}"
|
||||
).format(url, status)
|
||||
else:
|
||||
ix = exist[0]
|
||||
name = exist[1]
|
||||
msg = (
|
||||
"> {}\nNews source \"{}\" is already "
|
||||
"listed in the subscription list at "
|
||||
"index {}".format(url, name, ix)
|
||||
)
|
||||
return msg
|
||||
|
||||
|
||||
# TODO callback for use with add_feed and view_feed
|
||||
async def probe_page(callback, url, doc, num=None, db_file=None):
|
||||
msg = None
|
||||
try:
|
||||
# tree = etree.fromstring(res[0]) # etree is for xml
|
||||
tree = html.fromstring(doc)
|
||||
except:
|
||||
msg = (
|
||||
"> {}\nFailed to parse URL as feed."
|
||||
).format(url)
|
||||
if not msg:
|
||||
print("RSS Auto-Discovery Engaged")
|
||||
msg = await feed_mode_auto_discovery(url, tree)
|
||||
if not msg:
|
||||
print("RSS Scan Mode Engaged")
|
||||
msg = await feed_mode_scan(url, tree)
|
||||
if not msg:
|
||||
print("RSS Arbitrary Mode Engaged")
|
||||
msg = await feed_mode_request(url, tree)
|
||||
if not msg:
|
||||
msg = (
|
||||
"> {}\nNo news feeds were found for URL."
|
||||
).format(url)
|
||||
# elif msg:
|
||||
else:
|
||||
if isinstance(msg, str):
|
||||
return msg
|
||||
elif isinstance(msg, list):
|
||||
url = msg[0]
|
||||
if db_file:
|
||||
# print("if db_file", db_file)
|
||||
return await callback(db_file, url)
|
||||
elif num:
|
||||
return await callback(url, num)
|
||||
else:
|
||||
return await callback(url)
|
||||
# async def http():
|
||||
|
||||
# async def ipfs():
|
||||
|
||||
async def download_feed(url):
|
||||
"""
|
||||
|
@ -488,7 +58,7 @@ async def download_feed(url):
|
|||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
url : list
|
||||
URL.
|
||||
|
||||
Returns
|
||||
|
@ -502,27 +72,23 @@ async def download_feed(url):
|
|||
user_agent = "Slixfeed/0.1"
|
||||
if not len(user_agent):
|
||||
user_agent = "Slixfeed/0.1"
|
||||
headers = {'User-Agent': user_agent}
|
||||
url = url[0]
|
||||
proxy = (config.get_value("settings", "Network", "http_proxy")) or ''
|
||||
timeout = ClientTimeout(total=10)
|
||||
headers = {'User-Agent': user_agent}
|
||||
async with ClientSession(headers=headers) as session:
|
||||
# async with ClientSession(trust_env=True) as session:
|
||||
try:
|
||||
async with session.get(
|
||||
url,
|
||||
proxy=proxy,
|
||||
# proxy_auth=(proxy_username, proxy_password)
|
||||
timeout=timeout
|
||||
) as response:
|
||||
async with session.get(url, proxy=proxy,
|
||||
# proxy_auth=(proxy_username, proxy_password),
|
||||
timeout=timeout
|
||||
) as response:
|
||||
status = response.status
|
||||
if response.status == 200:
|
||||
try:
|
||||
doc = await response.text()
|
||||
# print (response.content_type)
|
||||
msg = [
|
||||
doc,
|
||||
status
|
||||
]
|
||||
msg = [doc, status]
|
||||
except:
|
||||
# msg = [
|
||||
# False,
|
||||
|
@ -531,307 +97,20 @@ async def download_feed(url):
|
|||
# )
|
||||
# ]
|
||||
msg = [
|
||||
False,
|
||||
"Document is too large or is not textual."
|
||||
False, "Document is too large or is not textual."
|
||||
]
|
||||
else:
|
||||
msg = [
|
||||
False,
|
||||
"HTTP Error: " + str(status)
|
||||
False, "HTTP Error: " + str(status)
|
||||
]
|
||||
except ClientError as e:
|
||||
# print('Error', str(e))
|
||||
msg = [
|
||||
False,
|
||||
"Error: " + str(e)
|
||||
False, "Error: " + str(e)
|
||||
]
|
||||
except TimeoutError as e:
|
||||
# print('Timeout:', str(e))
|
||||
msg = [
|
||||
False,
|
||||
"Timeout: " + str(e)
|
||||
False, "Timeout: " + str(e)
|
||||
]
|
||||
return msg
|
||||
|
||||
|
||||
# TODO Improve scan by gradual decreasing of path
|
||||
async def feed_mode_request(url, tree):
|
||||
"""
|
||||
Lookup for feeds by pathname using HTTP Requests.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
tree : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
feeds = {}
|
||||
parted_url = urlsplit(url)
|
||||
paths = config.get_list("lists.yaml")
|
||||
paths = paths["pathnames"]
|
||||
for path in paths:
|
||||
address = urlunsplit([
|
||||
parted_url.scheme,
|
||||
parted_url.netloc,
|
||||
path,
|
||||
None,
|
||||
None
|
||||
])
|
||||
res = await download_feed(address)
|
||||
if res[1] == 200:
|
||||
# print(parse(res[0])["feed"]["title"])
|
||||
# feeds[address] = parse(res[0])["feed"]["title"]
|
||||
try:
|
||||
title = parse(res[0])["feed"]["title"]
|
||||
except:
|
||||
title = '*** No Title ***'
|
||||
feeds[address] = title
|
||||
# Check whether URL has path (i.e. not root)
|
||||
# Check parted_url.path to avoid error in case root wasn't given
|
||||
# TODO Make more tests
|
||||
if parted_url.path and parted_url.path.split('/')[1]:
|
||||
paths.extend(
|
||||
[".atom", ".feed", ".rdf", ".rss"]
|
||||
) if '.rss' not in paths else -1
|
||||
# if paths.index('.rss'):
|
||||
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
|
||||
address = urlunsplit([
|
||||
parted_url.scheme,
|
||||
parted_url.netloc,
|
||||
parted_url.path.split('/')[1] + path,
|
||||
None,
|
||||
None
|
||||
])
|
||||
res = await download_feed(address)
|
||||
if res[1] == 200:
|
||||
try:
|
||||
feeds[address] = parse(res[0])
|
||||
# print(feeds)
|
||||
except:
|
||||
continue
|
||||
if len(feeds) > 1:
|
||||
counter = 0
|
||||
msg = (
|
||||
"RSS URL discovery has found {} feeds:\n\n```\n"
|
||||
).format(len(feeds))
|
||||
feed_mark = 0
|
||||
for feed in feeds:
|
||||
try:
|
||||
feed_name = feeds[feed]["feed"]["title"]
|
||||
except:
|
||||
feed_name = urlsplit(feed).netloc
|
||||
feed_addr = feed
|
||||
# AttributeError: 'str' object has no attribute 'entries'
|
||||
try:
|
||||
feed_amnt = len(feeds[feed].entries)
|
||||
except:
|
||||
continue
|
||||
if feed_amnt:
|
||||
# NOTE Because there could be many false positives
|
||||
# which are revealed in second phase of scan, we
|
||||
# could end with a single feed, which would be
|
||||
# listed instead of fetched, so feed_mark is
|
||||
# utilized in order to make fetch possible.
|
||||
feed_mark = [feed_addr]
|
||||
counter += 1
|
||||
msg += (
|
||||
"Title: {}\n"
|
||||
"Link : {}\n"
|
||||
"Items: {}\n"
|
||||
"\n"
|
||||
).format(feed_name, feed_addr, feed_amnt)
|
||||
if counter > 1:
|
||||
msg += (
|
||||
"```\nThe above feeds were extracted from\n{}"
|
||||
).format(url)
|
||||
elif feed_mark:
|
||||
return feed_mark
|
||||
else:
|
||||
msg = (
|
||||
"No feeds were found for {}"
|
||||
).format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
return feeds
|
||||
|
||||
|
||||
async def feed_mode_scan(url, tree):
|
||||
"""
|
||||
Scan page for potential feeds by pathname.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
tree : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
feeds = {}
|
||||
# paths = []
|
||||
# TODO Test
|
||||
paths = config.get_list("lists.yaml")
|
||||
paths = paths["pathnames"]
|
||||
for path in paths:
|
||||
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
|
||||
# xpath_query = "//a[contains(@href,'{}')]".format(path)
|
||||
num = 5
|
||||
xpath_query = "(//a[contains(@href,'{}')])[position()<={}]".format(path, num)
|
||||
addresses = tree.xpath(xpath_query)
|
||||
xpath_query = "(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num)
|
||||
addresses += tree.xpath(xpath_query)
|
||||
parted_url = urlsplit(url)
|
||||
# NOTE Should number of addresses be limited or
|
||||
# perhaps be N from the start and N from the end
|
||||
for address in addresses:
|
||||
# print(address.xpath('@href')[0])
|
||||
# print(addresses)
|
||||
address = address.xpath('@href')[0]
|
||||
if "/" not in address:
|
||||
protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
pathname = address
|
||||
address = urlunsplit([
|
||||
protocol,
|
||||
hostname,
|
||||
pathname,
|
||||
None,
|
||||
None
|
||||
])
|
||||
if address.startswith('/'):
|
||||
protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
pathname = address
|
||||
address = urlunsplit([
|
||||
protocol,
|
||||
hostname,
|
||||
pathname,
|
||||
None,
|
||||
None
|
||||
])
|
||||
res = await download_feed(address)
|
||||
if res[1] == 200:
|
||||
try:
|
||||
feeds[address] = parse(res[0])
|
||||
# print(feeds[address])
|
||||
# breakpoint()
|
||||
# print(feeds)
|
||||
except:
|
||||
continue
|
||||
if len(feeds) > 1:
|
||||
# print(feeds)
|
||||
# breakpoint()
|
||||
counter = 0
|
||||
msg = (
|
||||
"RSS URL scan has found {} feeds:\n\n```\n"
|
||||
).format(len(feeds))
|
||||
feed_mark = 0
|
||||
for feed in feeds:
|
||||
# try:
|
||||
# res = await download_feed(feed)
|
||||
# except:
|
||||
# continue
|
||||
try:
|
||||
feed_name = feeds[feed]["feed"]["title"]
|
||||
except:
|
||||
feed_name = urlsplit(feed).netloc
|
||||
feed_addr = feed
|
||||
feed_amnt = len(feeds[feed].entries)
|
||||
if feed_amnt:
|
||||
# NOTE Because there could be many false positives
|
||||
# which are revealed in second phase of scan, we
|
||||
# could end with a single feed, which would be
|
||||
# listed instead of fetched, so feed_mark is
|
||||
# utilized in order to make fetch possible.
|
||||
feed_mark = [feed_addr]
|
||||
counter += 1
|
||||
msg += (
|
||||
"Title : {}\n"
|
||||
"Link : {}\n"
|
||||
"Count : {}\n"
|
||||
"\n"
|
||||
).format(feed_name, feed_addr, feed_amnt)
|
||||
if counter > 1:
|
||||
msg += (
|
||||
"```\nThe above feeds were extracted from\n{}"
|
||||
).format(url)
|
||||
elif feed_mark:
|
||||
return feed_mark
|
||||
else:
|
||||
msg = (
|
||||
"No feeds were found for {}"
|
||||
).format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
return feeds
|
||||
|
||||
|
||||
async def feed_mode_auto_discovery(url, tree):
|
||||
"""
|
||||
Lookup for feeds using RSS autodiscovery technique.
|
||||
|
||||
See: https://www.rssboard.org/rss-autodiscovery
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
tree : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
xpath_query = (
|
||||
'//link[(@rel="alternate") and '
|
||||
'(@type="application/atom+xml" or '
|
||||
'@type="application/rdf+xml" or '
|
||||
'@type="application/rss+xml")]'
|
||||
)
|
||||
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
|
||||
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
|
||||
feeds = tree.xpath(xpath_query)
|
||||
if len(feeds) > 1:
|
||||
msg = (
|
||||
"RSS Auto-Discovery has found {} feeds:\n\n```\n"
|
||||
).format(len(feeds))
|
||||
for feed in feeds:
|
||||
# # The following code works;
|
||||
# # The following code will catch
|
||||
# # only valid resources (i.e. not 404);
|
||||
# # The following code requires more bandwidth.
|
||||
# res = await download_feed(feed)
|
||||
# if res[0]:
|
||||
# disco = parse(res[0])
|
||||
# title = disco["feed"]["title"]
|
||||
# msg += "{} \n {} \n\n".format(title, feed)
|
||||
feed_name = feed.xpath('@title')[0]
|
||||
feed_addr = join_url(url, feed.xpath('@href')[0])
|
||||
# if feed_addr.startswith("/"):
|
||||
# feed_addr = url + feed_addr
|
||||
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
|
||||
msg += (
|
||||
"```\nThe above feeds were extracted from\n{}"
|
||||
).format(url)
|
||||
return msg
|
||||
elif feeds:
|
||||
feed_addr = join_url(url, feeds[0].xpath('@href')[0])
|
||||
return [feed_addr]
|
||||
|
|
slixfeed/log.py (new file, 33 lines)
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
def markdown(timestamp, filename, jid, message):
|
||||
"""
|
||||
Log message to file.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
timestamp : str
|
||||
Time stamp.
|
||||
filename : str
|
||||
Jabber ID as name of file.
|
||||
jid : str
|
||||
Jabber ID.
|
||||
message : str
|
||||
Message content.
|
||||
|
||||
Returns
|
||||
-------
|
||||
None.
|
||||
|
||||
"""
|
||||
with open(filename + '.md', 'a') as file:
|
||||
# entry = "{} {}:\n{}\n\n".format(timestamp, jid, message)
|
||||
entry = (
|
||||
"## {}\n"
|
||||
"### {}\n\n"
|
||||
"{}\n\n").format(jid, timestamp, message)
|
||||
file.write(entry)
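A small usage sketch of the function above; the timestamp and Jabber ID are made-up values:
import slixfeed.log as log

# Appends a Markdown entry to the file "juliet@example.org.md"
log.markdown(
    '2024-01-01 12:00:00', 'juliet@example.org', 'juliet@example.org',
    'Hello there')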
|
||||
|
||||
|
slixfeed/read.py (new file, 74 lines)
|
@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
1) is_feed: Look into the type ("atom", "rss2" etc.)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def title(feed):
|
||||
"""
|
||||
Get title of feed.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
||||
feed : dict
|
||||
Parsed feed document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
title : str
|
||||
Title or None.
|
||||
"""
|
||||
try:
|
||||
title = feed["feed"]["title"]
|
||||
except:
|
||||
title = None
|
||||
return title
|
||||
|
||||
|
||||
def is_feed(feed):
|
||||
"""
|
||||
Determine whether document is feed or not.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
feed : dict
|
||||
Parsed feed.
|
||||
|
||||
Returns
|
||||
-------
|
||||
val : boolean
|
||||
True or False.
|
||||
"""
|
||||
msg = None
|
||||
if not feed.entries:
|
||||
try:
|
||||
feed["feed"]["title"]
|
||||
val = True
|
||||
# msg = (
|
||||
# "Empty feed for {}"
|
||||
# ).format(url)
|
||||
except:
|
||||
val = False
|
||||
# msg = (
|
||||
# "No entries nor title for {}"
|
||||
# ).format(url)
|
||||
elif feed.bozo:
|
||||
val = False
|
||||
# msg = (
|
||||
# "Bozo detected for {}"
|
||||
# ).format(url)
|
||||
else:
|
||||
val = True
|
||||
# msg = (
|
||||
# "Good feed for {}"
|
||||
# ).format(url)
|
||||
print(msg)
|
||||
return val
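A quick sketch of is_feed() together with feedparser; the document below is a minimal hand-written feed used only for illustration:
from feedparser import parse
import slixfeed.read as read

document = (
    '<rss version="2.0"><channel>'
    '<title>Example</title>'
    '</channel></rss>'
)
feed = parse(document)
# No entries, but a title is present, so this counts as a feed
print(read.is_feed(feed))  # -> True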
|
|
@ -222,9 +222,6 @@ async def remove_feed(db_file, ix):
|
|||
"FROM feeds "
|
||||
"WHERE id = ?"
|
||||
)
|
||||
# cur
|
||||
# for i in url:
|
||||
# url = i[0]
|
||||
url = cur.execute(sql, (ix,)).fetchone()[0]
|
||||
# NOTE Should we move DBLOCK to this line? 2022-12-23
|
||||
sql = (
|
||||
|
@ -246,8 +243,10 @@ async def remove_feed(db_file, ix):
|
|||
cur.execute(sql, (ix,))
|
||||
|
||||
|
||||
# TODO Rename this function
|
||||
async def is_feed_exist(db_file, url):
|
||||
"""
|
||||
Get Id and Name of feed.
|
||||
Check whether a feed exists.
|
||||
Query for feeds by given url.
|
||||
|
||||
|
@ -270,8 +269,7 @@ async def is_feed_exist(db_file, url):
|
|||
"WHERE address = ?"
|
||||
)
|
||||
result = cur.execute(sql, (url,)).fetchone()
|
||||
if result:
|
||||
return True
|
||||
return result
|
||||
|
||||
|
||||
async def get_number_of_items(db_file, table):
|
||||
|
|
|
@ -49,13 +49,14 @@ from slixfeed.config import (
|
|||
get_default_dbdir,
|
||||
get_value_default)
|
||||
from slixfeed.datetime import current_time
|
||||
from slixfeed.fetch import download_updates
|
||||
from slixfeed.action import organize_items
|
||||
from slixfeed.sqlite import (
|
||||
get_unread_entries,
|
||||
get_feed_title,
|
||||
get_settings_value,
|
||||
get_feeds_url,
|
||||
get_number_of_items,
|
||||
get_number_of_entries_unread,
|
||||
get_settings_value,
|
||||
get_unread_entries,
|
||||
mark_as_read,
|
||||
mark_entry_as_read,
|
||||
delete_archived_entry
|
||||
|
@ -329,7 +330,9 @@ async def refresh_task(self, jid, callback, key, val=None):
|
|||
val : str, optional
|
||||
Value. The default is None.
|
||||
"""
|
||||
logging.debug("Refreshing task {} for JID {}".format(callback, jid))
|
||||
logging.debug(
|
||||
"Refreshing task {} for JID {}".format(callback, jid)
|
||||
)
|
||||
if not val:
|
||||
db_file = get_pathname_to_database(jid)
|
||||
val = await get_settings_value(db_file, key)
|
||||
|
@ -340,7 +343,8 @@ async def refresh_task(self, jid, callback, key, val=None):
|
|||
except:
|
||||
logging.debug(
|
||||
"No task of type {} to cancel for "
|
||||
"JID {} (clean_tasks)".format(key, jid))
|
||||
"JID {} (clean_tasks)".format(key, jid)
|
||||
)
|
||||
# task_manager[jid][key] = loop.call_at(
|
||||
# loop.time() + 60 * float(val),
|
||||
# loop.create_task,
|
||||
|
@ -378,10 +382,13 @@ async def check_updates(jid):
|
|||
jid : str
|
||||
Jabber ID.
|
||||
"""
|
||||
logging.debug("Scanning for updates for JID {}".format(jid))
|
||||
logging.debug(
|
||||
"Scanning for updates for JID {}".format(jid)
|
||||
)
|
||||
while True:
|
||||
db_file = get_pathname_to_database(jid)
|
||||
await download_updates(db_file)
|
||||
urls = await get_feeds_url(db_file)
|
||||
await organize_items(db_file, urls)
|
||||
val = get_value_default("settings", "Settings", "check")
|
||||
await asyncio.sleep(60 * float(val))
|
||||
# Schedule to call this function again in 90 minutes
|
||||
|
@ -394,12 +401,16 @@ async def check_updates(jid):
|
|||
|
||||
async def start_tasks(self, presence):
|
||||
jid = presence["from"].bare
|
||||
logging.debug("Beginning tasks for JID {}".format(jid))
|
||||
logging.debug(
|
||||
"Beginning tasks for JID {}".format(jid)
|
||||
)
|
||||
if jid not in self.boundjid.bare:
|
||||
await clean_tasks_xmpp(
|
||||
jid, ["interval", "status", "check"])
|
||||
jid, ["interval", "status", "check"]
|
||||
)
|
||||
await start_tasks_xmpp(
|
||||
self, jid, ["interval", "status", "check"])
|
||||
self, jid, ["interval", "status", "check"]
|
||||
)
|
||||
# await task_jid(self, jid)
|
||||
# main_task.extend([asyncio.create_task(task_jid(jid))])
|
||||
# print(main_task)
|
||||
|
@ -408,9 +419,12 @@ async def start_tasks(self, presence):
|
|||
async def stop_tasks(self, presence):
|
||||
if not self.boundjid.bare:
|
||||
jid = presence["from"].bare
|
||||
logging.debug("Stopping tasks for JID {}".format(jid))
|
||||
logging.debug(
|
||||
"Stopping tasks for JID {}".format(jid)
|
||||
)
|
||||
await clean_tasks_xmpp(
|
||||
jid, ["interval", "status", "check"])
|
||||
jid, ["interval", "status", "check"]
|
||||
)
|
||||
|
||||
|
||||
async def check_readiness(self, presence):
|
||||
|
@ -434,7 +448,9 @@ async def check_readiness(self, presence):
|
|||
|
||||
jid = presence["from"].bare
|
||||
if presence["show"] in ("away", "dnd", "xa"):
|
||||
logging.debug("Stopping updates for JID {}".format(jid))
|
||||
logging.debug(
|
||||
"Stopping updates for JID {}".format(jid)
|
||||
)
|
||||
await clean_tasks_xmpp(
|
||||
jid, ["interval"])
|
||||
await start_tasks_xmpp(
|
||||
|
@ -477,7 +493,9 @@ async def select_file(self):
|
|||
if (file.endswith(".db") and
|
||||
not file.endswith(".db-jour.db")):
|
||||
jid = file[:-3]
|
||||
main_task.extend([tg.create_task(self.task_jid(jid))])
|
||||
main_task.extend(
|
||||
[tg.create_task(self.task_jid(jid))]
|
||||
)
|
||||
# main_task = [tg.create_task(self.task_jid(jid))]
|
||||
# task_manager.update({jid: tg})
|
||||
|
||||
|
|
|
@ -21,7 +21,7 @@ from urllib.parse import (
|
|||
parse_qs,
|
||||
urlencode,
|
||||
urljoin,
|
||||
urlparse,
|
||||
# urlparse,
|
||||
urlsplit,
|
||||
urlunsplit
|
||||
)
|
||||
|
|
|
@ -1,109 +0,0 @@

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""

TODO

1) is_feed: Look into the type ("atom", "rss2" etc.)

"""

from urllib.parse import urlsplit


def log_as_markdown(timestamp, filename, jid, message):
    """
    Log message to file.

    Parameters
    ----------
    timestamp : str
        Time stamp.
    filename : str
        Jabber ID as name of file.
    jid : str
        Jabber ID.
    message : str
        Message content.

    Returns
    -------
    None.

    """
    with open(filename + '.md', 'a') as file:
        # entry = "{} {}:\n{}\n\n".format(timestamp, jid, message)
        entry = (
            "## {}\n"
            "### {}\n\n"
            "{}\n\n").format(jid, timestamp, message)
        file.write(entry)


def get_title(url, feed):
    """
    Get title of feed.

    Parameters
    ----------
    url : str
        URL.
    feed : dict
        Parsed feed document.

    Returns
    -------
    title : str
        Title or URL hostname.
    """
    try:
        title = feed["feed"]["title"]
    except:
        title = urlsplit(url).netloc
    if not title:
        title = urlsplit(url).netloc
    return title


def is_feed(url, feed):
    """
    Determine whether document is feed or not.

    Parameters
    ----------
    url : str
        URL.
    feed : dict
        Parsed feed.

    Returns
    -------
    val : boolean
        True or False.
    """
    msg = None
    if not feed.entries:
        try:
            feed["feed"]["title"]
            val = True
            msg = (
                "Empty feed for {}"
                ).format(url)
        except:
            val = False
            msg = (
                "No entries nor title for {}"
                ).format(url)
    elif feed.bozo:
        val = False
        msg = (
            "Bozo detected for {}"
            ).format(url)
    else:
        val = True
        msg = (
            "Good feed for {}"
            ).format(url)
    print(msg)
    return val
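The removed module bundled feed validation, title extraction, and markdown logging. A minimal sketch of how the first two helpers fit together before the split, assuming feedparser's parse and the function bodies shown above (the describe wrapper is illustrative, not part of the codebase):

from feedparser import parse

def describe(url, document):
    # Parse the raw document, then reuse the removed helpers:
    # is_feed() decides whether the parse result is a usable feed,
    # get_title() falls back to the URL hostname when no title is set.
    feed = parse(document)
    if is_feed(url, feed):
        return get_title(url, feed)
    return None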
@ -48,26 +48,20 @@ NOTE

"""

import asyncio
from slixfeed.config import add_to_list, get_list, remove_from_list
import slixfeed.fetch as fetcher
from slixfeed.datetime import current_time
import logging
# import os
from random import randrange
import slixmpp
from slixmpp.exceptions import IqError, IqTimeout
import slixfeed.sqlite as sqlite
import slixfeed.task as task
import slixfeed.url as urlfixer
from time import sleep

from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
# from slixmpp.plugins.xep_0402 import BookmarkStorage, Conference
from slixmpp.plugins.xep_0048.stanza import Bookmarks

import xmltodict
import xml.etree.ElementTree as ET
from lxml import etree
# import xmltodict
# import xml.etree.ElementTree as ET
# from lxml import etree

import slixfeed.xmpp.connect as connect
import slixfeed.xmpp.process as process
@ -17,7 +17,8 @@ async def recover_connection(self, event, message):

    # print(current_time(),"Maximum connection attempts exceeded.")
    # logging.error("Maximum connection attempts exceeded.")
    print(current_time(), "Attempt number", self.connection_attempts)
    seconds = (get_value("accounts", "XMPP Connect", "reconnect_timeout")) or 30
    seconds = (get_value(
        "accounts", "XMPP Connect", "reconnect_timeout")) or 30
    seconds = int(seconds)
    print(current_time(), "Next attempt within", seconds, "seconds")
    # NOTE asyncio.sleep doesn't interval as expected
@ -19,19 +19,22 @@ TODO

"""

import os
import slixfeed.action as action
from slixfeed.config import (
    add_to_list,
    get_default_dbdir,
    get_value,
    get_pathname_to_database,
    remove_from_list)
import slixfeed.crawl as crawl
from slixfeed.datetime import current_time, timestamp
import slixfeed.export as export
import slixfeed.fetch as fetcher
import slixfeed.fetch as fetch
import slixfeed.opml as opml
import slixfeed.sqlite as sqlite
import slixfeed.task as task
import slixfeed.utility as utility
import slixfeed.log as log
import slixfeed.read as read
import slixfeed.url as uri
import slixfeed.xmpp.bookmark as bookmark
import slixfeed.xmpp.compose as compose

@ -40,6 +43,7 @@ import slixfeed.xmpp.status as status

import slixfeed.xmpp.text as text
import slixfeed.xmpp.upload as upload
from slixfeed.xmpp.utility import jid_type
from urllib.parse import urlsplit, urlunsplit


async def event(self, event):
|
|||
# else:
|
||||
# response = "This command is valid for groupchat only."
|
||||
case _ if message_lowercase.startswith("add"):
|
||||
# Add given feed without validity check.
|
||||
message_text = message_text[4:]
|
||||
url = message_text.split(" ")[0]
|
||||
title = " ".join(message_text.split(" ")[1:])
|
||||
if url.startswith("http"):
|
||||
db_file = get_pathname_to_database(jid)
|
||||
response = await fetcher.add_feed_no_check(db_file, [url, title])
|
||||
old = await sqlite.get_settings_value(db_file, "old")
|
||||
if old:
|
||||
await task.clean_tasks_xmpp(jid, ["status"])
|
||||
# await send_status(jid)
|
||||
await task.start_tasks_xmpp(self, jid, ["status"])
|
||||
exist = await sqlite.is_feed_exist(db_file, url)
|
||||
if not exist:
|
||||
await sqlite.insert_feed(db_file, url, title)
|
||||
await action.organize_items(db_file, [url])
|
||||
old = await sqlite.get_settings_value(db_file, "old")
|
||||
if old:
|
||||
await task.clean_tasks_xmpp(jid, ["status"])
|
||||
# await send_status(jid)
|
||||
await task.start_tasks_xmpp(self, jid, ["status"])
|
||||
else:
|
||||
await sqlite.mark_source_as_read(db_file, url)
|
||||
response = (
|
||||
"> {}\nNews source has been "
|
||||
"added to subscription list."
|
||||
).format(url)
|
||||
else:
|
||||
db_file = get_pathname_to_database(jid)
|
||||
await sqlite.mark_source_as_read(db_file, url)
|
||||
ix = exist[0]
|
||||
name = exist[1]
|
||||
response = (
|
||||
"> {}\nNews source \"{}\" is already "
|
||||
"listed in the subscription list at "
|
||||
"index {}".format(url, name, ix)
|
||||
)
|
||||
else:
|
||||
response = "Missing URL."
|
||||
send_reply_message(self, message, response)
|
||||
|
@ -388,31 +407,13 @@ async def message(self, message):

                send_status_message(self, jid, status_type, status_message)
                if url.startswith("feed:"):
                    url = uri.feed_to_http(url)
                # url_alt = await uri.replace_hostname(url, "feed")
                # if url_alt:
                #     url = url_alt
                url = (uri.replace_hostname(url, "feed")) or url
                db_file = get_pathname_to_database(jid)
                response = await fetcher.add_feed(db_file, url)
                await task.start_tasks_xmpp(self, jid, ["status"])
                # response = "> " + message + "\n" + response
                # FIXME Make the taskhandler to update status message
                # await refresh_task(
                #     self,
                #     jid,
                #     send_status,
                #     "status",
                #     20
                # )
                # NOTE This would show the number of new unread entries
                old = await sqlite.get_settings_value(db_file, "old")
                if old:
                    await task.clean_tasks_xmpp(jid, ["status"])
                    # await send_status(jid)
                    await task.start_tasks_xmpp(self, jid, ["status"])
                else:
                    db_file = get_pathname_to_database(jid)
                    await sqlite.mark_source_as_read(db_file, url)
                response = await action.add_feed(db_file, url)
                await task.clean_tasks_xmpp(
                    jid, ["status"])
                await task.start_tasks_xmpp(
                    self, jid, ["status"])
                send_reply_message(self, message, response)
            case _ if message_lowercase.startswith("feeds"):
                query = message_text[6:]
@ -521,7 +522,7 @@ async def message(self, message):

                send_reply_message(self, message, response)
            case "new":
                db_file = get_pathname_to_database(jid)
                sqlite.set_settings_value(db_file, ["old", 0])
                await sqlite.set_settings_value(db_file, ["old", 0])
                response = (
                    "Only new items of newly added feeds will be sent."
                    )
@ -581,7 +582,8 @@ async def message(self, message):

                data = message_text[5:]
                data = data.split()
                url = data[0]
                await task.clean_tasks_xmpp(jid, ["status"])
                await task.clean_tasks_xmpp(
                    jid, ["status"])
                status_type = "dnd"
                status_message = (
                    "📫️ Processing request to fetch data from {}"
@ -593,13 +595,13 @@ async def message(self, message):

                match len(data):
                    case 1:
                        if url.startswith("http"):
                            response = await fetcher.view_feed(url)
                            response = await action.view_feed(url)
                        else:
                            response = "Missing URL."
                    case 2:
                        num = data[1]
                        if url.startswith("http"):
                            response = await fetcher.view_entry(url, num)
                            response = await action.view_entry(url, num)
                        else:
                            response = "Missing URL."
                    case _:
@ -627,15 +629,15 @@ async def message(self, message):

                        response = "Missing value."
                send_reply_message(self, message, response)
            # NOTE Should people be asked for numeric value?
            case _ if message_lowercase.startswith("remove"):
            case _ if message_lowercase.startswith("remove "):
                ix = message_text[7:]
                if ix:
                    db_file = get_pathname_to_database(jid)
                    try:
                        await sqlite.remove_feed(db_file, ix)
                        response = (
                            "> {}\nNews source has been removed "
                            "from subscription list.").format(url)
                            "News source {} has been removed "
                            "from subscription list.").format(ix)
                        # await refresh_task(
                        #     self,
                        #     jid,
@ -643,10 +645,13 @@ async def message(self, message):

                        #     "status",
                        #     20
                        # )
                        await task.clean_tasks_xmpp(jid, ["status"])
                        await task.start_tasks_xmpp(self, jid, ["status"])
                        await task.clean_tasks_xmpp(
                            jid, ["status"])
                        await task.start_tasks_xmpp(
                            self, jid, ["status"])
                    except:
                        response = "No news source with ID {}.".format(ix)
                        response = (
                            "No news source with ID {}.".format(ix))
                else:
                    response = "Missing feed ID."
                send_reply_message(self, message, response)
@ -655,7 +660,8 @@ async def message(self, message):

                await task.clean_tasks_xmpp(jid, ["status"])
                status_type = "dnd"
                status_message = "📫️ Marking entries as read..."
                send_status_message(self, jid, status_type, status_message)
                send_status_message(
                    self, jid, status_type, status_message)
                if source:
                    db_file = get_pathname_to_database(jid)
                    await sqlite.mark_source_as_read(db_file, source)
@ -688,9 +694,11 @@ async def message(self, message):

                key = "enabled"
                val = 1
                db_file = get_pathname_to_database(jid)
                await sqlite.set_settings_value(db_file, [key, val])
                await sqlite.set_settings_value(
                    db_file, [key, val])
                # asyncio.create_task(task_jid(self, jid))
                await task.start_tasks_xmpp(self, jid, ["interval", "status", "check"])
                await task.start_tasks_xmpp(
                    self, jid, ["interval", "status", "check"])
                response = "Updates are enabled."
                # print(current_time(), "task_manager[jid]")
                # print(task_manager[jid])
@ -747,13 +755,17 @@ async def message(self, message):

                key = "enabled"
                val = 0
                db_file = get_pathname_to_database(jid)
                await sqlite.set_settings_value(db_file, [key, val])
                await task.clean_tasks_xmpp(jid, ["interval", "status"])
                await sqlite.set_settings_value(
                    db_file, [key, val])
                await task.clean_tasks_xmpp(
                    jid, ["interval", "status"])
                response = "Updates are disabled."
                send_reply_message(self, message, response)
                status_type = "xa"
                status_message = "💡️ Send \"Start\" to receive Jabber updates"
                send_status_message(self, jid, status_type, status_message)
                status_message = (
                    "💡️ Send \"Start\" to receive Jabber updates")
                send_status_message(
                    self, jid, status_type, status_message)
            case "support":
                # TODO Send an invitation.
                response = (
@ -789,10 +801,10 @@ async def message(self, message):

            os.mkdir(data_dir)
        if not os.path.isdir(data_dir + '/logs/'):
            os.mkdir(data_dir + '/logs/')
        utility.log_as_markdown(
        log.markdown(
            current_time(), os.path.join(data_dir, "logs", jid),
            jid, message_text)
        utility.log_as_markdown(
        log.markdown(
            current_time(), os.path.join(data_dir, "logs", jid),
            self.boundjid.bare, response)
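Assuming log.markdown keeps the signature and entry layout of the removed log_as_markdown (timestamp, filename, jid, message); the JID and log path below are illustrative, a call like the ones above would append an entry of roughly this shape:

log.markdown(
    current_time(), os.path.join(data_dir, "logs", "user@example.org"),
    "user@example.org", "Hello")
# Appends to <data_dir>/logs/user@example.org.md:
# ## user@example.org
# ### <output of current_time()>
#
# Hello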