Improve code of module crawl.py
This commit is contained in:
parent
956ce69fcb
commit
9709c052ee
4 changed files with 267 additions and 267 deletions
|
@ -81,7 +81,7 @@ def is_feed(feed):
|
||||||
True or False.
|
True or False.
|
||||||
"""
|
"""
|
||||||
value = False
|
value = False
|
||||||
message = None
|
# message = None
|
||||||
if not feed.entries:
|
if not feed.entries:
|
||||||
if "version" in feed.keys():
|
if "version" in feed.keys():
|
||||||
feed["version"]
|
feed["version"]
|
||||||
|
@ -110,7 +110,6 @@ def is_feed(feed):
|
||||||
# message = (
|
# message = (
|
||||||
# "Good feed for {}"
|
# "Good feed for {}"
|
||||||
# ).format(url)
|
# ).format(url)
|
||||||
print(message)
|
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
@ -402,15 +401,11 @@ async def add_feed(db_file, url):
|
||||||
else:
|
else:
|
||||||
result = await crawl.probe_page(
|
result = await crawl.probe_page(
|
||||||
url, document)
|
url, document)
|
||||||
# TODO Check length and for a write a
|
if isinstance(result, str):
|
||||||
# unified message for a set of feeds.
|
|
||||||
# Use logging if you so choose to
|
|
||||||
# distinct the methods
|
|
||||||
if isinstance(result, list):
|
|
||||||
url = result[0]
|
|
||||||
elif isinstance(result, str):
|
|
||||||
response = result
|
response = result
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
url = result[0]
|
||||||
else:
|
else:
|
||||||
response = (
|
response = (
|
||||||
"> {}\nFailed to load URL. Reason: {}"
|
"> {}\nFailed to load URL. Reason: {}"
|
||||||
|
@ -480,15 +475,11 @@ async def view_feed(url):
|
||||||
else:
|
else:
|
||||||
result = await crawl.probe_page(
|
result = await crawl.probe_page(
|
||||||
url, document)
|
url, document)
|
||||||
# TODO Check length and for a write a
|
if isinstance(result, str):
|
||||||
# unified message for a set of feeds.
|
|
||||||
# Use logging if you so choose to
|
|
||||||
# distinct the methods
|
|
||||||
if isinstance(result, list):
|
|
||||||
url = result[0]
|
|
||||||
elif isinstance(result, str):
|
|
||||||
response = result
|
response = result
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
url = result[0]
|
||||||
else:
|
else:
|
||||||
response = (
|
response = (
|
||||||
"> {}\nFailed to load URL. Reason: {}"
|
"> {}\nFailed to load URL. Reason: {}"
|
||||||
|
@ -553,15 +544,11 @@ async def view_entry(url, num):
|
||||||
else:
|
else:
|
||||||
result = await crawl.probe_page(
|
result = await crawl.probe_page(
|
||||||
url, document)
|
url, document)
|
||||||
# TODO Check length and for a write a
|
if isinstance(result, str):
|
||||||
# unified message for a set of feeds.
|
|
||||||
# Use logging if you so choose to
|
|
||||||
# distinct the methods
|
|
||||||
if isinstance(result, list):
|
|
||||||
url = result[0]
|
|
||||||
elif isinstance(result, str):
|
|
||||||
response = result
|
response = result
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
url = result[0]
|
||||||
else:
|
else:
|
||||||
response = (
|
response = (
|
||||||
"> {}\nFailed to load URL. Reason: {}"
|
"> {}\nFailed to load URL. Reason: {}"
|
||||||
|
@ -660,8 +647,11 @@ async def scan(db_file, url):
|
||||||
db_file, "filter-deny", string)
|
db_file, "filter-deny", string)
|
||||||
if reject_list:
|
if reject_list:
|
||||||
read_status = 1
|
read_status = 1
|
||||||
|
logging.debug(
|
||||||
|
"Rejected due to keyword {}".format(reject_list))
|
||||||
if isinstance(date, int):
|
if isinstance(date, int):
|
||||||
logging.error("Variable 'date' is int:", date)
|
logging.error(
|
||||||
|
"Variable 'date' is int: {}".format(date))
|
||||||
await sqlite.add_entry(
|
await sqlite.add_entry(
|
||||||
db_file, title, link, entry_id,
|
db_file, title, link, entry_id,
|
||||||
url, date, read_status)
|
url, date, read_status)
|
||||||
|
@ -723,7 +713,7 @@ async def organize_items(db_file, urls):
|
||||||
IncompleteRead,
|
IncompleteRead,
|
||||||
error.URLError
|
error.URLError
|
||||||
) as e:
|
) as e:
|
||||||
print(e)
|
logging.error(e)
|
||||||
# TODO Print error to log
|
# TODO Print error to log
|
||||||
# None
|
# None
|
||||||
# NOTE I don't think there should be "return"
|
# NOTE I don't think there should be "return"
|
||||||
|
|
|
@ -19,6 +19,7 @@ TODO
|
||||||
|
|
||||||
from aiohttp import ClientError, ClientSession, ClientTimeout
|
from aiohttp import ClientError, ClientSession, ClientTimeout
|
||||||
from feedparser import parse
|
from feedparser import parse
|
||||||
|
import logging
|
||||||
from lxml import html
|
from lxml import html
|
||||||
import slixfeed.config as config
|
import slixfeed.config as config
|
||||||
from slixfeed.fetch import download_feed
|
from slixfeed.fetch import download_feed
|
||||||
|
@ -88,15 +89,20 @@ async def probe_page(url, document):
|
||||||
"> {}\nFailed to parse URL as feed."
|
"> {}\nFailed to parse URL as feed."
|
||||||
).format(url)
|
).format(url)
|
||||||
if not result:
|
if not result:
|
||||||
print("RSS Auto-Discovery Engaged")
|
logging.debug(
|
||||||
|
"Feed auto-discovery engaged for {}".format(url))
|
||||||
result = await feed_mode_auto_discovery(url, tree)
|
result = await feed_mode_auto_discovery(url, tree)
|
||||||
if not result:
|
if not result:
|
||||||
print("RSS Scan Mode Engaged")
|
logging.debug(
|
||||||
|
"Feed link scan mode engaged for {}".format(url))
|
||||||
result = await feed_mode_scan(url, tree)
|
result = await feed_mode_scan(url, tree)
|
||||||
if not result:
|
if not result:
|
||||||
print("RSS Arbitrary Mode Engaged")
|
logging.debug(
|
||||||
result = await feed_mode_request(url, tree)
|
"Feed arbitrary mode engaged for {}".format(url))
|
||||||
|
result = await feed_mode_guess(url, tree)
|
||||||
if not result:
|
if not result:
|
||||||
|
logging.debug(
|
||||||
|
"No feeds were found for {}".format(url))
|
||||||
result = (
|
result = (
|
||||||
"> {}\nNo news feeds were found for URL."
|
"> {}\nNo news feeds were found for URL."
|
||||||
).format(url)
|
).format(url)
|
||||||
|
@ -104,7 +110,7 @@ async def probe_page(url, document):
|
||||||
|
|
||||||
|
|
||||||
# TODO Improve scan by gradual decreasing of path
|
# TODO Improve scan by gradual decreasing of path
|
||||||
async def feed_mode_request(url, tree):
|
async def feed_mode_guess(url, tree):
|
||||||
"""
|
"""
|
||||||
Lookup for feeds by pathname using HTTP Requests.
|
Lookup for feeds by pathname using HTTP Requests.
|
||||||
|
|
||||||
|
@ -122,94 +128,26 @@ async def feed_mode_request(url, tree):
|
||||||
msg : str
|
msg : str
|
||||||
Message with URLs.
|
Message with URLs.
|
||||||
"""
|
"""
|
||||||
feeds = {}
|
urls = []
|
||||||
parted_url = urlsplit(url)
|
parted_url = urlsplit(url)
|
||||||
paths = config.get_list("lists.yaml", "pathnames")
|
paths = config.get_list("lists.yaml", "pathnames")
|
||||||
|
# Check whether URL has path (i.e. not root)
|
||||||
|
# Check parted_url.path to avoid error in case root wasn't given
|
||||||
|
# TODO Make more tests
|
||||||
|
if parted_url.path and parted_url.path.split('/')[1]:
|
||||||
|
paths.extend(
|
||||||
|
[".atom", ".feed", ".rdf", ".rss"]
|
||||||
|
) if '.rss' not in paths else -1
|
||||||
|
# if paths.index('.rss'):
|
||||||
|
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
|
||||||
for path in paths:
|
for path in paths:
|
||||||
address = urlunsplit([
|
address = join_url(url, parted_url.path.split('/')[1] + path)
|
||||||
parted_url.scheme,
|
if address not in urls:
|
||||||
parted_url.netloc,
|
urls.extend([address])
|
||||||
path,
|
# breakpoint()
|
||||||
None,
|
# print("feed_mode_guess")
|
||||||
None
|
urls = await process_feed_selection(url, urls)
|
||||||
])
|
return urls
|
||||||
res = await download_feed(address)
|
|
||||||
if res[1] == 200:
|
|
||||||
# print(parse(res[0])["feed"]["title"])
|
|
||||||
# feeds[address] = parse(res[0])["feed"]["title"]
|
|
||||||
try:
|
|
||||||
title = parse(res[0])["feed"]["title"]
|
|
||||||
except:
|
|
||||||
title = '*** No Title ***'
|
|
||||||
feeds[address] = title
|
|
||||||
# Check whether URL has path (i.e. not root)
|
|
||||||
# Check parted_url.path to avoid error in case root wasn't given
|
|
||||||
# TODO Make more tests
|
|
||||||
if parted_url.path and parted_url.path.split('/')[1]:
|
|
||||||
paths.extend(
|
|
||||||
[".atom", ".feed", ".rdf", ".rss"]
|
|
||||||
) if '.rss' not in paths else -1
|
|
||||||
# if paths.index('.rss'):
|
|
||||||
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
|
|
||||||
address = urlunsplit([
|
|
||||||
parted_url.scheme,
|
|
||||||
parted_url.netloc,
|
|
||||||
parted_url.path.split('/')[1] + path,
|
|
||||||
None,
|
|
||||||
None
|
|
||||||
])
|
|
||||||
res = await download_feed(address)
|
|
||||||
if res[1] == 200:
|
|
||||||
try:
|
|
||||||
feeds[address] = parse(res[0])
|
|
||||||
# print(feeds)
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
# TODO return feeds
|
|
||||||
if len(feeds) > 1:
|
|
||||||
counter = 0
|
|
||||||
msg = (
|
|
||||||
"RSS URL discovery has found {} feeds:\n\n```\n"
|
|
||||||
).format(len(feeds))
|
|
||||||
feed_mark = 0
|
|
||||||
for feed in feeds:
|
|
||||||
try:
|
|
||||||
feed_name = feeds[feed]["feed"]["title"]
|
|
||||||
except:
|
|
||||||
feed_name = urlsplit(feed).netloc
|
|
||||||
feed_addr = feed
|
|
||||||
# AttributeError: 'str' object has no attribute 'entries'
|
|
||||||
try:
|
|
||||||
feed_amnt = len(feeds[feed].entries)
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
if feed_amnt:
|
|
||||||
# NOTE Because there could be many false positives
|
|
||||||
# which are revealed in second phase of scan, we
|
|
||||||
# could end with a single feed, which would be
|
|
||||||
# listed instead of fetched, so feed_mark is
|
|
||||||
# utilized in order to make fetch possible.
|
|
||||||
feed_mark = [feed_addr]
|
|
||||||
counter += 1
|
|
||||||
msg += (
|
|
||||||
"Title: {}\n"
|
|
||||||
"Link : {}\n"
|
|
||||||
"Items: {}\n"
|
|
||||||
"\n"
|
|
||||||
).format(feed_name, feed_addr, feed_amnt)
|
|
||||||
if counter > 1:
|
|
||||||
msg += (
|
|
||||||
"```\nThe above feeds were extracted from\n{}"
|
|
||||||
).format(url)
|
|
||||||
elif feed_mark:
|
|
||||||
return feed_mark
|
|
||||||
else:
|
|
||||||
msg = (
|
|
||||||
"No feeds were found for {}"
|
|
||||||
).format(url)
|
|
||||||
return msg
|
|
||||||
elif feeds:
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
|
|
||||||
async def feed_mode_scan(url, tree):
|
async def feed_mode_scan(url, tree):
|
||||||
|
@ -230,9 +168,7 @@ async def feed_mode_scan(url, tree):
|
||||||
msg : str
|
msg : str
|
||||||
Message with URLs.
|
Message with URLs.
|
||||||
"""
|
"""
|
||||||
feeds = {}
|
urls = []
|
||||||
# paths = []
|
|
||||||
# TODO Test
|
|
||||||
paths = config.get_list("lists.yaml", "pathnames")
|
paths = config.get_list("lists.yaml", "pathnames")
|
||||||
for path in paths:
|
for path in paths:
|
||||||
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
|
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
|
||||||
|
@ -242,91 +178,16 @@ async def feed_mode_scan(url, tree):
|
||||||
addresses = tree.xpath(xpath_query)
|
addresses = tree.xpath(xpath_query)
|
||||||
xpath_query = "(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num)
|
xpath_query = "(//a[contains(@href,'{}')])[position()>last()-{}]".format(path, num)
|
||||||
addresses += tree.xpath(xpath_query)
|
addresses += tree.xpath(xpath_query)
|
||||||
parted_url = urlsplit(url)
|
|
||||||
# NOTE Should number of addresses be limited or
|
# NOTE Should number of addresses be limited or
|
||||||
# perhaps be N from the start and N from the end
|
# perhaps be N from the start and N from the end
|
||||||
for address in addresses:
|
for address in addresses:
|
||||||
# print(address.xpath('@href')[0])
|
address = join_url(url, address.xpath('@href')[0])
|
||||||
# print(addresses)
|
if address not in urls:
|
||||||
address = address.xpath('@href')[0]
|
urls.extend([address])
|
||||||
if "/" not in address:
|
# breakpoint()
|
||||||
protocol = parted_url.scheme
|
# print("feed_mode_scan")
|
||||||
hostname = parted_url.netloc
|
urls = await process_feed_selection(url, urls)
|
||||||
pathname = address
|
return urls
|
||||||
address = urlunsplit([
|
|
||||||
protocol,
|
|
||||||
hostname,
|
|
||||||
pathname,
|
|
||||||
None,
|
|
||||||
None
|
|
||||||
])
|
|
||||||
if address.startswith('/'):
|
|
||||||
protocol = parted_url.scheme
|
|
||||||
hostname = parted_url.netloc
|
|
||||||
pathname = address
|
|
||||||
address = urlunsplit([
|
|
||||||
protocol,
|
|
||||||
hostname,
|
|
||||||
pathname,
|
|
||||||
None,
|
|
||||||
None
|
|
||||||
])
|
|
||||||
res = await download_feed(address)
|
|
||||||
if res[1] == 200:
|
|
||||||
try:
|
|
||||||
feeds[address] = parse(res[0])
|
|
||||||
# print(feeds[address])
|
|
||||||
# breakpoint()
|
|
||||||
# print(feeds)
|
|
||||||
except:
|
|
||||||
continue
|
|
||||||
# TODO return feeds
|
|
||||||
if len(feeds) > 1:
|
|
||||||
# print(feeds)
|
|
||||||
# breakpoint()
|
|
||||||
counter = 0
|
|
||||||
msg = (
|
|
||||||
"RSS URL scan has found {} feeds:\n\n```\n"
|
|
||||||
).format(len(feeds))
|
|
||||||
feed_mark = 0
|
|
||||||
for feed in feeds:
|
|
||||||
# try:
|
|
||||||
# res = await download_feed(feed)
|
|
||||||
# except:
|
|
||||||
# continue
|
|
||||||
try:
|
|
||||||
feed_name = feeds[feed]["feed"]["title"]
|
|
||||||
except:
|
|
||||||
feed_name = urlsplit(feed).netloc
|
|
||||||
feed_addr = feed
|
|
||||||
feed_amnt = len(feeds[feed].entries)
|
|
||||||
if feed_amnt:
|
|
||||||
# NOTE Because there could be many false positives
|
|
||||||
# which are revealed in second phase of scan, we
|
|
||||||
# could end with a single feed, which would be
|
|
||||||
# listed instead of fetched, so feed_mark is
|
|
||||||
# utilized in order to make fetch possible.
|
|
||||||
feed_mark = [feed_addr]
|
|
||||||
counter += 1
|
|
||||||
msg += (
|
|
||||||
"Title : {}\n"
|
|
||||||
"Link : {}\n"
|
|
||||||
"Count : {}\n"
|
|
||||||
"\n"
|
|
||||||
).format(feed_name, feed_addr, feed_amnt)
|
|
||||||
if counter > 1:
|
|
||||||
msg += (
|
|
||||||
"```\nThe above feeds were extracted from\n{}"
|
|
||||||
).format(url)
|
|
||||||
elif feed_mark:
|
|
||||||
return feed_mark
|
|
||||||
else:
|
|
||||||
msg = (
|
|
||||||
"No feeds were found for {}"
|
|
||||||
).format(url)
|
|
||||||
return msg
|
|
||||||
elif feeds:
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
|
|
||||||
async def feed_mode_auto_discovery(url, tree):
|
async def feed_mode_auto_discovery(url, tree):
|
||||||
|
@ -358,11 +219,8 @@ async def feed_mode_auto_discovery(url, tree):
|
||||||
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
|
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
|
||||||
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
|
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
|
||||||
feeds = tree.xpath(xpath_query)
|
feeds = tree.xpath(xpath_query)
|
||||||
# TODO return feeds
|
if feeds:
|
||||||
if len(feeds) > 1:
|
urls = []
|
||||||
msg = (
|
|
||||||
"RSS Auto-Discovery has found {} feeds:\n\n```\n"
|
|
||||||
).format(len(feeds))
|
|
||||||
for feed in feeds:
|
for feed in feeds:
|
||||||
# # The following code works;
|
# # The following code works;
|
||||||
# # The following code will catch
|
# # The following code will catch
|
||||||
|
@ -373,15 +231,129 @@ async def feed_mode_auto_discovery(url, tree):
|
||||||
# disco = parse(res[0])
|
# disco = parse(res[0])
|
||||||
# title = disco["feed"]["title"]
|
# title = disco["feed"]["title"]
|
||||||
# msg += "{} \n {} \n\n".format(title, feed)
|
# msg += "{} \n {} \n\n".format(title, feed)
|
||||||
feed_name = feed.xpath('@title')[0]
|
|
||||||
feed_addr = join_url(url, feed.xpath('@href')[0])
|
# feed_name = feed.xpath('@title')[0]
|
||||||
|
# feed_addr = join_url(url, feed.xpath('@href')[0])
|
||||||
|
|
||||||
# if feed_addr.startswith("/"):
|
# if feed_addr.startswith("/"):
|
||||||
# feed_addr = url + feed_addr
|
# feed_addr = url + feed_addr
|
||||||
msg += "{}\n{}\n\n".format(feed_name, feed_addr)
|
address = join_url(url, feed.xpath('@href')[0])
|
||||||
msg += (
|
if address not in urls:
|
||||||
"```\nThe above feeds were extracted from\n{}"
|
urls.extend([address])
|
||||||
).format(url)
|
# breakpoint()
|
||||||
return msg
|
# print("feed_mode_auto_discovery")
|
||||||
elif feeds:
|
urls = await process_feed_selection(url, urls)
|
||||||
feed_addr = join_url(url, feeds[0].xpath('@href')[0])
|
return urls
|
||||||
return [feed_addr]
|
|
||||||
|
|
||||||
|
# TODO Segregate function into function that returns
|
||||||
|
# URLs (string) and Feeds (dict) and function that
|
||||||
|
# composes text message (string).
|
||||||
|
# Maybe that's not necessary.
|
||||||
|
async def process_feed_selection(url, urls):
    """
    Download candidate URLs and select those that are feeds with entries.

    Parameters
    ----------
    url : str
        Address the candidates were extracted from. It is only used
        in the text of the returned message.
    urls : list
        Candidate feed URLs to download and parse.

    Returns
    -------
    result : str, list or None
        Message listing the feeds when more than one candidate has
        entries, a single-item list holding the feed URL when exactly
        one candidate has entries, or None when no candidate has any.
    """
    # Keyed by URL so that a candidate listed twice is counted once.
    feeds = {}
    for candidate in urls:
        res = await download_feed(candidate)
        if res[1] != 200:
            continue
        try:
            feeds[candidate] = parse(res[0])
        except Exception as e:
            # A candidate that cannot be parsed is skipped, but the
            # reason is logged instead of being silently discarded.
            logging.debug("Failed to parse %s: %s", candidate, e)
            continue
    message = (
        "Web feeds found for {}\n\n```\n"
        ).format(url)
    counter = 0
    feed_url_mark = 0
    for feed_url, feed in feeds.items():
        feed_name = feed["feed"].get("title") or "Untitled"
        # Skip results that carry no entries at all.
        entries = feed.get("entries")
        if not entries:
            continue
        # NOTE Because there could be many false positives
        # which are revealed in second phase of scan, we
        # could end with a single feed, which would be
        # listed instead of fetched, so feed_url_mark is
        # utilized in order to make fetch possible.
        feed_url_mark = [feed_url]
        counter += 1
        message += (
            "Title : {}\n"
            "Link : {}\n"
            "\n"
            ).format(feed_name, feed_url)
    if counter > 1:
        message += (
            "```\nTotal of {} feeds."
            ).format(counter)
        result = message
    elif feed_url_mark:
        result = feed_url_mark
    else:
        result = None
    return result
|
||||||
|
|
||||||
|
|
||||||
|
# def get_discovered_feeds(url, urls):
|
||||||
|
# message = (
|
||||||
|
# "Found {} web feeds:\n\n```\n"
|
||||||
|
# ).format(len(urls))
|
||||||
|
# if len(urls) > 1:
|
||||||
|
# for urls in urls:
|
||||||
|
# message += (
|
||||||
|
# "Title : {}\n"
|
||||||
|
# "Link : {}\n"
|
||||||
|
# "\n"
|
||||||
|
# ).format(url, url.title)
|
||||||
|
# message += (
|
||||||
|
# "```\nThe above feeds were extracted from\n{}"
|
||||||
|
# ).format(url)
|
||||||
|
# elif len(urls) > 0:
|
||||||
|
# result = urls
|
||||||
|
# else:
|
||||||
|
# message = (
|
||||||
|
# "No feeds were found for {}"
|
||||||
|
# ).format(url)
|
||||||
|
# return result
|
||||||
|
|
||||||
|
|
||||||
|
# Test module
|
||||||
|
# TODO ModuleNotFoundError: No module named 'slixfeed'
|
||||||
|
# import slixfeed.fetch as fetch
|
||||||
|
# from slixfeed.action import is_feed, process_feed_selection
|
||||||
|
|
||||||
|
# async def start(url):
|
||||||
|
# while True:
|
||||||
|
# result = await fetch.download_feed(url)
|
||||||
|
# document = result[0]
|
||||||
|
# status = result[1]
|
||||||
|
# if document:
|
||||||
|
# feed = parse(document)
|
||||||
|
# if is_feed(feed):
|
||||||
|
# print(url)
|
||||||
|
# else:
|
||||||
|
# urls = await probe_page(
|
||||||
|
# url, document)
|
||||||
|
# if len(urls) > 1:
|
||||||
|
# await process_feed_selection(urls)
|
||||||
|
# elif urls:
|
||||||
|
# url = urls[0]
|
||||||
|
# else:
|
||||||
|
# response = (
|
||||||
|
# "> {}\nFailed to load URL. Reason: {}"
|
||||||
|
# ).format(url, status)
|
||||||
|
# break
|
||||||
|
# return response
|
||||||
|
|
||||||
|
# url = "https://www.smh.com.au/rssheadlines"
|
||||||
|
# start(url)
|
|
@ -18,6 +18,7 @@ TODO
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
import slixfeed.action as action
|
import slixfeed.action as action
|
||||||
from slixfeed.config import (
|
from slixfeed.config import (
|
||||||
|
@ -78,6 +79,38 @@ async def message(self, message):
|
||||||
"""
|
"""
|
||||||
if message["type"] in ("chat", "groupchat", "normal"):
|
if message["type"] in ("chat", "groupchat", "normal"):
|
||||||
jid = message["from"].bare
|
jid = message["from"].bare
|
||||||
|
message_text = " ".join(message["body"].split())
|
||||||
|
|
||||||
|
# NOTE This is an exceptional case in which we treat
|
||||||
|
# type groupchat the same as type chat.
|
||||||
|
if (message_text.lower().startswith("http")) and(
|
||||||
|
message_text.lower().endswith(".opml")):
|
||||||
|
url = message_text
|
||||||
|
await task.clean_tasks_xmpp(
|
||||||
|
jid, ["status"])
|
||||||
|
status_type = "dnd"
|
||||||
|
status_message = (
|
||||||
|
"📥️ Procesing request to import feeds ..."
|
||||||
|
)
|
||||||
|
send_status_message(
|
||||||
|
self, jid, status_type, status_message)
|
||||||
|
db_file = get_pathname_to_database(jid)
|
||||||
|
count = await action.import_opml(db_file, url)
|
||||||
|
if count:
|
||||||
|
response = (
|
||||||
|
"Successfully imported {} feeds"
|
||||||
|
).format(count)
|
||||||
|
else:
|
||||||
|
response = (
|
||||||
|
"OPML file was not imported."
|
||||||
|
)
|
||||||
|
await task.clean_tasks_xmpp(
|
||||||
|
jid, ["status"])
|
||||||
|
await task.start_tasks_xmpp(
|
||||||
|
self, jid, ["status"])
|
||||||
|
send_reply_message(self, message, response)
|
||||||
|
|
||||||
|
|
||||||
if message["type"] == "groupchat":
|
if message["type"] == "groupchat":
|
||||||
# nick = message["from"][message["from"].index("/")+1:]
|
# nick = message["from"][message["from"].index("/")+1:]
|
||||||
nick = str(message["from"])
|
nick = str(message["from"])
|
||||||
|
@ -135,18 +168,26 @@ async def message(self, message):
|
||||||
|
|
||||||
# await compose.message(self, jid, message)
|
# await compose.message(self, jid, message)
|
||||||
|
|
||||||
message_text = " ".join(message["body"].split())
|
|
||||||
if message["type"] == "groupchat":
|
if message["type"] == "groupchat":
|
||||||
message_text = message_text[1:]
|
message_text = message_text[1:]
|
||||||
message_lowercase = message_text.lower()
|
message_lowercase = message_text.lower()
|
||||||
|
|
||||||
print(current_time(), "ACCOUNT: " + str(message["from"]))
|
logging.debug(
|
||||||
print(current_time(), "COMMAND:", message_text)
|
[str(message["from"]), ":", message_text])
|
||||||
response = 0
|
response = None
|
||||||
match message_lowercase:
|
match message_lowercase:
|
||||||
# case "breakpoint":
|
# case "breakpoint":
|
||||||
# if jid == get_value("accounts", "XMPP", "operator"):
|
# if jid == get_value("accounts", "XMPP", "operator"):
|
||||||
# breakpoint()
|
# breakpoint()
|
||||||
|
# print("task_manager[jid]")
|
||||||
|
# print(task_manager[jid])
|
||||||
|
# await self.get_roster()
|
||||||
|
# print("roster 1")
|
||||||
|
# print(self.client_roster)
|
||||||
|
# print("roster 2")
|
||||||
|
# print(self.client_roster.keys())
|
||||||
|
# print("jid")
|
||||||
|
# print(jid)
|
||||||
# else:
|
# else:
|
||||||
# response = (
|
# response = (
|
||||||
# "This action is restricted. "
|
# "This action is restricted. "
|
||||||
|
@ -171,15 +212,6 @@ async def message(self, message):
|
||||||
"Send \"help\" for instructions.\n"
|
"Send \"help\" for instructions.\n"
|
||||||
)
|
)
|
||||||
send_reply_message(self, message, response)
|
send_reply_message(self, message, response)
|
||||||
# print("task_manager[jid]")
|
|
||||||
# print(task_manager[jid])
|
|
||||||
await self.get_roster()
|
|
||||||
print("roster 1")
|
|
||||||
print(self.client_roster)
|
|
||||||
print("roster 2")
|
|
||||||
print(self.client_roster.keys())
|
|
||||||
print("jid")
|
|
||||||
print(jid)
|
|
||||||
|
|
||||||
# case _ if message_lowercase.startswith("activate"):
|
# case _ if message_lowercase.startswith("activate"):
|
||||||
# if message["type"] == "groupchat":
|
# if message["type"] == "groupchat":
|
||||||
|
@ -242,8 +274,8 @@ async def message(self, message):
|
||||||
response = (
|
response = (
|
||||||
"> {}\nNews source \"{}\" is already "
|
"> {}\nNews source \"{}\" is already "
|
||||||
"listed in the subscription list at "
|
"listed in the subscription list at "
|
||||||
"index {}".format(url, name, ix)
|
"index {}"
|
||||||
)
|
).format(url, name, ix)
|
||||||
else:
|
else:
|
||||||
response = "Missing URL."
|
response = "Missing URL."
|
||||||
send_reply_message(self, message, response)
|
send_reply_message(self, message, response)
|
||||||
|
@ -406,32 +438,32 @@ async def message(self, message):
|
||||||
message_lowercase.startswith("gopher:")):
|
message_lowercase.startswith("gopher:")):
|
||||||
response = "Gemini and Gopher are not supported yet."
|
response = "Gemini and Gopher are not supported yet."
|
||||||
send_reply_message(self, message, response)
|
send_reply_message(self, message, response)
|
||||||
case _ if (message_lowercase.startswith("http")) and(
|
# case _ if (message_lowercase.startswith("http")) and(
|
||||||
message_lowercase.endswith(".opml")):
|
# message_lowercase.endswith(".opml")):
|
||||||
url = message_text
|
# url = message_text
|
||||||
await task.clean_tasks_xmpp(
|
# await task.clean_tasks_xmpp(
|
||||||
jid, ["status"])
|
# jid, ["status"])
|
||||||
status_type = "dnd"
|
# status_type = "dnd"
|
||||||
status_message = (
|
# status_message = (
|
||||||
"📥️ Procesing request to import feeds ..."
|
# "📥️ Procesing request to import feeds ..."
|
||||||
)
|
# )
|
||||||
send_status_message(
|
# send_status_message(
|
||||||
self, jid, status_type, status_message)
|
# self, jid, status_type, status_message)
|
||||||
db_file = get_pathname_to_database(jid)
|
# db_file = get_pathname_to_database(jid)
|
||||||
count = await action.import_opml(db_file, url)
|
# count = await action.import_opml(db_file, url)
|
||||||
if count:
|
# if count:
|
||||||
response = (
|
# response = (
|
||||||
"Successfully imported {} feeds"
|
# "Successfully imported {} feeds"
|
||||||
).format(count)
|
# ).format(count)
|
||||||
else:
|
# else:
|
||||||
response = (
|
# response = (
|
||||||
"OPML file was not imported."
|
# "OPML file was not imported."
|
||||||
)
|
# )
|
||||||
await task.clean_tasks_xmpp(
|
# await task.clean_tasks_xmpp(
|
||||||
jid, ["status"])
|
# jid, ["status"])
|
||||||
await task.start_tasks_xmpp(
|
# await task.start_tasks_xmpp(
|
||||||
self, jid, ["status"])
|
# self, jid, ["status"])
|
||||||
send_reply_message(self, message, response)
|
# send_reply_message(self, message, response)
|
||||||
case _ if (message_lowercase.startswith("http") or
|
case _ if (message_lowercase.startswith("http") or
|
||||||
message_lowercase.startswith("feed:")):
|
message_lowercase.startswith("feed:")):
|
||||||
url = message_text
|
url = message_text
|
||||||
|
@ -447,7 +479,8 @@ async def message(self, message):
|
||||||
url = uri.feed_to_http(url)
|
url = uri.feed_to_http(url)
|
||||||
url = (uri.replace_hostname(url, "feed")) or url
|
url = (uri.replace_hostname(url, "feed")) or url
|
||||||
db_file = get_pathname_to_database(jid)
|
db_file = get_pathname_to_database(jid)
|
||||||
response = await action.add_feed(db_file, url)
|
response = await action.add_feed(
|
||||||
|
db_file, url)
|
||||||
await task.clean_tasks_xmpp(
|
await task.clean_tasks_xmpp(
|
||||||
jid, ["status"])
|
jid, ["status"])
|
||||||
await task.start_tasks_xmpp(
|
await task.start_tasks_xmpp(
|
||||||
|
@ -458,8 +491,10 @@ async def message(self, message):
|
||||||
if query:
|
if query:
|
||||||
if len(query) > 3:
|
if len(query) > 3:
|
||||||
db_file = get_pathname_to_database(jid)
|
db_file = get_pathname_to_database(jid)
|
||||||
result = await sqlite.search_feeds(db_file, query)
|
result = await sqlite.search_feeds(
|
||||||
response = action.list_feeds_by_query(query, result)
|
db_file, query)
|
||||||
|
response = action.list_feeds_by_query(
|
||||||
|
query, result)
|
||||||
else:
|
else:
|
||||||
response = (
|
response = (
|
||||||
"Enter at least 4 characters to search"
|
"Enter at least 4 characters to search"
|
||||||
|
@ -506,11 +541,11 @@ async def message(self, message):
|
||||||
await groupchat.join(self, jid, muc_jid)
|
await groupchat.join(self, jid, muc_jid)
|
||||||
response = (
|
response = (
|
||||||
"Joined groupchat {}"
|
"Joined groupchat {}"
|
||||||
).format(message_text)
|
).format(message_text)
|
||||||
else:
|
else:
|
||||||
response = (
|
response = (
|
||||||
"> {}\nXMPP URI is not valid."
|
"> {}\nXMPP URI is not valid."
|
||||||
).format(message_text)
|
).format(message_text)
|
||||||
send_reply_message(self, message, response)
|
send_reply_message(self, message, response)
|
||||||
case _ if message_lowercase.startswith("length"):
|
case _ if message_lowercase.startswith("length"):
|
||||||
key = message_text[:6]
|
key = message_text[:6]
|
||||||
|
@ -685,16 +720,19 @@ async def message(self, message):
|
||||||
db_file, ix)
|
db_file, ix)
|
||||||
response = (
|
response = (
|
||||||
"> {}\nNews source {} has been removed "
|
"> {}\nNews source {} has been removed "
|
||||||
"from subscription list.").format(url, ix)
|
"from subscription list."
|
||||||
|
).format(url, ix)
|
||||||
except:
|
except:
|
||||||
response = (
|
response = (
|
||||||
"No news source with ID {}.".format(ix))
|
"No news source with ID {}."
|
||||||
|
).format(ix)
|
||||||
except:
|
except:
|
||||||
url = ix_url
|
url = ix_url
|
||||||
await sqlite.remove_feed_by_url(db_file, url)
|
await sqlite.remove_feed_by_url(db_file, url)
|
||||||
response = (
|
response = (
|
||||||
"> {}\nNews source has been removed "
|
"> {}\nNews source has been removed "
|
||||||
"from subscription list.").format(url)
|
"from subscription list."
|
||||||
|
).format(url)
|
||||||
# await refresh_task(
|
# await refresh_task(
|
||||||
# self,
|
# self,
|
||||||
# jid,
|
# jid,
|
||||||
|
@ -835,11 +873,11 @@ async def message(self, message):
|
||||||
await groupchat.join(self, jid, muc_jid)
|
await groupchat.join(self, jid, muc_jid)
|
||||||
response = (
|
response = (
|
||||||
"Joined groupchat {}"
|
"Joined groupchat {}"
|
||||||
).format(message_text)
|
).format(message_text)
|
||||||
else:
|
else:
|
||||||
response = (
|
response = (
|
||||||
"> {}\nXMPP URI is not valid."
|
"> {}\nXMPP URI is not valid."
|
||||||
).format(message_text)
|
).format(message_text)
|
||||||
send_reply_message(self, message, response)
|
send_reply_message(self, message, response)
|
||||||
case _:
|
case _:
|
||||||
response = (
|
response = (
|
||||||
|
|
|
@ -216,7 +216,7 @@ def print_help():
|
||||||
" info\n"
|
" info\n"
|
||||||
" Print information page.\n"
|
" Print information page.\n"
|
||||||
" support\n"
|
" support\n"
|
||||||
" Join xmpp:slixmpp@muc.poez.io?join\n"
|
" Join xmpp:slixfeed@chat.woodpeckersnest.space?join\n"
|
||||||
# "\n"
|
# "\n"
|
||||||
# "PROTOCOLS\n"
|
# "PROTOCOLS\n"
|
||||||
# " Supported prootcols are IRC, Matrix and XMPP.\n"
|
# " Supported prootcols are IRC, Matrix and XMPP.\n"
|
||||||
|
|
Loading…
Reference in a new issue