forked from sch/Slixfeed

Fix command export; Restructure code.

This commit is contained in:
parent 15e6a1de66
commit 93ea8a9fab

17 changed files with 1066 additions and 1184 deletions
slixfeed/crawl.py (file deleted)
@@ -1,436 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-
-FIXME
-
-1) https://wiki.pine64.org
-     File "/slixfeed/crawl.py", line 178, in feed_mode_guess
-       address = join_url(url, parted_url.path.split('/')[1] + path)
-                               ~~~~~~~~~~~~~~~~~~~~~~~~~~^^^
-   IndexError: list index out of range
-
-TODO
-
-1.1) Attempt to scan more paths: /blog/, /news/ etc., including root /
-     Attempt to scan sub domains
-     https://esmailelbob.xyz/en/
-     https://blog.esmailelbob.xyz/feed/
-
-1.2) Consider utilizing fetch.http_response
-
-2) Consider merging with module fetch.py
-
-FEEDS CRAWLER PROJECT
-
-3) Mark redirects for manual check
-
-Title : JSON Feed
-Link : https://www.jsonfeed.org/feed.json.xml
-
-Title : JSON Feed
-Link : https://www.jsonfeed.org/feed.json/atom.xml
-
-Title : JSON Feed
-Link : https://www.jsonfeed.org/feed.json/feed.xml
-
-Title : JSON Feed
-Link : https://www.jsonfeed.org/feed.json/feeds/rss/news.xml.php
-
-Title : JSON Feed
-Link : https://www.jsonfeed.org/feed.json/jekyll/feed.xml
-
-Title : JSON Feed
-Link : https://www.jsonfeed.org/feed.json/news.xml
-
-Title : JSON Feed
-Link : https://www.jsonfeed.org/feed.json/news.xml.php
-
-Title : JSON Feed
-Link : https://www.jsonfeed.org/feed.json/rdf.xml
-
-Title : JSON Feed
-Link : https://www.jsonfeed.org/feed.json/rss.xml
-
-Title : JSON Feed
-Link : https://www.jsonfeed.org/feed.json/videos.xml
-
-"""
-
-from aiohttp import ClientError, ClientSession, ClientTimeout
-from feedparser import parse
-import logging
-from lxml import etree
-from lxml import html
-from lxml.etree import fromstring
-import slixfeed.config as config
-import slixfeed.fetch as fetch
-from slixfeed.log import Logger
-from slixfeed.url import complete_url, join_url, trim_url
-from urllib.parse import urlsplit, urlunsplit
-
-
-# TODO Use boolean as a flag to determine whether a single URL was found
-# async def probe_page(
-#         callback, url, document, num=None, db_file=None):
-#     result = None
-#     try:
-#         # tree = etree.fromstring(res[0]) # etree is for xml
-#         tree = html.fromstring(document)
-#     except:
-#         result = (
-#             "> {}\nFailed to parse URL as feed."
-#             ).format(url)
-#     if not result:
-#         print("RSS Auto-Discovery Engaged")
-#         result = await feed_mode_auto_discovery(url, tree)
-#     if not result:
-#         print("RSS Scan Mode Engaged")
-#         result = await feed_mode_scan(url, tree)
-#     if not result:
-#         print("RSS Arbitrary Mode Engaged")
-#         result = await feed_mode_request(url, tree)
-#     if not result:
-#         result = (
-#             "> {}\nNo news feeds were found for URL."
-#             ).format(url)
-#     # elif msg:
-#     else:
-#         if isinstance(result, str):
-#             return result
-#         elif isinstance(result, list):
-#             url = result[0]
-#             if db_file:
-#                 # print("if db_file", db_file)
-#                 return await callback(db_file, url)
-#             elif num:
-#                 return await callback(url, num)
-#             else:
-#                 return await callback(url)
-
-
-logger = Logger(__name__)
-
-
-async def probe_page(url, document=None):
-    """
-    Parameters
-    ----------
-    url : str
-        URL.
-    document : TYPE
-        DESCRIPTION.
-
-    Returns
-    -------
-    result : list or str
-        Single URL as list or selection of URLs as str.
-    """
-    if not document:
-        response = await fetch.http(url)
-        if not response['error']:
-            document = response['content']
-    try:
-        # tree = etree.fromstring(res[0]) # etree is for xml
-        tree = html.fromstring(document)
-        result = None
-    except Exception as e:
-        logger.error(str(e))
-        try:
-            # /questions/15830421/xml-unicode-strings-with-encoding-declaration-are-not-supported
-            # xml = html.fromstring(document.encode('utf-8'))
-            # parser = etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
-            # tree = fromstring(xml, parser=parser)
-
-            # /questions/57833080/how-to-fix-unicode-strings-with-encoding-declaration-are-not-supported
-            #tree = html.fromstring(bytes(document, encoding='utf8'))
-
-            # https://twigstechtips.blogspot.com/2013/06/python-lxml-strings-with-encoding.html
-            #parser = etree.XMLParser(recover=True)
-            #tree = etree.fromstring(document, parser)
-
-            tree = html.fromstring(document.encode('utf-8'))
-            result = None
-        except Exception as e:
-            logger.error(str(e))
-            logger.warning("Failed to parse URL as feed for {}.".format(url))
-            result = {'link' : None,
-                      'index' : None,
-                      'name' : None,
-                      'code' : None,
-                      'error' : True,
-                      'exist' : None}
-    if not result:
-        logger.debug("Feed auto-discovery engaged for {}".format(url))
-        result = await feed_mode_auto_discovery(url, tree)
-    if not result:
-        logger.debug("Feed link scan mode engaged for {}".format(url))
-        result = await feed_mode_scan(url, tree)
-    if not result:
-        logger.debug("Feed arbitrary mode engaged for {}".format(url))
-        result = await feed_mode_guess(url, tree)
-    if not result:
-        logger.debug("No feeds were found for {}".format(url))
-        result = None
-    return result
-
-
-# TODO Improve scan by gradual decreasing of path
-async def feed_mode_guess(url, tree):
-    """
-    Lookup for feeds by pathname using HTTP Requests.
-
-    Parameters
-    ----------
-    db_file : str
-        Path to database file.
-    url : str
-        URL.
-    tree : TYPE
-        DESCRIPTION.
-
-    Returns
-    -------
-    msg : str
-        Message with URLs.
-    """
-    urls = []
-    parted_url = urlsplit(url)
-    paths = config.open_config_file("lists.toml")["pathnames"]
-    # Check whether URL has path (i.e. not root)
-    # Check parted_url.path to avoid error in case root wasn't given
-    # TODO Make more tests
-    if parted_url.path and parted_url.path.split('/')[1]:
-        paths.extend(
-            [".atom", ".feed", ".rdf", ".rss"]
-            ) if '.rss' not in paths else -1
-        # if paths.index('.rss'):
-        #     paths.extend([".atom", ".feed", ".rdf", ".rss"])
-    parted_url_path = parted_url.path if parted_url.path else '/'
-    for path in paths:
-        address = join_url(url, parted_url_path.split('/')[1] + path)
-        if address not in urls:
-            urls.extend([address])
-    # breakpoint()
-    # print("feed_mode_guess")
-    urls = await process_feed_selection(url, urls)
-    return urls
-
-
-async def feed_mode_scan(url, tree):
-    """
-    Scan page for potential feeds by pathname.
-
-    Parameters
-    ----------
-    db_file : str
-        Path to database file.
-    url : str
-        URL.
-    tree : TYPE
-        DESCRIPTION.
-
-    Returns
-    -------
-    msg : str
-        Message with URLs.
-    """
-    urls = []
-    paths = config.open_config_file("lists.toml")["pathnames"]
-    for path in paths:
-        # xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
-        # xpath_query = "//a[contains(@href,'{}')]".format(path)
-        num = 5
-        xpath_query = (
-            "(//a[contains(@href,'{}')])[position()<={}]"
-            ).format(path, num)
-        addresses = tree.xpath(xpath_query)
-        xpath_query = (
-            "(//a[contains(@href,'{}')])[position()>last()-{}]"
-            ).format(path, num)
-        addresses += tree.xpath(xpath_query)
-        # NOTE Should number of addresses be limited or
-        #      perhaps be N from the start and N from the end
-        for address in addresses:
-            address = join_url(url, address.xpath('@href')[0])
-            if address not in urls:
-                urls.extend([address])
-    # breakpoint()
-    # print("feed_mode_scan")
-    urls = await process_feed_selection(url, urls)
-    return urls
-
-
-async def feed_mode_auto_discovery(url, tree):
-    """
-    Lookup for feeds using RSS autodiscovery technique.
-
-    See: https://www.rssboard.org/rss-autodiscovery
-
-    Parameters
-    ----------
-    db_file : str
-        Path to database file.
-    url : str
-        URL.
-    tree : TYPE
-        DESCRIPTION.
-
-    Returns
-    -------
-    msg : str
-        Message with URLs.
-    """
-    xpath_query = (
-        '//link[(@rel="alternate") and '
-        '(@type="application/atom+xml" or '
-        '@type="application/rdf+xml" or '
-        '@type="application/rss+xml")]'
-        )
-    # xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
-    # xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
-    feeds = tree.xpath(xpath_query)
-    if feeds:
-        urls = []
-        for feed in feeds:
-            # # The following code works;
-            # # The following code will catch
-            # # only valid resources (i.e. not 404);
-            # # The following code requires more bandwidth.
-            # res = await fetch.http(feed)
-            # if res[0]:
-            #     disco = parse(res[0])
-            #     title = disco["feed"]["title"]
-            #     msg += "{} \n {} \n\n".format(title, feed)
-
-            # feed_name = feed.xpath('@title')[0]
-            # feed_addr = join_url(url, feed.xpath('@href')[0])
-
-            # if feed_addr.startswith("/"):
-            #     feed_addr = url + feed_addr
-            address = join_url(url, feed.xpath('@href')[0])
-            if address not in urls:
-                urls.extend([address])
-        # breakpoint()
-        # print("feed_mode_auto_discovery")
-        urls = await process_feed_selection(url, urls)
-        return urls
-
-
-# TODO Segregate function into function that returns
-#      URLs (string) and Feeds (dict) and function that
-#      composes text message (string).
-#      Maybe that's not necessary.
-async def process_feed_selection(url, urls):
-    feeds = {}
-    for i in urls:
-        result = await fetch.http(i)
-        if not result['error']:
-            document = result['content']
-            status_code = result['status_code']
-            if status_code == 200: # NOTE This line might be redundant
-                try:
-                    feeds[i] = [parse(document)]
-                except:
-                    continue
-    message = (
-        "Web feeds found for {}\n\n```\n"
-        ).format(url)
-    urls = []
-    for feed_url in feeds:
-        # try:
-        #     res = await fetch.http(feed)
-        # except:
-        #     continue
-        feed_name = None
-        if "title" in feeds[feed_url][0]["feed"].keys():
-            feed_name = feeds[feed_url][0].feed.title
-        feed_name = feed_name if feed_name else "Untitled"
-        # feed_name = feed_name if feed_name else urlsplit(feed_url).netloc
-        # AttributeError: 'str' object has no attribute 'entries'
-        if "entries" in feeds[feed_url][0].keys():
-            feed_amnt = feeds[feed_url][0].entries
-        else:
-            continue
-        if feed_amnt:
-            # NOTE Because there could be many false positives
-            #      which are revealed in second phase of scan, we
-            #      could end with a single feed, which would be
-            #      listed instead of fetched, so feed_url_mark is
-            #      utilized in order to make fetch possible.
-            # NOTE feed_url_mark was a variable which stored
-            #      single URL (probably first accepted as valid)
-            #      in order to get an indication whether a single
-            #      URL has been fetched, so that the receiving
-            #      function will scan that single URL instead of
-            #      listing it as a message.
-            url = {'link' : feed_url,
-                   'index' : None,
-                   'name' : feed_name,
-                   'code' : status_code,
-                   'error' : False,
-                   'exist' : None}
-            urls.extend([url])
-    count = len(urls)
-    if count > 1:
-        result = urls
-    elif count:
-        result = urls[0]
-    else:
-        result = None
-    return result
-
-
-# def get_discovered_feeds(url, urls):
-#     message = (
-#         "Found {} web feeds:\n\n```\n"
-#         ).format(len(urls))
-#     if len(urls) > 1:
-#         for urls in urls:
-#             message += (
-#                 "Title : {}\n"
-#                 "Link : {}\n"
-#                 "\n"
-#                 ).format(url, url.title)
-#         message += (
-#             "```\nThe above feeds were extracted from\n{}"
-#             ).format(url)
-#     elif len(urls) > 0:
-#         result = urls
-#     else:
-#         message = (
-#             "No feeds were found for {}"
-#             ).format(url)
-#     return result
-
-
-# Test module
-# TODO ModuleNotFoundError: No module named 'slixfeed'
-# import slixfeed.fetch as fetch
-# from slixfeed.action import is_feed, process_feed_selection
-
-# async def start(url):
-#     while True:
-#         result = await fetch.http(url)
-#         document = result[0]
-#         status = result[1]
-#         if document:
-#             feed = parse(document)
-#             if is_feed(feed):
-#                 print(url)
-#             else:
-#                 urls = await probe_page(
-#                     url, document)
-#                 if len(urls) > 1:
-#                     await process_feed_selection(urls)
-#                 elif urls:
-#                     url = urls[0]
-#         else:
-#             response = (
-#                 "> {}\nFailed to load URL. Reason: {}"
-#                 ).format(url, status)
-#             break
-#     return response

-# url = "https://www.smh.com.au/rssheadlines"
-# start(url)
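
The three discovery stages in the deleted module reduce to two lxml XPath patterns plus a pathname guess. A minimal standalone sketch of the first two stages, run against a hypothetical page (only lxml is required; the HTML is illustrative):

from lxml import html

# Hypothetical page advertising one feed via RSS auto-discovery and
# linking another in the body.
document = (
    '<html><head>'
    '<link rel="alternate" type="application/rss+xml" href="/feed.rss"/>'
    '</head><body><a href="/blog/atom.xml">subscribe</a></body></html>')
tree = html.fromstring(document)

# Stage 1, feed_mode_auto_discovery: <link rel="alternate"> elements.
hits = tree.xpath('//link[(@rel="alternate") and '
                  '(@type="application/rss+xml")]')
print([hit.xpath('@href')[0] for hit in hits])  # ['/feed.rss']

# Stage 2, feed_mode_scan: first/last N anchors whose href contains a
# known feed pathname (here 'atom').
print(tree.xpath("(//a[contains(@href,'atom')])[position()<=5]/@href"))
# ['/blog/atom.xml']
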
slixfeed/dt.py (114 deletions)
@@ -1,114 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-https://feedparser.readthedocs.io/en/latest/date-parsing.html
-"""
-
-from datetime import datetime
-from dateutil.parser import parse
-from email.utils import parsedate, parsedate_to_datetime
-
-
-def now():
-    """
-    ISO 8601 Timestamp.
-
-    Returns
-    -------
-    date : ???
-        ISO 8601 Timestamp.
-    """
-    date = datetime.now().isoformat()
-    return date
-
-
-def convert_struct_time_to_iso8601(struct_time):
-    date = datetime(*struct_time[:6])
-    date = date.isoformat()
-    return date
-
-
-def current_date():
-    """
-    Print MM DD, YYYY (Weekday Time) timestamp.
-
-    Returns
-    -------
-    date : str
-        MM DD, YYYY (Weekday Time) timestamp.
-    """
-    now = datetime.now()
-    time = now.strftime("%B %d, %Y (%A %T)")
-    return time
-
-
-def current_time():
-    """
-    Print HH:MM:SS timestamp.
-
-    Returns
-    -------
-    date : str
-        HH:MM:SS timestamp.
-    """
-    now = datetime.now()
-    time = now.strftime("%H:%M:%S")
-    return time
-
-
-def timestamp():
-    """
-    Print time stamp to be used in filename.
-
-    Returns
-    -------
-    formatted_time : str
-        %Y%m%d-%H%M%S timestamp.
-    """
-    now = datetime.now()
-    formatted_time = now.strftime("%Y%m%d-%H%M%S")
-    return formatted_time
-
-
-def validate(date):
-    """
-    Validate date format.
-
-    Parameters
-    ----------
-    date : str
-        Timestamp.
-
-    Returns
-    -------
-    date : str
-        Timestamp.
-    """
-    try:
-        parse(date)
-    except:
-        date = now()
-    return date
-
-
-def rfc2822_to_iso8601(date):
-    """
-    Convert RFC 2822 into ISO 8601.
-
-    Parameters
-    ----------
-    date : str
-        RFC 2822 Timestamp.
-
-    Returns
-    -------
-    date : str
-        ISO 8601 Timestamp.
-    """
-    if parsedate(date):
-        try:
-            date = parsedate_to_datetime(date)
-            date = date.isoformat()
-        except:
-            date = now()
-    return date
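
The conversion rfc2822_to_iso8601() performs is pure standard library; a quick check with a typical RSS pubDate value:

from email.utils import parsedate, parsedate_to_datetime

# Typical RSS <pubDate> value (RFC 2822).
date = "Sat, 02 Mar 2024 10:30:00 +0000"
if parsedate(date):  # same sanity check the deleted module used
    print(parsedate_to_datetime(date).isoformat())
# 2024-03-02T10:30:00+00:00
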
@@ -1,19 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-
-TODO
-
-Move code from sqlite.get_entry_unread
-
-if num > 1:
-    news_list += (
-        "\n{}\n{}\n{}\n"
-        ).format(str(title), str(link), str(feed_title))
-else:
-    news_list = (
-        "{}\n{}\n{}"
-        ).format(str(title), str(link), str(feed_title))
-
-"""
@@ -1,74 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-"""
-
-TODO
-
-1) is_feed: Look into the type ("atom", "rss2" etc.)
-
-"""
-
-
-def title(feed):
-    """
-    Get title of feed.
-
-    Parameters
-    ----------
-    url : str
-        URL.
-    feed : dict
-        Parsed feed document.
-
-    Returns
-    -------
-    title : str
-        Title or None.
-    """
-    try:
-        title = feed["feed"]["title"]
-    except:
-        title = None
-    return title
-
-
-def is_feed(feed):
-    """
-    Determine whether document is feed or not.
-
-    Parameters
-    ----------
-    feed : dict
-        Parsed feed.
-
-    Returns
-    -------
-    val : boolean
-        True or False.
-    """
-    msg = None
-    if not feed.entries:
-        try:
-            feed["feed"]["title"]
-            val = True
-            # msg = (
-            #     "Empty feed for {}"
-            #     ).format(url)
-        except:
-            val = False
-            # msg = (
-            #     "No entries nor title for {}"
-            #     ).format(url)
-    elif feed.bozo:
-        val = False
-        # msg = (
-        #     "Bozo detected for {}"
-        #     ).format(url)
-    else:
-        val = True
-        # msg = (
-        #     "Good feed for {}"
-        #     ).format(url)
-    print(msg)
-    return val
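
is_feed() keys off two feedparser signals, entries and the bozo flag, falling back to the feed title for empty-but-valid feeds. A small sketch of those signals on sample input (the output comments are expected values, not captured):

from feedparser import parse

good = parse('<rss version="2.0"><channel><title>T</title>'
             '<item><title>first</title></item></channel></rss>')
junk = parse('plain text, not a feed')

# is_feed() logic: entries present -> True (unless bozo);
# no entries but a feed title -> True; neither -> False.
print(bool(good.entries), "title" in good.feed)   # True True
print(bool(junk.entries), "title" in junk.feed)   # False False
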
@@ -20,9 +20,8 @@ TODO
 """
 
 from asyncio import Lock
-import slixfeed.dt as dt
 from slixfeed.log import Logger
-from slixfeed.url import join_url
+from slixfeed.utilities import DateAndTime, Url
 from sqlite3 import connect, Error, IntegrityError
 import sys
 import time
@@ -2736,7 +2735,7 @@ def get_invalid_entries(db_file, url, feed):
             title = feed["feed"]["title"]
         # Prepare a link to compare
         if entry.has_key("link"):
-            link = join_url(url, entry.link)
+            link = Url.join_url(url, entry.link)
         else:
             link = url
         # Compare date, link and title
@@ -2745,7 +2744,7 @@ def get_invalid_entries(db_file, url, feed):
            # print("compare published:", title, link, time)
            # print("compare published:", entry_title, entry_link, timestamp)
            # print("============")
-           time = dt.rfc2822_to_iso8601(entry.published)
+           time = DateAndTime.rfc2822_to_iso8601(entry.published)
            if (entry_title == title and
                entry_link == link and
                timestamp == time):
@@ -29,12 +29,10 @@ from feedparser import parse
 import os
 import slixfeed.config as config
 from slixfeed.config import Config
-import slixfeed.crawl as crawl
-import slixfeed.dt as dt
 import slixfeed.fetch as fetch
 from slixfeed.log import Logger
 import slixfeed.sqlite as sqlite
-from slixfeed.url import join_url, trim_url
+from slixfeed.utilities import DateAndTime, Url
 from slixfeed.utilities import Html, MD
 from slixmpp.xmlstream import ET
 import sys
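
The pattern across the following hunks is mechanical: the deleted dt and url modules reappear as DateAndTime and Url namespaces in slixfeed.utilities. Only the call sites are confirmed by this diff; a minimal sketch of the implied shape, assuming plain namespace-style methods:

# Hedged sketch of slixfeed/utilities.py after the restructuring;
# only the method names are confirmed by the call sites in this commit.
from datetime import datetime
from email.utils import parsedate, parsedate_to_datetime
from urllib.parse import urljoin


class DateAndTime:

    def now():
        return datetime.now().isoformat()

    def rfc2822_to_iso8601(date):
        # Same behavior the deleted dt.py had.
        if parsedate(date):
            try:
                date = parsedate_to_datetime(date).isoformat()
            except Exception:
                date = DateAndTime.now()
        return date


class Url:

    def join_url(source, link):
        # Simplified; the deleted url.py also special-cased
        # data: URIs, 'www.' prefixes and '%20' wrapping.
        return urljoin(source, link)

Called on the class itself (Url.join_url(url, link)), plain defs like these behave as static methods, which matches every call site in the diff.
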
@@ -56,7 +54,7 @@ class Feed:
         if not os.path.isdir(cache_dir + '/' + ext):
             os.mkdir(cache_dir + '/' + ext)
         filename = os.path.join(
-            cache_dir, ext, 'slixfeed_' + dt.timestamp() + '.' + ext)
+            cache_dir, ext, 'slixfeed_' + DateAndTime.timestamp() + '.' + ext)
         db_file = config.get_pathname_to_database(jid_bare)
         results = sqlite.get_feeds(db_file)
         match ext:
@@ -220,6 +218,7 @@ class Feed:
         return node_entry
 
 
+    # Look into the type ("atom", "rss2" etc.)
     def is_feed(url, feed):
         """
         Determine whether document is feed or not.
@@ -301,7 +300,7 @@ class Feed:
         if "updated_parsed" in feed["feed"].keys():
             updated = feed["feed"]["updated_parsed"]
             try:
-                updated = dt.convert_struct_time_to_iso8601(updated)
+                updated = DateAndTime.convert_struct_time_to_iso8601(updated)
             except Exception as e:
                 logger.error(str(e))
                 updated = ''
@@ -325,7 +324,7 @@ class Feed:
         if feed.has_key('updated_parsed'):
             feed_updated = feed.updated_parsed
             try:
-                feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
+                feed_updated = DateAndTime.convert_struct_time_to_iso8601(feed_updated)
             except Exception as e:
                 logger.error(str(e))
                 feed_updated = None
@@ -357,7 +356,7 @@ class Feed:
         # NOTE Do not be tempted to return a compact dictionary.
         #      That is, dictionary within dictionary
         #      Return multiple dictionaries in a list or tuple.
-        result = await crawl.probe_page(url, document)
+        result = await FeedDiscovery.probe_page(url, document)
         if not result:
             # Get out of the loop with dict indicating error.
             result_final = {'link' : url,
@@ -437,16 +436,16 @@ class Feed:
             title = "*** No title ***"
         if entry.has_key("link"):
             # link = complete_url(source, entry.link)
-            link = join_url(url, entry.link)
-            link = trim_url(link)
+            link = Url.join_url(url, entry.link)
+            link = Url.trim_url(link)
         else:
             link = "*** No link ***"
         if entry.has_key("published"):
             date = entry.published
-            date = dt.rfc2822_to_iso8601(date)
+            date = DateAndTime.rfc2822_to_iso8601(date)
         elif entry.has_key("updated"):
             date = entry.updated
-            date = dt.rfc2822_to_iso8601(date)
+            date = DateAndTime.rfc2822_to_iso8601(date)
         else:
             date = "*** No date ***"
         response += ("Title : {}\n"
@@ -481,10 +480,10 @@ class Feed:
             title = '*** No title ***'
         if entry.has_key("published"):
             date = entry.published
-            date = dt.rfc2822_to_iso8601(date)
+            date = DateAndTime.rfc2822_to_iso8601(date)
         elif entry.has_key("updated"):
             date = entry.updated
-            date = dt.rfc2822_to_iso8601(date)
+            date = DateAndTime.rfc2822_to_iso8601(date)
         else:
             date = '*** No date ***'
         if entry.has_key("summary"):
|
||||||
summary = '*** No summary ***'
|
summary = '*** No summary ***'
|
||||||
if entry.has_key("link"):
|
if entry.has_key("link"):
|
||||||
# link = complete_url(source, entry.link)
|
# link = complete_url(source, entry.link)
|
||||||
link = join_url(url, entry.link)
|
link = Url.join_url(url, entry.link)
|
||||||
link = trim_url(link)
|
link = Url.trim_url(link)
|
||||||
else:
|
else:
|
||||||
link = '*** No link ***'
|
link = '*** No link ***'
|
||||||
response = ("{}\n"
|
response = ("{}\n"
|
||||||
|
@ -543,7 +542,7 @@ class Feed:
|
||||||
if feed.has_key('updated_parsed'):
|
if feed.has_key('updated_parsed'):
|
||||||
feed_updated = feed.updated_parsed
|
feed_updated = feed.updated_parsed
|
||||||
try:
|
try:
|
||||||
feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
|
feed_updated = DateAndTime.convert_struct_time_to_iso8601(feed_updated)
|
||||||
except:
|
except:
|
||||||
feed_updated = ''
|
feed_updated = ''
|
||||||
else:
|
else:
|
||||||
|
@ -598,18 +597,18 @@ class Feed:
|
||||||
logger.debug('{}: entry: {}'.format(function_name, entry.link))
|
logger.debug('{}: entry: {}'.format(function_name, entry.link))
|
||||||
if entry.has_key("published"):
|
if entry.has_key("published"):
|
||||||
entry_published = entry.published
|
entry_published = entry.published
|
||||||
entry_published = dt.rfc2822_to_iso8601(entry_published)
|
entry_published = DateAndTime.rfc2822_to_iso8601(entry_published)
|
||||||
else:
|
else:
|
||||||
entry_published = ''
|
entry_published = ''
|
||||||
if entry.has_key("updated"):
|
if entry.has_key("updated"):
|
||||||
entry_updated = entry.updated
|
entry_updated = entry.updated
|
||||||
entry_updated = dt.rfc2822_to_iso8601(entry_updated)
|
entry_updated = DateAndTime.rfc2822_to_iso8601(entry_updated)
|
||||||
else:
|
else:
|
||||||
entry_updated = dt.now()
|
entry_updated = DateAndTime.now()
|
||||||
if entry.has_key("link"):
|
if entry.has_key("link"):
|
||||||
# link = complete_url(source, entry.link)
|
# link = complete_url(source, entry.link)
|
||||||
entry_link = join_url(feed_url, entry.link)
|
entry_link = Url.join_url(feed_url, entry.link)
|
||||||
entry_link = trim_url(entry_link)
|
entry_link = Url.trim_url(entry_link)
|
||||||
else:
|
else:
|
||||||
entry_link = feed_url
|
entry_link = feed_url
|
||||||
# title = feed["feed"]["title"]
|
# title = feed["feed"]["title"]
|
||||||
|
@ -783,8 +782,8 @@ class Feed:
|
||||||
# if (e_link.rel == "enclosure" and
|
# if (e_link.rel == "enclosure" and
|
||||||
# media_type in ("audio", "image", "video")):
|
# media_type in ("audio", "image", "video")):
|
||||||
# media_link = e_link.href
|
# media_link = e_link.href
|
||||||
# media_link = join_url(url, e_link.href)
|
# media_link = Url.join_url(url, e_link.href)
|
||||||
# media_link = trim_url(media_link)
|
# media_link = Url.trim_url(media_link)
|
||||||
|
|
||||||
###########################################################
|
###########################################################
|
||||||
|
|
||||||
|
@@ -821,6 +820,442 @@ class Feed:
         return new_entries
 
 
+"""
+
+FIXME
+
+1) https://wiki.pine64.org
+     File "/slixfeed/crawl.py", line 178, in feed_mode_guess
+       address = Url.join_url(url, parted_url.path.split('/')[1] + path)
+                                   ~~~~~~~~~~~~~~~~~~~~~~~~~~^^^
+   IndexError: list index out of range
+
+TODO
+
+1.1) Attempt to scan more paths: /blog/, /news/ etc., including root /
+     Attempt to scan sub domains
+     https://esmailelbob.xyz/en/
+     https://blog.esmailelbob.xyz/feed/
+
+1.2) Consider utilizing fetch.http_response
+
+2) DeviantArt
+   https://www.deviantart.com/nedesem/gallery
+   https://backend.deviantart.com/rss.xml?q=gallery:nedesem
+   https://backend.deviantart.com/rss.xml?q=nedesem
+
+   https://www.deviantart.com/search?q=
+   https://backend.deviantart.com/rss.xml?q=search:
+
+FEEDS CRAWLER PROJECT
+
+3) Mark redirects for manual check
+
+Title : JSON Feed
+Link : https://www.jsonfeed.org/feed.json.xml
+
+Title : JSON Feed
+Link : https://www.jsonfeed.org/feed.json/atom.xml
+
+Title : JSON Feed
+Link : https://www.jsonfeed.org/feed.json/feed.xml
+
+Title : JSON Feed
+Link : https://www.jsonfeed.org/feed.json/feeds/rss/news.xml.php
+
+Title : JSON Feed
+Link : https://www.jsonfeed.org/feed.json/jekyll/feed.xml
+
+Title : JSON Feed
+Link : https://www.jsonfeed.org/feed.json/news.xml
+
+Title : JSON Feed
+Link : https://www.jsonfeed.org/feed.json/news.xml.php
+
+Title : JSON Feed
+Link : https://www.jsonfeed.org/feed.json/rdf.xml
+
+Title : JSON Feed
+Link : https://www.jsonfeed.org/feed.json/rss.xml
+
+Title : JSON Feed
+Link : https://www.jsonfeed.org/feed.json/videos.xml
+
+"""
+
+from aiohttp import ClientError, ClientSession, ClientTimeout
+from lxml import etree
+from lxml import html
+from lxml.etree import fromstring
+
+
+class FeedDiscovery:
+
+    # TODO Use boolean as a flag to determine whether a single URL was found
+    # async def probe_page(
+    #         callback, url, document, num=None, db_file=None):
+    #     result = None
+    #     try:
+    #         # tree = etree.fromstring(res[0]) # etree is for xml
+    #         tree = html.fromstring(document)
+    #     except:
+    #         result = (
+    #             "> {}\nFailed to parse URL as feed."
+    #             ).format(url)
+    #     if not result:
+    #         print("RSS Auto-Discovery Engaged")
+    #         result = await feed_mode_auto_discovery(url, tree)
+    #     if not result:
+    #         print("RSS Scan Mode Engaged")
+    #         result = await feed_mode_scan(url, tree)
+    #     if not result:
+    #         print("RSS Arbitrary Mode Engaged")
+    #         result = await feed_mode_request(url, tree)
+    #     if not result:
+    #         result = (
+    #             "> {}\nNo news feeds were found for URL."
+    #             ).format(url)
+    #     # elif msg:
+    #     else:
+    #         if isinstance(result, str):
+    #             return result
+    #         elif isinstance(result, list):
+    #             url = result[0]
+    #             if db_file:
+    #                 # print("if db_file", db_file)
+    #                 return await callback(db_file, url)
+    #             elif num:
+    #                 return await callback(url, num)
+    #             else:
+    #                 return await callback(url)
+
+    async def probe_page(url, document=None):
+        """
+        Parameters
+        ----------
+        url : str
+            URL.
+        document : TYPE
+            DESCRIPTION.
+
+        Returns
+        -------
+        result : list or str
+            Single URL as list or selection of URLs as str.
+        """
+        if not document:
+            response = await fetch.http(url)
+            if not response['error']:
+                document = response['content']
+        try:
+            # tree = etree.fromstring(res[0]) # etree is for xml
+            tree = html.fromstring(document)
+            result = None
+        except Exception as e:
+            logger.error(str(e))
+            try:
+                # /questions/15830421/xml-unicode-strings-with-encoding-declaration-are-not-supported
+                # xml = html.fromstring(document.encode('utf-8'))
+                # parser = etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
+                # tree = fromstring(xml, parser=parser)
+
+                # /questions/57833080/how-to-fix-unicode-strings-with-encoding-declaration-are-not-supported
+                #tree = html.fromstring(bytes(document, encoding='utf8'))
+
+                # https://twigstechtips.blogspot.com/2013/06/python-lxml-strings-with-encoding.html
+                #parser = etree.XMLParser(recover=True)
+                #tree = etree.fromstring(document, parser)
+
+                tree = html.fromstring(document.encode('utf-8'))
+                result = None
+            except Exception as e:
+                logger.error(str(e))
+                logger.warning("Failed to parse URL as feed for {}.".format(url))
+                result = {'link' : None,
+                          'index' : None,
+                          'name' : None,
+                          'code' : None,
+                          'error' : True,
+                          'exist' : None}
+        if not result:
+            logger.debug("Feed auto-discovery engaged for {}".format(url))
+            result = FeedDiscovery.feed_mode_auto_discovery(url, tree)
+        if not result:
+            logger.debug("Feed link scan mode engaged for {}".format(url))
+            result = FeedDiscovery.feed_mode_scan(url, tree)
+        if not result:
+            logger.debug("Feed arbitrary mode engaged for {}".format(url))
+            result = FeedDiscovery.feed_mode_guess(url, tree)
+        if not result:
+            logger.debug("No feeds were found for {}".format(url))
+            result = None
+        result = await FeedDiscovery.process_feed_selection(url, result)
+        return result
+
+    # TODO Improve scan by gradual decreasing of path
+    def feed_mode_guess(url, tree):
+        """
+        Lookup for feeds by pathname using HTTP Requests.
+
+        Parameters
+        ----------
+        db_file : str
+            Path to database file.
+        url : str
+            URL.
+        tree : TYPE
+            DESCRIPTION.
+
+        Returns
+        -------
+        msg : str
+            Message with URLs.
+        """
+        urls = []
+        parted_url = urlsplit(url)
+        paths = config.open_config_file("lists.toml")["pathnames"]
+        # Check whether URL has path (i.e. not root)
+        # Check parted_url.path to avoid error in case root wasn't given
+        # TODO Make more tests
+        if parted_url.path and parted_url.path.split('/')[1]:
+            paths.extend(
+                [".atom", ".feed", ".rdf", ".rss"]
+                ) if '.rss' not in paths else -1
+            # if paths.index('.rss'):
+            #     paths.extend([".atom", ".feed", ".rdf", ".rss"])
+        parted_url_path = parted_url.path if parted_url.path else '/'
+        for path in paths:
+            address = Url.join_url(url, parted_url_path.split('/')[1] + path)
+            if address not in urls:
+                urls.extend([address])
+        # breakpoint()
+        # print("feed_mode_guess")
+        return urls
+
+    def feed_mode_scan(url, tree):
+        """
+        Scan page for potential feeds by pathname.
+
+        Parameters
+        ----------
+        db_file : str
+            Path to database file.
+        url : str
+            URL.
+        tree : TYPE
+            DESCRIPTION.
+
+        Returns
+        -------
+        msg : str
+            Message with URLs.
+        """
+        urls = []
+        paths = config.open_config_file("lists.toml")["pathnames"]
+        for path in paths:
+            # xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
+            # xpath_query = "//a[contains(@href,'{}')]".format(path)
+            num = 5
+            xpath_query = (
+                "(//a[contains(@href,'{}')])[position()<={}]"
+                ).format(path, num)
+            addresses = tree.xpath(xpath_query)
+            xpath_query = (
+                "(//a[contains(@href,'{}')])[position()>last()-{}]"
+                ).format(path, num)
+            addresses += tree.xpath(xpath_query)
+            # NOTE Should number of addresses be limited or
+            #      perhaps be N from the start and N from the end
+            for address in addresses:
+                address = Url.join_url(url, address.xpath('@href')[0])
+                if address not in urls:
+                    urls.extend([address])
+        # breakpoint()
+        # print("feed_mode_scan")
+        return urls
+
+    def feed_mode_auto_discovery(url, tree):
+        """
+        Lookup for feeds using RSS autodiscovery technique.
+
+        See: https://www.rssboard.org/rss-autodiscovery
+
+        Parameters
+        ----------
+        db_file : str
+            Path to database file.
+        url : str
+            URL.
+        tree : TYPE
+            DESCRIPTION.
+
+        Returns
+        -------
+        msg : str
+            Message with URLs.
+        """
+        xpath_query = (
+            '//link[(@rel="alternate") and '
+            '(@type="application/atom+xml" or '
+            '@type="application/rdf+xml" or '
+            '@type="application/rss+xml")]'
+            )
+        # xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
+        # xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
+        feeds = tree.xpath(xpath_query)
+        if feeds:
+            urls = []
+            for feed in feeds:
+                # # The following code works;
+                # # The following code will catch
+                # # only valid resources (i.e. not 404);
+                # # The following code requires more bandwidth.
+                # res = await fetch.http(feed)
+                # if res[0]:
+                #     disco = parse(res[0])
+                #     title = disco["feed"]["title"]
+                #     msg += "{} \n {} \n\n".format(title, feed)
+
+                # feed_name = feed.xpath('@title')[0]
+                # feed_addr = Url.join_url(url, feed.xpath('@href')[0])
+
+                # if feed_addr.startswith("/"):
+                #     feed_addr = url + feed_addr
+                address = Url.join_url(url, feed.xpath('@href')[0])
+                if address not in urls:
+                    urls.extend([address])
+            # breakpoint()
+            # print("feed_mode_auto_discovery")
+            return urls
+
+    # TODO Segregate function into function that returns
+    #      URLs (string) and Feeds (dict) and function that
+    #      composes text message (string).
+    #      Maybe that's not necessary.
+    async def process_feed_selection(url, urls):
+        feeds = {}
+        for i in urls:
+            result = await fetch.http(i)
+            if not result['error']:
+                document = result['content']
+                status_code = result['status_code']
+                if status_code == 200: # NOTE This line might be redundant
+                    try:
+                        feeds[i] = [parse(document)]
+                    except:
+                        continue
+        message = (
+            "Web feeds found for {}\n\n```\n"
+            ).format(url)
+        urls = []
+        for feed_url in feeds:
+            # try:
+            #     res = await fetch.http(feed)
+            # except:
+            #     continue
+            feed_name = None
+            if "title" in feeds[feed_url][0]["feed"].keys():
+                feed_name = feeds[feed_url][0].feed.title
+            feed_name = feed_name if feed_name else "Untitled"
+            # feed_name = feed_name if feed_name else urlsplit(feed_url).netloc
+            # AttributeError: 'str' object has no attribute 'entries'
+            if "entries" in feeds[feed_url][0].keys():
+                feed_amnt = feeds[feed_url][0].entries
+            else:
+                continue
+            if feed_amnt:
+                # NOTE Because there could be many false positives
+                #      which are revealed in second phase of scan, we
+                #      could end with a single feed, which would be
+                #      listed instead of fetched, so feed_url_mark is
+                #      utilized in order to make fetch possible.
+                # NOTE feed_url_mark was a variable which stored
+                #      single URL (probably first accepted as valid)
+                #      in order to get an indication whether a single
+                #      URL has been fetched, so that the receiving
+                #      function will scan that single URL instead of
+                #      listing it as a message.
+                url = {'link' : feed_url,
+                       'index' : None,
+                       'name' : feed_name,
+                       'code' : status_code,
+                       'error' : False,
+                       'exist' : None}
+                urls.extend([url])
+        count = len(urls)
+        if count > 1:
+            result = urls
+        elif count:
+            result = urls[0]
+        else:
+            result = None
+        return result
+
+    # def get_discovered_feeds(url, urls):
+    #     message = (
+    #         "Found {} web feeds:\n\n```\n"
+    #         ).format(len(urls))
+    #     if len(urls) > 1:
+    #         for urls in urls:
+    #             message += (
+    #                 "Title : {}\n"
+    #                 "Link : {}\n"
+    #                 "\n"
+    #                 ).format(url, url.title)
+    #         message += (
+    #             "```\nThe above feeds were extracted from\n{}"
+    #             ).format(url)
+    #     elif len(urls) > 0:
+    #         result = urls
+    #     else:
+    #         message = (
+    #             "No feeds were found for {}"
+    #             ).format(url)
+    #     return result
+
+    # Test module
+    # TODO ModuleNotFoundError: No module named 'slixfeed'
+    # import slixfeed.fetch as fetch
+    # from slixfeed.action import is_feed, process_feed_selection
+
+    # async def start(url):
+    #     while True:
+    #         result = await fetch.http(url)
+    #         document = result[0]
+    #         status = result[1]
+    #         if document:
+    #             feed = parse(document)
+    #             if is_feed(feed):
+    #                 print(url)
+    #             else:
+    #                 urls = await probe_page(
+    #                     url, document)
+    #                 if len(urls) > 1:
+    #                     await process_feed_selection(urls)
+    #                 elif urls:
+    #                     url = urls[0]
+    #         else:
+    #             response = (
+    #                 "> {}\nFailed to load URL. Reason: {}"
+    #                 ).format(url, status)
+    #             break
+    #     return response
+
+    # url = "https://www.smh.com.au/rssheadlines"
+    # start(url)
+
+
 class FeedTask:
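
Two behavioral details changed in this move compared with the deleted crawl.py: the feed_mode_* helpers are now synchronous and no longer run the validation pass themselves, and probe_page performs that single process_feed_selection pass at the end. A hedged driver sketch; the module path for FeedDiscovery is an assumption, since the target file name is not shown in this diff:

import asyncio
from slixfeed.action import FeedDiscovery  # hypothetical module path

async def main():
    # probe_page parses once, then tries auto-discovery, link scan and
    # pathname guessing in order, and validates candidates in one pass.
    result = await FeedDiscovery.probe_page('https://example.org')
    # result: a dict {'link', 'index', 'name', 'code', 'error', 'exist'}
    # for a single hit, a list of such dicts for several, or None.
    print(result)

asyncio.run(main())
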
@@ -921,7 +1356,7 @@ class Opml:
         ETR.SubElement(head, "generator").text = "Slixfeed"
         ETR.SubElement(head, "urlPublic").text = (
             "https://slixfeed.woodpeckersnest.space/")
-        time_stamp = dt.current_time()
+        time_stamp = DateAndTime.current_time()
         ETR.SubElement(head, "dateCreated").text = time_stamp
         ETR.SubElement(head, "dateModified").text = time_stamp
         body = ETR.SubElement(root, "body")
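
For context, the OPML head assembled here serializes to a few fixed elements plus two timestamps. A sketch assuming ETR is xml.etree.ElementTree (the actual binding is not shown in this hunk) and an illustrative timestamp:

import xml.etree.ElementTree as ETR  # assumed binding for ETR

root = ETR.Element("opml")
head = ETR.SubElement(root, "head")
ETR.SubElement(head, "generator").text = "Slixfeed"
ETR.SubElement(head, "urlPublic").text = (
    "https://slixfeed.woodpeckersnest.space/")
time_stamp = "12:00:00"  # illustrative; current_time() uses %H:%M:%S
ETR.SubElement(head, "dateCreated").text = time_stamp
ETR.SubElement(head, "dateModified").text = time_stamp
body = ETR.SubElement(root, "body")
print(ETR.tostring(root, encoding="unicode"))
# roughly: <opml><head><generator>Slixfeed</generator>...</head><body /></opml>
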
352
slixfeed/url.py
352
slixfeed/url.py
|
@ -1,352 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
FIXME
|
|
||||||
|
|
||||||
1) Do not handle base64
|
|
||||||
https://www.lilithsaintcrow.com/2024/02/love-anonymous/
|
|
||||||
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
|
|
||||||
https://www.lilithsaintcrow.com/2024/02/love-anonymous//image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
|
|
||||||
|
|
||||||
TODO
|
|
||||||
|
|
||||||
1) ActivityPub URL revealer activitypub_to_http.
|
|
||||||
|
|
||||||
2) SQLite preference "instance" for preferred instances.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from email.utils import parseaddr
|
|
||||||
import os
|
|
||||||
import random
|
|
||||||
import slixfeed.config as config
|
|
||||||
import slixfeed.fetch as fetch
|
|
||||||
from slixfeed.log import Logger
|
|
||||||
from urllib.parse import (
|
|
||||||
parse_qs,
|
|
||||||
urlencode,
|
|
||||||
urljoin,
|
|
||||||
# urlparse,
|
|
||||||
urlsplit,
|
|
||||||
urlunsplit
|
|
||||||
)
|
|
||||||
|
|
||||||
logger = Logger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
# NOTE
|
|
||||||
# hostname and protocol are listed as one in file proxies.toml.
|
|
||||||
# Perhaps a better practice would be to have them separated.
|
|
||||||
|
|
||||||
# NOTE
|
|
||||||
# File proxies.toml will remain as it is, in order to be
|
|
||||||
# coordinated with the dataset of project LibRedirect, even
|
|
||||||
# though rule-sets might be adopted (see )Privacy Redirect).
|
|
||||||
|
|
||||||
def get_hostname(url):
|
|
||||||
parted_url = urlsplit(url)
|
|
||||||
hostname = parted_url.netloc
|
|
||||||
if hostname.startswith('www.'): hostname = hostname.replace('www.', '')
|
|
||||||
return hostname
|
|
||||||
|
|
||||||
|
|
||||||
async def replace_hostname(url, url_type):
|
|
||||||
"""
|
|
||||||
Replace hostname.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
url : str
|
|
||||||
URL.
|
|
||||||
url_type : str
|
|
||||||
"feed" or "link".
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
url : str
|
|
||||||
URL.
|
|
||||||
"""
|
|
||||||
url_new = None
|
|
||||||
parted_url = urlsplit(url)
|
|
||||||
# protocol = parted_url.scheme
|
|
||||||
hostname = parted_url.netloc
|
|
||||||
hostname = hostname.replace('www.','')
|
|
||||||
pathname = parted_url.path
|
|
||||||
queries = parted_url.query
|
|
||||||
fragment = parted_url.fragment
|
|
||||||
proxies = config.open_config_file('proxies.toml')['proxies']
|
|
||||||
for proxy_name in proxies:
|
|
||||||
proxy = proxies[proxy_name]
|
|
||||||
if hostname in proxy['hostname'] and url_type in proxy['type']:
|
|
||||||
while not url_new:
|
|
||||||
print('>>>')
|
|
||||||
print(url_new)
|
|
||||||
proxy_type = 'clearnet'
|
|
||||||
proxy_list = proxy[proxy_type]
|
|
||||||
if len(proxy_list):
|
|
||||||
# proxy_list = proxies[proxy_name][proxy_type]
|
|
||||||
proxy_url = random.choice(proxy_list)
|
|
||||||
parted_proxy_url = urlsplit(proxy_url)
|
|
||||||
protocol_new = parted_proxy_url.scheme
|
|
||||||
hostname_new = parted_proxy_url.netloc
|
|
||||||
url_new = urlunsplit([protocol_new, hostname_new,
|
|
||||||
pathname, queries, fragment])
|
|
||||||
print(proxy_url)
|
|
||||||
print(url_new)
|
|
||||||
print('>>>')
|
|
||||||
response = await fetch.http(url_new)
|
|
||||||
if (response and
|
|
||||||
response['status_code'] == 200 and
|
|
||||||
# response.reason == 'OK' and
|
|
||||||
url_new.startswith(proxy_url)):
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
config_dir = config.get_default_config_directory()
|
|
||||||
proxies_obsolete_file = config_dir + '/proxies_obsolete.toml'
|
|
||||||
proxies_file = config_dir + '/proxies.toml'
|
|
||||||
if not os.path.isfile(proxies_obsolete_file):
|
|
||||||
config.create_skeleton(proxies_file)
|
|
||||||
config.backup_obsolete(proxies_obsolete_file,
|
|
||||||
proxy_name, proxy_type,
|
|
||||||
proxy_url)
|
|
||||||
try:
|
|
||||||
config.update_proxies(proxies_file, proxy_name,
|
|
||||||
proxy_type, proxy_url)
|
|
||||||
except ValueError as e:
|
|
||||||
logger.error([str(e), proxy_url])
|
|
||||||
url_new = None
|
|
||||||
else:
|
|
||||||
logger.warning('No proxy URLs for {}. '
|
|
||||||
'Please update proxies.toml'
|
|
||||||
.format(proxy_name))
|
|
||||||
url_new = url
|
|
||||||
break
|
|
||||||
return url_new
|
|
||||||
|
|
||||||
|
|
||||||
def remove_tracking_parameters(url):
|
|
||||||
"""
|
|
||||||
Remove queries with tracking parameters.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
url : str
|
|
||||||
URL.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
url : str
|
|
||||||
URL.
|
|
||||||
"""
|
|
||||||
if url.startswith('data:') and ';base64,' in url:
|
|
||||||
return url
|
|
||||||
parted_url = urlsplit(url)
|
|
||||||
protocol = parted_url.scheme
|
|
||||||
hostname = parted_url.netloc
|
|
||||||
pathname = parted_url.path
|
|
||||||
queries = parse_qs(parted_url.query)
|
|
||||||
fragment = parted_url.fragment
|
|
||||||
trackers = config.open_config_file('queries.toml')['trackers']
|
|
||||||
for tracker in trackers:
|
|
||||||
if tracker in queries: del queries[tracker]
|
|
||||||
queries_new = urlencode(queries, doseq=True)
|
|
||||||
url = urlunsplit([protocol, hostname, pathname, queries_new, fragment])
|
|
||||||
return url
|
|
||||||
|
|
||||||
|
|
||||||
def feed_to_http(url):
|
|
||||||
"""
|
|
||||||
Replace scheme FEED by HTTP.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
url : str
|
|
||||||
URL.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
new_url : str
|
|
||||||
URL.
|
|
||||||
"""
|
|
||||||
par_url = urlsplit(url)
|
|
||||||
new_url = urlunsplit(['http', par_url.netloc, par_url.path, par_url.query,
|
|
||||||
par_url.fragment])
|
|
||||||
return new_url
|
|
||||||
|
|
||||||
|
|
||||||
def check_xmpp_uri(uri):
|
|
||||||
"""
|
|
||||||
Check validity of XMPP URI.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
uri : str
|
|
||||||
URI.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
jid : str
|
|
||||||
JID or None.
|
|
||||||
"""
|
|
||||||
jid = urlsplit(uri).path
|
|
||||||
if parseaddr(jid)[1] != jid:
|
|
||||||
jid = False
|
|
||||||
return jid
|
|
||||||
|
|
||||||
|
|
# NOTE Read the documentation
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
def complete_url(source, link):
    """
    Check if URL is pathname and complete it into URL.

    Parameters
    ----------
    source : str
        Feed URL.
    link : str
        Link URL or pathname.

    Returns
    -------
    str
        URL.
    """
    if link.startswith('data:') and ';base64,' in link:
        return link
    if link.startswith('www.'):
        return 'http://' + link
    parted_link = urlsplit(link)
    parted_feed = urlsplit(source)
    if parted_link.scheme == 'magnet' and parted_link.query:
        return link
    if parted_link.scheme and parted_link.netloc:
        return link
    if link.startswith('//'):
        if parted_link.netloc and parted_link.path:
            new_link = urlunsplit([parted_feed.scheme, parted_link.netloc,
                                   parted_link.path, parted_link.query,
                                   parted_link.fragment])
    elif link.startswith('/'):
        new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
                               parted_link.path, parted_link.query,
                               parted_link.fragment])
    elif link.startswith('../'):
        pathlink = parted_link.path.split('/')
        pathfeed = parted_feed.path.split('/')
        for i in pathlink:
            if i == '..':
                if pathlink.index('..') == 0:
                    pathfeed.pop()
                else:
                    break
        while pathlink.count('..'):
            if pathlink.index('..') == 0:
                pathlink.remove('..')
            else:
                break
        pathlink = '/'.join(pathlink)
        pathfeed.extend([pathlink])
        new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
                               '/'.join(pathfeed), parted_link.query,
                               parted_link.fragment])
    else:
        pathlink = parted_link.path.split('/')
        pathfeed = parted_feed.path.split('/')
        if link.startswith('./'):
            pathlink.remove('.')
        if not source.endswith('/'):
            pathfeed.pop()
        pathlink = '/'.join(pathlink)
        pathfeed.extend([pathlink])
        new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
                               '/'.join(pathfeed), parted_link.query,
                               parted_link.fragment])
    return new_link

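A sketch of the absolute-path and bare-filename branches:

    >>> complete_url('https://example.com/blog/feed.xml', '/logo.png')
    'https://example.com/logo.png'
    >>> complete_url('https://example.com/blog/feed.xml', 'logo.png')
    'https://example.com/blog/logo.png'
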
# TODO
# Feed https://www.ocaml.org/feed.xml
# Link %20https://frama-c.com/fc-versions/cobalt.html%20

# FIXME
# Feed https://cyber.dabamos.de/blog/feed.rss
# Link https://cyber.dabamos.de/blog/#article-2022-07-15

def join_url(source, link):
    """
    Join base URL with given pathname.

    Parameters
    ----------
    source : str
        Feed URL.
    link : str
        Link URL or pathname.

    Returns
    -------
    str
        URL.
    """
    if link.startswith('data:') and ';base64,' in link:
        return link
    if link.startswith('www.'):
        new_link = 'http://' + link
    elif link.startswith('%20') and link.endswith('%20'):
        old_link = link.split('%20')
        del old_link[0]
        old_link.pop()
        new_link = ''.join(old_link)
    else:
        new_link = urljoin(source, link)
    return new_link

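A sketch of the urljoin branch and of the %20-wrapped case noted in the TODO above:

    >>> join_url('https://example.com/blog/feed.xml', '../news/')
    'https://example.com/news/'
    >>> join_url('https://www.ocaml.org/feed.xml', '%20https://frama-c.com/fc-versions/cobalt.html%20')
    'https://frama-c.com/fc-versions/cobalt.html'
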
def trim_url(url):
    """
    Check URL pathname for double slash.

    Parameters
    ----------
    url : str
        URL.

    Returns
    -------
    url : str
        URL.
    """
    if url.startswith('data:') and ';base64,' in url:
        return url
    parted_url = urlsplit(url)
    protocol = parted_url.scheme
    hostname = parted_url.netloc
    pathname = parted_url.path
    queries = parted_url.query
    fragment = parted_url.fragment
    while '//' in pathname:
        pathname = pathname.replace('//', '/')
    url = urlunsplit([protocol, hostname, pathname, queries, fragment])
    return url

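A sketch of the double-slash collapse; only the path is rewritten, the netloc is left alone:

    >>> trim_url('https://example.com//blog//feed.xml')
    'https://example.com/blog/feed.xml'
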
def activitypub_to_http(namespace):
    """
    Replace ActivityPub namespace by HTTP.

    Parameters
    ----------
    namespace : str
        Namespace.

    Returns
    -------
    new_url : str
        URL.
    """

@@ -39,16 +39,27 @@ TODO

 """

+from datetime import datetime
+from email.utils import parseaddr
+from dateutil.parser import parse
+from email.utils import parsedate, parsedate_to_datetime
 import hashlib
+import os
+import random
 import slixfeed.config as config
-from slixfeed.config import Config
 from lxml import etree, html
 import slixfeed.dt as dt
 import slixfeed.fetch as fetch
 from slixfeed.log import Logger
-import slixfeed.sqlite as sqlite
-from slixfeed.url import join_url, complete_url
 import sys
+from urllib.parse import (
+    parse_qs,
+    urlencode,
+    urljoin,
+    # urlparse,
+    urlsplit,
+    urlunsplit
+)

 try:
     import tomllib
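The new urllib.parse imports back the query handling used by class Url below; a sketch of the parse_qs / urlencode(doseq=True) round trip they enable:

    >>> from urllib.parse import parse_qs, urlencode
    >>> parse_qs('tag=rss&tag=atom&id=3')
    {'tag': ['rss', 'atom'], 'id': ['3']}
    >>> urlencode(parse_qs('tag=rss&tag=atom&id=3'), doseq=True)
    'tag=rss&tag=atom&id=3'
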
@@ -58,6 +69,115 @@ except:
 logger = Logger(__name__)


+class DateAndTime:
+
+    # https://feedparser.readthedocs.io/en/latest/date-parsing.html
+
+    def now():
+        """
+        ISO 8601 Timestamp.
+
+        Returns
+        -------
+        date : ???
+            ISO 8601 Timestamp.
+        """
+        date = datetime.now().isoformat()
+        return date
+
+
+    def convert_struct_time_to_iso8601(struct_time):
+        date = datetime(*struct_time[:6])
+        date = date.isoformat()
+        return date
+
+
+    def current_date():
+        """
+        Print MM DD, YYYY (Weekday Time) timestamp.
+
+        Returns
+        -------
+        date : str
+            MM DD, YYYY (Weekday Time) timestamp.
+        """
+        now = datetime.now()
+        time = now.strftime("%B %d, %Y (%A %T)")
+        return time
+
+
+    def current_time():
+        """
+        Print HH:MM:SS timestamp.
+
+        Returns
+        -------
+        date : str
+            HH:MM:SS timestamp.
+        """
+        now = datetime.now()
+        time = now.strftime("%H:%M:%S")
+        return time
+
+
+    def timestamp():
+        """
+        Print time stamp to be used in filename.
+
+        Returns
+        -------
+        formatted_time : str
+            %Y%m%d-%H%M%S timestamp.
+        """
+        now = datetime.now()
+        formatted_time = now.strftime("%Y%m%d-%H%M%S")
+        return formatted_time
+
+
+    def validate(date):
+        """
+        Validate date format.
+
+        Parameters
+        ----------
+        date : str
+            Timestamp.
+
+        Returns
+        -------
+        date : str
+            Timestamp.
+        """
+        try:
+            parse(date)
+        except:
+            date = DateAndTime.now()
+        return date
+
+
+    def rfc2822_to_iso8601(date):
+        """
+        Convert RFC 2822 into ISO 8601.
+
+        Parameters
+        ----------
+        date : str
+            RFC 2822 Timestamp.
+
+        Returns
+        -------
+        date : str
+            ISO 8601 Timestamp.
+        """
+        if parsedate(date):
+            try:
+                date = parsedate_to_datetime(date)
+                date = date.isoformat()
+            except:
+                date = DateAndTime.now()
+        return date
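The methods of DateAndTime take neither self nor cls, so they are called on the class itself; a sketch of the RFC 2822 conversion and of the filename timestamp (the latter naturally depends on the clock):

    >>> DateAndTime.rfc2822_to_iso8601('Mon, 05 Feb 2024 09:00:00 +0000')
    '2024-02-05T09:00:00+00:00'
    >>> DateAndTime.timestamp()
    '20240205-090000'
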
 class Documentation:


@@ -120,7 +240,7 @@ class Html:
         if len(images):
             image = images[0]
             image = str(image)
-            image_url = complete_url(url, image)
+            image_url = Url.complete_url(url, image)
             return image_url


@@ -224,6 +344,343 @@ class Task:
                        .format(task, jid_bare))


+"""
+
+FIXME
+
+1) Do not handle base64
+https://www.lilithsaintcrow.com/2024/02/love-anonymous/
+data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
+https://www.lilithsaintcrow.com/2024/02/love-anonymous//image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
+
+TODO
+
+1) ActivityPub URL revealer activitypub_to_http.
+
+2) SQLite preference "instance" for preferred instances.
+
+"""
+class Url:
+
+    # NOTE
+    # hostname and protocol are listed as one in file proxies.toml.
+    # Perhaps a better practice would be to have them separated.
+
+    # NOTE
+    # File proxies.toml will remain as it is, in order to be
+    # coordinated with the dataset of project LibRedirect, even
+    # though rule-sets might be adopted (see Privacy Redirect).
+
+    def get_hostname(url):
+        parted_url = urlsplit(url)
+        hostname = parted_url.netloc
+        if hostname.startswith('www.'): hostname = hostname.replace('www.', '')
+        return hostname
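A sketch of the hostname normalisation:

    >>> Url.get_hostname('https://www.example.com/feed.xml')
    'example.com'
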
+    async def replace_hostname(url, url_type):
+        """
+        Replace hostname.
+
+        Parameters
+        ----------
+        url : str
+            URL.
+        url_type : str
+            "feed" or "link".
+
+        Returns
+        -------
+        url : str
+            URL.
+        """
+        url_new = None
+        parted_url = urlsplit(url)
+        # protocol = parted_url.scheme
+        hostname = parted_url.netloc
+        hostname = hostname.replace('www.','')
+        pathname = parted_url.path
+        queries = parted_url.query
+        fragment = parted_url.fragment
+        proxies = config.open_config_file('proxies.toml')['proxies']
+        for proxy_name in proxies:
+            proxy = proxies[proxy_name]
+            if hostname in proxy['hostname'] and url_type in proxy['type']:
+                while not url_new:
+                    print('>>>')
+                    print(url_new)
+                    proxy_type = 'clearnet'
+                    proxy_list = proxy[proxy_type]
+                    if len(proxy_list):
+                        # proxy_list = proxies[proxy_name][proxy_type]
+                        proxy_url = random.choice(proxy_list)
+                        parted_proxy_url = urlsplit(proxy_url)
+                        protocol_new = parted_proxy_url.scheme
+                        hostname_new = parted_proxy_url.netloc
+                        url_new = urlunsplit([protocol_new, hostname_new,
+                                              pathname, queries, fragment])
+                        print(proxy_url)
+                        print(url_new)
+                        print('>>>')
+                        response = await fetch.http(url_new)
+                        if (response and
+                            response['status_code'] == 200 and
+                            # response.reason == 'OK' and
+                            url_new.startswith(proxy_url)):
+                            break
+                        else:
+                            config_dir = config.get_default_config_directory()
+                            proxies_obsolete_file = config_dir + '/proxies_obsolete.toml'
+                            proxies_file = config_dir + '/proxies.toml'
+                            if not os.path.isfile(proxies_obsolete_file):
+                                config.create_skeleton(proxies_file)
+                            config.backup_obsolete(proxies_obsolete_file,
+                                                   proxy_name, proxy_type,
+                                                   proxy_url)
+                            try:
+                                config.update_proxies(proxies_file, proxy_name,
+                                                      proxy_type, proxy_url)
+                            except ValueError as e:
+                                logger.error([str(e), proxy_url])
+                            url_new = None
+                    else:
+                        logger.warning('No proxy URLs for {}. '
+                                       'Please update proxies.toml'
+                                       .format(proxy_name))
+                        url_new = url
+                        break
+        return url_new
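A sketch of the intended effect, assuming proxies.toml maps youtube.com to a clearnet Invidious instance (the instance URL below is hypothetical) and that the probe request returns status 200; the coroutine must be awaited:

    >>> await Url.replace_hostname('https://www.youtube.com/watch?v=abcdefghijk', 'link')
    'https://invidious.example/watch?v=abcdefghijk'
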
+    def remove_tracking_parameters(url):
+        """
+        Remove queries with tracking parameters.
+
+        Parameters
+        ----------
+        url : str
+            URL.
+
+        Returns
+        -------
+        url : str
+            URL.
+        """
+        if url.startswith('data:') and ';base64,' in url:
+            return url
+        parted_url = urlsplit(url)
+        protocol = parted_url.scheme
+        hostname = parted_url.netloc
+        pathname = parted_url.path
+        queries = parse_qs(parted_url.query)
+        fragment = parted_url.fragment
+        trackers = config.open_config_file('queries.toml')['trackers']
+        for tracker in trackers:
+            if tracker in queries: del queries[tracker]
+        queries_new = urlencode(queries, doseq=True)
+        url = urlunsplit([protocol, hostname, pathname, queries_new, fragment])
+        return url
+
+
+    def feed_to_http(url):
+        """
+        Replace scheme FEED by HTTP.
+
+        Parameters
+        ----------
+        url : str
+            URL.
+
+        Returns
+        -------
+        new_url : str
+            URL.
+        """
+        par_url = urlsplit(url)
+        new_url = urlunsplit(['http', par_url.netloc, par_url.path, par_url.query,
+                              par_url.fragment])
+        return new_url
+
+
+    def check_xmpp_uri(uri):
+        """
+        Check validity of XMPP URI.
+
+        Parameters
+        ----------
+        uri : str
+            URI.
+
+        Returns
+        -------
+        jid : str
+            JID or None.
+        """
+        jid = urlsplit(uri).path
+        if parseaddr(jid)[1] != jid:
+            jid = False
+        return jid
+
+
+    # NOTE Read the documentation
+    # https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
+    def complete_url(source, link):
+        """
+        Check if URL is pathname and complete it into URL.
+
+        Parameters
+        ----------
+        source : str
+            Feed URL.
+        link : str
+            Link URL or pathname.
+
+        Returns
+        -------
+        str
+            URL.
+        """
+        if link.startswith('data:') and ';base64,' in link:
+            return link
+        if link.startswith('www.'):
+            return 'http://' + link
+        parted_link = urlsplit(link)
+        parted_feed = urlsplit(source)
+        if parted_link.scheme == 'magnet' and parted_link.query:
+            return link
+        if parted_link.scheme and parted_link.netloc:
+            return link
+        if link.startswith('//'):
+            if parted_link.netloc and parted_link.path:
+                new_link = urlunsplit([parted_feed.scheme, parted_link.netloc,
+                                       parted_link.path, parted_link.query,
+                                       parted_link.fragment])
+        elif link.startswith('/'):
+            new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
+                                   parted_link.path, parted_link.query,
+                                   parted_link.fragment])
+        elif link.startswith('../'):
+            pathlink = parted_link.path.split('/')
+            pathfeed = parted_feed.path.split('/')
+            for i in pathlink:
+                if i == '..':
+                    if pathlink.index('..') == 0:
+                        pathfeed.pop()
+                    else:
+                        break
+            while pathlink.count('..'):
+                if pathlink.index('..') == 0:
+                    pathlink.remove('..')
+                else:
+                    break
+            pathlink = '/'.join(pathlink)
+            pathfeed.extend([pathlink])
+            new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
+                                   '/'.join(pathfeed), parted_link.query,
+                                   parted_link.fragment])
+        else:
+            pathlink = parted_link.path.split('/')
+            pathfeed = parted_feed.path.split('/')
+            if link.startswith('./'):
+                pathlink.remove('.')
+            if not source.endswith('/'):
+                pathfeed.pop()
+            pathlink = '/'.join(pathlink)
+            pathfeed.extend([pathlink])
+            new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
+                                   '/'.join(pathfeed), parted_link.query,
+                                   parted_link.fragment])
+        return new_link
+
+
+    # TODO
+    # Feed https://www.ocaml.org/feed.xml
+    # Link %20https://frama-c.com/fc-versions/cobalt.html%20
+
+    # FIXME
+    # Feed https://cyber.dabamos.de/blog/feed.rss
+    # Link https://cyber.dabamos.de/blog/#article-2022-07-15
+
+    def join_url(source, link):
+        """
+        Join base URL with given pathname.
+
+        Parameters
+        ----------
+        source : str
+            Feed URL.
+        link : str
+            Link URL or pathname.
+
+        Returns
+        -------
+        str
+            URL.
+        """
+        if link.startswith('data:') and ';base64,' in link:
+            return link
+        if link.startswith('www.'):
+            new_link = 'http://' + link
+        elif link.startswith('%20') and link.endswith('%20'):
+            old_link = link.split('%20')
+            del old_link[0]
+            old_link.pop()
+            new_link = ''.join(old_link)
+        else:
+            new_link = urljoin(source, link)
+        return new_link
+
+
+    def trim_url(url):
+        """
+        Check URL pathname for double slash.
+
+        Parameters
+        ----------
+        url : str
+            URL.
+
+        Returns
+        -------
+        url : str
+            URL.
+        """
+        if url.startswith('data:') and ';base64,' in url:
+            return url
+        parted_url = urlsplit(url)
+        protocol = parted_url.scheme
+        hostname = parted_url.netloc
+        pathname = parted_url.path
+        queries = parted_url.query
+        fragment = parted_url.fragment
+        while '//' in pathname:
+            pathname = pathname.replace('//', '/')
+        url = urlunsplit([protocol, hostname, pathname, queries, fragment])
+        return url
+
+
+    def activitypub_to_http(namespace):
+        """
+        Replace ActivityPub namespace by HTTP.
+
+        Parameters
+        ----------
+        namespace : str
+            Namespace.
+
+        Returns
+        -------
+        new_url : str
+            URL.
+        """


 class Utilities:
@@ -1,2 +1,2 @@
-__version__ = '0.1.81'
-__version_info__ = (0, 1, 81)
+__version__ = '0.1.82'
+__version_info__ = (0, 1, 82)


@@ -29,16 +29,11 @@ import slixfeed.config as config
 from slixfeed.config import Config
 from slixfeed.log import Logger
 import slixfeed.sqlite as sqlite
-from slixfeed.url import (
-    remove_tracking_parameters,
-    replace_hostname,
-)
 from slixfeed.syndication import FeedTask
-from slixfeed.utilities import Documentation, Html, MD, Task
+from slixfeed.utilities import Documentation, Html, MD, Task, Url
 from slixfeed.xmpp.commands import XmppCommands
 from slixfeed.xmpp.message import XmppMessage
 from slixfeed.xmpp.presence import XmppPresence
-from slixfeed.xmpp.privilege import is_operator, is_moderator
 from slixfeed.xmpp.status import XmppStatusTask
 from slixfeed.xmpp.upload import XmppUpload
 from slixfeed.xmpp.utilities import XmppUtilities
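The hunks below replace the free functions from slixfeed.xmpp.privilege with class-level methods on XmppUtilities; a sketch of the new call pattern (the instance and JID variables are placeholders):

    if not XmppUtilities.is_moderator(self, jid_bare, jid_full):
        return
    if XmppUtilities.is_operator(self, jid_bare):
        response = await XmppCommands.print_bookmarks(self)
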
@@ -89,7 +84,7 @@ class XmppChat:
         if (message['muc']['nick'] == self.alias):
             return
         jid_full = str(message['from'])
-        if not is_moderator(self, jid_bare, jid_full):
+        if not XmppUtilities.is_moderator(self, jid_bare, jid_full):
             return

         if message['type'] == 'groupchat':

@@ -115,7 +110,7 @@ class XmppChat:
             # return
             # approved = False
         jid_full = str(message['from'])
-        if not is_moderator(self, jid_bare, jid_full):
+        if not XmppUtilities.is_moderator(self, jid_bare, jid_full):
             return
         # if role == 'moderator':
             # approved = True

@@ -257,7 +252,7 @@ class XmppChat:
                     response = 'Current value for archive: '
                     response += XmppCommands.get_archive(self, jid_bare)
                 case _ if command_lowercase.startswith('bookmark +'):
-                    if is_operator(self, jid_bare):
+                    if XmppUtilities.is_operator(self, jid_bare):
                         muc_jid = command[11:]
                         response = await XmppCommands.bookmark_add(
                             self, muc_jid)

@@ -265,7 +260,7 @@ class XmppChat:
                         response = ('This action is restricted. '
                                     'Type: adding bookmarks.')
                 case _ if command_lowercase.startswith('bookmark -'):
-                    if is_operator(self, jid_bare):
+                    if XmppUtilities.is_operator(self, jid_bare):
                         muc_jid = command[11:]
                         response = await XmppCommands.bookmark_del(
                             self, muc_jid)

@@ -273,7 +268,7 @@ class XmppChat:
                         response = ('This action is restricted. '
                                     'Type: removing bookmarks.')
                 case 'bookmarks':
-                    if is_operator(self, jid_bare):
+                    if XmppUtilities.is_operator(self, jid_bare):
                         response = await XmppCommands.print_bookmarks(self)
                     else:
                         response = ('This action is restricted. '

@@ -333,7 +328,7 @@ class XmppChat:
                     XmppPresence.send(self, jid_bare, status_message,
                                       status_type=status_type)
                     filename, response = XmppCommands.export_feeds(
-                        self, jid_bare, ext)
+                        jid_bare, ext)
                     url = await XmppUpload.start(self, jid_bare, filename)
                     # response = (
                     #     'Feeds exported successfully to {}.\n{}'

@@ -388,7 +383,7 @@ class XmppChat:
                         response = await XmppCommands.pubsub_list(self, jid)
                         response += '```'
                 case _ if command_lowercase.startswith('pubsub send'):
-                    if is_operator(self, jid_bare):
+                    if XmppUtilities.is_operator(self, jid_bare):
                         info = command[12:]
                         info = info.split(' ')
                         jid = info[0]

@@ -461,7 +456,7 @@ class XmppChat:
                     await XmppChatAction.send_unread_items(self, jid_bare, num)
                     XmppStatusTask.restart_task(self, jid_bare)
                 case _ if command_lowercase.startswith('node delete'):
-                    if is_operator(self, jid_bare):
+                    if XmppUtilities.is_operator(self, jid_bare):
                         info = command[12:]
                         info = info.split(' ')
                         response = XmppCommands.node_delete(self, info)

@@ -469,7 +464,7 @@ class XmppChat:
                         response = ('This action is restricted. '
                                     'Type: sending news to PubSub.')
                 case _ if command_lowercase.startswith('node purge'):
-                    if is_operator(self, jid_bare):
+                    if XmppUtilities.is_operator(self, jid_bare):
                         info = command[11:]
                         info = info.split(' ')
                         response = XmppCommands.node_purge(self, info)

@@ -770,8 +765,8 @@ class XmppChatAction:
             else:
                 summary = '*** No summary ***'
             link = result[2]
-            link = remove_tracking_parameters(link)
-            link = await replace_hostname(link, "link") or link
+            link = Url.remove_tracking_parameters(link)
+            link = await Url.replace_hostname(link, "link") or link
             feed_id = result[4]
             # news_item = ("\n{}\n{}\n{} [{}]\n").format(str(title), str(link),
             #                                            str(feed_title), str(ix))

@@ -44,14 +44,11 @@ import slixmpp

 import slixfeed.config as config
 from slixfeed.config import Config
-import slixfeed.crawl as crawl
-import slixfeed.dt as dt
 import slixfeed.fetch as fetch
 from slixfeed.log import Logger
 import slixfeed.sqlite as sqlite
-from slixfeed.syndication import Feed, FeedTask, Opml
-import slixfeed.url as uri
-from slixfeed.utilities import Html, Task, Utilities
+from slixfeed.syndication import Feed, FeedDiscovery, FeedTask, Opml
+from slixfeed.utilities import DateAndTime, Html, Task, Url, Utilities
 from slixfeed.version import __version__
 from slixfeed.xmpp.bookmark import XmppBookmark
 from slixfeed.xmpp.chat import XmppChat, XmppChatTask

@@ -62,7 +59,6 @@ from slixfeed.xmpp.message import XmppMessage
 from slixfeed.xmpp.muc import XmppMuc
 from slixfeed.xmpp.groupchat import XmppGroupchat
 from slixfeed.xmpp.presence import XmppPresence
-from slixfeed.xmpp.privilege import is_operator, is_access
 import slixfeed.xmpp.profile as profile
 from slixfeed.xmpp.publish import XmppPubsub, XmppPubsubAction, XmppPubsubTask
 from slixfeed.xmpp.roster import XmppRoster

@@ -791,7 +787,7 @@ class XmppClient(slixmpp.ClientXMPP):
         #     )

         # NOTE https://codeberg.org/poezio/slixmpp/issues/3515
-        # if is_operator(self, jid_bare):
+        # if XmppUtilities.is_operator(self, jid_bare):
         self['xep_0050'].add_command(node='subscription',
                                      name='🪶️ Subscribe',
                                      handler=self._handle_subscription_add)

@@ -842,7 +838,7 @@ class XmppClient(slixmpp.ClientXMPP):
                      .format(function_name, jid_full))
         jid_bare = session['from'].bare
         chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-        if is_access(self, jid_bare, jid_full, chat_type):
+        if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
             form = self['xep_0004'].make_form('form', 'PubSub')
             form['instructions'] = 'Publish news items to PubSub nodes.'
             options = form.add_field(desc='From which medium source do you '

@@ -863,7 +859,7 @@ class XmppClient(slixmpp.ClientXMPP):
             session['prev'] = None
             session['payload'] = form
         else:
-            if not is_operator(self, jid_bare):
+            if not XmppUtilities.is_operator(self, jid_bare):
                 text_warn = 'This resource is restricted to operators.'
             elif chat_type == 'groupchat':
                 text_warn = ('This resource is restricted to moderators of {}.'

@@ -883,7 +879,7 @@ class XmppClient(slixmpp.ClientXMPP):
                      .format(function_name, jid_full))
         jid_bare = session['from'].bare
         chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-        if is_access(self, jid_bare, jid_full, chat_type):
+        if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
             values = payload['values']
             form = self['xep_0004'].make_form('form', 'Publish')
             form['instructions'] = ('Choose a PubSub Jabber ID and verify '

@@ -971,7 +967,7 @@ class XmppClient(slixmpp.ClientXMPP):
             session['has_next'] = True
             session['prev'] = self._handle_publish
         else:
-            if not is_operator(self, jid_bare):
+            if not XmppUtilities.is_operator(self, jid_bare):
                 text_warn = 'This resource is restricted to operators.'
             elif chat_type == 'groupchat':
                 text_warn = ('This resource is restricted to moderators of {}.'

@@ -994,7 +990,7 @@ class XmppClient(slixmpp.ClientXMPP):
         print(values['jid'])
         jid = values['jid'] if 'jid' in values else None
         jid_bare = session['from'].bare
-        if jid != jid_bare and not is_operator(self, jid_bare):
+        if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
             text_warn = ('Posting to {} is restricted to operators only.'
                          .format(jid_bare)) # Should not this be self.boundjid.bare?
             session['allow_prev'] = False

@@ -1065,7 +1061,7 @@ class XmppClient(slixmpp.ClientXMPP):
         ixs = values['entries']
         #if jid: jid = jid[0] if isinstance(jid, list) else jid
         jid_bare = session['from'].bare
-        if jid != jid_bare and not is_operator(self, jid_bare):
+        if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
             # TODO Report incident
             text_warn = 'You are not suppose to be here.'
             session['allow_prev'] = False

@@ -1100,7 +1096,7 @@ class XmppClient(slixmpp.ClientXMPP):
         values = payload['values']
         jid = values['jid'] if 'jid' in values else None
         jid_bare = session['from'].bare
-        if jid != jid_bare and not is_operator(self, jid_bare):
+        if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
             # TODO Report incident
             text_warn = 'You are not suppose to be here.'
             # text_warn = ('Posting to {} is restricted to operators only.'

@@ -1119,7 +1115,7 @@ class XmppClient(slixmpp.ClientXMPP):
         if jid == self.boundjid.bare:
             node = 'urn:xmpp:microblog:0'
         else:
-            node = uri.get_hostname(url)
+            node = Url.get_hostname(url)
         form = self['xep_0004'].make_form('form', 'Publish')
         while True:
             result = await fetch.http(url)

@@ -1137,7 +1133,7 @@ class XmppClient(slixmpp.ClientXMPP):
                 if "title" in feed["feed"].keys():
                     title = feed["feed"]["title"]
                 else:
-                    title = uri.get_hostname(url)
+                    title = Url.get_hostname(url)
                 entries = feed.entries
                 entry_ix = 0
                 for entry in entries:

@@ -1146,10 +1142,10 @@ class XmppClient(slixmpp.ClientXMPP):
                     else:
                         if entry.has_key("published"):
                             title = entry.published
-                            title = dt.rfc2822_to_iso8601(title)
+                            title = DateAndTime.rfc2822_to_iso8601(title)
                         elif entry.has_key("updated"):
                             title = entry.updated
-                            title = dt.rfc2822_to_iso8601(title)
+                            title = DateAndTime.rfc2822_to_iso8601(title)
                         else:
                             title = "*** No title ***"
                     options.addOption(title, str(entry_ix))

@@ -1164,7 +1160,7 @@ class XmppClient(slixmpp.ClientXMPP):
                 session['payload'] = form
                 break
             else:
-                result = await crawl.probe_page(url, document)
+                result = await FeedDiscovery.probe_page(url, document)
                 if isinstance(result, list):
                     results = result
                     form['instructions'] = ('Discovered {} subscriptions '

@@ -1225,7 +1221,7 @@ class XmppClient(slixmpp.ClientXMPP):
         jid = values['jid'][0] if 'jid' in values else None
         #if jid: jid = jid[0] if isinstance(jid, list) else jid
         jid_bare = session['from'].bare
-        if jid != jid_bare and not is_operator(self, jid_bare):
+        if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
             # TODO Report incident
             text_warn = 'You are not suppose to be here.'
             session['allow_prev'] = False

@@ -1262,10 +1258,10 @@ class XmppClient(slixmpp.ClientXMPP):
         # else:
         #     if feed.entries[entry].has_key("published"):
         #         title = feed.entries[entry].published
-        #         title = dt.rfc2822_to_iso8601(title)
+        #         title = DateAndTime.rfc2822_to_iso8601(title)
         #     elif feed.entries[entry].has_key("updated"):
         #         title = feed.entries[entry].updated
-        #         title = dt.rfc2822_to_iso8601(title)
+        #         title = DateAndTime.rfc2822_to_iso8601(title)
         #     else:
         #         title = "*** No title ***"
         #     if feed.entries[entry].has_key("summary"):

@@ -1393,7 +1389,7 @@ class XmppClient(slixmpp.ClientXMPP):
                      .format(function_name, jid_full))
         jid_bare = session['from'].bare
         chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-        if is_access(self, jid_bare, jid_full, chat_type):
+        if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
             jid = session['from'].bare
             db_file = config.get_pathname_to_database(jid_bare)
             form = self['xep_0004'].make_form('form', 'Filters')

@@ -1432,7 +1428,7 @@ class XmppClient(slixmpp.ClientXMPP):
             session['next'] = self._handle_filters_complete
             session['payload'] = form
         else:
-            if not is_operator(self, jid_bare):
+            if not XmppUtilities.is_operator(self, jid_bare):
                 text_warn = 'This resource is restricted to operators.'
             elif chat_type == 'groupchat':
                 text_warn = ('This resource is restricted to moderators of {}.'

@@ -1502,7 +1498,7 @@ class XmppClient(slixmpp.ClientXMPP):
                      .format(function_name, jid_full))
         jid_bare = session['from'].bare
         chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-        if is_access(self, jid_bare, jid_full, chat_type):
+        if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
             form = self['xep_0004'].make_form('form', 'Subscribe')
             # form['instructions'] = 'Add a new custom subscription.'
             form.add_field(desc='Enter a URL.',

@@ -1517,7 +1513,7 @@ class XmppClient(slixmpp.ClientXMPP):
                            required=True,
                            value='http://',
                            var='subscription')
-            if is_operator(self, jid_bare):
+            if XmppUtilities.is_operator(self, jid_bare):
                 # form['instructions'] = ('Special section for operators:\n'
                 #                         'This section allows you to add '
                 #                         'subscriptions for a JID of your '

@@ -1544,7 +1540,7 @@ class XmppClient(slixmpp.ClientXMPP):
             session['prev'] = None
             session['payload'] = form
         else:
-            if not is_operator(self, jid_bare):
+            if not XmppUtilities.is_operator(self, jid_bare):
                 text_warn = 'This resource is restricted to operators.'
             elif chat_type == 'groupchat':
                 text_warn = ('This resource is restricted to moderators of {}.'

@@ -1576,7 +1572,7 @@ class XmppClient(slixmpp.ClientXMPP):
             # options.addOption('News by tag', 'tag')
             options.addOption('Rejected', 'reject')
             options.addOption('Unread', 'unread')
-            if is_operator(self, jid_bare):
+            if XmppUtilities.is_operator(self, jid_bare):
                 # form['instructions'] = ('Special section for operators:\n'
                 #                         'This section allows you to view news items '
                 #                         'of a JID of your choice.')

@@ -1617,7 +1613,7 @@ class XmppClient(slixmpp.ClientXMPP):
         jid_bare = session['from'].bare
         values = payload['values']
         form = self['xep_0004'].make_form('form', 'Updates')
-        if is_operator(self, jid_bare) and 'jid' in values:
+        if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
             jid_bare = values['jid']
             form.add_field(var='jid',
                            ftype='hidden',

@@ -1675,7 +1671,7 @@ class XmppClient(slixmpp.ClientXMPP):
         ix = values['update']
         jid_bare = session['from'].bare
         form = self['xep_0004'].make_form('form', 'Article')
-        if is_operator(self, jid_bare) and 'jid' in values:
+        if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
             jid = values['jid']
             jid_bare = jid[0] if isinstance(jid, list) else jid
             form.add_field(var='jid',

@@ -1688,9 +1684,9 @@ class XmppClient(slixmpp.ClientXMPP):
         url = sqlite.get_entry_url(db_file, ix)
         url = url[0] # TODO Handle a situation when index is no longer exist
         logger.debug('Original URL: {}'.format(url))
-        url = uri.remove_tracking_parameters(url)
+        url = Url.remove_tracking_parameters(url)
         logger.debug('Processed URL (tracker removal): {}'.format(url))
-        url = (await uri.replace_hostname(url, 'link')) or url
+        url = (await Url.replace_hostname(url, 'link')) or url
         logger.debug('Processed URL (replace hostname): {}'.format(url))
         # result = await fetch.http(url)
         # if 'content' in result:

@@ -1750,7 +1746,7 @@ class XmppClient(slixmpp.ClientXMPP):
         identifier = values['identifier'] if 'identifier' in values else None
         url = values['subscription']
         jid_bare = session['from'].bare
-        if is_operator(self, jid_bare) and 'jid' in values:
+        if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
             custom_jid = values['jid']
             jid_bare = custom_jid[0] if isinstance(custom_jid, list) else jid_bare
             # jid_bare = custom_jid[0] if custom_jid else jid_bare

@@ -1780,7 +1776,7 @@ class XmppClient(slixmpp.ClientXMPP):
             session['prev'] = None
         # elif not identifier:
         #     counter = 0
-        #     hostname = uri.get_hostname(url)
+        #     hostname = Url.get_hostname(url)
         #     identifier = hostname + ':' + str(counter)
         #     while True:
         #         if sqlite.check_identifier_exist(db_file, identifier):

@@ -1797,7 +1793,7 @@ class XmppClient(slixmpp.ClientXMPP):
             exist_count = 0
             for url in urls:
                 counter = 0
-                hostname = uri.get_hostname(url)
+                hostname = Url.get_hostname(url)
                 identifier = hostname + ':' + str(counter)
                 while True:
                     if sqlite.check_identifier_exist(db_file, identifier):

@@ -1830,7 +1826,7 @@ class XmppClient(slixmpp.ClientXMPP):
             if isinstance(url, list):
                 url = url[0]
             counter = 0
-            hostname = uri.get_hostname(url)
+            hostname = Url.get_hostname(url)
             identifier = hostname + ':' + str(counter)
             while True:
                 if sqlite.check_identifier_exist(db_file, identifier):

@@ -1956,7 +1952,7 @@ class XmppClient(slixmpp.ClientXMPP):
                      .format(function_name, jid_full))
         jid_bare = session['from'].bare
         values = payload['values']
-        if is_operator(self, jid_bare) and 'jid' in values:
+        if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
             jid_bare = values['jid'][0]
             del values['jid']
         db_file = config.get_pathname_to_database(jid_bare)

@@ -1981,7 +1977,7 @@ class XmppClient(slixmpp.ClientXMPP):
                      .format(function_name, jid_full))
         jid_bare = session['from'].bare
         values = payload['values']
-        if is_operator(self, jid_bare) and 'jid' in values:
+        if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
             jid_bare = values['jid'][0]
             del values['jid']
         db_file = config.get_pathname_to_database(jid_bare)

@@ -2022,7 +2018,7 @@ class XmppClient(slixmpp.ClientXMPP):
                      .format(function_name, jid_full))
         jid_bare = session['from'].bare
         chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-        if is_access(self, jid_bare, jid_full, chat_type):
+        if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
             form = self['xep_0004'].make_form('form', 'Discover & Search')
             form['instructions'] = 'Discover news subscriptions of all kinds'
             options = form.add_field(desc='Select type of search.',

@@ -2039,7 +2035,7 @@ class XmppClient(slixmpp.ClientXMPP):
             session['payload'] = form
             session['prev'] = None
         else:
-            if not is_operator(self, jid_bare):
+            if not XmppUtilities.is_operator(self, jid_bare):
                 text_warn = 'This resource is restricted to operators.'
             elif chat_type == 'groupchat':
                 text_warn = ('This resource is restricted to moderators of {}.'

@@ -2146,7 +2142,7 @@ class XmppClient(slixmpp.ClientXMPP):
                      .format(function_name, jid_full))
         jid_bare = session['from'].bare
         chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-        if is_access(self, jid_bare, jid_full, chat_type):
+        if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
             form = self['xep_0004'].make_form('form', 'Subscriptions')
             form['instructions'] = ('Browse, view, toggle or remove '
                                     'tags and subscriptions.')

@@ -2160,7 +2156,7 @@ class XmppClient(slixmpp.ClientXMPP):
             options.addOption('Browse tags', 'tag')
             options.addOption('Remove subscriptions', 'delete')
             options.addOption('Toggle subscriptions', 'toggle')
-            if is_operator(self, jid_bare):
+            if XmppUtilities.is_operator(self, jid_bare):
                 form['instructions'] = None
                 # form['instructions'] = ('Special section for operators:\n'
                 #                         'This section allows you to change '

@@ -2190,7 +2186,7 @@ class XmppClient(slixmpp.ClientXMPP):
             session['next'] = self._handle_subscriptions_result
             session['has_next'] = True
         else:
-            if not is_operator(self, jid_bare):
+            if not XmppUtilities.is_operator(self, jid_bare):
                 text_warn = 'This resource is restricted to operators.'
             elif chat_type == 'groupchat':
                 text_warn = ('This resource is restricted to moderators of {}.'

@@ -2212,7 +2208,7 @@ class XmppClient(slixmpp.ClientXMPP):
         values = payload['values']
         jid_bare = session['from'].bare
         form = self['xep_0004'].make_form('form', 'Subscriptions')
-        if is_operator(self, jid_bare) and 'jid' in values:
+        if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
             jid_bare = values['jid']
             form.add_field(ftype='hidden',
                            value=jid_bare,

@@ -2306,7 +2302,7 @@ class XmppClient(slixmpp.ClientXMPP):
         form = self['xep_0004'].make_form('form', 'Subscriptions')
         jid_bare = session['from'].bare
         values = payload['values']
-        if is_operator(self, jid_bare) and 'jid' in values:
+        if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
             jid_bare = values['jid'][0]
             form.add_field(ftype='hidden',
                            value=jid_bare,

@@ -2344,7 +2340,7 @@ class XmppClient(slixmpp.ClientXMPP):
         form = self['xep_0004'].make_form('form', 'Subscription')
         jid_bare = session['from'].bare
         values = payload['values']
-        if is_operator(self, jid_bare) and 'jid' in values:
+        if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
             jid_bare = values['jid'][0] if values['jid'] else jid_bare
             form.add_field(ftype='hidden',
                            value=jid_bare,

@@ -2440,7 +2436,7 @@ class XmppClient(slixmpp.ClientXMPP):
                      .format(function_name, jid_full))
         jid_bare = session['from'].bare
         values = payload['values']
-        if is_operator(self, jid_bare) and 'jid' in values:
+        if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
             jid_bare = values['jid'][0]
         db_file = config.get_pathname_to_database(jid_bare)
         # url = values['url']

@@ -2506,14 +2502,14 @@ class XmppClient(slixmpp.ClientXMPP):
                      .format(function_name, jid_full))
         jid_bare = session['from'].bare
         chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-        if is_access(self, jid_bare, jid_full, chat_type):
+        if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
             form = self['xep_0004'].make_form('form', 'Advanced')
             form['instructions'] = 'Extended options'
             options = form.add_field(ftype='list-single',
                                      label='Choose',
                                      required=True,
                                      var='option')
-            if is_operator(self, jid_bare):
+            if XmppUtilities.is_operator(self, jid_bare):
                 options.addOption('Administration', 'admin')
                 # options.addOption('Activity', 'activity')
                 # options.addOption('Filters', 'filter')

@@ -2527,7 +2523,7 @@ class XmppClient(slixmpp.ClientXMPP):
             session['next'] = self._handle_advanced_result
             session['prev'] = self._handle_advanced
         else:
-            if not is_operator(self, jid_bare):
+            if not XmppUtilities.is_operator(self, jid_bare):
                 text_warn = 'This resource is restricted to operators.'
             elif chat_type == 'groupchat':
                 text_warn = ('This resource is restricted to moderators of {}.'

@@ -2556,7 +2552,7 @@ class XmppClient(slixmpp.ClientXMPP):
             case 'admin':
                 # NOTE Even though this check is already conducted on previous
                 # form, this check is being done just in case.
                 if is_operator(self, jid_bare):
-                if is_operator(self, jid_bare):
+                if XmppUtilities.is_operator(self, jid_bare):
                     if self.is_component:
                         # NOTE This will be changed with XEP-0222 XEP-0223
                         text_info = ('Subscriber management options are '

@@ -2589,7 +2585,7 @@ class XmppClient(slixmpp.ClientXMPP):
                 else:
                     logger.warning('An unauthorized attempt to access '
                                    'bookmarks has been detected for JID {} at '
-                                   '{}'.format(jid_bare, dt.timestamp()))
+                                   '{}'.format(jid_bare, DateAndTime.timestamp()))
                     text_warn = 'This resource is restricted.'
                     session['notes'] = [['warn', text_warn]]
                     session['has_next'] = False

@@ -2617,7 +2613,7 @@ class XmppClient(slixmpp.ClientXMPP):
                            required=True,
                            var='url')
             url['validate']['datatype'] = 'xs:anyURI'
-            if is_operator(self, jid_bare):
+            if XmppUtilities.is_operator(self, jid_bare):
                 form.add_field(ftype='fixed',
                                label='* Operators',
                                desc='This section allows you to import '

@@ -2651,7 +2647,7 @@ class XmppClient(slixmpp.ClientXMPP):
             options.addOption('OPML', 'opml')
             # options.addOption('HTML', 'html')
             # options.addOption('XBEL', 'xbel')
-            if is_operator(self, jid_bare):
+            if XmppUtilities.is_operator(self, jid_bare):
                 # form['instructions'] = ('Special section for operators:\n'
                 #                         'This section allows you to '
                 #                         'import and export subscriptions '

@@ -2841,7 +2837,7 @@ class XmppClient(slixmpp.ClientXMPP):
         url = values['url']
         if url.startswith('http') and url.endswith('.opml'):
             jid_bare = session['from'].bare
-            if is_operator(self, jid_bare) and 'jid' in values:
+            if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
                 jid = values['jid']
                 jid_bare = jid[0] if isinstance(jid, list) else jid
                 db_file = config.get_pathname_to_database(jid_bare)
|
db_file = config.get_pathname_to_database(jid_bare)
|
||||||
|
@ -2882,7 +2878,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
||||||
# form['type'] = 'result'
|
# form['type'] = 'result'
|
||||||
values = payload['values']
|
values = payload['values']
|
||||||
jid_bare = session['from'].bare
|
jid_bare = session['from'].bare
|
||||||
if is_operator(self, jid_bare) and 'jid' in values:
|
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||||
jid = values['jid']
|
jid = values['jid']
|
||||||
jid_bare = jid[0] if isinstance(jid, list) else jid
|
jid_bare = jid[0] if isinstance(jid, list) else jid
|
||||||
# form = self['xep_0004'].make_form('result', 'Done')
|
# form = self['xep_0004'].make_form('result', 'Done')
|
||||||
|
@ -2915,7 +2911,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
||||||
jid_bare = session['from'].bare
|
jid_bare = session['from'].bare
|
||||||
jid_full = str(session['from'])
|
jid_full = str(session['from'])
|
||||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||||
if is_access(self, jid_bare, jid_full, chat_type):
|
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||||
form = self['xep_0004'].make_form('form', 'Subscribe')
|
form = self['xep_0004'].make_form('form', 'Subscribe')
|
||||||
# NOTE Refresh button would be of use
|
# NOTE Refresh button would be of use
|
||||||
form['instructions'] = 'Featured subscriptions'
|
form['instructions'] = 'Featured subscriptions'
|
||||||
|
@ -2938,7 +2934,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
||||||
if '@' in jid_bare:
|
if '@' in jid_bare:
|
||||||
hostname = jid_bare.split('@')[1]
|
hostname = jid_bare.split('@')[1]
|
||||||
url = 'http://' + hostname
|
url = 'http://' + hostname
|
||||||
result = await crawl.probe_page(url)
|
result = await FeedDiscovery.probe_page(url)
|
||||||
if not result:
|
if not result:
|
||||||
url = {'url' : url,
|
url = {'url' : url,
|
||||||
'index' : None,
|
'index' : None,
|
||||||
|
@ -2966,7 +2962,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
||||||
session['payload'] = form
|
session['payload'] = form
|
||||||
session['prev'] = self._handle_promoted
|
session['prev'] = self._handle_promoted
|
||||||
else:
|
else:
|
||||||
if not is_operator(self, jid_bare):
|
if not XmppUtilities.is_operator(self, jid_bare):
|
||||||
text_warn = 'This resource is restricted to operators.'
|
text_warn = 'This resource is restricted to operators.'
|
||||||
elif chat_type == 'groupchat':
|
elif chat_type == 'groupchat':
|
||||||
text_warn = ('This resource is restricted to moderators of {}.'
|
text_warn = ('This resource is restricted to moderators of {}.'
|
||||||
|
@ -3620,7 +3616,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
||||||
.format(function_name, jid_full))
|
.format(function_name, jid_full))
|
||||||
jid_bare = session['from'].bare
|
jid_bare = session['from'].bare
|
||||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||||
if is_access(self, jid_bare, jid_full, chat_type):
|
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||||
db_file = config.get_pathname_to_database(jid_bare)
|
db_file = config.get_pathname_to_database(jid_bare)
|
||||||
if jid_bare not in self.settings:
|
if jid_bare not in self.settings:
|
||||||
Config.add_settings_jid(self.settings, jid_bare, db_file)
|
Config.add_settings_jid(self.settings, jid_bare, db_file)
|
||||||
|
@ -3718,7 +3714,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
||||||
session['next'] = self._handle_settings_complete
|
session['next'] = self._handle_settings_complete
|
||||||
session['payload'] = form
|
session['payload'] = form
|
||||||
else:
|
else:
|
||||||
if not is_operator(self, jid_bare):
|
if not XmppUtilities.is_operator(self, jid_bare):
|
||||||
text_warn = 'This resource is restricted to operators.'
|
text_warn = 'This resource is restricted to operators.'
|
||||||
elif chat_type == 'groupchat':
|
elif chat_type == 'groupchat':
|
||||||
text_warn = ('This resource is restricted to moderators of {}.'
|
text_warn = ('This resource is restricted to moderators of {}.'
|
||||||
|
|
|
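All of the XmppClient hunks above make one mechanical change: the free functions is_operator and is_access, formerly imported from a standalone module, are now called through the XmppUtilities class. Because the methods keep their original (self, ...) signature, the client instance is passed explicitly at every call site. A minimal sketch of that calling convention, with StubClient as a hypothetical stand-in for the bot instance (the real operators list comes from slixfeed's configuration):

class XmppUtilities:

    def is_operator(self, jid_bare):
        """Check if given JID is an operator"""
        result = False
        for operator in self.operators:
            if jid_bare == operator['jid']:
                result = True
                break
        return result


class StubClient:
    # Hypothetical stand-in for the bot instance.
    operators = [{'jid': 'operator@example.org', 'name': 'Operator'}]


print(XmppUtilities.is_operator(StubClient(), 'operator@example.org'))  # True
print(XmppUtilities.is_operator(StubClient(), 'guest@example.org'))     # False

Calling through the class rather than through an instance keeps the whole change a pure rename at each call site, which is why every hunk touches exactly one line.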
@@ -5,14 +5,11 @@ from feedparser import parse
 from random import randrange
 import slixfeed.config as config
 from slixfeed.config import Config
-import slixfeed.crawl as crawl
-import slixfeed.dt as dt
 import slixfeed.fetch as fetch
 from slixfeed.log import Logger
 import slixfeed.sqlite as sqlite
-from slixfeed.syndication import Feed, Opml
+from slixfeed.syndication import Feed, FeedDiscovery, Opml
-import slixfeed.url as uri
+from slixfeed.utilities import DateAndTime, Documentation, Url, Utilities
-from slixfeed.utilities import Documentation, Utilities
 from slixfeed.version import __version__
 from slixfeed.xmpp.bookmark import XmppBookmark
 from slixfeed.xmpp.muc import XmppMuc
@@ -121,9 +118,9 @@ class XmppCommands:
 """
 if url.startswith('http'):
 if not title:
-title = uri.get_hostname(url)
+title = Url.get_hostname(url)
 counter = 0
-hostname = uri.get_hostname(url)
+hostname = Url.get_hostname(url)
 hostname = hostname.replace('.','-')
 identifier = hostname + ':' + str(counter)
 while True:
@@ -148,7 +145,7 @@ class XmppCommands:
 if feed.has_key('updated_parsed'):
 feed_updated = feed.updated_parsed
 try:
-feed_updated = dt.convert_struct_time_to_iso8601(
+feed_updated = DateAndTime.convert_struct_time_to_iso8601(
 feed_updated)
 except:
 feed_updated = None
@@ -393,7 +390,7 @@ class XmppCommands:
 identifier = info[2]
 else:
 counter = 0
-hostname = uri.get_hostname(url)
+hostname = Url.get_hostname(url)
 hostname = hostname.replace('.','-')
 identifier = hostname + ':' + str(counter)
 while True:
@@ -417,8 +414,8 @@ class XmppCommands:
 if (url.startswith('feed:/') or
 url.startswith('itpc:/') or
 url.startswith('rss:/')):
-url = uri.feed_to_http(url)
+url = Url.feed_to_http(url)
-url = (await uri.replace_hostname(url, 'feed')) or url
+url = (await Url.replace_hostname(url, 'feed')) or url
 result = await Feed.add_feed(self, jid_bare, db_file, url,
 identifier)
 if isinstance(result, list):
@@ -479,10 +476,10 @@ class XmppCommands:
 # both interfaces Chat and IPC
 async def fetch_http(self, url, db_file, jid_bare):
 if url.startswith('feed:/') or url.startswith('rss:/'):
-url = uri.feed_to_http(url)
+url = Url.feed_to_http(url)
-url = (await uri.replace_hostname(url, 'feed')) or url
+url = (await Url.replace_hostname(url, 'feed')) or url
 counter = 0
-hostname = uri.get_hostname(url)
+hostname = Url.get_hostname(url)
 hostname = hostname.replace('.','-')
 identifier = hostname + ':' + str(counter)
 while True:
@@ -581,7 +578,7 @@ class XmppCommands:
 
 async def muc_join(self, command):
 if command:
-muc_jid = uri.check_xmpp_uri(command)
+muc_jid = Url.check_xmpp_uri(command)
 if muc_jid:
 # TODO probe JID and confirm it's a groupchat
 result = await XmppMuc.join(self, muc_jid)
@@ -735,8 +732,8 @@ class XmppCommands:
 
 async def feed_read(self, jid_bare, data, url):
 if url.startswith('feed:/') or url.startswith('rss:/'):
-url = uri.feed_to_http(url)
+url = Url.feed_to_http(url)
-url = (await uri.replace_hostname(url, 'feed')) or url
+url = (await Url.replace_hostname(url, 'feed')) or url
 match len(data):
 case 1:
 if url.startswith('http'):
@@ -750,7 +747,7 @@ class XmppCommands:
 message = Feed.view_feed(url, feed)
 break
 else:
-result = await crawl.probe_page(url, document)
+result = await FeedDiscovery.probe_page(url, document)
 if isinstance(result, list):
 results = result
 message = ("Syndication feeds found for {}\n\n```\n"
@@ -786,7 +783,7 @@ class XmppCommands:
 message = Feed.view_entry(url, feed, num)
 break
 else:
-result = await crawl.probe_page(url, document)
+result = await FeedDiscovery.probe_page(url, document)
 if isinstance(result, list):
 results = result
 message = ("Syndication feeds found for {}\n\n```\n"
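The feed_read, fetch_http and add_feed paths in XmppCommands all normalize feed-reader schemes through Url.feed_to_http before fetching. The body of that function is not part of this diff, so the following is only a sketch under the assumption that it swaps a feed:, itpc: or rss: scheme for http(s); the function name is from this commit, the body is not:

def feed_to_http(url: str) -> str:
    # Assumed behavior: replace a feed-reader scheme with https.
    for scheme in ('feed:', 'itpc:', 'rss:'):
        if url.startswith(scheme):
            rest = url[len(scheme):].lstrip('/')
            # Some feed URLs wrap a full http(s) URL, e.g. feed:https://...
            if rest.startswith(('http://', 'https://')):
                return rest
            return 'https://' + rest
    return url

print(feed_to_http('feed:https://example.org/rss.xml'))  # https://example.org/rss.xml
print(feed_to_http('rss://example.org/rss.xml'))         # https://example.org/rss.xml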
@@ -14,7 +14,7 @@ TODO
 """
 
 import asyncio
-from slixfeed.dt import current_time
+from slixfeed.utilities import DateAndTime
 from slixfeed.log import Logger
 from slixmpp.exceptions import IqTimeout, IqError
 from time import sleep
@@ -62,17 +62,17 @@ class XmppConnect:
 
 def recover(self, message):
 logger.warning(message)
-print(current_time(), message, 'Attempting to reconnect.')
+print(DateAndTime.current_time(), message, 'Attempting to reconnect.')
 self.connection_attempts += 1
 # if self.connection_attempts <= self.max_connection_attempts:
 # self.reconnect(wait=5.0) # wait a bit before attempting to reconnect
 # else:
 # print(current_time(),"Maximum connection attempts exceeded.")
 # logging.error("Maximum connection attempts exceeded.")
-print(current_time(), 'Attempt number', self.connection_attempts)
+print(DateAndTime.current_time(), 'Attempt number', self.connection_attempts)
 seconds = self.reconnect_timeout or 30
 seconds = int(seconds)
-print(current_time(), 'Next attempt within', seconds, 'seconds')
+print(DateAndTime.current_time(), 'Next attempt within', seconds, 'seconds')
 # NOTE asyncio.sleep doesn't interval as expected
 # await asyncio.sleep(seconds)
 sleep(seconds)
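Only the current_time() calls change in XmppConnect.recover; the retry pacing itself is untouched. A standalone sketch of that pacing, assuming reconnect_timeout may be unset (hence the `or 30` fallback visible above); recover_once is a hypothetical name:

from time import sleep

def recover_once(connection_attempts, reconnect_timeout=None):
    # Mirror of the timing logic: None or 0 falls back to 30 seconds.
    connection_attempts += 1
    seconds = int(reconnect_timeout or 30)
    print('Attempt number', connection_attempts)
    print('Next attempt within', seconds, 'seconds')
    # Blocking on purpose; the original notes that asyncio.sleep
    # did not pace the retries as expected.
    sleep(seconds)
    return connection_attempts

attempts = recover_once(0, reconnect_timeout=5)  # waits 5 seconds, returns 1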
@@ -1,49 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-
-def is_access(self, jid_bare, jid_full, chat_type):
-    """Determine access privilege"""
-    operator = is_operator(self, jid_bare)
-    if operator:
-        if chat_type == 'groupchat':
-            if is_moderator(self, jid_bare, jid_full):
-                access = True
-        else:
-            access = True
-    else:
-        access = False
-    return access
-
-
-def is_operator(self, jid_bare):
-    """Check if given JID is an operator"""
-    result = False
-    for operator in self.operators:
-        if jid_bare == operator['jid']:
-            result = True
-            # operator_name = operator['name']
-            break
-    return result
-
-
-def is_moderator(self, jid_bare, jid_full):
-    """Check if given JID is a moderator"""
-    alias = jid_full[jid_full.index('/')+1:]
-    role = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'role')
-    if role == 'moderator':
-        result = True
-    else:
-        result = False
-    return result
-
-
-def is_member(self, jid_bare, jid_full):
-    """Check if given JID is a member"""
-    alias = jid_full[jid_full.index('/')+1:]
-    affiliation = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'affiliation')
-    if affiliation == 'member':
-        result = True
-    else:
-        result = False
-    return result
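The module deleted above is not lost: the same four helpers reappear as XmppUtilities methods at the end of this commit. The role check can be condensed as below; the get_jid_property call is verbatim from the deleted code and requires a connected slixmpp client with the xep_0045 (MUC) plugin loaded:

def is_moderator(self, jid_bare, jid_full):
    """Compact equivalent of the deleted helper."""
    alias = jid_full[jid_full.index('/') + 1:]  # resource part = room alias
    role = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'role')
    return role == 'moderator'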
@@ -16,8 +16,7 @@ from slixfeed.config import Config
 from slixfeed.log import Logger
 import slixfeed.sqlite as sqlite
 from slixfeed.syndication import Feed
-import slixfeed.url as uri
+from slixfeed.utilities import Url, Utilities
-from slixfeed.utilities import Utilities
 from slixfeed.xmpp.iq import XmppIQ
 import sys
 
@@ -337,7 +336,7 @@ class XmppPubsubAction:
 node_id = node_id[0]
 if not node_id:
 counter = 0
-hostname = uri.get_hostname(url)
+hostname = Url.get_hostname(url)
 hostname = hostname.replace('.','-')
 identifier = hostname + ':' + str(counter)
 while True:
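XmppPubsubAction falls back to the same hostname-and-counter identifier scheme seen throughout XmppCommands. The body of the `while True:` loop is cut off in every hunk, so the sketch below is an assumption: it bumps the counter until the identifier is free, with existing_identifiers standing in for whatever lookup (for example, an SQLite query) the real code performs:

def make_identifier(hostname: str, existing_identifiers: set) -> str:
    # Derive an identifier such as 'wiki-pine64-org:1' from a hostname.
    hostname = hostname.replace('.', '-')
    counter = 0
    while True:
        identifier = hostname + ':' + str(counter)
        if identifier not in existing_identifiers:
            return identifier
        counter += 1

print(make_identifier('wiki.pine64.org', {'wiki-pine64-org:0'}))
# -> 'wiki-pine64-org:1'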
@@ -16,7 +16,7 @@ logger = Logger(__name__)
 class XmppUpload:
 
 async def start(self, jid, filename, domain=None):
-logger.info('Uploading file %s...', filename)
+logger.info(['Uploading file %s...', filename])
 try:
 upload_file = self['xep_0363'].upload_file
 # if self.encrypted and not self['xep_0454']:
@@ -34,7 +34,7 @@ class XmppUpload:
 filename, domain, timeout=10,
 )
 logger.info('Upload successful!')
-logger.info(['Sending file to %s', jid])
+logger.info(['Sending file to %s', jid])
 except HTTPError:
 url = ('Error: It appears that this server does not support '
 'HTTP File Upload.')
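Both hunks above wrap the logger.info arguments in a list. The stdlib logging module would print that list verbatim rather than interpolate it, so the change presumably targets slixfeed's own Logger wrapper, whose API is not shown in this diff. A hypothetical wrapper that accepts both forms might look like this:

import logging

class Logger:
    # Hypothetical sketch; the real slixfeed.log.Logger is not part of this diff.
    def __init__(self, name):
        self._logger = logging.getLogger(name)

    def info(self, message):
        # Accept either a plain string or a [format, arg, ...] list.
        if isinstance(message, list):
            message = message[0] % tuple(message[1:])
        self._logger.info(message)

logger = Logger(__name__)
logger.info(['Uploading file %s...', 'photo.png'])  # -> "Uploading file photo.png..."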
@@ -58,3 +58,51 @@ class XmppUtilities:
 # finally:
 # logger.info('Chat type is:', chat_type)
 return result
+
+
+
+    def is_access(self, jid_bare, jid_full, chat_type):
+        """Determine access privilege"""
+        operator = XmppUtilities.is_operator(self, jid_bare)
+        if operator:
+            if chat_type == 'groupchat':
+                if XmppUtilities.is_moderator(self, jid_bare, jid_full):
+                    access = True
+            else:
+                access = True
+        else:
+            access = False
+        return access
+
+
+    def is_operator(self, jid_bare):
+        """Check if given JID is an operator"""
+        result = False
+        for operator in self.operators:
+            if jid_bare == operator['jid']:
+                result = True
+                # operator_name = operator['name']
+                break
+        return result
+
+
+    def is_moderator(self, jid_bare, jid_full):
+        """Check if given JID is a moderator"""
+        alias = jid_full[jid_full.index('/')+1:]
+        role = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'role')
+        if role == 'moderator':
+            result = True
+        else:
+            result = False
+        return result
+
+
+    def is_member(self, jid_bare, jid_full):
+        """Check if given JID is a member"""
+        alias = jid_full[jid_full.index('/')+1:]
+        affiliation = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'affiliation')
+        if affiliation == 'member':
+            result = True
+        else:
+            result = False
+        return result
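A usage sketch tying the relocated helpers back to the XmppClient call sites patched above. Every name is from this commit except _handle_example, which is a hypothetical handler name; the body is condensed to the access check alone:

async def _handle_example(self, session):
    jid_bare = session['from'].bare
    jid_full = str(session['from'])
    chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
    if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
        form = self['xep_0004'].make_form('form', 'Subscribe')
        form['instructions'] = 'Featured subscriptions'
        session['payload'] = form
    elif not XmppUtilities.is_operator(self, jid_bare):
        session['notes'] = [['warn', 'This resource is restricted to operators.']]
        session['has_next'] = False
    return session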