forked from sch/Slixfeed

Fix command export; Restructure code.

This commit is contained in:
parent 15e6a1de66
commit 93ea8a9fab

17 changed files with 1066 additions and 1184 deletions

slixfeed/crawl.py
@@ -1,436 +0,0 @@
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
FIXME
|
||||
|
||||
1) https://wiki.pine64.org
|
||||
File "/slixfeed/crawl.py", line 178, in feed_mode_guess
|
||||
address = join_url(url, parted_url.path.split('/')[1] + path)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~^^^
|
||||
IndexError: list index out of range
|
||||
|
||||
TODO
|
||||
|
||||
1.1) Attempt to scan more paths: /blog/, /news/ etc., including root /
|
||||
Attempt to scan sub domains
|
||||
https://esmailelbob.xyz/en/
|
||||
https://blog.esmailelbob.xyz/feed/
|
||||
|
||||
1.2) Consider utilizing fetch.http_response
|
||||
|
||||
2) Consider merging with module fetch.py
|
||||
|
||||
FEEDS CRAWLER PROJECT
|
||||
|
||||
3) Mark redirects for manual check
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/atom.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/feed.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/feeds/rss/news.xml.php
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/jekyll/feed.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/news.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/news.xml.php
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/rdf.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/rss.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/videos.xml
|
||||
|
||||
|
||||
"""
|
||||
|
||||
from aiohttp import ClientError, ClientSession, ClientTimeout
|
||||
from feedparser import parse
|
||||
import logging
|
||||
from lxml import etree
|
||||
from lxml import html
|
||||
from lxml.etree import fromstring
|
||||
import slixfeed.config as config
|
||||
import slixfeed.fetch as fetch
|
||||
from slixfeed.log import Logger
|
||||
from slixfeed.url import complete_url, join_url, trim_url
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
|
||||
|
||||
# TODO Use boolean as a flag to determine whether a single URL was found
|
||||
# async def probe_page(
|
||||
# callback, url, document, num=None, db_file=None):
|
||||
# result = None
|
||||
# try:
|
||||
# # tree = etree.fromstring(res[0]) # etree is for xml
|
||||
# tree = html.fromstring(document)
|
||||
# except:
|
||||
# result = (
|
||||
# "> {}\nFailed to parse URL as feed."
|
||||
# ).format(url)
|
||||
# if not result:
|
||||
# print("RSS Auto-Discovery Engaged")
|
||||
# result = await feed_mode_auto_discovery(url, tree)
|
||||
# if not result:
|
||||
# print("RSS Scan Mode Engaged")
|
||||
# result = await feed_mode_scan(url, tree)
|
||||
# if not result:
|
||||
# print("RSS Arbitrary Mode Engaged")
|
||||
# result = await feed_mode_request(url, tree)
|
||||
# if not result:
|
||||
# result = (
|
||||
# "> {}\nNo news feeds were found for URL."
|
||||
# ).format(url)
|
||||
# # elif msg:
|
||||
# else:
|
||||
# if isinstance(result, str):
|
||||
# return result
|
||||
# elif isinstance(result, list):
|
||||
# url = result[0]
|
||||
# if db_file:
|
||||
# # print("if db_file", db_file)
|
||||
# return await callback(db_file, url)
|
||||
# elif num:
|
||||
# return await callback(url, num)
|
||||
# else:
|
||||
# return await callback(url)
|
||||
|
||||
logger = Logger(__name__)
|
||||
|
||||
async def probe_page(url, document=None):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
document : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : list or str
|
||||
Single URL as list or selection of URLs as str.
|
||||
"""
|
||||
if not document:
|
||||
response = await fetch.http(url)
|
||||
if not response['error']:
|
||||
document = response['content']
|
||||
try:
|
||||
# tree = etree.fromstring(res[0]) # etree is for xml
|
||||
tree = html.fromstring(document)
|
||||
result = None
|
||||
except Exception as e:
|
||||
logger.error(str(e))
|
||||
try:
|
||||
# /questions/15830421/xml-unicode-strings-with-encoding-declaration-are-not-supported
|
||||
# xml = html.fromstring(document.encode('utf-8'))
|
||||
# parser = etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
|
||||
# tree = fromstring(xml, parser=parser)
|
||||
|
||||
# /questions/57833080/how-to-fix-unicode-strings-with-encoding-declaration-are-not-supported
|
||||
#tree = html.fromstring(bytes(document, encoding='utf8'))
|
||||
|
||||
# https://twigstechtips.blogspot.com/2013/06/python-lxml-strings-with-encoding.html
|
||||
#parser = etree.XMLParser(recover=True)
|
||||
#tree = etree.fromstring(document, parser)
|
||||
|
||||
tree = html.fromstring(document.encode('utf-8'))
|
||||
result = None
|
||||
except Exception as e:
|
||||
logger.error(str(e))
|
||||
logger.warning("Failed to parse URL as feed for {}.".format(url))
|
||||
result = {'link' : None,
|
||||
'index' : None,
|
||||
'name' : None,
|
||||
'code' : None,
|
||||
'error' : True,
|
||||
'exist' : None}
|
||||
if not result:
|
||||
logger.debug("Feed auto-discovery engaged for {}".format(url))
|
||||
result = await feed_mode_auto_discovery(url, tree)
|
||||
if not result:
|
||||
logger.debug("Feed link scan mode engaged for {}".format(url))
|
||||
result = await feed_mode_scan(url, tree)
|
||||
if not result:
|
||||
logger.debug("Feed arbitrary mode engaged for {}".format(url))
|
||||
result = await feed_mode_guess(url, tree)
|
||||
if not result:
|
||||
logger.debug("No feeds were found for {}".format(url))
|
||||
result = None
|
||||
return result
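# NOTE Illustrative sketch, not part of this module: one way a caller could
# consume probe_page(), assuming the result dict shape produced by
# process_feed_selection() ('link', 'index', 'name', 'code', 'error', 'exist').
# import asyncio
# async def print_discovered_feeds(url):
#     result = await probe_page(url)
#     if not result:
#         print('No feeds were found for {}'.format(url))
#     elif isinstance(result, list):
#         for feed in result:
#             print('{} : {}'.format(feed['name'], feed['link']))
#     else:
#         print('{} : {}'.format(result['name'], result['link']))
# asyncio.run(print_discovered_feeds('https://www.jsonfeed.org'))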
|
||||
|
||||
|
||||
# TODO Improve scan by gradual decreasing of path
|
||||
async def feed_mode_guess(url, tree):
|
||||
"""
|
||||
Lookup for feeds by pathname using HTTP Requests.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
tree : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
urls = []
|
||||
parted_url = urlsplit(url)
|
||||
paths = config.open_config_file("lists.toml")["pathnames"]
|
||||
# Check whether URL has path (i.e. not root)
|
||||
# Check parted_url.path to avoid error in case root wasn't given
|
||||
# TODO Make more tests
|
||||
if parted_url.path and parted_url.path.split('/')[1]:
|
||||
paths.extend(
|
||||
[".atom", ".feed", ".rdf", ".rss"]
|
||||
) if '.rss' not in paths else -1
|
||||
# if paths.index('.rss'):
|
||||
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
|
||||
parted_url_path = parted_url.path if parted_url.path else '/'
|
||||
for path in paths:
|
||||
address = join_url(url, parted_url_path.split('/')[1] + path)
|
||||
if address not in urls:
|
||||
urls.extend([address])
|
||||
# breakpoint()
|
||||
# print("feed_mode_guess")
|
||||
urls = await process_feed_selection(url, urls)
|
||||
return urls
|
||||
|
||||
|
||||
async def feed_mode_scan(url, tree):
|
||||
"""
|
||||
Scan page for potential feeds by pathname.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
tree : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
urls = []
|
||||
paths = config.open_config_file("lists.toml")["pathnames"]
|
||||
for path in paths:
|
||||
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
|
||||
# xpath_query = "//a[contains(@href,'{}')]".format(path)
|
||||
num = 5
|
||||
xpath_query = (
|
||||
"(//a[contains(@href,'{}')])[position()<={}]"
|
||||
).format(path, num)
|
||||
addresses = tree.xpath(xpath_query)
|
||||
xpath_query = (
|
||||
"(//a[contains(@href,'{}')])[position()>last()-{}]"
|
||||
).format(path, num)
|
||||
addresses += tree.xpath(xpath_query)
|
||||
# NOTE Should number of addresses be limited or
|
||||
# perhaps be N from the start and N from the end
|
||||
for address in addresses:
|
||||
address = join_url(url, address.xpath('@href')[0])
|
||||
if address not in urls:
|
||||
urls.extend([address])
|
||||
# breakpoint()
|
||||
# print("feed_mode_scan")
|
||||
urls = await process_feed_selection(url, urls)
|
||||
return urls
|
||||
|
||||
|
||||
async def feed_mode_auto_discovery(url, tree):
|
||||
"""
|
||||
Lookup for feeds using RSS autodiscovery technique.
|
||||
|
||||
See: https://www.rssboard.org/rss-autodiscovery
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
tree : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
xpath_query = (
|
||||
'//link[(@rel="alternate") and '
|
||||
'(@type="application/atom+xml" or '
|
||||
'@type="application/rdf+xml" or '
|
||||
'@type="application/rss+xml")]'
|
||||
)
|
||||
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
|
||||
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
|
||||
feeds = tree.xpath(xpath_query)
|
||||
if feeds:
|
||||
urls = []
|
||||
for feed in feeds:
|
||||
# # The following code works;
|
||||
# # The following code will catch
|
||||
# # only valid resources (i.e. not 404);
|
||||
# # The following code requires more bandwidth.
|
||||
# res = await fetch.http(feed)
|
||||
# if res[0]:
|
||||
# disco = parse(res[0])
|
||||
# title = disco["feed"]["title"]
|
||||
# msg += "{} \n {} \n\n".format(title, feed)
|
||||
|
||||
# feed_name = feed.xpath('@title')[0]
|
||||
# feed_addr = join_url(url, feed.xpath('@href')[0])
|
||||
|
||||
# if feed_addr.startswith("/"):
|
||||
# feed_addr = url + feed_addr
|
||||
address = join_url(url, feed.xpath('@href')[0])
|
||||
if address not in urls:
|
||||
urls.extend([address])
|
||||
# breakpoint()
|
||||
# print("feed_mode_auto_discovery")
|
||||
urls = await process_feed_selection(url, urls)
|
||||
return urls
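# NOTE Illustrative sketch, not part of this module: the XPath query above
# matches RSS autodiscovery <link> elements in an HTML <head>, for example:
# from lxml import html
# markup = ('<html><head>'
#           '<link rel="alternate" type="application/atom+xml" href="/feed.atom"/>'
#           '<link rel="alternate" type="application/rss+xml" href="/feed.rss"/>'
#           '</head><body></body></html>')
# tree = html.fromstring(markup)
# links = tree.xpath('//link[(@rel="alternate") and '
#                    '(@type="application/atom+xml" or '
#                    '@type="application/rdf+xml" or '
#                    '@type="application/rss+xml")]')
# hrefs = [link.xpath('@href')[0] for link in links]  # ['/feed.atom', '/feed.rss']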
|
||||
|
||||
|
||||
# TODO Segregate function into function that returns
|
||||
# URLs (string) and Feeds (dict) and function that
|
||||
# composes text message (string).
|
||||
# Maybe that's not necessary.
|
||||
async def process_feed_selection(url, urls):
|
||||
feeds = {}
|
||||
for i in urls:
|
||||
result = await fetch.http(i)
|
||||
if not result['error']:
|
||||
document = result['content']
|
||||
status_code = result['status_code']
|
||||
if status_code == 200: # NOTE This line might be redundant
|
||||
try:
|
||||
feeds[i] = [parse(document)]
|
||||
except:
|
||||
continue
|
||||
message = (
|
||||
"Web feeds found for {}\n\n```\n"
|
||||
).format(url)
|
||||
urls = []
|
||||
for feed_url in feeds:
|
||||
# try:
|
||||
# res = await fetch.http(feed)
|
||||
# except:
|
||||
# continue
|
||||
feed_name = None
|
||||
if "title" in feeds[feed_url][0]["feed"].keys():
|
||||
feed_name = feeds[feed_url][0].feed.title
|
||||
feed_name = feed_name if feed_name else "Untitled"
|
||||
# feed_name = feed_name if feed_name else urlsplit(feed_url).netloc
|
||||
# AttributeError: 'str' object has no attribute 'entries'
|
||||
if "entries" in feeds[feed_url][0].keys():
|
||||
feed_amnt = feeds[feed_url][0].entries
|
||||
else:
|
||||
continue
|
||||
if feed_amnt:
|
||||
# NOTE Because there could be many false positives
|
||||
# which are revealed in second phase of scan, we
|
||||
# could end with a single feed, which would be
|
||||
# listed instead of fetched, so feed_url_mark is
|
||||
# utilized in order to make fetch possible.
|
||||
# NOTE feed_url_mark was a variable which stored
|
||||
# single URL (probably first accepted as valid)
|
||||
# in order to get an indication whether a single
|
||||
# URL has been fetched, so that the receiving
|
||||
# function will scan that single URL instead of
|
||||
# listing it as a message.
|
||||
url = {'link' : feed_url,
|
||||
'index' : None,
|
||||
'name' : feed_name,
|
||||
'code' : status_code,
|
||||
'error' : False,
|
||||
'exist' : None}
|
||||
urls.extend([url])
|
||||
count = len(urls)
|
||||
if count > 1:
|
||||
result = urls
|
||||
elif count:
|
||||
result = urls[0]
|
||||
else:
|
||||
result = None
|
||||
return result
|
||||
|
||||
|
||||
# def get_discovered_feeds(url, urls):
|
||||
# message = (
|
||||
# "Found {} web feeds:\n\n```\n"
|
||||
# ).format(len(urls))
|
||||
# if len(urls) > 1:
|
||||
# for urls in urls:
|
||||
# message += (
|
||||
# "Title : {}\n"
|
||||
# "Link : {}\n"
|
||||
# "\n"
|
||||
# ).format(url, url.title)
|
||||
# message += (
|
||||
# "```\nThe above feeds were extracted from\n{}"
|
||||
# ).format(url)
|
||||
# elif len(urls) > 0:
|
||||
# result = urls
|
||||
# else:
|
||||
# message = (
|
||||
# "No feeds were found for {}"
|
||||
# ).format(url)
|
||||
# return result
|
||||
|
||||
|
||||
# Test module
|
||||
# TODO ModuleNotFoundError: No module named 'slixfeed'
|
||||
# import slixfeed.fetch as fetch
|
||||
# from slixfeed.action import is_feed, process_feed_selection
|
||||
|
||||
# async def start(url):
|
||||
# while True:
|
||||
# result = await fetch.http(url)
|
||||
# document = result[0]
|
||||
# status = result[1]
|
||||
# if document:
|
||||
# feed = parse(document)
|
||||
# if is_feed(feed):
|
||||
# print(url)
|
||||
# else:
|
||||
# urls = await probe_page(
|
||||
# url, document)
|
||||
# if len(urls) > 1:
|
||||
# await process_feed_selection(urls)
|
||||
# elif urls:
|
||||
# url = urls[0]
|
||||
# else:
|
||||
# response = (
|
||||
# "> {}\nFailed to load URL. Reason: {}"
|
||||
# ).format(url, status)
|
||||
# break
|
||||
# return response
|
||||
|
||||
# url = "https://www.smh.com.au/rssheadlines"
|
||||
# start(url)
|
slixfeed/dt.py
@@ -1,114 +0,0 @@
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
https://feedparser.readthedocs.io/en/latest/date-parsing.html
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from dateutil.parser import parse
|
||||
from email.utils import parsedate, parsedate_to_datetime
|
||||
|
||||
def now():
|
||||
"""
|
||||
ISO 8601 Timestamp.
|
||||
|
||||
Returns
|
||||
-------
|
||||
date : str
|
||||
ISO 8601 Timestamp.
|
||||
"""
|
||||
date = datetime.now().isoformat()
|
||||
return date
|
||||
|
||||
|
||||
def convert_struct_time_to_iso8601(struct_time):
|
||||
date = datetime(*struct_time[:6])
|
||||
date = date.isoformat()
|
||||
return date
|
||||
|
||||
|
||||
def current_date():
|
||||
"""
|
||||
Print MM DD, YYYY (Weekday Time) timestamp.
|
||||
|
||||
Returns
|
||||
-------
|
||||
date : str
|
||||
MM DD, YYYY (Weekday Time) timestamp.
|
||||
"""
|
||||
now = datetime.now()
|
||||
time = now.strftime("%B %d, %Y (%A %T)")
|
||||
return time
|
||||
|
||||
|
||||
def current_time():
|
||||
"""
|
||||
Print HH:MM:SS timestamp.
|
||||
|
||||
Returns
|
||||
-------
|
||||
date : str
|
||||
HH:MM:SS timestamp.
|
||||
"""
|
||||
now = datetime.now()
|
||||
time = now.strftime("%H:%M:%S")
|
||||
return time
|
||||
|
||||
|
||||
def timestamp():
|
||||
"""
|
||||
Print time stamp to be used in filename.
|
||||
|
||||
Returns
|
||||
-------
|
||||
formatted_time : str
|
||||
%Y%m%d-%H%M%S timestamp.
|
||||
"""
|
||||
now = datetime.now()
|
||||
formatted_time = now.strftime("%Y%m%d-%H%M%S")
|
||||
return formatted_time
|
||||
|
||||
|
||||
def validate(date):
|
||||
"""
|
||||
Validate date format.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
date : str
|
||||
Timestamp.
|
||||
|
||||
Returns
|
||||
-------
|
||||
date : str
|
||||
Timestamp.
|
||||
"""
|
||||
try:
|
||||
parse(date)
|
||||
except:
|
||||
date = now()
|
||||
return date
|
||||
|
||||
|
||||
def rfc2822_to_iso8601(date):
|
||||
"""
|
||||
Convert RFC 2822 into ISO 8601.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
date : str
|
||||
RFC 2822 Timestamp.
|
||||
|
||||
Returns
|
||||
-------
|
||||
date : str
|
||||
ISO 8601 Timestamp.
|
||||
"""
|
||||
if parsedate(date):
|
||||
try:
|
||||
date = parsedate_to_datetime(date)
|
||||
date = date.isoformat()
|
||||
except:
|
||||
date = now()
|
||||
return date
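# NOTE Illustrative sketch, not part of this module: RFC 2822 dates, as found
# in RSS <pubDate> elements, convert to ISO 8601 as follows.
# rfc2822_to_iso8601('Sat, 07 Sep 2002 00:00:01 GMT')
# # -> '2002-09-07T00:00:01+00:00'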
|
|
@@ -1,19 +0,0 @@
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
Move code from sqlite.get_entry_unread
|
||||
|
||||
if num > 1:
|
||||
news_list += (
|
||||
"\n{}\n{}\n{}\n"
|
||||
).format(str(title), str(link), str(feed_title))
|
||||
else:
|
||||
news_list = (
|
||||
"{}\n{}\n{}"
|
||||
).format(str(title), str(link), str(feed_title))
|
||||
|
||||
"""
|
|
@@ -1,74 +0,0 @@
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
1) is_feed: Look into the type ("atom", "rss2" etc.)
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def title(feed):
|
||||
"""
|
||||
Get title of feed.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
feed : dict
|
||||
Parsed feed document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
title : str
|
||||
Title or None.
|
||||
"""
|
||||
try:
|
||||
title = feed["feed"]["title"]
|
||||
except:
|
||||
title = None
|
||||
return title
|
||||
|
||||
|
||||
def is_feed(feed):
|
||||
"""
|
||||
Determine whether document is feed or not.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
feed : dict
|
||||
Parsed feed.
|
||||
|
||||
Returns
|
||||
-------
|
||||
val : boolean
|
||||
True or False.
|
||||
"""
|
||||
msg = None
|
||||
if not feed.entries:
|
||||
try:
|
||||
feed["feed"]["title"]
|
||||
val = True
|
||||
# msg = (
|
||||
# "Empty feed for {}"
|
||||
# ).format(url)
|
||||
except:
|
||||
val = False
|
||||
# msg = (
|
||||
# "No entries nor title for {}"
|
||||
# ).format(url)
|
||||
elif feed.bozo:
|
||||
val = False
|
||||
# msg = (
|
||||
# "Bozo detected for {}"
|
||||
# ).format(url)
|
||||
else:
|
||||
val = True
|
||||
# msg = (
|
||||
# "Good feed for {}"
|
||||
# ).format(url)
|
||||
print(msg)
|
||||
return val
|
|
@@ -20,9 +20,8 @@ TODO
"""
|
||||
|
||||
from asyncio import Lock
|
||||
import slixfeed.dt as dt
|
||||
from slixfeed.log import Logger
|
||||
from slixfeed.url import join_url
|
||||
from slixfeed.utilities import DateAndTime, Url
|
||||
from sqlite3 import connect, Error, IntegrityError
|
||||
import sys
|
||||
import time
|
||||
|
@@ -2736,7 +2735,7 @@ def get_invalid_entries(db_file, url, feed):
title = feed["feed"]["title"]
|
||||
# Prepare a link to compare
|
||||
if entry.has_key("link"):
|
||||
link = join_url(url, entry.link)
|
||||
link = Url.join_url(url, entry.link)
|
||||
else:
|
||||
link = url
|
||||
# Compare date, link and title
|
||||
|
@@ -2745,7 +2744,7 @@ def get_invalid_entries(db_file, url, feed):
# print("compare published:", title, link, time)
|
||||
# print("compare published:", entry_title, entry_link, timestamp)
|
||||
# print("============")
|
||||
time = dt.rfc2822_to_iso8601(entry.published)
|
||||
time = DateAndTime.rfc2822_to_iso8601(entry.published)
|
||||
if (entry_title == title and
|
||||
entry_link == link and
|
||||
timestamp == time):
|
||||
|
|
|
@@ -29,12 +29,10 @@ from feedparser import parse
import os
|
||||
import slixfeed.config as config
|
||||
from slixfeed.config import Config
|
||||
import slixfeed.crawl as crawl
|
||||
import slixfeed.dt as dt
|
||||
import slixfeed.fetch as fetch
|
||||
from slixfeed.log import Logger
|
||||
import slixfeed.sqlite as sqlite
|
||||
from slixfeed.url import join_url, trim_url
|
||||
from slixfeed.utilities import DateAndTime, Url
|
||||
from slixfeed.utilities import Html, MD
|
||||
from slixmpp.xmlstream import ET
|
||||
import sys
|
||||
|
@@ -56,7 +54,7 @@ class Feed:
if not os.path.isdir(cache_dir + '/' + ext):
|
||||
os.mkdir(cache_dir + '/' + ext)
|
||||
filename = os.path.join(
|
||||
cache_dir, ext, 'slixfeed_' + dt.timestamp() + '.' + ext)
|
||||
cache_dir, ext, 'slixfeed_' + DateAndTime.timestamp() + '.' + ext)
|
||||
db_file = config.get_pathname_to_database(jid_bare)
|
||||
results = sqlite.get_feeds(db_file)
|
||||
match ext:
|
||||
|
@@ -220,6 +218,7 @@ class Feed:
return node_entry
|
||||
|
||||
|
||||
# Look into the type ("atom", "rss2" etc.)
|
||||
def is_feed(url, feed):
|
||||
"""
|
||||
Determine whether document is feed or not.
|
||||
|
@@ -301,7 +300,7 @@ class Feed:
if "updated_parsed" in feed["feed"].keys():
|
||||
updated = feed["feed"]["updated_parsed"]
|
||||
try:
|
||||
updated = dt.convert_struct_time_to_iso8601(updated)
|
||||
updated = DateAndTime.convert_struct_time_to_iso8601(updated)
|
||||
except Exception as e:
|
||||
logger.error(str(e))
|
||||
updated = ''
|
||||
|
@@ -325,7 +324,7 @@ class Feed:
if feed.has_key('updated_parsed'):
|
||||
feed_updated = feed.updated_parsed
|
||||
try:
|
||||
feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
|
||||
feed_updated = DateAndTime.convert_struct_time_to_iso8601(feed_updated)
|
||||
except Exception as e:
|
||||
logger.error(str(e))
|
||||
feed_updated = None
|
||||
|
@@ -357,7 +356,7 @@ class Feed:
# NOTE Do not be tempted to return a compact dictionary.
|
||||
# That is, dictionary within dictionary
|
||||
# Return multiple dictionaries in a list or tuple.
|
||||
result = await crawl.probe_page(url, document)
|
||||
result = await FeedDiscovery.probe_page(url, document)
|
||||
if not result:
|
||||
# Get out of the loop with dict indicating error.
|
||||
result_final = {'link' : url,
|
||||
|
@@ -437,16 +436,16 @@ class Feed:
title = "*** No title ***"
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = join_url(url, entry.link)
|
||||
link = trim_url(link)
|
||||
link = Url.join_url(url, entry.link)
|
||||
link = Url.trim_url(link)
|
||||
else:
|
||||
link = "*** No link ***"
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = dt.rfc2822_to_iso8601(date)
|
||||
date = DateAndTime.rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = dt.rfc2822_to_iso8601(date)
|
||||
date = DateAndTime.rfc2822_to_iso8601(date)
|
||||
else:
|
||||
date = "*** No date ***"
|
||||
response += ("Title : {}\n"
|
||||
|
@@ -481,10 +480,10 @@ class Feed:
title = '*** No title ***'
|
||||
if entry.has_key("published"):
|
||||
date = entry.published
|
||||
date = dt.rfc2822_to_iso8601(date)
|
||||
date = DateAndTime.rfc2822_to_iso8601(date)
|
||||
elif entry.has_key("updated"):
|
||||
date = entry.updated
|
||||
date = dt.rfc2822_to_iso8601(date)
|
||||
date = DateAndTime.rfc2822_to_iso8601(date)
|
||||
else:
|
||||
date = '*** No date ***'
|
||||
if entry.has_key("summary"):
|
||||
|
@@ -500,8 +499,8 @@ class Feed:
summary = '*** No summary ***'
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = join_url(url, entry.link)
|
||||
link = trim_url(link)
|
||||
link = Url.join_url(url, entry.link)
|
||||
link = Url.trim_url(link)
|
||||
else:
|
||||
link = '*** No link ***'
|
||||
response = ("{}\n"
|
||||
|
@@ -543,7 +542,7 @@ class Feed:
if feed.has_key('updated_parsed'):
|
||||
feed_updated = feed.updated_parsed
|
||||
try:
|
||||
feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
|
||||
feed_updated = DateAndTime.convert_struct_time_to_iso8601(feed_updated)
|
||||
except:
|
||||
feed_updated = ''
|
||||
else:
|
||||
|
@@ -598,18 +597,18 @@ class Feed:
logger.debug('{}: entry: {}'.format(function_name, entry.link))
|
||||
if entry.has_key("published"):
|
||||
entry_published = entry.published
|
||||
entry_published = dt.rfc2822_to_iso8601(entry_published)
|
||||
entry_published = DateAndTime.rfc2822_to_iso8601(entry_published)
|
||||
else:
|
||||
entry_published = ''
|
||||
if entry.has_key("updated"):
|
||||
entry_updated = entry.updated
|
||||
entry_updated = dt.rfc2822_to_iso8601(entry_updated)
|
||||
entry_updated = DateAndTime.rfc2822_to_iso8601(entry_updated)
|
||||
else:
|
||||
entry_updated = dt.now()
|
||||
entry_updated = DateAndTime.now()
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
entry_link = join_url(feed_url, entry.link)
|
||||
entry_link = trim_url(entry_link)
|
||||
entry_link = Url.join_url(feed_url, entry.link)
|
||||
entry_link = Url.trim_url(entry_link)
|
||||
else:
|
||||
entry_link = feed_url
|
||||
# title = feed["feed"]["title"]
|
||||
|
@@ -783,8 +782,8 @@ class Feed:
# if (e_link.rel == "enclosure" and
|
||||
# media_type in ("audio", "image", "video")):
|
||||
# media_link = e_link.href
|
||||
# media_link = join_url(url, e_link.href)
|
||||
# media_link = trim_url(media_link)
|
||||
# media_link = Url.join_url(url, e_link.href)
|
||||
# media_link = Url.trim_url(media_link)
|
||||
|
||||
###########################################################
|
||||
|
||||
|
@@ -821,6 +820,442 @@ class Feed:
return new_entries
|
||||
|
||||
|
||||
"""
|
||||
|
||||
FIXME
|
||||
|
||||
1) https://wiki.pine64.org
|
||||
File "/slixfeed/crawl.py", line 178, in feed_mode_guess
|
||||
address = Url.join_url(url, parted_url.path.split('/')[1] + path)
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~^^^
|
||||
IndexError: list index out of range
|
||||
|
||||
TODO
|
||||
|
||||
1.1) Attempt to scan more paths: /blog/, /news/ etc., including root /
|
||||
Attempt to scan sub domains
|
||||
https://esmailelbob.xyz/en/
|
||||
https://blog.esmailelbob.xyz/feed/
|
||||
|
||||
1.2) Consider utilizing fetch.http_response
|
||||
|
||||
2) DeviantArt
|
||||
https://www.deviantart.com/nedesem/gallery
|
||||
https://backend.deviantart.com/rss.xml?q=gallery:nedesem
|
||||
https://backend.deviantart.com/rss.xml?q=nedesem
|
||||
|
||||
https://www.deviantart.com/search?q=
|
||||
https://backend.deviantart.com/rss.xml?q=search:
|
||||
|
||||
FEEDS CRAWLER PROJECT
|
||||
|
||||
3) Mark redirects for manual check
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/atom.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/feed.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/feeds/rss/news.xml.php
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/jekyll/feed.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/news.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/news.xml.php
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/rdf.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/rss.xml
|
||||
|
||||
Title : JSON Feed
|
||||
Link : https://www.jsonfeed.org/feed.json/videos.xml
|
||||
|
||||
|
||||
"""
|
||||
|
||||
from aiohttp import ClientError, ClientSession, ClientTimeout
|
||||
from lxml import etree
|
||||
from lxml import html
|
||||
from lxml.etree import fromstring
|
||||
|
||||
|
||||
class FeedDiscovery:
|
||||
|
||||
|
||||
# TODO Use boolean as a flag to determine whether a single URL was found
|
||||
# async def probe_page(
|
||||
# callback, url, document, num=None, db_file=None):
|
||||
# result = None
|
||||
# try:
|
||||
# # tree = etree.fromstring(res[0]) # etree is for xml
|
||||
# tree = html.fromstring(document)
|
||||
# except:
|
||||
# result = (
|
||||
# "> {}\nFailed to parse URL as feed."
|
||||
# ).format(url)
|
||||
# if not result:
|
||||
# print("RSS Auto-Discovery Engaged")
|
||||
# result = await feed_mode_auto_discovery(url, tree)
|
||||
# if not result:
|
||||
# print("RSS Scan Mode Engaged")
|
||||
# result = await feed_mode_scan(url, tree)
|
||||
# if not result:
|
||||
# print("RSS Arbitrary Mode Engaged")
|
||||
# result = await feed_mode_request(url, tree)
|
||||
# if not result:
|
||||
# result = (
|
||||
# "> {}\nNo news feeds were found for URL."
|
||||
# ).format(url)
|
||||
# # elif msg:
|
||||
# else:
|
||||
# if isinstance(result, str):
|
||||
# return result
|
||||
# elif isinstance(result, list):
|
||||
# url = result[0]
|
||||
# if db_file:
|
||||
# # print("if db_file", db_file)
|
||||
# return await callback(db_file, url)
|
||||
# elif num:
|
||||
# return await callback(url, num)
|
||||
# else:
|
||||
# return await callback(url)
|
||||
|
||||
async def probe_page(url, document=None):
|
||||
"""
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
document : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
result : list or str
|
||||
Single URL as list or selection of URLs as str.
|
||||
"""
|
||||
if not document:
|
||||
response = await fetch.http(url)
|
||||
if not response['error']:
|
||||
document = response['content']
|
||||
try:
|
||||
# tree = etree.fromstring(res[0]) # etree is for xml
|
||||
tree = html.fromstring(document)
|
||||
result = None
|
||||
except Exception as e:
|
||||
logger.error(str(e))
|
||||
try:
|
||||
# /questions/15830421/xml-unicode-strings-with-encoding-declaration-are-not-supported
|
||||
# xml = html.fromstring(document.encode('utf-8'))
|
||||
# parser = etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
|
||||
# tree = fromstring(xml, parser=parser)
|
||||
|
||||
# /questions/57833080/how-to-fix-unicode-strings-with-encoding-declaration-are-not-supported
|
||||
#tree = html.fromstring(bytes(document, encoding='utf8'))
|
||||
|
||||
# https://twigstechtips.blogspot.com/2013/06/python-lxml-strings-with-encoding.html
|
||||
#parser = etree.XMLParser(recover=True)
|
||||
#tree = etree.fromstring(document, parser)
|
||||
|
||||
tree = html.fromstring(document.encode('utf-8'))
|
||||
result = None
|
||||
except Exception as e:
|
||||
logger.error(str(e))
|
||||
logger.warning("Failed to parse URL as feed for {}.".format(url))
|
||||
result = {'link' : None,
|
||||
'index' : None,
|
||||
'name' : None,
|
||||
'code' : None,
|
||||
'error' : True,
|
||||
'exist' : None}
|
||||
if not result:
|
||||
logger.debug("Feed auto-discovery engaged for {}".format(url))
|
||||
result = FeedDiscovery.feed_mode_auto_discovery(url, tree)
|
||||
if not result:
|
||||
logger.debug("Feed link scan mode engaged for {}".format(url))
|
||||
result = FeedDiscovery.feed_mode_scan(url, tree)
|
||||
if not result:
|
||||
logger.debug("Feed arbitrary mode engaged for {}".format(url))
|
||||
result = FeedDiscovery.feed_mode_guess(url, tree)
|
||||
if not result:
|
||||
logger.debug("No feeds were found for {}".format(url))
|
||||
result = None
|
||||
result = await FeedDiscovery.process_feed_selection(url, result)
|
||||
return result
|
||||
|
||||
|
||||
# TODO Improve scan by gradual decreasing of path
|
||||
def feed_mode_guess(url, tree):
|
||||
"""
|
||||
Lookup for feeds by pathname using HTTP Requests.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
tree : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
urls = []
|
||||
parted_url = urlsplit(url)
|
||||
paths = config.open_config_file("lists.toml")["pathnames"]
|
||||
# Check whether URL has path (i.e. not root)
|
||||
# Check parted_url.path to avoid error in case root wasn't given
|
||||
# TODO Make more tests
|
||||
if parted_url.path and parted_url.path.split('/')[1]:
|
||||
paths.extend(
|
||||
[".atom", ".feed", ".rdf", ".rss"]
|
||||
) if '.rss' not in paths else -1
|
||||
# if paths.index('.rss'):
|
||||
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
|
||||
parted_url_path = parted_url.path if parted_url.path else '/'
|
||||
for path in paths:
|
||||
address = Url.join_url(url, parted_url_path.split('/')[1] + path)
|
||||
if address not in urls:
|
||||
urls.extend([address])
|
||||
# breakpoint()
|
||||
# print("feed_mode_guess")
|
||||
return urls
|
||||
|
||||
|
||||
def feed_mode_scan(url, tree):
|
||||
"""
|
||||
Scan page for potential feeds by pathname.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
tree : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
urls = []
|
||||
paths = config.open_config_file("lists.toml")["pathnames"]
|
||||
for path in paths:
|
||||
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
|
||||
# xpath_query = "//a[contains(@href,'{}')]".format(path)
|
||||
num = 5
|
||||
xpath_query = (
|
||||
"(//a[contains(@href,'{}')])[position()<={}]"
|
||||
).format(path, num)
|
||||
addresses = tree.xpath(xpath_query)
|
||||
xpath_query = (
|
||||
"(//a[contains(@href,'{}')])[position()>last()-{}]"
|
||||
).format(path, num)
|
||||
addresses += tree.xpath(xpath_query)
|
||||
# NOTE Should number of addresses be limited or
|
||||
# perhaps be N from the start and N from the end
|
||||
for address in addresses:
|
||||
address = Url.join_url(url, address.xpath('@href')[0])
|
||||
if address not in urls:
|
||||
urls.extend([address])
|
||||
# breakpoint()
|
||||
# print("feed_mode_scan")
|
||||
return urls
|
||||
|
||||
|
||||
def feed_mode_auto_discovery(url, tree):
|
||||
"""
|
||||
Lookup for feeds using RSS autodiscovery technique.
|
||||
|
||||
See: https://www.rssboard.org/rss-autodiscovery
|
||||
|
||||
Parameters
|
||||
----------
|
||||
db_file : str
|
||||
Path to database file.
|
||||
url : str
|
||||
URL.
|
||||
tree : TYPE
|
||||
DESCRIPTION.
|
||||
|
||||
Returns
|
||||
-------
|
||||
msg : str
|
||||
Message with URLs.
|
||||
"""
|
||||
xpath_query = (
|
||||
'//link[(@rel="alternate") and '
|
||||
'(@type="application/atom+xml" or '
|
||||
'@type="application/rdf+xml" or '
|
||||
'@type="application/rss+xml")]'
|
||||
)
|
||||
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
|
||||
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
|
||||
feeds = tree.xpath(xpath_query)
|
||||
if feeds:
|
||||
urls = []
|
||||
for feed in feeds:
|
||||
# # The following code works;
|
||||
# # The following code will catch
|
||||
# # only valid resources (i.e. not 404);
|
||||
# # The following code requires more bandwidth.
|
||||
# res = await fetch.http(feed)
|
||||
# if res[0]:
|
||||
# disco = parse(res[0])
|
||||
# title = disco["feed"]["title"]
|
||||
# msg += "{} \n {} \n\n".format(title, feed)
|
||||
|
||||
# feed_name = feed.xpath('@title')[0]
|
||||
# feed_addr = Url.join_url(url, feed.xpath('@href')[0])
|
||||
|
||||
# if feed_addr.startswith("/"):
|
||||
# feed_addr = url + feed_addr
|
||||
address = Url.join_url(url, feed.xpath('@href')[0])
|
||||
if address not in urls:
|
||||
urls.extend([address])
|
||||
# breakpoint()
|
||||
# print("feed_mode_auto_discovery")
|
||||
return urls
|
||||
|
||||
|
||||
# TODO Segregate function into function that returns
|
||||
# URLs (string) and Feeds (dict) and function that
|
||||
# composes text message (string).
|
||||
# Maybe that's not necessary.
|
||||
async def process_feed_selection(url, urls):
|
||||
feeds = {}
|
||||
for i in urls:
|
||||
result = await fetch.http(i)
|
||||
if not result['error']:
|
||||
document = result['content']
|
||||
status_code = result['status_code']
|
||||
if status_code == 200: # NOTE This line might be redundant
|
||||
try:
|
||||
feeds[i] = [parse(document)]
|
||||
except:
|
||||
continue
|
||||
message = (
|
||||
"Web feeds found for {}\n\n```\n"
|
||||
).format(url)
|
||||
urls = []
|
||||
for feed_url in feeds:
|
||||
# try:
|
||||
# res = await fetch.http(feed)
|
||||
# except:
|
||||
# continue
|
||||
feed_name = None
|
||||
if "title" in feeds[feed_url][0]["feed"].keys():
|
||||
feed_name = feeds[feed_url][0].feed.title
|
||||
feed_name = feed_name if feed_name else "Untitled"
|
||||
# feed_name = feed_name if feed_name else urlsplit(feed_url).netloc
|
||||
# AttributeError: 'str' object has no attribute 'entries'
|
||||
if "entries" in feeds[feed_url][0].keys():
|
||||
feed_amnt = feeds[feed_url][0].entries
|
||||
else:
|
||||
continue
|
||||
if feed_amnt:
|
||||
# NOTE Because there could be many false positives
|
||||
# which are revealed in second phase of scan, we
|
||||
# could end with a single feed, which would be
|
||||
# listed instead of fetched, so feed_url_mark is
|
||||
# utilized in order to make fetch possible.
|
||||
# NOTE feed_url_mark was a variable which stored
|
||||
# single URL (probably first accepted as valid)
|
||||
# in order to get an indication whether a single
|
||||
# URL has been fetched, so that the receiving
|
||||
# function will scan that single URL instead of
|
||||
# listing it as a message.
|
||||
url = {'link' : feed_url,
|
||||
'index' : None,
|
||||
'name' : feed_name,
|
||||
'code' : status_code,
|
||||
'error' : False,
|
||||
'exist' : None}
|
||||
urls.extend([url])
|
||||
count = len(urls)
|
||||
if count > 1:
|
||||
result = urls
|
||||
elif count:
|
||||
result = urls[0]
|
||||
else:
|
||||
result = None
|
||||
return result
|
||||
|
||||
|
||||
# def get_discovered_feeds(url, urls):
|
||||
# message = (
|
||||
# "Found {} web feeds:\n\n```\n"
|
||||
# ).format(len(urls))
|
||||
# if len(urls) > 1:
|
||||
# for urls in urls:
|
||||
# message += (
|
||||
# "Title : {}\n"
|
||||
# "Link : {}\n"
|
||||
# "\n"
|
||||
# ).format(url, url.title)
|
||||
# message += (
|
||||
# "```\nThe above feeds were extracted from\n{}"
|
||||
# ).format(url)
|
||||
# elif len(urls) > 0:
|
||||
# result = urls
|
||||
# else:
|
||||
# message = (
|
||||
# "No feeds were found for {}"
|
||||
# ).format(url)
|
||||
# return result
|
||||
|
||||
|
||||
# Test module
|
||||
# TODO ModuleNotFoundError: No module named 'slixfeed'
|
||||
# import slixfeed.fetch as fetch
|
||||
# from slixfeed.action import is_feed, process_feed_selection
|
||||
|
||||
# async def start(url):
|
||||
# while True:
|
||||
# result = await fetch.http(url)
|
||||
# document = result[0]
|
||||
# status = result[1]
|
||||
# if document:
|
||||
# feed = parse(document)
|
||||
# if is_feed(feed):
|
||||
# print(url)
|
||||
# else:
|
||||
# urls = await probe_page(
|
||||
# url, document)
|
||||
# if len(urls) > 1:
|
||||
# await process_feed_selection(urls)
|
||||
# elif urls:
|
||||
# url = urls[0]
|
||||
# else:
|
||||
# response = (
|
||||
# "> {}\nFailed to load URL. Reason: {}"
|
||||
# ).format(url, status)
|
||||
# break
|
||||
# return response
|
||||
|
||||
# url = "https://www.smh.com.au/rssheadlines"
|
||||
# start(url)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class FeedTask:
|
||||
|
||||
|
||||
|
@@ -921,7 +1356,7 @@ class Opml:
ETR.SubElement(head, "generator").text = "Slixfeed"
|
||||
ETR.SubElement(head, "urlPublic").text = (
|
||||
"https://slixfeed.woodpeckersnest.space/")
|
||||
time_stamp = dt.current_time()
|
||||
time_stamp = DateAndTime.current_time()
|
||||
ETR.SubElement(head, "dateCreated").text = time_stamp
|
||||
ETR.SubElement(head, "dateModified").text = time_stamp
|
||||
body = ETR.SubElement(root, "body")
|
||||
|
|
slixfeed/url.py
@@ -1,352 +0,0 @@
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
FIXME
|
||||
|
||||
1) Do not handle base64
|
||||
https://www.lilithsaintcrow.com/2024/02/love-anonymous/
|
||||
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
|
||||
https://www.lilithsaintcrow.com/2024/02/love-anonymous//image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
|
||||
|
||||
TODO
|
||||
|
||||
1) ActivityPub URL revealer activitypub_to_http.
|
||||
|
||||
2) SQLite preference "instance" for preferred instances.
|
||||
|
||||
"""
|
||||
|
||||
from email.utils import parseaddr
|
||||
import os
|
||||
import random
|
||||
import slixfeed.config as config
|
||||
import slixfeed.fetch as fetch
|
||||
from slixfeed.log import Logger
|
||||
from urllib.parse import (
|
||||
parse_qs,
|
||||
urlencode,
|
||||
urljoin,
|
||||
# urlparse,
|
||||
urlsplit,
|
||||
urlunsplit
|
||||
)
|
||||
|
||||
logger = Logger(__name__)
|
||||
|
||||
|
||||
# NOTE
|
||||
# hostname and protocol are listed as one in file proxies.toml.
|
||||
# Perhaps a better practice would be to have them separated.
|
||||
|
||||
# NOTE
|
||||
# File proxies.toml will remain as it is, in order to be
|
||||
# coordinated with the dataset of project LibRedirect, even
|
||||
# though rule-sets might be adopted (see Privacy Redirect).
|
||||
|
||||
def get_hostname(url):
|
||||
parted_url = urlsplit(url)
|
||||
hostname = parted_url.netloc
|
||||
if hostname.startswith('www.'): hostname = hostname.replace('www.', '')
|
||||
return hostname
|
||||
|
||||
|
||||
async def replace_hostname(url, url_type):
|
||||
"""
|
||||
Replace hostname.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
url_type : str
|
||||
"feed" or "link".
|
||||
|
||||
Returns
|
||||
-------
|
||||
url : str
|
||||
URL.
|
||||
"""
|
||||
url_new = None
|
||||
parted_url = urlsplit(url)
|
||||
# protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
hostname = hostname.replace('www.','')
|
||||
pathname = parted_url.path
|
||||
queries = parted_url.query
|
||||
fragment = parted_url.fragment
|
||||
proxies = config.open_config_file('proxies.toml')['proxies']
|
||||
for proxy_name in proxies:
|
||||
proxy = proxies[proxy_name]
|
||||
if hostname in proxy['hostname'] and url_type in proxy['type']:
|
||||
while not url_new:
|
||||
print('>>>')
|
||||
print(url_new)
|
||||
proxy_type = 'clearnet'
|
||||
proxy_list = proxy[proxy_type]
|
||||
if len(proxy_list):
|
||||
# proxy_list = proxies[proxy_name][proxy_type]
|
||||
proxy_url = random.choice(proxy_list)
|
||||
parted_proxy_url = urlsplit(proxy_url)
|
||||
protocol_new = parted_proxy_url.scheme
|
||||
hostname_new = parted_proxy_url.netloc
|
||||
url_new = urlunsplit([protocol_new, hostname_new,
|
||||
pathname, queries, fragment])
|
||||
print(proxy_url)
|
||||
print(url_new)
|
||||
print('>>>')
|
||||
response = await fetch.http(url_new)
|
||||
if (response and
|
||||
response['status_code'] == 200 and
|
||||
# response.reason == 'OK' and
|
||||
url_new.startswith(proxy_url)):
|
||||
break
|
||||
else:
|
||||
config_dir = config.get_default_config_directory()
|
||||
proxies_obsolete_file = config_dir + '/proxies_obsolete.toml'
|
||||
proxies_file = config_dir + '/proxies.toml'
|
||||
if not os.path.isfile(proxies_obsolete_file):
|
||||
config.create_skeleton(proxies_file)
|
||||
config.backup_obsolete(proxies_obsolete_file,
|
||||
proxy_name, proxy_type,
|
||||
proxy_url)
|
||||
try:
|
||||
config.update_proxies(proxies_file, proxy_name,
|
||||
proxy_type, proxy_url)
|
||||
except ValueError as e:
|
||||
logger.error([str(e), proxy_url])
|
||||
url_new = None
|
||||
else:
|
||||
logger.warning('No proxy URLs for {}. '
|
||||
'Please update proxies.toml'
|
||||
.format(proxy_name))
|
||||
url_new = url
|
||||
break
|
||||
return url_new
|
||||
|
||||
|
||||
def remove_tracking_parameters(url):
|
||||
"""
|
||||
Remove queries with tracking parameters.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
url : str
|
||||
URL.
|
||||
"""
|
||||
if url.startswith('data:') and ';base64,' in url:
|
||||
return url
|
||||
parted_url = urlsplit(url)
|
||||
protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
pathname = parted_url.path
|
||||
queries = parse_qs(parted_url.query)
|
||||
fragment = parted_url.fragment
|
||||
trackers = config.open_config_file('queries.toml')['trackers']
|
||||
for tracker in trackers:
|
||||
if tracker in queries: del queries[tracker]
|
||||
queries_new = urlencode(queries, doseq=True)
|
||||
url = urlunsplit([protocol, hostname, pathname, queries_new, fragment])
|
||||
return url
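# NOTE Illustrative sketch, not part of this module: assuming 'utm_source' and
# 'utm_medium' are listed under "trackers" in queries.toml, the call below
# strips them while preserving the remaining query parameters.
# remove_tracking_parameters(
#     'https://example.org/post?id=7&utm_source=rss&utm_medium=feed')
# # -> 'https://example.org/post?id=7'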
|
||||
|
||||
|
||||
def feed_to_http(url):
|
||||
"""
|
||||
Replace scheme FEED by HTTP.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
new_url : str
|
||||
URL.
|
||||
"""
|
||||
par_url = urlsplit(url)
|
||||
new_url = urlunsplit(['http', par_url.netloc, par_url.path, par_url.query,
|
||||
par_url.fragment])
|
||||
return new_url
|
||||
|
||||
|
||||
def check_xmpp_uri(uri):
|
||||
"""
|
||||
Check validity of XMPP URI.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
uri : str
|
||||
URI.
|
||||
|
||||
Returns
|
||||
-------
|
||||
jid : str
|
||||
JID or None.
|
||||
"""
|
||||
jid = urlsplit(uri).path
|
||||
if parseaddr(jid)[1] != jid:
|
||||
jid = False
|
||||
return jid
|
||||
|
||||
|
||||
# NOTE Read the documentation
|
||||
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
|
||||
def complete_url(source, link):
|
||||
"""
|
||||
Check if URL is pathname and complete it into URL.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str
|
||||
Feed URL.
|
||||
link : str
|
||||
Link URL or pathname.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
URL.
|
||||
"""
|
||||
if link.startswith('data:') and ';base64,' in link:
|
||||
return link
|
||||
if link.startswith('www.'):
|
||||
return 'http://' + link
|
||||
parted_link = urlsplit(link)
|
||||
parted_feed = urlsplit(source)
|
||||
if parted_link.scheme == 'magnet' and parted_link.query:
|
||||
return link
|
||||
if parted_link.scheme and parted_link.netloc:
|
||||
return link
|
||||
if link.startswith('//'):
|
||||
if parted_link.netloc and parted_link.path:
|
||||
new_link = urlunsplit([parted_feed.scheme, parted_link.netloc,
|
||||
parted_link.path, parted_link.query,
|
||||
parted_link.fragment])
|
||||
elif link.startswith('/'):
|
||||
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
|
||||
parted_link.path, parted_link.query,
|
||||
parted_link.fragment])
|
||||
elif link.startswith('../'):
|
||||
pathlink = parted_link.path.split('/')
|
||||
pathfeed = parted_feed.path.split('/')
|
||||
for i in pathlink:
|
||||
if i == '..':
|
||||
if pathlink.index('..') == 0:
|
||||
pathfeed.pop()
|
||||
else:
|
||||
break
|
||||
while pathlink.count('..'):
|
||||
if pathlink.index('..') == 0:
|
||||
pathlink.remove('..')
|
||||
else:
|
||||
break
|
||||
pathlink = '/'.join(pathlink)
|
||||
pathfeed.extend([pathlink])
|
||||
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
|
||||
'/'.join(pathfeed), parted_link.query,
|
||||
parted_link.fragment])
|
||||
else:
|
||||
pathlink = parted_link.path.split('/')
|
||||
pathfeed = parted_feed.path.split('/')
|
||||
if link.startswith('./'):
|
||||
pathlink.remove('.')
|
||||
if not source.endswith('/'):
|
||||
pathfeed.pop()
|
||||
pathlink = '/'.join(pathlink)
|
||||
pathfeed.extend([pathlink])
|
||||
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
|
||||
'/'.join(pathfeed), parted_link.query,
|
||||
parted_link.fragment])
|
||||
return new_link
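# NOTE Illustrative sketch, not part of this module: example inputs and the
# URLs complete_url() is expected to produce for them.
# complete_url('https://example.org/blog/feed.xml', '/icon.png')
# # -> 'https://example.org/icon.png'
# complete_url('https://example.org/blog/feed.xml', 'cover.jpg')
# # -> 'https://example.org/blog/cover.jpg'
# complete_url('https://example.org/blog/feed.xml', '//cdn.example.org/style.css')
# # -> 'https://cdn.example.org/style.css'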
|
||||
|
||||
|
||||
|
||||
# TODO
|
||||
|
||||
# Feed https://www.ocaml.org/feed.xml
|
||||
# Link %20https://frama-c.com/fc-versions/cobalt.html%20
|
||||
|
||||
# FIXME
|
||||
|
||||
# Feed https://cyber.dabamos.de/blog/feed.rss
|
||||
# Link https://cyber.dabamos.de/blog/#article-2022-07-15
|
||||
|
||||
def join_url(source, link):
|
||||
"""
|
||||
Join base URL with given pathname.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str
|
||||
Feed URL.
|
||||
link : str
|
||||
Link URL or pathname.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
URL.
|
||||
"""
|
||||
if link.startswith('data:') and ';base64,' in link:
|
||||
return link
|
||||
if link.startswith('www.'):
|
||||
new_link = 'http://' + link
|
||||
elif link.startswith('%20') and link.endswith('%20'):
|
||||
old_link = link.split('%20')
|
||||
del old_link[0]
|
||||
old_link.pop()
|
||||
new_link = ''.join(old_link)
|
||||
else:
|
||||
new_link = urljoin(source, link)
|
||||
return new_link
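# NOTE Illustrative sketch, not part of this module: join_url() defers to
# urllib's urljoin for ordinary relative references, and unwraps links that
# are padded with %20 (see the FIXME and TODO notes above).
# join_url('https://cyber.dabamos.de/blog/feed.rss', '#article-2022-07-15')
# # -> 'https://cyber.dabamos.de/blog/feed.rss#article-2022-07-15'
# join_url('https://www.ocaml.org/feed.xml',
#          '%20https://frama-c.com/fc-versions/cobalt.html%20')
# # -> 'https://frama-c.com/fc-versions/cobalt.html'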
|
||||
|
||||
|
||||
def trim_url(url):
|
||||
"""
|
||||
Check URL pathname for double slash.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
url : str
|
||||
URL.
|
||||
"""
|
||||
if url.startswith('data:') and ';base64,' in url:
|
||||
return url
|
||||
parted_url = urlsplit(url)
|
||||
protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
pathname = parted_url.path
|
||||
queries = parted_url.query
|
||||
fragment = parted_url.fragment
|
||||
while '//' in pathname:
|
||||
pathname = pathname.replace('//', '/')
|
||||
url = urlunsplit([protocol, hostname, pathname, queries, fragment])
|
||||
return url
|
||||
|
||||
|
||||
def activitypub_to_http(namespace):
|
||||
"""
|
||||
Replace ActivityPub namespace by HTTP.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : str
|
||||
Namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
new_url : str
|
||||
URL.
|
||||
"""
|
|
@@ -39,16 +39,27 @@ TODO
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from email.utils import parseaddr
|
||||
from dateutil.parser import parse
|
||||
from email.utils import parsedate, parsedate_to_datetime
|
||||
import hashlib
|
||||
import os
|
||||
import random
|
||||
import slixfeed.config as config
|
||||
from slixfeed.config import Config
|
||||
from lxml import etree, html
|
||||
import slixfeed.dt as dt
|
||||
import slixfeed.fetch as fetch
|
||||
from slixfeed.log import Logger
|
||||
import slixfeed.sqlite as sqlite
|
||||
from slixfeed.url import join_url, complete_url
|
||||
import sys
|
||||
from urllib.parse import (
|
||||
parse_qs,
|
||||
urlencode,
|
||||
urljoin,
|
||||
# urlparse,
|
||||
urlsplit,
|
||||
urlunsplit
|
||||
)
|
||||
|
||||
try:
|
||||
import tomllib
|
||||
|
@@ -58,6 +69,115 @@ except:
logger = Logger(__name__)
|
||||
|
||||
|
||||
class DateAndTime:
|
||||
|
||||
#https://feedparser.readthedocs.io/en/latest/date-parsing.html
|
||||
|
||||
def now():
|
||||
"""
|
||||
ISO 8601 Timestamp.
|
||||
|
||||
Returns
|
||||
-------
|
||||
date : str
|
||||
ISO 8601 Timestamp.
|
||||
"""
|
||||
date = datetime.now().isoformat()
|
||||
return date
|
||||
|
||||
|
||||
def convert_struct_time_to_iso8601(struct_time):
|
||||
date = datetime(*struct_time[:6])
|
||||
date = date.isoformat()
|
||||
return date
|
||||
|
||||
|
||||
def current_date():
|
||||
"""
|
||||
Print MM DD, YYYY (Weekday Time) timestamp.
|
||||
|
||||
Returns
|
||||
-------
|
||||
date : str
|
||||
MM DD, YYYY (Weekday Time) timestamp.
|
||||
"""
|
||||
now = datetime.now()
|
||||
time = now.strftime("%B %d, %Y (%A %T)")
|
||||
return time
|
||||
|
||||
|
||||
def current_time():
|
||||
"""
|
||||
Print HH:MM:SS timestamp.
|
||||
|
||||
Returns
|
||||
-------
|
||||
date : str
|
||||
HH:MM:SS timestamp.
|
||||
"""
|
||||
now = datetime.now()
|
||||
time = now.strftime("%H:%M:%S")
|
||||
return time
|
||||
|
||||
|
||||
def timestamp():
|
||||
"""
|
||||
Print time stamp to be used in filename.
|
||||
|
||||
Returns
|
||||
-------
|
||||
formatted_time : str
|
||||
%Y%m%d-%H%M%S timestamp.
|
||||
"""
|
||||
now = datetime.now()
|
||||
formatted_time = now.strftime("%Y%m%d-%H%M%S")
|
||||
return formatted_time
|
||||
|
||||
|
||||
def validate(date):
|
||||
"""
|
||||
Validate date format.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
date : str
|
||||
Timestamp.
|
||||
|
||||
Returns
|
||||
-------
|
||||
date : str
|
||||
Timestamp.
|
||||
"""
|
||||
try:
|
||||
parse(date)
|
||||
except:
|
||||
date = DateAndTime.now()
|
||||
return date
|
||||
|
||||
|
||||
def rfc2822_to_iso8601(date):
|
||||
"""
|
||||
Convert RFC 2822 into ISO 8601.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
date : str
|
||||
RFC 2822 Timestamp.
|
||||
|
||||
Returns
|
||||
-------
|
||||
date : str
|
||||
ISO 8601 Timestamp.
|
||||
"""
|
||||
if parsedate(date):
|
||||
try:
|
||||
date = parsedate_to_datetime(date)
|
||||
date = date.isoformat()
|
||||
except:
|
||||
date = DateAndTime.now()
|
||||
return date
|
||||
|
||||
|
||||
class Documentation:
|
||||
|
||||
|
||||
|
@@ -120,7 +240,7 @@ class Html:
if len(images):
|
||||
image = images[0]
|
||||
image = str(image)
|
||||
image_url = complete_url(url, image)
|
||||
image_url = Url.complete_url(url, image)
|
||||
return image_url
|
||||
|
||||
|
||||
|
@@ -224,6 +344,343 @@ class Task:
.format(task, jid_bare))
|
||||
|
||||
|
||||
"""
|
||||
|
||||
FIXME
|
||||
|
||||
1) Do not handle base64
|
||||
https://www.lilithsaintcrow.com/2024/02/love-anonymous/
|
||||
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
|
||||
https://www.lilithsaintcrow.com/2024/02/love-anonymous//image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
|
||||
|
||||
TODO
|
||||
|
||||
1) ActivityPub URL revealer activitypub_to_http.
|
||||
|
||||
2) SQLite preference "instance" for preferred instances.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class Url:
|
||||
|
||||
# NOTE
|
||||
# hostname and protocol are listed as one in file proxies.toml.
|
||||
# Perhaps a better practice would be to have them separated.
|
||||
|
||||
# NOTE
|
||||
# File proxies.toml will remain as it is, in order to be
|
||||
# coordinated with the dataset of project LibRedirect, even
|
||||
# though rule-sets might be adopted (see Privacy Redirect).
|
||||
|
||||
def get_hostname(url):
|
||||
parted_url = urlsplit(url)
|
||||
hostname = parted_url.netloc
|
||||
if hostname.startswith('www.'): hostname = hostname.replace('www.', '')
|
||||
return hostname
|
||||
|
||||
|
||||
async def replace_hostname(url, url_type):
|
||||
"""
|
||||
Replace hostname.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
url_type : str
|
||||
"feed" or "link".
|
||||
|
||||
Returns
|
||||
-------
|
||||
url : str
|
||||
URL.
|
||||
"""
|
||||
url_new = None
|
||||
parted_url = urlsplit(url)
|
||||
# protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
hostname = hostname.replace('www.','')
|
||||
pathname = parted_url.path
|
||||
queries = parted_url.query
|
||||
fragment = parted_url.fragment
|
||||
proxies = config.open_config_file('proxies.toml')['proxies']
|
||||
for proxy_name in proxies:
|
||||
proxy = proxies[proxy_name]
|
||||
if hostname in proxy['hostname'] and url_type in proxy['type']:
|
||||
while not url_new:
|
||||
print('>>>')
|
||||
print(url_new)
|
||||
proxy_type = 'clearnet'
|
||||
proxy_list = proxy[proxy_type]
|
||||
if len(proxy_list):
|
||||
# proxy_list = proxies[proxy_name][proxy_type]
|
||||
proxy_url = random.choice(proxy_list)
|
||||
parted_proxy_url = urlsplit(proxy_url)
|
||||
protocol_new = parted_proxy_url.scheme
|
||||
hostname_new = parted_proxy_url.netloc
|
||||
url_new = urlunsplit([protocol_new, hostname_new,
|
||||
pathname, queries, fragment])
|
||||
print(proxy_url)
|
||||
print(url_new)
|
||||
print('>>>')
|
||||
response = await fetch.http(url_new)
|
||||
if (response and
|
||||
response['status_code'] == 200 and
|
||||
# response.reason == 'OK' and
|
||||
url_new.startswith(proxy_url)):
|
||||
break
|
||||
else:
|
||||
config_dir = config.get_default_config_directory()
|
||||
proxies_obsolete_file = config_dir + '/proxies_obsolete.toml'
|
||||
proxies_file = config_dir + '/proxies.toml'
|
||||
if not os.path.isfile(proxies_obsolete_file):
|
||||
config.create_skeleton(proxies_file)
|
||||
config.backup_obsolete(proxies_obsolete_file,
|
||||
proxy_name, proxy_type,
|
||||
proxy_url)
|
||||
try:
|
||||
config.update_proxies(proxies_file, proxy_name,
|
||||
proxy_type, proxy_url)
|
||||
except ValueError as e:
|
||||
logger.error([str(e), proxy_url])
|
||||
url_new = None
|
||||
else:
|
||||
logger.warning('No proxy URLs for {}. '
|
||||
'Please update proxies.toml'
|
||||
.format(proxy_name))
|
||||
url_new = url
|
||||
break
|
||||
return url_new
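# Illustrative call (assumes proxies.toml maps the hostname to at least one
# reachable clearnet mirror of the requested type; otherwise the original URL
# is returned). The mirror hostname below is a placeholder, not a real entry.
#
#     >>> await Url.replace_hostname('https://www.youtube.com/watch?v=abc', 'link')
#     'https://<random-clearnet-proxy>/watch?v=abc'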
|
||||
|
||||
|
||||
def remove_tracking_parameters(url):
|
||||
"""
|
||||
Remove queries with tracking parameters.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
url : str
|
||||
URL.
|
||||
"""
|
||||
if url.startswith('data:') and ';base64,' in url:
|
||||
return url
|
||||
parted_url = urlsplit(url)
|
||||
protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
pathname = parted_url.path
|
||||
queries = parse_qs(parted_url.query)
|
||||
fragment = parted_url.fragment
|
||||
trackers = config.open_config_file('queries.toml')['trackers']
|
||||
for tracker in trackers:
|
||||
if tracker in queries: del queries[tracker]
|
||||
queries_new = urlencode(queries, doseq=True)
|
||||
url = urlunsplit([protocol, hostname, pathname, queries_new, fragment])
|
||||
return url
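# Illustrative call (assumes 'utm_source' is listed under trackers in
# queries.toml; non-tracking query parameters are preserved):
#
#     >>> Url.remove_tracking_parameters('https://example.org/post?utm_source=rss&id=7')
#     'https://example.org/post?id=7'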
|
||||
|
||||
|
||||
def feed_to_http(url):
|
||||
"""
|
||||
Replace scheme FEED by HTTP.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
new_url : str
|
||||
URL.
|
||||
"""
|
||||
par_url = urlsplit(url)
|
||||
new_url = urlunsplit(['http', par_url.netloc, par_url.path, par_url.query,
|
||||
par_url.fragment])
|
||||
return new_url
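# Illustrative call: only the scheme is swapped; the rest of the URL is kept.
#
#     >>> Url.feed_to_http('feed://example.org/rss.xml')
#     'http://example.org/rss.xml'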
|
||||
|
||||
|
||||
def check_xmpp_uri(uri):
|
||||
"""
|
||||
Check validity of XMPP URI.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
uri : str
|
||||
URI.
|
||||
|
||||
Returns
|
||||
-------
|
||||
jid : str
|
||||
JID or None.
|
||||
"""
|
||||
jid = urlsplit(uri).path
|
||||
if parseaddr(jid)[1] != jid:
|
||||
jid = False
|
||||
return jid
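# Illustrative call: the path component of the URI is accepted as a JID only
# when parseaddr agrees that it is a bare address.
#
#     >>> Url.check_xmpp_uri('xmpp:slixfeed@chat.example.org?join')
#     'slixfeed@chat.example.org'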
|
||||
|
||||
|
||||
# NOTE Read the documentation
|
||||
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
|
||||
def complete_url(source, link):
|
||||
"""
|
||||
Check whether a URL is a relative pathname and complete it into a full URL.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str
|
||||
Feed URL.
|
||||
link : str
|
||||
Link URL or pathname.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
URL.
|
||||
"""
|
||||
if link.startswith('data:') and ';base64,' in link:
|
||||
return link
|
||||
if link.startswith('www.'):
|
||||
return 'http://' + link
|
||||
parted_link = urlsplit(link)
|
||||
parted_feed = urlsplit(source)
|
||||
if parted_link.scheme == 'magnet' and parted_link.query:
|
||||
return link
|
||||
if parted_link.scheme and parted_link.netloc:
|
||||
return link
|
||||
if link.startswith('//'):
|
||||
if parted_link.netloc and parted_link.path:
|
||||
new_link = urlunsplit([parted_feed.scheme, parted_link.netloc,
|
||||
parted_link.path, parted_link.query,
|
||||
parted_link.fragment])
|
||||
elif link.startswith('/'):
|
||||
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
|
||||
parted_link.path, parted_link.query,
|
||||
parted_link.fragment])
|
||||
elif link.startswith('../'):
|
||||
pathlink = parted_link.path.split('/')
|
||||
pathfeed = parted_feed.path.split('/')
|
||||
for i in pathlink:
|
||||
if i == '..':
|
||||
if pathlink.index('..') == 0:
|
||||
pathfeed.pop()
|
||||
else:
|
||||
break
|
||||
while pathlink.count('..'):
|
||||
if pathlink.index('..') == 0:
|
||||
pathlink.remove('..')
|
||||
else:
|
||||
break
|
||||
pathlink = '/'.join(pathlink)
|
||||
pathfeed.extend([pathlink])
|
||||
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
|
||||
'/'.join(pathfeed), parted_link.query,
|
||||
parted_link.fragment])
|
||||
else:
|
||||
pathlink = parted_link.path.split('/')
|
||||
pathfeed = parted_feed.path.split('/')
|
||||
if link.startswith('./'):
|
||||
pathlink.remove('.')
|
||||
if not source.endswith('/'):
|
||||
pathfeed.pop()
|
||||
pathlink = '/'.join(pathlink)
|
||||
pathfeed.extend([pathlink])
|
||||
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
|
||||
'/'.join(pathfeed), parted_link.query,
|
||||
parted_link.fragment])
|
||||
return new_link
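# Illustrative call: an absolute pathname is completed with the scheme and
# hostname of the feed URL.
#
#     >>> Url.complete_url('https://example.org/blog/feed.xml', '/images/logo.png')
#     'https://example.org/images/logo.png'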
|
||||
|
||||
|
||||
|
||||
# TODO
|
||||
|
||||
# Feed https://www.ocaml.org/feed.xml
|
||||
# Link %20https://frama-c.com/fc-versions/cobalt.html%20
|
||||
|
||||
# FIXME
|
||||
|
||||
# Feed https://cyber.dabamos.de/blog/feed.rss
|
||||
# Link https://cyber.dabamos.de/blog/#article-2022-07-15
|
||||
|
||||
def join_url(source, link):
|
||||
"""
|
||||
Join base URL with given pathname.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
source : str
|
||||
Feed URL.
|
||||
link : str
|
||||
Link URL or pathname.
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
URL.
|
||||
"""
|
||||
if link.startswith('data:') and ';base64,' in link:
|
||||
return link
|
||||
if link.startswith('www.'):
|
||||
new_link = 'http://' + link
|
||||
elif link.startswith('%20') and link.endswith('%20'):
|
||||
old_link = link.split('%20')
|
||||
del old_link[0]
|
||||
old_link.pop()
|
||||
new_link = ''.join(old_link)
|
||||
else:
|
||||
new_link = urljoin(source, link)
|
||||
return new_link
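# Illustrative call for the %20 case noted in the TODO above: the padded link
# is unwrapped instead of being joined with the feed URL.
#
#     >>> Url.join_url('https://www.ocaml.org/feed.xml',
#     ...              '%20https://frama-c.com/fc-versions/cobalt.html%20')
#     'https://frama-c.com/fc-versions/cobalt.html'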
|
||||
|
||||
|
||||
def trim_url(url):
|
||||
"""
|
||||
Remove duplicate slashes from the URL pathname.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
url : str
|
||||
URL.
|
||||
|
||||
Returns
|
||||
-------
|
||||
url : str
|
||||
URL.
|
||||
"""
|
||||
if url.startswith('data:') and ';base64,' in url:
|
||||
return url
|
||||
parted_url = urlsplit(url)
|
||||
protocol = parted_url.scheme
|
||||
hostname = parted_url.netloc
|
||||
pathname = parted_url.path
|
||||
queries = parted_url.query
|
||||
fragment = parted_url.fragment
|
||||
while '//' in pathname:
|
||||
pathname = pathname.replace('//', '/')
|
||||
url = urlunsplit([protocol, hostname, pathname, queries, fragment])
|
||||
return url
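# Illustrative call: duplicate slashes are collapsed in the pathname only.
#
#     >>> Url.trim_url('https://example.org//blog//feed.xml')
#     'https://example.org/blog/feed.xml'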
|
||||
|
||||
|
||||
def activitypub_to_http(namespace):
|
||||
"""
|
||||
Replace ActivityPub namespace by HTTP.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
namespace : str
|
||||
Namespace.
|
||||
|
||||
Returns
|
||||
-------
|
||||
new_url : str
|
||||
URL.
|
||||
"""
|
||||
|
||||
|
||||
|
||||
class Utilities:
|
||||
|
||||
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
__version__ = '0.1.81'
|
||||
__version_info__ = (0, 1, 81)
|
||||
__version__ = '0.1.82'
|
||||
__version_info__ = (0, 1, 82)
|
||||
|
|
|
@ -29,16 +29,11 @@ import slixfeed.config as config
|
|||
from slixfeed.config import Config
|
||||
from slixfeed.log import Logger
|
||||
import slixfeed.sqlite as sqlite
|
||||
from slixfeed.url import (
|
||||
remove_tracking_parameters,
|
||||
replace_hostname,
|
||||
)
|
||||
from slixfeed.syndication import FeedTask
|
||||
from slixfeed.utilities import Documentation, Html, MD, Task
|
||||
from slixfeed.utilities import Documentation, Html, MD, Task, Url
|
||||
from slixfeed.xmpp.commands import XmppCommands
|
||||
from slixfeed.xmpp.message import XmppMessage
|
||||
from slixfeed.xmpp.presence import XmppPresence
|
||||
from slixfeed.xmpp.privilege import is_operator, is_moderator
|
||||
from slixfeed.xmpp.status import XmppStatusTask
|
||||
from slixfeed.xmpp.upload import XmppUpload
|
||||
from slixfeed.xmpp.utilities import XmppUtilities
|
||||
|
@ -89,7 +84,7 @@ class XmppChat:
|
|||
if (message['muc']['nick'] == self.alias):
|
||||
return
|
||||
jid_full = str(message['from'])
|
||||
if not is_moderator(self, jid_bare, jid_full):
|
||||
if not XmppUtilities.is_moderator(self, jid_bare, jid_full):
|
||||
return
|
||||
|
||||
if message['type'] == 'groupchat':
|
||||
|
@ -115,7 +110,7 @@ class XmppChat:
|
|||
# return
|
||||
# approved = False
|
||||
jid_full = str(message['from'])
|
||||
if not is_moderator(self, jid_bare, jid_full):
|
||||
if not XmppUtilities.is_moderator(self, jid_bare, jid_full):
|
||||
return
|
||||
# if role == 'moderator':
|
||||
# approved = True
|
||||
|
@ -257,7 +252,7 @@ class XmppChat:
|
|||
response = 'Current value for archive: '
|
||||
response += XmppCommands.get_archive(self, jid_bare)
|
||||
case _ if command_lowercase.startswith('bookmark +'):
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
muc_jid = command[11:]
|
||||
response = await XmppCommands.bookmark_add(
|
||||
self, muc_jid)
|
||||
|
@ -265,7 +260,7 @@ class XmppChat:
|
|||
response = ('This action is restricted. '
|
||||
'Type: adding bookmarks.')
|
||||
case _ if command_lowercase.startswith('bookmark -'):
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
muc_jid = command[11:]
|
||||
response = await XmppCommands.bookmark_del(
|
||||
self, muc_jid)
|
||||
|
@ -273,7 +268,7 @@ class XmppChat:
|
|||
response = ('This action is restricted. '
|
||||
'Type: removing bookmarks.')
|
||||
case 'bookmarks':
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
response = await XmppCommands.print_bookmarks(self)
|
||||
else:
|
||||
response = ('This action is restricted. '
|
||||
|
@ -333,7 +328,7 @@ class XmppChat:
|
|||
XmppPresence.send(self, jid_bare, status_message,
|
||||
status_type=status_type)
|
||||
filename, response = XmppCommands.export_feeds(
|
||||
self, jid_bare, ext)
|
||||
jid_bare, ext)
|
||||
url = await XmppUpload.start(self, jid_bare, filename)
|
||||
# response = (
|
||||
# 'Feeds exported successfully to {}.\n{}'
|
||||
|
@ -388,7 +383,7 @@ class XmppChat:
|
|||
response = await XmppCommands.pubsub_list(self, jid)
|
||||
response += '```'
|
||||
case _ if command_lowercase.startswith('pubsub send'):
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
info = command[12:]
|
||||
info = info.split(' ')
|
||||
jid = info[0]
|
||||
|
@ -461,7 +456,7 @@ class XmppChat:
|
|||
await XmppChatAction.send_unread_items(self, jid_bare, num)
|
||||
XmppStatusTask.restart_task(self, jid_bare)
|
||||
case _ if command_lowercase.startswith('node delete'):
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
info = command[12:]
|
||||
info = info.split(' ')
|
||||
response = XmppCommands.node_delete(self, info)
|
||||
|
@ -469,7 +464,7 @@ class XmppChat:
|
|||
response = ('This action is restricted. '
|
||||
'Type: sending news to PubSub.')
|
||||
case _ if command_lowercase.startswith('node purge'):
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
info = command[11:]
|
||||
info = info.split(' ')
|
||||
response = XmppCommands.node_purge(self, info)
|
||||
|
@ -770,8 +765,8 @@ class XmppChatAction:
|
|||
else:
|
||||
summary = '*** No summary ***'
|
||||
link = result[2]
|
||||
link = remove_tracking_parameters(link)
|
||||
link = await replace_hostname(link, "link") or link
|
||||
link = Url.remove_tracking_parameters(link)
|
||||
link = await Url.replace_hostname(link, "link") or link
|
||||
feed_id = result[4]
|
||||
# news_item = ("\n{}\n{}\n{} [{}]\n").format(str(title), str(link),
|
||||
# str(feed_title), str(ix))
|
||||
|
|
|
@ -44,14 +44,11 @@ import slixmpp
|
|||
|
||||
import slixfeed.config as config
|
||||
from slixfeed.config import Config
|
||||
import slixfeed.crawl as crawl
|
||||
import slixfeed.dt as dt
|
||||
import slixfeed.fetch as fetch
|
||||
from slixfeed.log import Logger
|
||||
import slixfeed.sqlite as sqlite
|
||||
from slixfeed.syndication import Feed, FeedTask, Opml
|
||||
import slixfeed.url as uri
|
||||
from slixfeed.utilities import Html, Task, Utilities
|
||||
from slixfeed.syndication import Feed, FeedDiscovery, FeedTask, Opml
|
||||
from slixfeed.utilities import DateAndTime, Html, Task, Url, Utilities
|
||||
from slixfeed.version import __version__
|
||||
from slixfeed.xmpp.bookmark import XmppBookmark
|
||||
from slixfeed.xmpp.chat import XmppChat, XmppChatTask
|
||||
|
@ -62,7 +59,6 @@ from slixfeed.xmpp.message import XmppMessage
|
|||
from slixfeed.xmpp.muc import XmppMuc
|
||||
from slixfeed.xmpp.groupchat import XmppGroupchat
|
||||
from slixfeed.xmpp.presence import XmppPresence
|
||||
from slixfeed.xmpp.privilege import is_operator, is_access
|
||||
import slixfeed.xmpp.profile as profile
|
||||
from slixfeed.xmpp.publish import XmppPubsub, XmppPubsubAction, XmppPubsubTask
|
||||
from slixfeed.xmpp.roster import XmppRoster
|
||||
|
@ -791,7 +787,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
# )
|
||||
|
||||
# NOTE https://codeberg.org/poezio/slixmpp/issues/3515
|
||||
# if is_operator(self, jid_bare):
|
||||
# if XmppUtilities.is_operator(self, jid_bare):
|
||||
self['xep_0050'].add_command(node='subscription',
|
||||
name='🪶️ Subscribe',
|
||||
handler=self._handle_subscription_add)
|
||||
|
@ -842,7 +838,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||
if is_access(self, jid_bare, jid_full, chat_type):
|
||||
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||
form = self['xep_0004'].make_form('form', 'PubSub')
|
||||
form['instructions'] = 'Publish news items to PubSub nodes.'
|
||||
options = form.add_field(desc='From which medium source do you '
|
||||
|
@ -863,7 +859,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['prev'] = None
|
||||
session['payload'] = form
|
||||
else:
|
||||
if not is_operator(self, jid_bare):
|
||||
if not XmppUtilities.is_operator(self, jid_bare):
|
||||
text_warn = 'This resource is restricted to operators.'
|
||||
elif chat_type == 'groupchat':
|
||||
text_warn = ('This resource is restricted to moderators of {}.'
|
||||
|
@ -883,7 +879,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||
if is_access(self, jid_bare, jid_full, chat_type):
|
||||
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||
values = payload['values']
|
||||
form = self['xep_0004'].make_form('form', 'Publish')
|
||||
form['instructions'] = ('Choose a PubSub Jabber ID and verify '
|
||||
|
@ -971,7 +967,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['has_next'] = True
|
||||
session['prev'] = self._handle_publish
|
||||
else:
|
||||
if not is_operator(self, jid_bare):
|
||||
if not XmppUtilities.is_operator(self, jid_bare):
|
||||
text_warn = 'This resource is restricted to operators.'
|
||||
elif chat_type == 'groupchat':
|
||||
text_warn = ('This resource is restricted to moderators of {}.'
|
||||
|
@ -994,7 +990,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
print(values['jid'])
|
||||
jid = values['jid'] if 'jid' in values else None
|
||||
jid_bare = session['from'].bare
|
||||
if jid != jid_bare and not is_operator(self, jid_bare):
|
||||
if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
|
||||
text_warn = ('Posting to {} is restricted to operators only.'
|
||||
.format(jid_bare)) # Should not this be self.boundjid.bare?
|
||||
session['allow_prev'] = False
|
||||
|
@ -1065,7 +1061,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
ixs = values['entries']
|
||||
#if jid: jid = jid[0] if isinstance(jid, list) else jid
|
||||
jid_bare = session['from'].bare
|
||||
if jid != jid_bare and not is_operator(self, jid_bare):
|
||||
if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
|
||||
# TODO Report incident
|
||||
text_warn = 'You are not supposed to be here.'
|
||||
session['allow_prev'] = False
|
||||
|
@ -1100,7 +1096,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
values = payload['values']
|
||||
jid = values['jid'] if 'jid' in values else None
|
||||
jid_bare = session['from'].bare
|
||||
if jid != jid_bare and not is_operator(self, jid_bare):
|
||||
if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
|
||||
# TODO Report incident
|
||||
text_warn = 'You are not supposed to be here.'
|
||||
# text_warn = ('Posting to {} is restricted to operators only.'
|
||||
|
@ -1119,7 +1115,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
if jid == self.boundjid.bare:
|
||||
node = 'urn:xmpp:microblog:0'
|
||||
else:
|
||||
node = uri.get_hostname(url)
|
||||
node = Url.get_hostname(url)
|
||||
form = self['xep_0004'].make_form('form', 'Publish')
|
||||
while True:
|
||||
result = await fetch.http(url)
|
||||
|
@ -1137,7 +1133,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
if "title" in feed["feed"].keys():
|
||||
title = feed["feed"]["title"]
|
||||
else:
|
||||
title = uri.get_hostname(url)
|
||||
title = Url.get_hostname(url)
|
||||
entries = feed.entries
|
||||
entry_ix = 0
|
||||
for entry in entries:
|
||||
|
@ -1146,10 +1142,10 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
else:
|
||||
if entry.has_key("published"):
|
||||
title = entry.published
|
||||
title = dt.rfc2822_to_iso8601(title)
|
||||
title = DateAndTime.rfc2822_to_iso8601(title)
|
||||
elif entry.has_key("updated"):
|
||||
title = entry.updated
|
||||
title = dt.rfc2822_to_iso8601(title)
|
||||
title = DateAndTime.rfc2822_to_iso8601(title)
|
||||
else:
|
||||
title = "*** No title ***"
|
||||
options.addOption(title, str(entry_ix))
|
||||
|
@ -1164,7 +1160,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['payload'] = form
|
||||
break
|
||||
else:
|
||||
result = await crawl.probe_page(url, document)
|
||||
result = await FeedDiscovery.probe_page(url, document)
|
||||
if isinstance(result, list):
|
||||
results = result
|
||||
form['instructions'] = ('Discovered {} subscriptions '
|
||||
|
@ -1225,7 +1221,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
jid = values['jid'][0] if 'jid' in values else None
|
||||
#if jid: jid = jid[0] if isinstance(jid, list) else jid
|
||||
jid_bare = session['from'].bare
|
||||
if jid != jid_bare and not is_operator(self, jid_bare):
|
||||
if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
|
||||
# TODO Report incident
|
||||
text_warn = 'You are not supposed to be here.'
|
||||
session['allow_prev'] = False
|
||||
|
@ -1262,10 +1258,10 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
# else:
|
||||
# if feed.entries[entry].has_key("published"):
|
||||
# title = feed.entries[entry].published
|
||||
# title = dt.rfc2822_to_iso8601(title)
|
||||
# title = DateAndTime.rfc2822_to_iso8601(title)
|
||||
# elif feed.entries[entry].has_key("updated"):
|
||||
# title = feed.entries[entry].updated
|
||||
# title = dt.rfc2822_to_iso8601(title)
|
||||
# title = DateAndTime.rfc2822_to_iso8601(title)
|
||||
# else:
|
||||
# title = "*** No title ***"
|
||||
# if feed.entries[entry].has_key("summary"):
|
||||
|
@ -1393,7 +1389,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||
if is_access(self, jid_bare, jid_full, chat_type):
|
||||
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||
jid = session['from'].bare
|
||||
db_file = config.get_pathname_to_database(jid_bare)
|
||||
form = self['xep_0004'].make_form('form', 'Filters')
|
||||
|
@ -1432,7 +1428,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['next'] = self._handle_filters_complete
|
||||
session['payload'] = form
|
||||
else:
|
||||
if not is_operator(self, jid_bare):
|
||||
if not XmppUtilities.is_operator(self, jid_bare):
|
||||
text_warn = 'This resource is restricted to operators.'
|
||||
elif chat_type == 'groupchat':
|
||||
text_warn = ('This resource is restricted to moderators of {}.'
|
||||
|
@ -1502,7 +1498,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||
if is_access(self, jid_bare, jid_full, chat_type):
|
||||
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||
form = self['xep_0004'].make_form('form', 'Subscribe')
|
||||
# form['instructions'] = 'Add a new custom subscription.'
|
||||
form.add_field(desc='Enter a URL.',
|
||||
|
@ -1517,7 +1513,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
required=True,
|
||||
value='http://',
|
||||
var='subscription')
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
# form['instructions'] = ('Special section for operators:\n'
|
||||
# 'This section allows you to add '
|
||||
# 'subscriptions for a JID of your '
|
||||
|
@ -1544,7 +1540,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['prev'] = None
|
||||
session['payload'] = form
|
||||
else:
|
||||
if not is_operator(self, jid_bare):
|
||||
if not XmppUtilities.is_operator(self, jid_bare):
|
||||
text_warn = 'This resource is restricted to operators.'
|
||||
elif chat_type == 'groupchat':
|
||||
text_warn = ('This resource is restricted to moderators of {}.'
|
||||
|
@ -1576,7 +1572,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
# options.addOption('News by tag', 'tag')
|
||||
options.addOption('Rejected', 'reject')
|
||||
options.addOption('Unread', 'unread')
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
# form['instructions'] = ('Special section for operators:\n'
|
||||
# 'This section allows you to view news items '
|
||||
# 'of a JID of your choice.')
|
||||
|
@ -1617,7 +1613,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
jid_bare = session['from'].bare
|
||||
values = payload['values']
|
||||
form = self['xep_0004'].make_form('form', 'Updates')
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
jid_bare = values['jid']
|
||||
form.add_field(var='jid',
|
||||
ftype='hidden',
|
||||
|
@ -1675,7 +1671,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
ix = values['update']
|
||||
jid_bare = session['from'].bare
|
||||
form = self['xep_0004'].make_form('form', 'Article')
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
jid = values['jid']
|
||||
jid_bare = jid[0] if isinstance(jid, list) else jid
|
||||
form.add_field(var='jid',
|
||||
|
@ -1688,9 +1684,9 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
url = sqlite.get_entry_url(db_file, ix)
|
||||
url = url[0] # TODO Handle the case where the index no longer exists
|
||||
logger.debug('Original URL: {}'.format(url))
|
||||
url = uri.remove_tracking_parameters(url)
|
||||
url = Url.remove_tracking_parameters(url)
|
||||
logger.debug('Processed URL (tracker removal): {}'.format(url))
|
||||
url = (await uri.replace_hostname(url, 'link')) or url
|
||||
url = (await Url.replace_hostname(url, 'link')) or url
|
||||
logger.debug('Processed URL (replace hostname): {}'.format(url))
|
||||
# result = await fetch.http(url)
|
||||
# if 'content' in result:
|
||||
|
@ -1750,7 +1746,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
identifier = values['identifier'] if 'identifier' in values else None
|
||||
url = values['subscription']
|
||||
jid_bare = session['from'].bare
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
custom_jid = values['jid']
|
||||
jid_bare = custom_jid[0] if isinstance(custom_jid, list) else jid_bare
|
||||
# jid_bare = custom_jid[0] if custom_jid else jid_bare
|
||||
|
@ -1780,7 +1776,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['prev'] = None
|
||||
# elif not identifier:
|
||||
# counter = 0
|
||||
# hostname = uri.get_hostname(url)
|
||||
# hostname = Url.get_hostname(url)
|
||||
# identifier = hostname + ':' + str(counter)
|
||||
# while True:
|
||||
# if sqlite.check_identifier_exist(db_file, identifier):
|
||||
|
@ -1797,7 +1793,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
exist_count = 0
|
||||
for url in urls:
|
||||
counter = 0
|
||||
hostname = uri.get_hostname(url)
|
||||
hostname = Url.get_hostname(url)
|
||||
identifier = hostname + ':' + str(counter)
|
||||
while True:
|
||||
if sqlite.check_identifier_exist(db_file, identifier):
|
||||
|
@ -1830,7 +1826,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
if isinstance(url, list):
|
||||
url = url[0]
|
||||
counter = 0
|
||||
hostname = uri.get_hostname(url)
|
||||
hostname = Url.get_hostname(url)
|
||||
identifier = hostname + ':' + str(counter)
|
||||
while True:
|
||||
if sqlite.check_identifier_exist(db_file, identifier):
|
||||
|
@ -1956,7 +1952,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
values = payload['values']
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
jid_bare = values['jid'][0]
|
||||
del values['jid']
|
||||
db_file = config.get_pathname_to_database(jid_bare)
|
||||
|
@ -1981,7 +1977,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
values = payload['values']
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
jid_bare = values['jid'][0]
|
||||
del values['jid']
|
||||
db_file = config.get_pathname_to_database(jid_bare)
|
||||
|
@ -2022,7 +2018,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||
if is_access(self, jid_bare, jid_full, chat_type):
|
||||
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||
form = self['xep_0004'].make_form('form', 'Discover & Search')
|
||||
form['instructions'] = 'Discover news subscriptions of all kinds'
|
||||
options = form.add_field(desc='Select type of search.',
|
||||
|
@ -2039,7 +2035,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['payload'] = form
|
||||
session['prev'] = None
|
||||
else:
|
||||
if not is_operator(self, jid_bare):
|
||||
if not XmppUtilities.is_operator(self, jid_bare):
|
||||
text_warn = 'This resource is restricted to operators.'
|
||||
elif chat_type == 'groupchat':
|
||||
text_warn = ('This resource is restricted to moderators of {}.'
|
||||
|
@ -2146,7 +2142,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||
if is_access(self, jid_bare, jid_full, chat_type):
|
||||
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||
form = self['xep_0004'].make_form('form', 'Subscriptions')
|
||||
form['instructions'] = ('Browse, view, toggle or remove '
|
||||
'tags and subscriptions.')
|
||||
|
@ -2160,7 +2156,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
options.addOption('Browse tags', 'tag')
|
||||
options.addOption('Remove subscriptions', 'delete')
|
||||
options.addOption('Toggle subscriptions', 'toggle')
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
form['instructions'] = None
|
||||
# form['instructions'] = ('Special section for operators:\n'
|
||||
# 'This section allows you to change '
|
||||
|
@ -2190,7 +2186,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['next'] = self._handle_subscriptions_result
|
||||
session['has_next'] = True
|
||||
else:
|
||||
if not is_operator(self, jid_bare):
|
||||
if not XmppUtilities.is_operator(self, jid_bare):
|
||||
text_warn = 'This resource is restricted to operators.'
|
||||
elif chat_type == 'groupchat':
|
||||
text_warn = ('This resource is restricted to moderators of {}.'
|
||||
|
@ -2212,7 +2208,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
values = payload['values']
|
||||
jid_bare = session['from'].bare
|
||||
form = self['xep_0004'].make_form('form', 'Subscriptions')
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
jid_bare = values['jid']
|
||||
form.add_field(ftype='hidden',
|
||||
value=jid_bare,
|
||||
|
@ -2306,7 +2302,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
form = self['xep_0004'].make_form('form', 'Subscriptions')
|
||||
jid_bare = session['from'].bare
|
||||
values = payload['values']
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
jid_bare = values['jid'][0]
|
||||
form.add_field(ftype='hidden',
|
||||
value=jid_bare,
|
||||
|
@ -2344,7 +2340,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
form = self['xep_0004'].make_form('form', 'Subscription')
|
||||
jid_bare = session['from'].bare
|
||||
values = payload['values']
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
jid_bare = values['jid'][0] if values['jid'] else jid_bare
|
||||
form.add_field(ftype='hidden',
|
||||
value=jid_bare,
|
||||
|
@ -2440,7 +2436,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
values = payload['values']
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
jid_bare = values['jid'][0]
|
||||
db_file = config.get_pathname_to_database(jid_bare)
|
||||
# url = values['url']
|
||||
|
@ -2506,14 +2502,14 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||
if is_access(self, jid_bare, jid_full, chat_type):
|
||||
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||
form = self['xep_0004'].make_form('form', 'Advanced')
|
||||
form['instructions'] = 'Extended options'
|
||||
options = form.add_field(ftype='list-single',
|
||||
label='Choose',
|
||||
required=True,
|
||||
var='option')
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
options.addOption('Administration', 'admin')
|
||||
# options.addOption('Activity', 'activity')
|
||||
# options.addOption('Filters', 'filter')
|
||||
|
@ -2527,7 +2523,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['next'] = self._handle_advanced_result
|
||||
session['prev'] = self._handle_advanced
|
||||
else:
|
||||
if not is_operator(self, jid_bare):
|
||||
if not XmppUtilities.is_operator(self, jid_bare):
|
||||
text_warn = 'This resource is restricted to operators.'
|
||||
elif chat_type == 'groupchat':
|
||||
text_warn = ('This resource is restricted to moderators of {}.'
|
||||
|
@ -2556,7 +2552,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
case 'admin':
|
||||
# NOTE Even though this check is already conducted on previous
|
||||
# form, this check is being done just in case.
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
if self.is_component:
|
||||
# NOTE This will be changed with XEP-0222 XEP-0223
|
||||
text_info = ('Subscriber management options are '
|
||||
|
@ -2589,7 +2585,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
else:
|
||||
logger.warning('An unauthorized attempt to access '
|
||||
'bookmarks has been detected for JID {} at '
|
||||
'{}'.format(jid_bare, dt.timestamp()))
|
||||
'{}'.format(jid_bare, DateAndTime.timestamp()))
|
||||
text_warn = 'This resource is restricted.'
|
||||
session['notes'] = [['warn', text_warn]]
|
||||
session['has_next'] = False
|
||||
|
@ -2617,7 +2613,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
required=True,
|
||||
var='url')
|
||||
url['validate']['datatype'] = 'xs:anyURI'
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
form.add_field(ftype='fixed',
|
||||
label='* Operators',
|
||||
desc='This section allows you to import '
|
||||
|
@ -2651,7 +2647,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
options.addOption('OPML', 'opml')
|
||||
# options.addOption('HTML', 'html')
|
||||
# options.addOption('XBEL', 'xbel')
|
||||
if is_operator(self, jid_bare):
|
||||
if XmppUtilities.is_operator(self, jid_bare):
|
||||
# form['instructions'] = ('Special section for operators:\n'
|
||||
# 'This section allows you to '
|
||||
# 'import and export subscriptions '
|
||||
|
@ -2841,7 +2837,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
url = values['url']
|
||||
if url.startswith('http') and url.endswith('.opml'):
|
||||
jid_bare = session['from'].bare
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
jid = values['jid']
|
||||
jid_bare = jid[0] if isinstance(jid, list) else jid
|
||||
db_file = config.get_pathname_to_database(jid_bare)
|
||||
|
@ -2882,7 +2878,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
# form['type'] = 'result'
|
||||
values = payload['values']
|
||||
jid_bare = session['from'].bare
|
||||
if is_operator(self, jid_bare) and 'jid' in values:
|
||||
if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
|
||||
jid = values['jid']
|
||||
jid_bare = jid[0] if isinstance(jid, list) else jid
|
||||
# form = self['xep_0004'].make_form('result', 'Done')
|
||||
|
@ -2915,7 +2911,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
jid_bare = session['from'].bare
|
||||
jid_full = str(session['from'])
|
||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||
if is_access(self, jid_bare, jid_full, chat_type):
|
||||
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||
form = self['xep_0004'].make_form('form', 'Subscribe')
|
||||
# NOTE Refresh button would be of use
|
||||
form['instructions'] = 'Featured subscriptions'
|
||||
|
@ -2938,7 +2934,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
if '@' in jid_bare:
|
||||
hostname = jid_bare.split('@')[1]
|
||||
url = 'http://' + hostname
|
||||
result = await crawl.probe_page(url)
|
||||
result = await FeedDiscovery.probe_page(url)
|
||||
if not result:
|
||||
url = {'url' : url,
|
||||
'index' : None,
|
||||
|
@ -2966,7 +2962,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['payload'] = form
|
||||
session['prev'] = self._handle_promoted
|
||||
else:
|
||||
if not is_operator(self, jid_bare):
|
||||
if not XmppUtilities.is_operator(self, jid_bare):
|
||||
text_warn = 'This resource is restricted to operators.'
|
||||
elif chat_type == 'groupchat':
|
||||
text_warn = ('This resource is restricted to moderators of {}.'
|
||||
|
@ -3620,7 +3616,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
.format(function_name, jid_full))
|
||||
jid_bare = session['from'].bare
|
||||
chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
|
||||
if is_access(self, jid_bare, jid_full, chat_type):
|
||||
if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
|
||||
db_file = config.get_pathname_to_database(jid_bare)
|
||||
if jid_bare not in self.settings:
|
||||
Config.add_settings_jid(self.settings, jid_bare, db_file)
|
||||
|
@ -3718,7 +3714,7 @@ class XmppClient(slixmpp.ClientXMPP):
|
|||
session['next'] = self._handle_settings_complete
|
||||
session['payload'] = form
|
||||
else:
|
||||
if not is_operator(self, jid_bare):
|
||||
if not XmppUtilities.is_operator(self, jid_bare):
|
||||
text_warn = 'This resource is restricted to operators.'
|
||||
elif chat_type == 'groupchat':
|
||||
text_warn = ('This resource is restricted to moderators of {}.'
|
||||
|
|
|
@ -5,14 +5,11 @@ from feedparser import parse
|
|||
from random import randrange
|
||||
import slixfeed.config as config
|
||||
from slixfeed.config import Config
|
||||
import slixfeed.crawl as crawl
|
||||
import slixfeed.dt as dt
|
||||
import slixfeed.fetch as fetch
|
||||
from slixfeed.log import Logger
|
||||
import slixfeed.sqlite as sqlite
|
||||
from slixfeed.syndication import Feed, Opml
|
||||
import slixfeed.url as uri
|
||||
from slixfeed.utilities import Documentation, Utilities
|
||||
from slixfeed.syndication import Feed, FeedDiscovery, Opml
|
||||
from slixfeed.utilities import DateAndTime, Documentation, Url, Utilities
|
||||
from slixfeed.version import __version__
|
||||
from slixfeed.xmpp.bookmark import XmppBookmark
|
||||
from slixfeed.xmpp.muc import XmppMuc
|
||||
|
@ -121,9 +118,9 @@ class XmppCommands:
|
|||
"""
|
||||
if url.startswith('http'):
|
||||
if not title:
|
||||
title = uri.get_hostname(url)
|
||||
title = Url.get_hostname(url)
|
||||
counter = 0
|
||||
hostname = uri.get_hostname(url)
|
||||
hostname = Url.get_hostname(url)
|
||||
hostname = hostname.replace('.','-')
|
||||
identifier = hostname + ':' + str(counter)
|
||||
while True:
|
||||
|
@ -148,7 +145,7 @@ class XmppCommands:
|
|||
if feed.has_key('updated_parsed'):
|
||||
feed_updated = feed.updated_parsed
|
||||
try:
|
||||
feed_updated = dt.convert_struct_time_to_iso8601(
|
||||
feed_updated = DateAndTime.convert_struct_time_to_iso8601(
|
||||
feed_updated)
|
||||
except:
|
||||
feed_updated = None
|
||||
|
@ -393,7 +390,7 @@ class XmppCommands:
|
|||
identifier = info[2]
|
||||
else:
|
||||
counter = 0
|
||||
hostname = uri.get_hostname(url)
|
||||
hostname = Url.get_hostname(url)
|
||||
hostname = hostname.replace('.','-')
|
||||
identifier = hostname + ':' + str(counter)
|
||||
while True:
|
||||
|
@ -417,8 +414,8 @@ class XmppCommands:
|
|||
if (url.startswith('feed:/') or
|
||||
url.startswith('itpc:/') or
|
||||
url.startswith('rss:/')):
|
||||
url = uri.feed_to_http(url)
|
||||
url = (await uri.replace_hostname(url, 'feed')) or url
|
||||
url = Url.feed_to_http(url)
|
||||
url = (await Url.replace_hostname(url, 'feed')) or url
|
||||
result = await Feed.add_feed(self, jid_bare, db_file, url,
|
||||
identifier)
|
||||
if isinstance(result, list):
|
||||
|
@ -479,10 +476,10 @@ class XmppCommands:
|
|||
# both interfaces Chat and IPC
|
||||
async def fetch_http(self, url, db_file, jid_bare):
|
||||
if url.startswith('feed:/') or url.startswith('rss:/'):
|
||||
url = uri.feed_to_http(url)
|
||||
url = (await uri.replace_hostname(url, 'feed')) or url
|
||||
url = Url.feed_to_http(url)
|
||||
url = (await Url.replace_hostname(url, 'feed')) or url
|
||||
counter = 0
|
||||
hostname = uri.get_hostname(url)
|
||||
hostname = Url.get_hostname(url)
|
||||
hostname = hostname.replace('.','-')
|
||||
identifier = hostname + ':' + str(counter)
|
||||
while True:
|
||||
|
@ -581,7 +578,7 @@ class XmppCommands:
|
|||
|
||||
async def muc_join(self, command):
|
||||
if command:
|
||||
muc_jid = uri.check_xmpp_uri(command)
|
||||
muc_jid = Url.check_xmpp_uri(command)
|
||||
if muc_jid:
|
||||
# TODO probe JID and confirm it's a groupchat
|
||||
result = await XmppMuc.join(self, muc_jid)
|
||||
|
@ -735,8 +732,8 @@ class XmppCommands:
|
|||
|
||||
async def feed_read(self, jid_bare, data, url):
|
||||
if url.startswith('feed:/') or url.startswith('rss:/'):
|
||||
url = uri.feed_to_http(url)
|
||||
url = (await uri.replace_hostname(url, 'feed')) or url
|
||||
url = Url.feed_to_http(url)
|
||||
url = (await Url.replace_hostname(url, 'feed')) or url
|
||||
match len(data):
|
||||
case 1:
|
||||
if url.startswith('http'):
|
||||
|
@ -750,7 +747,7 @@ class XmppCommands:
|
|||
message = Feed.view_feed(url, feed)
|
||||
break
|
||||
else:
|
||||
result = await crawl.probe_page(url, document)
|
||||
result = await FeedDiscovery.probe_page(url, document)
|
||||
if isinstance(result, list):
|
||||
results = result
|
||||
message = ("Syndication feeds found for {}\n\n```\n"
|
||||
|
@ -786,7 +783,7 @@ class XmppCommands:
|
|||
message = Feed.view_entry(url, feed, num)
|
||||
break
|
||||
else:
|
||||
result = await crawl.probe_page(url, document)
|
||||
result = await FeedDiscovery.probe_page(url, document)
|
||||
if isinstance(result, list):
|
||||
results = result
|
||||
message = ("Syndication feeds found for {}\n\n```\n"
|
||||
|
|
|
@ -14,7 +14,7 @@ TODO
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
from slixfeed.dt import current_time
|
||||
from slixfeed.utilities import DateAndTime
|
||||
from slixfeed.log import Logger
|
||||
from slixmpp.exceptions import IqTimeout, IqError
|
||||
from time import sleep
|
||||
|
@ -62,17 +62,17 @@ class XmppConnect:
|
|||
|
||||
def recover(self, message):
|
||||
logger.warning(message)
|
||||
print(current_time(), message, 'Attempting to reconnect.')
|
||||
print(DateAndTime.current_time(), message, 'Attempting to reconnect.')
|
||||
self.connection_attempts += 1
|
||||
# if self.connection_attempts <= self.max_connection_attempts:
|
||||
# self.reconnect(wait=5.0) # wait a bit before attempting to reconnect
|
||||
# else:
|
||||
# print(current_time(),"Maximum connection attempts exceeded.")
|
||||
# logging.error("Maximum connection attempts exceeded.")
|
||||
print(current_time(), 'Attempt number', self.connection_attempts)
|
||||
print(DateAndTime.current_time(), 'Attempt number', self.connection_attempts)
|
||||
seconds = self.reconnect_timeout or 30
|
||||
seconds = int(seconds)
|
||||
print(current_time(), 'Next attempt within', seconds, 'seconds')
|
||||
print(DateAndTime.current_time(), 'Next attempt within', seconds, 'seconds')
|
||||
# NOTE asyncio.sleep does not wait as expected here
|
||||
# await asyncio.sleep(seconds)
|
||||
sleep(seconds)
|
||||
|
|
|
@ -1,49 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
def is_access(self, jid_bare, jid_full, chat_type):
|
||||
"""Determine access privilege"""
|
||||
operator = is_operator(self, jid_bare)
|
||||
if operator:
|
||||
if chat_type == 'groupchat':
|
||||
if is_moderator(self, jid_bare, jid_full):
|
||||
access = True
|
||||
else:
|
||||
access = True
|
||||
else:
|
||||
access = False
|
||||
return access
|
||||
|
||||
|
||||
def is_operator(self, jid_bare):
|
||||
"""Check if given JID is an operator"""
|
||||
result = False
|
||||
for operator in self.operators:
|
||||
if jid_bare == operator['jid']:
|
||||
result = True
|
||||
# operator_name = operator['name']
|
||||
break
|
||||
return result
|
||||
|
||||
|
||||
def is_moderator(self, jid_bare, jid_full):
|
||||
"""Check if given JID is a moderator"""
|
||||
alias = jid_full[jid_full.index('/')+1:]
|
||||
role = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'role')
|
||||
if role == 'moderator':
|
||||
result = True
|
||||
else:
|
||||
result = False
|
||||
return result
|
||||
|
||||
|
||||
def is_member(self, jid_bare, jid_full):
|
||||
"""Check if given JID is a member"""
|
||||
alias = jid_full[jid_full.index('/')+1:]
|
||||
affiliation = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'affiliation')
|
||||
if affiliation == 'member':
|
||||
result = True
|
||||
else:
|
||||
result = False
|
||||
return result
|
|
@ -16,8 +16,7 @@ from slixfeed.config import Config
|
|||
from slixfeed.log import Logger
|
||||
import slixfeed.sqlite as sqlite
|
||||
from slixfeed.syndication import Feed
|
||||
import slixfeed.url as uri
|
||||
from slixfeed.utilities import Utilities
|
||||
from slixfeed.utilities import Url, Utilities
|
||||
from slixfeed.xmpp.iq import XmppIQ
|
||||
import sys
|
||||
|
||||
|
@ -337,7 +336,7 @@ class XmppPubsubAction:
|
|||
node_id = node_id[0]
|
||||
if not node_id:
|
||||
counter = 0
|
||||
hostname = uri.get_hostname(url)
|
||||
hostname = Url.get_hostname(url)
|
||||
hostname = hostname.replace('.','-')
|
||||
identifier = hostname + ':' + str(counter)
|
||||
while True:
|
||||
|
|
|
@ -16,7 +16,7 @@ logger = Logger(__name__)
|
|||
class XmppUpload:
|
||||
|
||||
async def start(self, jid, filename, domain=None):
|
||||
logger.info('Uploading file %s...', filename)
|
||||
logger.info(['Uploading file %s...', filename])
|
||||
try:
|
||||
upload_file = self['xep_0363'].upload_file
|
||||
# if self.encrypted and not self['xep_0454']:
|
||||
|
@ -34,7 +34,7 @@ class XmppUpload:
|
|||
filename, domain, timeout=10,
|
||||
)
|
||||
logger.info('Upload successful!')
|
||||
logger.info('Sending file to %s', jid)
|
||||
logger.info(['Sending file to %s', jid])
|
||||
except HTTPError:
|
||||
url = ('Error: It appears that this server does not support '
|
||||
'HTTP File Upload.')
|
||||
|
|
|
@ -58,3 +58,51 @@ class XmppUtilities:
|
|||
# finally:
|
||||
# logger.info('Chat type is:', chat_type)
|
||||
return result
|
||||
|
||||
|
||||
|
||||
def is_access(self, jid_bare, jid_full, chat_type):
|
||||
"""Determine access privilege"""
|
||||
operator = XmppUtilities.is_operator(self, jid_bare)
|
||||
if operator:
|
||||
if chat_type == 'groupchat':
|
||||
if XmppUtilities.is_moderator(self, jid_bare, jid_full):
|
||||
access = True
|
||||
else:
|
||||
access = True
|
||||
else:
|
||||
access = False
|
||||
return access
|
||||
|
||||
|
||||
def is_operator(self, jid_bare):
|
||||
"""Check if given JID is an operator"""
|
||||
result = False
|
||||
for operator in self.operators:
|
||||
if jid_bare == operator['jid']:
|
||||
result = True
|
||||
# operator_name = operator['name']
|
||||
break
|
||||
return result
|
||||
|
||||
|
||||
def is_moderator(self, jid_bare, jid_full):
|
||||
"""Check if given JID is a moderator"""
|
||||
alias = jid_full[jid_full.index('/')+1:]
|
||||
role = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'role')
|
||||
if role == 'moderator':
|
||||
result = True
|
||||
else:
|
||||
result = False
|
||||
return result
|
||||
|
||||
|
||||
def is_member(self, jid_bare, jid_full):
|
||||
"""Check if given JID is a member"""
|
||||
alias = jid_full[jid_full.index('/')+1:]
|
||||
affiliation = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'affiliation')
|
||||
if affiliation == 'member':
|
||||
result = True
|
||||
else:
|
||||
result = False
|
||||
return result
|