Fix command export; Restructure code.

Schimon Jehudah, Adv. 2024-06-16 11:55:22 +03:00
parent 15e6a1de66
commit 93ea8a9fab
17 changed files with 1066 additions and 1184 deletions

View file

@@ -1,436 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
FIXME
1) https://wiki.pine64.org
File "/slixfeed/crawl.py", line 178, in feed_mode_guess
address = join_url(url, parted_url.path.split('/')[1] + path)
~~~~~~~~~~~~~~~~~~~~~~~~~~^^^
IndexError: list index out of range
TODO
1.1) Attempt to scan more paths: /blog/, /news/ etc., including root /
Attempt to scan subdomains
https://esmailelbob.xyz/en/
https://blog.esmailelbob.xyz/feed/
1.2) Consider utilizing fetch.http_response
2) Consider merging with module fetch.py
FEEDS CRAWLER PROJECT
3) Mark redirects for manual check
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/atom.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/feed.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/feeds/rss/news.xml.php
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/jekyll/feed.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/news.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/news.xml.php
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/rdf.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/rss.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/videos.xml
"""
from aiohttp import ClientError, ClientSession, ClientTimeout
from feedparser import parse
import logging
from lxml import etree
from lxml import html
from lxml.etree import fromstring
import slixfeed.config as config
import slixfeed.fetch as fetch
from slixfeed.log import Logger
from slixfeed.url import complete_url, join_url, trim_url
from urllib.parse import urlsplit, urlunsplit
# TODO Use boolean as a flag to determine whether a single URL was found
# async def probe_page(
# callback, url, document, num=None, db_file=None):
# result = None
# try:
# # tree = etree.fromstring(res[0]) # etree is for xml
# tree = html.fromstring(document)
# except:
# result = (
# "> {}\nFailed to parse URL as feed."
# ).format(url)
# if not result:
# print("RSS Auto-Discovery Engaged")
# result = await feed_mode_auto_discovery(url, tree)
# if not result:
# print("RSS Scan Mode Engaged")
# result = await feed_mode_scan(url, tree)
# if not result:
# print("RSS Arbitrary Mode Engaged")
# result = await feed_mode_request(url, tree)
# if not result:
# result = (
# "> {}\nNo news feeds were found for URL."
# ).format(url)
# # elif msg:
# else:
# if isinstance(result, str):
# return result
# elif isinstance(result, list):
# url = result[0]
# if db_file:
# # print("if db_file", db_file)
# return await callback(db_file, url)
# elif num:
# return await callback(url, num)
# else:
# return await callback(url)
logger = Logger(__name__)
async def probe_page(url, document=None):
"""
Parameters
----------
url : str
URL.
document : str, optional
    HTML document.
Returns
-------
result : dict or list or None
    A single feed as dict, a selection of feeds as list, or None.
"""
if not document:
response = await fetch.http(url)
if not response['error']:
document = response['content']
try:
# tree = etree.fromstring(res[0]) # etree is for xml
tree = html.fromstring(document)
result = None
except Exception as e:
logger.error(str(e))
try:
# /questions/15830421/xml-unicode-strings-with-encoding-declaration-are-not-supported
# xml = html.fromstring(document.encode('utf-8'))
# parser = etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
# tree = fromstring(xml, parser=parser)
# /questions/57833080/how-to-fix-unicode-strings-with-encoding-declaration-are-not-supported
#tree = html.fromstring(bytes(document, encoding='utf8'))
# https://twigstechtips.blogspot.com/2013/06/python-lxml-strings-with-encoding.html
#parser = etree.XMLParser(recover=True)
#tree = etree.fromstring(document, parser)
tree = html.fromstring(document.encode('utf-8'))
result = None
except Exception as e:
logger.error(str(e))
logger.warning("Failed to parse URL as feed for {}.".format(url))
result = {'link' : None,
'index' : None,
'name' : None,
'code' : None,
'error' : True,
'exist' : None}
if not result:
logger.debug("Feed auto-discovery engaged for {}".format(url))
result = await feed_mode_auto_discovery(url, tree)
if not result:
logger.debug("Feed link scan mode engaged for {}".format(url))
result = await feed_mode_scan(url, tree)
if not result:
logger.debug("Feed arbitrary mode engaged for {}".format(url))
result = await feed_mode_guess(url, tree)
if not result:
logger.debug("No feeds were found for {}".format(url))
result = None
return result
# TODO Improve scan by gradual decreasing of path
async def feed_mode_guess(url, tree):
"""
Look up feeds by pathname using HTTP requests.
Parameters
----------
url : str
    URL.
tree : lxml.html.HtmlElement
    Parsed HTML document.
Returns
-------
urls : dict or list or None
    Selection of validated feeds.
"""
urls = []
parted_url = urlsplit(url)
paths = config.open_config_file("lists.toml")["pathnames"]
# Check whether URL has path (i.e. not root)
# Check parted_url.path to avoid error in case root wasn't given
# TODO Make more tests
if parted_url.path and parted_url.path.split('/')[1]:
if '.rss' not in paths:
    paths.extend([".atom", ".feed", ".rdf", ".rss"])
# if paths.index('.rss'):
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
parted_url_path = parted_url.path if parted_url.path else '/'
for path in paths:
address = join_url(url, parted_url_path.split('/')[1] + path)
if address not in urls:
urls.extend([address])
# breakpoint()
# print("feed_mode_guess")
urls = await process_feed_selection(url, urls)
return urls
async def feed_mode_scan(url, tree):
"""
Scan page for potential feeds by pathname.
Parameters
----------
url : str
    URL.
tree : lxml.html.HtmlElement
    Parsed HTML document.
Returns
-------
urls : dict or list or None
    Selection of validated feeds.
"""
urls = []
paths = config.open_config_file("lists.toml")["pathnames"]
for path in paths:
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
# xpath_query = "//a[contains(@href,'{}')]".format(path)
num = 5
xpath_query = (
"(//a[contains(@href,'{}')])[position()<={}]"
).format(path, num)
addresses = tree.xpath(xpath_query)
xpath_query = (
"(//a[contains(@href,'{}')])[position()>last()-{}]"
).format(path, num)
addresses += tree.xpath(xpath_query)
# NOTE Should number of addresses be limited or
# perhaps be N from the start and N from the end
for address in addresses:
address = join_url(url, address.xpath('@href')[0])
if address not in urls:
urls.extend([address])
# breakpoint()
# print("feed_mode_scan")
urls = await process_feed_selection(url, urls)
return urls
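# The two position() predicates above keep the first N and the last N
# matching anchors. Illustration (a sketch):
# doc = html.fromstring('<div>' + ''.join(
#     '<a href="/p{}.xml">x</a>'.format(i) for i in range(6)) + '</div>')
# doc.xpath("(//a[contains(@href,'.xml')])[position()<=2]")       # first two
# doc.xpath("(//a[contains(@href,'.xml')])[position()>last()-2]") # last two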
async def feed_mode_auto_discovery(url, tree):
"""
Look up feeds using the RSS autodiscovery technique.
See: https://www.rssboard.org/rss-autodiscovery
Parameters
----------
url : str
    URL.
tree : lxml.html.HtmlElement
    Parsed HTML document.
Returns
-------
urls : dict or list or None
    Selection of validated feeds.
"""
xpath_query = (
'//link[(@rel="alternate") and '
'(@type="application/atom+xml" or '
'@type="application/rdf+xml" or '
'@type="application/rss+xml")]'
)
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
feeds = tree.xpath(xpath_query)
if feeds:
urls = []
for feed in feeds:
# # The following code works;
# # The following code will catch
# # only valid resources (i.e. not 404);
# # The following code requires more bandwidth.
# res = await fetch.http(feed)
# if res[0]:
# disco = parse(res[0])
# title = disco["feed"]["title"]
# msg += "{} \n {} \n\n".format(title, feed)
# feed_name = feed.xpath('@title')[0]
# feed_addr = join_url(url, feed.xpath('@href')[0])
# if feed_addr.startswith("/"):
# feed_addr = url + feed_addr
address = join_url(url, feed.xpath('@href')[0])
if address not in urls:
urls.extend([address])
# breakpoint()
# print("feed_mode_auto_discovery")
urls = await process_feed_selection(url, urls)
return urls
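# A hypothetical page header matched by the XPath query above (a sketch):
# page = html.fromstring(
#     '<html><head><link rel="alternate" type="application/rss+xml"'
#     ' title="News" href="/feed.xml"/></head><body></body></html>')
# page.xpath('//link[(@rel="alternate") and '
#            '(@type="application/rss+xml")]')[0].get('href')
# -> '/feed.xml'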
# TODO Segregate function into function that returns
# URLs (string) and Feeds (dict) and function that
# composes text message (string).
# Maybe that's not necessary.
async def process_feed_selection(url, urls):
feeds = {}
for i in urls:
result = await fetch.http(i)
if not result['error']:
document = result['content']
status_code = result['status_code']
if status_code == 200: # NOTE This line might be redundant
try:
feeds[i] = [parse(document)]
except:
continue
message = (
"Web feeds found for {}\n\n```\n"
).format(url)
urls = []
for feed_url in feeds:
# try:
# res = await fetch.http(feed)
# except:
# continue
feed_name = None
if "title" in feeds[feed_url][0]["feed"].keys():
feed_name = feeds[feed_url][0].feed.title
feed_name = feed_name if feed_name else "Untitled"
# feed_name = feed_name if feed_name else urlsplit(feed_url).netloc
# AttributeError: 'str' object has no attribute 'entries'
if "entries" in feeds[feed_url][0].keys():
feed_amnt = feeds[feed_url][0].entries
else:
continue
if feed_amnt:
# NOTE Because there could be many false positives
# which are revealed in second phase of scan, we
# could end with a single feed, which would be
# listed instead of fetched, so feed_url_mark is
# utilized in order to make fetch possible.
# NOTE feed_url_mark was a variable which stored
# single URL (probably first accepted as valid)
# in order to get an indication whether a single
# URL has been fetched, so that the receiving
# function will scan that single URL instead of
# listing it as a message.
url = {'link' : feed_url,
'index' : None,
'name' : feed_name,
'code' : status_code,
'error' : False,
'exist' : None}
urls.extend([url])
count = len(urls)
if count > 1:
result = urls
elif count:
result = urls[0]
else:
result = None
return result
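# The shape of the return value encodes the outcome (see NOTE above).
# A sketch of how a caller might dispatch on it (name is illustrative):
def describe_feed_selection(result):
    if isinstance(result, dict):
        return 'single feed: {}'.format(result['link'])
    elif isinstance(result, list):
        return '{} candidate feeds'.format(len(result))
    else:
        return 'no feeds found'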
# def get_discovered_feeds(url, urls):
# message = (
# "Found {} web feeds:\n\n```\n"
# ).format(len(urls))
# if len(urls) > 1:
# for urls in urls:
# message += (
# "Title : {}\n"
# "Link : {}\n"
# "\n"
# ).format(url, url.title)
# message += (
# "```\nThe above feeds were extracted from\n{}"
# ).format(url)
# elif len(urls) > 0:
# result = urls
# else:
# message = (
# "No feeds were found for {}"
# ).format(url)
# return result
# Test module
# TODO ModuleNotFoundError: No module named 'slixfeed'
# import slixfeed.fetch as fetch
# from slixfeed.action import is_feed, process_feed_selection
# async def start(url):
# while True:
# result = await fetch.http(url)
# document = result[0]
# status = result[1]
# if document:
# feed = parse(document)
# if is_feed(feed):
# print(url)
# else:
# urls = await probe_page(
# url, document)
# if len(urls) > 1:
# await process_feed_selection(urls)
# elif urls:
# url = urls[0]
# else:
# response = (
# "> {}\nFailed to load URL. Reason: {}"
# ).format(url, status)
# break
# return response
# url = "https://www.smh.com.au/rssheadlines"
# start(url)

View file

@@ -1,114 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
https://feedparser.readthedocs.io/en/latest/date-parsing.html
"""
from datetime import datetime
from dateutil.parser import parse
from email.utils import parsedate, parsedate_to_datetime
def now():
"""
ISO 8601 Timestamp.
Returns
-------
date : str
ISO 8601 Timestamp.
"""
date = datetime.now().isoformat()
return date
def convert_struct_time_to_iso8601(struct_time):
date = datetime(*struct_time[:6])
date = date.isoformat()
return date
def current_date():
"""
Return MM DD, YYYY (Weekday Time) timestamp.
Returns
-------
date : str
MM DD, YYYY (Weekday Time) timestamp.
"""
now = datetime.now()
time = now.strftime("%B %d, %Y (%A %T)")
return time
def current_time():
"""
Return HH:MM:SS timestamp.
Returns
-------
date : str
HH:MM:SS timestamp.
"""
now = datetime.now()
time = now.strftime("%H:%M:%S")
return time
def timestamp():
"""
Return a timestamp suitable for filenames.
Returns
-------
formatted_time : str
%Y%m%d-%H%M%S timestamp.
"""
now = datetime.now()
formatted_time = now.strftime("%Y%m%d-%H%M%S")
return formatted_time
def validate(date):
"""
Validate date format.
Parameters
----------
date : str
Timestamp.
Returns
-------
date : str
Timestamp.
"""
try:
parse(date)
except:
date = now()
return date
def rfc2822_to_iso8601(date):
"""
Convert RFC 2822 into ISO 8601.
Parameters
----------
date : str
RFC 2822 Timestamp.
Returns
-------
date : str
ISO 8601 Timestamp.
"""
if parsedate(date):
try:
date = parsedate_to_datetime(date)
date = date.isoformat()
except:
date = now()
return date
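# Example of the conversion performed by rfc2822_to_iso8601 (a sketch):
# rfc2822_to_iso8601('Sun, 16 Jun 2024 11:55:22 +0300')
# -> '2024-06-16T11:55:22+03:00'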

View file

@@ -1,19 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TODO
Move code from sqlite.get_entry_unread
if num > 1:
news_list += (
"\n{}\n{}\n{}\n"
).format(str(title), str(link), str(feed_title))
else:
news_list = (
"{}\n{}\n{}"
).format(str(title), str(link), str(feed_title))
"""

View file

@@ -1,74 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TODO
1) is_feed: Look into the type ("atom", "rss2" etc.)
"""
def title(feed):
"""
Get title of feed.
Parameters
----------
url : str
URL.
feed : dict
Parsed feed document.
Returns
-------
title : str
Title or None.
"""
try:
title = feed["feed"]["title"]
except:
title = None
return title
def is_feed(feed):
"""
Determine whether document is feed or not.
Parameters
----------
feed : dict
Parsed feed.
Returns
-------
val : boolean
True or False.
"""
msg = None
if not feed.entries:
try:
feed["feed"]["title"]
val = True
# msg = (
# "Empty feed for {}"
# ).format(url)
except:
val = False
# msg = (
# "No entries nor title for {}"
# ).format(url)
elif feed.bozo:
val = False
# msg = (
# "Bozo detected for {}"
# ).format(url)
else:
val = True
# msg = (
# "Good feed for {}"
# ).format(url)
# print(msg)
return val
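# Example (a sketch): a title without entries passes; `from feedparser
# import parse` is assumed here.
# document = parse('<rss version="2.0"><channel>'
#                  '<title>Demo</title></channel></rss>')
# is_feed(document)
# -> True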

View file

@@ -20,9 +20,8 @@ TODO
 """
 from asyncio import Lock
-import slixfeed.dt as dt
 from slixfeed.log import Logger
-from slixfeed.url import join_url
+from slixfeed.utilities import DateAndTime, Url
 from sqlite3 import connect, Error, IntegrityError
 import sys
 import time
@@ -2736,7 +2735,7 @@ def get_invalid_entries(db_file, url, feed):
     title = feed["feed"]["title"]
     # Prepare a link to compare
     if entry.has_key("link"):
-        link = join_url(url, entry.link)
+        link = Url.join_url(url, entry.link)
     else:
         link = url
     # Compare date, link and title
@@ -2745,7 +2744,7 @@ def get_invalid_entries(db_file, url, feed):
     # print("compare published:", title, link, time)
    # print("compare published:", entry_title, entry_link, timestamp)
    # print("============")
-    time = dt.rfc2822_to_iso8601(entry.published)
+    time = DateAndTime.rfc2822_to_iso8601(entry.published)
    if (entry_title == title and
        entry_link == link and
        timestamp == time):

View file

@@ -29,12 +29,10 @@ from feedparser import parse
 import os
 import slixfeed.config as config
 from slixfeed.config import Config
-import slixfeed.crawl as crawl
-import slixfeed.dt as dt
 import slixfeed.fetch as fetch
 from slixfeed.log import Logger
 import slixfeed.sqlite as sqlite
-from slixfeed.url import join_url, trim_url
+from slixfeed.utilities import DateAndTime, Url
 from slixfeed.utilities import Html, MD
 from slixmpp.xmlstream import ET
 import sys
@@ -56,7 +54,7 @@ class Feed:
        if not os.path.isdir(cache_dir + '/' + ext):
            os.mkdir(cache_dir + '/' + ext)
        filename = os.path.join(
-           cache_dir, ext, 'slixfeed_' + dt.timestamp() + '.' + ext)
+           cache_dir, ext, 'slixfeed_' + DateAndTime.timestamp() + '.' + ext)
        db_file = config.get_pathname_to_database(jid_bare)
        results = sqlite.get_feeds(db_file)
        match ext:
@@ -220,6 +218,7 @@
        return node_entry
+   # Look into the type ("atom", "rss2" etc.)
    def is_feed(url, feed):
        """
        Determine whether document is feed or not.
@@ -301,7 +300,7 @@ class Feed:
        if "updated_parsed" in feed["feed"].keys():
            updated = feed["feed"]["updated_parsed"]
            try:
-               updated = dt.convert_struct_time_to_iso8601(updated)
+               updated = DateAndTime.convert_struct_time_to_iso8601(updated)
            except Exception as e:
                logger.error(str(e))
                updated = ''
@@ -325,7 +324,7 @@
        if feed.has_key('updated_parsed'):
            feed_updated = feed.updated_parsed
            try:
-               feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
+               feed_updated = DateAndTime.convert_struct_time_to_iso8601(feed_updated)
            except Exception as e:
                logger.error(str(e))
                feed_updated = None
@@ -357,7 +356,7 @@
        # NOTE Do not be tempted to return a compact dictionary.
        #      That is, dictionary within dictionary
        #      Return multiple dictionaries in a list or tuple.
-       result = await crawl.probe_page(url, document)
+       result = await FeedDiscovery.probe_page(url, document)
        if not result:
            # Get out of the loop with dict indicating error.
            result_final = {'link' : url,
@@ -437,16 +436,16 @@
            title = "*** No title ***"
        if entry.has_key("link"):
            # link = complete_url(source, entry.link)
-           link = join_url(url, entry.link)
-           link = trim_url(link)
+           link = Url.join_url(url, entry.link)
+           link = Url.trim_url(link)
        else:
            link = "*** No link ***"
        if entry.has_key("published"):
            date = entry.published
-           date = dt.rfc2822_to_iso8601(date)
+           date = DateAndTime.rfc2822_to_iso8601(date)
        elif entry.has_key("updated"):
            date = entry.updated
-           date = dt.rfc2822_to_iso8601(date)
+           date = DateAndTime.rfc2822_to_iso8601(date)
        else:
            date = "*** No date ***"
        response += ("Title : {}\n"
@@ -481,10 +480,10 @@
            title = '*** No title ***'
        if entry.has_key("published"):
            date = entry.published
-           date = dt.rfc2822_to_iso8601(date)
+           date = DateAndTime.rfc2822_to_iso8601(date)
        elif entry.has_key("updated"):
            date = entry.updated
-           date = dt.rfc2822_to_iso8601(date)
+           date = DateAndTime.rfc2822_to_iso8601(date)
        else:
            date = '*** No date ***'
        if entry.has_key("summary"):
@@ -500,8 +499,8 @@
            summary = '*** No summary ***'
        if entry.has_key("link"):
            # link = complete_url(source, entry.link)
-           link = join_url(url, entry.link)
-           link = trim_url(link)
+           link = Url.join_url(url, entry.link)
+           link = Url.trim_url(link)
        else:
            link = '*** No link ***'
        response = ("{}\n"
@@ -543,7 +542,7 @@
        if feed.has_key('updated_parsed'):
            feed_updated = feed.updated_parsed
            try:
-               feed_updated = dt.convert_struct_time_to_iso8601(feed_updated)
+               feed_updated = DateAndTime.convert_struct_time_to_iso8601(feed_updated)
            except:
                feed_updated = ''
        else:
@@ -598,18 +597,18 @@
            logger.debug('{}: entry: {}'.format(function_name, entry.link))
            if entry.has_key("published"):
                entry_published = entry.published
-               entry_published = dt.rfc2822_to_iso8601(entry_published)
+               entry_published = DateAndTime.rfc2822_to_iso8601(entry_published)
            else:
                entry_published = ''
            if entry.has_key("updated"):
                entry_updated = entry.updated
-               entry_updated = dt.rfc2822_to_iso8601(entry_updated)
+               entry_updated = DateAndTime.rfc2822_to_iso8601(entry_updated)
            else:
-               entry_updated = dt.now()
+               entry_updated = DateAndTime.now()
            if entry.has_key("link"):
                # link = complete_url(source, entry.link)
-               entry_link = join_url(feed_url, entry.link)
-               entry_link = trim_url(entry_link)
+               entry_link = Url.join_url(feed_url, entry.link)
+               entry_link = Url.trim_url(entry_link)
            else:
                entry_link = feed_url
            # title = feed["feed"]["title"]
@@ -783,8 +782,8 @@
            # if (e_link.rel == "enclosure" and
            #         media_type in ("audio", "image", "video")):
            #     media_link = e_link.href
-           #     media_link = join_url(url, e_link.href)
-           #     media_link = trim_url(media_link)
+           #     media_link = Url.join_url(url, e_link.href)
+           #     media_link = Url.trim_url(media_link)
            ###########################################################
@@ -821,6 +820,442 @@
        return new_entries
"""
FIXME
1) https://wiki.pine64.org
File "/slixfeed/crawl.py", line 178, in feed_mode_guess
address = Url.join_url(url, parted_url.path.split('/')[1] + path)
~~~~~~~~~~~~~~~~~~~~~~~~~~^^^
IndexError: list index out of range
TODO
1.1) Attempt to scan more paths: /blog/, /news/ etc., including root /
Attempt to scan subdomains
https://esmailelbob.xyz/en/
https://blog.esmailelbob.xyz/feed/
1.2) Consider utilizing fetch.http_response
2) DeviantArt
https://www.deviantart.com/nedesem/gallery
https://backend.deviantart.com/rss.xml?q=gallery:nedesem
https://backend.deviantart.com/rss.xml?q=nedesem
https://www.deviantart.com/search?q=
https://backend.deviantart.com/rss.xml?q=search:
FEEDS CRAWLER PROJECT
3) Mark redirects for manual check
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/atom.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/feed.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/feeds/rss/news.xml.php
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/jekyll/feed.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/news.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/news.xml.php
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/rdf.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/rss.xml
Title : JSON Feed
Link : https://www.jsonfeed.org/feed.json/videos.xml
"""
from aiohttp import ClientError, ClientSession, ClientTimeout
from lxml import etree
from lxml import html
from lxml.etree import fromstring
class FeedDiscovery:
# TODO Use boolean as a flag to determine whether a single URL was found
# async def probe_page(
# callback, url, document, num=None, db_file=None):
# result = None
# try:
# # tree = etree.fromstring(res[0]) # etree is for xml
# tree = html.fromstring(document)
# except:
# result = (
# "> {}\nFailed to parse URL as feed."
# ).format(url)
# if not result:
# print("RSS Auto-Discovery Engaged")
# result = await feed_mode_auto_discovery(url, tree)
# if not result:
# print("RSS Scan Mode Engaged")
# result = await feed_mode_scan(url, tree)
# if not result:
# print("RSS Arbitrary Mode Engaged")
# result = await feed_mode_request(url, tree)
# if not result:
# result = (
# "> {}\nNo news feeds were found for URL."
# ).format(url)
# # elif msg:
# else:
# if isinstance(result, str):
# return result
# elif isinstance(result, list):
# url = result[0]
# if db_file:
# # print("if db_file", db_file)
# return await callback(db_file, url)
# elif num:
# return await callback(url, num)
# else:
# return await callback(url)
async def probe_page(url, document=None):
"""
Parameters
----------
url : str
URL.
document : str, optional
    HTML document.
Returns
-------
result : dict or list or None
    A single feed as dict, a selection of feeds as list, or None.
"""
if not document:
response = await fetch.http(url)
if not response['error']:
document = response['content']
try:
# tree = etree.fromstring(res[0]) # etree is for xml
tree = html.fromstring(document)
result = None
except Exception as e:
logger.error(str(e))
try:
# /questions/15830421/xml-unicode-strings-with-encoding-declaration-are-not-supported
# xml = html.fromstring(document.encode('utf-8'))
# parser = etree.XMLParser(ns_clean=True, recover=True, encoding='utf-8')
# tree = fromstring(xml, parser=parser)
# /questions/57833080/how-to-fix-unicode-strings-with-encoding-declaration-are-not-supported
#tree = html.fromstring(bytes(document, encoding='utf8'))
# https://twigstechtips.blogspot.com/2013/06/python-lxml-strings-with-encoding.html
#parser = etree.XMLParser(recover=True)
#tree = etree.fromstring(document, parser)
tree = html.fromstring(document.encode('utf-8'))
result = None
except Exception as e:
logger.error(str(e))
logger.warning("Failed to parse URL as feed for {}.".format(url))
result = {'link' : None,
'index' : None,
'name' : None,
'code' : None,
'error' : True,
'exist' : None}
if not result:
logger.debug("Feed auto-discovery engaged for {}".format(url))
result = FeedDiscovery.feed_mode_auto_discovery(url, tree)
if not result:
logger.debug("Feed link scan mode engaged for {}".format(url))
result = FeedDiscovery.feed_mode_scan(url, tree)
if not result:
logger.debug("Feed arbitrary mode engaged for {}".format(url))
result = FeedDiscovery.feed_mode_guess(url, tree)
if not result:
    logger.debug("No feeds were found for {}".format(url))
    result = None
# Guard: only a list of candidate URLs goes through selection; an error
# dict or None from the steps above is returned as-is.
if isinstance(result, list):
    result = await FeedDiscovery.process_feed_selection(url, result)
return result
# TODO Improve scan by gradual decreasing of path
def feed_mode_guess(url, tree):
"""
Look up feeds by pathname using HTTP requests.
Parameters
----------
url : str
    URL.
tree : lxml.html.HtmlElement
    Parsed HTML document.
Returns
-------
urls : list
    Feed URL candidates.
"""
urls = []
parted_url = urlsplit(url)
paths = config.open_config_file("lists.toml")["pathnames"]
# Check whether URL has path (i.e. not root)
# Check parted_url.path to avoid error in case root wasn't given
# TODO Make more tests
if parted_url.path and parted_url.path.split('/')[1]:
if '.rss' not in paths:
    paths.extend([".atom", ".feed", ".rdf", ".rss"])
# if paths.index('.rss'):
# paths.extend([".atom", ".feed", ".rdf", ".rss"])
parted_url_path = parted_url.path if parted_url.path else '/'
for path in paths:
address = Url.join_url(url, parted_url_path.split('/')[1] + path)
if address not in urls:
urls.extend([address])
# breakpoint()
# print("feed_mode_guess")
return urls
def feed_mode_scan(url, tree):
"""
Scan page for potential feeds by pathname.
Parameters
----------
url : str
    URL.
tree : lxml.html.HtmlElement
    Parsed HTML document.
Returns
-------
urls : list
    Feed URL candidates.
"""
urls = []
paths = config.open_config_file("lists.toml")["pathnames"]
for path in paths:
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
# xpath_query = "//a[contains(@href,'{}')]".format(path)
num = 5
xpath_query = (
"(//a[contains(@href,'{}')])[position()<={}]"
).format(path, num)
addresses = tree.xpath(xpath_query)
xpath_query = (
"(//a[contains(@href,'{}')])[position()>last()-{}]"
).format(path, num)
addresses += tree.xpath(xpath_query)
# NOTE Should number of addresses be limited or
# perhaps be N from the start and N from the end
for address in addresses:
address = Url.join_url(url, address.xpath('@href')[0])
if address not in urls:
urls.extend([address])
# breakpoint()
# print("feed_mode_scan")
return urls
def feed_mode_auto_discovery(url, tree):
"""
Look up feeds using the RSS autodiscovery technique.
See: https://www.rssboard.org/rss-autodiscovery
Parameters
----------
url : str
    URL.
tree : lxml.html.HtmlElement
    Parsed HTML document.
Returns
-------
urls : list or None
    Feed URL candidates, or None when the page advertises none.
"""
xpath_query = (
'//link[(@rel="alternate") and '
'(@type="application/atom+xml" or '
'@type="application/rdf+xml" or '
'@type="application/rss+xml")]'
)
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
feeds = tree.xpath(xpath_query)
if feeds:
urls = []
for feed in feeds:
# # The following code works;
# # The following code will catch
# # only valid resources (i.e. not 404);
# # The following code requires more bandwidth.
# res = await fetch.http(feed)
# if res[0]:
# disco = parse(res[0])
# title = disco["feed"]["title"]
# msg += "{} \n {} \n\n".format(title, feed)
# feed_name = feed.xpath('@title')[0]
# feed_addr = Url.join_url(url, feed.xpath('@href')[0])
# if feed_addr.startswith("/"):
# feed_addr = url + feed_addr
address = Url.join_url(url, feed.xpath('@href')[0])
if address not in urls:
urls.extend([address])
# breakpoint()
# print("feed_mode_auto_discovery")
return urls
# TODO Segregate function into function that returns
# URLs (string) and Feeds (dict) and function that
# composes text message (string).
# Maybe that's not necessary.
async def process_feed_selection(url, urls):
feeds = {}
for i in urls:
result = await fetch.http(i)
if not result['error']:
document = result['content']
status_code = result['status_code']
if status_code == 200: # NOTE This line might be redundant
try:
feeds[i] = [parse(document)]
except:
continue
message = (
"Web feeds found for {}\n\n```\n"
).format(url)
urls = []
for feed_url in feeds:
# try:
# res = await fetch.http(feed)
# except:
# continue
feed_name = None
if "title" in feeds[feed_url][0]["feed"].keys():
feed_name = feeds[feed_url][0].feed.title
feed_name = feed_name if feed_name else "Untitled"
# feed_name = feed_name if feed_name else urlsplit(feed_url).netloc
# AttributeError: 'str' object has no attribute 'entries'
if "entries" in feeds[feed_url][0].keys():
feed_amnt = feeds[feed_url][0].entries
else:
continue
if feed_amnt:
# NOTE Because there could be many false positives
# which are revealed in second phase of scan, we
# could end with a single feed, which would be
# listed instead of fetched, so feed_url_mark is
# utilized in order to make fetch possible.
# NOTE feed_url_mark was a variable which stored
# single URL (probably first accepted as valid)
# in order to get an indication whether a single
# URL has been fetched, so that the receiving
# function will scan that single URL instead of
# listing it as a message.
url = {'link' : feed_url,
'index' : None,
'name' : feed_name,
'code' : status_code,
'error' : False,
'exist' : None}
urls.extend([url])
count = len(urls)
if count > 1:
result = urls
elif count:
result = urls[0]
else:
result = None
return result
# def get_discovered_feeds(url, urls):
# message = (
# "Found {} web feeds:\n\n```\n"
# ).format(len(urls))
# if len(urls) > 1:
# for urls in urls:
# message += (
# "Title : {}\n"
# "Link : {}\n"
# "\n"
# ).format(url, url.title)
# message += (
# "```\nThe above feeds were extracted from\n{}"
# ).format(url)
# elif len(urls) > 0:
# result = urls
# else:
# message = (
# "No feeds were found for {}"
# ).format(url)
# return result
# Test module
# TODO ModuleNotFoundError: No module named 'slixfeed'
# import slixfeed.fetch as fetch
# from slixfeed.action import is_feed, process_feed_selection
# async def start(url):
# while True:
# result = await fetch.http(url)
# document = result[0]
# status = result[1]
# if document:
# feed = parse(document)
# if is_feed(feed):
# print(url)
# else:
# urls = await probe_page(
# url, document)
# if len(urls) > 1:
# await process_feed_selection(urls)
# elif urls:
# url = urls[0]
# else:
# response = (
# "> {}\nFailed to load URL. Reason: {}"
# ).format(url, status)
# break
# return response
# url = "https://www.smh.com.au/rssheadlines"
# start(url)
class FeedTask:
@@ -921,7 +1356,7 @@ class Opml:
        ETR.SubElement(head, "generator").text = "Slixfeed"
        ETR.SubElement(head, "urlPublic").text = (
            "https://slixfeed.woodpeckersnest.space/")
-       time_stamp = dt.current_time()
+       time_stamp = DateAndTime.current_time()
        ETR.SubElement(head, "dateCreated").text = time_stamp
        ETR.SubElement(head, "dateModified").text = time_stamp
        body = ETR.SubElement(root, "body")

View file

@@ -1,352 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
FIXME
1) Do not treat base64 data URIs as relative links, e.g.
https://www.lilithsaintcrow.com/2024/02/love-anonymous/
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
https://www.lilithsaintcrow.com/2024/02/love-anonymous//image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
TODO
1) ActivityPub URL revealer activitypub_to_http.
2) SQLite preference "instance" for preferred instances.
"""
from email.utils import parseaddr
import os
import random
import slixfeed.config as config
import slixfeed.fetch as fetch
from slixfeed.log import Logger
from urllib.parse import (
parse_qs,
urlencode,
urljoin,
# urlparse,
urlsplit,
urlunsplit
)
logger = Logger(__name__)
# NOTE
# hostname and protocol are listed as one in file proxies.toml.
# Perhaps a better practice would be to have them separated.
# NOTE
# File proxies.toml will remain as it is, in order to be
# coordinated with the dataset of project LibRedirect, even
# though rule-sets might be adopted (see Privacy Redirect).
def get_hostname(url):
parted_url = urlsplit(url)
hostname = parted_url.netloc
if hostname.startswith('www.'): hostname = hostname.replace('www.', '')
return hostname
async def replace_hostname(url, url_type):
"""
Replace hostname.
Parameters
----------
url : str
URL.
url_type : str
"feed" or "link".
Returns
-------
url : str
URL.
"""
url_new = None
parted_url = urlsplit(url)
# protocol = parted_url.scheme
hostname = parted_url.netloc
hostname = hostname.replace('www.','')
pathname = parted_url.path
queries = parted_url.query
fragment = parted_url.fragment
proxies = config.open_config_file('proxies.toml')['proxies']
for proxy_name in proxies:
proxy = proxies[proxy_name]
if hostname in proxy['hostname'] and url_type in proxy['type']:
while not url_new:
print('>>>')
print(url_new)
proxy_type = 'clearnet'
proxy_list = proxy[proxy_type]
if len(proxy_list):
# proxy_list = proxies[proxy_name][proxy_type]
proxy_url = random.choice(proxy_list)
parted_proxy_url = urlsplit(proxy_url)
protocol_new = parted_proxy_url.scheme
hostname_new = parted_proxy_url.netloc
url_new = urlunsplit([protocol_new, hostname_new,
pathname, queries, fragment])
print(proxy_url)
print(url_new)
print('>>>')
response = await fetch.http(url_new)
if (response and
response['status_code'] == 200 and
# response.reason == 'OK' and
url_new.startswith(proxy_url)):
break
else:
config_dir = config.get_default_config_directory()
proxies_obsolete_file = config_dir + '/proxies_obsolete.toml'
proxies_file = config_dir + '/proxies.toml'
if not os.path.isfile(proxies_obsolete_file):
config.create_skeleton(proxies_file)
config.backup_obsolete(proxies_obsolete_file,
proxy_name, proxy_type,
proxy_url)
try:
config.update_proxies(proxies_file, proxy_name,
proxy_type, proxy_url)
except ValueError as e:
logger.error([str(e), proxy_url])
url_new = None
else:
logger.warning('No proxy URLs for {}. '
'Please update proxies.toml'
.format(proxy_name))
url_new = url
break
return url_new
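# The proxies.toml structure replace_hostname expects, shown with a
# hypothetical entry (a sketch; tomllib is Python 3.11+):
# import tomllib
# proxies = tomllib.loads('''
# [proxies.invidious]
# hostname = ["youtube.com", "youtu.be"]
# type = ["feed", "link"]
# clearnet = ["https://invidious.example.org"]
# ''')['proxies']
# proxies['invidious']['clearnet'][0] -> 'https://invidious.example.org'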
def remove_tracking_parameters(url):
"""
Remove queries with tracking parameters.
Parameters
----------
url : str
URL.
Returns
-------
url : str
URL.
"""
if url.startswith('data:') and ';base64,' in url:
return url
parted_url = urlsplit(url)
protocol = parted_url.scheme
hostname = parted_url.netloc
pathname = parted_url.path
queries = parse_qs(parted_url.query)
fragment = parted_url.fragment
trackers = config.open_config_file('queries.toml')['trackers']
for tracker in trackers:
if tracker in queries: del queries[tracker]
queries_new = urlencode(queries, doseq=True)
url = urlunsplit([protocol, hostname, pathname, queries_new, fragment])
return url
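# Example (a sketch; assumes 'utm_source' is listed under trackers in
# queries.toml):
# remove_tracking_parameters('https://example.org/post?utm_source=rss&id=7')
# -> 'https://example.org/post?id=7'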
def feed_to_http(url):
"""
Replace scheme FEED by HTTP.
Parameters
----------
url : str
URL.
Returns
-------
new_url : str
URL.
"""
par_url = urlsplit(url)
new_url = urlunsplit(['http', par_url.netloc, par_url.path, par_url.query,
par_url.fragment])
return new_url
def check_xmpp_uri(uri):
"""
Check validity of XMPP URI.
Parameters
----------
uri : str
URI.
Returns
-------
jid : str
JID or None.
"""
jid = urlsplit(uri).path
if parseaddr(jid)[1] != jid:
jid = False
return jid
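# Example (a sketch): the path of an XMPP URI is the bare JID.
# check_xmpp_uri('xmpp:user@example.org')
# -> 'user@example.org'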
# NOTE Read the documentation
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
def complete_url(source, link):
"""
Check if URL is pathname and complete it into URL.
Parameters
----------
source : str
Feed URL.
link : str
Link URL or pathname.
Returns
-------
str
URL.
"""
if link.startswith('data:') and ';base64,' in link:
return link
if link.startswith('www.'):
return 'http://' + link
parted_link = urlsplit(link)
parted_feed = urlsplit(source)
if parted_link.scheme == 'magnet' and parted_link.query:
return link
if parted_link.scheme and parted_link.netloc:
return link
if link.startswith('//'):
if parted_link.netloc and parted_link.path:
new_link = urlunsplit([parted_feed.scheme, parted_link.netloc,
parted_link.path, parted_link.query,
parted_link.fragment])
elif link.startswith('/'):
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
parted_link.path, parted_link.query,
parted_link.fragment])
elif link.startswith('../'):
pathlink = parted_link.path.split('/')
pathfeed = parted_feed.path.split('/')
for i in pathlink:
if i == '..':
if pathlink.index('..') == 0:
pathfeed.pop()
else:
break
while pathlink.count('..'):
if pathlink.index('..') == 0:
pathlink.remove('..')
else:
break
pathlink = '/'.join(pathlink)
pathfeed.extend([pathlink])
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
'/'.join(pathfeed), parted_link.query,
parted_link.fragment])
else:
pathlink = parted_link.path.split('/')
pathfeed = parted_feed.path.split('/')
if link.startswith('./'):
pathlink.remove('.')
if not source.endswith('/'):
pathfeed.pop()
pathlink = '/'.join(pathlink)
pathfeed.extend([pathlink])
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
'/'.join(pathfeed), parted_link.query,
parted_link.fragment])
return new_link
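# Illustrative results (a sketch):
# complete_url('https://example.org/blog/feed.xml', '/a.xml')
# -> 'https://example.org/a.xml'
# complete_url('https://example.org/blog/feed.xml', 'a.xml')
# -> 'https://example.org/blog/a.xml'
# complete_url('https://example.org/blog/feed.xml', 'www.example.net')
# -> 'http://www.example.net'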
# TODO
# Feed https://www.ocaml.org/feed.xml
# Link %20https://frama-c.com/fc-versions/cobalt.html%20
# FIXME
# Feed https://cyber.dabamos.de/blog/feed.rss
# Link https://cyber.dabamos.de/blog/#article-2022-07-15
def join_url(source, link):
"""
Join base URL with given pathname.
Parameters
----------
source : str
Feed URL.
link : str
Link URL or pathname.
Returns
-------
str
URL.
"""
if link.startswith('data:') and ';base64,' in link:
return link
if link.startswith('www.'):
new_link = 'http://' + link
elif link.startswith('%20') and link.endswith('%20'):
old_link = link.split('%20')
del old_link[0]
old_link.pop()
new_link = ''.join(old_link)
else:
new_link = urljoin(source, link)
return new_link
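# Example for the %20-wrapped case from the TODO above (a sketch):
# join_url('https://www.ocaml.org/feed.xml',
#          '%20https://frama-c.com/fc-versions/cobalt.html%20')
# -> 'https://frama-c.com/fc-versions/cobalt.html'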
def trim_url(url):
"""
Check URL pathname for double slash.
Parameters
----------
url : str
URL.
Returns
-------
url : str
URL.
"""
if url.startswith('data:') and ';base64,' in url:
return url
parted_url = urlsplit(url)
protocol = parted_url.scheme
hostname = parted_url.netloc
pathname = parted_url.path
queries = parted_url.query
fragment = parted_url.fragment
while '//' in pathname:
pathname = pathname.replace('//', '/')
url = urlunsplit([protocol, hostname, pathname, queries, fragment])
return url
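# Example (a sketch):
# trim_url('https://example.org//blog//feed.xml')
# -> 'https://example.org/blog/feed.xml'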
def activitypub_to_http(namespace):
"""
Replace ActivityPub namespace by HTTP.
Parameters
----------
namespace : str
Namespace.
Returns
-------
new_url : str
URL.
"""

View file

@@ -39,16 +39,27 @@ TODO
 """
+from datetime import datetime
+from email.utils import parseaddr
+from dateutil.parser import parse
+from email.utils import parsedate, parsedate_to_datetime
 import hashlib
+import os
+import random
 import slixfeed.config as config
+from slixfeed.config import Config
 from lxml import etree, html
 import slixfeed.dt as dt
 import slixfeed.fetch as fetch
 from slixfeed.log import Logger
-import slixfeed.sqlite as sqlite
-from slixfeed.url import join_url, complete_url
 import sys
+from urllib.parse import (
+    parse_qs,
+    urlencode,
+    urljoin,
+    # urlparse,
+    urlsplit,
+    urlunsplit
+)
 try:
     import tomllib
@@ -58,6 +69,115 @@ except:
 logger = Logger(__name__)
class DateAndTime:
# https://feedparser.readthedocs.io/en/latest/date-parsing.html
def now():
"""
ISO 8601 Timestamp.
Returns
-------
date : str
ISO 8601 Timestamp.
"""
date = datetime.now().isoformat()
return date
def convert_struct_time_to_iso8601(struct_time):
date = datetime(*struct_time[:6])
date = date.isoformat()
return date
def current_date():
"""
Return MM DD, YYYY (Weekday Time) timestamp.
Returns
-------
date : str
MM DD, YYYY (Weekday Time) timestamp.
"""
now = datetime.now()
time = now.strftime("%B %d, %Y (%A %T)")
return time
def current_time():
"""
Return HH:MM:SS timestamp.
Returns
-------
date : str
HH:MM:SS timestamp.
"""
now = datetime.now()
time = now.strftime("%H:%M:%S")
return time
def timestamp():
"""
Return a timestamp suitable for filenames.
Returns
-------
formatted_time : str
%Y%m%d-%H%M%S timestamp.
"""
now = datetime.now()
formatted_time = now.strftime("%Y%m%d-%H%M%S")
return formatted_time
def validate(date):
"""
Validate date format.
Parameters
----------
date : str
Timestamp.
Returns
-------
date : str
Timestamp.
"""
try:
parse(date)
except:
date = DateAndTime.now()
return date
def rfc2822_to_iso8601(date):
"""
Convert RFC 2822 into ISO 8601.
Parameters
----------
date : str
RFC 2822 Timestamp.
Returns
-------
date : str
ISO 8601 Timestamp.
"""
if parsedate(date):
try:
date = parsedate_to_datetime(date)
date = date.isoformat()
except:
date = DateAndTime.now()
return date
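# Example (a sketch) of the struct_time conversion that feedparser dates
# go through; time.strptime stands in for a parsed feed date:
# import time
# struct = time.strptime('2024-06-16 11:55:22', '%Y-%m-%d %H:%M:%S')
# DateAndTime.convert_struct_time_to_iso8601(struct)
# -> '2024-06-16T11:55:22'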
class Documentation:
@@ -120,7 +240,7 @@ class Html:
        if len(images):
            image = images[0]
            image = str(image)
-           image_url = complete_url(url, image)
+           image_url = Url.complete_url(url, image)
            return image_url
@@ -224,6 +344,343 @@ class Task:
            .format(task, jid_bare))
"""
FIXME
1) Do not treat base64 data URIs as relative links, e.g.
https://www.lilithsaintcrow.com/2024/02/love-anonymous/
data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
https://www.lilithsaintcrow.com/2024/02/love-anonymous//image/png;base64,iVBORw0KGgoAAAANSUhEUgAABaAAAAeAAQAAAAAQ6M16AAAAAnRSTlMAAHaTzTgAAAFmSURBVBgZ7cEBAQAAAIKg/q92SMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADgWE3LAAGyZmPPAAAAAElFTkSuQmCC
TODO
1) ActivityPub URL revealer activitypub_to_http.
2) SQLite preference "instance" for preferred instances.
"""
class Url:
# NOTE
# hostname and protocol are listed as one in file proxies.toml.
# Perhaps a better practice would be to have them separated.
# NOTE
# File proxies.toml will remain as it is, in order to be
# coordinated with the dataset of project LibRedirect, even
# though rule-sets might be adopted (see Privacy Redirect).
def get_hostname(url):
parted_url = urlsplit(url)
hostname = parted_url.netloc
if hostname.startswith('www.'): hostname = hostname.replace('www.', '')
return hostname
async def replace_hostname(url, url_type):
"""
Replace hostname.
Parameters
----------
url : str
URL.
url_type : str
"feed" or "link".
Returns
-------
url : str
URL.
"""
url_new = None
parted_url = urlsplit(url)
# protocol = parted_url.scheme
hostname = parted_url.netloc
hostname = hostname.replace('www.','')
pathname = parted_url.path
queries = parted_url.query
fragment = parted_url.fragment
proxies = config.open_config_file('proxies.toml')['proxies']
for proxy_name in proxies:
proxy = proxies[proxy_name]
if hostname in proxy['hostname'] and url_type in proxy['type']:
while not url_new:
print('>>>')
print(url_new)
proxy_type = 'clearnet'
proxy_list = proxy[proxy_type]
if len(proxy_list):
# proxy_list = proxies[proxy_name][proxy_type]
proxy_url = random.choice(proxy_list)
parted_proxy_url = urlsplit(proxy_url)
protocol_new = parted_proxy_url.scheme
hostname_new = parted_proxy_url.netloc
url_new = urlunsplit([protocol_new, hostname_new,
pathname, queries, fragment])
print(proxy_url)
print(url_new)
print('>>>')
response = await fetch.http(url_new)
if (response and
response['status_code'] == 200 and
# response.reason == 'OK' and
url_new.startswith(proxy_url)):
break
else:
config_dir = config.get_default_config_directory()
proxies_obsolete_file = config_dir + '/proxies_obsolete.toml'
proxies_file = config_dir + '/proxies.toml'
if not os.path.isfile(proxies_obsolete_file):
config.create_skeleton(proxies_file)
config.backup_obsolete(proxies_obsolete_file,
proxy_name, proxy_type,
proxy_url)
try:
config.update_proxies(proxies_file, proxy_name,
proxy_type, proxy_url)
except ValueError as e:
logger.error([str(e), proxy_url])
url_new = None
else:
logger.warning('No proxy URLs for {}. '
'Please update proxies.toml'
.format(proxy_name))
url_new = url
break
return url_new
def remove_tracking_parameters(url):
"""
Remove queries with tracking parameters.
Parameters
----------
url : str
URL.
Returns
-------
url : str
URL.
"""
if url.startswith('data:') and ';base64,' in url:
return url
parted_url = urlsplit(url)
protocol = parted_url.scheme
hostname = parted_url.netloc
pathname = parted_url.path
queries = parse_qs(parted_url.query)
fragment = parted_url.fragment
trackers = config.open_config_file('queries.toml')['trackers']
for tracker in trackers:
if tracker in queries: del queries[tracker]
queries_new = urlencode(queries, doseq=True)
url = urlunsplit([protocol, hostname, pathname, queries_new, fragment])
return url
def feed_to_http(url):
"""
Replace scheme FEED by HTTP.
Parameters
----------
url : str
URL.
Returns
-------
new_url : str
URL.
"""
par_url = urlsplit(url)
new_url = urlunsplit(['http', par_url.netloc, par_url.path, par_url.query,
par_url.fragment])
return new_url
def check_xmpp_uri(uri):
"""
Check validity of XMPP URI.
Parameters
----------
uri : str
URI.
Returns
-------
jid : str
JID or None.
"""
jid = urlsplit(uri).path
if parseaddr(jid)[1] != jid:
jid = False
return jid
# NOTE Read the documentation
# https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urljoin
def complete_url(source, link):
"""
Check if URL is pathname and complete it into URL.
Parameters
----------
source : str
Feed URL.
link : str
Link URL or pathname.
Returns
-------
str
URL.
"""
if link.startswith('data:') and ';base64,' in link:
return link
if link.startswith('www.'):
return 'http://' + link
parted_link = urlsplit(link)
parted_feed = urlsplit(source)
if parted_link.scheme == 'magnet' and parted_link.query:
return link
if parted_link.scheme and parted_link.netloc:
return link
if link.startswith('//'):
if parted_link.netloc and parted_link.path:
new_link = urlunsplit([parted_feed.scheme, parted_link.netloc,
parted_link.path, parted_link.query,
parted_link.fragment])
elif link.startswith('/'):
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
parted_link.path, parted_link.query,
parted_link.fragment])
elif link.startswith('../'):
pathlink = parted_link.path.split('/')
pathfeed = parted_feed.path.split('/')
for i in pathlink:
if i == '..':
if pathlink.index('..') == 0:
pathfeed.pop()
else:
break
while pathlink.count('..'):
if pathlink.index('..') == 0:
pathlink.remove('..')
else:
break
pathlink = '/'.join(pathlink)
pathfeed.extend([pathlink])
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
'/'.join(pathfeed), parted_link.query,
parted_link.fragment])
else:
pathlink = parted_link.path.split('/')
pathfeed = parted_feed.path.split('/')
if link.startswith('./'):
pathlink.remove('.')
if not source.endswith('/'):
pathfeed.pop()
pathlink = '/'.join(pathlink)
pathfeed.extend([pathlink])
new_link = urlunsplit([parted_feed.scheme, parted_feed.netloc,
'/'.join(pathfeed), parted_link.query,
parted_link.fragment])
return new_link
# TODO
# Feed https://www.ocaml.org/feed.xml
# Link %20https://frama-c.com/fc-versions/cobalt.html%20
# FIXME
# Feed https://cyber.dabamos.de/blog/feed.rss
# Link https://cyber.dabamos.de/blog/#article-2022-07-15
def join_url(source, link):
"""
Join base URL with given pathname.
Parameters
----------
source : str
Feed URL.
link : str
Link URL or pathname.
Returns
-------
str
URL.
"""
if link.startswith('data:') and ';base64,' in link:
return link
if link.startswith('www.'):
new_link = 'http://' + link
elif link.startswith('%20') and link.endswith('%20'):
old_link = link.split('%20')
del old_link[0]
old_link.pop()
new_link = ''.join(old_link)
else:
new_link = urljoin(source, link)
return new_link
def trim_url(url):
"""
Check URL pathname for double slash.
Parameters
----------
url : str
URL.
Returns
-------
url : str
URL.
"""
if url.startswith('data:') and ';base64,' in url:
return url
parted_url = urlsplit(url)
protocol = parted_url.scheme
hostname = parted_url.netloc
pathname = parted_url.path
queries = parted_url.query
fragment = parted_url.fragment
while '//' in pathname:
pathname = pathname.replace('//', '/')
url = urlunsplit([protocol, hostname, pathname, queries, fragment])
return url
def activitypub_to_http(namespace):
"""
Replace ActivityPub namespace by HTTP.
Parameters
----------
namespace : str
Namespace.
Returns
-------
new_url : str
URL.
"""
class Utilities:

View file

@@ -1,2 +1,2 @@
-__version__ = '0.1.81'
-__version_info__ = (0, 1, 81)
+__version__ = '0.1.82'
+__version_info__ = (0, 1, 82)

View file

@ -29,16 +29,11 @@ import slixfeed.config as config
from slixfeed.config import Config from slixfeed.config import Config
from slixfeed.log import Logger from slixfeed.log import Logger
import slixfeed.sqlite as sqlite
-from slixfeed.url import (
-    remove_tracking_parameters,
-    replace_hostname,
-)
from slixfeed.syndication import FeedTask
-from slixfeed.utilities import Documentation, Html, MD, Task
+from slixfeed.utilities import Documentation, Html, MD, Task, Url
from slixfeed.xmpp.commands import XmppCommands
from slixfeed.xmpp.message import XmppMessage
from slixfeed.xmpp.presence import XmppPresence
-from slixfeed.xmpp.privilege import is_operator, is_moderator
from slixfeed.xmpp.status import XmppStatusTask
from slixfeed.xmpp.upload import XmppUpload
from slixfeed.xmpp.utilities import XmppUtilities
@@ -89,7 +84,7 @@ class XmppChat:
        if (message['muc']['nick'] == self.alias):
            return
        jid_full = str(message['from'])
-       if not is_moderator(self, jid_bare, jid_full):
+       if not XmppUtilities.is_moderator(self, jid_bare, jid_full):
            return
        if message['type'] == 'groupchat':
@@ -115,7 +110,7 @@ class XmppChat:
        #     return
        # approved = False
        jid_full = str(message['from'])
-       if not is_moderator(self, jid_bare, jid_full):
+       if not XmppUtilities.is_moderator(self, jid_bare, jid_full):
            return
        # if role == 'moderator':
        #     approved = True
@@ -257,7 +252,7 @@ class XmppChat:
                response = 'Current value for archive: '
                response += XmppCommands.get_archive(self, jid_bare)
            case _ if command_lowercase.startswith('bookmark +'):
-               if is_operator(self, jid_bare):
+               if XmppUtilities.is_operator(self, jid_bare):
                    muc_jid = command[11:]
                    response = await XmppCommands.bookmark_add(
                        self, muc_jid)
@@ -265,7 +260,7 @@ class XmppChat:
                    response = ('This action is restricted. '
                                'Type: adding bookmarks.')
            case _ if command_lowercase.startswith('bookmark -'):
-               if is_operator(self, jid_bare):
+               if XmppUtilities.is_operator(self, jid_bare):
                    muc_jid = command[11:]
                    response = await XmppCommands.bookmark_del(
                        self, muc_jid)
@@ -273,7 +268,7 @@ class XmppChat:
                    response = ('This action is restricted. '
                                'Type: removing bookmarks.')
            case 'bookmarks':
-               if is_operator(self, jid_bare):
+               if XmppUtilities.is_operator(self, jid_bare):
                    response = await XmppCommands.print_bookmarks(self)
                else:
                    response = ('This action is restricted. '
@@ -333,7 +328,7 @@ class XmppChat:
                XmppPresence.send(self, jid_bare, status_message,
                                  status_type=status_type)
                filename, response = XmppCommands.export_feeds(
-                   self, jid_bare, ext)
+                   jid_bare, ext)
                url = await XmppUpload.start(self, jid_bare, filename)
                # response = (
                #     'Feeds exported successfully to {}.\n{}'
@@ -388,7 +383,7 @@ class XmppChat:
                response = await XmppCommands.pubsub_list(self, jid)
                response += '```'
            case _ if command_lowercase.startswith('pubsub send'):
-               if is_operator(self, jid_bare):
+               if XmppUtilities.is_operator(self, jid_bare):
                    info = command[12:]
                    info = info.split(' ')
                    jid = info[0]
@@ -461,7 +456,7 @@ class XmppChat:
                await XmppChatAction.send_unread_items(self, jid_bare, num)
                XmppStatusTask.restart_task(self, jid_bare)
            case _ if command_lowercase.startswith('node delete'):
-               if is_operator(self, jid_bare):
+               if XmppUtilities.is_operator(self, jid_bare):
                    info = command[12:]
                    info = info.split(' ')
                    response = XmppCommands.node_delete(self, info)
@@ -469,7 +464,7 @@ class XmppChat:
                    response = ('This action is restricted. '
                                'Type: sending news to PubSub.')
            case _ if command_lowercase.startswith('node purge'):
-               if is_operator(self, jid_bare):
+               if XmppUtilities.is_operator(self, jid_bare):
                    info = command[11:]
                    info = info.split(' ')
                    response = XmppCommands.node_purge(self, info)
@@ -770,8 +765,8 @@ class XmppChatAction:
        else:
            summary = '*** No summary ***'
        link = result[2]
-       link = remove_tracking_parameters(link)
-       link = await replace_hostname(link, "link") or link
+       link = Url.remove_tracking_parameters(link)
+       link = await Url.replace_hostname(link, "link") or link
        feed_id = result[4]
        # news_item = ("\n{}\n{}\n{} [{}]\n").format(str(title), str(link),
        #                                            str(feed_title), str(ix))
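
The export_feeds change above is the "fix command export" of the commit title: the handler previously passed self as an extra first argument, which suggests jid_bare and ext were being shifted one parameter over. A minimal sketch of the corrected call flow, assuming the new signature is XmppCommands.export_feeds(jid_bare, ext) and that it still returns a (filename, response) pair, as the handler unpacks it:

    # Sketch only; export_and_upload is a hypothetical wrapper name.
    from slixfeed.xmpp.commands import XmppCommands
    from slixfeed.xmpp.upload import XmppUpload

    async def export_and_upload(client, jid_bare, ext):
        # The export no longer needs the client instance; the JID locates
        # the subscriber database and ext selects the format (e.g. 'opml').
        filename, response = XmppCommands.export_feeds(jid_bare, ext)
        # Uploading, by contrast, still goes through the XMPP client.
        url = await XmppUpload.start(client, jid_bare, filename)
        return url or response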

View file

@@ -44,14 +44,11 @@ import slixmpp
import slixfeed.config as config
from slixfeed.config import Config
-import slixfeed.crawl as crawl
-import slixfeed.dt as dt
import slixfeed.fetch as fetch
from slixfeed.log import Logger
import slixfeed.sqlite as sqlite
-from slixfeed.syndication import Feed, FeedTask, Opml
+from slixfeed.syndication import Feed, FeedDiscovery, FeedTask, Opml
-import slixfeed.url as uri
-from slixfeed.utilities import Html, Task, Utilities
+from slixfeed.utilities import DateAndTime, Html, Task, Url, Utilities
from slixfeed.version import __version__
from slixfeed.xmpp.bookmark import XmppBookmark
from slixfeed.xmpp.chat import XmppChat, XmppChatTask
@@ -62,7 +59,6 @@ from slixfeed.xmpp.message import XmppMessage
from slixfeed.xmpp.muc import XmppMuc
from slixfeed.xmpp.groupchat import XmppGroupchat
from slixfeed.xmpp.presence import XmppPresence
-from slixfeed.xmpp.privilege import is_operator, is_access
import slixfeed.xmpp.profile as profile
from slixfeed.xmpp.publish import XmppPubsub, XmppPubsubAction, XmppPubsubTask
from slixfeed.xmpp.roster import XmppRoster
@@ -791,7 +787,7 @@ class XmppClient(slixmpp.ClientXMPP):
        # )
        # NOTE https://codeberg.org/poezio/slixmpp/issues/3515
-       # if is_operator(self, jid_bare):
+       # if XmppUtilities.is_operator(self, jid_bare):
        self['xep_0050'].add_command(node='subscription',
                                     name='🪶️ Subscribe',
                                     handler=self._handle_subscription_add)
@@ -842,7 +838,7 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-       if is_access(self, jid_bare, jid_full, chat_type):
+       if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
            form = self['xep_0004'].make_form('form', 'PubSub')
            form['instructions'] = 'Publish news items to PubSub nodes.'
            options = form.add_field(desc='From which medium source do you '
@@ -863,7 +859,7 @@ class XmppClient(slixmpp.ClientXMPP):
            session['prev'] = None
            session['payload'] = form
        else:
-           if not is_operator(self, jid_bare):
+           if not XmppUtilities.is_operator(self, jid_bare):
                text_warn = 'This resource is restricted to operators.'
            elif chat_type == 'groupchat':
                text_warn = ('This resource is restricted to moderators of {}.'
@@ -883,7 +879,7 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-       if is_access(self, jid_bare, jid_full, chat_type):
+       if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
            values = payload['values']
            form = self['xep_0004'].make_form('form', 'Publish')
            form['instructions'] = ('Choose a PubSub Jabber ID and verify '
@@ -971,7 +967,7 @@ class XmppClient(slixmpp.ClientXMPP):
            session['has_next'] = True
            session['prev'] = self._handle_publish
        else:
-           if not is_operator(self, jid_bare):
+           if not XmppUtilities.is_operator(self, jid_bare):
                text_warn = 'This resource is restricted to operators.'
            elif chat_type == 'groupchat':
                text_warn = ('This resource is restricted to moderators of {}.'
@@ -994,7 +990,7 @@ class XmppClient(slixmpp.ClientXMPP):
        print(values['jid'])
        jid = values['jid'] if 'jid' in values else None
        jid_bare = session['from'].bare
-       if jid != jid_bare and not is_operator(self, jid_bare):
+       if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
            text_warn = ('Posting to {} is restricted to operators only.'
                         .format(jid_bare)) # Should not this be self.boundjid.bare?
            session['allow_prev'] = False
@@ -1065,7 +1061,7 @@ class XmppClient(slixmpp.ClientXMPP):
        ixs = values['entries']
        #if jid: jid = jid[0] if isinstance(jid, list) else jid
        jid_bare = session['from'].bare
-       if jid != jid_bare and not is_operator(self, jid_bare):
+       if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
            # TODO Report incident
            text_warn = 'You are not suppose to be here.'
            session['allow_prev'] = False
@@ -1100,7 +1096,7 @@ class XmppClient(slixmpp.ClientXMPP):
        values = payload['values']
        jid = values['jid'] if 'jid' in values else None
        jid_bare = session['from'].bare
-       if jid != jid_bare and not is_operator(self, jid_bare):
+       if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
            # TODO Report incident
            text_warn = 'You are not suppose to be here.'
            # text_warn = ('Posting to {} is restricted to operators only.'
@@ -1119,7 +1115,7 @@ class XmppClient(slixmpp.ClientXMPP):
        if jid == self.boundjid.bare:
            node = 'urn:xmpp:microblog:0'
        else:
-           node = uri.get_hostname(url)
+           node = Url.get_hostname(url)
        form = self['xep_0004'].make_form('form', 'Publish')
        while True:
            result = await fetch.http(url)
@@ -1137,7 +1133,7 @@ class XmppClient(slixmpp.ClientXMPP):
                if "title" in feed["feed"].keys():
                    title = feed["feed"]["title"]
                else:
-                   title = uri.get_hostname(url)
+                   title = Url.get_hostname(url)
                entries = feed.entries
                entry_ix = 0
                for entry in entries:
@@ -1146,10 +1142,10 @@ class XmppClient(slixmpp.ClientXMPP):
                    else:
                        if entry.has_key("published"):
                            title = entry.published
-                           title = dt.rfc2822_to_iso8601(title)
+                           title = DateAndTime.rfc2822_to_iso8601(title)
                        elif entry.has_key("updated"):
                            title = entry.updated
-                           title = dt.rfc2822_to_iso8601(title)
+                           title = DateAndTime.rfc2822_to_iso8601(title)
                        else:
                            title = "*** No title ***"
                        options.addOption(title, str(entry_ix))
@@ -1164,7 +1160,7 @@ class XmppClient(slixmpp.ClientXMPP):
                session['payload'] = form
                break
            else:
-               result = await crawl.probe_page(url, document)
+               result = await FeedDiscovery.probe_page(url, document)
                if isinstance(result, list):
                    results = result
                    form['instructions'] = ('Discovered {} subscriptions '
@@ -1225,7 +1221,7 @@ class XmppClient(slixmpp.ClientXMPP):
        jid = values['jid'][0] if 'jid' in values else None
        #if jid: jid = jid[0] if isinstance(jid, list) else jid
        jid_bare = session['from'].bare
-       if jid != jid_bare and not is_operator(self, jid_bare):
+       if jid != jid_bare and not XmppUtilities.is_operator(self, jid_bare):
            # TODO Report incident
            text_warn = 'You are not suppose to be here.'
            session['allow_prev'] = False
@@ -1262,10 +1258,10 @@ class XmppClient(slixmpp.ClientXMPP):
        # else:
        #     if feed.entries[entry].has_key("published"):
        #         title = feed.entries[entry].published
-       #         title = dt.rfc2822_to_iso8601(title)
+       #         title = DateAndTime.rfc2822_to_iso8601(title)
        #     elif feed.entries[entry].has_key("updated"):
        #         title = feed.entries[entry].updated
-       #         title = dt.rfc2822_to_iso8601(title)
+       #         title = DateAndTime.rfc2822_to_iso8601(title)
        #     else:
        #         title = "*** No title ***"
        #     if feed.entries[entry].has_key("summary"):
@@ -1393,7 +1389,7 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-       if is_access(self, jid_bare, jid_full, chat_type):
+       if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
            jid = session['from'].bare
            db_file = config.get_pathname_to_database(jid_bare)
            form = self['xep_0004'].make_form('form', 'Filters')
@@ -1432,7 +1428,7 @@ class XmppClient(slixmpp.ClientXMPP):
            session['next'] = self._handle_filters_complete
            session['payload'] = form
        else:
-           if not is_operator(self, jid_bare):
+           if not XmppUtilities.is_operator(self, jid_bare):
                text_warn = 'This resource is restricted to operators.'
            elif chat_type == 'groupchat':
                text_warn = ('This resource is restricted to moderators of {}.'
@@ -1502,7 +1498,7 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-       if is_access(self, jid_bare, jid_full, chat_type):
+       if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
            form = self['xep_0004'].make_form('form', 'Subscribe')
            # form['instructions'] = 'Add a new custom subscription.'
            form.add_field(desc='Enter a URL.',
@@ -1517,7 +1513,7 @@ class XmppClient(slixmpp.ClientXMPP):
                           required=True,
                           value='http://',
                           var='subscription')
-           if is_operator(self, jid_bare):
+           if XmppUtilities.is_operator(self, jid_bare):
                # form['instructions'] = ('Special section for operators:\n'
                #                         'This section allows you to add '
                #                         'subscriptions for a JID of your '
@@ -1544,7 +1540,7 @@ class XmppClient(slixmpp.ClientXMPP):
            session['prev'] = None
            session['payload'] = form
        else:
-           if not is_operator(self, jid_bare):
+           if not XmppUtilities.is_operator(self, jid_bare):
                text_warn = 'This resource is restricted to operators.'
            elif chat_type == 'groupchat':
                text_warn = ('This resource is restricted to moderators of {}.'
@@ -1576,7 +1572,7 @@ class XmppClient(slixmpp.ClientXMPP):
            # options.addOption('News by tag', 'tag')
            options.addOption('Rejected', 'reject')
            options.addOption('Unread', 'unread')
-           if is_operator(self, jid_bare):
+           if XmppUtilities.is_operator(self, jid_bare):
                # form['instructions'] = ('Special section for operators:\n'
                #                         'This section allows you to view news items '
                #                         'of a JID of your choice.')
@@ -1617,7 +1613,7 @@ class XmppClient(slixmpp.ClientXMPP):
        jid_bare = session['from'].bare
        values = payload['values']
        form = self['xep_0004'].make_form('form', 'Updates')
-       if is_operator(self, jid_bare) and 'jid' in values:
+       if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
            jid_bare = values['jid']
            form.add_field(var='jid',
                           ftype='hidden',
@@ -1675,7 +1671,7 @@ class XmppClient(slixmpp.ClientXMPP):
        ix = values['update']
        jid_bare = session['from'].bare
        form = self['xep_0004'].make_form('form', 'Article')
-       if is_operator(self, jid_bare) and 'jid' in values:
+       if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
            jid = values['jid']
            jid_bare = jid[0] if isinstance(jid, list) else jid
            form.add_field(var='jid',
@@ -1688,9 +1684,9 @@ class XmppClient(slixmpp.ClientXMPP):
        url = sqlite.get_entry_url(db_file, ix)
        url = url[0] # TODO Handle a situation when index is no longer exist
        logger.debug('Original URL: {}'.format(url))
-       url = uri.remove_tracking_parameters(url)
+       url = Url.remove_tracking_parameters(url)
        logger.debug('Processed URL (tracker removal): {}'.format(url))
-       url = (await uri.replace_hostname(url, 'link')) or url
+       url = (await Url.replace_hostname(url, 'link')) or url
        logger.debug('Processed URL (replace hostname): {}'.format(url))
        # result = await fetch.http(url)
        # if 'content' in result:
@@ -1750,7 +1746,7 @@ class XmppClient(slixmpp.ClientXMPP):
        identifier = values['identifier'] if 'identifier' in values else None
        url = values['subscription']
        jid_bare = session['from'].bare
-       if is_operator(self, jid_bare) and 'jid' in values:
+       if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
            custom_jid = values['jid']
            jid_bare = custom_jid[0] if isinstance(custom_jid, list) else jid_bare
            # jid_bare = custom_jid[0] if custom_jid else jid_bare
@@ -1780,7 +1776,7 @@ class XmppClient(slixmpp.ClientXMPP):
            session['prev'] = None
        # elif not identifier:
        #     counter = 0
-       #     hostname = uri.get_hostname(url)
+       #     hostname = Url.get_hostname(url)
        #     identifier = hostname + ':' + str(counter)
        #     while True:
        #         if sqlite.check_identifier_exist(db_file, identifier):
@@ -1797,7 +1793,7 @@ class XmppClient(slixmpp.ClientXMPP):
            exist_count = 0
            for url in urls:
                counter = 0
-               hostname = uri.get_hostname(url)
+               hostname = Url.get_hostname(url)
                identifier = hostname + ':' + str(counter)
                while True:
                    if sqlite.check_identifier_exist(db_file, identifier):
@@ -1830,7 +1826,7 @@ class XmppClient(slixmpp.ClientXMPP):
            if isinstance(url, list):
                url = url[0]
            counter = 0
-           hostname = uri.get_hostname(url)
+           hostname = Url.get_hostname(url)
            identifier = hostname + ':' + str(counter)
            while True:
                if sqlite.check_identifier_exist(db_file, identifier):
@@ -1956,7 +1952,7 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        values = payload['values']
-       if is_operator(self, jid_bare) and 'jid' in values:
+       if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
            jid_bare = values['jid'][0]
            del values['jid']
        db_file = config.get_pathname_to_database(jid_bare)
@@ -1981,7 +1977,7 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        values = payload['values']
-       if is_operator(self, jid_bare) and 'jid' in values:
+       if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
            jid_bare = values['jid'][0]
            del values['jid']
        db_file = config.get_pathname_to_database(jid_bare)
@@ -2022,7 +2018,7 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-       if is_access(self, jid_bare, jid_full, chat_type):
+       if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
            form = self['xep_0004'].make_form('form', 'Discover & Search')
            form['instructions'] = 'Discover news subscriptions of all kinds'
            options = form.add_field(desc='Select type of search.',
@@ -2039,7 +2035,7 @@ class XmppClient(slixmpp.ClientXMPP):
            session['payload'] = form
            session['prev'] = None
        else:
-           if not is_operator(self, jid_bare):
+           if not XmppUtilities.is_operator(self, jid_bare):
                text_warn = 'This resource is restricted to operators.'
            elif chat_type == 'groupchat':
                text_warn = ('This resource is restricted to moderators of {}.'
@@ -2146,7 +2142,7 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-       if is_access(self, jid_bare, jid_full, chat_type):
+       if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
            form = self['xep_0004'].make_form('form', 'Subscriptions')
            form['instructions'] = ('Browse, view, toggle or remove '
                                    'tags and subscriptions.')
@@ -2160,7 +2156,7 @@ class XmppClient(slixmpp.ClientXMPP):
            options.addOption('Browse tags', 'tag')
            options.addOption('Remove subscriptions', 'delete')
            options.addOption('Toggle subscriptions', 'toggle')
-           if is_operator(self, jid_bare):
+           if XmppUtilities.is_operator(self, jid_bare):
                form['instructions'] = None
                # form['instructions'] = ('Special section for operators:\n'
                #                         'This section allows you to change '
@@ -2190,7 +2186,7 @@ class XmppClient(slixmpp.ClientXMPP):
            session['next'] = self._handle_subscriptions_result
            session['has_next'] = True
        else:
-           if not is_operator(self, jid_bare):
+           if not XmppUtilities.is_operator(self, jid_bare):
                text_warn = 'This resource is restricted to operators.'
            elif chat_type == 'groupchat':
                text_warn = ('This resource is restricted to moderators of {}.'
@@ -2212,7 +2208,7 @@ class XmppClient(slixmpp.ClientXMPP):
        values = payload['values']
        jid_bare = session['from'].bare
        form = self['xep_0004'].make_form('form', 'Subscriptions')
-       if is_operator(self, jid_bare) and 'jid' in values:
+       if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
            jid_bare = values['jid']
            form.add_field(ftype='hidden',
                           value=jid_bare,
@@ -2306,7 +2302,7 @@ class XmppClient(slixmpp.ClientXMPP):
        form = self['xep_0004'].make_form('form', 'Subscriptions')
        jid_bare = session['from'].bare
        values = payload['values']
-       if is_operator(self, jid_bare) and 'jid' in values:
+       if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
            jid_bare = values['jid'][0]
            form.add_field(ftype='hidden',
                           value=jid_bare,
@@ -2344,7 +2340,7 @@ class XmppClient(slixmpp.ClientXMPP):
        form = self['xep_0004'].make_form('form', 'Subscription')
        jid_bare = session['from'].bare
        values = payload['values']
-       if is_operator(self, jid_bare) and 'jid' in values:
+       if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
            jid_bare = values['jid'][0] if values['jid'] else jid_bare
            form.add_field(ftype='hidden',
                           value=jid_bare,
@@ -2440,7 +2436,7 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        values = payload['values']
-       if is_operator(self, jid_bare) and 'jid' in values:
+       if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
            jid_bare = values['jid'][0]
        db_file = config.get_pathname_to_database(jid_bare)
        # url = values['url']
@@ -2506,14 +2502,14 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-       if is_access(self, jid_bare, jid_full, chat_type):
+       if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
            form = self['xep_0004'].make_form('form', 'Advanced')
            form['instructions'] = 'Extended options'
            options = form.add_field(ftype='list-single',
                                     label='Choose',
                                     required=True,
                                     var='option')
-           if is_operator(self, jid_bare):
+           if XmppUtilities.is_operator(self, jid_bare):
                options.addOption('Administration', 'admin')
                # options.addOption('Activity', 'activity')
                # options.addOption('Filters', 'filter')
@@ -2527,7 +2523,7 @@ class XmppClient(slixmpp.ClientXMPP):
            session['next'] = self._handle_advanced_result
            session['prev'] = self._handle_advanced
        else:
-           if not is_operator(self, jid_bare):
+           if not XmppUtilities.is_operator(self, jid_bare):
                text_warn = 'This resource is restricted to operators.'
            elif chat_type == 'groupchat':
                text_warn = ('This resource is restricted to moderators of {}.'
@@ -2556,7 +2552,7 @@ class XmppClient(slixmpp.ClientXMPP):
            case 'admin':
                # NOTE Even though this check is already conducted on previous
                # form, this check is being done just in case.
-               if is_operator(self, jid_bare):
+               if XmppUtilities.is_operator(self, jid_bare):
                    if self.is_component:
                        # NOTE This will be changed with XEP-0222 XEP-0223
                        text_info = ('Subscriber management options are '
@@ -2589,7 +2585,7 @@ class XmppClient(slixmpp.ClientXMPP):
                else:
                    logger.warning('An unauthorized attempt to access '
                                   'bookmarks has been detected for JID {} at '
-                                  '{}'.format(jid_bare, dt.timestamp()))
+                                  '{}'.format(jid_bare, DateAndTime.timestamp()))
                    text_warn = 'This resource is restricted.'
                    session['notes'] = [['warn', text_warn]]
                    session['has_next'] = False
@@ -2617,7 +2613,7 @@ class XmppClient(slixmpp.ClientXMPP):
                       required=True,
                       var='url')
        url['validate']['datatype'] = 'xs:anyURI'
-       if is_operator(self, jid_bare):
+       if XmppUtilities.is_operator(self, jid_bare):
            form.add_field(ftype='fixed',
                           label='* Operators',
                           desc='This section allows you to import '
@@ -2651,7 +2647,7 @@ class XmppClient(slixmpp.ClientXMPP):
        options.addOption('OPML', 'opml')
        # options.addOption('HTML', 'html')
        # options.addOption('XBEL', 'xbel')
-       if is_operator(self, jid_bare):
+       if XmppUtilities.is_operator(self, jid_bare):
            # form['instructions'] = ('Special section for operators:\n'
            #                         'This section allows you to '
            #                         'import and export subscriptions '
@@ -2841,7 +2837,7 @@ class XmppClient(slixmpp.ClientXMPP):
        url = values['url']
        if url.startswith('http') and url.endswith('.opml'):
            jid_bare = session['from'].bare
-           if is_operator(self, jid_bare) and 'jid' in values:
+           if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
                jid = values['jid']
                jid_bare = jid[0] if isinstance(jid, list) else jid
            db_file = config.get_pathname_to_database(jid_bare)
@@ -2882,7 +2878,7 @@ class XmppClient(slixmpp.ClientXMPP):
        # form['type'] = 'result'
        values = payload['values']
        jid_bare = session['from'].bare
-       if is_operator(self, jid_bare) and 'jid' in values:
+       if XmppUtilities.is_operator(self, jid_bare) and 'jid' in values:
            jid = values['jid']
            jid_bare = jid[0] if isinstance(jid, list) else jid
        # form = self['xep_0004'].make_form('result', 'Done')
@@ -2915,7 +2911,7 @@ class XmppClient(slixmpp.ClientXMPP):
        jid_bare = session['from'].bare
        jid_full = str(session['from'])
        chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-       if is_access(self, jid_bare, jid_full, chat_type):
+       if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
            form = self['xep_0004'].make_form('form', 'Subscribe')
            # NOTE Refresh button would be of use
            form['instructions'] = 'Featured subscriptions'
@@ -2938,7 +2934,7 @@ class XmppClient(slixmpp.ClientXMPP):
        if '@' in jid_bare:
            hostname = jid_bare.split('@')[1]
            url = 'http://' + hostname
-       result = await crawl.probe_page(url)
+       result = await FeedDiscovery.probe_page(url)
        if not result:
            url = {'url' : url,
                   'index' : None,
@@ -2966,7 +2962,7 @@ class XmppClient(slixmpp.ClientXMPP):
            session['payload'] = form
            session['prev'] = self._handle_promoted
        else:
-           if not is_operator(self, jid_bare):
+           if not XmppUtilities.is_operator(self, jid_bare):
                text_warn = 'This resource is restricted to operators.'
            elif chat_type == 'groupchat':
                text_warn = ('This resource is restricted to moderators of {}.'
@@ -3620,7 +3616,7 @@ class XmppClient(slixmpp.ClientXMPP):
                     .format(function_name, jid_full))
        jid_bare = session['from'].bare
        chat_type = await XmppUtilities.get_chat_type(self, jid_bare)
-       if is_access(self, jid_bare, jid_full, chat_type):
+       if XmppUtilities.is_access(self, jid_bare, jid_full, chat_type):
            db_file = config.get_pathname_to_database(jid_bare)
            if jid_bare not in self.settings:
                Config.add_settings_jid(self.settings, jid_bare, db_file)
@@ -3718,7 +3714,7 @@ class XmppClient(slixmpp.ClientXMPP):
            session['next'] = self._handle_settings_complete
            session['payload'] = form
        else:
-           if not is_operator(self, jid_bare):
+           if not XmppUtilities.is_operator(self, jid_bare):
                text_warn = 'This resource is restricted to operators.'
            elif chat_type == 'groupchat':
                text_warn = ('This resource is restricted to moderators of {}.'
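
Throughout this file the module-level crawl.probe_page becomes FeedDiscovery.probe_page on slixfeed.syndication, with an unchanged calling convention. A hedged sketch of the new entry point, assuming it keeps the old behaviour (a URL plus an optional pre-fetched document, returning a list when several feeds are discovered and a string otherwise); discover_feeds is a hypothetical wrapper name:

    from slixfeed.syndication import FeedDiscovery

    async def discover_feeds(url, document=None):
        # The call sites above treat a list result as a set of discovered
        # subscriptions and any other truthy result as a single message.
        result = await FeedDiscovery.probe_page(url, document)
        if isinstance(result, list):
            return result
        return [result] if result else []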

View file

@@ -5,14 +5,11 @@ from feedparser import parse
from random import randrange
import slixfeed.config as config
from slixfeed.config import Config
-import slixfeed.crawl as crawl
-import slixfeed.dt as dt
import slixfeed.fetch as fetch
from slixfeed.log import Logger
import slixfeed.sqlite as sqlite
-from slixfeed.syndication import Feed, Opml
+from slixfeed.syndication import Feed, FeedDiscovery, Opml
-import slixfeed.url as uri
-from slixfeed.utilities import Documentation, Utilities
+from slixfeed.utilities import DateAndTime, Documentation, Url, Utilities
from slixfeed.version import __version__
from slixfeed.xmpp.bookmark import XmppBookmark
from slixfeed.xmpp.muc import XmppMuc
@@ -121,9 +118,9 @@ class XmppCommands:
        """
        if url.startswith('http'):
            if not title:
-               title = uri.get_hostname(url)
+               title = Url.get_hostname(url)
            counter = 0
-           hostname = uri.get_hostname(url)
+           hostname = Url.get_hostname(url)
            hostname = hostname.replace('.','-')
            identifier = hostname + ':' + str(counter)
            while True:
@@ -148,7 +145,7 @@ class XmppCommands:
            if feed.has_key('updated_parsed'):
                feed_updated = feed.updated_parsed
                try:
-                   feed_updated = dt.convert_struct_time_to_iso8601(
+                   feed_updated = DateAndTime.convert_struct_time_to_iso8601(
                        feed_updated)
                except:
                    feed_updated = None
@@ -393,7 +390,7 @@ class XmppCommands:
                identifier = info[2]
            else:
                counter = 0
-               hostname = uri.get_hostname(url)
+               hostname = Url.get_hostname(url)
                hostname = hostname.replace('.','-')
                identifier = hostname + ':' + str(counter)
                while True:
@@ -417,8 +414,8 @@ class XmppCommands:
        if (url.startswith('feed:/') or
            url.startswith('itpc:/') or
            url.startswith('rss:/')):
-           url = uri.feed_to_http(url)
-       url = (await uri.replace_hostname(url, 'feed')) or url
+           url = Url.feed_to_http(url)
+       url = (await Url.replace_hostname(url, 'feed')) or url
        result = await Feed.add_feed(self, jid_bare, db_file, url,
                                     identifier)
        if isinstance(result, list):
@@ -479,10 +476,10 @@ class XmppCommands:
    # both interfaces Chat and IPC
    async def fetch_http(self, url, db_file, jid_bare):
        if url.startswith('feed:/') or url.startswith('rss:/'):
-           url = uri.feed_to_http(url)
-       url = (await uri.replace_hostname(url, 'feed')) or url
+           url = Url.feed_to_http(url)
+       url = (await Url.replace_hostname(url, 'feed')) or url
        counter = 0
-       hostname = uri.get_hostname(url)
+       hostname = Url.get_hostname(url)
        hostname = hostname.replace('.','-')
        identifier = hostname + ':' + str(counter)
        while True:
@@ -581,7 +578,7 @@ class XmppCommands:
    async def muc_join(self, command):
        if command:
-           muc_jid = uri.check_xmpp_uri(command)
+           muc_jid = Url.check_xmpp_uri(command)
            if muc_jid:
                # TODO probe JID and confirm it's a groupchat
                result = await XmppMuc.join(self, muc_jid)
@@ -735,8 +732,8 @@ class XmppCommands:
    async def feed_read(self, jid_bare, data, url):
        if url.startswith('feed:/') or url.startswith('rss:/'):
-           url = uri.feed_to_http(url)
-       url = (await uri.replace_hostname(url, 'feed')) or url
+           url = Url.feed_to_http(url)
+       url = (await Url.replace_hostname(url, 'feed')) or url
        match len(data):
            case 1:
                if url.startswith('http'):
@@ -750,7 +747,7 @@ class XmppCommands:
                        message = Feed.view_feed(url, feed)
                        break
                    else:
-                       result = await crawl.probe_page(url, document)
+                       result = await FeedDiscovery.probe_page(url, document)
                        if isinstance(result, list):
                            results = result
                            message = ("Syndication feeds found for {}\n\n```\n"
@@ -786,7 +783,7 @@ class XmppCommands:
                        message = Feed.view_entry(url, feed, num)
                        break
                    else:
-                       result = await crawl.probe_page(url, document)
+                       result = await FeedDiscovery.probe_page(url, document)
                        if isinstance(result, list):
                            results = result
                            message = ("Syndication feeds found for {}\n\n```\n"

View file

@@ -14,7 +14,7 @@ TODO
"""
import asyncio
-from slixfeed.dt import current_time
+from slixfeed.utilities import DateAndTime
from slixfeed.log import Logger
from slixmpp.exceptions import IqTimeout, IqError
from time import sleep
@@ -62,17 +62,17 @@ class XmppConnect:
    def recover(self, message):
        logger.warning(message)
-       print(current_time(), message, 'Attempting to reconnect.')
+       print(DateAndTime.current_time(), message, 'Attempting to reconnect.')
        self.connection_attempts += 1
        # if self.connection_attempts <= self.max_connection_attempts:
        #     self.reconnect(wait=5.0) # wait a bit before attempting to reconnect
        # else:
        #     print(current_time(),"Maximum connection attempts exceeded.")
        #     logging.error("Maximum connection attempts exceeded.")
-       print(current_time(), 'Attempt number', self.connection_attempts)
+       print(DateAndTime.current_time(), 'Attempt number', self.connection_attempts)
        seconds = self.reconnect_timeout or 30
        seconds = int(seconds)
-       print(current_time(), 'Next attempt within', seconds, 'seconds')
+       print(DateAndTime.current_time(), 'Next attempt within', seconds, 'seconds')
        # NOTE asyncio.sleep doesn't interval as expected
        # await asyncio.sleep(seconds)
        sleep(seconds)
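
The recover path now reaches the clock through the namespaced DateAndTime.current_time(); the time.sleep at the end blocks the event loop, which appears to be the trade-off behind the NOTE about asyncio.sleep. A condensed sketch of the method as the hunk leaves it, for reference:

    from time import sleep
    from slixfeed.utilities import DateAndTime

    def recover(self, message):
        # Condensed from the hunk above; every print is timestamped.
        print(DateAndTime.current_time(), message, 'Attempting to reconnect.')
        self.connection_attempts += 1
        print(DateAndTime.current_time(), 'Attempt number', self.connection_attempts)
        seconds = int(self.reconnect_timeout or 30)
        print(DateAndTime.current_time(), 'Next attempt within', seconds, 'seconds')
        sleep(seconds)  # blocking; asyncio.sleep "doesn't interval as expected"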

View file

@@ -1,49 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


def is_access(self, jid_bare, jid_full, chat_type):
    """Determine access privilege"""
    operator = is_operator(self, jid_bare)
    if operator:
        if chat_type == 'groupchat':
            if is_moderator(self, jid_bare, jid_full):
                access = True
        else:
            access = True
    else:
        access = False
    return access


def is_operator(self, jid_bare):
    """Check if given JID is an operator"""
    result = False
    for operator in self.operators:
        if jid_bare == operator['jid']:
            result = True
            # operator_name = operator['name']
            break
    return result


def is_moderator(self, jid_bare, jid_full):
    """Check if given JID is a moderator"""
    alias = jid_full[jid_full.index('/')+1:]
    role = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'role')
    if role == 'moderator':
        result = True
    else:
        result = False
    return result


def is_member(self, jid_bare, jid_full):
    """Check if given JID is a member"""
    alias = jid_full[jid_full.index('/')+1:]
    affiliation = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'affiliation')
    if affiliation == 'member':
        result = True
    else:
        result = False
    return result

View file

@@ -16,8 +16,7 @@ from slixfeed.config import Config
from slixfeed.log import Logger
import slixfeed.sqlite as sqlite
from slixfeed.syndication import Feed
-import slixfeed.url as uri
-from slixfeed.utilities import Utilities
+from slixfeed.utilities import Url, Utilities
from slixfeed.xmpp.iq import XmppIQ
import sys
@@ -337,7 +336,7 @@ class XmppPubsubAction:
            node_id = node_id[0]
        if not node_id:
            counter = 0
-           hostname = uri.get_hostname(url)
+           hostname = Url.get_hostname(url)
            hostname = hostname.replace('.','-')
            identifier = hostname + ':' + str(counter)
            while True:

View file

@@ -16,7 +16,7 @@ logger = Logger(__name__)
class XmppUpload:

    async def start(self, jid, filename, domain=None):
-       logger.info('Uploading file %s...', filename)
+       logger.info(['Uploading file %s...', filename])
        try:
            upload_file = self['xep_0363'].upload_file
            # if self.encrypted and not self['xep_0454']:
@@ -34,7 +34,7 @@ class XmppUpload:
                filename, domain, timeout=10,
            )
            logger.info('Upload successful!')
-           logger.info('Sending file to %s', jid)
+           logger.info(['Sending file to %s', jid])
        except HTTPError:
            url = ('Error: It appears that this server does not support '
                   'HTTP File Upload.')

View file

@@ -58,3 +58,51 @@ class XmppUtilities:
        # finally:
        #     logger.info('Chat type is:', chat_type)
        return result
+
+    def is_access(self, jid_bare, jid_full, chat_type):
+        """Determine access privilege"""
+        operator = XmppUtilities.is_operator(self, jid_bare)
+        if operator:
+            if chat_type == 'groupchat':
+                if XmppUtilities.is_moderator(self, jid_bare, jid_full):
+                    access = True
+            else:
+                access = True
+        else:
+            access = False
+        return access
+
+    def is_operator(self, jid_bare):
+        """Check if given JID is an operator"""
+        result = False
+        for operator in self.operators:
+            if jid_bare == operator['jid']:
+                result = True
+                # operator_name = operator['name']
+                break
+        return result
+
+    def is_moderator(self, jid_bare, jid_full):
+        """Check if given JID is a moderator"""
+        alias = jid_full[jid_full.index('/')+1:]
+        role = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'role')
+        if role == 'moderator':
+            result = True
+        else:
+            result = False
+        return result
+
+    def is_member(self, jid_bare, jid_full):
+        """Check if given JID is a member"""
+        alias = jid_full[jid_full.index('/')+1:]
+        affiliation = self.plugin['xep_0045'].get_jid_property(jid_bare, alias, 'affiliation')
+        if affiliation == 'member':
+            result = True
+        else:
+            result = False
+        return result
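
With the predicates relocated from the deleted slixfeed.xmpp.privilege module onto XmppUtilities, call sites gain the class prefix but keep the same positional arguments, the client still being passed explicitly. A usage sketch under that assumption; guarded_handler is a hypothetical name. Note that is_access leaves access unassigned when an operator is in a group chat without the moderator role, a pre-existing quirk this move preserves:

    from slixfeed.xmpp.utilities import XmppUtilities

    async def guarded_handler(client, session):
        jid_bare = session['from'].bare
        jid_full = str(session['from'])
        chat_type = await XmppUtilities.get_chat_type(client, jid_bare)
        # Same arguments as the former module-level functions.
        if XmppUtilities.is_access(client, jid_bare, jid_full, chat_type):
            return 'granted'
        if not XmppUtilities.is_operator(client, jid_bare):
            return 'This resource is restricted to operators.'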