Add slixfeed.py for command line and split xmpp into modules.

This commit is contained in:
Schimon Jehudah 2023-12-28 14:50:23 +00:00
parent 8fbe97e357
commit 61bd792572
20 changed files with 2209 additions and 2185 deletions

117
slixfeed.py Normal file
View file

@ -0,0 +1,117 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright © 2023 Schimon Jehudah
# This program is free software: you can redistribute it and/or modify
# it under the terms of the MIT License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# MIT License for more details.
#
# You should have received a copy of the MIT License along with
# this program. If not, see <https://opensource.org/license/mit/>
#
# Slixfeed - RSS news bot for XMPP
#
# SPDX-FileCopyrightText: 2023 Schimon Jehudah
#
# SPDX-License-Identifier: MIT
from slixfeed.__main__ import Jabber
from slixfeed.xmpp.client import Slixfeed
import slixfeed.file as filehandler
from argparse import ArgumentParser
import configparser
# import filehandler
# from filehandler import get_default_confdir
from getpass import getpass
import logging
import os
import sys
if __name__ == '__main__':

    # Setup the command line arguments.
    parser = ArgumentParser(description=Slixfeed.__doc__)

    # Output verbosity options.
    # Both flags write to the same destination ("loglevel"); when neither
    # is given the level stays at INFO.
    parser.add_argument(
        "-q",
        "--quiet",
        help="set logging to ERROR",
        action="store_const",
        dest="loglevel",
        const=logging.ERROR,
        default=logging.INFO
    )
    parser.add_argument(
        "-d",
        "--debug",
        help="set logging to DEBUG",
        action="store_const",
        dest="loglevel",
        const=logging.DEBUG,
        default=logging.INFO
    )

    # JID and password options.
    parser.add_argument(
        "-j",
        "--jid",
        dest="jid",
        help="Jabber ID"
    )
    parser.add_argument(
        "-p",
        "--password",
        dest="password",
        help="Password of JID"
    )
    parser.add_argument(
        "-n",
        "--nickname",
        dest="nickname",
        help="Display name"
    )

    args = parser.parse_args()

    # Setup logging.
    logging.basicConfig(
        level=args.loglevel,
        format='%(levelname)-8s %(message)s'
    )

    # Credentials start out unset, so that a missing accounts.ini or a
    # missing [XMPP] section falls through to the command line arguments
    # and the interactive prompts instead of raising NameError.
    username = None
    password = None
    nickname = None

    # Try configuration file
    config = configparser.RawConfigParser()
    config_dir = filehandler.get_default_confdir()
    # makedirs also creates missing parent directories and does not fail
    # when the directory already exists.
    os.makedirs(config_dir, exist_ok=True)
    # TODO Copy file from /etc/slixfeed/ or /usr/share/slixfeed/
    config_file = os.path.join(config_dir, r"accounts.ini")
    config.read(config_file)
    if config.has_section("XMPP"):
        xmpp = config["XMPP"]
        # .get() yields None for a missing key instead of raising KeyError.
        nickname = xmpp.get("nickname")
        username = xmpp.get("username")
        password = xmpp.get("password")

    # Use arguments if were given
    if args.jid:
        username = args.jid
    if args.password:
        password = args.password
    if args.nickname:
        nickname = args.nickname

    # Prompt for credentials if none were given
    # (an empty value in accounts.ini counts as "not given").
    if not username:
        username = input("Username: ")
    if not password:
        password = getpass("Password: ")
    if not nickname:
        nickname = input("Nickname: ")

    Jabber(username, password, nickname)
    sys.exit(0)

View file

@ -13,56 +13,53 @@ FIXME
TODO TODO
1) from slixfeed.FILENAME import XYZ 1) SQL prepared statements;
See project /chaica/feed2toot
2) SQL prepared statements; 2) Machine Learning for scrapping Title, Link, Summary and Timstamp;
3) Machine Learning for scrapping Title, Link, Summary and Timstamp;
Scrape element </article> (example: Liferea) Scrape element </article> (example: Liferea)
http://intertwingly.net/blog/ http://intertwingly.net/blog/
https://www.brandenburg.de/ https://www.brandenburg.de/
4) Set MUC subject 3) Set MUC subject
Feeds which entries are to be set as groupchat subject. Feeds which entries are to be set as groupchat subject.
Perhaps not, as it would require to check every feed for this setting. Perhaps not, as it would require to check every feed for this setting.
Maybe a separate bot; Maybe a separate bot;
5) Support categories; 4) Support categories;
6) XMPP commands; 5) XMPP commands;
7) Bot as transport; 6) Bot as service;
8) OMEMO; 7) OMEMO;
9) Logging; 8) Logging;
https://docs.python.org/3/howto/logging.html https://docs.python.org/3/howto/logging.html
10) Readability 9) Readability
See project /buriy/python-readability See project /buriy/python-readability
11) Download and upload/send article (xHTML, HTMLZ, Markdown, MHTML, TXT). 10) Download and upload/send article (xHTML, HTMLZ, Markdown, MHTML, TXT).
12) Fetch summary from URL, instead of storing summary, or 11) Fetch summary from URL, instead of storing summary, or
Store 5 upcoming summaries. Store 5 upcoming summaries.
This would help making the database files smaller. This would help making the database files smaller.
13) Support protocol Gopher 12) Support protocol Gopher
See project /michael-lazar/pygopherd See project /michael-lazar/pygopherd
See project /gopherball/gb See project /gopherball/gb
14) Support ActivityPub @person@domain (see Tip Of The Day). 13) Support ActivityPub @person@domain (see Tip Of The Day).
15) Tip Of The Day. 14) Tip Of The Day.
Did you know that you can follow you favorite Mastodon feeds by just Did you know that you can follow you favorite Mastodon feeds by just
sending the URL address? sending the URL address?
Supported fediverse websites are: Supported fediverse websites are:
Akkoma, HubZilla, Mastodon, Misskey, Pixelfed, Pleroma, Soapbox. Akkoma, HubZilla, Mastodon, Misskey, Pixelfed, Pleroma, Soapbox.
16) Brand: News Broker, Newsman, Newsdealer, Laura Harbinger 15) Brand: News Broker, Newsman, Newsdealer, Laura Harbinger
17) See project /offpunk/offblocklist.py 16) See project /offpunk/offblocklist.py
18) Search messages of government regulated publishers, and promote other sources. 18) Search messages of government regulated publishers, and promote other sources.
Dear reader, we couldn't get news from XYZ as they don't provide RSS feeds. Dear reader, we couldn't get news from XYZ as they don't provide RSS feeds.
@ -76,8 +73,8 @@ TODO
from argparse import ArgumentParser from argparse import ArgumentParser
import configparser import configparser
import filehandler # import filehandler
# from filehandler import get_default_confdir # from slixfeed.file import get_default_confdir
from getpass import getpass from getpass import getpass
import logging import logging
import os import os
@ -91,96 +88,18 @@ import os
# # with start_action(action_type="message()", msg=msg): # # with start_action(action_type="message()", msg=msg):
#import slixfeed.irchandler #import slixfeed.irchandler
from xmpphandler import Slixfeed from slixfeed.xmpp.client import Slixfeed
#import slixfeed.matrixhandler #import slixfeed.matrixhandler
if __name__ == '__main__': class Jabber:
# Setup the command line arguments.
parser = ArgumentParser(description=Slixfeed.__doc__)
# Output verbosity options. def __init__(self, jid, password, nick):
parser.add_argument(
"-q",
"--quiet",
help="set logging to ERROR",
action="store_const",
dest="loglevel",
const=logging.ERROR,
default=logging.INFO
)
parser.add_argument(
"-d",
"--debug",
help="set logging to DEBUG",
action="store_const",
dest="loglevel",
const=logging.DEBUG,
default=logging.INFO
)
# JID and password options.
parser.add_argument(
"-j",
"--jid",
dest="jid",
help="Jabber ID"
)
parser.add_argument(
"-p",
"--password",
dest="password",
help="Password of JID"
)
parser.add_argument(
"-n",
"--nickname",
dest="nickname",
help="Display name"
)
args = parser.parse_args()
# Setup logging.
logging.basicConfig(
level=args.loglevel,
format='%(levelname)-8s %(message)s'
)
# Try configuration file
config = configparser.RawConfigParser()
config_dir = filehandler.get_default_confdir()
if not os.path.isdir(config_dir):
os.mkdir(config_dir)
# TODO Copy file from /etc/slixfeed/ or /usr/share/slixfeed/
config_file = os.path.join(config_dir, r"accounts.ini")
config.read(config_file)
if config.has_section("XMPP"):
xmpp = config["XMPP"]
nickname = xmpp["nickname"]
username = xmpp["username"]
password = xmpp["password"]
# Use arguments if were given
if args.jid:
username = args.jid
if args.password:
password = args.password
if args.nickname:
nickname = args.nickname
# Prompt for credentials if none were given
if username is None:
username = input("Username: ")
if password is None:
password = getpass("Password: ")
if nickname is None:
nickname = input("Nickname: ")
# Setup the Slixfeed and register plugins. Note that while plugins may # Setup the Slixfeed and register plugins. Note that while plugins may
# have interdependencies, the order in which you register them does # have interdependencies, the order in which you register them does
# not matter. # not matter.
xmpp = Slixfeed(username, password, nickname) xmpp = Slixfeed(jid, password, nick)
xmpp.register_plugin('xep_0004') # Data Forms xmpp.register_plugin('xep_0004') # Data Forms
xmpp.register_plugin('xep_0030') # Service Discovery xmpp.register_plugin('xep_0030') # Service Discovery
xmpp.register_plugin('xep_0045') # Multi-User Chat xmpp.register_plugin('xep_0045') # Multi-User Chat

318
slixfeed/config.py Normal file
View file

@ -0,0 +1,318 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TODO
1) Use file settings.csv and pathnames.txt instead:
See get_value_default and get_default_list
2) Website-specific filter (i.e. audiobookbay).
3) Exclude websites from filtering (e.g. metapedia).
4) Filter phrases:
Refer to sqlitehandler.search_entries for implementation.
It is expected to be more complex than function search_entries.
"""
import configparser
# from file import get_default_confdir
import slixfeed.config as config
import slixfeed.sqlite as sqlite
import os
from random import randrange
import sys
import yaml
async def get_value_default(key, section):
    """
    Get settings default value.

    The value is read from file settings.ini of the configuration
    directory, or of /usr/share/slixfeed/ when that directory does
    not exist.

    Parameters
    ----------
    key : str
        Key: archive, enabled, interval,
        length, old, quantum, random.
    section : str
        Section of settings.ini in which the key is looked up.

    Returns
    -------
    result : str
        Value.

    Raises
    ------
    configparser.NoSectionError
        The section is not present (also the case when settings.ini
        could not be read, as unreadable files are silently skipped).
    KeyError
        The key is not present in the section.
    """
    config_res = configparser.RawConfigParser()
    config_dir = config.get_default_confdir()
    if not os.path.isdir(config_dir):
        config_dir = '/usr/share/slixfeed/'
    config_file = os.path.join(config_dir, r"settings.ini")
    config_res.read(config_file)
    # Fail with a descriptive error rather than with UnboundLocalError;
    # callers that fall back to a default value rely on an exception.
    if not config_res.has_section(section):
        raise configparser.NoSectionError(section)
    return config_res[section][key]
async def get_list(filename):
    """
    Load a YAML file of the configuration directory.

    The file is looked up in the configuration directory, or in
    /usr/share/slixfeed/ when that directory does not exist.

    Parameters
    ----------
    filename : str
        filename of yaml file.

    Returns
    -------
    result : object
        Parsed content of the file as returned by yaml.safe_load
        (lists of pathnames or keywords, keyed by name).
    """
    config_dir = config.get_default_confdir()
    if not os.path.isdir(config_dir):
        config_dir = '/usr/share/slixfeed/'
    config_file = os.path.join(config_dir, filename)
    # Explicit encoding, so that parsing does not depend on the locale
    # of the platform.
    with open(config_file, encoding="utf-8") as defaults:
        # default = yaml.safe_load(defaults)
        # result = default[key]
        result = yaml.safe_load(defaults)
    return result
def get_default_dbdir():
    """
    Determine the directory path where dbfile will be stored.

    * If $XDG_DATA_HOME is defined, use it;
    * else if $HOME exists, use it;
    * else if the platform is Windows, use %APPDATA%;
    * else use the current directory.

    A variable that is set to an empty string is treated as if it
    was not set.

    Returns
    -------
    str
        Path to database directory.

    Note
    ----
    This function was taken from project buku.

    See https://github.com/jarun/buku

    * Arun Prakash Jana (jarun)
    * Dmitry Marakasov (AMDmi3)
    """
    # data_home = xdg.BaseDirectory.xdg_data_home
    data_home = os.environ.get('XDG_DATA_HOME')
    if not data_home:
        home = os.environ.get('HOME')
        appdata = os.environ.get('APPDATA')
        if home:
            data_home = os.path.join(home, '.local', 'share')
        elif sys.platform == 'win32' and appdata:
            data_home = appdata
        else:
            # The current directory is returned as is, without the
            # "slixfeed" suffix.
            return os.path.abspath('.')
    return os.path.join(data_home, 'slixfeed')
def get_default_confdir():
    """
    Determine the directory path where configuration will be stored.

    * If $XDG_CONFIG_HOME is defined, use it;
    * else if $HOME exists, use it;
    * else if the platform is Windows, use %APPDATA%;
    * else use the current directory.

    A variable that is set to an empty string is treated as if it
    was not set.

    Returns
    -------
    str
        Path to configuration directory.
    """
    # config_home = xdg.BaseDirectory.xdg_config_home
    config_home = os.environ.get('XDG_CONFIG_HOME')
    if not config_home:
        home = os.environ.get('HOME')
        appdata = os.environ.get('APPDATA')
        if home:
            config_home = os.path.join(home, '.config')
        elif sys.platform == 'win32' and appdata:
            config_home = appdata
        else:
            # The current directory is returned as is, without the
            # "slixfeed" suffix.
            return os.path.abspath('.')
    return os.path.join(config_home, 'slixfeed')
async def initdb(jid, callback, message=None):
    """
    Callback function to instantiate action on database.

    The database file of the given Jabber ID is prepared (directory
    and tables are created), and then the callback is awaited with
    the path to that file.

    Parameters
    ----------
    jid : str
        Jabber ID.
    callback : coroutine function
        Function to await; it receives the path to the database file
        and, when given, the message.
    message : str, optional
        Optional kwarg when a message is a part or
        required argument. The default is None.
        An empty message is not passed to the callback.

    Returns
    -------
    object
        Result of the awaited callback.
    """
    db_dir = get_default_dbdir()
    # makedirs also creates missing parent directories and does not fail
    # when the directory already exists.
    os.makedirs(db_dir, exist_ok=True)
    db_file = os.path.join(db_dir, r"{}.db".format(jid))
    sqlite.create_tables(db_file)
    # await set_default_values(db_file)
    if message:
        return await callback(db_file, message)
    return await callback(db_file)
async def add_to_list(newwords, keywords):
    """
    Append new keywords to list.

    New keywords are lowercased and stripped of surrounding
    whitespace; empty and already listed keywords are skipped.

    Parameters
    ----------
    newwords : str
        Comma-separated list of new keywords.
    keywords : str or None
        Comma-separated list of current keywords.

    Returns
    -------
    val : str
        Sorted comma-separated list of current keywords and new keywords.
    """
    if keywords is None:
        keywords = []
    elif isinstance(keywords, str):
        # Drop empty items, so that an empty stored value does not
        # yield a leading comma in the result.
        keywords = [word for word in keywords.split(",") if word]
    else:
        # Work on a copy in order not to modify the argument of the caller.
        keywords = list(keywords)
    for word in newwords.lower().split(","):
        word = word.strip()
        if word and word not in keywords:
            keywords.append(word)
    keywords.sort()
    return ",".join(keywords)
async def remove_from_list(newwords, keywords):
    """
    Remove given keywords from list.

    Given keywords are lowercased and stripped of surrounding
    whitespace before they are matched.

    Parameters
    ----------
    newwords : str
        Comma-separated list of keywords to remove.
    keywords : str or None
        Comma-separated list of current keywords.

    Returns
    -------
    val : str
        Sorted comma-separated list of the remaining keywords.
    """
    if keywords is None:
        keywords = []
    elif isinstance(keywords, str):
        # Drop empty items, so that they do not end up as stray commas.
        keywords = [word for word in keywords.split(",") if word]
    else:
        # Work on a copy in order not to modify the argument of the caller.
        keywords = list(keywords)
    for word in newwords.lower().split(","):
        word = word.strip()
        if word and word in keywords:
            keywords.remove(word)
    keywords.sort()
    return ",".join(keywords)
async def is_listed(db_file, key, string):
    """
    Check keyword match.

    Parameters
    ----------
    db_file : str
        Path to database file.
    key : str
        Name of the filter which is passed to sqlite.get_filters_value
        (presumably an "allow" or a "deny" list).
    string : str
        String.

    Returns
    -------
    Matched keyword or None.
    """
# async def reject(db_file, string):
# async def is_blacklisted(db_file, string):
    keywords = await sqlite.get_filters_value(
        db_file,
        key
        )
    if not keywords:
        return None
    # Keywords shorter than two characters are ignored.
    candidates = [word for word in keywords.split(",") if len(word) >= 2]
    if not candidates:
        return None
    # Lowercase once instead of once per keyword.
    text = string.lower()
    for word in candidates:
        if word in text:
            # print(">>> ACTIVATE", word)
            # return 1
            return word
    return None
"""
This code was tested at module datahandler
reject = 0
blacklist = await get_settings_value(
db_file,
"filter-deny"
)
# print(">>> blacklist:")
# print(blacklist)
# breakpoint()
if blacklist:
blacklist = blacklist.split(",")
# print(">>> blacklist.split")
# print(blacklist)
# breakpoint()
for i in blacklist:
# print(">>> length", len(i))
# breakpoint()
# if len(i):
if not i or len(i) < 2:
print(">>> continue due to length", len(i))
# breakpoint()
continue
# print(title)
# print(">>> blacklisted word:", i)
# breakpoint()
test = (title + " " + summary + " " + link)
if i in test.lower():
reject = 1
break
if reject:
print("rejected:",title)
entry = (title, '', link, source, date, 1);
"""

View file

@ -1,69 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TODO
1) Use file settings.csv and pathnames.txt instead:
See get_value_default and get_default_list
"""
import configparser
# from filehandler import get_default_confdir
import filehandler
import os
from random import randrange
import yaml
async def get_value_default(key, section):
    """
    Get settings default value.

    The value is read from file settings.ini of the configuration
    directory, or of /usr/share/slixfeed/ when that directory does
    not exist.

    Parameters
    ----------
    key : str
        Key: archive, enabled, interval,
        length, old, quantum, random.
    section : str
        Section of settings.ini in which the key is looked up.

    Returns
    -------
    result : str
        Value.

    Raises
    ------
    configparser.NoSectionError
        The section is not present (also the case when settings.ini
        could not be read, as unreadable files are silently skipped).
    KeyError
        The key is not present in the section.
    """
    config = configparser.RawConfigParser()
    config_dir = filehandler.get_default_confdir()
    if not os.path.isdir(config_dir):
        config_dir = '/usr/share/slixfeed/'
    config_file = os.path.join(config_dir, r"settings.ini")
    config.read(config_file)
    # Fail with a descriptive error rather than with UnboundLocalError.
    if not config.has_section(section):
        raise configparser.NoSectionError(section)
    return config[section][key]
async def get_list(filename):
    """
    Load a YAML file of the configuration directory.

    The file is looked up in the configuration directory, or in
    /usr/share/slixfeed/ when that directory does not exist.

    Parameters
    ----------
    filename : str
        filename of yaml file.

    Returns
    -------
    result : object
        Parsed content of the file as returned by yaml.safe_load
        (lists of pathnames or keywords, keyed by name).
    """
    config_dir = filehandler.get_default_confdir()
    if not os.path.isdir(config_dir):
        config_dir = '/usr/share/slixfeed/'
    config_file = os.path.join(config_dir, filename)
    # Explicit encoding, so that parsing does not depend on the locale
    # of the platform.
    with open(config_file, encoding="utf-8") as defaults:
        # default = yaml.safe_load(defaults)
        # result = default[key]
        result = yaml.safe_load(defaults)
    return result

View file

@ -24,15 +24,15 @@ from aiohttp import ClientError, ClientSession, ClientTimeout
from asyncio import TimeoutError from asyncio import TimeoutError
from asyncio.exceptions import IncompleteReadError from asyncio.exceptions import IncompleteReadError
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from confighandler import get_list, get_value_default import slixfeed.config as config
from datetimehandler import now, rfc2822_to_iso8601 from slixfeed.datetime import now, rfc2822_to_iso8601
from email.utils import parseaddr from email.utils import parseaddr
from feedparser import parse from feedparser import parse
from http.client import IncompleteRead from http.client import IncompleteRead
from listhandler import is_listed from slixfeed.list import is_listed
from lxml import html from lxml import html
import sqlitehandler as sqlite import slixfeed.sqlite as sqlite
from urlhandler import complete_url, join_url, trim_url from slixfeed.url import complete_url, join_url, trim_url
from urllib import error from urllib import error
# from xml.etree.ElementTree import ElementTree, ParseError # from xml.etree.ElementTree import ElementTree, ParseError
from urllib.parse import urljoin, urlsplit, urlunsplit from urllib.parse import urljoin, urlsplit, urlunsplit
@ -534,7 +534,7 @@ async def download_feed(url):
Document or error message. Document or error message.
""" """
try: try:
user_agent = await get_value_default("user-agent", "Network") user_agent = await config.get_value_default("user-agent", "Network")
except: except:
user_agent = "Slixfeed/0.1" user_agent = "Slixfeed/0.1"
if not len(user_agent): if not len(user_agent):
@ -631,7 +631,7 @@ async def feed_mode_request(url, tree):
""" """
feeds = {} feeds = {}
parted_url = urlsplit(url) parted_url = urlsplit(url)
paths = await get_list("lists.yaml") paths = await config.get_list("lists.yaml")
paths = paths["pathnames"] paths = paths["pathnames"]
for path in paths: for path in paths:
address = urlunsplit([ address = urlunsplit([
@ -741,7 +741,7 @@ async def feed_mode_scan(url, tree):
feeds = {} feeds = {}
# paths = [] # paths = []
# TODO Test # TODO Test
paths = await get_list("lists.yaml") paths = await config.get_list("lists.yaml")
paths = paths["pathnames"] paths = paths["pathnames"]
for path in paths: for path in paths:
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path) # xpath_query = "//*[@*[contains(.,'{}')]]".format(path)

View file

@ -1,104 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
from sqlitehandler import create_tables
def get_default_dbdir():
    """
    Determine the directory path where dbfile will be stored.

    * If $XDG_DATA_HOME is defined, use it;
    * else if $HOME exists, use it;
    * else if the platform is Windows, use %APPDATA%;
    * else use the current directory.

    A variable that is set to an empty string is treated as if it
    was not set.

    Returns
    -------
    str
        Path to database directory.

    Note
    ----
    This function was taken from project buku.

    See https://github.com/jarun/buku

    * Arun Prakash Jana (jarun)
    * Dmitry Marakasov (AMDmi3)
    """
    # data_home = xdg.BaseDirectory.xdg_data_home
    data_home = os.environ.get('XDG_DATA_HOME')
    if not data_home:
        home = os.environ.get('HOME')
        appdata = os.environ.get('APPDATA')
        if home:
            data_home = os.path.join(home, '.local', 'share')
        elif sys.platform == 'win32' and appdata:
            data_home = appdata
        else:
            # The current directory is returned as is, without the
            # "slixfeed" suffix.
            return os.path.abspath('.')
    return os.path.join(data_home, 'slixfeed')
def get_default_confdir():
    """
    Determine the directory path where configuration will be stored.

    * If $XDG_CONFIG_HOME is defined, use it;
    * else if $HOME exists, use it;
    * else if the platform is Windows, use %APPDATA%;
    * else use the current directory.

    A variable that is set to an empty string is treated as if it
    was not set.

    Returns
    -------
    str
        Path to configuration directory.
    """
    # config_home = xdg.BaseDirectory.xdg_config_home
    config_home = os.environ.get('XDG_CONFIG_HOME')
    if not config_home:
        home = os.environ.get('HOME')
        appdata = os.environ.get('APPDATA')
        if home:
            config_home = os.path.join(home, '.config')
        elif sys.platform == 'win32' and appdata:
            config_home = appdata
        else:
            # The current directory is returned as is, without the
            # "slixfeed" suffix.
            return os.path.abspath('.')
    return os.path.join(config_home, 'slixfeed')
async def initdb(jid, callback, message=None):
    """
    Callback function to instantiate action on database.

    The database file of the given Jabber ID is prepared (directory
    and tables are created), and then the callback is awaited with
    the path to that file.

    Parameters
    ----------
    jid : str
        Jabber ID.
    callback : coroutine function
        Function to await; it receives the path to the database file
        and, when given, the message.
    message : str, optional
        Optional kwarg when a message is a part or
        required argument. The default is None.
        An empty message is not passed to the callback.

    Returns
    -------
    object
        Result of the awaited callback.
    """
    db_dir = get_default_dbdir()
    # makedirs also creates missing parent directories and does not fail
    # when the directory already exists.
    os.makedirs(db_dir, exist_ok=True)
    db_file = os.path.join(db_dir, r"{}.db".format(jid))
    create_tables(db_file)
    # await set_default_values(db_file)
    if message:
        return await callback(db_file, message)
    return await callback(db_file)

View file

@ -1,156 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TODO
1) Website-specific filter (i.e. audiobookbay).
2) Exclude websites from filtering (e.g. metapedia).
3) Filter phrases:
Refer to sqlitehandler.search_entries for implementation.
It is expected to be more complex than function search_entries.
"""
import sqlitehandler as sqlite
async def add_to_list(newwords, keywords):
    """
    Append new keywords to list.

    New keywords are lowercased and stripped of surrounding
    whitespace; empty and already listed keywords are skipped.

    Parameters
    ----------
    newwords : str
        Comma-separated list of new keywords.
    keywords : str or None
        Comma-separated list of current keywords.

    Returns
    -------
    val : str
        Sorted comma-separated list of current keywords and new keywords.
    """
    if keywords is None:
        keywords = []
    elif isinstance(keywords, str):
        # Drop empty items, so that an empty stored value does not
        # yield a leading comma in the result.
        keywords = [word for word in keywords.split(",") if word]
    else:
        # Work on a copy in order not to modify the argument of the caller.
        keywords = list(keywords)
    for word in newwords.lower().split(","):
        word = word.strip()
        if word and word not in keywords:
            keywords.append(word)
    keywords.sort()
    return ",".join(keywords)
async def remove_from_list(newwords, keywords):
    """
    Remove given keywords from list.

    Given keywords are lowercased and stripped of surrounding
    whitespace before they are matched.

    Parameters
    ----------
    newwords : str
        Comma-separated list of keywords to remove.
    keywords : str or None
        Comma-separated list of current keywords.

    Returns
    -------
    val : str
        Sorted comma-separated list of the remaining keywords.
    """
    if keywords is None:
        keywords = []
    elif isinstance(keywords, str):
        # Drop empty items, so that they do not end up as stray commas.
        keywords = [word for word in keywords.split(",") if word]
    else:
        # Work on a copy in order not to modify the argument of the caller.
        keywords = list(keywords)
    for word in newwords.lower().split(","):
        word = word.strip()
        if word and word in keywords:
            keywords.remove(word)
    keywords.sort()
    return ",".join(keywords)
async def is_listed(db_file, key, string):
    """
    Check keyword match.

    Parameters
    ----------
    db_file : str
        Path to database file.
    key : str
        Name of the filter which is passed to sqlite.get_filters_value
        (presumably an "allow" or a "deny" list).
    string : str
        String.

    Returns
    -------
    Matched keyword or None.
    """
# async def reject(db_file, string):
# async def is_blacklisted(db_file, string):
    keywords = await sqlite.get_filters_value(
        db_file,
        key
        )
    if not keywords:
        return None
    # Keywords shorter than two characters are ignored.
    candidates = [word for word in keywords.split(",") if len(word) >= 2]
    if not candidates:
        return None
    # Lowercase once instead of once per keyword.
    text = string.lower()
    for word in candidates:
        if word in text:
            # print(">>> ACTIVATE", word)
            # return 1
            return word
    return None
"""
This code was tested at module datahandler
reject = 0
blacklist = await get_settings_value(
db_file,
"filter-deny"
)
# print(">>> blacklist:")
# print(blacklist)
# breakpoint()
if blacklist:
blacklist = blacklist.split(",")
# print(">>> blacklist.split")
# print(blacklist)
# breakpoint()
for i in blacklist:
# print(">>> length", len(i))
# breakpoint()
# if len(i):
if not i or len(i) < 2:
print(">>> continue due to length", len(i))
# breakpoint()
continue
# print(title)
# print(">>> blacklisted word:", i)
# breakpoint()
test = (title + " " + summary + " " + link)
if i in test.lower():
reject = 1
break
if reject:
print("rejected:",title)
entry = (title, '', link, source, date, 1);
"""

View file

@ -1,56 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
{
'bozo': False,
'bozo_exception': None,
'feeds': [
{
'url': 'https://kurtmckee.org/tag/listparser/feed',
'title': 'listparser blog',
'categories': [],
'tags': []
},
{
'url': 'https://github.com/kurtmckee/listparser/commits/develop.atom',
'title': 'listparser changelog',
'categories': [],
'tags': []
}
],
'lists': [],
'opportunities': [],
'meta': {
'title': 'listparser project feeds',
'author': {
'name': 'Kurt McKee',
'email': 'contactme@kurtmckee.org',
'url': 'https://kurtmckee.org/'
}
},
'version': 'opml2'
}
"""
import listparser
import lxml
import sqlitehandler
import datahandler
async def import_opml(db_file, opml_doc):
    """
    Add the feeds of an OPML document to the database.

    Parameters
    ----------
    db_file : str
        Path to database file.
    opml_doc : object
        OPML document, handed to listparser.parse as is.
    """
    parsed = listparser.parse(opml_doc)
    for item in parsed['feeds']:
        # categories = item['categories']
        # tags = item['tags']
        record = [item['url'], item['title']]
        await datahandler.add_feed_no_check(db_file, record)
# NOTE Use OPyML or LXML
async def export_opml():
    """
    Placeholder for OPML export.

    Only awaits sqlitehandler.get_feeds(); the result is not used
    yet and nothing is returned.
    """
    await sqlitehandler.get_feeds()

View file

@ -18,13 +18,12 @@ TODO
from asyncio import Lock from asyncio import Lock
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from datetime import date from datetime import date
# from slixfeed.confighandler import get_value_default # from slixfeed.config import get_value_default
import confighandler as config import slixfeed.config as config
# from slixfeed.datahandler import join_url # from slixfeed.data import join_url
import datahandler as datahandler from slixfeed.datetime import current_time, rfc2822_to_iso8601
from datetimehandler import current_time, rfc2822_to_iso8601
from sqlite3 import connect, Error from sqlite3 import connect, Error
from urlhandler import remove_tracking_parameters from slixfeed.url import join_url, remove_tracking_parameters
# from eliot import start_action, to_file # from eliot import start_action, to_file
# # with start_action(action_type="list_feeds()", db=db_file): # # with start_action(action_type="list_feeds()", db=db_file):
@ -469,7 +468,7 @@ async def get_entry_unread(db_file, num=None):
ix = result[0] ix = result[0]
title = result[1] title = result[1]
# # TODO Retrieve summary from feed # # TODO Retrieve summary from feed
# # See datahandler.view_entry # # See fetch.view_entry
# summary = result[2] # summary = result[2]
# # Remove HTML tags # # Remove HTML tags
# try: # try:
@ -1001,7 +1000,7 @@ async def remove_nonexistent_entries(db_file, feed, source):
else: else:
title = feed["feed"]["title"] title = feed["feed"]["title"]
if entry.has_key("link"): if entry.has_key("link"):
link = datahandler.join_url(source, entry.link) link = join_url(source, entry.link)
else: else:
link = source link = source
if entry.has_key("published") and item[4]: if entry.has_key("published") and item[4]:

View file

@ -44,18 +44,18 @@ import logging
import os import os
import slixmpp import slixmpp
import confighandler as config import slixfeed.config as config
from datahandler import download_updates from slixfeed.fetch import download_updates
from datetimehandler import current_time from slixfeed.datetime import current_time
from filehandler import initdb, get_default_dbdir from slixfeed.file import initdb, get_default_dbdir
from sqlitehandler import ( from slixfeed.sqlite import (
get_entry_unread, get_entry_unread,
get_settings_value, get_settings_value,
get_number_of_items, get_number_of_items,
get_number_of_entries_unread get_number_of_entries_unread
) )
# from xmpphandler import Slixfeed # from xmpp import Slixfeed
import xmpphandler as xmpphandler import slixfeed.xmpp.client as xmpp
main_task = [] main_task = []
jid_tasker = {} jid_tasker = {}
@ -215,10 +215,10 @@ async def send_update(self, jid, num=None):
if new: if new:
# TODO Add while loop to assure delivery. # TODO Add while loop to assure delivery.
# print(await current_time(), ">>> ACT send_message",jid) # print(await current_time(), ">>> ACT send_message",jid)
chat_type = await xmpphandler.Slixfeed.is_muc(self, jid) chat_type = await xmpp.Slixfeed.is_muc(self, jid)
# NOTE Do we need "if statement"? See NOTE at is_muc. # NOTE Do we need "if statement"? See NOTE at is_muc.
if chat_type in ("chat", "groupchat"): if chat_type in ("chat", "groupchat"):
xmpphandler.Slixfeed.send_message( xmpp.Slixfeed.send_message(
self, self,
mto=jid, mto=jid,
mbody=new, mbody=new,
@ -313,7 +313,7 @@ async def send_status(self, jid):
# breakpoint() # breakpoint()
# print(await current_time(), status_text, "for", jid) # print(await current_time(), status_text, "for", jid)
xmpphandler.Slixfeed.send_presence( xmpp.Slixfeed.send_presence(
self, self,
pshow=status_mode, pshow=status_mode,
pstatus=status_text, pstatus=status_text,

View file

@ -14,7 +14,7 @@ TODO
""" """
from confighandler import get_list import slixfeed.config as config
from email.utils import parseaddr from email.utils import parseaddr
import random import random
from urllib.parse import ( from urllib.parse import (
@ -27,6 +27,25 @@ from urllib.parse import (
) )
def get_hostname(url):
    """
    Get hostname.

    The network location part of the URL is returned as is (the
    "netloc" attribute of urlsplit).

    Parameters
    ----------
    url : str
        URL.

    Returns
    -------
    hostname : str
        Hostname.
    """
    return urlsplit(url).netloc
# NOTE hostname and protocol are listed as one in file # NOTE hostname and protocol are listed as one in file
# proxies.yaml. Perhaps a better practice would be to have # proxies.yaml. Perhaps a better practice would be to have
# them separated. File proxies.yaml will remainas is in order # them separated. File proxies.yaml will remainas is in order
@ -52,7 +71,7 @@ async def replace_hostname(url):
pathname = parted_url.path pathname = parted_url.path
queries = parted_url.query queries = parted_url.query
fragment = parted_url.fragment fragment = parted_url.fragment
proxies = await get_list("proxies.yaml") proxies = await config.get_list("proxies.yaml")
for proxy in proxies: for proxy in proxies:
proxy = proxies[proxy] proxy = proxies[proxy]
if hostname in proxy["hostname"]: if hostname in proxy["hostname"]:
@ -90,7 +109,7 @@ async def remove_tracking_parameters(url):
pathname = parted_url.path pathname = parted_url.path
queries = parse_qs(parted_url.query) queries = parse_qs(parted_url.query)
fragment = parted_url.fragment fragment = parted_url.fragment
trackers = await get_list("queries.yaml") trackers = await config.get_list("queries.yaml")
trackers = trackers["trackers"] trackers = trackers["trackers"]
for tracker in trackers: for tracker in trackers:
if tracker in queries: del queries[tracker] if tracker in queries: del queries[tracker]

552
slixfeed/xmpp/client.py Normal file
View file

@ -0,0 +1,552 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
FIXME
1) Function check_readiness or event "changed_status" causes status
messages to be sent three times, as well as false ones that indicate
a lack of feeds.
TODO
1) Use loop (with gather) instead of TaskGroup.
2) Assure message delivery before calling a new task.
See https://slixmpp.readthedocs.io/en/latest/event_index.html#term-marker_acknowledged
3) Do not send updates when busy or away.
See https://slixmpp.readthedocs.io/en/latest/event_index.html#term-changed_status
4) XHTML-IM
case _ if message_lowercase.startswith("html"):
message['html']="
Parse me!
"
self.send_message(
mto=jid,
mfrom=self.boundjid.bare,
mhtml=message
)
NOTE
1) Self presence
Apparently, it is possible to view self presence.
This means that there is no need to store presences in order to switch or restore presence.
check_readiness
📂 Send a URL from a blog or a news website.
JID: self.boundjid.bare
MUC: self.nick
2) Extracting attribute using xmltodict.
import xmltodict
message = xmltodict.parse(str(message))
jid = message["message"]["x"]["@jid"]
"""
import asyncio
from slixfeed.config import add_to_list, initdb, get_list, remove_from_list
import slixfeed.fetch as fetcher
from slixfeed.datetime import current_time
import logging
# import os
from random import randrange
import slixmpp
from slixmpp.exceptions import IqError, IqTimeout
import slixfeed.sqlite as sqlite
import slixfeed.task as task
import slixfeed.url as urlfixer
from time import sleep
from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
# from slixmpp.plugins.xep_0402 import BookmarkStorage, Conference
from slixmpp.plugins.xep_0048.stanza import Bookmarks
import xmltodict
import xml.etree.ElementTree as ET
from lxml import etree
import slixfeed.xmpp.compose as compose
import slixfeed.xmpp.connect as connect
import slixfeed.xmpp.muc as muc
import slixfeed.xmpp.status as status
# Module-level containers, presumably for bookkeeping of asyncio tasks.
# NOTE(review): the Slixfeed class below does not read or write any of them
# (main_task appears only in commented-out code) -- confirm whether another
# module imports them before removing.
main_task = []
jid_tasker = {}
task_manager = {}
# Event loop handle obtained once, at import time.
loop = asyncio.get_event_loop()
# asyncio.set_event_loop(loop)
# time_now = datetime.now()
# time_now = time_now.strftime("%H:%M:%S")
# def print_time():
# # return datetime.now().strftime("%H:%M:%S")
# now = datetime.now()
# current_time = now.strftime("%H:%M:%S")
# return current_time
class Slixfeed(slixmpp.ClientXMPP):
    """
    Slixmpp
    -------
    News bot that sends updates from RSS feeds.
    """
    # NOTE The docstring above is kept verbatim on purpose: slixfeed.py passes
    # Slixfeed.__doc__ to ArgumentParser as the program description.

    def __init__(self, jid, password, nick):
        """
        Create the client and register every event handler.

        Parameters
        ----------
        jid : str
            Jabber ID of the bot account.
        password : str
            Password of the bot account.
        nick : str
            Display name; also used as the nickname inside groupchats.
        """
        slixmpp.ClientXMPP.__init__(self, jid, password)

        # NOTE
        # The bot works fine when the nickname is hardcoded; or
        # The bot won't join some MUCs when its nickname has brackets
        self.nick = nick

        # The session_start event will be triggered when
        # the bot establishes its connection with the server
        # and the XML streams are ready for use. We want to
        # listen for this event so that we can initialize
        # our roster.
        self.add_event_handler("session_start", self.start_session)
        self.add_event_handler("session_resumed", self.start_session)
        self.add_event_handler("session_start", self.autojoin_muc)
        self.add_event_handler("session_resumed", self.autojoin_muc)
        # A callable must be registered here; the previous code passed the
        # result of print(...), i.e. None, as the handler.
        self.add_event_handler("got_offline", self.got_offline)
        # self.add_event_handler("got_online", self.check_readiness)
        self.add_event_handler("changed_status", self.check_readiness)
        self.add_event_handler("presence_unavailable", self.stop_tasks)

        # self.add_event_handler("changed_subscription", self.check_subscription)

        # self.add_event_handler("chatstate_active", self.check_chatstate_active)
        # self.add_event_handler("chatstate_gone", self.check_chatstate_gone)
        self.add_event_handler("chatstate_composing", self.check_chatstate_composing)
        self.add_event_handler("chatstate_paused", self.check_chatstate_paused)

        # The message event is triggered whenever a message
        # stanza is received. Be aware that that includes
        # MUC messages and error messages.
        self.add_event_handler("message", self.process_message)
        self.add_event_handler("message", self.settle)

        self.add_event_handler("groupchat_invite", self.process_muc_invite)  # XEP_0045
        self.add_event_handler("groupchat_direct_invite", self.process_muc_invite)  # XEP_0249
        # self.add_event_handler("groupchat_message", self.message)

        # self.add_event_handler("disconnected", self.reconnect)
        # self.add_event_handler("disconnected", self.inspect_connection)

        self.add_event_handler("reactions", self.reactions)
        self.add_event_handler("presence_available", self.presence_available)
        self.add_event_handler("presence_error", self.presence_error)
        self.add_event_handler("presence_subscribe", self.presence_subscribe)
        self.add_event_handler("presence_subscribed", self.presence_subscribed)
        self.add_event_handler("presence_unsubscribe", self.presence_unsubscribe)
        self.add_event_handler("presence_unsubscribed", self.unsubscribe)

        # Initialize event loop
        # self.loop = asyncio.get_event_loop()

        # handlers for connection events
        self.connection_attempts = 0
        self.max_connection_attempts = 10
        self.add_event_handler("connection_failed", self.on_connection_failed)
        self.add_event_handler("session_end", self.on_session_end)

    async def got_offline(self, presence):
        """Log that a "got_offline" event was received."""
        print("got_offline")

    # FIXME
    # presence_available is triggered even when status is dnd/away/xa.
    # This results in sending messages even when status is dnd/away/xa.
    # See function check_readiness.
    #
    # NOTE
    # The issue occurs only at bot startup.
    # Once status is changed to dnd/away/xa, the interval stops - as expected.
    #
    # TODO
    # Use "sleep()"
    async def presence_available(self, presence):
        """Restart the interval, status and check tasks of the sender."""
        # print("def presence_available", presence["from"].bare)
        jid = presence["from"].bare
        print("presence_available", jid)
        # Compare JIDs by equality; a substring test would also skip any
        # JID that merely happens to be contained in the bot's own JID.
        if jid != self.boundjid.bare:
            await task.clean_tasks_xmpp(
                jid,
                ["interval", "status", "check"]
            )
            await task.start_tasks_xmpp(
                self,
                jid,
                ["interval", "status", "check"]
            )
            # await task_jid(self, jid)
            # main_task.extend([asyncio.create_task(task_jid(jid))])
            # print(main_task)

    async def stop_tasks(self, presence):
        """Cancel the tasks of a JID that became unavailable."""
        # NOTE(review): this guard is false whenever the client is bound,
        # so the body below effectively never runs. It is left untouched
        # on purpose: comparing against presence["from"].bare instead
        # would also cancel the tasks of a groupchat whenever any single
        # occupant leaves it. Decide on the intended rule before changing.
        if not self.boundjid.bare:
            jid = presence["from"].bare
            print(">>> unavailable:", jid)
            await task.clean_tasks_xmpp(
                jid,
                ["interval", "status", "check"]
            )

    async def presence_error(self, presence):
        """Print a received presence error stanza."""
        print("presence_error")
        print(presence)

    async def presence_subscribe(self, presence):
        """Print a received subscription request."""
        print("presence_subscribe")
        print(presence)

    async def presence_subscribed(self, presence):
        """Print a received subscription approval."""
        print("presence_subscribed")
        print(presence)

    async def reactions(self, message):
        """Print a received reactions stanza."""
        print("reactions")
        print(message)

    async def process_muc_invite(self, message):
        """Join the groupchat named in an invitation stanza."""
        # operator muc_chat
        inviter = message["from"].bare
        muc_jid = message['groupchat_invite']['jid']
        await muc.join_groupchat(self, inviter, muc_jid)

    async def autojoin_muc(self, event=None):
        """
        Join every bookmarked groupchat that is flagged for autojoin.

        Parameters
        ----------
        event : dict, optional
            Event payload; unused. It is optional so that the method can
            also be called directly, without an event.
        """
        result = await self.plugin['xep_0048'].get_bookmarks()
        bookmarks = result["private"]["bookmarks"]
        conferences = bookmarks["conferences"]
        for conference in conferences:
            if conference["autojoin"]:
                muc_jid = conference["jid"]
                print(current_time(), "Autojoining groupchat", muc_jid)
                self.plugin['xep_0045'].join_muc(
                    muc_jid,
                    self.nick,
                    # If a room password is needed, use:
                    # password=the_room_password,
                )

    async def on_session_end(self, event):
        """Log the end of the session and hand over to connection recovery."""
        print(current_time(), "Session ended. Attempting to reconnect.")
        print(event)
        logging.warning("Session ended. Attempting to reconnect.")
        await connect.recover_connection(self, event)

    async def on_connection_failed(self, event):
        """Log the failed connection and hand over to connection recovery."""
        print(current_time(), "Connection failed. Attempting to reconnect.")
        print(event)
        logging.warning("Connection failed. Attempting to reconnect.")
        await connect.recover_connection(self, event)

    async def check_chatstate_composing(self, message):
        """Show a hint as presence status while the contact is typing."""
        print("def check_chatstate_composing")
        print(message)
        if message["type"] in ("chat", "normal"):
            jid = message["from"].bare
            status_text = "Press \"help\" for manual."
            self.send_presence(
                # pshow=status_mode,
                pstatus=status_text,
                pto=jid,
            )

    async def check_chatstate_paused(self, message):
        """Refresh the status task once the contact paused typing."""
        print("def check_chatstate_paused")
        print(message)
        if message["type"] in ("chat", "normal"):
            jid = message["from"].bare
            await task.refresh_task(
                self,
                jid,
                task.send_status,
                "status",
                20
            )

    async def check_readiness(self, presence):
        """
        Stop the interval task of a JID whose status is away, dnd or xa,
        and restart its status and check tasks.

        Parameters
        ----------
        presence : str
            XML stanza .

        Returns
        -------
        None.
        """
        # print("def check_readiness", presence["from"].bare, presence["type"])
        # # available unavailable away (chat) dnd xa
        # print(">>> type", presence["type"], presence["from"].bare)
        # # away chat dnd xa
        # print(">>> show", presence["show"], presence["from"].bare)

        jid = presence["from"].bare
        if presence["show"] in ("away", "dnd", "xa"):
            print(">>> away, dnd, xa:", jid)
            await task.clean_tasks_xmpp(
                jid,
                ["interval"]
            )
            await task.start_tasks_xmpp(
                self,
                jid,
                ["status", "check"]
            )

    async def resume(self, event):
        """Broadcast presence and request the roster."""
        print("def resume")
        print(event)
        self.send_presence()
        await self.get_roster()

    async def start_session(self, event):
        """
        Process the session_start event.

        Typical actions for the session_start event are
        requesting the roster and broadcasting an initial
        presence stanza.

        Arguments:
            event -- An empty dictionary. The session_start
                     event does not provide any additional
                     data.
        """
        print("def start_session")
        print(event)
        self.send_presence()
        await self.get_roster()

        # for task in main_task:
        #     task.cancel()

        # Deprecated in favour of event "presence_available"
        # if not main_task:
        #     await select_file()

    async def is_muc(self, jid):
        """
        Check whether a JID is of MUC.

        Parameters
        ----------
        jid : str
            Jabber ID.

        Returns
        -------
        str or None
            "chat" or "groupchat"; None when the disco query timed out.
        """
        try:
            iqresult = await self["xep_0030"].get_info(jid=jid)
            features = iqresult["disco_info"]["features"]
            # identity = iqresult['disco_info']['identities']
            # if 'account' in indentity:
            # if 'conference' in indentity:
            if 'http://jabber.org/protocol/muc' in features:
                return "groupchat"
            # TODO elif <feature var='jabber:iq:gateway'/>
            # NOTE Is it needed? We do not interact with gateways or services
            else:
                return "chat"
        # TODO Test whether this exception is realized
        except IqTimeout as e:
            messages = [
                "Timeout IQ",
                "IQ Stanza: {}".format(e),
                "Jabber ID: {}".format(jid),
            ]
            for message in messages:
                print(current_time(), message)
                # Pass a single, already formatted string: logging treats
                # the first argument as a %-format string.
                logging.error(message)
        return None

    async def settle(self, msg):
        """
        Add JID to roster and settle subscription.

        Parameters
        ----------
        msg : slixmpp Message stanza
            Incoming message; its sender is the JID to settle.

        Returns
        -------
        None.
        """
        jid = msg["from"].bare
        # is_muc returns the strings "chat" or "groupchat" (both truthy),
        # so the result has to be compared explicitly.
        if await self.is_muc(jid) == "groupchat":
            # Check whether JID is in bookmarks; otherwise, add it.
            print(jid, "is muc")
        else:
            await self.get_roster()
            # Check whether JID is in roster; otherwise, add it.
            if jid not in self.client_roster.keys():
                self.send_presence_subscription(
                    pto=jid,
                    ptype="subscribe",
                    pnick=self.nick
                )
                self.update_roster(
                    jid,
                    subscription="both"
                )
            # Check whether JID is subscribed; otherwise, ask for presence.
            if not self.client_roster[jid]["to"]:
                self.send_presence_subscription(
                    pto=jid,
                    pfrom=self.boundjid.bare,
                    ptype="subscribe",
                    pnick=self.nick
                )
                self.send_message(
                    mto=jid,
                    # mtype="headline",
                    msubject="RSS News Bot",
                    mbody=(
                        "Accept subscription request to receive updates."
                    ),
                    mfrom=self.boundjid.bare,
                    mnick=self.nick
                )
                self.send_presence(
                    pto=jid,
                    pfrom=self.boundjid.bare,
                    # Accept symbol 🉑️ 👍️ ✍
                    pstatus=(
                        "✒️ Accept subscription request to receive updates."
                    ),
                    # ptype="subscribe",
                    pnick=self.nick
                )

    async def presence_unsubscribe(self, presence):
        """Print a received unsubscribe request."""
        print("presence_unsubscribe")
        print(presence)

    async def unsubscribe(self, presence):
        """Notify the sender and remove it from the roster."""
        jid = presence["from"].bare
        self.send_presence(
            pto=jid,
            pfrom=self.boundjid.bare,
            pstatus="🖋️ Subscribe to receive updates",
            pnick=self.nick
        )
        self.send_message(
            mto=jid,
            mbody="You have been unsubscribed."
        )
        self.update_roster(
            jid,
            subscription="remove"
        )

    async def process_message(self, message):
        """
        Process incoming message stanzas. Be aware that this also
        includes MUC messages and error messages. It is usually
        a good practice to check the messages's type before
        processing or sending replies.

        Groupchat messages are handled only when they start with "!",
        were not sent by the bot itself and come from a moderator.

        Parameters
        ----------
        message : str
            The received message stanza. See the documentation
            for stanza objects and the Message stanza to see
            how it may be used.
        """
        # print("message")
        # print(message)
        if message["type"] in ("chat", "groupchat", "normal"):
            jid = message["from"].bare
            if message["type"] == "groupchat":
                if (message['muc']['nick'] == self.nick or
                        not message["body"].startswith("!")):
                    return
                # The resource part of a groupchat sender is the nickname.
                # It is empty for stanzas sent by the room itself, which
                # made the former str.index("/") lookup raise ValueError.
                nick = message["from"].resource
                role = self.plugin['xep_0045'].get_jid_property(
                    jid,
                    nick,
                    "role")
                if role != "moderator":
                    return
                # TODO Implement a list of temporary operators
                # Once an operator is appointed, the control would last
                # until the participant has been disconnected from MUC
                # An operator is a function to appoint non moderators.
                # Changing nickname is fine and consist of no problem.
                # NOTE The token/"masters" based approval that used to be
                # sketched here in commented-out code is not implemented.

            # # Begin processing new JID
            # # Deprecated in favour of event "presence_available"
            # db_dir = get_default_dbdir()
            # os.chdir(db_dir)
            # if jid + ".db" not in os.listdir():
            #     await task_jid(jid)

            await compose.message(self, jid, message)

698
slixfeed/xmpp/compose.py Normal file
View file

@ -0,0 +1,698 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TODO
1) Deprecate "add" (see above) and make it interactive.
Slixfeed: Do you still want to add this URL to subscription list?
See: case _ if message_lowercase.startswith("add"):
"""
from slixfeed.config import add_to_list, initdb, get_list, remove_from_list
from slixfeed.datetime import current_time
import slixfeed.fetch as fetcher
import slixfeed.sqlite as sqlite
import slixfeed.task as task
import slixfeed.url as urlfixer
import slixfeed.xmpp.status as status
import slixfeed.xmpp.text as text
async def message(self, jid, message):
message_text = " ".join(message["body"].split())
if message["type"] == "groupchat":
message_text = message_text[1:]
message_lowercase = message_text.lower()
print(current_time(), "ACCOUNT: " + str(message["from"]))
print(current_time(), "COMMAND:", message_text)
match message_lowercase:
case "commands":
action = text.print_cmd()
case "help":
action = text.print_help()
case "info":
action = text.print_info()
case _ if message_lowercase in [
"greetings", "hallo", "hello", "hey",
"hi", "hola", "holla", "hollo"]:
action = (
"Greeting!\n"
"I'm Slixfeed, an RSS News Bot!\n"
"Send \"help\" for instructions."
)
# print("task_manager[jid]")
# print(task_manager[jid])
await self.get_roster()
print("roster 1")
print(self.client_roster)
print("roster 2")
print(self.client_roster.keys())
print("jid")
print(jid)
await self.autojoin_muc()
# case _ if message_lowercase.startswith("activate"):
# if message["type"] == "groupchat":
# acode = message[9:]
# token = await initdb(
# jid,
# get_settings_value,
# "token"
# )
# if int(acode) == token:
# await initdb(
# jid,
# set_settings_value,
# ["masters", nick]
# )
# await initdb(
# jid,
# set_settings_value,
# ["token", "accepted"]
# )
# action = "{}, your are in command.".format(nick)
# else:
# action = "Activation code is not valid."
# else:
# action = "This command is valid for groupchat only."
case _ if message_lowercase.startswith("add"):
message_text = message_text[4:]
url = message_text.split(" ")[0]
title = " ".join(message_text.split(" ")[1:])
if url.startswith("http"):
action = await initdb(
jid,
fetcher.add_feed_no_check,
[url, title]
)
old = await initdb(
jid,
sqlite.get_settings_value,
"old"
)
if old:
await task.clean_tasks_xmpp(
jid,
["status"]
)
# await send_status(jid)
await task.start_tasks_xmpp(
self,
jid,
["status"]
)
else:
await initdb(
jid,
sqlite.mark_source_as_read,
url
)
else:
action = "Missing URL."
case _ if message_lowercase.startswith("allow +"):
key = "filter-" + message_text[:5]
val = message_text[7:]
if val:
keywords = await initdb(
jid,
sqlite.get_filters_value,
key
)
val = await add_to_list(
val,
keywords
)
await initdb(
jid,
sqlite.set_filters_value,
[key, val]
)
action = (
"Approved keywords\n"
"```\n{}\n```"
).format(val)
else:
action = "Missing keywords."
case _ if message_lowercase.startswith("allow -"):
key = "filter-" + message_text[:5]
val = message_text[7:]
if val:
keywords = await initdb(
jid,
sqlite.get_filters_value,
key
)
val = await remove_from_list(
val,
keywords
)
await initdb(
jid,
sqlite.set_filters_value,
[key, val]
)
action = (
"Approved keywords\n"
"```\n{}\n```"
).format(val)
else:
action = "Missing keywords."
case _ if message_lowercase.startswith("archive"):
key = message_text[:7]
val = message_text[8:]
if val:
try:
if int(val) > 500:
action = "Value may not be greater than 500."
else:
await initdb(
jid,
sqlite.set_settings_value,
[key, val]
)
action = (
"Maximum archived items has been set to {}."
).format(val)
except:
action = "Enter a numeric value only."
else:
action = "Missing value."
case _ if message_lowercase.startswith("deny +"):
key = "filter-" + message_text[:4]
val = message_text[6:]
if val:
keywords = await initdb(
jid,
sqlite.get_filters_value,
key
)
val = await add_to_list(
val,
keywords
)
await initdb(
jid,
sqlite.set_filters_value,
[key, val]
)
action = (
"Rejected keywords\n"
"```\n{}\n```"
).format(val)
else:
action = "Missing keywords."
case _ if message_lowercase.startswith("deny -"):
key = "filter-" + message_text[:4]
val = message_text[6:]
if val:
keywords = await initdb(
jid,
sqlite.get_filters_value,
key
)
val = await remove_from_list(
val,
keywords
)
await initdb(
jid,
sqlite.set_filters_value,
[key, val]
)
action = (
"Rejected keywords\n"
"```\n{}\n```"
).format(val)
else:
action = "Missing keywords."
case _ if (message_lowercase.startswith("gemini") or
message_lowercase.startswith("gopher:")):
action = "Gemini and Gopher are not supported yet."
case _ if (message_lowercase.startswith("http") or
message_lowercase.startswith("feed:")):
url = message_text
await task.clean_tasks_xmpp(
jid,
["status"]
)
status_message = (
"📫️ Processing request to fetch data from {}"
).format(url)
status.process_task_message(self, jid, status_message)
if url.startswith("feed:"):
url = urlfixer.feed_to_http(url)
# url_alt = await urlfixer.replace_hostname(url)
# if url_alt:
# url = url_alt
url = (await urlfixer.replace_hostname(url)) or url
action = await initdb(
jid,
fetcher.add_feed,
url
)
await task.start_tasks_xmpp(
self,
jid,
["status"]
)
# action = "> " + message + "\n" + action
# FIXME Make the taskhandler to update status message
# await refresh_task(
# self,
# jid,
# send_status,
# "status",
# 20
# )
# NOTE This would show the number of new unread entries
old = await initdb(
jid,
sqlite.get_settings_value,
"old"
)
if old:
await task.clean_tasks_xmpp(
jid,
["status"]
)
# await send_status(jid)
await task.start_tasks_xmpp(
self,
jid,
["status"]
)
else:
await initdb(
jid,
sqlite.mark_source_as_read,
url
)
case _ if message_lowercase.startswith("feeds"):
query = message_text[6:]
if query:
if len(query) > 3:
action = await initdb(
jid,
sqlite.search_feeds,
query
)
else:
action = (
"Enter at least 4 characters to search"
)
else:
action = await initdb(
jid,
sqlite.list_feeds
)
case "goodbye":
if message["type"] == "groupchat":
await self.close_muc(jid)
else:
action = "This command is valid for groupchat only."
case _ if message_lowercase.startswith("interval"):
# FIXME
# The following error occurs only upon first attempt to set interval.
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
# self._args = None
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
key = message_text[:8]
val = message_text[9:]
if val:
# action = (
# "Updates will be sent every {} minutes."
# ).format(action)
await initdb(
jid,
sqlite.set_settings_value,
[key, val]
)
# NOTE Perhaps this should be replaced
# by functions clean and start
await task.refresh_task(
self,
jid,
task.send_update,
key,
val
)
action = (
"Updates will be sent every {} minutes."
).format(val)
else:
action = "Missing value."
case _ if message_lowercase.startswith("join"):
muc = urlfixer.check_xmpp_uri(message_text[5:])
if muc:
"TODO probe JID and confirm it's a groupchat"
await self.join_muc(jid, muc)
action = (
"Joined groupchat {}"
).format(message_text)
else:
action = (
"> {}\nXMPP URI is not valid."
).format(message_text)
case _ if message_lowercase.startswith("length"):
key = message_text[:6]
val = message_text[7:]
if val:
try:
val = int(val)
await initdb(
jid,
sqlite.set_settings_value,
[key, val]
)
if val == 0:
action = (
"Summary length limit is disabled."
)
else:
action = (
"Summary maximum length "
"is set to {} characters."
).format(val)
except:
action = "Enter a numeric value only."
else:
action = "Missing value."
# case _ if message_lowercase.startswith("mastership"):
# key = message_text[:7]
# val = message_text[11:]
# if val:
# names = await initdb(
# jid,
# get_settings_value,
# key
# )
# val = await add_to_list(
# val,
# names
# )
# await initdb(
# jid,
# set_settings_value,
# [key, val]
# )
# action = (
# "Operators\n"
# "```\n{}\n```"
# ).format(val)
# else:
# action = "Missing value."
case "new":
await initdb(
jid,
sqlite.set_settings_value,
["old", 0]
)
action = (
"Only new items of newly added feeds will be sent."
)
# TODO Will you add support for number of messages?
case "next":
# num = message_text[5:]
await task.clean_tasks_xmpp(
jid,
["interval", "status"]
)
await task.start_tasks_xmpp(
self,
jid,
["interval", "status"]
)
# await refresh_task(
# self,
# jid,
# send_update,
# "interval",
# num
# )
# await refresh_task(
# self,
# jid,
# send_status,
# "status",
# 20
# )
# await refresh_task(jid, key, val)
case "old":
await initdb(
jid,
sqlite.set_settings_value,
["old", 1]
)
action = (
"All items of newly added feeds will be sent."
)
case _ if message_lowercase.startswith("quantum"):
key = message_text[:7]
val = message_text[8:]
if val:
try:
val = int(val)
# action = (
# "Every update will contain {} news items."
# ).format(action)
await initdb(
jid,
sqlite.set_settings_value,
[key, val]
)
action = (
"Next update will contain {} news items."
).format(val)
except:
action = "Enter a numeric value only."
else:
action = "Missing value."
case "random":
# TODO /questions/2279706/select-random-row-from-a-sqlite-table
# NOTE sqlitehandler.get_entry_unread
action = "Updates will be sent by random order."
case _ if message_lowercase.startswith("read"):
data = message_text[5:]
data = data.split()
url = data[0]
await task.clean_tasks_xmpp(
jid,
["status"]
)
status_message = (
"📫️ Processing request to fetch data from {}"
).format(url)
status.process_task_message(self, jid, status_message)
if url.startswith("feed:"):
url = urlfixer.feed_to_http(url)
url = (await urlfixer.replace_hostname(url)) or url
match len(data):
case 1:
if url.startswith("http"):
action = await fetcher.view_feed(url)
else:
action = "Missing URL."
case 2:
num = data[1]
if url.startswith("http"):
action = await fetcher.view_entry(url, num)
else:
action = "Missing URL."
case _:
action = (
"Enter command as follows:\n"
"`read <url>` or `read <url> <number>`\n"
"URL must not contain white space."
)
await task.start_tasks_xmpp(
self,
jid,
["status"]
)
case _ if message_lowercase.startswith("recent"):
num = message_text[7:]
if num:
try:
num = int(num)
if num < 1 or num > 50:
action = "Value must be ranged from 1 to 50."
else:
action = await initdb(
jid,
sqlite.last_entries,
num
)
except:
action = "Enter a numeric value only."
else:
action = "Missing value."
# NOTE Should people be asked for numeric value?
case _ if message_lowercase.startswith("remove"):
ix = message_text[7:]
if ix:
action = await initdb(
jid,
sqlite.remove_feed,
ix
)
# await refresh_task(
# self,
# jid,
# send_status,
# "status",
# 20
# )
await task.clean_tasks_xmpp(
jid,
["status"]
)
await task.start_tasks_xmpp(
self,
jid,
["status"]
)
else:
action = "Missing feed ID."
case _ if message_lowercase.startswith("reset"):
source = message_text[6:]
await task.clean_tasks_xmpp(
jid,
["status"]
)
status_message = (
"📫️ Marking entries as read..."
)
status.process_task_message(self, jid, status_message)
if source:
await initdb(
jid,
sqlite.mark_source_as_read,
source
)
action = (
"All entries of {} have been "
"marked as read.".format(source)
)
else:
await initdb(
jid,
sqlite.mark_all_as_read
)
action = "All entries have been marked as read."
await task.start_tasks_xmpp(
self,
jid,
["status"]
)
case _ if message_lowercase.startswith("search"):
query = message_text[7:]
if query:
if len(query) > 1:
action = await initdb(
jid,
sqlite.search_entries,
query
)
else:
action = (
"Enter at least 2 characters to search"
)
else:
action = "Missing search query."
case "start":
# action = "Updates are enabled."
key = "enabled"
val = 1
await initdb(
jid,
sqlite.set_settings_value,
[key, val]
)
# asyncio.create_task(task_jid(self, jid))
await task.start_tasks_xmpp(
self,
jid,
["interval", "status", "check"]
)
action = "Updates are enabled."
# print(current_time(), "task_manager[jid]")
# print(task_manager[jid])
case "stats":
action = await initdb(
jid,
sqlite.statistics
)
case _ if message_lowercase.startswith("status "):
ix = message_text[7:]
action = await initdb(
jid,
sqlite.toggle_status,
ix
)
case "stop":
# FIXME
# The following error occurs only upon first attempt to stop.
# /usr/lib/python3.11/asyncio/events.py:73: RuntimeWarning: coroutine 'Slixfeed.send_update' was never awaited
# self._args = None
# RuntimeWarning: Enable tracemalloc to get the object allocation traceback
# action = "Updates are disabled."
# try:
# # task_manager[jid]["check"].cancel()
# # task_manager[jid]["status"].cancel()
# task_manager[jid]["interval"].cancel()
# key = "enabled"
# val = 0
# action = await initdb(
# jid,
# set_settings_value,
# [key, val]
# )
# except:
# action = "Updates are already disabled."
# # print("Updates are already disabled. Nothing to do.")
# # await send_status(jid)
key = "enabled"
val = 0
await initdb(
jid,
sqlite.set_settings_value,
[key, val]
)
await task.clean_tasks_xmpp(
jid,
["interval", "status"]
)
self.send_presence(
pshow="xa",
pstatus="💡️ Send \"Start\" to receive Jabber news",
pto=jid,
)
action = "Updates are disabled."
case "support":
# TODO Send an invitation.
action = "Join xmpp:slixfeed@chat.woodpeckersnest.space?join"
case _ if message_lowercase.startswith("xmpp:"):
muc = urlfixer.check_xmpp_uri(message_text)
if muc:
"TODO probe JID and confirm it's a groupchat"
await self.join_muc(jid, muc)
action = (
"Joined groupchat {}"
).format(message_text)
else:
action = (
"> {}\nXMPP URI is not valid."
).format(message_text)
case _:
action = (
"Unknown command. "
"Press \"help\" for list of commands"
)
# TODO Use message correction here
# NOTE This might not be a good idea if
# commands are sent one close to the next
if action: message.reply(action).send()

31
slixfeed/xmpp/connect.py Normal file
View file

@ -0,0 +1,31 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from slixfeed.datetime import current_time
from time import sleep
async def recover_connection(self, event):
    """
    Count a reconnection attempt, wait, and reconnect.

    Parameters
    ----------
    self : client instance
        Provides connection_attempts and reconnect().
    event
        Event payload; unused.

    Returns
    -------
    None.
    """
    delay = 30
    self.connection_attempts += 1
    # NOTE max_connection_attempts is not enforced here; attempts are
    # only counted and printed.
    print(current_time(), "Attempt number", self.connection_attempts)
    print(current_time(), "Next attempt within", delay, "seconds")
    # NOTE asyncio.sleep doesn't interval as expected
    # NOTE(review): time.sleep inside a coroutine pauses the whole event
    # loop for the duration of the delay -- confirm this is acceptable.
    sleep(delay)
    self.reconnect(wait=5.0)
async def inspect_connection(self, event):
    """
    Try to reconnect after a disconnection; disconnect cleanly on failure.

    Parameters
    ----------
    self : client instance
        Provides reconnect() and disconnect().
    event
        Event payload; only printed.

    Returns
    -------
    None.
    """
    print("Disconnected\nReconnecting...")
    print(event)
    try:
        # The method has to be called; a bare attribute reference
        # does nothing.
        self.reconnect()
    except Exception:
        self.disconnect()
        print("Problem reconnecting")

128
slixfeed/xmpp/muc.py Normal file
View file

@ -0,0 +1,128 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
TODO
1) Send message to inviter that bot has joined to groupchat.
2) If groupchat requires captcha, send the consequent message.
3) If groupchat error is received, send that error message to inviter.
"""
from slixmpp.plugins.xep_0048.stanza import Bookmarks
async def join_groupchat(self, inviter, muc_jid):
    """
    Join a groupchat, bookmark it and introduce the bot to the room.

    Parameters
    ----------
    self : client instance
        Provides the XEP-0045 plugin, nick, boundjid and send_message.
    inviter : str
        Jabber ID of the inviter; currently unused.
    muc_jid : str
        Jabber ID of the groupchat.

    Returns
    -------
    None.
    """
    # NOTE The token based activation (sending a random token to the
    # inviter) is disabled.
    print("muc_jid")
    print(muc_jid)
    self.plugin['xep_0045'].join_muc(
        muc_jid,
        self.nick,
        # If a room password is needed, use:
        # password=the_room_password,
    )
    # The bookmark helper is a function of this module, not a method of
    # the client.
    await add_groupchat_to_bookmarks(self, muc_jid)
    messages = [
        "Greetings!",
        "I'm {}, the news anchor.".format(self.nick),
        "My job is to bring you the latest news "
        "from sources you provide me with.",
        "You may always reach me via "
        "xmpp:{}?message".format(self.boundjid.bare)
    ]
    for message in messages:
        self.send_message(
            mto=muc_jid,
            mbody=message,
            mtype="groupchat"
        )
async def add_groupchat_to_bookmarks(self, muc_jid):
    """
    Store a groupchat in the bookmarks, unless it is already stored.

    The bookmark list is rebuilt from the JIDs of the stored conferences
    plus ``muc_jid``; every entry is written with the nickname of the bot
    and autojoin enabled.

    Parameters
    ----------
    self : client instance
        Provides the XEP-0048 plugin and nick.
    muc_jid : str
        Jabber ID of the groupchat.

    Returns
    -------
    None.
    """
    storage = self.plugin['xep_0048']
    result = await storage.get_bookmarks()
    stored = result["private"]["bookmarks"]["conferences"]
    rooms = [conference["jid"] for conference in stored]
    if muc_jid in rooms:
        return
    rooms.append(muc_jid)
    bookmarks = Bookmarks()
    for room in rooms:
        bookmarks.add_conference(
            room,
            self.nick,
            autojoin=True
        )
    await storage.set_bookmarks(bookmarks)
    # NOTE An alternative based on XEP-0402 (BookmarkStorage, Conference)
    # was tried here and is disabled.
async def close_groupchat(self, muc_jid):
    """
    Say farewell to a groupchat, remove its bookmark and leave it.

    Parameters
    ----------
    self : client instance
        Provides the XEP-0045 plugin, nick, boundjid and send_message.
    muc_jid : str
        Jabber ID of the groupchat.

    Returns
    -------
    None.
    """
    messages = [
        "Whenever you need an RSS service again, "
        "please dont hesitate to contact me.",
        "My personal contact is xmpp:{}?message".format(self.boundjid.bare),
        "Farewell, and take care."
    ]
    for message in messages:
        self.send_message(
            mto=muc_jid,
            mbody=message,
            mtype="groupchat"
        )
    # The bookmark helper is a function of this module, not a method of
    # the client.
    await remove_groupchat_from_bookmarks(self, muc_jid)
    self.plugin['xep_0045'].leave_muc(
        muc_jid,
        self.nick,
        "Goodbye!",
        self.boundjid.bare
    )
async def remove_groupchat_from_bookmarks(self, muc_jid):
    """
    Delete a groupchat from the bookmarks, if it is stored there.

    The bookmark list is rebuilt from the JIDs of the remaining
    conferences; every entry is written with the nickname of the bot and
    autojoin enabled.

    Parameters
    ----------
    self : client instance
        Provides the XEP-0048 plugin and nick.
    muc_jid : str
        Jabber ID of the groupchat.

    Returns
    -------
    None.
    """
    storage = self.plugin['xep_0048']
    result = await storage.get_bookmarks()
    stored = result["private"]["bookmarks"]["conferences"]
    rooms = [conference["jid"] for conference in stored]
    if muc_jid not in rooms:
        return
    # list.remove drops the first occurrence only.
    rooms.remove(muc_jid)
    bookmarks = Bookmarks()
    for room in rooms:
        bookmarks.add_conference(
            room,
            self.nick,
            autojoin=True
        )
    await storage.set_bookmarks(bookmarks)

0
slixfeed/xmpp/service.py Normal file
View file

10
slixfeed/xmpp/status.py Normal file
View file

@ -0,0 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
def process_task_message(self, jid, status_message):
    """
    Send a "dnd" presence carrying a progress message to a JID.

    Parameters
    ----------
    self : client instance
        Provides send_presence.
    jid : str
        Jabber ID to address the presence to.
    status_message : str
        Text to show as status.

    Returns
    -------
    None.
    """
    presence = {
        "pshow": "dnd",
        "pstatus": status_message,
        "pto": jid,
    }
    self.send_presence(**presence)

271
slixfeed/xmpp/text.py Normal file
View file

@ -0,0 +1,271 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
def print_info():
    """
    Print information.

    Returns
    -------
    msg : str
        Message: the "about" text, wrapped in a code fence (```).
    """
    # Every line of the text ends with a newline and sections are
    # separated by a blank line; a few line breaks were missing, which
    # fused "data." with "Visit ..." and "protocol." with "FILETYPES".
    msg = (
        "```"
        "\n"
        "ABOUT\n"
        " Slixfeed aims to be an easy to use and fully-featured news\n"
        " aggregator bot for XMPP. It provides a convenient access to Blogs,\n"
        " Fediverse and News websites along with filtering functionality.\n"
        "\n"
        " Slixfeed is primarily designed for XMPP (aka Jabber).\n"
        " Visit https://xmpp.org/software/ for more information.\n"
        "\n"
        " XMPP is the Extensible Messaging and Presence Protocol, a set\n"
        " of open technologies for instant messaging, presence, multi-party\n"
        " chat, voice and video calls, collaboration, lightweight\n"
        " middleware, content syndication, and generalized routing of XML\n"
        " data.\n"
        " Visit https://xmpp.org/about/ for more information on the XMPP\n"
        " protocol.\n"
        "\n"
        # "PLATFORMS\n"
        # " Supported protocols are IRC, Matrix, Tox and XMPP.\n"
        # " For the best experience, we recommend you to use XMPP.\n"
        # "\n"
        "FILETYPES\n"
        " Supported filetypes: Atom, RDF, RSS and XML.\n"
        "\n"
        "PROTOCOLS\n"
        " Supported protocols: Dat, FTP, Gemini, Gopher, HTTP and IPFS.\n"
        "\n"
        "AUTHORS\n"
        " Laura Harbinger, Schimon Zackary.\n"
        "\n"
        "THANKS\n"
        " Christian Dersch (SalixOS),"
        " Cyrille Pontvieux (SalixOS, France),"
        "\n"
        " Denis Fomin (Gajim, Russia),"
        " Dimitris Tzemos (SalixOS, Greece),"
        "\n"
        " Emmanuel Gil Peyrot (poezio, France),"
        " Florent Le Coz (poezio, France),"
        "\n"
        " George Vlahavas (SalixOS, Greece),"
        " Maxime Buquet (slixmpp, France),"
        "\n"
        " Mathieu Pasquet (slixmpp, France),"
        " Pierrick Le Brun (SalixOS, France),"
        "\n"
        " Remko Tronçon (Swift, Germany),"
        " Thorsten Mühlfelder (SalixOS, Germany),"
        "\n"
        " Yann Leboulanger (Gajim, France).\n"
        "\n"
        # NOTE(review): the text below names the GNU General Public
        # License, whereas the header of slixfeed.py states the MIT
        # License -- confirm which one applies.
        "COPYRIGHT\n"
        " Slixfeed is free software; you can redistribute it and/or\n"
        " modify it under the terms of the GNU General Public License\n"
        " as published by the Free Software Foundation; version 3 only\n"
        "\n"
        " Slixfeed is distributed in the hope that it will be useful,\n"
        " but WITHOUT ANY WARRANTY; without even the implied warranty of\n"
        " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n"
        " GNU General Public License for more details.\n"
        "\n"
        "NOTE\n"
        " You can run Slixfeed on your own computer, server, and\n"
        " even on a Linux phone (i.e. Droidian, Kupfer, Mobian, NixOS,\n"
        " postmarketOS). You can also use Termux.\n"
        "\n"
        " All you need is one of the above and an XMPP account to\n"
        " connect Slixfeed to.\n"
        "\n"
        "DOCUMENTATION\n"
        " Slixfeed\n"
        " https://gitgud.io/sjehuda/slixfeed\n"
        " Slixmpp\n"
        " https://slixmpp.readthedocs.io/\n"
        " feedparser\n"
        " https://pythonhosted.org/feedparser\n"
        "```"
    )
    return msg
def print_help():
    """
    Compose the help manual.

    The manual is assembled from a list of lines and returned as a single
    string wrapped in a Markdown code fence. Despite its name, this
    function does not print anything itself.

    Returns
    -------
    msg : str
        Message.
    """
    # One entry per output line; an empty string is a blank separator line.
    # Entries are joined with "\n", so none of them carries its own newline.
    manual = [
        "```",
        "NAME",
        "Slixfeed - News syndication bot for Jabber/XMPP",
        "",
        "DESCRIPTION",
        " Slixfeed is a news aggregator bot for online news feeds.",
        " This program is primarily designed for XMPP.",
        " For more information, visit https://xmpp.org/software/",
        "",
        "BASIC USAGE",
        " <url>",
        " Add <url> to subscription list.",
        " add <url> TITLE",
        " Add <url> to subscription list (without validity check).",
        " join <muc>",
        " Join specified groupchat.",
        " read <url>",
        " Display most recent 20 titles of given <url>.",
        " read <url> <n>",
        " Display specified entry number from given <url>.",
        "",
        "CUSTOM ACTIONS",
        " new",
        " Send only new items of newly added feeds.",
        " old",
        " Send all items of newly added feeds.",
        " next N",
        " Send N next updates.",
        " reset",
        " Mark all entries as read and remove all archived entries",
        " reset <url>",
        " Mark entries of <url> as read and remove all archived entries of <url>.",
        " start",
        " Enable bot and send updates.",
        " stop",
        " Disable bot and stop updates.",
        "",
        "MESSAGE OPTIONS",
        " interval <num>",
        " Set interval update to every <num> minutes.",
        " length",
        " Set maximum length of news item description. (0 for no limit)",
        " quantum <num>",
        " Set <num> amount of updates per interval.",
        "",
        "GROUPCHAT OPTIONS",
        " ! (command initiation)",
        " Use exclamation mark to initiate an actionable command.",
        # Disabled groupchat entries, kept for reference:
        # " activate CODE",
        # " Activate and command bot.",
        # " demaster NICKNAME",
        # " Remove master privilege.",
        # " mastership NICKNAME",
        # " Add master privilege.",
        # " ownership NICKNAME",
        # " Set new owner.",
        "",
        "FILTER OPTIONS",
        " allow +",
        " Add keywords to allow (comma separates).",
        " allow -",
        " Delete keywords from allow list (comma separates).",
        " deny +",
        " Keywords to block (comma separates).",
        " deny -",
        " Delete keywords from deny list (comma separates).",
        # Disabled filter entries, kept for reference:
        # " filter clear allow",
        # " Reset allow list.",
        # " filter clear deny",
        # " Reset deny list.",
        "",
        "EDIT OPTIONS",
        " remove <id>",
        " Remove feed of <id> from subscription list.",
        " status <id>",
        " Toggle update status of feed of <id>.",
        "",
        "SEARCH OPTIONS",
        " feeds",
        " List all subscriptions.",
        " feeds <text>",
        " Search subscriptions by given <text>.",
        " search <text>",
        " Search news items by given <text>.",
        " recent <num>",
        " List recent <num> news items (up to 50 items).",
        "",
        # Disabled sections, kept for reference:
        # "STATISTICS OPTIONS",
        # " analyses",
        # " Show report and statistics of feeds.",
        # " obsolete",
        # " List feeds that are not available.",
        # " unread",
        # " Print number of unread news items.",
        # "",
        # "BACKUP OPTIONS",
        # " export opml",
        # " Send an OPML file with your feeds.",
        # " backup news html",
        # " Send an HTML formatted file of your news items.",
        # " backup news md",
        # " Send a Markdown file of your news items.",
        # " backup news text",
        # " Send a Plain Text file of your news items.",
        # "",
        "SUPPORT",
        " commands",
        " Print list of commands.",
        " help",
        " Print this help manual.",
        " info",
        " Print information page.",
        " support",
        " Join xmpp:slixmpp@muc.poez.io?join",
        # Disabled section, kept for reference:
        # "",
        # "PROTOCOLS",
        # " Supported protocols are IRC, Matrix and XMPP.",
        # " For the best experience, we recommend you to use XMPP.",
        # "",
        "```",
    ]
    msg = "\n".join(manual)
    return msg
def print_cmd():
    """
    Compose the list of commands.

    Each command is rendered on its own line as "<command> : <summary>",
    and the whole list is wrapped in a Markdown code fence. Despite its
    name, this function does not print anything itself.

    Returns
    -------
    msg : str
        Message.
    """
    # (command, summary) pairs, in the order they appear in the output.
    commands = (
        ("!", "Use exclamation mark to initiate an actionable command (groupchats only)."),
        ("<muc>", "Join specified groupchat."),
        ("<url>", "Add <url> to subscription list."),
        ("add <url> <title>", "Add <url> to subscription list (without validity check)."),
        ("allow +", "Add keywords to allow (comma separates)."),
        ("allow -", "Delete keywords from allow list (comma separates)."),
        ("deny +", "Keywords to block (comma separates)."),
        ("deny -", "Delete keywords from deny list (comma separates)."),
        ("feeds", "List all subscriptions."),
        ("feeds <text>", "Search subscriptions by given <text>."),
        ("interval <n>", "Set interval update to every <n> minutes."),
        ("join <muc>", "Join specified groupchat."),
        ("length", "Set maximum length of news item description. (0 for no limit)"),
        ("new", "Send only new items of newly added feeds."),
        ("next <n>", "Send <n> next updates."),
        ("old", "Send all items of newly added feeds."),
        ("quantum <n>", "Set <n> amount of updates per interval."),
        ("read <url>", "Display most recent 20 titles of given <url>."),
        ("read <url> <n>", "Display specified entry number from given <url>."),
        ("recent <n>", "List recent <n> news items (up to 50 items)."),
        ("reset", "Mark all entries as read."),
        ("reset <url>", "Mark entries of <url> as read."),
        ("remove <id>", "Remove feed from subscription list."),
        ("search <text>", "Search news items by given <text>."),
        ("start", "Enable bot and send updates."),
        ("status <id>", "Toggle update status of feed."),
        ("stop", "Disable bot and stop updates."),
    )
    row_template = "{} : {}\n"
    listing = "".join(
        row_template.format(command, summary)
        for command, summary in commands
    )
    fence = "```"
    msg = fence + "\n" + listing + fence
    return msg

File diff suppressed because it is too large Load diff