Add support for configuration files. Undo most of the changes of commit ecdcfe9c22

This commit is contained in:
Schimon Jehudah 2023-12-11 09:04:45 +00:00
parent 7c928fea25
commit d8a1799978
10 changed files with 912 additions and 327 deletions

74
accounts.ini Normal file
View file

@ -0,0 +1,74 @@
# Settings to tell the bot to which accounts to connect
# and also from which accounts it receives instructions.
[XMPP]
nickname =
username =
password =
# port = 5347
operator =
[ActivityPub]
# Not yet implemented
username =
password =
operator =
[Email]
# Not yet implemented
recipient_emails =
sender_emails =
[IMAP]
# Not yet implemented
username =
password =
# port = 993
[SMTP]
# Not yet implemented
host =
# port = 465
[IRC]
# Not yet implemented
username =
password =
# port = 6667
operator =
[LXMF]
# Not yet implemented
username =
password =
operator =
[Matrix]
# Not yet implemented
username =
password =
operator =
[Nostr]
# Not yet implemented
username =
password =
operator =
[Session]
# Not yet implemented
username =
password =
operator =
[SIP]
# Not yet implemented
username =
password =
operator =
[TOX]
# Not yet implemented
username =
password =
operator =

434
lists.yaml Normal file
View file

@ -0,0 +1,434 @@
# When a given web page has no feed auto-discovery
# set up, the following list of pathnames is used to
# scan for possible locations at which a web feed
# might be discovered.
pathnames:
- .atom
# - .json
- .rss
- .xml
# wordpress
- /?feed=atom
- /?feed=rdf
- /?feed=rss
- /?feed=xml
# phpbb
- /?format=atom
- /?format=rdf
- /?format=rss
- /?format=xml
- /app.php/feed
- /atom
- /atom.php
- /atom.xml
- /blog/feed/
- /content-feeds/
- /external.php?type=RSS2
- /en/feed/
# good practice
- /feed
- /feed.atom
# - /feed.json
- /feed.php
- /feed.rdf
- /feed.rss
- /feed.xml
- /feed/atom/
- /feeds/news_feed
- /feeds/posts/default
- /feeds/posts/default?alt=atom
- /feeds/posts/default?alt=rss
- /feeds/rss/news.xml.php
- /forum_rss.php
- /index.atom
- /index.php/feed
# simple machine forum
- /index.php?type=atom;action=.xml
- /index.php?type=rss;action=.xml
- /index.rss
- /jekyll/feed.xml
# discourse
- /latest.rss
- /news
- /news.xml
- /news.xml.php
- /news/feed
# discourse
- /posts.rss
- /rdf
- /rdf.php
- /rdf.xml
- /rss
# - /rss.json
- /rss.php
- /rss.xml
# mybb
- /syndication.php?type=atom1.0
- /syndication.php?type=rss2.0
# statusnet
- /timeline.rss
# peertube
- /videos.atom
# - /videos.json
- /videos.xml
- /xml/feed.atom
- /xml/feed.rdf
- /xml/feed.rss
# Filters can be used to protect or depress your life.
#
# Before you continue reading, please realize that the
# news has to be treated as a very annoying and bad
# salesperson who wants to guile and trick you into
# buying something that you do not need at all.
# Sometimes you just have to hang up the phone.
#
# "I always tell people: News is the most highly
# "developed form of fiction. The most difficult."
# Donald Pleasence as J.G. in "The News-Benders 1968".
#
# News is no longer an information service, if it ever
# was. News in most of the world is a means of fear.
#
# The government regulated news agencies, private and
# public, are not in your favour. You must filter in
# order to avoid having your mind subverted.
#
# Although it is recognized that the Palestinian land
# confiscation issue is indeed a problem that affects
# us all (i.e. confiscation of land without punishing).
#
# I have worked with far right wing Israeli and Zionist
# news journalists and editors for over a decade, and it
# occurred to me, time and again, that these entities
# have published factual and fictional criticism against
# themselves (the Israeli side), in order to distract
# the Israeli citizenry from removing the government
# because the Israelis wanted to better their life by
# improving the life of the average citizen.
#
# Most of my Israeli brothers and sisters are either poor
# or in debt, and the Israeli government uses the conflict
# against Palestinians and Israelis as one.
#
# By observing our problems instead of your own; you are,
# in fact, harming us and yourself.
#
# I have added keywords to deny entries that are related
# to the Palestinians, because most of the articles about
# the subject are meant to distract you too from issues
# that are much closer and more important to you.
#
# If the Americans and the Europeans will concentrate on
# Israel or Palestine, never mind which side they support,
# instead of protecting their own freedom of speech and their
# freedom to keep and bear arms, then tomorrow, without their
# freedoms, they would not even be able to express any
# opinion on any matter.
#
# If you want to harm yourself, your family and friends and
# the Chinese, Israelis, Palestinians, Tibetans, then you are
# free to remove the filters and concentrate on issues that
# do not really matter.
#
# But if you really care for Chinese, Israelis, Palestinians,
# Tibetans, then you must concentrate your efforts on your
# local area, care for your own neighbours and your own race,
# for uncompromising freedom and liberty for all, because
# otherwise none of us will have any.
#
# It is "all of us for all of us" or we are on our own.
# And it has to start locally. There is no other way.
#
# You are advised to look into the keywords and share
# yours with us, so people would get the best out of
# this news application.
# Entries with the following keywords will not be filtered
filter-allow:
- earthing
- gaddafi
- gadhafi
- hitler
- jabber
- marijuana
- ossad
- qaddafi
- qadhafi
- risc
- sadam
- saddam
- war crim
- ware
- xmpp
# Entries with the following keywords will be filtered
filter-deny:
# brands
# Almost every time you see a brand name in title or content,
# it is because someone, usually a marketing agency or a
# venture capital firm, has paid for it to be mentioned, not
# because an editor has found it useful or interesting.
- airbnb
- android
- at&t
- booking
- discord
- facebook
- gmail
- google
- gsoc
- instagram
- ipad
- iphone
- ipod
- microsoft
- mozilla
- myspace
- netflix
- openai
- pinterest
- robinhood
- snapchat
- spotify
- tumblr
- twitter
- verizon
- waze
- whatsapp
- wikimedia
- wikipedia
- wix
- yahoo
- youtube
# consume whore
- black friday
- coupon
- gambl
# death
- corona
- covid
- crime
- criminal
- dead
- death
- die
- disaster
- doomsday
- murder
- vaccine
- vax
- war
# degeneracy
- gay
- gender fluid
- gender identity
- homosex
- lesbian
- lgbt
- nude
- nudity
- porn
- pr0n
- prostitut
- queen
- queer
- tranny
- trans # transexual transgender transsexual
- whor
- xham
- xvid
# distraction figures
- el aviv
- el-aviv
- gaza
- gazza
- hamas
- hammas
- harari
- harary
- holocaust
- idf
- israel
- jerus
- lex jon
- lon mus
- netanya
- nfowar
- oxnew
- palestin
- trump
- west bank
# fiction
- astronaut
- meteor
- nasa
- space
# names
- boomer
- gen z
# substances
- 🚬
- alcohol
- cigar
- drug
# The following index was generated from machine learning
# scan of political and scientific forums gathered within
# a duration of 6 hours; this index is a set of value(s)
# (phrases and words) to be replaced by a value (key).
#
# While none of it reflects any of the opinions of the author
# of this program, you might find it amusing, entertaining,
# essential, important or all of the above or perhaps just
# a joke of "1984" or "They Live" to experience what it
# feels like.
#
# While this set of filters is intended merely for example
# purposes only to realize the power one might have when
# given the liberty to control contents received, this
# machine-learning generated set is surprisingly effective
# and efficient.
#
# The filtering ability was primarily meant to solve an issue
# which my orthodox rabbi has asked me to solve for him.
# That set, which he and his community use, is not included.
# Replace words and phrases
filter-replace:
"CIA-backed riot group":
# We are all using 4chan; yet you have got to wonder how
# is it still allowed online in a centralized DNS system.
# If 4chan was served only in I2P, IPFS, Nostr, Plebbit
# or Yggdrasil then I would not think so.
- 4chan
- al qaeda
- al-qaeda
- antifa
- black lives matter
- da'ish
- daish
- isil
- isis
- ku klux klan
"CIA-controlled publishing agency":
- abc news
- cbn news
- cnbc news
- fox news
- foxnews
- msn news
- sky news
- skynews
"dystopian":
- orwellian
"Earth is Horizontal":
- earth is flat
- flatearth
- flat earth
"electro magnetic death antenna":
- 5g
"electro magnetic water shaker antenna":
- 2.4ghz
"electro magnetic air shaker antenna":
- 5ghz
- 5.0ghz
"Federal Government of USA":
- biden administration
- biden admin
- biden government
- bush administration
- bush admin
- bush government
- obama administration
- obama admin
- obama government
- trump administration
- trump admin
- trump government
"fictional weapon":
- atom bomb
- atomic bomb
- nukes
- nuclear bomb
"firmament":
- atmosphere
- ozone
# People who have been made to believe that earth, moon and sun are
# flying rocks traveling in an impossible vacuum of nothingness.
"globe believer":
- globtard
"Government Weather Change Scam":
- climate change
- global warming
"hazardous":
- gmo
"Human Trafficking Group":
- prostitution ring
"Human Trafficking Industry":
- porn industry
- pornographic industry
- pornography industry
- sex industry
"impossible war":
- atomic war
- nuclear war
# People who claim that earth is plane, and is enclosed
# by a firmament that separates earth from water, not
# space.
"Horizontal Earth Activist":
- flatearther
- flat earther
- globe sceptic
"internationalism":
- globalism
"internationalist":
- globalist
# Better options might be Freenet, Mixnet
"Meshnet":
- darknet
- dark net
"MI5-controlled publishing agency":
- bbc news
- the guardian
"Misleading Digital Coin":
- oecd
- shit coin
"NSDAP":
- nazi
"online content publishing platform":
- facebook
- instagram
- mastodon
- myspace
- pinterest
- tumblr
- twitter
- wix
- wordpress
"petroleum":
- fossil fuel
- fossil oil
"plane":
- planet
"poison":
- flouride
- fluoride
"poisoned":
- flouridated
- fluoridated
"poisoning":
- flouridation
- fluoridation
"Scam Currency Trading":
- forex
"water above the firmament":
- outerspace
- outer space
"World":
- globe
"Worldwide":
- around the globe
- global
- globally

24
settings.ini Normal file
View file

@ -0,0 +1,24 @@
# This file lists default settings per database.
# See file /usr/share/slixfeed/defaults.ini
[Settings]
# Maximum items to archive (0 - 500)
archive = 50
# Work status (Value 0 to disable)
enabled = 1
# Update interval (Minimum value)
interval = 300
# Maximum length of summary (Value 0 to disable)
length = 300
# Mark entries of newly added feeds as unread
old = 0
# Amount of entries per update
quantum = 3
# Pick random item from database
random = 0

View file

@ -50,14 +50,16 @@ TODO
https://github.com/michael-lazar/pygopherd
https://github.com/gopherball/gb
13) Support ActivityPub @person@domain (see Tip Of The Day).
14) Support ActivityPub @person@domain (see Tip Of The Day).
12) Tip Of The Day.
15) Tip Of The Day.
Did you know that you can follow your favorite Mastodon feeds by just
sending the URL address?
Supported fediverse websites are:
Akkoma, HubZilla, Mastodon, Misskey, Pixelfed, Pleroma, Soapbox.
16) Brand: News Broker, Newsman, Newsdealer, Laura Harbinger
"""
# vars and their meanings:
@ -65,8 +67,12 @@ TODO
# res = response (HTTP)
from argparse import ArgumentParser
import configparser
import filehandler
# from filehandler import get_default_confdir
from getpass import getpass
import logging
import os
# from datetime import date
# import time
@ -87,37 +93,86 @@ if __name__ == '__main__':
# Output verbosity options.
parser.add_argument(
"-q", "--quiet", help="set logging to ERROR",
action="store_const", dest="loglevel",
const=logging.ERROR, default=logging.INFO
)
"-q",
"--quiet",
help="set logging to ERROR",
action="store_const",
dest="loglevel",
const=logging.ERROR,
default=logging.INFO
)
parser.add_argument(
"-d", "--debug", help="set logging to DEBUG",
action="store_const", dest="loglevel",
const=logging.DEBUG, default=logging.INFO
)
"-d",
"--debug",
help="set logging to DEBUG",
action="store_const",
dest="loglevel",
const=logging.DEBUG,
default=logging.INFO
)
# JID and password options.
parser.add_argument("-j", "--jid", dest="jid",
help="JID to use")
parser.add_argument("-p", "--password", dest="password",
help="password to use")
parser.add_argument(
"-j",
"--jid",
dest="jid",
help="Jabber ID"
)
parser.add_argument(
"-p",
"--password",
dest="password",
help="Password of JID"
)
parser.add_argument(
"-n",
"--nickname",
dest="nickname",
help="Display name"
)
args = parser.parse_args()
# Setup logging.
logging.basicConfig(level=args.loglevel,
format='%(levelname)-8s %(message)s')
logging.basicConfig(
level=args.loglevel,
format='%(levelname)-8s %(message)s'
)
# Resolve the account credentials. Order of precedence:
# 1) command line arguments;
# 2) section [XMPP] of file accounts.ini;
# 3) interactive prompt for whatever is still missing.

# Try configuration file
config = configparser.RawConfigParser()
config_dir = filehandler.get_default_confdir()
if not os.path.isdir(config_dir):
    # makedirs also creates missing parent directories,
    # where mkdir would fail.
    os.makedirs(config_dir, exist_ok=True)
    # TODO Copy file from /etc/slixfeed/ or /usr/share/slixfeed/
config_file = os.path.join(config_dir, r"accounts.ini")
# read() silently ignores a file that does not exist.
config.read(config_file)

# Bind the names up front, so that they exist even when the
# file has no section [XMPP] (or there is no file at all).
nickname = None
username = None
password = None
if config.has_section("XMPP"):
    account = config["XMPP"]
    nickname = account.get("nickname")
    username = account.get("username")
    password = account.get("password")

# Use arguments if were given
if args.jid:
    username = args.jid
if args.password:
    password = args.password
if args.nickname:
    nickname = args.nickname

# Prompt for credentials if none were given.
# A key that is left empty in the file ("username =") is read
# as an empty string, not as None, hence the truthiness test.
if not username:
    username = input("Username: ")
if not password:
    password = getpass("Password: ")
if not nickname:
    nickname = input("Nickname: ")
# Setup the Slixfeed and register plugins. Note that while plugins may
# have interdependencies, the order in which you register them does
# not matter.
xmpp = Slixfeed(args.jid, args.password)
xmpp = Slixfeed(username, password, nickname)
xmpp.register_plugin('xep_0004') # Data Forms
xmpp.register_plugin('xep_0030') # Service Discovery
xmpp.register_plugin('xep_0045') # Multi-User Chat

View file

@ -11,10 +11,12 @@ TODO
"""
import configparser
# from filehandler import get_default_confdir
import filehandler
import os
from filehandler import get_default_confdir
from random import randrange
import yaml
async def get_value_default(key):
    """
    Get settings default value.

    The value is read from section "Settings" of file
    settings.ini. The file of the user configuration directory
    is tried first; when it does not provide the key, the
    system-wide file at /usr/share/slixfeed/ is tried next.

    Parameters
    ----------
    key : str
        Key: archive, enabled, interval,
        length, old, quantum, random.

    Returns
    -------
    result : str
        Value, as the text written in the file. Callers
        that need a number have to convert it.

    Raises
    ------
    KeyError
        If none of the settings files provides the key.
    """
    config_dirs = (
        filehandler.get_default_confdir(),
        '/usr/share/slixfeed/'
        )
    for config_dir in config_dirs:
        config_file = os.path.join(config_dir, r"settings.ini")
        config = configparser.RawConfigParser()
        # read() silently skips a file that does not exist, so
        # the configuration directory may exist without the file.
        config.read(config_file)
        if config.has_option("Settings", key):
            result = config.get("Settings", key)
            return result
    raise KeyError(
        "No default value for key '{}' in settings.ini".format(key)
        )
async def get_list(key):
    """
    Get a default list from file lists.yaml.

    The file of the user configuration directory is preferred;
    when it is not there, the system-wide file at
    /usr/share/slixfeed/ is used instead.

    Parameters
    ----------
    key : str
        Key: filter-allow, filter-deny, filter-replace, pathnames.

    Returns
    -------
    result : list
        List of pathnames or keywords.
        NOTE For key filter-replace the file holds a mapping
        of replacement to list of phrases, not a flat list.

    Raises
    ------
    FileNotFoundError
        If lists.yaml is found in neither directory.
    KeyError
        If the file has no such key.
    """
    config_dir = filehandler.get_default_confdir()
    config_file = os.path.join(config_dir, r"lists.yaml")
    # Test for the file, not for the directory; the directory
    # may have been created without any file placed in it.
    if not os.path.isfile(config_file):
        config_dir = '/usr/share/slixfeed/'
        config_file = os.path.join(config_dir, r"lists.yaml")
    # The file contains non-ASCII characters, so do not depend
    # on the encoding of the current locale.
    with open(config_file, encoding="utf-8") as defaults:
        default = yaml.safe_load(defaults)
    result = default[key]
    return result

View file

@ -25,16 +25,7 @@ from lxml import html
from datetimehandler import now, rfc2822_to_iso8601
from confighandler import get_list
from listhandler import is_listed
from sqlitehandler import (
add_entry_and_set_date,
insert_feed,
check_entry_exist,
check_feed_exist,
get_feeds_url,
remove_nonexistent_entries,
update_source_status,
update_source_validity
)
import sqlitehandler as sqlite
from urllib import error
# from xml.etree.ElementTree import ElementTree, ParseError
from urllib.parse import urljoin, urlsplit, urlunsplit
@ -54,7 +45,7 @@ async def download_updates(db_file, url=None):
if url:
urls = [url] # Valid [url] and [url,] and (url,)
else:
urls = await get_feeds_url(db_file)
urls = await sqlite.get_feeds_url(db_file)
for url in urls:
# print(os.path.basename(db_file), url[0])
source = url[0]
@ -65,7 +56,7 @@ async def download_updates(db_file, url=None):
# urls.next()
# next(urls)
continue
await update_source_status(
await sqlite.update_source_status(
db_file,
res[1],
source
@ -83,7 +74,7 @@ async def download_updates(db_file, url=None):
valid = 0
else:
valid = 1
await update_source_validity(
await sqlite.update_source_validity(
db_file,
source,
valid)
@ -108,7 +99,7 @@ async def download_updates(db_file, url=None):
entries = feed.entries
# length = len(entries)
# await remove_entry(db_file, source, length)
await remove_nonexistent_entries(
await sqlite.remove_nonexistent_entries(
db_file,
feed,
source
@ -145,7 +136,7 @@ async def download_updates(db_file, url=None):
eid = entry.id
else:
eid = link
exist = await check_entry_exist(
exist = await sqlite.check_entry_exist(
db_file,
source,
eid=eid,
@ -212,7 +203,7 @@ async def download_updates(db_file, url=None):
# breakpoint()
print(source)
print(date)
await add_entry_and_set_date(
await sqlite.add_entry_and_set_date(
db_file,
source,
entry
@ -412,9 +403,9 @@ async def add_feed_no_check(db_file, data):
url = data[0]
title = data[1]
url = await trim_url(url)
exist = await check_feed_exist(db_file, url)
exist = await sqlite.check_feed_exist(db_file, url)
if not exist:
msg = await insert_feed(db_file, url, title)
msg = await sqlite.insert_feed(db_file, url, title)
await download_updates(db_file, [url])
else:
ix = exist[0]
@ -445,7 +436,7 @@ async def add_feed(db_file, url):
"""
msg = None
url = await trim_url(url)
exist = await check_feed_exist(db_file, url)
exist = await sqlite.check_feed_exist(db_file, url)
if not exist:
res = await download_feed(url)
if res[0]:
@ -459,7 +450,7 @@ async def add_feed(db_file, url):
msg = await probe_page(add_feed, url, res[0], db_file=db_file)
else:
status = res[1]
msg = await insert_feed(
msg = await sqlite.insert_feed(
db_file,
url,
title,
@ -777,7 +768,7 @@ async def feed_mode_request(url, tree):
"""
feeds = {}
parted_url = urlsplit(url)
paths = get_list()
paths = await get_list("pathnames")
for path in paths:
address = urlunsplit([
parted_url.scheme,
@ -877,7 +868,7 @@ async def feed_mode_scan(url, tree):
feeds = {}
# paths = []
# TODO Test
paths = get_list()
paths = await get_list("pathnames")
for path in paths:
# xpath_query = "//*[@*[contains(.,'{}')]]".format(path)
xpath_query = "//a[contains(@href,'{}')]".format(path)

View file

@ -72,8 +72,7 @@ def get_default_confdir():
config_home = os.path.join(os.environ.get('HOME'), '.config')
return os.path.join(config_home, 'slixfeed')
# NOTE Perhaps this needs to be executed
# just once per program execution
async def initdb(jid, callback, message=None):
"""
Callback function to instantiate action on database.

View file

@ -15,7 +15,7 @@ TODO
"""
from sqlitehandler import get_settings_value
import sqlitehandler as sqlite
async def add_to_list(newwords, keywords):
@ -34,10 +34,11 @@ async def add_to_list(newwords, keywords):
val : str
List of current keywords and new keywords.
"""
try:
keywords = keywords.split(",")
except:
keywords = []
if isinstance(keywords, str) or keywords is None:
try:
keywords = keywords.split(",")
except:
keywords = []
newwords = newwords.lower().split(",")
for word in newwords:
word = word.strip()
@ -64,10 +65,11 @@ async def remove_from_list(newwords, keywords):
val : str
List of new keywords.
"""
try:
keywords = keywords.split(",")
except:
keywords = []
if isinstance(keywords, str) or keywords is None:
try:
keywords = keywords.split(",")
except:
keywords = []
newwords = newwords.lower().split(",")
for word in newwords:
word = word.strip()
@ -98,7 +100,7 @@ async def is_listed(db_file, key, string):
"""
# async def reject(db_file, string):
# async def is_blacklisted(db_file, string):
list = await get_settings_value(
list = await sqlite.get_filters_value(
db_file,
key
)
@ -118,37 +120,37 @@ async def is_listed(db_file, key, string):
This code was tested at module datahandler
reject = 0
blacklist = await get_settings_value(
db_file,
"filter-deny"
)
# print(">>> blacklist:")
# print(blacklist)
# breakpoint()
if blacklist:
blacklist = blacklist.split(",")
# print(">>> blacklist.split")
# print(blacklist)
# breakpoint()
for i in blacklist:
# print(">>> length", len(i))
# breakpoint()
# if len(i):
if not i or len(i) < 2:
print(">>> continue due to length", len(i))
# breakpoint()
continue
# print(title)
# print(">>> blacklisted word:", i)
# breakpoint()
test = (title + " " + summary + " " + link)
if i in test.lower():
reject = 1
break
if reject:
print("rejected:",title)
entry = (title, '', link, source, date, 1);
reject = 0
blacklist = await get_settings_value(
db_file,
"filter-deny"
)
# print(">>> blacklist:")
# print(blacklist)
# breakpoint()
if blacklist:
blacklist = blacklist.split(",")
# print(">>> blacklist.split")
# print(blacklist)
# breakpoint()
for i in blacklist:
# print(">>> length", len(i))
# breakpoint()
# if len(i):
if not i or len(i) < 2:
print(">>> continue due to length", len(i))
# breakpoint()
continue
# print(title)
# print(">>> blacklisted word:", i)
# breakpoint()
test = (title + " " + summary + " " + link)
if i in test.lower():
reject = 1
break
if reject:
print("rejected:",title)
entry = (title, '', link, source, date, 1);
"""

View file

@ -19,7 +19,7 @@ from asyncio import Lock
from bs4 import BeautifulSoup
from datetime import date
# from slixfeed.confighandler import get_value_default
import confighandler as confighandler
import confighandler as config
# from slixfeed.datahandler import join_url
import datahandler as datahandler
from datetimehandler import current_time, rfc2822_to_iso8601
@ -122,6 +122,13 @@ def create_tables(db_file):
"value INTEGER"
");"
)
filters_table_sql = (
"CREATE TABLE IF NOT EXISTS filters ("
"id INTEGER PRIMARY KEY,"
"key TEXT NOT NULL,"
"value TEXT"
");"
)
cur = conn.cursor()
# cur = get_cursor(db_file)
cur.execute(feeds_table_sql)
@ -129,6 +136,7 @@ def create_tables(db_file):
cur.execute(archive_table_sql)
# cur.execute(statistics_table_sql)
cur.execute(settings_table_sql)
cur.execute(filters_table_sql)
def get_cursor(db_file):
@ -911,7 +919,13 @@ async def maintain_archive(cur, limit):
"FROM archive"
)
count = cur.execute(sql).fetchone()[0]
reduc = count - limit
# FIXME Upon first time joining to a groupchat
# and then adding a URL, variable "limit"
# becomes a string in one of the iterations.
# if isinstance(limit,str):
# print("STOP")
# breakpoint()
reduc = count - int(limit)
if reduc > 0:
sql = (
"DELETE FROM archive "
@ -1046,7 +1060,7 @@ async def remove_nonexistent_entries(db_file, feed, source):
cur.execute(sql, (ix,))
except:
print(
"ERROR DB inset from entries "
"ERROR DB insert from entries "
"into archive at index", ix
)
sql = (
@ -1456,12 +1470,11 @@ async def set_settings_value(db_file, key_value):
"WHERE key = :key"
)
cur.execute(sql, {
"key": key,
"key": key,
"value": val
})
# TODO Place settings also in a file
async def set_settings_value_default(cur, key):
"""
Set default settings value, if no value found.
@ -1494,7 +1507,7 @@ async def set_settings_value_default(cur, key):
)
cur.execute(sql, (key,))
if not cur.fetchone():
val = await confighandler.get_value_default(key)
val = await config.get_value_default(key)
sql = (
"INSERT "
"INTO settings(key,value) "
@ -1513,7 +1526,8 @@ async def get_settings_value(db_file, key):
db_file : str
Path to database file.
key : str
Key: "enabled", "interval", "master", "quantum", "random".
Key: archive, enabled, filter-allow, filter-deny,
interval, length, old, quantum, random.
Returns
-------
@ -1545,3 +1559,109 @@ async def get_settings_value(db_file, key):
if not val:
val = await set_settings_value_default(cur, key)
return val
async def set_filters_value(db_file, key_value):
    """
    Set filters value.

    Parameters
    ----------
    db_file : str
        Path to database file.
    key_value : list
        key : str
            filter-allow, filter-deny, filter-replace.
        value : str
            Value to store for the given key.
    """
    params = {
        "key": key_value[0],
        "value": key_value[1]
        }
    statement = (
        "UPDATE filters "
        "SET value = :value "
        "WHERE key = :key"
        )
    async with DBLOCK:
        with create_connection(db_file) as conn:
            cur = conn.cursor()
            # Insert the default row first if this key has no
            # row yet, so that the UPDATE has a row to change.
            await set_filters_value_default(cur, params["key"])
            cur.execute(statement, params)
async def set_filters_value_default(cur, key):
    """
    Set default filters value, if no value found.

    When table filters already has a row for the key, nothing
    is written and the stored value is returned. Otherwise the
    default list is fetched by config.get_list, joined with
    commas, inserted and returned.

    Parameters
    ----------
    cur : object
        Cursor object.
    key : str
        Key: filter-allow, filter-deny, filter-replace.

    Returns
    -------
    val : str
        Comma-separated keywords.
    """
    sql = (
        "SELECT value "
        "FROM filters "
        "WHERE key = ?"
        )
    cur.execute(sql, (key,))
    row = cur.fetchone()
    if row:
        # A row is already present; hand back what is stored
        # instead of leaving the caller without a value.
        return row[0]
    val = await config.get_list(key)
    # NOTE(review): for key filter-replace the default is a
    # mapping, so this joins only its keys - confirm intent.
    val = ",".join(val)
    sql = (
        "INSERT "
        "INTO filters(key,value) "
        "VALUES(?,?)"
        )
    cur.execute(sql, (key, val))
    return val
async def get_filters_value(db_file, key):
    """
    Get filters value.

    When no value is stored for the key (no row, or an empty
    value), set_filters_value_default is called and its
    result is returned.

    Parameters
    ----------
    db_file : str
        Path to database file.
    key : str
        Key: filter-allow, filter-deny.

    Returns
    -------
    val : str
        Comma-separated keywords.
    """
    with create_connection(db_file) as conn:
        cur = conn.cursor()
        sql = (
            "SELECT value "
            "FROM filters "
            "WHERE key = ?"
            )
        # fetchone() gives None when there is no row for the
        # key; test for that instead of catching every error.
        row = cur.execute(sql, (key,)).fetchone()
        val = row[0] if row else None
        if not val:
            val = await set_filters_value_default(cur, key)
    return val

View file

@ -57,35 +57,13 @@ import logging
import slixmpp
from slixmpp.exceptions import IqError
from random import randrange
from datahandler import (
add_feed,
add_feed_no_check,
check_xmpp_uri,
feed_to_http,
view_entry,
view_feed
)
import datahandler as fetcher
from datetimehandler import current_time
from filehandler import initdb
from listhandler import add_to_list, remove_from_list
from sqlitehandler import (
get_settings_value,
set_settings_value,
mark_source_as_read,
last_entries,
list_feeds,
remove_feed,
search_feeds,
statistics,
toggle_status
)
from taskhandler import (
clean_tasks_xmpp,
start_tasks_xmpp,
refresh_task,
send_status,
send_update
)
import listhandler as lister
import sqlitehandler as sqlite
import taskhandler as tasker
from slixmpp.plugins.xep_0363.http_upload import FileTooBig, HTTPError, UploadServiceNotFound
# from slixmpp.plugins.xep_0402 import BookmarkStorage, Conference
from slixmpp.plugins.xep_0048.stanza import Bookmarks
@ -116,9 +94,11 @@ class Slixfeed(slixmpp.ClientXMPP):
-------
News bot that sends updates from RSS feeds.
"""
def __init__(self, jid, password, room=None, nick=None):
def __init__(self, jid, password, nick):
slixmpp.ClientXMPP.__init__(self, jid, password)
self.nick = nick
# The session_start event will be triggered when
# the bot establishes its connection with the server
# and the XML streams are ready for use. We want to
@ -192,11 +172,11 @@ class Slixfeed(slixmpp.ClientXMPP):
# print("def presence_available", presence["from"].bare)
if presence["from"].bare not in self.boundjid.bare:
jid = presence["from"].bare
await clean_tasks_xmpp(
await tasker.clean_tasks_xmpp(
jid,
["interval", "status", "check"]
)
await start_tasks_xmpp(
await tasker.start_tasks_xmpp(
self,
jid,
["interval", "status", "check"]
@ -209,7 +189,7 @@ class Slixfeed(slixmpp.ClientXMPP):
if not self.boundjid.bare:
jid = presence["from"].bare
print(">>> unavailable:", jid)
await clean_tasks_xmpp(
await tasker.clean_tasks_xmpp(
jid,
["interval", "status", "check"]
)
@ -274,7 +254,7 @@ class Slixfeed(slixmpp.ClientXMPP):
print(muc_jid)
self.plugin['xep_0045'].join_muc(
muc_jid,
"Slixfeed (RSS News Bot)",
self.nick,
# If a room password is needed, use:
# password=the_room_password,
)
@ -299,7 +279,7 @@ class Slixfeed(slixmpp.ClientXMPP):
for muc in mucs:
bookmarks.add_conference(
muc,
"Slixfeed (RSS News Bot)",
self.nick,
autojoin=True
)
await self.plugin['xep_0048'].set_bookmarks(bookmarks)
@ -308,7 +288,7 @@ class Slixfeed(slixmpp.ClientXMPP):
# print(await self.plugin['xep_0048'].get_bookmarks())
# bm = BookmarkStorage()
# bm.conferences.append(Conference(muc_jid, autojoin=True, nick="Slixfeed (RSS News Bot)"))
# bm.conferences.append(Conference(muc_jid, autojoin=True, nick=self.nick))
# await self['xep_0402'].publish(bm)
@ -333,13 +313,13 @@ class Slixfeed(slixmpp.ClientXMPP):
for muc in mucs:
bookmarks.add_conference(
muc,
"Slixfeed (RSS News Bot)",
self.nick,
autojoin=True
)
await self.plugin['xep_0048'].set_bookmarks(bookmarks)
self.plugin['xep_0045'].leave_muc(
muc_jid,
"Slixfeed (RSS News Bot)",
self.nick,
"Goodbye!",
self.boundjid.bare
)
@ -355,7 +335,7 @@ class Slixfeed(slixmpp.ClientXMPP):
print(muc)
self.plugin['xep_0045'].join_muc(
muc,
"Slixfeed (RSS News Bot)",
self.nick,
# If a room password is needed, use:
# password=the_room_password,
)
@ -414,10 +394,10 @@ class Slixfeed(slixmpp.ClientXMPP):
print(message)
if message["type"] in ("chat", "normal"):
jid = message["from"].bare
await refresh_task(
await tasker.refresh_task(
self,
jid,
send_status,
tasker.send_status,
"status",
20
)
@ -446,11 +426,11 @@ class Slixfeed(slixmpp.ClientXMPP):
jid = presence["from"].bare
if presence["show"] in ("away", "dnd", "xa"):
print(">>> away, dnd, xa:", jid)
await clean_tasks_xmpp(
await tasker.clean_tasks_xmpp(
jid,
["interval"]
)
await start_tasks_xmpp(
await tasker.start_tasks_xmpp(
self,
jid,
["status", "check"]
@ -539,7 +519,7 @@ class Slixfeed(slixmpp.ClientXMPP):
self.send_presence_subscription(
pto=jid,
ptype="subscribe",
pnick="Slixfeed RSS News Bot"
pnick=self.nick
)
self.update_roster(
jid,
@ -551,23 +531,27 @@ class Slixfeed(slixmpp.ClientXMPP):
pto=jid,
pfrom=self.boundjid.bare,
ptype="subscribe",
pnick="Slixfeed RSS News Bot"
pnick=self.nick
)
self.send_message(
mto=jid,
# mtype="headline",
msubject="RSS News Bot",
mbody="Accept subscription request to receive updates.",
mbody=(
"Accept subscription request to receive updates"
),
mfrom=self.boundjid.bare,
mnick="Slixfeed RSS News Bot"
mnick=self.nick
)
self.send_presence(
pto=jid,
pfrom=self.boundjid.bare,
# Accept symbol 🉑️ 👍️ ✍
pstatus="✒️ Accept subscription request to receive updates",
pstatus=(
"✒️ Accept subscription request to receive updates"
),
# ptype="subscribe",
pnick="Slixfeed RSS News Bot"
pnick=self.nick
)
@ -582,7 +566,7 @@ class Slixfeed(slixmpp.ClientXMPP):
pto=jid,
pfrom=self.boundjid.bare,
pstatus="🖋️ Subscribe to receive updates",
pnick="Slixfeed RSS News Bot"
pnick=self.nick
)
self.send_message(
mto=jid,
@ -617,7 +601,7 @@ class Slixfeed(slixmpp.ClientXMPP):
# nick = msg["from"][msg["from"].index("/")+1:]
nick = str(msg["from"])
nick = nick[nick.index("/")+1:]
if (msg['muc']['nick'] == "Slixfeed (RSS News Bot)" or
if (msg['muc']['nick'] == self.nick or
not msg["body"].startswith("!")):
return
# token = await initdb(
@ -732,21 +716,21 @@ class Slixfeed(slixmpp.ClientXMPP):
if url.startswith("http"):
action = await initdb(
jid,
add_feed_no_check,
fetcher.add_feed_no_check,
[url, title]
)
old = await initdb(
jid,
get_settings_value,
sqlite.get_settings_value,
"old"
)
if old:
await clean_tasks_xmpp(
await tasker.clean_tasks_xmpp(
jid,
["status"]
)
# await send_status(jid)
await start_tasks_xmpp(
await tasker.start_tasks_xmpp(
self,
jid,
["status"]
@ -754,7 +738,7 @@ class Slixfeed(slixmpp.ClientXMPP):
else:
await initdb(
jid,
mark_source_as_read,
sqlite.mark_source_as_read,
url
)
else:
@ -765,16 +749,16 @@ class Slixfeed(slixmpp.ClientXMPP):
if val:
keywords = await initdb(
jid,
get_settings_value,
sqlite.get_filters_value,
key
)
val = await add_to_list(
val = await lister.add_to_list(
val,
keywords
)
await initdb(
jid,
set_settings_value,
sqlite.set_filters_value,
[key, val]
)
action = (
@ -789,16 +773,16 @@ class Slixfeed(slixmpp.ClientXMPP):
if val:
keywords = await initdb(
jid,
get_settings_value,
sqlite.get_filters_value,
key
)
val = await remove_from_list(
val = await lister.remove_from_list(
val,
keywords
)
await initdb(
jid,
set_settings_value,
sqlite.set_filters_value,
[key, val]
)
action = (
@ -816,7 +800,7 @@ class Slixfeed(slixmpp.ClientXMPP):
else:
await initdb(
jid,
set_settings_value,
sqlite.set_settings_value,
[key, val]
)
action = (
@ -830,16 +814,16 @@ class Slixfeed(slixmpp.ClientXMPP):
if val:
keywords = await initdb(
jid,
get_settings_value,
sqlite.get_filters_value,
key
)
val = await add_to_list(
val = await lister.add_to_list(
val,
keywords
)
await initdb(
jid,
set_settings_value,
sqlite.set_filters_value,
[key, val]
)
action = (
@ -854,16 +838,16 @@ class Slixfeed(slixmpp.ClientXMPP):
if val:
keywords = await initdb(
jid,
get_settings_value,
sqlite.get_filters_value,
key
)
val = await remove_from_list(
val = await lister.remove_from_list(
val,
keywords
)
await initdb(
jid,
set_settings_value,
sqlite.set_filters_value,
[key, val]
)
action = (
@ -879,8 +863,8 @@ class Slixfeed(slixmpp.ClientXMPP):
message_lowercase.startswith("feed:")):
url = message
if url.startswith("feed:"):
url = await feed_to_http(url)
await clean_tasks_xmpp(
url = await fetcher.feed_to_http(url)
await tasker.clean_tasks_xmpp(
jid,
["status"]
)
@ -890,10 +874,10 @@ class Slixfeed(slixmpp.ClientXMPP):
process_task_message(self, jid, task)
action = await initdb(
jid,
add_feed,
fetcher.add_feed,
url
)
await start_tasks_xmpp(
await tasker.start_tasks_xmpp(
self,
jid,
["status"]
@ -910,16 +894,16 @@ class Slixfeed(slixmpp.ClientXMPP):
# NOTE This would show the number of new unread entries
old = await initdb(
jid,
get_settings_value,
sqlite.get_settings_value,
"old"
)
if old:
await clean_tasks_xmpp(
await tasker.clean_tasks_xmpp(
jid,
["status"]
)
# await send_status(jid)
await start_tasks_xmpp(
await tasker.start_tasks_xmpp(
self,
jid,
["status"]
@ -927,7 +911,7 @@ class Slixfeed(slixmpp.ClientXMPP):
else:
await initdb(
jid,
mark_source_as_read,
sqlite.mark_source_as_read,
url
)
case _ if message_lowercase.startswith("feeds"):
@ -936,7 +920,7 @@ class Slixfeed(slixmpp.ClientXMPP):
if len(query) > 3:
action = await initdb(
jid,
search_feeds,
sqlite.search_feeds,
query
)
else:
@ -946,7 +930,7 @@ class Slixfeed(slixmpp.ClientXMPP):
else:
action = await initdb(
jid,
list_feeds
sqlite.list_feeds
)
case "goodbye":
if msg["type"] == "groupchat":
@ -967,15 +951,15 @@ class Slixfeed(slixmpp.ClientXMPP):
# ).format(action)
await initdb(
jid,
set_settings_value,
sqlite.set_settings_value,
[key, val]
)
# NOTE Perhaps this should be replaced
# by functions clean and start
await refresh_task(
await tasker.refresh_task(
self,
jid,
send_update,
tasker.send_update,
key,
val
)
@ -985,7 +969,7 @@ class Slixfeed(slixmpp.ClientXMPP):
else:
action = "Missing value."
case _ if message_lowercase.startswith("join"):
muc = await check_xmpp_uri(message[5:])
muc = await fetcher.check_xmpp_uri(message[5:])
if muc:
"TODO probe JID and confirm it's a groupchat"
await self.join_muc(jid, muc)
@ -1002,7 +986,7 @@ class Slixfeed(slixmpp.ClientXMPP):
if val:
await initdb(
jid,
set_settings_value,
sqlite.set_settings_value,
[key, val]
)
if val == 0:
@ -1043,7 +1027,7 @@ class Slixfeed(slixmpp.ClientXMPP):
case "new":
await initdb(
jid,
set_settings_value,
sqlite.set_settings_value,
["old", 0]
)
action = (
@ -1051,11 +1035,11 @@ class Slixfeed(slixmpp.ClientXMPP):
)
case _ if message_lowercase.startswith("next"):
num = message[5:]
await clean_tasks_xmpp(
await tasker.clean_tasks_xmpp(
jid,
["interval", "status"]
)
await start_tasks_xmpp(
await tasker.start_tasks_xmpp(
self,
jid,
["interval", "status"]
@ -1078,7 +1062,7 @@ class Slixfeed(slixmpp.ClientXMPP):
case "old":
await initdb(
jid,
set_settings_value,
sqlite.set_settings_value,
["old", 1]
)
action = (
@ -1093,7 +1077,7 @@ class Slixfeed(slixmpp.ClientXMPP):
# ).format(action)
await initdb(
jid,
set_settings_value,
sqlite.set_settings_value,
[key, val]
)
action = (
@ -1111,22 +1095,22 @@ class Slixfeed(slixmpp.ClientXMPP):
"📫️ Processing request to fetch data from {}"
).format(url)
process_task_message(self, jid, task)
await clean_tasks_xmpp(
await tasker.clean_tasks_xmpp(
jid,
["status"]
)
if url.startswith("feed:"):
url = await feed_to_http(url)
url = await fetcher.feed_to_http(url)
match len(data):
case 1:
if url.startswith("http"):
action = await view_feed(url)
action = await fetcher.view_feed(url)
else:
action = "Missing URL."
case 2:
num = data[1]
if url.startswith("http"):
action = await view_entry(url, num)
action = await fetcher.view_entry(url, num)
else:
action = "Missing URL."
case _:
@ -1135,7 +1119,7 @@ class Slixfeed(slixmpp.ClientXMPP):
"`read URL` or `read URL NUMBER`\n"
"URL must not contain white space."
)
await start_tasks_xmpp(
await tasker.start_tasks_xmpp(
self,
jid,
["status"]
@ -1145,7 +1129,7 @@ class Slixfeed(slixmpp.ClientXMPP):
if num:
action = await initdb(
jid,
last_entries,
sqlite.last_entries,
num
)
else:
@ -1155,7 +1139,7 @@ class Slixfeed(slixmpp.ClientXMPP):
if ix:
action = await initdb(
jid,
remove_feed,
sqlite.remove_feed,
ix
)
# await refresh_task(
@ -1165,11 +1149,11 @@ class Slixfeed(slixmpp.ClientXMPP):
# "status",
# 20
# )
await clean_tasks_xmpp(
await tasker.clean_tasks_xmpp(
jid,
["status"]
)
await start_tasks_xmpp(
await tasker.start_tasks_xmpp(
self,
jid,
["status"]
@ -1197,11 +1181,11 @@ class Slixfeed(slixmpp.ClientXMPP):
val = 1
await initdb(
jid,
set_settings_value,
sqlite.set_settings_value,
[key, val]
)
# asyncio.create_task(task_jid(self, jid))
await start_tasks_xmpp(
await tasker.start_tasks_xmpp(
self,
jid,
["interval", "status", "check"]
@ -1212,13 +1196,13 @@ class Slixfeed(slixmpp.ClientXMPP):
case "stats":
action = await initdb(
jid,
statistics
sqlite.statistics
)
case _ if message_lowercase.startswith("status "):
ix = message[7:]
action = await initdb(
jid,
toggle_status,
sqlite.toggle_status,
ix
)
case "stop":
@ -1247,10 +1231,10 @@ class Slixfeed(slixmpp.ClientXMPP):
val = 0
await initdb(
jid,
set_settings_value,
sqlite.set_settings_value,
[key, val]
)
await clean_tasks_xmpp(
await tasker.clean_tasks_xmpp(
jid,
["interval", "status"]
)
@ -1264,7 +1248,7 @@ class Slixfeed(slixmpp.ClientXMPP):
# TODO Send an invitation.
action = "Join xmpp:slixmpp@muc.poez.io?join"
case _ if message_lowercase.startswith("xmpp:"):
muc = await check_xmpp_uri(message)
muc = await fetcher.check_xmpp_uri(message)
if muc:
"TODO probe JID and confirm it's a groupchat"
await self.join_muc(jid, muc)