2023-12-28 15:50:23 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
TODO
|
|
|
|
|
2024-01-03 11:37:33 +01:00
|
|
|
1) Website-specific filter (e.g. audiobookbay).
|
2023-12-28 15:50:23 +01:00
|
|
|
|
2024-01-03 11:37:33 +01:00
|
|
|
2) Exclude websites from being subjected to filtering (e.g. metapedia).
|
2023-12-28 15:50:23 +01:00
|
|
|
|
2024-01-03 11:37:33 +01:00
|
|
|
3) Filter phrases:
|
2023-12-28 15:50:23 +01:00
|
|
|
Refer to sqlitehandler.search_entries for implementation.
|
|
|
|
It is expected to be more complex than function search_entries.
|
|
|
|
|
2024-01-03 11:37:33 +01:00
|
|
|
4) Copy file from /etc/slixfeed/ or /usr/share/slixfeed/
|
2023-12-28 15:50:23 +01:00
|
|
|
|
2024-01-17 15:36:28 +01:00
|
|
|
5) Merge get_value_default into get_value.
|
|
|
|
|
2024-01-02 12:42:41 +01:00
|
|
|
"""
|
2023-12-28 15:50:23 +01:00
|
|
|
|
|
|
|
import configparser
|
|
|
|
import slixfeed.sqlite as sqlite
|
|
|
|
import os
|
2024-01-02 12:42:41 +01:00
|
|
|
# from random import randrange
|
2023-12-28 15:50:23 +01:00
|
|
|
import sys
|
|
|
|
import yaml
|
2024-01-03 11:37:33 +01:00
|
|
|
import logging
|
2024-01-02 12:42:41 +01:00
|
|
|
|
2024-01-06 23:03:08 +01:00
|
|
|
|
2024-01-02 12:42:41 +01:00
|
|
|
def get_value(filename, section, keys):
    """
    Get setting value.

    The INI file is searched for in the user configuration directory,
    then in /usr/share/slixfeed/ and finally in the "assets" directory
    next to this module; the first existing directory is used.

    Parameters
    ----------
    filename : str
        INI filename, without the ".ini" extension.
    section : str
        INI Section.
    keys : list or str
        A single key as string or multiple keys as list.

    Returns
    -------
    result : list or str or None
        A single value as string or multiple values as list. A key
        that is missing from the section yields an empty string in its
        place. None is returned (and an error is logged) when the
        section does not exist or keys is neither a list nor a string.
    """
    config_dir = get_default_config_directory()
    if not os.path.isdir(config_dir):
        config_dir = '/usr/share/slixfeed/'
    if not os.path.isdir(config_dir):
        config_dir = os.path.dirname(__file__) + "/assets"
    config_file = os.path.join(config_dir, filename + ".ini")

    config_res = configparser.RawConfigParser()
    # read() ignores files it cannot open, so a missing file simply
    # results in a parser without sections.
    config_res.read(config_file)

    result = None
    if config_res.has_section(section):
        section_res = config_res[section]
        if isinstance(keys, list):
            result = []
            for key in keys:
                try:
                    value = section_res[key]
                except KeyError:
                    # Keep the position of the key in the result list.
                    value = ''
                    logging.error("Missing key: %s", key)
                else:
                    logging.debug("Found value %s for key %s", value, key)
                result.append(value)
        elif isinstance(keys, str):
            try:
                result = section_res[keys]
            except KeyError:
                result = ''
            else:
                logging.debug("Found value %s for key %s", result, keys)

    if result is None:
        logging.error(
            "Check configuration file %s.ini for "
            "missing key(s) \"%s\" under section [%s].",
            filename, keys, section
            )
    return result
|
2024-01-02 12:42:41 +01:00
|
|
|
|
|
|
|
|
|
|
|
# TODO Store config file as an object in runtime, otherwise
|
|
|
|
# the file will be opened time and time again.
|
|
|
|
# TODO Copy file from /etc/slixfeed/ or /usr/share/slixfeed/
|
|
|
|
def get_value_default(filename, section, key):
    """
    Get settings default value.

    The INI file is searched for in the user configuration directory,
    then in /usr/share/slixfeed/ and finally in the "assets" directory
    next to this module, the same lookup order used by get_value.

    Parameters
    ----------
    filename : str
        INI filename, without the ".ini" extension.
    section : str
        INI Section.
    key : str
        Key: archive, enabled, interval,
        length, old, quantum, random.

    Returns
    -------
    result : str or None
        Value, or None when the section does not exist.

    Raises
    ------
    KeyError
        If the section exists but does not contain key.
    """
    config_dir = get_default_config_directory()
    if not os.path.isdir(config_dir):
        config_dir = '/usr/share/slixfeed/'
    if not os.path.isdir(config_dir):
        config_dir = os.path.dirname(__file__) + "/assets"
    config_file = os.path.join(config_dir, filename + ".ini")

    config_res = configparser.RawConfigParser()
    config_res.read(config_file)
    if not config_res.has_section(section):
        # Previously this path failed with UnboundLocalError.
        logging.error(
            "Section [%s] is missing from configuration file %s.ini.",
            section, filename
            )
        return None
    return config_res[section][key]
|
|
|
|
|
|
|
|
|
2024-01-04 13:38:22 +01:00
|
|
|
def get_list(filename, key):
    """
    Get a list from a YAML configuration file.

    The file is searched for in the user configuration directory,
    then in /usr/share/slixfeed/ and finally in the "assets" directory
    next to this module; the first existing directory is used.

    Parameters
    ----------
    filename : str
        Filename of yaml file (including its extension).
    key: str
        Key.

    Returns
    -------
    result : list
        List of pathnames or keywords.
        NOTE(review): the value is returned exactly as parsed; it is
        only a list if the YAML file stores one under key.

    Raises
    ------
    OSError
        If the file cannot be opened.
    KeyError
        If key is absent (assuming the document is a mapping).
    """
    config_dir = get_default_config_directory()
    if not os.path.isdir(config_dir):
        config_dir = '/usr/share/slixfeed/'
    if not os.path.isdir(config_dir):
        config_dir = os.path.dirname(__file__) + "/assets"
    config_file = os.path.join(config_dir, filename)
    # Decode explicitly as UTF-8 instead of relying on the locale's
    # default encoding, which differs between platforms.
    with open(config_file, encoding="utf-8") as defaults:
        contents = yaml.safe_load(defaults)
    return contents[key]
|
|
|
|
|
|
|
|
|
2024-01-06 23:03:08 +01:00
|
|
|
def get_default_data_directory():
    """
    Determine the directory path where dbfile will be stored.

    * If $XDG_DATA_HOME is defined, use it;
    * else if $HOME exists, use it;
    * else if the platform is Windows, use %APPDATA%;
    * else use the current directory.

    A variable that is set to an empty string is treated as if it
    was not set.

    Returns
    -------
    str
        Path to the data directory: the "slixfeed" subdirectory of the
        selected base directory, or the absolute path of the current
        directory itself when no base directory could be determined.

    Note
    ----
    This function was taken from project buku.

    See https://github.com/jarun/buku

    * Arun Prakash Jana (jarun)
    * Dmitry Marakasov (AMDmi3)
    """
    # data_home = xdg.BaseDirectory.xdg_data_home
    data_home = os.environ.get('XDG_DATA_HOME')
    if not data_home:
        home = os.environ.get('HOME')
        if home:
            data_home = os.path.join(home, '.local', 'share')
        else:
            if sys.platform == 'win32':
                data_home = os.environ.get('APPDATA')
            if not data_home:
                # Nothing usable in the environment.
                return os.path.abspath('.')
    return os.path.join(data_home, 'slixfeed')
|
|
|
|
|
|
|
|
|
2024-01-06 23:03:08 +01:00
|
|
|
def get_default_config_directory():
    """
    Determine the directory path where configuration will be stored.

    * If $XDG_CONFIG_HOME is defined, use it;
    * else if $HOME exists, use it;
    * else if the platform is Windows, use %APPDATA%;
    * else use the current directory.

    A variable that is set to an empty string is treated as if it
    was not set.

    Returns
    -------
    str
        Path to configuration directory: the "slixfeed" subdirectory
        of the selected base directory, or the absolute path of the
        current directory itself when no base directory could be
        determined.
    """
    # config_home = xdg.BaseDirectory.xdg_config_home
    config_home = os.environ.get('XDG_CONFIG_HOME')
    if not config_home:
        home = os.environ.get('HOME')
        if home:
            config_home = os.path.join(home, '.config')
        else:
            if sys.platform == 'win32':
                config_home = os.environ.get('APPDATA')
            if not config_home:
                # Nothing usable in the environment.
                return os.path.abspath('.')
    return os.path.join(config_home, 'slixfeed')
|
|
|
|
|
|
|
|
|
2024-01-02 12:42:41 +01:00
|
|
|
def get_pathname_to_database(jid):
    """
    Get the pathname of the database file of a given Jabber ID.

    The "sqlite" directory inside the default data directory is
    created when it does not exist yet, and sqlite.create_tables is
    called on the database file before its pathname is returned.

    Parameters
    ----------
    jid : str
        Jabber ID.

    Returns
    -------
    db_file : str
        Pathname "<data directory>/sqlite/<jid>.db".
    """
    db_dir = os.path.join(get_default_data_directory(), "sqlite")
    # makedirs also creates missing parent directories and does not
    # fail when the directory already exists.
    os.makedirs(db_dir, exist_ok=True)
    db_file = os.path.join(db_dir, "{}.db".format(jid))
    sqlite.create_tables(db_file)
    return db_file
|
2023-12-28 15:50:23 +01:00
|
|
|
# await set_default_values(db_file)
|
2024-01-02 12:42:41 +01:00
|
|
|
# if message:
|
|
|
|
# return await callback(db_file, message)
|
|
|
|
# else:
|
|
|
|
# return await callback(db_file)
|
2023-12-28 15:50:23 +01:00
|
|
|
|
|
|
|
|
|
|
|
async def add_to_list(newwords, keywords):
    """
    Append new keywords to list.

    New keywords are lowercased and stripped of surrounding
    whitespace. Empty entries are dropped and keywords which are
    already present are not added again.

    Parameters
    ----------
    newwords : str
        Comma-separated list of new keywords.
    keywords : str or list or None
        Current keywords, either comma-separated or as a list.
        A list passed by the caller is not modified.

    Returns
    -------
    val : str
        Sorted, comma-separated list of current keywords and
        new keywords.
    """
    if keywords is None:
        current = []
    elif isinstance(keywords, str):
        current = keywords.split(",")
    else:
        current = list(keywords)
    # An empty string splits into [''], which would otherwise end up
    # as a leading comma in the result.
    current = [word.strip() for word in current]
    current = [word for word in current if word]
    for word in newwords.lower().split(","):
        word = word.strip()
        if word and word not in current:
            current.append(word)
    current.sort()
    return ",".join(current)
|
|
|
|
|
|
|
|
|
|
|
|
async def remove_from_list(newwords, keywords):
    """
    Remove given keywords from list.

    The keywords to remove are lowercased and stripped of surrounding
    whitespace before they are compared. Empty entries of the current
    list are dropped.

    Parameters
    ----------
    newwords : str
        Comma-separated list of keywords to remove.
    keywords : str or list or None
        Current keywords, either comma-separated or as a list.
        A list passed by the caller is not modified.

    Returns
    -------
    val : str
        Sorted, comma-separated list of the remaining keywords.
    """
    if keywords is None:
        current = []
    elif isinstance(keywords, str):
        current = keywords.split(",")
    else:
        current = list(keywords)
    current = [word.strip() for word in current]
    unwanted = {word.strip() for word in newwords.lower().split(",")}
    # Every occurrence of an unwanted keyword is removed, as well as
    # empty entries left over from splitting.
    remaining = sorted(
        word for word in current if word and word not in unwanted
        )
    return ",".join(remaining)
|
|
|
|
|
|
|
|
|
2024-01-04 13:38:22 +01:00
|
|
|
async def is_include_keyword(db_file, key, string):
    """
    Check keyword match.

    The keywords stored in the database for key are combined with
    the keywords listed under the same key in "lists.yaml". The first
    keyword that occurs in the lowercased string is returned.

    Parameters
    ----------
    db_file : str
        Path to database file.
    key : str
        Name of the filter list to check against.
        NOTE(review): an older revision of this docstring mentions
        "allow" or "deny"; confirm the accepted values with callers.
    string : str
        String.

    Returns
    -------
    str or None
        Matched keyword or None.
    """
    stored = (await sqlite.get_filters_value(db_file, key)) or ''
    # A key with an empty value in the YAML file is parsed as None.
    keywords = stored.split(",") + (get_list("lists.yaml", key) or [])
    # Lowercase the text once instead of once per keyword.
    haystack = string.lower()
    for keyword in keywords:
        # Skip empty entries and keywords shorter than two characters.
        if not keyword or len(keyword) < 2:
            continue
        if keyword in haystack:
            return keyword
    return None
|
2023-12-28 15:50:23 +01:00
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
This code was tested at module datahandler
|
|
|
|
|
|
|
|
reject = 0
|
|
|
|
blacklist = await get_settings_value(
|
|
|
|
db_file,
|
|
|
|
"filter-deny"
|
|
|
|
)
|
|
|
|
# print(">>> blacklist:")
|
|
|
|
# print(blacklist)
|
|
|
|
# breakpoint()
|
|
|
|
if blacklist:
|
|
|
|
blacklist = blacklist.split(",")
|
|
|
|
# print(">>> blacklist.split")
|
|
|
|
# print(blacklist)
|
|
|
|
# breakpoint()
|
|
|
|
for i in blacklist:
|
|
|
|
# print(">>> length", len(i))
|
|
|
|
# breakpoint()
|
|
|
|
# if len(i):
|
|
|
|
if not i or len(i) < 2:
|
|
|
|
print(">>> continue due to length", len(i))
|
|
|
|
# breakpoint()
|
|
|
|
continue
|
|
|
|
# print(title)
|
|
|
|
# print(">>> blacklisted word:", i)
|
|
|
|
# breakpoint()
|
|
|
|
test = (title + " " + summary + " " + link)
|
|
|
|
if i in test.lower():
|
|
|
|
reject = 1
|
|
|
|
break
|
|
|
|
|
|
|
|
if reject:
|
|
|
|
print("rejected:",title)
|
|
|
|
entry = (title, '', link, source, date, 1);
|
|
|
|
|
|
|
|
"""
|