Restructure modules and database.
Add OPML import functionality. Minor improvements.
This commit is contained in:
parent
6b07640666
commit
dbe9ec3073
16 changed files with 1352 additions and 1076 deletions
83
slixfeed.py
83
slixfeed.py
|
@ -18,18 +18,20 @@
|
|||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
from slixfeed.__main__ import Jabber
|
||||
from slixfeed.xmpp.client import Slixfeed
|
||||
from slixfeed.config import get_default_confdir
|
||||
from argparse import ArgumentParser
|
||||
import configparser
|
||||
# import filehandler
|
||||
# from filehandler import get_default_confdir
|
||||
from getpass import getpass
|
||||
import logging
|
||||
import os
|
||||
from slixfeed.__main__ import Jabber
|
||||
from slixfeed.config import get_value
|
||||
from slixfeed.xmpp.client import Slixfeed
|
||||
import sys
|
||||
|
||||
# import socks
|
||||
# import socket
|
||||
# # socks.set_default_proxy(socks.SOCKS5, values[0], values[1])
|
||||
# socks.set_default_proxy(socks.SOCKS5, 'localhost', 9050)
|
||||
# socket.socket = socks.socksocket
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
# Setup the command line arguments.
|
||||
|
@ -37,65 +39,34 @@ if __name__ == '__main__':
|
|||
|
||||
# Output verbosity options.
|
||||
parser.add_argument(
|
||||
"-q",
|
||||
"--quiet",
|
||||
help="set logging to ERROR",
|
||||
action="store_const",
|
||||
dest="loglevel",
|
||||
const=logging.ERROR,
|
||||
default=logging.INFO
|
||||
)
|
||||
"-q", "--quiet", help="set logging to ERROR",
|
||||
action="store_const", dest="loglevel",
|
||||
const=logging.ERROR, default=logging.INFO)
|
||||
parser.add_argument(
|
||||
"-d",
|
||||
"--debug",
|
||||
help="set logging to DEBUG",
|
||||
action="store_const",
|
||||
dest="loglevel",
|
||||
const=logging.DEBUG,
|
||||
default=logging.INFO
|
||||
)
|
||||
"-d", "--debug", help="set logging to DEBUG",
|
||||
action="store_const", dest="loglevel",
|
||||
const=logging.DEBUG, default=logging.INFO)
|
||||
|
||||
# JID and password options.
|
||||
parser.add_argument(
|
||||
"-j",
|
||||
"--jid",
|
||||
dest="jid",
|
||||
help="Jabber ID"
|
||||
)
|
||||
"-j", "--jid", dest="jid", help="Jabber ID")
|
||||
parser.add_argument(
|
||||
"-p",
|
||||
"--password",
|
||||
dest="password",
|
||||
help="Password of JID"
|
||||
)
|
||||
"-p", "--password", dest="password", help="Password of JID")
|
||||
parser.add_argument(
|
||||
"-n",
|
||||
"--nickname",
|
||||
dest="nickname",
|
||||
help="Display name"
|
||||
)
|
||||
"-n", "--nickname", dest="nickname", help="Display name")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# Setup logging.
|
||||
logging.basicConfig(
|
||||
level=args.loglevel,
|
||||
format='%(levelname)-8s %(message)s'
|
||||
)
|
||||
level=args.loglevel, format='%(levelname)-8s %(message)s')
|
||||
|
||||
# Try configuration file
|
||||
config = configparser.RawConfigParser()
|
||||
config_dir = get_default_confdir()
|
||||
if not os.path.isdir(config_dir):
|
||||
os.mkdir(config_dir)
|
||||
# TODO Copy file from /etc/slixfeed/ or /usr/share/slixfeed/
|
||||
config_file = os.path.join(config_dir, r"accounts.ini")
|
||||
config.read(config_file)
|
||||
if config.has_section("XMPP"):
|
||||
xmpp = config["XMPP"]
|
||||
nickname = xmpp["nickname"]
|
||||
username = xmpp["username"]
|
||||
password = xmpp["password"]
|
||||
values = get_value(
|
||||
"accounts", "XMPP", ["nickname", "username", "password"])
|
||||
nickname = values[0]
|
||||
username = values[1]
|
||||
password = values[2]
|
||||
|
||||
# Use arguments if were given
|
||||
if args.jid:
|
||||
|
@ -106,11 +77,11 @@ if __name__ == '__main__':
|
|||
nickname = args.nickname
|
||||
|
||||
# Prompt for credentials if none were given
|
||||
if username is None:
|
||||
if not username:
|
||||
username = input("Username: ")
|
||||
if password is None:
|
||||
if not password:
|
||||
password = getpass("Password: ")
|
||||
if nickname is None:
|
||||
if not nickname:
|
||||
nickname = input("Nickname: ")
|
||||
|
||||
Jabber(username, password, nickname)
|
||||
|
|
|
@ -39,7 +39,10 @@ TODO
|
|||
9) Readability
|
||||
See project /buriy/python-readability
|
||||
|
||||
10) Download and upload/send article (xHTML, HTMLZ, Markdown, MHTML, TXT).
|
||||
9.1) IDEA: Bot to display Title and Excerpt
|
||||
(including sending a PDF version of it) of posted link
|
||||
|
||||
10) Download and upload/send article (xHTML, HTMLZ, Markdown, MHTML, PDF, TXT).
|
||||
|
||||
11) Fetch summary from URL, instead of storing summary, or
|
||||
Store 5 upcoming summaries.
|
||||
|
|
|
@ -1,41 +1,391 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
1) Call sqlite function from function statistics.
|
||||
Returning a list of values doesn't seem to be a good practice.
|
||||
|
||||
"""
|
||||
|
||||
from asyncio.exceptions import IncompleteReadError
|
||||
from bs4 import BeautifulSoup
|
||||
from http.client import IncompleteRead
|
||||
from feedparser import parse
|
||||
import logging
|
||||
import slixfeed.config as config
|
||||
import slixfeed.crawl as crawl
|
||||
from slixfeed.datetime import now, rfc2822_to_iso8601
|
||||
from slixfeed.datetime import (
|
||||
current_date, current_time, now,
|
||||
convert_struct_time_to_iso8601,
|
||||
rfc2822_to_iso8601
|
||||
)
|
||||
import slixfeed.fetch as fetch
|
||||
import slixfeed.sqlite as sqlite
|
||||
import slixfeed.read as read
|
||||
import slixfeed.task as task
|
||||
from slixfeed.url import complete_url, join_url, trim_url
|
||||
from slixfeed.url import (
|
||||
# complete_url,
|
||||
join_url,
|
||||
remove_tracking_parameters,
|
||||
replace_hostname,
|
||||
trim_url
|
||||
)
|
||||
import slixfeed.xmpp.bookmark as bookmark
|
||||
from urllib import error
|
||||
from urllib.parse import urlsplit
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
|
||||
def log_to_markdown(timestamp, filename, jid, message):
    """
    Append one message to a Markdown log file.

    The entry is written as a level-2 heading holding the Jabber ID,
    a level-3 heading holding the time stamp, and then the message.

    Parameters
    ----------
    timestamp : str
        Time stamp.
    filename : str
        Pathname of the log file, without the '.md' extension.
    jid : str
        Jabber ID.
    message : str
        Message content.

    Returns
    -------
    None.
    """
    pathname = filename + '.md'
    with open(pathname, 'a') as log:
        template = "## {}\n### {}\n\n{}\n\n"
        log.write(template.format(jid, timestamp, message))
|
||||
|
||||
|
||||
def is_feed(feed):
    """
    Determine whether document is feed or not.

    A document with entries is a feed unless the parser flagged it as
    malformed (bozo). A document without entries is a feed when it
    carries a non-empty version or, if it has no version key at all,
    when its feed element has a title.

    Parameters
    ----------
    feed : dict
        Parsed feed. It is read both by key (``feed["feed"]``) and by
        attribute (``feed.entries``, ``feed.version``, ``feed.bozo``).

    Returns
    -------
    value : boolean
        True or False.
    """
    # The debugging leftovers (print calls and breakpoint()) were removed:
    # breakpoint() suspends the whole process on every call.
    if feed.entries:
        # Entries were found; reject only feeds flagged as malformed.
        return not feed.bozo
    if "version" in feed.keys():
        # Empty feed; accept it when the format was recognised.
        return bool(feed.version)
    # No version key at all; fall back to the presence of a title.
    return "title" in feed["feed"].keys()
|
||||
|
||||
|
||||
def list_unread_entries(result, feed_title):
    """
    Format a single unread entry as a news item.

    Parameters
    ----------
    result : tuple
        Entry row; index 1 is read as the title and index 2 as the link.
    feed_title : str
        Title of the feed the entry belongs to.

    Returns
    -------
    news_item : str
        Title, link and feed title, one per line.
    """
    # TODO Add filtering (deny list) of entries.
    # TODO Retrieve summary from feed, strip HTML tags and limit its length.
    #      See fetch.view_entry
    title = result[1]
    url = remove_tracking_parameters(result[2])
    # Keep the cleaned URL when no replacement hostname is returned.
    proxied = replace_hostname(url, "link")
    if not proxied:
        proxied = url
    return "\n{}\n{}\n{}\n".format(str(title), str(proxied), str(feed_title))
|
||||
|
||||
|
||||
def list_search_results(query, results):
    """
    Compose a message listing search results.

    Parameters
    ----------
    query : str
        Search query.
    results : list
        Rows whose first two fields are written on consecutive lines.

    Returns
    -------
    str
        Listing with a total count, or a notice when nothing was found.
    """
    total = len(results)
    if not total:
        return "No results were found for: {}".format(query)
    parts = ["Search results for '{}':\n\n```".format(query)]
    for row in results:
        parts.append("\n{}\n{}\n".format(str(row[0]), str(row[1])))
    parts.append("```\nTotal of {} results".format(total))
    return "".join(parts)
|
||||
|
||||
|
||||
def list_feeds_by_query(query, results):
    """
    Compose a message listing feeds that match a query.

    Parameters
    ----------
    query : str
        Search query.
    results : list
        Rows of three fields; the first two are shown on the "Name"
        line and the third on the "URL" line.

    Returns
    -------
    str
        Listing with a total count, or a notice when nothing was found.
    """
    total = len(results)
    if not total:
        return "No feeds were found for: {}".format(query)
    template = "\nName : {} [{}]\nURL : {}\n"
    listing = "".join(
        template.format(str(row[0]), str(row[1]), str(row[2]))
        for row in results)
    header = "Feeds containing '{}':\n\n```".format(query)
    footer = "\n```\nTotal of {} feeds".format(total)
    return header + listing + footer
|
||||
|
||||
|
||||
def list_statistics(values):
    """
    Return table statistics.

    Parameters
    ----------
    values : list
        Eight values, in the order in which they appear in the message:
        two for "News items", two for "News sources", then one each for
        "Items to archive", "Update interval", "Items per update" and
        "Operation status".

    Returns
    -------
    msg : str
        Statistics as message.
    """
    rows = [
        "```",
        "STATISTICS",
        "News items : {}/{}",
        "News sources : {}/{}",
        "",
        "OPTIONS",
        "Items to archive : {}",
        "Update interval : {}",
        "Items per update : {}",
        "Operation status : {}",
        "```",
    ]
    fields = [values[index] for index in range(8)]
    return "\n".join(rows).format(*fields)
|
||||
|
||||
|
||||
# FIXME Replace counter by len
|
||||
def list_last_entries(results, num):
    """
    Compose a message listing recent titles.

    Parameters
    ----------
    results : list
        Rows whose first two fields are written on consecutive lines.
    num : int or str
        Number shown in the heading.

    Returns
    -------
    str
        Listing, or a notice when there are no rows.
    """
    if not len(results):
        return "There are no news at the moment."
    heading = "Recent {} titles:\n\n```".format(num)
    listing = "".join(
        "\n{}\n{}\n".format(str(row[0]), str(row[1])) for row in results)
    return heading + listing + "```\n"
|
||||
|
||||
|
||||
def list_feeds(results):
    """
    Compose a message listing subscriptions.

    Parameters
    ----------
    results : list
        Rows of three fields, shown as Name, URL and ID.

    Returns
    -------
    str
        Listing with a total count, or a hint when the list is empty.
    """
    total = len(results)
    if not total:
        # TODO Pick random from featured/recommended
        return (
            "List of subscriptions is empty.\n"
            "To add feed, send a URL\n"
            "Try these:\n"
            "https://reclaimthenet.org/feed/")
    template = "Name : {}\nURL : {}\nID : {}\n\n"
    parts = ["\nList of subscriptions:\n\n```\n"]
    parts.extend(
        template.format(str(row[0]), str(row[1]), str(row[2]))
        for row in results)
    parts.append("```\nTotal of {} subscriptions.\n".format(total))
    return "".join(parts)
|
||||
|
||||
|
||||
async def list_bookmarks(self):
    """
    Compose a message listing bookmarked groupchats.

    Parameters
    ----------
    self : object
        Object handed to bookmark.get to retrieve the conferences.

    Returns
    -------
    groupchat_list : str
        One Jabber ID per groupchat, followed by the total count.
    """
    conferences = await bookmark.get(self)
    rows = [
        "{}\n\n".format(conference["jid"]) for conference in conferences]
    heading = "\nList of groupchats:\n\n```\n"
    footer = "```\nTotal of {} groupchats.\n".format(len(rows))
    return heading + "".join(rows) + footer
|
||||
|
||||
|
||||
def export_to_markdown(jid, filename, results):
    """
    Write subscriptions to a Markdown file.

    Parameters
    ----------
    jid : str
        Jabber ID.
    filename : str
        Pathname of the file to write; it is overwritten.
    results : list
        Rows whose first field is the link text and whose second field
        is the link target.

    Returns
    -------
    None.
    """
    with open(filename, 'w') as document:
        document.write('# Subscriptions for {}\n'.format(jid))
        document.write('## Set of feeds exported with Slixfeed\n')
        document.writelines(
            '- [{}]({})\n'.format(row[0], row[1]) for row in results)
        footer = (
            '\n\n* * *\n\nThis list was saved on {} from xmpp:{} using '
            '[Slixfeed](https://gitgud.io/sjehuda/slixfeed)\n')
        document.write(footer.format(current_date(), jid))
|
||||
|
||||
|
||||
def export_to_opml(jid, filename, results):
    """
    Write subscriptions to an OPML file.

    Parameters
    ----------
    jid : str
        Jabber ID, used in the document title.
    filename : str
        Pathname of the file to write.
    results : list
        Rows whose first field becomes the outline "text" attribute
        and whose second field becomes the "xmlUrl" attribute.

    Returns
    -------
    None.
    """
    opml = ET.Element("opml")
    opml.set("version", "1.0")
    head = ET.SubElement(opml, "head")
    stamp = current_time()
    # Children of <head>, in document order.
    properties = (
        ("title", "Subscriptions for {}".format(jid)),
        ("description", "Set of feeds exported with Slixfeed"),
        ("generator", "Slixfeed"),
        ("urlPublic", "https://gitgud.io/sjehuda/slixfeed"),
        ("dateCreated", stamp),
        ("dateModified", stamp),
    )
    for tag, text in properties:
        ET.SubElement(head, tag).text = text
    body = ET.SubElement(opml, "body")
    for row in results:
        ET.SubElement(body, "outline", text=row[0], xmlUrl=row[1])
    ET.ElementTree(opml).write(filename)
|
||||
|
||||
|
||||
async def import_opml(db_file, url):
    """
    Import feeds from an OPML document into the database.

    Parameters
    ----------
    db_file : str
        Path to database file.
    url : str
        URL of the OPML document.

    Returns
    -------
    difference : int or None
        Number of feeds added to table "feeds", or None when no
        document was retrieved.
    """
    result = await fetch.download_feed(url)
    document = result[0]
    if not document:
        return None
    # NOTE(review): the document comes from a remote source; consider a
    # hardened XML parser if untrusted OPML files are to be accepted.
    root = ET.fromstring(document)
    before = await sqlite.get_number_of_items(db_file, 'feeds')
    feeds = []
    for outline in root.findall(".//outline"):
        feed_url = outline.get("xmlUrl")
        if not feed_url:
            # Outlines that only group other outlines (categories or
            # folders) have no xmlUrl and are not subscriptions.
            continue
        feeds.append((feed_url, outline.get("text")))
    await sqlite.import_feeds(db_file, feeds)
    after = await sqlite.get_number_of_items(db_file, 'feeds')
    return int(after) - int(before)
|
||||
|
||||
|
||||
async def add_feed(db_file, url):
|
||||
while True:
|
||||
exist = await sqlite.is_feed_exist(db_file, url)
|
||||
exist = await sqlite.get_feed_id_and_name(db_file, url)
|
||||
if not exist:
|
||||
result = await fetch.download_feed(url)
|
||||
document = result[0]
|
||||
status = result[1]
|
||||
status_code = result[1]
|
||||
if document:
|
||||
feed = parse(document)
|
||||
# if read.is_feed(url, feed):
|
||||
if read.is_feed(feed):
|
||||
try:
|
||||
# if is_feed(url, feed):
|
||||
if is_feed(feed):
|
||||
if "title" in feed["feed"].keys():
|
||||
title = feed["feed"]["title"]
|
||||
except:
|
||||
else:
|
||||
title = urlsplit(url).netloc
|
||||
if "language" in feed["feed"].keys():
|
||||
language = feed["feed"]["language"]
|
||||
else:
|
||||
language = ''
|
||||
if "encoding" in feed.keys():
|
||||
encoding = feed["encoding"]
|
||||
else:
|
||||
encoding = ''
|
||||
if "updated_parsed" in feed["feed"].keys():
|
||||
updated = feed["feed"]["updated_parsed"]
|
||||
updated = convert_struct_time_to_iso8601(updated)
|
||||
else:
|
||||
updated = ''
|
||||
version = feed["version"]
|
||||
entries = len(feed["entries"])
|
||||
await sqlite.insert_feed(
|
||||
db_file, url, title, status)
|
||||
await organize_items(
|
||||
db_file, [url])
|
||||
db_file, url,
|
||||
title=title,
|
||||
entries=entries,
|
||||
version=version,
|
||||
encoding=encoding,
|
||||
language=language,
|
||||
status_code=status_code,
|
||||
updated=updated
|
||||
)
|
||||
await scan(
|
||||
db_file, url)
|
||||
old = (
|
||||
await sqlite.get_settings_value(
|
||||
db_file, "old")
|
||||
|
@ -44,7 +394,7 @@ async def add_feed(db_file, url):
|
|||
"settings", "Settings", "old")
|
||||
)
|
||||
if not old:
|
||||
await sqlite.mark_source_as_read(
|
||||
await sqlite.mark_feed_as_read(
|
||||
db_file, url)
|
||||
response = (
|
||||
"> {}\nNews source {} has been "
|
||||
|
@ -66,7 +416,7 @@ async def add_feed(db_file, url):
|
|||
else:
|
||||
response = (
|
||||
"> {}\nFailed to load URL. Reason: {}"
|
||||
).format(url, status)
|
||||
).format(url, status_code)
|
||||
break
|
||||
else:
|
||||
ix = exist[0]
|
||||
|
@ -87,11 +437,11 @@ async def view_feed(url):
|
|||
status = result[1]
|
||||
if document:
|
||||
feed = parse(document)
|
||||
# if read.is_feed(url, feed):
|
||||
if read.is_feed(feed):
|
||||
try:
|
||||
# if is_feed(url, feed):
|
||||
if is_feed(feed):
|
||||
if "title" in feed["feed"].keys():
|
||||
title = feed["feed"]["title"]
|
||||
except:
|
||||
else:
|
||||
title = urlsplit(url).netloc
|
||||
entries = feed.entries
|
||||
response = "Preview of {}:\n\n```\n".format(title)
|
||||
|
@ -156,11 +506,11 @@ async def view_entry(url, num):
|
|||
status = result[1]
|
||||
if document:
|
||||
feed = parse(document)
|
||||
# if read.is_feed(url, feed):
|
||||
if read.is_feed(feed):
|
||||
try:
|
||||
# if is_feed(url, feed):
|
||||
if is_feed(feed):
|
||||
if "title" in feed["feed"].keys():
|
||||
title = feed["feed"]["title"]
|
||||
except:
|
||||
else:
|
||||
title = urlsplit(url).netloc
|
||||
entries = feed.entries
|
||||
num = int(num) - 1
|
||||
|
@ -222,6 +572,113 @@ async def view_entry(url, num):
|
|||
return response
|
||||
|
||||
|
||||
async def scan(db_file, url):
    """
    Check a feed for new entries and add them to the database.

    Parameters
    ----------
    db_file : str
        Path to database file.
    url : str or tuple
        Feed URL. When a tuple is given, its first element is used.

    Returns
    -------
    None.
    """
    if isinstance(url, tuple):
        url = url[0]
    try:
        result = await fetch.download_feed(url)
    except Exception as e:
        # Previously this dropped into the debugger, which suspends the
        # whole process; log the failure and give up on this feed.
        logging.error("Failed to download %s: %s", url, e)
        return
    try:
        document = result[0]
        status = result[1]
    except (TypeError, IndexError):
        # No usable result (e.g. None) was returned for this URL.
        return
    if not (document and status == 200):
        return
    feed = parse(document)
    entries = feed.entries
    await remove_nonexistent_entries(
        db_file, feed, url)
    try:
        # A feed flagged as malformed (bozo) is recorded as invalid.
        valid = 0 if feed.bozo else 1
        await sqlite.update_feed_validity(
            db_file, url, valid)
        if "updated_parsed" in feed["feed"].keys():
            updated = feed["feed"]["updated_parsed"]
            updated = convert_struct_time_to_iso8601(updated)
        else:
            updated = ''
        await sqlite.update_feed_properties(
            db_file, url, len(feed["entries"]), updated)
        # await update_feed_status
    except (
            IncompleteReadError,
            IncompleteRead,
            error.URLError
            ) as e:
        print("Error:", e)
        return
    for entry in entries:
        # Prefer the publication date, then the update date, then now.
        if "published" in entry:
            date = rfc2822_to_iso8601(entry.published)
        elif "updated" in entry:
            date = rfc2822_to_iso8601(entry.updated)
        else:
            date = now()
        if "link" in entry:
            # link = complete_url(source, entry.link)
            link = join_url(url, entry.link)
            link = trim_url(link)
        else:
            link = url
        # Fall back to the date as title and to the link as identifier.
        title = entry.title if "title" in entry else date
        entry_id = entry.id if "id" in entry else link
        summary = entry.summary if "summary" in entry else ''
        exist = await sqlite.check_entry_exist(
            db_file, url, entry_id=entry_id,
            title=title, link=link, date=date)
        if exist:
            continue
        read_status = 0
        pathname = urlsplit(link).path
        string = "{} {} {}".format(title, summary, pathname)
        allow_list = await config.is_include_keyword(
            db_file, "filter-allow", string)
        if not allow_list:
            reject_list = await config.is_include_keyword(
                db_file, "filter-deny", string)
            if reject_list:
                # Entries matching the deny filter are stored as read.
                read_status = 1
        if isinstance(date, int):
            # The argument needs a placeholder; without it the logging
            # module reports a formatting error instead of the message.
            logging.error("Variable 'date' is int: %s", date)
        await sqlite.add_entry(
            db_file, title, link, entry_id,
            url, date, read_status)
        await sqlite.set_date(db_file, url)
|
||||
|
||||
|
||||
|
||||
|
||||
# NOTE Why (if res[0]) and (if res[1] == 200)?
|
||||
async def organize_items(db_file, urls):
|
||||
"""
|
||||
|
@ -236,16 +693,17 @@ async def organize_items(db_file, urls):
|
|||
"""
|
||||
for url in urls:
|
||||
# print(os.path.basename(db_file), url[0])
|
||||
source = url[0]
|
||||
res = await fetch.download_feed(source)
|
||||
url = url[0]
|
||||
res = await fetch.download_feed(url)
|
||||
# TypeError: 'NoneType' object is not subscriptable
|
||||
if res is None:
|
||||
# Skip to next feed
|
||||
# urls.next()
|
||||
# next(urls)
|
||||
continue
|
||||
await sqlite.update_source_status(
|
||||
db_file, res[1], source)
|
||||
status = res[1]
|
||||
await sqlite.update_feed_status(
|
||||
db_file, url, status)
|
||||
if res[0]:
|
||||
try:
|
||||
feed = parse(res[0])
|
||||
|
@ -254,28 +712,36 @@ async def organize_items(db_file, urls):
|
|||
# "WARNING: Bozo detected for feed: {}\n"
|
||||
# "For more information, visit "
|
||||
# "https://pythonhosted.org/feedparser/bozo.html"
|
||||
# ).format(source)
|
||||
# ).format(url)
|
||||
# print(bozo)
|
||||
valid = 0
|
||||
else:
|
||||
valid = 1
|
||||
await sqlite.update_source_validity(
|
||||
db_file, source, valid)
|
||||
await sqlite.update_feed_validity(
|
||||
db_file, url, valid)
|
||||
if "updated_parsed" in feed["feed"].keys():
|
||||
updated = feed["feed"]["updated_parsed"]
|
||||
updated = convert_struct_time_to_iso8601(updated)
|
||||
else:
|
||||
updated = ''
|
||||
entries = len(feed["entries"])
|
||||
await sqlite.update_feed_properties(
|
||||
db_file, url, entries, updated)
|
||||
except (
|
||||
IncompleteReadError,
|
||||
IncompleteRead,
|
||||
error.URLError
|
||||
) as e:
|
||||
# print(e)
|
||||
print(e)
|
||||
# TODO Print error to log
|
||||
None
|
||||
# None
|
||||
# NOTE I don't think there should be "return"
|
||||
# because then we might stop scanning next URLs
|
||||
# return
|
||||
# TODO Place these couple of lines back down
|
||||
# NOTE Need to correct the SQL statement to do so
|
||||
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
||||
if res[1] == 200:
|
||||
if status == 200:
|
||||
# NOT SURE WHETHER I MEANT THE LINES ABOVE OR BELOW
|
||||
# TODO Place these couple of lines back down
|
||||
# NOTE Need to correct the SQL statement to do so
|
||||
|
@ -283,7 +749,7 @@ async def organize_items(db_file, urls):
|
|||
# length = len(entries)
|
||||
# await remove_entry(db_file, source, length)
|
||||
await remove_nonexistent_entries(
|
||||
db_file, feed, source)
|
||||
db_file, feed, url)
|
||||
# new_entry = 0
|
||||
for entry in entries:
|
||||
# TODO Pass date too for comparison check
|
||||
|
@ -308,19 +774,18 @@ async def organize_items(db_file, urls):
|
|||
# title = feed["feed"]["title"]
|
||||
if entry.has_key("link"):
|
||||
# link = complete_url(source, entry.link)
|
||||
link = join_url(source, entry.link)
|
||||
link = join_url(url, entry.link)
|
||||
link = trim_url(link)
|
||||
else:
|
||||
link = source
|
||||
link = url
|
||||
if entry.has_key("id"):
|
||||
eid = entry.id
|
||||
else:
|
||||
eid = link
|
||||
exist = await sqlite.check_entry_exist(
|
||||
db_file, source, eid=eid,
|
||||
db_file, url, eid=eid,
|
||||
title=title, link=link, date=date)
|
||||
if not exist:
|
||||
print(url)
|
||||
# new_entry = new_entry + 1
|
||||
# TODO Enhance summary
|
||||
if entry.has_key("summary"):
|
||||
|
@ -356,7 +821,7 @@ async def organize_items(db_file, urls):
|
|||
# summary = ""
|
||||
read_status = 1
|
||||
entry = (
|
||||
title, link, eid, source, date, read_status)
|
||||
title, link, eid, url, date, read_status)
|
||||
if isinstance(date, int):
|
||||
print("PROBLEM: date is int")
|
||||
print(date)
|
||||
|
@ -364,13 +829,13 @@ async def organize_items(db_file, urls):
|
|||
# print(source)
|
||||
# print(date)
|
||||
await sqlite.add_entry_and_set_date(
|
||||
db_file, source, entry)
|
||||
db_file, url, entry)
|
||||
# print(current_time(), entry, title)
|
||||
# else:
|
||||
# print(current_time(), exist, title)
|
||||
|
||||
|
||||
async def remove_nonexistent_entries(db_file, feed, source):
|
||||
async def remove_nonexistent_entries(db_file, feed, url):
|
||||
"""
|
||||
Remove entries that don't exist in a given parsed feed.
|
||||
Check the entries returned from feed and delete read non
|
||||
|
@ -382,10 +847,10 @@ async def remove_nonexistent_entries(db_file, feed, source):
|
|||
Path to database file.
|
||||
feed : list
|
||||
Parsed feed document.
|
||||
source : str
|
||||
url : str
|
||||
Feed URL. URL of associated feed.
|
||||
"""
|
||||
items = await sqlite.get_entries_of_source(db_file, feed, source)
|
||||
items = await sqlite.get_entries_of_feed(db_file, feed, url)
|
||||
entries = feed.entries
|
||||
# breakpoint()
|
||||
for item in items:
|
||||
|
@ -409,9 +874,9 @@ async def remove_nonexistent_entries(db_file, feed, source):
|
|||
else:
|
||||
title = feed["feed"]["title"]
|
||||
if entry.has_key("link"):
|
||||
link = join_url(source, entry.link)
|
||||
link = join_url(url, entry.link)
|
||||
else:
|
||||
link = source
|
||||
link = url
|
||||
if entry.has_key("published") and item[4]:
|
||||
# print("compare11:", title, link, time)
|
||||
# print("compare22:", item[1], item[2], item[4])
|
||||
|
@ -459,11 +924,11 @@ async def remove_nonexistent_entries(db_file, feed, source):
|
|||
# print("link :", item[2])
|
||||
# print("id :", item[3])
|
||||
if item[5] == 1:
|
||||
sqlite.delete_entry_by_id(db_file, ix)
|
||||
await sqlite.delete_entry_by_id(db_file, ix)
|
||||
# print(">>> DELETING:", item[1])
|
||||
else:
|
||||
# print(">>> ARCHIVING:", item[1])
|
||||
sqlite.archive_entry(db_file, ix)
|
||||
await sqlite.archive_entry(db_file, ix)
|
||||
limit = (
|
||||
await sqlite.get_settings_value(db_file, "archive")
|
||||
) or (
|
||||
|
|
|
@ -25,6 +25,7 @@ import sys
|
|||
import yaml
|
||||
import logging
|
||||
|
||||
|
||||
def get_value(filename, section, keys):
|
||||
"""
|
||||
Get setting value.
|
||||
|
@ -45,7 +46,7 @@ def get_value(filename, section, keys):
|
|||
"""
|
||||
result = None
|
||||
config_res = configparser.RawConfigParser()
|
||||
config_dir = get_default_confdir()
|
||||
config_dir = get_default_config_directory()
|
||||
# if not os.path.isdir(config_dir):
|
||||
# config_dir = '/usr/share/slixfeed/'
|
||||
if not os.path.isdir(config_dir):
|
||||
|
@ -105,7 +106,7 @@ def get_value_default(filename, section, key):
|
|||
Value.
|
||||
"""
|
||||
config_res = configparser.RawConfigParser()
|
||||
config_dir = get_default_confdir()
|
||||
config_dir = get_default_config_directory()
|
||||
if not os.path.isdir(config_dir):
|
||||
config_dir = '/usr/share/slixfeed/'
|
||||
config_file = os.path.join(config_dir, filename + ".ini")
|
||||
|
@ -131,7 +132,7 @@ def get_list(filename, key):
|
|||
result : list
|
||||
List of pathnames or keywords.
|
||||
"""
|
||||
config_dir = get_default_confdir()
|
||||
config_dir = get_default_config_directory()
|
||||
if not os.path.isdir(config_dir):
|
||||
config_dir = '/usr/share/slixfeed/'
|
||||
config_file = os.path.join(config_dir, filename)
|
||||
|
@ -143,7 +144,7 @@ def get_list(filename, key):
|
|||
return result
|
||||
|
||||
|
||||
def get_default_dbdir():
|
||||
def get_default_data_directory():
|
||||
"""
|
||||
Determine the directory path where dbfile will be stored.
|
||||
|
||||
|
@ -183,7 +184,7 @@ def get_default_dbdir():
|
|||
return os.path.join(data_home, 'slixfeed')
|
||||
|
||||
|
||||
def get_default_confdir():
|
||||
def get_default_config_directory():
|
||||
"""
|
||||
Determine the directory path where configuration will be stored.
|
||||
|
||||
|
@ -233,10 +234,12 @@ def get_pathname_to_database(jid):
|
|||
object
|
||||
Coroutine object.
|
||||
"""
|
||||
db_dir = get_default_dbdir()
|
||||
db_dir = get_default_data_directory()
|
||||
if not os.path.isdir(db_dir):
|
||||
os.mkdir(db_dir)
|
||||
db_file = os.path.join(db_dir, r"{}.db".format(jid))
|
||||
if not os.path.isdir(db_dir + "/sqlite"):
|
||||
os.mkdir(db_dir + "/sqlite")
|
||||
db_file = os.path.join(db_dir, "sqlite", r"{}.db".format(jid))
|
||||
sqlite.create_tables(db_file)
|
||||
return db_file
|
||||
# await set_default_values(db_file)
|
||||
|
|
|
@ -6,10 +6,14 @@
|
|||
TODO
|
||||
|
||||
1.1) Do not compose messages.
|
||||
Only return results.
|
||||
See: # TODO return feeds
|
||||
|
||||
1.2) Return URLs, nothing else other (e.g. processed messages).
|
||||
|
||||
1.3) Correction of URLs is aceptable.
|
||||
1.3) NOTE: Correction of URLs is acceptable.
|
||||
|
||||
2) Consider merging with module fetch.py
|
||||
|
||||
"""
|
||||
|
||||
|
@ -161,6 +165,7 @@ async def feed_mode_request(url, tree):
|
|||
# print(feeds)
|
||||
except:
|
||||
continue
|
||||
# TODO return feeds
|
||||
if len(feeds) > 1:
|
||||
counter = 0
|
||||
msg = (
|
||||
|
@ -275,6 +280,7 @@ async def feed_mode_scan(url, tree):
|
|||
# print(feeds)
|
||||
except:
|
||||
continue
|
||||
# TODO return feeds
|
||||
if len(feeds) > 1:
|
||||
# print(feeds)
|
||||
# breakpoint()
|
||||
|
@ -352,6 +358,7 @@ async def feed_mode_auto_discovery(url, tree):
|
|||
# xpath_query = """//link[(@rel="alternate") and (@type="application/atom+xml" or @type="application/rdf+xml" or @type="application/rss+xml")]/@href"""
|
||||
# xpath_query = "//link[@rel='alternate' and @type='application/atom+xml' or @rel='alternate' and @type='application/rss+xml' or @rel='alternate' and @type='application/rdf+xml']/@href"
|
||||
feeds = tree.xpath(xpath_query)
|
||||
# TODO return feeds
|
||||
if len(feeds) > 1:
|
||||
msg = (
|
||||
"RSS Auto-Discovery has found {} feeds:\n\n```\n"
|
||||
|
|
|
@ -9,6 +9,7 @@ from datetime import datetime
|
|||
from dateutil.parser import parse
|
||||
from email.utils import parsedate, parsedate_to_datetime
|
||||
|
||||
|
||||
def now():
|
||||
"""
|
||||
ISO 8601 Timestamp.
|
||||
|
@ -22,6 +23,12 @@ def now():
|
|||
return date
|
||||
|
||||
|
||||
def convert_struct_time_to_iso8601(struct_time):
    """
    Convert a time structure to an ISO 8601 timestamp.

    Parameters
    ----------
    struct_time : time.struct_time
        Time structure; its first six fields (year, month, day, hour,
        minute and second) are used.

    Returns
    -------
    date : str
        ISO 8601 timestamp.
    """
    year, month, day, hour, minute, second = struct_time[:6]
    return datetime(year, month, day, hour, minute, second).isoformat()
|
||||
|
||||
|
||||
def current_date():
|
||||
"""
|
||||
Print MM DD, YYYY (Weekday Time) timestamp.
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from slixfeed.datetime import current_date
|
||||
|
||||
def markdown(jid, filename, results):
    """
    Write subscriptions to a Markdown file.

    Parameters
    ----------
    jid : str
        Jabber ID.
    filename : str
        Pathname of the file to write; it is overwritten.
    results : list
        Rows whose first field is the link text and whose second field
        is the link target.

    Returns
    -------
    None.
    """
    with open(filename, 'w') as document:
        document.write('# Subscriptions for {}\n'.format(jid))
        document.write('## Set of feeds exported with Slixfeed\n')
        document.writelines(
            '- [{}]({})\n'.format(row[0], row[1]) for row in results)
        footer = (
            '\n\n* * *\n\nThis list was saved on {} from xmpp:{} using '
            '[Slixfeed](https://gitgud.io/sjehuda/slixfeed)\n')
        document.write(footer.format(current_date(), jid))
|
|
@ -25,19 +25,12 @@ TODO
|
|||
|
||||
from aiohttp import ClientError, ClientSession, ClientTimeout
|
||||
from asyncio import TimeoutError
|
||||
from asyncio.exceptions import IncompleteReadError
|
||||
from bs4 import BeautifulSoup
|
||||
from email.utils import parseaddr
|
||||
from feedparser import parse
|
||||
from http.client import IncompleteRead
|
||||
from lxml import html
|
||||
# from asyncio.exceptions import IncompleteReadError
|
||||
# from bs4 import BeautifulSoup
|
||||
# from http.client import IncompleteRead
|
||||
# from lxml import html
|
||||
import slixfeed.config as config
|
||||
from slixfeed.datetime import now, rfc2822_to_iso8601
|
||||
import slixfeed.sqlite as sqlite
|
||||
from slixfeed.url import complete_url, join_url, trim_url
|
||||
from urllib import error
|
||||
# from xml.etree.ElementTree import ElementTree, ParseError
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
|
||||
|
||||
# async def dat():
|
||||
|
|
|
@ -1,152 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
1) Website-specific filter (i.e. audiobookbay).
|
||||
|
||||
2) Exclude websites from filtering (e.g. metapedia).
|
||||
|
||||
3) Filter phrases:
|
||||
Refer to sqlitehandler.search_entries for implementation.
|
||||
It is expected to be more complex than function search_entries.
|
||||
|
||||
"""
|
||||
|
||||
import slixfeed.config as config
|
||||
import slixfeed.sqlite as sqlite
|
||||
|
||||
|
||||
async def add_to_list(newwords, keywords):
    """
    Append new keywords to list.

    Parameters
    ----------
    newwords : str
        Comma-separated new keywords. They are lowercased and
        stripped of surrounding whitespace before being added.
    keywords : str, list or None
        Current keywords, either as a comma-separated string, as a
        list of strings, or None when there are none yet.

    Returns
    -------
    val : str
        Sorted, comma-separated list of current and new keywords.
    """
    if keywords is None:
        current = []
    elif isinstance(keywords, str):
        current = keywords.split(",")
    else:
        # Work on a copy so the list of the caller is not mutated.
        current = list(keywords)
    # "".split(",") yields [""]; drop blank entries so that the result
    # does not start with a stray comma.
    current = [word.strip() for word in current if word.strip()]
    for word in newwords.lower().split(","):
        word = word.strip()
        if word and word not in current:
            current.append(word)
    current.sort()
    return ",".join(current)
|
||||
|
||||
|
||||
async def remove_from_list(newwords, keywords):
    """
    Remove given keywords from list.

    Parameters
    ----------
    newwords : str
        Comma-separated keywords to remove. They are lowercased and
        stripped of surrounding whitespace before being compared.
    keywords : str, list or None
        Current keywords, either as a comma-separated string, as a
        list of strings, or None when there are none.

    Returns
    -------
    val : str
        Sorted, comma-separated list of the remaining keywords.
    """
    if keywords is None:
        current = []
    elif isinstance(keywords, str):
        current = keywords.split(",")
    else:
        # Work on a copy so the list of the caller is not mutated.
        current = list(keywords)
    unwanted = {word.strip() for word in newwords.lower().split(",")}
    # Blank entries are dropped as well, and every occurrence of an
    # unwanted keyword is removed, not only the first one.
    remaining = [
        word.strip() for word in current
        if word.strip() and word.strip() not in unwanted]
    remaining.sort()
    return ",".join(remaining)
|
||||
|
||||
|
||||
async def is_include_keyword(db_file, key, string):
    """
    Check keyword match.

    Parameters
    ----------
    db_file : str
        Path to database file.
    key : str
        Name of the filter list. It is passed both to
        sqlite.get_filters_value and to config.get_list for file
        "lists.yaml". (An earlier docstring named the values
        "allow" and "deny" - TODO confirm against callers.)
    string : str
        String to search for keywords. It is lowercased before
        matching.

    Returns
    -------
    str or None
        First matched keyword or None.
    """
    keywords = (await sqlite.get_filters_value(db_file, key)) or ''
    keywords = keywords.split(",")
    keywords = keywords + (config.get_list("lists.yaml", key))
    # Lowercase once instead of on every iteration.
    haystack = string.lower()
    for keyword in keywords:
        # Blank and single-character keywords are skipped.
        if not keyword or len(keyword) < 2:
            continue
        if keyword in haystack:
            return keyword
    return None
|
||||
|
||||
"""
|
||||
|
||||
This code was tested at module datahandler
|
||||
|
||||
reject = 0
|
||||
blacklist = await get_settings_value(
|
||||
db_file,
|
||||
"filter-deny"
|
||||
)
|
||||
# print(">>> blacklist:")
|
||||
# print(blacklist)
|
||||
# breakpoint()
|
||||
if blacklist:
|
||||
blacklist = blacklist.split(",")
|
||||
# print(">>> blacklist.split")
|
||||
# print(blacklist)
|
||||
# breakpoint()
|
||||
for i in blacklist:
|
||||
# print(">>> length", len(i))
|
||||
# breakpoint()
|
||||
# if len(i):
|
||||
if not i or len(i) < 2:
|
||||
print(">>> continue due to length", len(i))
|
||||
# breakpoint()
|
||||
continue
|
||||
# print(title)
|
||||
# print(">>> blacklisted word:", i)
|
||||
# breakpoint()
|
||||
test = (title + " " + summary + " " + link)
|
||||
if i in test.lower():
|
||||
reject = 1
|
||||
break
|
||||
|
||||
if reject:
|
||||
print("rejected:",title)
|
||||
entry = (title, '', link, source, date, 1);
|
||||
|
||||
"""
|
|
@ -1,33 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
|
||||
def markdown(timestamp, filename, jid, message):
    """
    Log message to file.

    Parameters
    ----------
    timestamp : str
        Time stamp.
    filename : str
        Jabber ID as name of file. The extension ".md" is appended.
    jid : str
        Jabber ID.
    message : str
        Message content.

    Returns
    -------
    None.
    """
    entry = (
        "## {}\n"
        "### {}\n\n"
        "{}\n\n").format(jid, timestamp, message)
    # Messages contain emoji and other non-ASCII text; append as UTF-8
    # explicitly instead of relying on the platform default encoding.
    with open(filename + '.md', 'a', encoding='utf-8') as file:
        file.write(entry)
|
||||
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
{
|
||||
'bozo': False,
|
||||
'bozo_exception': None,
|
||||
'feeds': [
|
||||
{
|
||||
'url': 'https://kurtmckee.org/tag/listparser/feed',
|
||||
'title': 'listparser blog',
|
||||
'categories': [],
|
||||
'tags': []
|
||||
},
|
||||
{
|
||||
'url': 'https://github.com/kurtmckee/listparser/commits/develop.atom',
|
||||
'title': 'listparser changelog',
|
||||
'categories': [],
|
||||
'tags': []
|
||||
}
|
||||
],
|
||||
'lists': [],
|
||||
'opportunities': [],
|
||||
'meta': {
|
||||
'title': 'listparser project feeds',
|
||||
'author': {
|
||||
'name': 'Kurt McKee',
|
||||
'email': 'contactme@kurtmckee.org',
|
||||
'url': 'https://kurtmckee.org/'
|
||||
}
|
||||
},
|
||||
'version': 'opml2'
|
||||
}
|
||||
|
||||
import listparser
|
||||
import lxml
|
||||
|
||||
|
||||
async def import_from_file(db_file, opml_doc):
|
||||
feeds = listparser.parse(opml_doc)['feeds']
|
||||
for feed in feeds:
|
||||
url = feed['url']
|
||||
title = feed['title']
|
||||
# categories = feed['categories']
|
||||
# tags = feed['tags']
|
||||
# await datahandler.add_feed_no_check(db_file, [url, title])
|
||||
|
||||
"""
|
||||
|
||||
from slixfeed.datetime import current_time
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
# NOTE Use OPyML or LXML
|
||||
def export_to_file(jid, filename, results):
    """
    Export a list of subscriptions to an OPML file.

    Parameters
    ----------
    jid : str
        Jabber ID the subscriptions belong to.
    filename : str
        Path of the file to write.
    results : iterable
        Rows of subscriptions; index 0 is written as attribute
        "text" and index 1 as attribute "xmlUrl" of each outline.

    Returns
    -------
    None.
    """
    root = ET.Element("opml")
    root.set("version", "1.0")
    head = ET.SubElement(root, "head")
    ET.SubElement(head, "title").text = "Subscriptions for {}".format(jid)
    ET.SubElement(head, "description").text = (
        "Set of feeds exported with Slixfeed")
    ET.SubElement(head, "generator").text = "Slixfeed"
    ET.SubElement(head, "urlPublic").text = (
        "https://gitgud.io/sjehuda/slixfeed")
    # Creation and modification time are the same for a fresh export.
    time_stamp = current_time()
    ET.SubElement(head, "dateCreated").text = time_stamp
    ET.SubElement(head, "dateModified").text = time_stamp
    body = ET.SubElement(root, "body")
    for result in results:
        outline = ET.SubElement(body, "outline")
        outline.set("text", result[0])
        outline.set("xmlUrl", result[1])
        # outline.set("type", result[2])
    tree = ET.ElementTree(root)
    # Write an XML declaration and UTF-8 text, so that the file states
    # its encoding and non-ASCII titles are stored as readable text.
    tree.write(filename, encoding="utf-8", xml_declaration=True)
|
1051
slixfeed/sqlite.py
1051
slixfeed/sqlite.py
File diff suppressed because it is too large
Load diff
|
@ -44,12 +44,12 @@ import logging
|
|||
import os
|
||||
import slixmpp
|
||||
|
||||
import slixfeed.action as action
|
||||
from slixfeed.config import (
|
||||
get_pathname_to_database,
|
||||
get_default_dbdir,
|
||||
get_default_data_directory,
|
||||
get_value_default)
|
||||
from slixfeed.datetime import current_time
|
||||
from slixfeed.action import organize_items
|
||||
from slixfeed.sqlite import (
|
||||
get_feed_title,
|
||||
get_feeds_url,
|
||||
|
@ -63,7 +63,6 @@ from slixfeed.sqlite import (
|
|||
)
|
||||
# from xmpp import Slixfeed
|
||||
import slixfeed.xmpp.client as xmpp
|
||||
from slixfeed.xmpp.compose import list_unread_entries
|
||||
import slixfeed.xmpp.utility as utility
|
||||
|
||||
main_task = []
|
||||
|
@ -229,8 +228,13 @@ async def send_update(self, jid, num=None):
|
|||
news_digest = []
|
||||
results = await get_unread_entries(db_file, num)
|
||||
for result in results:
|
||||
title = get_feed_title(db_file, result[3])
|
||||
news_item = list_unread_entries(result, title)
|
||||
ix = result[0]
|
||||
title_e = result[1]
|
||||
url = result[2]
|
||||
feed_id = result[3]
|
||||
date = result[4]
|
||||
title_f = get_feed_title(db_file, feed_id)
|
||||
news_item = action.list_unread_entries(result, title_f)
|
||||
news_digest.extend([news_item])
|
||||
# print(db_file)
|
||||
# print(result[0])
|
||||
|
@ -423,7 +427,8 @@ async def check_updates(jid):
|
|||
while True:
|
||||
db_file = get_pathname_to_database(jid)
|
||||
urls = await get_feeds_url(db_file)
|
||||
await organize_items(db_file, urls)
|
||||
for url in urls:
|
||||
await action.scan(db_file, url)
|
||||
val = get_value_default(
|
||||
"settings", "Settings", "check")
|
||||
await asyncio.sleep(60 * float(val))
|
||||
|
@ -504,7 +509,7 @@ async def select_file(self):
|
|||
Initiate actions by JID (Jabber ID).
|
||||
"""
|
||||
while True:
|
||||
db_dir = get_default_dbdir()
|
||||
db_dir = get_default_data_directory()
|
||||
if not os.path.isdir(db_dir):
|
||||
msg = (
|
||||
"Slixfeed can not work without a database.\n"
|
||||
|
|
|
@ -7,16 +7,11 @@ TODO
|
|||
|
||||
1) ActivityPub URL revealer activitypub_to_http.
|
||||
|
||||
2) Remove tracking queries.
|
||||
|
||||
3) Redirect to Invidious, Librarian, Nitter, ProxiTok etc.
|
||||
because they provide RSS.
|
||||
|
||||
"""
|
||||
|
||||
import slixfeed.config as config
|
||||
from email.utils import parseaddr
|
||||
import random
|
||||
import slixfeed.config as config
|
||||
from urllib.parse import (
|
||||
parse_qs,
|
||||
urlencode,
|
||||
|
@ -131,22 +126,6 @@ def feed_to_http(url):
|
|||
return new_url
|
||||
|
||||
|
||||
def activitypub_to_http(namespace):
    """
    Replace ActivityPub namespace by HTTP.

    NOTE Not implemented yet. The body of this function consists of
    this docstring only, so a call returns None.

    Parameters
    ----------
    namespace : str
        Namespace.

    Returns
    -------
    new_url : str
        URL (intended result; None is returned at present).
    """
|
||||
|
||||
|
||||
def check_xmpp_uri(uri):
|
||||
"""
|
||||
Check validity of XMPP URI.
|
||||
|
@ -318,4 +297,20 @@ def trim_url(url):
|
|||
queries,
|
||||
fragment
|
||||
])
|
||||
return url
|
||||
return url
|
||||
|
||||
|
||||
def activitypub_to_http(namespace):
    """
    Replace ActivityPub namespace by HTTP.

    NOTE Not implemented yet. The body of this function consists of
    this docstring only, so a call returns None.

    Parameters
    ----------
    namespace : str
        Namespace.

    Returns
    -------
    new_url : str
        URL (intended result; None is returned at present).
    """
|
||||
|
|
|
@ -1,194 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
"""
|
||||
|
||||
TODO
|
||||
|
||||
1) Port functions insert_feed, remove_feed, get_entry_unread.
|
||||
|
||||
2) Merge with action.py
|
||||
|
||||
3) Call sqlite function from function statistics.
|
||||
Returning a list of values doesn't seem to be a good practice.
|
||||
|
||||
"""
|
||||
|
||||
import slixfeed.xmpp.bookmark as bookmark
|
||||
from slixfeed.url import remove_tracking_parameters, replace_hostname
|
||||
|
||||
|
||||
def list_unread_entries(result, feed_title):
    """
    Format a single unread entry as a news item.

    Parameters
    ----------
    result : sequence
        Row of an entry; index 1 is used as title and index 2 as
        link.
    feed_title : str
        Title of the feed the entry belongs to.

    Returns
    -------
    news_item : str
        Title, link and feed title, each on a line of its own.
    """
    # TODO Add filtering
    # TODO Retrieve summary from feed (see fetch.view_entry)
    title = result[1]
    link = remove_tracking_parameters(result[2])
    # Keep the link as is when replace_hostname yields a falsy value.
    link = (replace_hostname(link, "link")) or link
    news_item = (
        "\n{}\n{}\n{}\n"
        ).format(str(title), str(link), str(feed_title))
    return news_item
|
||||
|
||||
|
||||
def list_search_results(query, results):
    """
    Format search results as a message.

    Parameters
    ----------
    query : str
        Search query.
    results : iterable
        Rows of results; the first two fields of each row are
        printed, each on a line of its own.

    Returns
    -------
    str
        List of results with a total, or a notice when there are no
        results.
    """
    rows = [
        "\n{}\n{}\n".format(str(row[0]), str(row[1]))
        for row in results]
    if not rows:
        return "No results were found for: {}".format(query)
    heading = "Search results for '{}':\n\n```".format(query)
    footing = "```\nTotal of {} results".format(len(rows))
    return heading + "".join(rows) + footing
|
||||
|
||||
|
||||
def list_feeds_by_query(query, results):
    """
    Format feeds matching a query as a message.

    Parameters
    ----------
    query : str
        Search query.
    results : iterable
        Rows of feeds; fields 0 to 3 are printed as Name, URL, Index
        and Mode respectively.

    Returns
    -------
    str
        List of feeds with a total, or a notice when there are no
        feeds.
    """
    template = (
        "\nName : {}"
        "\nURL : {}"
        "\nIndex : {}"
        "\nMode : {}"
        "\n")
    rows = [
        template.format(
            str(row[0]), str(row[1]), str(row[2]), str(row[3]))
        for row in results]
    if not rows:
        return "No feeds were found for: {}".format(query)
    heading = "Feeds containing '{}':\n\n```".format(query)
    footing = "\n```\nTotal of {} feeds".format(len(rows))
    return heading + "".join(rows) + footing
|
||||
|
||||
|
||||
def list_statistics(values):
    """
    Return table statistics.

    Parameters
    ----------
    values : sequence
        Eight values which are placed, in order, into the fields
        of the template below (two for news items, two for news
        sources, then one for each option).

    Returns
    -------
    msg : str
        Statistics as message.
    """
    template = (
        "```"
        "\nSTATISTICS\n"
        "News items : {}/{}\n"
        "News sources : {}/{}\n"
        "\nOPTIONS\n"
        "Items to archive : {}\n"
        "Update interval : {}\n"
        "Items per update : {}\n"
        "Operation status : {}\n"
        "```"
        )
    # Only the first eight values are used; extra ones are ignored.
    return template.format(*values[:8])
|
||||
|
||||
|
||||
def list_last_entries(results, num):
    """
    Format recent entries as a message.

    Parameters
    ----------
    results : iterable
        Rows of entries; the first two fields of each row are
        printed, each on a line of its own.
    num : int or str
        Number shown in the heading of the message.

    Returns
    -------
    str
        List of titles, or a notice when there are no entries.
    """
    rows = [
        "\n{}\n{}\n".format(str(row[0]), str(row[1]))
        for row in results]
    if not rows:
        return "There are no news at the moment."
    heading = "Recent {} titles:\n\n```".format(num)
    return heading + "".join(rows) + "```\n"
|
||||
|
||||
|
||||
def list_feeds(results):
    """
    Format the list of subscriptions as a message.

    Parameters
    ----------
    results : iterable
        Rows of feeds; fields 0 to 4 are printed as Name, Address,
        Updated, Status and ID respectively.

    Returns
    -------
    str
        List of subscriptions with a total, or a hint on how to add
        a feed when the list is empty.
    """
    template = (
        "Name : {}\n"
        "Address : {}\n"
        "Updated : {}\n"
        "Status : {}\n"
        "ID : {}\n"
        "\n")
    rows = [
        template.format(
            str(row[0]), str(row[1]), str(row[2]),
            str(row[3]), str(row[4]))
        for row in results]
    if not rows:
        return (
            "List of subscriptions is empty.\n"
            "To add feed, send a URL\n"
            "Try these:\n"
            # TODO Pick random from featured/recommended
            "https://reclaimthenet.org/feed/"
            )
    heading = "\nList of subscriptions:\n\n```\n"
    footing = "```\nTotal of {} subscriptions.\n".format(len(rows))
    return heading + "".join(rows) + footing
|
||||
|
||||
|
||||
async def list_bookmarks(self):
    """
    Format the bookmarked groupchats as a message.

    Parameters
    ----------
    self
        Object handed over to bookmark.get to retrieve the
        conferences.

    Returns
    -------
    groupchat_list : str
        The "jid" of every conference, followed by a total.
    """
    conferences = await bookmark.get(self)
    rows = [
        "{}\n\n".format(conference["jid"])
        for conference in conferences]
    heading = "\nList of groupchats:\n\n```\n"
    footing = "```\nTotal of {} groupchats.\n".format(len(rows))
    return heading + "".join(rows) + footing
|
|
@ -22,29 +22,20 @@ import os
|
|||
import slixfeed.action as action
|
||||
from slixfeed.config import (
|
||||
add_to_list,
|
||||
get_default_dbdir,
|
||||
get_default_data_directory,
|
||||
get_value_default,
|
||||
get_value,
|
||||
get_pathname_to_database,
|
||||
remove_from_list)
|
||||
import slixfeed.crawl as crawl
|
||||
from slixfeed.datetime import current_time, timestamp
|
||||
import slixfeed.export as export
|
||||
import slixfeed.fetch as fetch
|
||||
import slixfeed.opml as opml
|
||||
import slixfeed.sqlite as sqlite
|
||||
import slixfeed.task as task
|
||||
import slixfeed.log as log
|
||||
import slixfeed.read as read
|
||||
import slixfeed.url as uri
|
||||
import slixfeed.xmpp.bookmark as bookmark
|
||||
import slixfeed.xmpp.compose as compose
|
||||
import slixfeed.xmpp.muc as groupchat
|
||||
import slixfeed.xmpp.status as status
|
||||
import slixfeed.xmpp.text as text
|
||||
import slixfeed.xmpp.upload as upload
|
||||
from slixfeed.xmpp.utility import jid_type
|
||||
from urllib.parse import urlsplit, urlunsplit
|
||||
|
||||
|
||||
async def event(self, event):
|
||||
|
@ -137,7 +128,7 @@ async def message(self, message):
|
|||
|
||||
# # Begin processing new JID
|
||||
# # Deprecated in favour of event "presence_available"
|
||||
# db_dir = get_default_dbdir()
|
||||
# db_dir = get_default_data_directory()
|
||||
# os.chdir(db_dir)
|
||||
# if jid + ".db" not in os.listdir():
|
||||
# await task_jid(jid)
|
||||
|
@ -221,10 +212,12 @@ async def message(self, message):
|
|||
title = " ".join(message_text.split(" ")[1:])
|
||||
if url.startswith("http"):
|
||||
db_file = get_pathname_to_database(jid)
|
||||
exist = await sqlite.is_feed_exist(db_file, url)
|
||||
exist = await sqlite.get_feed_id_and_name(
|
||||
db_file, url)
|
||||
if not exist:
|
||||
await sqlite.insert_feed(db_file, url, title)
|
||||
await action.organize_items(db_file, [url])
|
||||
await sqlite.insert_feed(
|
||||
db_file, url, title)
|
||||
await action.scan(db_file, url)
|
||||
old = (
|
||||
await sqlite.get_settings_value(db_file, "old")
|
||||
) or (
|
||||
|
@ -237,7 +230,7 @@ async def message(self, message):
|
|||
await task.start_tasks_xmpp(
|
||||
self, jid, ["status"])
|
||||
else:
|
||||
await sqlite.mark_source_as_read(
|
||||
await sqlite.mark_feed_as_read(
|
||||
db_file, url)
|
||||
response = (
|
||||
"> {}\nNews source has been "
|
||||
|
@ -325,7 +318,7 @@ async def message(self, message):
|
|||
case "bookmarks":
|
||||
if jid == get_value(
|
||||
"accounts", "XMPP", "operator"):
|
||||
response = await compose.list_bookmarks(self)
|
||||
response = await action.list_bookmarks(self)
|
||||
else:
|
||||
response = (
|
||||
"This action is restricted. "
|
||||
|
@ -368,13 +361,6 @@ async def message(self, message):
|
|||
else:
|
||||
response = "Missing keywords."
|
||||
send_reply_message(self, message, response)
|
||||
case _ if message_lowercase.startswith("import "):
|
||||
status_type = "dnd"
|
||||
status_message = (
|
||||
"📥️ Procesing request to import feeds ..."
|
||||
)
|
||||
send_status_message(
|
||||
self, jid, status_type, status_message)
|
||||
case _ if message_lowercase.startswith("export "):
|
||||
key = message_text[7:]
|
||||
if key in ("opml", "html", "md", "xbel"):
|
||||
|
@ -384,7 +370,7 @@ async def message(self, message):
|
|||
).format(key)
|
||||
send_status_message(
|
||||
self, jid, status_type, status_message)
|
||||
data_dir = get_default_dbdir()
|
||||
data_dir = get_default_data_directory()
|
||||
if not os.path.isdir(data_dir):
|
||||
os.mkdir(data_dir)
|
||||
if not os.path.isdir(data_dir + '/' + key):
|
||||
|
@ -397,10 +383,10 @@ async def message(self, message):
|
|||
case "html":
|
||||
response = "Not yet implemented."
|
||||
case "md":
|
||||
export.markdown(
|
||||
action.export_to_markdown(
|
||||
jid, filename, results)
|
||||
case "opml":
|
||||
opml.export_to_file(
|
||||
action.export_to_opml(
|
||||
jid, filename, results)
|
||||
case "xbel":
|
||||
response = "Not yet implemented."
|
||||
|
@ -409,24 +395,54 @@ async def message(self, message):
|
|||
"Feeds exported successfully to {}.\n{}"
|
||||
).format(key, url)
|
||||
# send_oob_reply_message(message, url, response)
|
||||
await send_oob_message(self, jid, url)
|
||||
await task.start_tasks_xmpp(self, jid, ["status"])
|
||||
await send_oob_message(
|
||||
self, jid, url)
|
||||
await task.start_tasks_xmpp(
|
||||
self, jid, ["status"])
|
||||
else:
|
||||
response = "Unsupported filetype."
|
||||
send_reply_message(self, message, response)
|
||||
send_reply_message(self, message, response)
|
||||
case _ if (message_lowercase.startswith("gemini:") or
|
||||
message_lowercase.startswith("gopher:")):
|
||||
response = "Gemini and Gopher are not supported yet."
|
||||
send_reply_message(self, message, response)
|
||||
case _ if (message_lowercase.startswith("http")) and(
|
||||
message_lowercase.endswith(".opml")):
|
||||
url = message_text
|
||||
await task.clean_tasks_xmpp(
|
||||
jid, ["status"])
|
||||
status_type = "dnd"
|
||||
status_message = (
|
||||
"📥️ Procesing request to import feeds ..."
|
||||
)
|
||||
send_status_message(
|
||||
self, jid, status_type, status_message)
|
||||
db_file = get_pathname_to_database(jid)
|
||||
count = await action.import_opml(db_file, url)
|
||||
if count:
|
||||
response = (
|
||||
"Successfully imported {} feeds"
|
||||
).format(count)
|
||||
else:
|
||||
response = (
|
||||
"OPML file was not imported."
|
||||
)
|
||||
await task.clean_tasks_xmpp(
|
||||
jid, ["status"])
|
||||
await task.start_tasks_xmpp(
|
||||
self, jid, ["status"])
|
||||
send_reply_message(self, message, response)
|
||||
case _ if (message_lowercase.startswith("http") or
|
||||
message_lowercase.startswith("feed:")):
|
||||
url = message_text
|
||||
await task.clean_tasks_xmpp(jid, ["status"])
|
||||
await task.clean_tasks_xmpp(
|
||||
jid, ["status"])
|
||||
status_type = "dnd"
|
||||
status_message = (
|
||||
"📫️ Processing request to fetch data from {}"
|
||||
).format(url)
|
||||
send_status_message(self, jid, status_type, status_message)
|
||||
send_status_message(
|
||||
self, jid, status_type, status_message)
|
||||
if url.startswith("feed:"):
|
||||
url = uri.feed_to_http(url)
|
||||
url = (uri.replace_hostname(url, "feed")) or url
|
||||
|
@ -443,7 +459,7 @@ async def message(self, message):
|
|||
if len(query) > 3:
|
||||
db_file = get_pathname_to_database(jid)
|
||||
result = await sqlite.search_feeds(db_file, query)
|
||||
response = compose.list_feeds_by_query(query, result)
|
||||
response = action.list_feeds_by_query(query, result)
|
||||
else:
|
||||
response = (
|
||||
"Enter at least 4 characters to search"
|
||||
|
@ -451,7 +467,7 @@ async def message(self, message):
|
|||
else:
|
||||
db_file = get_pathname_to_database(jid)
|
||||
result = await sqlite.get_feeds(db_file)
|
||||
response = compose.list_feeds(result)
|
||||
response = action.list_feeds(result)
|
||||
send_reply_message(self, message, response)
|
||||
case "goodbye":
|
||||
if message["type"] == "groupchat":
|
||||
|
@ -616,7 +632,8 @@ async def message(self, message):
|
|||
status_message = (
|
||||
"📫️ Processing request to fetch data from {}"
|
||||
).format(url)
|
||||
send_status_message(self, jid, status_type, status_message)
|
||||
send_status_message(
|
||||
self, jid, status_type, status_message)
|
||||
if url.startswith("feed:"):
|
||||
url = uri.feed_to_http(url)
|
||||
url = (uri.replace_hostname(url, "feed")) or url
|
||||
|
@ -651,52 +668,59 @@ async def message(self, message):
|
|||
else:
|
||||
db_file = get_pathname_to_database(jid)
|
||||
result = await sqlite.last_entries(db_file, num)
|
||||
response = compose.list_last_entries(result, num)
|
||||
response = action.list_last_entries(result, num)
|
||||
except:
|
||||
response = "Enter a numeric value only."
|
||||
else:
|
||||
response = "Missing value."
|
||||
send_reply_message(self, message, response)
|
||||
# NOTE Should people be asked for numeric value?
|
||||
case _ if message_lowercase.startswith("remove "):
|
||||
ix = message_text[7:]
|
||||
if ix:
|
||||
ix_url = message_text[7:]
|
||||
if ix_url:
|
||||
db_file = get_pathname_to_database(jid)
|
||||
try:
|
||||
await sqlite.remove_feed(db_file, ix)
|
||||
response = (
|
||||
"News source {} has been removed "
|
||||
"from subscription list.").format(ix)
|
||||
# await refresh_task(
|
||||
# self,
|
||||
# jid,
|
||||
# send_status,
|
||||
# "status",
|
||||
# 20
|
||||
# )
|
||||
await task.clean_tasks_xmpp(
|
||||
jid, ["status"])
|
||||
await task.start_tasks_xmpp(
|
||||
self, jid, ["status"])
|
||||
ix = int(ix_url)
|
||||
try:
|
||||
url = await sqlite.remove_feed_by_index(
|
||||
db_file, ix)
|
||||
response = (
|
||||
"> {}\nNews source {} has been removed "
|
||||
"from subscription list.").format(url, ix)
|
||||
except:
|
||||
response = (
|
||||
"No news source with ID {}.".format(ix))
|
||||
except:
|
||||
url = ix_url
|
||||
await sqlite.remove_feed_by_url(db_file, url)
|
||||
response = (
|
||||
"No news source with ID {}.".format(ix))
|
||||
"> {}\nNews source has been removed "
|
||||
"from subscription list.").format(url)
|
||||
# await refresh_task(
|
||||
# self,
|
||||
# jid,
|
||||
# send_status,
|
||||
# "status",
|
||||
# 20
|
||||
# )
|
||||
await task.clean_tasks_xmpp(jid, ["status"])
|
||||
await task.start_tasks_xmpp(self, jid, ["status"])
|
||||
else:
|
||||
response = "Missing feed ID."
|
||||
send_reply_message(self, message, response)
|
||||
case _ if message_lowercase.startswith("reset"):
|
||||
source = message_text[6:]
|
||||
url = message_text[6:]
|
||||
await task.clean_tasks_xmpp(jid, ["status"])
|
||||
status_type = "dnd"
|
||||
status_message = "📫️ Marking entries as read..."
|
||||
send_status_message(
|
||||
self, jid, status_type, status_message)
|
||||
if source:
|
||||
if url:
|
||||
db_file = get_pathname_to_database(jid)
|
||||
await sqlite.mark_source_as_read(db_file, source)
|
||||
await sqlite.mark_feed_as_read(
|
||||
db_file, url)
|
||||
response = (
|
||||
"All entries of {} have been "
|
||||
"marked as read.".format(source)
|
||||
"marked as read.".format(url)
|
||||
)
|
||||
else:
|
||||
db_file = get_pathname_to_database(jid)
|
||||
|
@ -712,7 +736,7 @@ async def message(self, message):
|
|||
db_file = get_pathname_to_database(jid)
|
||||
results = await sqlite.search_entries(
|
||||
db_file, query)
|
||||
response = compose.list_search_results(
|
||||
response = action.list_search_results(
|
||||
query, results)
|
||||
else:
|
||||
response = (
|
||||
|
@ -738,7 +762,7 @@ async def message(self, message):
|
|||
case "stats":
|
||||
db_file = get_pathname_to_database(jid)
|
||||
result = await sqlite.statistics(db_file)
|
||||
response = compose.list_statistics(result)
|
||||
response = action.list_statistics(result)
|
||||
send_reply_message(self, message, response)
|
||||
case _ if message_lowercase.startswith("disable "):
|
||||
ix = message_text[8:]
|
||||
|
@ -829,15 +853,15 @@ async def message(self, message):
|
|||
# if response: message.reply(response).send()
|
||||
|
||||
if not response: response = "EMPTY MESSAGE - ACTION ONLY"
|
||||
data_dir = get_default_dbdir()
|
||||
data_dir = get_default_data_directory()
|
||||
if not os.path.isdir(data_dir):
|
||||
os.mkdir(data_dir)
|
||||
if not os.path.isdir(data_dir + '/logs/'):
|
||||
os.mkdir(data_dir + '/logs/')
|
||||
log.markdown(
|
||||
action.log_to_markdown(
|
||||
current_time(), os.path.join(data_dir, "logs", jid),
|
||||
jid, message_text)
|
||||
log.markdown(
|
||||
action.log_to_markdown(
|
||||
current_time(), os.path.join(data_dir, "logs", jid),
|
||||
self.boundjid.bare, response)
|
||||
|
||||
|
|
Loading…
Reference in a new issue