Save enclosures
Send new message upon media detection

parent ec82aeb3cc
commit 43fa1a463c

5 changed files with 348 additions and 249 deletions
@@ -42,7 +42,7 @@ from slixfeed.url import (
     )
 import slixfeed.xmpp.bookmark as bookmark
 from urllib import error
-from urllib.parse import urlsplit
+from urllib.parse import parse_qs, urlsplit
 import xml.etree.ElementTree as ET
 
 try:
@@ -688,9 +688,34 @@ async def scan(db_file, url):
             if isinstance(date, int):
                 logging.error(
                     "Variable 'date' is int: {}".format(date))
+            media_link = ''
+            if entry.has_key("links"):
+                for e_link in entry.links:
+                    try:
+                        # if (link.rel == "enclosure" and
+                        #     (link.type.startswith("audio/") or
+                        #      link.type.startswith("image/") or
+                        #      link.type.startswith("video/"))
+                        #     ):
+                        media_type = e_link.type[:e_link.type.index("/")]
+                        if e_link.has_key("rel"):
+                            if (e_link.rel == "enclosure" and
+                                media_type in ("audio", "image", "video")):
+                                media_link = e_link.href
+                                media_link = join_url(url, e_link.href)
+                                media_link = trim_url(media_link)
+                                break
+                    except:
+                        logging.error(
+                            "KeyError: 'href'\n"
+                            "Missing 'href' attribute for {}".format(url))
+                        logging.info(
+                            "Continue scanning for next potential "
+                            "enclosure of {}".format(link))
             entry = {
                 "title": title,
                 "link": link,
+                "enclosure": media_link,
                 "entry_id": entry_id,
                 "url": url,
                 "date": date,
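The scan() hunk above collects the first enclosure link whose MIME type is audio, image or video. For reference, a minimal sketch of the dict-like link objects feedparser exposes there; the feed URL is hypothetical:

    import feedparser

    feed = feedparser.parse("https://example.org/podcast.xml")  # hypothetical feed
    for entry in feed.entries:
        for link in entry.get("links", []):
            # feedparser links are dict-like, with rel, type and href keys
            media_type = link.get("type", "").split("/")[0]
            if link.get("rel") == "enclosure" and media_type in ("audio", "image", "video"):
                print(entry.title, "->", link.href)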
@@ -706,42 +731,47 @@ async def scan(db_file, url):
                 db_file, new_entries)
 
 
-async def generate_document(url, ext, filename):
-    result = await fetch.http(url)
-    data = result[0]
-    code = result[1]
-    status = None
-    if data:
-        try:
-            document = Document(data)
-            content = document.summary()
-        except:
-            logging.warning(
-                "Check that package readability is installed.")
-        match ext:
-            case "html":
-                generate_html(content, filename)
-            case "md":
-                try:
-                    generate_markdown(content, filename)
-                except:
-                    logging.warning(
-                        "Check that package html2text is installed.")
-                    status = (
-                        "Package html2text was not found.")
-            case "pdf":
-                try:
-                    generate_pdf(content, filename)
-                except:
-                    logging.warning(
-                        "Check that packages pdfkit and wkhtmltopdf "
-                        "are installed.")
-                    status = (
-                        "Package pdfkit or wkhtmltopdf was not found.")
-    else:
-        status = code
-    if status:
-        return status
+def get_document_title(data):
+    try:
+        document = Document(data)
+        title = document.short_title()
+    except:
+        document = BeautifulSoup(data, 'html.parser')
+        title = document.title.string
+    return title
+
+
+def generate_document(data, url, ext, filename):
+    error = None
+    try:
+        document = Document(data)
+        content = document.summary()
+    except:
+        content = data
+        logging.warning(
+            "Check that package readability is installed.")
+    match ext:
+        case "html":
+            generate_html(content, filename)
+        case "md":
+            try:
+                generate_markdown(content, filename)
+            except:
+                logging.warning(
+                    "Check that package html2text is installed.")
+                error = (
+                    "Package html2text was not found.")
+        case "pdf":
+            try:
+                generate_pdf(content, filename)
+            except:
+                logging.warning(
+                    "Check that packages pdfkit and wkhtmltopdf "
+                    "are installed.")
+                error = (
+                    "Package pdfkit or wkhtmltopdf was not found.")
+    if error:
+        return error
 
 
 # TODO Either adapt it to filename
 # or change it to something else
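generate_document() no longer fetches the page itself; it now receives the data, and the new get_document_title() derives a title with readability's Document.short_title(), falling back to the <title> tag via BeautifulSoup. A standalone sketch of that fallback, assuming both packages are installed:

    from readability import Document
    from bs4 import BeautifulSoup

    def title_of(data):
        # readability first; plain <title> tag if readability fails
        try:
            return Document(data).short_title()
        except Exception:
            return BeautifulSoup(data, 'html.parser').title.string

    print(title_of("<html><head><title>Hello</title></head><body></body></html>"))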
@@ -751,28 +781,25 @@ async def generate_document(url, ext, filename):
 # file.write(html_doc)
 
 
-async def extract_image_from_feed(db_file, ix, url):
-    feed_url = sqlite.get_feed_url(db_file, ix)
+async def extract_image_from_feed(db_file, feed_id, url):
+    feed_url = sqlite.get_feed_url(db_file, feed_id)
     result = await fetch.http(feed_url)
     document = result[0]
-    # breakpoint()
-    print("extract_image_from_feed")
     if document:
         feed = parse(document)
         for entry in feed.entries:
-            print(len(feed.entries))
-            print(entry.link)
-            print(url)
-            if entry.link == url:
-                for link in entry.links:
-                    if (link.rel == "enclosure" and
-                        link.type.startswith("image/")):
-                        # if link.type.startswith("image/"):
-                        image_url = link.href
-                        print("found")
-                        print(image_url)
-                        break
-    return image_url
+            try:
+                if entry.link == url:
+                    for link in entry.links:
+                        if (link.rel == "enclosure" and
+                            link.type.startswith("image/")):
+                            image_url = link.href
+                            return image_url
+            except:
+                logging.error(url)
+                logging.error(
+                    "AttributeError: object has no attribute 'link'")
+                breakpoint()
 
 
 async def extract_image_from_html(url):
@@ -783,17 +810,17 @@ async def extract_image_from_html(url):
         document = Document(data)
         content = document.summary()
     except:
+        content = data
         logging.warning(
             "Check that package readability is installed.")
     tree = html.fromstring(content)
+    # TODO Exclude banners, class="share" links etc.
     images = tree.xpath('//img/@src')
     if len(images):
         image = images[0]
         image = str(image)
         image_url = complete_url(url, image)
-    else:
-        image_url = None
-    return image_url
+        return image_url
 
 
 def generate_html(text, filename):
@@ -813,6 +840,35 @@ def generate_markdown(text, filename):
         file.write(markdown)
 
 
+# TODO Add support for eDonkey, Gnutella, Soulseek
+async def get_magnet(link):
+    parted_link = urlsplit(link)
+    queries = parse_qs(parted_link.query)
+    query_xt = queries["xt"][0]
+    if query_xt.startswith("urn:btih:"):
+        filename = queries["dn"][0]
+        checksum = query_xt[len("urn:btih:"):]
+        torrent = await fetch.magnet(link)
+        logging.debug(
+            "Attempting to retrieve {} ({})".format(
+                filename, checksum))
+        if not torrent:
+            logging.debug(
+                "Attempting to retrieve {} from HTTP caching service".format(
+                    filename))
+            urls = [
+                'https://watercache.libertycorp.org/get/{}/{}',
+                'https://itorrents.org/torrent/{}.torrent?title={}',
+                'https://firecache.libertycorp.org/get/{}/{}',
+                'http://fcache63sakpihd44kxdduy6kgpdhgejgp323wci435zwy6kiylcnfad.onion/get/{}/{}'
+                ]
+            for url in urls:
+                torrent = fetch.http(url.format(checksum, filename))
+                if torrent:
+                    break
+    return torrent
+
+
 # NOTE Why (if res[0]) and (if res[1] == 200)?
 async def organize_items(db_file, urls):
     """
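get_magnet() above keys off the xt (exact topic) and dn (display name) fields of the magnet URI. What parse_qs yields for a BitTorrent link, with a made-up info hash:

    from urllib.parse import parse_qs, urlsplit

    link = "magnet:?xt=urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a&dn=example.iso"  # hypothetical
    queries = parse_qs(urlsplit(link).query)
    print(queries["xt"][0])                     # urn:btih:c12fe1c06bba254a9dc9f519b335aa7c1367a88a
    print(queries["dn"][0])                     # example.iso
    print(queries["xt"][0][len("urn:btih:"):])  # bare info hash, as used by the cache URLs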
@@ -28,9 +28,16 @@ from asyncio import TimeoutError
 # from asyncio.exceptions import IncompleteReadError
 # from bs4 import BeautifulSoup
 # from http.client import IncompleteRead
+import logging
 # from lxml import html
-import slixfeed.config as config
 # from xml.etree.ElementTree import ElementTree, ParseError
+import slixfeed.config as config
+try:
+    from magnet2torrent import Magnet2Torrent, FailedToFetchException
+except:
+    logging.info(
+        "Package magnet2torrent was not found.\n"
+        "BitTorrent is disabled.")
 
 
 # async def dat():
@@ -105,3 +112,11 @@ async def http(url):
             False, "Timeout: " + str(e)
             ]
     return msg
+
+
+async def magnet(link):
+    m2t = Magnet2Torrent(link)
+    try:
+        filename, torrent_data = await m2t.retrieve_torrent()
+    except FailedToFetchException:
+        logging.debug("Failed")
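The new fetch.magnet() wraps magnet2torrent. A sketch of how the same API can persist the fetched metadata, assuming retrieve_torrent() returns a (filename, torrent_data) pair as above; the link and path are placeholders:

    import asyncio
    from magnet2torrent import Magnet2Torrent, FailedToFetchException

    async def save_torrent(link, path):
        m2t = Magnet2Torrent(link)
        try:
            filename, torrent_data = await m2t.retrieve_torrent()
        except FailedToFetchException:
            return None
        with open(path, "wb") as f:  # torrent_data: raw bencoded torrent
            f.write(torrent_data)
        return filename

    # asyncio.run(save_torrent("magnet:?xt=urn:btih:...", "/tmp/example.torrent"))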
@@ -129,6 +129,7 @@ def create_tables(db_file):
             id INTEGER NOT NULL,
             title TEXT NOT NULL,
             link TEXT NOT NULL,
+            enclosure TEXT,
             entry_id TEXT NOT NULL,
             feed_id INTEGER NOT NULL,
             timestamp TEXT,
@@ -146,6 +147,7 @@ def create_tables(db_file):
             id INTEGER NOT NULL,
             title TEXT NOT NULL,
             link TEXT NOT NULL,
+            enclosure TEXT,
             entry_id TEXT NOT NULL,
             feed_id INTEGER NOT NULL,
             timestamp TEXT,
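With the new column, the entries table (and its archive twin) would look roughly as follows; columns not visible in this diff, such as the read flag used by the queries further down, are assumptions about the existing shape:

    import sqlite3

    con = sqlite3.connect(":memory:")
    con.execute(
        """
        CREATE TABLE entries (
            id INTEGER NOT NULL PRIMARY KEY,
            title TEXT NOT NULL,
            link TEXT NOT NULL,
            enclosure TEXT,
            entry_id TEXT NOT NULL,
            feed_id INTEGER NOT NULL,
            timestamp TEXT,
            read INTEGER
            )
        """)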
@@ -486,7 +488,8 @@ async def remove_feed_by_url(db_file, url):
         cur = conn.cursor()
         sql = (
             """
-            DELETE FROM feeds
+            DELETE
+            FROM feeds
             WHERE url = ?
             """
             )
@@ -556,16 +559,17 @@ async def get_feed_id_and_name(db_file, url):
     result : list
         List of ID and Name of feed.
     """
-    cur = get_cursor(db_file)
-    sql = (
-        """
-        SELECT id, name
-        FROM feeds
-        WHERE url = ?
-        """
-        )
-    result = cur.execute(sql, (url,)).fetchone()
-    return result
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = (
+            """
+            SELECT id, name
+            FROM feeds
+            WHERE url = ?
+            """
+            )
+        result = cur.execute(sql, (url,)).fetchone()
+        return result
 
 
 async def get_number_of_items(db_file, table):
@@ -677,11 +681,11 @@ async def get_unread_entries(db_file, num):
         cur = conn.cursor()
         sql = (
             """
-            SELECT id, title, link, feed_id, timestamp
+            SELECT id, title, link, enclosure, feed_id, timestamp
             FROM entries
             WHERE read = 0
             UNION ALL
-            SELECT id, title, link, feed_id, timestamp
+            SELECT id, title, link, enclosure, feed_id, timestamp
             FROM archive
             ORDER BY timestamp
             DESC LIMIT :num
@@ -861,17 +865,9 @@ def get_entry_url(db_file, ix):
     return url
 
 
-def get_feed_url(db_file, ix):
+def get_feed_url(db_file, feed_id):
     with create_connection(db_file) as conn:
         cur = conn.cursor()
-        sql = ( # TODO Handle table archive too
-            """
-            SELECT feed_id
-            FROM entries
-            WHERE id = :ix
-            """
-            )
-        feed_id = cur.execute(sql, (ix,)).fetchone()[0]
         sql = (
             """
             SELECT url
@@ -1152,14 +1148,15 @@ async def add_entries_and_update_timestamp(db_file, new_entries):
             """
             INSERT
             INTO entries(
-                title, link, entry_id, feed_id, timestamp, read)
+                title, link, enclosure, entry_id, feed_id, timestamp, read)
             VALUES(
-                :title, :link, :entry_id, :feed_id, :timestamp, :read)
+                :title, :link, :enclosure, :entry_id, :feed_id, :timestamp, :read)
             """
             )
         cur.execute(sql, {
             "title": entry["title"],
             "link": entry["link"],
+            "enclosure": entry["enclosure"],
             "entry_id": entry["entry_id"],
             "feed_id": feed_id,
             "timestamp": entry["date"],
@@ -1338,10 +1335,12 @@ async def maintain_archive(db_file, limit):
             """
             DELETE FROM archive
             WHERE id
-            IN (SELECT id
-                FROM archive
-                ORDER BY timestamp ASC
-                LIMIT :difference)
+            IN (
+                SELECT id
+                FROM archive
+                ORDER BY timestamp ASC
+                LIMIT :difference
+                )
             """
             )
         cur.execute(sql, {
@@ -1452,15 +1451,16 @@ async def get_feeds(db_file):
     # Select name, url (feeds) updated, enabled, feed_id (status)
     # 2) Sort feeds by id. Sort status by feed_id
     # results += cur.execute(sql).fetchall()
-    cur = get_cursor(db_file)
-    sql = (
-        """
-        SELECT name, url, id
-        FROM feeds
-        """
-        )
-    results = cur.execute(sql).fetchall()
-    return results
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = (
+            """
+            SELECT name, url, id
+            FROM feeds
+            """
+            )
+        results = cur.execute(sql).fetchall()
+        return results
 
 
 async def last_entries(db_file, num):
@@ -1479,29 +1479,30 @@ async def last_entries(db_file, num):
     titles_list : str
         List of recent N entries as message.
     """
-    cur = get_cursor(db_file)
-    # sql = (
-    #     "SELECT title, link "
-    #     "FROM entries "
-    #     "ORDER BY ROWID DESC "
-    #     "LIMIT :num"
-    #     )
-    sql = (
-        """
-        SELECT title, link, timestamp
-        FROM entries
-        WHERE read = 0
-        UNION ALL
-        SELECT title, link, timestamp
-        FROM archive
-        WHERE read = 0
-        ORDER BY timestamp DESC
-        LIMIT :num
-        """
-        )
-    results = cur.execute(
-        sql, (num,)).fetchall()
-    return results
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        # sql = (
+        #     "SELECT title, link "
+        #     "FROM entries "
+        #     "ORDER BY ROWID DESC "
+        #     "LIMIT :num"
+        #     )
+        sql = (
+            """
+            SELECT title, link, timestamp
+            FROM entries
+            WHERE read = 0
+            UNION ALL
+            SELECT title, link, timestamp
+            FROM archive
+            WHERE read = 0
+            ORDER BY timestamp DESC
+            LIMIT :num
+            """
+            )
+        results = cur.execute(
+            sql, (num,)).fetchall()
+        return results
 
 
 async def search_feeds(db_file, query):
@@ -1520,19 +1521,20 @@ async def search_feeds(db_file, query):
     titles_list : str
         Feeds of specified keywords as message.
     """
-    cur = get_cursor(db_file)
-    sql = (
-        """
-        SELECT name, id, url
-        FROM feeds
-        WHERE name LIKE ?
-        OR url LIKE ?
-        LIMIT 50
-        """
-        )
-    results = cur.execute(
-        sql, [f'%{query}%', f'%{query}%']).fetchall()
-    return results
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = (
+            """
+            SELECT name, id, url
+            FROM feeds
+            WHERE name LIKE ?
+            OR url LIKE ?
+            LIMIT 50
+            """
+            )
+        results = cur.execute(
+            sql, [f'%{query}%', f'%{query}%']).fetchall()
+        return results
 
 
 async def search_entries(db_file, query):
@@ -1551,22 +1553,23 @@ async def search_entries(db_file, query):
     titles_list : str
         Entries of specified keywords as message.
     """
-    cur = get_cursor(db_file)
-    sql = (
-        """
-        SELECT title, link
-        FROM entries
-        WHERE title LIKE ?
-        UNION ALL
-        SELECT title, link
-        FROM archive
-        WHERE title LIKE ?
-        LIMIT 50
-        """
-        )
-    results = cur.execute(
-        sql, (f'%{query}%', f'%{query}%')).fetchall()
-    return results
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        sql = (
+            """
+            SELECT title, link
+            FROM entries
+            WHERE title LIKE ?
+            UNION ALL
+            SELECT title, link
+            FROM archive
+            WHERE title LIKE ?
+            LIMIT 50
+            """
+            )
+        results = cur.execute(
+            sql, (f'%{query}%', f'%{query}%')).fetchall()
+        return results
 
 
 """
@@ -1619,68 +1622,62 @@ async def check_entry_exist(
     bool
         True or None.
     """
-    cur = get_cursor(db_file)
-    exist = False
-    if entry_id:
-        feed_id = get_feed_id(cur, url)
-        sql = (
-            """
-            SELECT id
-            FROM entries
-            WHERE
-                entry_id = :entry_id and
-                feed_id = :feed_id
-            """
-            )
-        result = cur.execute(sql, {
-            "entry_id": entry_id,
-            "feed_id": feed_id
-            }).fetchone()
-        if result: exist = True
-    elif date:
-        sql = (
-            """
-            SELECT id
-            FROM entries
-            WHERE
-                title = :title and
-                link = :link and
-                timestamp = :date
-            """
-            )
-        try:
-            result = cur.execute(sql, {
-                "title": title,
-                "link": link,
-                "timestamp": date
-                }).fetchone()
-            if result: exist = True
-        except:
-            print(current_time(), "ERROR DATE: source =", url)
-            print(current_time(), "ERROR DATE: date =", date)
-    else:
-        sql = (
-            """
-            SELECT id
-            FROM entries
-            WHERE
-                title = :title and
-                link = :link
-            """
-            )
-        result = cur.execute(sql, {
-            "title": title,
-            "link": link
-            }).fetchone()
-        if result: exist = True
-    # try:
-    #     if result:
-    #         return True
-    #     else:
-    #         return None
-    # except:
-    #     print(current_time(), "ERROR DATE: result =", url)
-    return exist
+    with create_connection(db_file) as conn:
+        cur = conn.cursor()
+        exist = False
+        if entry_id:
+            feed_id = get_feed_id(cur, url)
+            sql = (
+                """
+                SELECT id
+                FROM entries
+                WHERE entry_id = :entry_id and feed_id = :feed_id
+                """
+                )
+            result = cur.execute(sql, {
+                "entry_id": entry_id,
+                "feed_id": feed_id
+                }).fetchone()
+            if result: exist = True
+        elif date:
+            sql = (
+                """
+                SELECT id
+                FROM entries
+                WHERE title = :title and link = :link and timestamp = :date
+                """
+                )
+            try:
+                result = cur.execute(sql, {
+                    "title": title,
+                    "link": link,
+                    "timestamp": date
+                    }).fetchone()
+                if result: exist = True
+            except:
+                print(current_time(), "ERROR DATE: source =", url)
+                print(current_time(), "ERROR DATE: date =", date)
+        else:
+            sql = (
+                """
+                SELECT id
+                FROM entries
+                WHERE title = :title and link = :link
+                """
+                )
+            result = cur.execute(sql, {
+                "title": title,
+                "link": link
+                }).fetchone()
+            if result: exist = True
+        # try:
+        #     if result:
+        #         return True
+        #     else:
+        #         return None
+        # except:
+        #     print(current_time(), "ERROR DATE: result =", url)
+        return exist
 
 
 async def set_settings_value(db_file, key_value):
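Several functions in this file (get_feed_id_and_name, get_feeds, last_entries, search_feeds, search_entries, check_entry_exist) converge on the same pattern in this commit: a connection context manager instead of a bare get_cursor(). A minimal stand-in, assuming create_connection() simply wraps sqlite3.connect; connections used this way commit on success and roll back on error:

    import sqlite3

    def create_connection(db_file):
        # assumed shape of slixfeed's helper
        return sqlite3.connect(db_file)

    def get_feed_url(db_file, feed_id):
        with create_connection(db_file) as conn:
            cur = conn.cursor()
            result = cur.execute(
                "SELECT url FROM feeds WHERE id = ?", (feed_id,)).fetchone()
            return result[0] if result else None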
@@ -227,46 +227,60 @@ async def send_update(self, jid, num=None):
         num = int(num)
-    news_digest = []
+    news_digest = ''
     results = await get_unread_entries(db_file, num)
-    image_url = None
+    media = None
+    chat_type = await utility.jid_type(self, jid)
     for result in results:
         ix = result[0]
         title_e = result[1]
         url = result[2]
-        feed_id = result[3]
-        date = result[4]
+        enclosure = result[3]
+        feed_id = result[4]
+        date = result[5]
         title_f = get_feed_title(db_file, feed_id)
-        news_item = action.list_unread_entries(result, title_f)
-        news_digest.extend([news_item])
+        news_digest += action.list_unread_entries(result, title_f)
         # print(db_file)
         # print(result[0])
         # breakpoint()
-        await mark_as_read(db_file, result[0])
-        if not image_url:
-            image_url = await action.extract_image_from_feed(
-                db_file, ix, url)
-        if not image_url:
-            image_url = await action.extract_image_from_html(url)
-    print("image_url")
-    print(image_url)
-    new = " ".join(news_digest)
-    # breakpoint()
-    if new:
+        await mark_as_read(db_file, ix)
+
+        # Find media
+        if url.startswith("magnet:"):
+            media = action.get_magnet(url)
+        elif enclosure.startswith("magnet:"):
+            media = action.get_magnet(enclosure)
+        elif enclosure:
+            media = enclosure
+        else:
+            media = await action.extract_image_from_html(url)
+
+        if media and news_digest:
+            # Send textual message
+            xmpp.Slixfeed.send_message(
+                self, mto=jid, mbody=news_digest, mtype=chat_type)
+            news_digest = ''
+            # Send media
+            message = xmpp.Slixfeed.make_message(
+                self, mto=jid, mbody=media, mtype=chat_type)
+            message['oob']['url'] = media
+            message.send()
+            media = None
+
+    if news_digest:
         # TODO Add while loop to assure delivery.
         # print(await current_time(), ">>> ACT send_message",jid)
-        chat_type = await utility.jid_type(self, jid)
         # NOTE Do we need "if statement"? See NOTE at is_muc.
         if chat_type in ("chat", "groupchat"):
             # TODO Provide a choice (with or without images)
             xmpp.Slixfeed.send_message(
-                self, mto=jid, mbody=new, mtype=chat_type)
-            if image_url:
-                # message = xmpp.Slixfeed.make_message(
-                #     self, mto=jid, mbody=new, mtype=chat_type)
-                message = xmpp.Slixfeed.make_message(
-                    self, mto=jid, mbody=image_url, mtype=chat_type)
-                message['oob']['url'] = image_url
-                print(image_url)
-                message.send()
+                self, mto=jid, mbody=news_digest, mtype=chat_type)
+            # if media:
+            #     # message = xmpp.Slixfeed.make_message(
+            #     #     self, mto=jid, mbody=new, mtype=chat_type)
+            #     message = xmpp.Slixfeed.make_message(
+            #         self, mto=jid, mbody=media, mtype=chat_type)
+            #     message['oob']['url'] = media
+            #     message.send()
 
     # TODO Do not refresh task before
     # verifying that it was completed.
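send_update() now interleaves delivery: whenever an entry carries media, the digest accumulated so far goes out as a normal message, followed by a second message whose body is the media URL with a matching XEP-0066 out-of-band payload, which most clients render inline. The pattern in isolation, assuming a slixmpp client with the xep_0066 plugin registered; the JID and URL are placeholders:

    def send_media(client, jid, media_url, chat_type="chat"):
        message = client.make_message(
            mto=jid, mbody=media_url, mtype=chat_type)
        message['oob']['url'] = media_url  # XEP-0066: body and oob url must match
        message.send()

    # send_media(bot, "user@example.org", "https://example.org/photo.png")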
@@ -18,6 +18,7 @@ TODO
 
 """
 
+import slixfeed.fetch as fetch
 import logging
 import os
 import slixfeed.action as action
@@ -335,7 +336,7 @@ async def message(self, message):
             else:
                 response = "Missing value."
             send_reply_message(self, message, response)
-        case _ if message_lowercase.startswith("bookmark - "):
+        case _ if message_lowercase.startswith("bookmark -"):
             if jid == get_value("accounts", "XMPP", "operator"):
                 muc_jid = message_text[11:]
                 await bookmark.remove(self, muc_jid)
@@ -394,7 +395,7 @@ async def message(self, message):
             else:
                 response = "Missing keywords."
             send_reply_message(self, message, response)
-        case _ if message_lowercase.startswith("export "):
+        case _ if message_lowercase.startswith("export"):
             ex = message_text[7:]
             if ex in ("opml", "html", "md", "xbel"):
                 status_type = "dnd"
@@ -440,7 +441,7 @@ async def message(self, message):
                 response = "Gemini and Gopher are not supported yet."
                 send_reply_message(self, message, response)
         # TODO xHTML, HTMLZ, Markdown, MHTML, PDF, TXT
-        case _ if (message_lowercase.startswith("get ")):
+        case _ if (message_lowercase.startswith("get")):
             message_text = message_text[4:]
             ix_url = message_text.split(" ")[0]
             ext = " ".join(message_text.split(" ")[1:])
@@ -450,8 +451,8 @@ async def message(self, message):
             if ext in ("html", "md", "pdf"):
                 status_type = "dnd"
                 status_message = (
-                    "📃️ Procesing request to produce {} document ..."
-                    ).format(ext)
+                    "📃️ Procesing request to produce {} document..."
+                    ).format(ext.upper())
                 send_status_message(
                     self, jid, status_type, status_message)
                 db_file = get_pathname_to_database(jid)
@@ -461,27 +462,43 @@ async def message(self, message):
                     os.mkdir(data_dir)
                 if not os.path.isdir(data_dir + '/readability'):
                     os.mkdir(data_dir + '/readability')
-                filename = os.path.join(
-                    data_dir, "readability", "saved_article_" + timestamp() + "." + ext)
                 try:
                     ix = int(ix_url)
                     try:
                         url = sqlite.get_entry_url(db_file, ix)
                     except:
-                        response = "No entry Id with {}".format(ix)
+                        response = "No entry with Id {}".format(ix)
                 except:
                     url = ix_url
                 if url:
                     url = uri.remove_tracking_parameters(url)
                     url = (uri.replace_hostname(url, "link")) or url
-                    status = await action.generate_document(url, ext, filename)
-                    if status:
-                        response = (
-                            "Failed to export {}. Reason: {}"
-                            ).format(ext, status)
-                    else:
-                        url = await upload.start(self, jid, filename)
-                        await send_oob_message(self, jid, url)
+                    result = await fetch.http(url)
+                    data = result[0]
+                    code = result[1]
+                    if data:
+                        title = action.get_document_title(data)
+                        title = title.strip().lower()
+                        for i in (" ", "-"):
+                            title = title.replace(i, "_")
+                        for i in ("?", "'", "!"):
+                            title = title.replace(i, "")
+                        filename = os.path.join(
+                            data_dir, "readability",
+                            title + "_" + timestamp() + "." + ext)
+                        error = action.generate_document(
+                            data, url, ext, filename)
+                        if status:
+                            response = (
+                                "Failed to export {}. Reason: {}"
+                                ).format(ext.upper(), error)
+                        else:
+                            url = await upload.start(self, jid, filename)
+                            await send_oob_message(self, jid, url)
+                    else:
+                        response = (
+                            "Failed to fetch {}. Reason: {}"
+                            ).format(url, code)
                 await task.start_tasks_xmpp(
                     self, jid, ["status"])
             else:
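The export path above now derives the saved filename from the document title rather than a generic saved_article_ prefix. What the normalization produces, step by step, on a made-up title:

    title = "Nginx - What's New?"
    title = title.strip().lower()
    for i in (" ", "-"):
        title = title.replace(i, "_")
    for i in ("?", "'", "!"):
        title = title.replace(i, "")
    print(title)  # nginx___whats_new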
@@ -769,7 +786,7 @@ async def message(self, message):
             else:
                 response = "Missing value."
             send_reply_message(self, message, response)
-        case _ if message_lowercase.startswith("remove "):
+        case _ if message_lowercase.startswith("remove"):
            ix_url = message_text[7:]
            if ix_url:
                db_file = get_pathname_to_database(jid)
@@ -873,7 +890,7 @@ async def message(self, message):
                 except:
                     response = "No news source with ID {}.".format(ix)
                 send_reply_message(self, message, response)
-        case _ if message_lowercase.startswith("enable "):
+        case _ if message_lowercase.startswith("enable"):
             ix = message_text[7:]
             db_file = get_pathname_to_database(jid)
             try: