Add functionality to display an image from the first entry link that provides one (experimental)

This commit is contained in:
Schimon Jehudah 2024-01-09 22:36:16 +00:00
parent 8b313acd91
commit 46a0819229
4 changed files with 84 additions and 43 deletions

View file

@ -16,6 +16,7 @@ import html2text
from http.client import IncompleteRead
from feedparser import parse
import logging
from lxml import html
import pdfkit
from readability import Document
import slixfeed.config as config
@ -28,7 +29,7 @@ from slixfeed.datetime import (
import slixfeed.fetch as fetch
import slixfeed.sqlite as sqlite
from slixfeed.url import (
# complete_url,
complete_url,
join_url,
remove_tracking_parameters,
replace_hostname,
@ -663,12 +664,14 @@ async def scan(db_file, url):
await sqlite.set_date(db_file, url)
async def get_content(db_file, ix):
url = sqlite.get_entry_url(db_file, ix)
async def get_content(url):
result = await fetch.download_feed(url)
if result[0]:
document = Document(result[0])
return document.summary()
content = document.summary()
else:
content = None
return content
# TODO Either adapt it to filename
# or change it to something else
#filename = document.title()
@ -677,6 +680,17 @@ async def get_content(db_file, ix):
# file.write(html_doc)
def extract_first_image(url, content):
    """
    Find the first image referenced in an HTML document.

    Parameters
    ----------
    url : str
        Address of the page the markup belongs to. It is handed to
        ``complete_url`` together with the image source.
    content : str or None
        HTML markup to search. ``None`` or blank markup is accepted,
        because ``get_content`` falls back to ``None`` when it has no
        document to summarize.

    Returns
    -------
    image_url : str or None
        Result of ``complete_url(url, src)`` for the first ``<img>``
        ``src`` attribute found, or ``None`` when there is no markup
        or no image in it.
    """
    # Bail out before reaching the parser: lxml raises on None and on
    # an empty document, and either way there is no image to report.
    if not content or not content.strip():
        return None
    tree = html.fromstring(content)
    images = tree.xpath('//img/@src')
    if not images:
        return None
    # Normalize the XPath result to a plain str before resolving it.
    image = str(images[0])
    return complete_url(url, image)
def generate_html(text, filename):
with open(filename, 'w') as file:
file.write(text)

View file

@ -227,6 +227,7 @@ async def send_update(self, jid, num=None):
num = int(num)
news_digest = []
results = await get_unread_entries(db_file, num)
image_url = None
for result in results:
ix = result[0]
title_e = result[1]
@ -240,18 +241,29 @@ async def send_update(self, jid, num=None):
# print(result[0])
# breakpoint()
await mark_as_read(db_file, result[0])
if not image_url:
content = await action.get_content(url)
image_url = action.extract_first_image(url, content)
new = " ".join(news_digest)
# breakpoint()
if new:
# print("if new")
# breakpoint()
# TODO Add while loop to assure delivery.
# print(await current_time(), ">>> ACT send_message",jid)
chat_type = await utility.jid_type(self, jid)
# NOTE Do we need "if statement"? See NOTE at is_muc.
if chat_type in ("chat", "groupchat"):
# TODO Provide a choice (with or without images)
xmpp.Slixfeed.send_message(
self, mto=jid, mbody=new, mtype=chat_type)
if image_url:
# message = xmpp.Slixfeed.make_message(
# self, mto=jid, mbody=new, mtype=chat_type)
message = xmpp.Slixfeed.make_message(
self, mto=jid, mbody=image_url, mtype=chat_type)
message['oob']['url'] = image_url
print(image_url)
message.send()
# TODO Do not refresh task before
# verifying that it was completed.
await refresh_task(

View file

@ -394,8 +394,8 @@ async def message(self, message):
response = "Missing keywords."
send_reply_message(self, message, response)
case _ if message_lowercase.startswith("export "):
key = message_text[7:]
if key in ("opml", "html", "md", "xbel"):
ex = message_text[7:]
if ex in ("opml", "html", "md", "xbel"):
status_type = "dnd"
status_message = (
"📤️ Procesing request to export feeds into {} ..."
@ -423,9 +423,9 @@ async def message(self, message):
case "xbel":
response = "Not yet implemented."
url = await upload.start(self, jid, filename)
response = (
"Feeds exported successfully to {}.\n{}"
).format(key, url)
# response = (
# "Feeds exported successfully to {}.\n{}"
# ).format(key, url)
# send_oob_reply_message(message, url, response)
await send_oob_message(
self, jid, url)
@ -441,39 +441,54 @@ async def message(self, message):
# TODO xHTML, HTMLZ, Markdown, MHTML, PDF, TXT
case _ if (message_lowercase.startswith("get ")):
message_text = message_text[4:]
ix = message_text.split(" ")[0]
ex = " ".join(message_text.split(" ")[1:])
ex = ex if ex else 'pdf'
db_file = get_pathname_to_database(jid)
data_dir = get_default_data_directory()
if ix:
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
if not os.path.isdir(data_dir + '/readability'):
os.mkdir(data_dir + '/readability')
filename = os.path.join(
data_dir, "readability", "saved_article_" + timestamp() + "." + ex)
try:
text = await action.get_content(db_file, ix)
except:
response = "No entry Id with {}".format(ix)
if text:
match ex:
case "html":
action.generate_html(text, filename)
case "md":
action.generate_markdown(text, filename)
case "pdf":
action.generate_pdf(text, filename)
url = await upload.start(
self, jid, filename)
print(url)
await send_oob_message(
self, jid, url)
ix_url = message_text.split(" ")[0]
ext = " ".join(message_text.split(" ")[1:])
ext = ext if ext else 'pdf'
if ext in ("html", "md", "pdf"):
status_type = "dnd"
status_message = (
"📃️ Procesing request to produce {} document ..."
).format(ext)
send_status_message(
self, jid, status_type, status_message)
db_file = get_pathname_to_database(jid)
data_dir = get_default_data_directory()
if ix_url:
if not os.path.isdir(data_dir):
os.mkdir(data_dir)
if not os.path.isdir(data_dir + '/readability'):
os.mkdir(data_dir + '/readability')
filename = os.path.join(
data_dir, "readability", "saved_article_" + timestamp() + "." + ext)
try:
ix = int(ix_url)
try:
url = sqlite.get_entry_url(db_file, ix)
except:
response = "No entry Id with {}".format(ix)
except:
url = ix_url
content = await action.get_content(url)
if content:
match ext:
case "html":
action.generate_html(content, filename)
case "md":
action.generate_markdown(content, filename)
case "pdf":
action.generate_pdf(content, filename)
url = await upload.start(
self, jid, filename)
await send_oob_message(
self, jid, url)
await task.start_tasks_xmpp(
self, jid, ["status"])
else:
response = "Failed to fetch resource."
else:
response = "Failed to fetch resource."
response = "Missing entry Id."
else:
response = "Missing entry Id."
response = "Unsupported filetype."
if response:
send_reply_message(self, message, response)
# case _ if (message_lowercase.startswith("http")) and(

View file

@ -30,6 +30,6 @@ async def start(self, jid, filename, domain=None):
)
except IqTimeout:
raise TimeoutError('Could not send message in time')
logging.info('Upload success!')
logging.info('Upload successful!')
logging.info('Sending file to %s', jid)
return url