forked from sch/Slixfeed
Add functionality to display an image taken from the first entry link that provides one (experimental)
This commit is contained in:
parent
8b313acd91
commit
46a0819229
4 changed files with 84 additions and 43 deletions
|
@ -16,6 +16,7 @@ import html2text
|
|||
from http.client import IncompleteRead
|
||||
from feedparser import parse
|
||||
import logging
|
||||
from lxml import html
|
||||
import pdfkit
|
||||
from readability import Document
|
||||
import slixfeed.config as config
|
||||
|
@ -28,7 +29,7 @@ from slixfeed.datetime import (
|
|||
import slixfeed.fetch as fetch
|
||||
import slixfeed.sqlite as sqlite
|
||||
from slixfeed.url import (
|
||||
# complete_url,
|
||||
complete_url,
|
||||
join_url,
|
||||
remove_tracking_parameters,
|
||||
replace_hostname,
|
||||
|
@ -663,12 +664,14 @@ async def scan(db_file, url):
|
|||
await sqlite.set_date(db_file, url)
|
||||
|
||||
|
||||
async def get_content(db_file, ix):
|
||||
url = sqlite.get_entry_url(db_file, ix)
|
||||
async def get_content(url):
|
||||
result = await fetch.download_feed(url)
|
||||
if result[0]:
|
||||
document = Document(result[0])
|
||||
return document.summary()
|
||||
content = document.summary()
|
||||
else:
|
||||
content = None
|
||||
return content
|
||||
# TODO Either adapt it to filename
|
||||
# or change it to something else
|
||||
#filename = document.title()
|
||||
|
@ -677,6 +680,17 @@ async def get_content(db_file, ix):
|
|||
# file.write(html_doc)
|
||||
|
||||
|
||||
def extract_first_image(url, content):
    """
    Return the URL of the first <img> found in an HTML document.

    Parameters
    ----------
    url : str
        Address of the page the markup came from. It is handed to
        complete_url() together with the image source (presumably so a
        relative src can be resolved against it -- confirm in slixfeed.url).
    content : str or None
        HTML markup to search. None or an empty value is accepted and
        treated as "no image".

    Returns
    -------
    image_url : str or None
        Result of complete_url(url, src) for the first src attribute in
        document order, or None when there is no content or no image.
    """
    # Callers pass the result of get_content(), which is None when nothing
    # was fetched; parsing None or an empty document would raise, so treat
    # both as "nothing to extract".
    if not content:
        return None
    tree = html.fromstring(content)
    images = tree.xpath('//img/@src')
    if not images:
        return None
    # Convert the XPath result object to a plain str before building the URL.
    image = str(images[0])
    return complete_url(url, image)
|
||||
|
||||
def generate_html(text, filename):
    """
    Write text to filename, replacing any existing file.

    Parameters
    ----------
    text : str
        Markup to store.
    filename : str
        Path of the file to create or overwrite.
    """
    # Encode explicitly as UTF-8: article markup routinely contains
    # non-ASCII characters, and the platform default encoding may be unable
    # to represent them.
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(text)
|
||||
|
|
|
@ -227,6 +227,7 @@ async def send_update(self, jid, num=None):
|
|||
num = int(num)
|
||||
news_digest = []
|
||||
results = await get_unread_entries(db_file, num)
|
||||
image_url = None
|
||||
for result in results:
|
||||
ix = result[0]
|
||||
title_e = result[1]
|
||||
|
@ -240,18 +241,29 @@ async def send_update(self, jid, num=None):
|
|||
# print(result[0])
|
||||
# breakpoint()
|
||||
await mark_as_read(db_file, result[0])
|
||||
if not image_url:
|
||||
content = await action.get_content(url)
|
||||
image_url = action.extract_first_image(url, content)
|
||||
new = " ".join(news_digest)
|
||||
# breakpoint()
|
||||
if new:
|
||||
# print("if new")
|
||||
# breakpoint()
|
||||
# TODO Add while loop to assure delivery.
|
||||
# print(await current_time(), ">>> ACT send_message",jid)
|
||||
chat_type = await utility.jid_type(self, jid)
|
||||
# NOTE Do we need "if statement"? See NOTE at is_muc.
|
||||
if chat_type in ("chat", "groupchat"):
|
||||
# TODO Provide a choice (with or without images)
|
||||
xmpp.Slixfeed.send_message(
|
||||
self, mto=jid, mbody=new, mtype=chat_type)
|
||||
if image_url:
|
||||
# message = xmpp.Slixfeed.make_message(
|
||||
# self, mto=jid, mbody=new, mtype=chat_type)
|
||||
message = xmpp.Slixfeed.make_message(
|
||||
self, mto=jid, mbody=image_url, mtype=chat_type)
|
||||
message['oob']['url'] = image_url
|
||||
print(image_url)
|
||||
message.send()
|
||||
|
||||
# TODO Do not refresh task before
|
||||
# verifying that it was completed.
|
||||
await refresh_task(
|
||||
|
|
|
@ -394,8 +394,8 @@ async def message(self, message):
|
|||
response = "Missing keywords."
|
||||
send_reply_message(self, message, response)
|
||||
case _ if message_lowercase.startswith("export "):
|
||||
key = message_text[7:]
|
||||
if key in ("opml", "html", "md", "xbel"):
|
||||
ex = message_text[7:]
|
||||
if ex in ("opml", "html", "md", "xbel"):
|
||||
status_type = "dnd"
|
||||
status_message = (
|
||||
"📤️ Procesing request to export feeds into {} ..."
|
||||
|
@ -423,9 +423,9 @@ async def message(self, message):
|
|||
case "xbel":
|
||||
response = "Not yet implemented."
|
||||
url = await upload.start(self, jid, filename)
|
||||
response = (
|
||||
"Feeds exported successfully to {}.\n{}"
|
||||
).format(key, url)
|
||||
# response = (
|
||||
# "Feeds exported successfully to {}.\n{}"
|
||||
# ).format(key, url)
|
||||
# send_oob_reply_message(message, url, response)
|
||||
await send_oob_message(
|
||||
self, jid, url)
|
||||
|
@ -441,39 +441,54 @@ async def message(self, message):
|
|||
# TODO xHTML, HTMLZ, Markdown, MHTML, PDF, TXT
|
||||
case _ if (message_lowercase.startswith("get ")):
|
||||
message_text = message_text[4:]
|
||||
ix = message_text.split(" ")[0]
|
||||
ex = " ".join(message_text.split(" ")[1:])
|
||||
ex = ex if ex else 'pdf'
|
||||
ix_url = message_text.split(" ")[0]
|
||||
ext = " ".join(message_text.split(" ")[1:])
|
||||
ext = ext if ext else 'pdf'
|
||||
if ext in ("html", "md", "pdf"):
|
||||
status_type = "dnd"
|
||||
status_message = (
|
||||
"📃️ Procesing request to produce {} document ..."
|
||||
).format(ext)
|
||||
send_status_message(
|
||||
self, jid, status_type, status_message)
|
||||
db_file = get_pathname_to_database(jid)
|
||||
data_dir = get_default_data_directory()
|
||||
if ix:
|
||||
if ix_url:
|
||||
if not os.path.isdir(data_dir):
|
||||
os.mkdir(data_dir)
|
||||
if not os.path.isdir(data_dir + '/readability'):
|
||||
os.mkdir(data_dir + '/readability')
|
||||
filename = os.path.join(
|
||||
data_dir, "readability", "saved_article_" + timestamp() + "." + ex)
|
||||
data_dir, "readability", "saved_article_" + timestamp() + "." + ext)
|
||||
try:
|
||||
text = await action.get_content(db_file, ix)
|
||||
ix = int(ix_url)
|
||||
try:
|
||||
url = sqlite.get_entry_url(db_file, ix)
|
||||
except:
|
||||
response = "No entry Id with {}".format(ix)
|
||||
if text:
|
||||
match ex:
|
||||
except:
|
||||
url = ix_url
|
||||
content = await action.get_content(url)
|
||||
if content:
|
||||
match ext:
|
||||
case "html":
|
||||
action.generate_html(text, filename)
|
||||
action.generate_html(content, filename)
|
||||
case "md":
|
||||
action.generate_markdown(text, filename)
|
||||
action.generate_markdown(content, filename)
|
||||
case "pdf":
|
||||
action.generate_pdf(text, filename)
|
||||
action.generate_pdf(content, filename)
|
||||
url = await upload.start(
|
||||
self, jid, filename)
|
||||
print(url)
|
||||
await send_oob_message(
|
||||
self, jid, url)
|
||||
await task.start_tasks_xmpp(
|
||||
self, jid, ["status"])
|
||||
else:
|
||||
response = "Failed to fetch resource."
|
||||
else:
|
||||
response = "Missing entry Id."
|
||||
else:
|
||||
response = "Unsupported filetype."
|
||||
if response:
|
||||
send_reply_message(self, message, response)
|
||||
# case _ if (message_lowercase.startswith("http")) and(
|
||||
|
|
|
@ -30,6 +30,6 @@ async def start(self, jid, filename, domain=None):
|
|||
)
|
||||
except IqTimeout:
|
||||
raise TimeoutError('Could not send message in time')
|
||||
logging.info('Upload success!')
|
||||
logging.info('Upload successful!')
|
||||
logging.info('Sending file to %s', jid)
|
||||
return url
|
||||
|
|
Loading…
Reference in a new issue