Handle readability exception

This commit is contained in:
Schimon Jehudah 2024-01-10 21:20:02 +00:00
parent 0ca37dfdee
commit b1cb9f7777

View file

@ -24,7 +24,6 @@ from http.client import IncompleteRead
from feedparser import parse
import logging
from lxml import html
from readability import Document
import slixfeed.config as config
import slixfeed.crawl as crawl
from slixfeed.datetime import (
@ -60,6 +59,13 @@ except:
"Package pdfkit was not found.\n"
"PDF support is disabled.")
# Optional dependency: Document from the readability package is used further
# down to summarise fetched content. If the package cannot be imported, the
# module still loads and the situation is only noted in the log.
try:
    from readability import Document
except ImportError:
    # Catch only ImportError (which includes ModuleNotFoundError) so that
    # unrelated failures are not silently reported as a missing package.
    logging.info(
        "Package readability was not found.\n"
        "Arc90 Lab algorithm is disabled.")
def log_to_markdown(timestamp, filename, jid, message):
"""
@ -706,9 +712,14 @@ async def get_content(url):
data = result[0]
code = result[1]
if data:
try:
document = Document(result[0])
content = document.summary()
info = [code, content]
except:
logging.warning(
"Install package readability.")
info = result
else:
info = [code, None]
return info