Handle readability exception

This commit is contained in:
Schimon Jehudah 2024-01-10 21:20:02 +00:00
parent 0ca37dfdee
commit b1cb9f7777

View file

@ -24,7 +24,6 @@ from http.client import IncompleteRead
from feedparser import parse from feedparser import parse
import logging import logging
from lxml import html from lxml import html
from readability import Document
import slixfeed.config as config import slixfeed.config as config
import slixfeed.crawl as crawl import slixfeed.crawl as crawl
from slixfeed.datetime import ( from slixfeed.datetime import (
@ -60,6 +59,13 @@ except:
"Package pdfkit was not found.\n" "Package pdfkit was not found.\n"
"PDF support is disabled.") "PDF support is disabled.")
# readability is an optional dependency: it provides Document, which
# get_content instantiates and calls .summary() on.
try:
    from readability import Document
except ImportError:
    # Catch only a failed import (ModuleNotFoundError is a subclass) so that
    # unrelated errors, KeyboardInterrupt and SystemExit are not swallowed.
    # Document is left undefined in this case; code that uses it must be
    # prepared for the name to be missing.
    logging.info(
        "Package readability was not found.\n"
        "Arc90 Lab algorithm is disabled.")
def log_to_markdown(timestamp, filename, jid, message): def log_to_markdown(timestamp, filename, jid, message):
""" """
@ -706,9 +712,14 @@ async def get_content(url):
data = result[0] data = result[0]
code = result[1] code = result[1]
if data: if data:
document = Document(result[0]) try:
content = document.summary() document = Document(result[0])
info = [code, content] content = document.summary()
info = [code, content]
except:
logging.warning(
"Install package readability.")
info = result
else: else:
info = [code, None] info = [code, None]
return info return info