forked from sch/Slixfeed
Handle readability exception
This commit is contained in:
parent
0ca37dfdee
commit
b1cb9f7777
1 changed file with 15 additions and 4 deletions
|
@@ -24,7 +24,6 @@ from http.client import IncompleteRead
|
||||||
from feedparser import parse
|
from feedparser import parse
|
||||||
import logging
|
import logging
|
||||||
from lxml import html
|
from lxml import html
|
||||||
from readability import Document
|
|
||||||
import slixfeed.config as config
|
import slixfeed.config as config
|
||||||
import slixfeed.crawl as crawl
|
import slixfeed.crawl as crawl
|
||||||
from slixfeed.datetime import (
|
from slixfeed.datetime import (
|
||||||
|
@@ -60,6 +59,13 @@ except:
|
||||||
"Package pdfkit was not found.\n"
|
"Package pdfkit was not found.\n"
|
||||||
"PDF support is disabled.")
|
"PDF support is disabled.")
|
||||||
|
|
||||||
|
try:
|
||||||
|
from readability import Document
|
||||||
|
except:
|
||||||
|
logging.info(
|
||||||
|
"Package readability was not found.\n"
|
||||||
|
"Arc90 Lab algorithm is disabled.")
|
||||||
|
|
||||||
|
|
||||||
def log_to_markdown(timestamp, filename, jid, message):
|
def log_to_markdown(timestamp, filename, jid, message):
|
||||||
"""
|
"""
|
||||||
|
@@ -706,9 +712,14 @@ async def get_content(url):
|
||||||
data = result[0]
|
data = result[0]
|
||||||
code = result[1]
|
code = result[1]
|
||||||
if data:
|
if data:
|
||||||
|
try:
|
||||||
document = Document(result[0])
|
document = Document(result[0])
|
||||||
content = document.summary()
|
content = document.summary()
|
||||||
info = [code, content]
|
info = [code, content]
|
||||||
|
except:
|
||||||
|
logging.warning(
|
||||||
|
"Install package readability.")
|
||||||
|
info = result
|
||||||
else:
|
else:
|
||||||
info = [code, None]
|
info = [code, None]
|
||||||
return info
|
return info
|
||||||
|
|
Loading…
Reference in a new issue