2023-10-24 16:43:14 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
2023-11-22 12:47:34 +01:00
|
|
|
"""
|
|
|
|
|
|
|
|
FIXME
|
|
|
|
|
|
|
|
1) feed_mode_scan doesn't find feed for https://www.blender.org/
|
|
|
|
even though it should be according to the pathnames dictionary.
|
|
|
|
|
2023-11-23 17:55:36 +01:00
|
|
|
TODO
|
|
|
|
|
|
|
|
1) Support Gemini and Gopher.
|
|
|
|
|
2023-12-26 12:22:45 +01:00
|
|
|
2) Check also for HTML, not only feed.bozo.
|
|
|
|
|
2024-01-02 12:42:41 +01:00
|
|
|
3) Add "if utility.is_feed(url, feed)" to view_entry and view_feed
|
2023-12-27 23:48:31 +01:00
|
|
|
|
|
|
|
4) Refactor view_entry and view_feed - Why "if" twice?
|
|
|
|
|
2024-01-02 19:11:36 +01:00
|
|
|
5) Replace sqlite.remove_nonexistent_entries by sqlite.check_entry_exist
|
|
|
|
Same check, just reverse.
|
|
|
|
|
2023-11-22 12:47:34 +01:00
|
|
|
"""
|
|
|
|
|
2023-12-05 09:18:29 +01:00
|
|
|
from aiohttp import ClientError, ClientSession, ClientTimeout
|
2023-12-04 15:41:02 +01:00
|
|
|
from asyncio import TimeoutError
|
2024-01-06 23:03:08 +01:00
|
|
|
# from asyncio.exceptions import IncompleteReadError
|
|
|
|
# from bs4 import BeautifulSoup
|
|
|
|
# from http.client import IncompleteRead
|
|
|
|
# from lxml import html
|
2024-01-02 12:42:41 +01:00
|
|
|
import slixfeed.config as config
|
2023-10-24 16:43:14 +02:00
|
|
|
# from xml.etree.ElementTree import ElementTree, ParseError
|
2023-11-13 14:45:10 +01:00
|
|
|
|
2023-11-26 06:48:09 +01:00
|
|
|
|
2024-01-04 02:16:24 +01:00
|
|
|
# async def dat():
|
2023-11-26 06:48:09 +01:00
|
|
|
|
2024-01-04 02:16:24 +01:00
|
|
|
# async def ftp():
|
|
|
|
|
|
|
|
# async def gemini():
|
2023-10-24 16:43:14 +02:00
|
|
|
|
2024-01-04 02:16:24 +01:00
|
|
|
# async def gopher():
|
2023-10-24 16:43:14 +02:00
|
|
|
|
2024-01-04 02:16:24 +01:00
|
|
|
# async def http():
|
2023-11-26 06:48:09 +01:00
|
|
|
|
2024-01-04 02:16:24 +01:00
|
|
|
# async def ipfs():
|
2023-11-26 06:48:09 +01:00
|
|
|
|
2023-10-24 16:43:14 +02:00
|
|
|
async def download_feed(url):
    """
    Download content of given URL.

    Parameters
    ----------
    url : str
        URL.

    Returns
    -------
    msg : list
        Two-element list. ``[document, status]`` when the server answers
        with HTTP 200 and the body could be read as text; otherwise
        ``[False, error_message]``.
    """
    user_agent = (
        config.get_value("settings", "Network", "user-agent")
        or 'Slixfeed/0.1'
    )
    headers = {'User-Agent': user_agent}
    # aiohttp uses None to mean "no proxy"; normalise an unset or empty
    # setting to None instead of handing it an empty string.
    proxy = config.get_value("settings", "Network", "http_proxy") or None
    timeout = ClientTimeout(total=10)
    async with ClientSession(headers=headers) as session:
        try:
            async with session.get(
                    url, proxy=proxy, timeout=timeout) as response:
                status = response.status
                if status == 200:
                    try:
                        doc = await response.text()
                    except Exception:
                        # Deliberately broad best-effort: decoding, payload
                        # and read-timeout failures all map to one message.
                        # ``Exception`` (rather than a bare ``except``) lets
                        # task cancellation and KeyboardInterrupt propagate.
                        msg = [
                            False,
                            "Document is too large or is not textual."
                        ]
                    else:
                        msg = [doc, status]
                else:
                    msg = [False, "HTTP Error: " + str(status)]
        except ClientError as e:
            msg = [False, "Error: " + str(e)]
        except TimeoutError as e:
            # TimeoutError is the name imported from asyncio at the top of
            # this module.
            msg = [False, "Timeout: " + str(e)]
    return msg
|