Support import of bookmarks from HTML files.

This commit is contained in:
Schimon Jehudah, Adv. 2025-01-07 17:38:44 +02:00
parent d0655fd16a
commit a3d50fe8d8
4 changed files with 63 additions and 5 deletions

View file

@ -1366,8 +1366,19 @@ class HttpInstance:
content = file.file.read().decode()
# TODO Add match/case for filetype.
entries = UtilitiesData.load_data_toml(content)
filename = os.path.splitext(file.filename)
file_extension = filename[len(filename)-1]
match file_extension:
case '.html':
entries = UtilitiesData.load_data_netscape(content)
case '.toml':
entries = UtilitiesData.load_data_toml(content)
case _:
message = 'Blasta system message » Error: Unknown file type.'
description = 'Import error'
path = 'error'
return result_post(request, jabber_id, description, message, path)
# entries_node = entries[node]
#breakpoint()

View file

@ -5,8 +5,11 @@ from blasta.database.sqlite import DatabaseSQLite
from blasta.utilities.cryptography import UtilitiesCryptography
from blasta.utilities.syndication import UtilitiesSyndication
from blasta.xmpp.pubsub import XmppPubsub
from datetime import datetime
import os
import re
from slixmpp.stanza.iq import Iq
import time
import tomli_w
try:
@ -130,6 +133,50 @@ class UtilitiesData:
def load_data_toml(data: dict) -> dict:
return tomllib.loads(data)
def load_data_netscape(html: str) -> dict:
    """Parse a Netscape-style bookmark HTML export into bookmark entries.

    The input is scanned line by line. A line starting with ``<DT>`` that
    holds an ``<A>`` element carrying the attributes ``HREF``, ``ADD_DATE``,
    ``LAST_MODIFIED``, ``PRIVATE`` and ``TAGS`` (in that order) becomes a
    bookmark. A following line starting with ``<DD>`` supplies the summary
    of the bookmark declared immediately before it.

    Parameters
    ----------
    html : str
        Text of the bookmark file.

    Returns
    -------
    dict
        ``{'entries': [...]}`` where each entry is a dict with the keys
        ``title``, ``link``, ``summary``, ``published``, ``updated`` and
        ``tags``. The two dates are ISO 8601 strings produced by
        ``datetime.fromtimestamp`` (local time, no UTC offset).

    Raises
    ------
    ValueError
        If ``ADD_DATE`` or ``LAST_MODIFIED`` of a matched anchor is not an
        integer number of seconds.
    """
    anchor_pattern = re.compile(
        r'<A HREF="(.*?)" ADD_DATE="(.*?)" LAST_MODIFIED="(.*?)" '
        r'PRIVATE="(.*?)" TAGS="(.*?)">(.*?)</A>')
    # The summary ends at an explicit </DD>, at the next <DT> on the same
    # line, or at the end of the line, whichever comes first.
    summary_pattern = re.compile(r'<DD>(.*?)(?:</DD>|\s*<DT>|$)')
    bookmarks = []
    # Bookmark that a following <DD> line describes; None when the most
    # recent <DT> was not a bookmark (e.g. a folder heading), so that its
    # description is not attached to an unrelated earlier bookmark.
    last_bookmark = None
    for line in html.splitlines():
        line = line.strip()
        if line.startswith("<DT>"):
            last_bookmark = None
            a_match = anchor_pattern.search(line)
            if not a_match:
                continue
            link, published, updated, _private, tags, title = a_match.groups()
            # Timestamps are seconds since the epoch.
            last_bookmark = {
                'title': title,
                'link': link,
                'summary': '',
                'published': datetime.fromtimestamp(int(published)).isoformat(),
                'updated': datetime.fromtimestamp(int(updated)).isoformat(),
                'tags': [tag.strip() for tag in tags.split(',')] if tags else [],
            }
            bookmarks.append(last_bookmark)
        elif line.startswith("<DD>") and last_bookmark is not None:
            summary_match = summary_pattern.search(line)
            if summary_match:
                last_bookmark['summary'] = summary_match.group(1).strip()
    return {'entries': bookmarks}
def open_file_toml(filename: str) -> dict:
with open(filename, mode="rb") as fn:
data = tomllib.load(fn)

View file

@ -1,2 +1,2 @@
__version__ = '0.1'
__version_info__ = (0, 1)
__version__ = '0.2'
__version_info__ = (0, 2)

View file

@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "Blasta"
version = "1.0"
version = "0.2"
description = "A collaborative annotation management system for XMPP"
authors = [{name = "Schimon Zachary", email = "sch@fedora.email"}]
license = {text = "AGPL-3.0"}