From a3d50fe8d864dcc4c10804b2d7c0bf2a7750e859 Mon Sep 17 00:00:00 2001 From: "Schimon Jehudah, Adv." Date: Tue, 7 Jan 2025 17:38:44 +0200 Subject: [PATCH] Support import of bookmarks from HTML files. --- blasta/http/instance.py | 15 +++++++++++-- blasta/utilities/data.py | 47 ++++++++++++++++++++++++++++++++++++++++ blasta/version.py | 4 ++-- pyproject.toml | 2 +- 4 files changed, 63 insertions(+), 5 deletions(-) diff --git a/blasta/http/instance.py b/blasta/http/instance.py index 97cf56f..d6c45cf 100644 --- a/blasta/http/instance.py +++ b/blasta/http/instance.py @@ -1366,8 +1366,19 @@ class HttpInstance: content = file.file.read().decode() # TODO Add match/case for filetype. - - entries = UtilitiesData.load_data_toml(content) + filename = os.path.splitext(file.filename) + file_extension = filename[len(filename)-1] + match file_extension: + case '.html': + entries = UtilitiesData.load_data_netscape(content) + case '.toml': + entries = UtilitiesData.load_data_toml(content) + case _: + message = 'Blasta system message » Error: Unknown file type.' 
+ description = 'Import error' + path = 'error' + return result_post(request, jabber_id, description, message, path) + # entries_node = entries[node] #breakpoint() diff --git a/blasta/utilities/data.py b/blasta/utilities/data.py index 773590c..5315b32 100644 --- a/blasta/utilities/data.py +++ b/blasta/utilities/data.py @@ -5,8 +5,11 @@ from blasta.database.sqlite import DatabaseSQLite from blasta.utilities.cryptography import UtilitiesCryptography from blasta.utilities.syndication import UtilitiesSyndication from blasta.xmpp.pubsub import XmppPubsub +from datetime import datetime import os +import re from slixmpp.stanza.iq import Iq +import time import tomli_w try: @@ -130,6 +133,50 @@ class UtilitiesData: def load_data_toml(data: dict) -> dict: return tomllib.loads(data) + def load_data_netscape(html: str) -> dict: + bookmarks = [] + current_summary = "" + + lines = html.splitlines() + for line in lines: + line = line.strip() + + # Check for
<DT> tag + if line.startswith("<DT>"): + # Look for <A> tag within <DT>
+ a_match = re.search(r'<A HREF="(.*?)" ADD_DATE="(.*?)" LAST_MODIFIED="(.*?)" PRIVATE="(.*?)" TAGS="(.*?)">(.*?)</A>', line) + if a_match: + link, published, updated, private, tags, title = a_match.groups() + + # Convert timestamps from seconds since epoch to ISO format + published_date = datetime.fromtimestamp(int(published)).isoformat() + updated_date = datetime.fromtimestamp(int(updated)).isoformat() + + # Create bookmark dictionary + bookmark = { + 'title': title, + 'link': link, + 'summary': current_summary, + 'published': published_date, + 'updated': updated_date, + 'tags': [tag.strip() for tag in tags.split(',')] if tags else [] + } + + # Append bookmark to the list + bookmarks.append(bookmark) + + # Reset summary for the next bookmark + current_summary = "" + + # Check for
<DD> tag + elif line.startswith("<DD>"): + # Extract summary from <DD> + summary_match = re.search(r'<DD>(.*?)</DD>|<DD>(.*?)(?=\s*<DT>|$)', line) + if summary_match: + bookmarks[len(bookmarks)-1]['summary'] = summary_match.group(2).strip() + + return {'entries': bookmarks} + def open_file_toml(filename: str) -> dict: with open(filename, mode="rb") as fn: data = tomllib.load(fn) diff --git a/blasta/version.py b/blasta/version.py index 0e742f9..12019d7 100644 --- a/blasta/version.py +++ b/blasta/version.py @@ -1,2 +1,2 @@ -__version__ = '0.1' -__version_info__ = (0, 1) +__version__ = '0.2' +__version_info__ = (0, 2) diff --git a/pyproject.toml b/pyproject.toml index 92d0e2d..1eebefa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "Blasta" -version = "1.0" +version = "0.2" description = "A collaborative annotation management system for XMPP" authors = [{name = "Schimon Zachary", email = "sch@fedora.email"}] license = {text = "AGPL-3.0"}