Support import of bookmarks from HTML files.

2025-01-07 17:38:44 +02:00 · 2025-01-07 17:38:44 +02:00 · a3d50fe8d8
commit a3d50fe8d8
parent d0655fd16a
4 changed files with 63 additions and 5 deletions
--- a/blasta/http/instance.py
+++ b/blasta/http/instance.py
@ -1366,8 +1366,19 @@ class HttpInstance:
                    content = file.file.read().decode()

                    # TODO Add match/case for filetype.
-
-                    entries = UtilitiesData.load_data_toml(content)
+                    filename = os.path.splitext(file.filename)
+                    file_extension = filename[len(filename)-1]
+                    match file_extension:
+                        case '.html':
+                            entries = UtilitiesData.load_data_netscape(content)
+                        case '.toml':
+                            entries = UtilitiesData.load_data_toml(content)
+                        case _:
+                            message = 'Blasta system message » Error: Unknown file type.'
+                            description = 'Import error'
+                            path = 'error'
+                            return result_post(request, jabber_id, description, message, path)
+                            
                    # entries_node = entries[node]

                    #breakpoint()
--- a/blasta/utilities/data.py
+++ b/blasta/utilities/data.py
@ -5,8 +5,11 @@ from blasta.database.sqlite import DatabaseSQLite
 from blasta.utilities.cryptography import UtilitiesCryptography
 from blasta.utilities.syndication import UtilitiesSyndication
 from blasta.xmpp.pubsub import XmppPubsub
+from datetime import datetime
 import os
+import re
 from slixmpp.stanza.iq import Iq
+import time
 import tomli_w

 try:
@ -130,6 +133,50 @@ class UtilitiesData:
    def load_data_toml(data: dict) -> dict:
        return tomllib.loads(data)

+    def load_data_netscape(html: str) -> dict:
+        """Parse a Netscape bookmark file into ``{'entries': [bookmark, ...]}``.
+
+        The parser is line based.  A ``<DT>`` line yields a bookmark only when
+        its ``<A>`` tag carries HREF, ADD_DATE, LAST_MODIFIED, PRIVATE and TAGS
+        in exactly that order; other ``<DT>`` lines (folders, foreign layouts)
+        are skipped.  A ``<DD>`` line supplies the summary of the bookmark on
+        the ``<DT>`` line right before it.
+
+        :param html: Text content of the bookmark file.
+        :return: Dict whose ``'entries'`` list holds one dict per bookmark with
+            the keys title, link, summary, published, updated and tags.
+        :raises ValueError: If ADD_DATE or LAST_MODIFIED is not an integer.
+        """
+        anchor_re = re.compile(
+            r'<A HREF="(.*?)" ADD_DATE="(.*?)" LAST_MODIFIED="(.*?)" '
+            r'PRIVATE="(.*?)" TAGS="(.*?)">(.*?)</A>')
+        # A summary ends at </DD>, before a <DT> on the same line, or at EOL.
+        summary_re = re.compile(r'<DD>(.*?)(?:</DD>|\s*<DT>|$)')
+        bookmarks = []
+        # Bookmark that a following <DD> describes; None after a <DT> that did
+        # not yield one, so folder descriptions are not attached to a stale entry.
+        current = None
+        for line in html.splitlines():
+            line = line.strip()
+            if line.startswith("<DT>"):
+                current = None
+                a_match = anchor_re.search(line)
+                if a_match:
+                    link, published, updated, _private, tags, title = a_match.groups()
+                    current = {
+                        'title': title,
+                        'link': link,
+                        'summary': "",
+                        # Epoch seconds -> naive ISO 8601 in local time.
+                        'published': datetime.fromtimestamp(int(published)).isoformat(),
+                        'updated': datetime.fromtimestamp(int(updated)).isoformat(),
+                        'tags': [tag.strip() for tag in tags.split(',')] if tags else []
+                    }
+                    bookmarks.append(current)
+            elif current is not None and line.startswith("<DD>"):
+                current['summary'] = summary_re.match(line).group(1).strip()
+        return {'entries': bookmarks}
+
    def open_file_toml(filename: str) -> dict:
        with open(filename, mode="rb") as fn:
            data = tomllib.load(fn)
--- a/blasta/version.py
+++ b/blasta/version.py
@ -1,2 +1,2 @@
-__version__ = '0.1'
-__version_info__ = (0, 1)
+__version__ = '0.2'
+__version_info__ = (0, 2)
--- a/pyproject.toml
+++ b/pyproject.toml
@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "Blasta"
-version = "1.0"
+version = "0.2"
 description = "A collaborative annotation management system for XMPP"
 authors = [{name = "Schimon Zachary", email = "sch@fedora.email"}]
 license = {text = "AGPL-3.0"}