Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial code refactor #4

Merged
merged 1 commit into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 29 additions & 20 deletions bookstack_file_exporter/archiver/archiver.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List, Dict, Union
from pathlib import Path
import json
from time import sleep
from datetime import datetime

from bookstack_file_exporter.exporter.node import Node
from bookstack_file_exporter.archiver import util
Expand All @@ -19,6 +19,8 @@
"tar": _TAR_GZ_SUFFIX
}

_DATE_STR_FORMAT = "%Y-%m-%d_%H-%M-%S"

class Archiver:
"""
Archiver pulls all the necessary files from upstream and then pushes them to the specified backup location(s)
Expand All @@ -32,41 +34,47 @@ class Archiver:
Returns:
Archiver instance with attributes that are accessible for use for file level archival and backup.
"""
def __init__(self, root_dir: str, add_meta: bool, base_page_url: str, headers: Dict[str, str]):
self.root_dir = root_dir
def __init__(self, base_dir: str, add_meta: bool, base_page_url: str, headers: Dict[str, str]):
self.base_dir = base_dir
self.add_meta = add_meta
self.base_page_url = base_page_url
self.headers = headers
# remote_system to function mapping
self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3}
# self._tar_file = ""
self._root_dir = self.generate_root_folder(self.base_dir)
self._minio_token = ""
self._minio_id = ""

# create local tarball first
def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]):
for _, page in page_nodes.items():
for format in export_formats:
# instead of sleep, implement back off retry in utils
sleep(0.5)
self._gather(page, format)
self._tar_dir()

# convert to bytes to be agnostic to end destination (future use case?)
def gather(self, page_node: Node, export_format: str):
def _gather(self, page_node: Node, export_format: str):
raw_data = self._get_data_format(page_node.id, export_format)
self._gather_local(page_node.file_path, raw_data, export_format, page_node.meta)

def archive(self):
self._tar_dir()

# send to remote systems
def archive_remote(self, remote_dest: str):
self._remote_exports[remote_dest]()

def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]):
file_path = self._get_combined_path(page_path)
file_path = f"{self._root_dir}/{page_path}"
file_full_name = f"{file_path}{_FILE_EXTENSION_MAP[export_format]}"
util.write_bytes(file_path=file_full_name, data=data)
if self.add_meta:
meta_file_name = f"{file_path}{_FILE_EXTENSION_MAP['meta']}"
util.dump_json(file_name=meta_file_name, data=meta_data)

# send to remote systems
def archive_remote(self, remote_targets: List[str]):
if remote_targets:
for target in remote_targets:
self._remote_exports[target]()

def _tar_dir(self):
# tar_path = f"{self.root_dir}{_FILE_EXTENSION_MAP['tar']}"
util.create_tar(self.root_dir, _FILE_EXTENSION_MAP['tar'])
util.create_tar(self._root_dir, _FILE_EXTENSION_MAP['tar'])

def _archive_minio(self):
pass
Expand All @@ -78,9 +86,10 @@ def _archive_s3(self):
def _get_data_format(self, page_node_id: int, export_format: str) -> bytes:
    """Download the exported content of one page as raw bytes.

    :param page_node_id: bookstack id of the page to export
    :param export_format: export format (e.g. pdf, html) used to build the url
    :returns: raw byte response from the bookstack export api
    """
    export_url = self._get_export_url(node_id=page_node_id, export_format=export_format)
    return util.get_byte_response(url=export_url, headers=self.headers)

def _get_combined_path(self, dir_name: str) -> str:
return f"{self.root_dir}/{dir_name}"

def _get_export_url(self, node_id: int, export_format: str) -> str:
return f"{self.base_page_url}/{node_id}/{_EXPORT_API_PATH}/{export_format}"
return f"{self.base_page_url}/{node_id}/{_EXPORT_API_PATH}/{export_format}"

@staticmethod
def generate_root_folder(base_folder_name: str) -> str:
    """Return base_folder_name suffixed with a current timestamp.

    The timestamp layout comes from the module-level _DATE_STR_FORMAT
    constant, e.g. "backup_2023-09-19_12-00-00".
    """
    timestamp = datetime.now().strftime(_DATE_STR_FORMAT)
    return f"{base_folder_name}_{timestamp}"
3 changes: 0 additions & 3 deletions bookstack_file_exporter/archiver/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@

log = logging.getLogger(__name__)

def generate_root_folder(base_folder_name: str) -> str:
return base_folder_name + "_" + datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

def get_byte_response(url: str, headers: Dict[str, str]) -> bytes:
try:
response = requests.get(url=url, headers=headers)
Expand Down
95 changes: 95 additions & 0 deletions bookstack_file_exporter/exporter/exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from typing import Dict, List, Union

import bookstack_file_exporter.exporter.util as util
from bookstack_file_exporter.exporter.node import Node


# _API_SUFFIX_PATHS = {
# "shelves": "api/shelves",
# "books": "api/books",
# "chapters": "api/chapters",
# "pages": "api/pages"
# }

class NodeExporter():
    """
    NodeExporter provides an interface to create Bookstack resources/nodes
    (pages, books, chapters, shelves) and their parent/child relationships.

    Args:
        api_urls: mapping of resource type name (e.g. "shelves", "books",
            "chapters", "pages") to its Bookstack api url.
        headers: http headers (e.g. authorization) sent with every api request.

    Raises:
        ValueError: if data returned from the bookstack api is empty or not
            in the desired format.
    """
    def __init__(self, api_urls: Dict[str, str], headers: Dict[str, str]):
        self.api_urls = api_urls
        self.headers = headers

    def get_shelf_nodes(self) -> Dict[int, "Node"]:
        """
        Get all shelf Node instances.

        :returns: Dict[int, Node] for all shelf nodes, keyed by shelf id
        :raises ValueError: when the api returns no shelf ids
        """
        base_url = self.api_urls["shelves"]
        all_parents: List[int] = util.get_all_ids(base_url, self.headers)
        if not all_parents:
            raise ValueError(f"No resources returned from Bookstack api url: {base_url}")
        return self._get_parents(base_url, all_parents)

    def _get_parents(self, base_url: str, parent_ids: List[int],
                     path_prefix: Union[str, None] = None) -> Dict[int, "Node"]:
        # fetch full metadata for each parent id and wrap it in a Node
        parent_nodes: Dict[int, "Node"] = {}
        for parent_id in parent_ids:
            parent_url = f"{base_url}/{parent_id}"
            parent_data = util.get_json_response(url=parent_url, headers=self.headers)
            parent_nodes[parent_id] = Node(parent_data, path_prefix=path_prefix)
        return parent_nodes

    def get_chapter_nodes(self, book_nodes: Dict[int, "Node"]) -> Dict[int, "Node"]:
        """
        Get all chapter Node instances.

        Chapters are treated a little differently: they live under books like
        pages do, but have their own children (i.e. they are not terminal nodes).

        :param book_nodes: existing book nodes keyed by book id, used as parents
        :returns: Dict[int, Node] for all chapter nodes, keyed by chapter id
        :raises ValueError: when the api returns no chapter ids
        """
        base_url = self.api_urls["chapters"]
        all_chapters: List[int] = util.get_all_ids(base_url, self.headers)
        if not all_chapters:
            raise ValueError(f"No resources returned from Bookstack api url: {base_url}")
        return self._get_chapters(base_url, all_chapters, book_nodes)

    def _get_chapters(self, base_url: str, all_chapters: List[int],
                      book_nodes: Dict[int, "Node"]) -> Dict[int, "Node"]:
        # each chapter's parent book is looked up via its 'book_id' field
        chapter_nodes: Dict[int, "Node"] = {}
        for chapter_id in all_chapters:
            chapter_url = f"{base_url}/{chapter_id}"
            chapter_data = util.get_json_response(url=chapter_url, headers=self.headers)
            book_id = chapter_data['book_id']
            chapter_nodes[chapter_id] = Node(chapter_data, book_nodes[book_id])
        return chapter_nodes

    def get_child_nodes(self, resource_type: str, parent_nodes: Dict[int, "Node"],
                        filter_empty: bool = True) -> Dict[int, "Node"]:
        """
        Get child Node instances (e.g. pages) for the given parent nodes.

        :param resource_type: key into api_urls for the child resource type
        :param parent_nodes: parents whose 'children' metadata will be expanded
        :param filter_empty: when True (default), skip children flagged as empty
        :returns: Dict[int, Node] of child nodes, keyed by child id
        """
        base_url = self.api_urls[resource_type]
        return self._get_children(base_url, parent_nodes, filter_empty)

    def _get_children(self, base_url: str, parent_nodes: Dict[int, "Node"],
                      filter_empty: bool) -> Dict[int, "Node"]:
        child_nodes: Dict[int, "Node"] = {}
        # keys of parent_nodes are unused here; iterate values directly
        for parent in parent_nodes.values():
            if not parent.children:
                continue
            for child in parent.children:
                child_id = child['id']
                child_url = f"{base_url}/{child_id}"
                child_data = util.get_json_response(url=child_url, headers=self.headers)
                child_node = Node(child_data, parent)
                # optionally drop children with no content
                if filter_empty and child_node.empty:
                    continue
                child_nodes[child_id] = child_node
        return child_nodes

    def get_unassigned_books(self, existing_resources: Dict[int, "Node"],
                             path_prefix: str) -> Dict[int, "Node"]:
        """
        Get book nodes that are not assigned to any shelf.

        Books with no shelf are treated like parent (top-level) resources.

        :param existing_resources: already-known book nodes keyed by book id
        :param path_prefix: file path prefix applied to the unassigned books
        :returns: Dict[int, Node] of unassigned book nodes, keyed by book id
        :raises ValueError: when every book is already assigned to a shelf
        """
        base_url = self.api_urls["books"]
        all_resources: List[int] = util.get_all_ids(url=base_url, headers=self.headers)
        # compare everything the api knows about against current known resources
        unassigned = [resource_id for resource_id in all_resources
                      if resource_id not in existing_resources]
        if not unassigned:
            raise ValueError(f"No unassigned resources found for type: {base_url}")
        return self._get_parents(base_url, unassigned, path_prefix)
9 changes: 4 additions & 5 deletions bookstack_file_exporter/exporter/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class Node():
"""
Node class provides an interface to create and reference bookstack child/parent relationships for resources like pages, books, chapters, and shelves.
Node class provides an interface to create bookstack child/parent relationships for resources like pages, books, chapters, and shelves.

Args:
metadata: Dict[str, Union[str, int]] (required) = The metadata of the resource from bookstack api
Expand All @@ -23,7 +23,7 @@ class Node():
"""
def __init__(self, meta: Dict[str, Union[str, int]], parent: Union['Node', None] = None, path_prefix: Union[str, None] = None):
self.meta = meta
self.__parent = parent
self._parent = parent
self._path_prefix = path_prefix
self.name: str = ""
self.id: int = 0
Expand All @@ -39,9 +39,8 @@ def _initialize(self):
self.id = self.meta['id']
self._display_name = self.meta['name']
# get base file path from parent if it exists
if self.__parent:
self._file_path = f"{self.__parent.file_path}/{self.name}"
# self._file_path = self.__parent.file_path + '/' + self.name
if self._parent:
self._file_path = f"{self._parent.file_path}/{self.name}"
# normalize path prefix if it does not exist
if not self._path_prefix:
self._path_prefix = ""
Expand Down
48 changes: 4 additions & 44 deletions bookstack_file_exporter/exporter/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,49 +17,9 @@ def get_json_response(url: str, headers: Dict[str, str], verify: bool = True, ti

def get_all_ids(url: str, headers: Dict[str, str]) -> List[int]:
ids_api_meta = get_json_response(url=url, headers=headers)
all_ids = [item['id'] for item in ids_api_meta['data']]
return all_ids

def get_parent_meta(url: str, headers: Dict[str, str], parent_ids: List[int],
path_prefix: Union[str, None] = None) -> Dict[int, Node]:
parent_nodes = {}
for parent_id in parent_ids:
parent_url = f"{url}/{parent_id}"
# parent_url = url + "/" + str(parent_id)
parent_data = get_json_response(url=parent_url, headers=headers)
parent_nodes[parent_id] = Node(parent_data, path_prefix=path_prefix)
return parent_nodes

def get_chapter_meta(url: str, headers: Dict[str, str], chapters: List[int],
books:Dict[int, Node], path_prefix: Union[str, None] = None) -> Dict[int, Node]:
chapter_nodes = {}
for chapter_id in chapters:
chapter_url = f"{url}/{chapter_id}"
# chapter_url = url + "/" + str(chapter_id)
chapter_data = get_json_response(url=chapter_url, headers=headers)
book_id = chapter_data['book_id']
chapter_nodes[chapter_id] = Node(chapter_data, books[book_id], path_prefix=path_prefix)
return chapter_nodes

def get_child_meta(url: str, headers: Dict[str, str], parent_nodes: Dict[int, Node],
filter_empty: bool = False, path_prefix: Union[str, None] = None) -> Dict[int, Node]:
child_nodes = {}
for _, parent in parent_nodes.items():
if parent.children:
for child in parent.children:
child_id = child['id']
child_url = f"{url}/{child_id}"
# child_url = url + "/" + str(child_id)
child_data = get_json_response(url=child_url, headers=headers)
child_node = Node(child_data, parent, path_prefix=path_prefix)
if filter_empty:
if not child_node.empty:
child_nodes[child_id] = child_node
else:
child_nodes[child_id] = child_node
return child_nodes

def get_page_export(url: str, headers: Dict[str, str]):
pass
if ids_api_meta:
return [item['id'] for item in ids_api_meta['data']]
else:
return []


Loading