Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial code refactor #4

Merged
merged 1 commit into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 29 additions & 20 deletions bookstack_file_exporter/archiver/archiver.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List, Dict, Union
from pathlib import Path
import json
from time import sleep
from datetime import datetime

from bookstack_file_exporter.exporter.node import Node
from bookstack_file_exporter.archiver import util
Expand All @@ -19,6 +19,8 @@
"tar": _TAR_GZ_SUFFIX
}

_DATE_STR_FORMAT = "%Y-%m-%d_%H-%M-%S"

class Archiver:
"""
Archiver pulls all the necessary files from upstream and then pushes them to the specified backup location(s)
Expand All @@ -32,41 +34,47 @@ class Archiver:
Returns:
Archiver instance with attributes that are accessible for use for file level archival and backup.
"""
def __init__(self, root_dir: str, add_meta: bool, base_page_url: str, headers: Dict[str, str]):
self.root_dir = root_dir
def __init__(self, base_dir: str, add_meta: bool, base_page_url: str, headers: Dict[str, str]):
self.base_dir = base_dir
self.add_meta = add_meta
self.base_page_url = base_page_url
self.headers = headers
# remote_system to function mapping
self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3}
# self._tar_file = ""
self._root_dir = self.generate_root_folder(self.base_dir)
self._minio_token = ""
self._minio_id = ""

# create local tarball first
def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]):
for _, page in page_nodes.items():
for format in export_formats:
# instead of sleep, implement back off retry in utils
sleep(0.5)
self._gather(page, format)
self._tar_dir()

# convert to bytes to be agnostic to end destination (future use case?)
def gather(self, page_node: Node, export_format: str):
def _gather(self, page_node: Node, export_format: str):
raw_data = self._get_data_format(page_node.id, export_format)
self._gather_local(page_node.file_path, raw_data, export_format, page_node.meta)

def archive(self):
self._tar_dir()

# send to remote systems
def archive_remote(self, remote_dest: str):
self._remote_exports[remote_dest]()

def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]):
file_path = self._get_combined_path(page_path)
file_path = f"{self._root_dir}/{page_path}"
file_full_name = f"{file_path}{_FILE_EXTENSION_MAP[export_format]}"
util.write_bytes(file_path=file_full_name, data=data)
if self.add_meta:
meta_file_name = f"{file_path}{_FILE_EXTENSION_MAP['meta']}"
util.dump_json(file_name=meta_file_name, data=meta_data)

# send to remote systems
def archive_remote(self, remote_targets: List[str]):
if remote_targets:
for target in remote_targets:
self._remote_exports[target]()

def _tar_dir(self):
# tar_path = f"{self.root_dir}{_FILE_EXTENSION_MAP['tar']}"
util.create_tar(self.root_dir, _FILE_EXTENSION_MAP['tar'])
util.create_tar(self._root_dir, _FILE_EXTENSION_MAP['tar'])

def _archive_minio(self):
pass
Expand All @@ -78,9 +86,10 @@ def _archive_s3(self):
def _get_data_format(self, page_node_id: int, export_format: str) -> bytes:
    """Download the exported content of one page as raw bytes.

    :param page_node_id: bookstack id of the page to export
    :param export_format: export format (e.g. pdf, html) used to build the url
    :returns: raw byte response from the bookstack export api
    """
    export_url = self._get_export_url(node_id=page_node_id, export_format=export_format)
    return util.get_byte_response(url=export_url, headers=self.headers)

def _get_combined_path(self, dir_name: str) -> str:
return f"{self.root_dir}/{dir_name}"

def _get_export_url(self, node_id: int, export_format: str) -> str:
return f"{self.base_page_url}/{node_id}/{_EXPORT_API_PATH}/{export_format}"
return f"{self.base_page_url}/{node_id}/{_EXPORT_API_PATH}/{export_format}"

@staticmethod
def generate_root_folder(base_folder_name: str) -> str:
    """Return base_folder_name suffixed with a current timestamp.

    The timestamp layout comes from the module-level _DATE_STR_FORMAT
    constant, e.g. "backup_2023-09-19_12-00-00".
    """
    timestamp = datetime.now().strftime(_DATE_STR_FORMAT)
    return f"{base_folder_name}_{timestamp}"
3 changes: 0 additions & 3 deletions bookstack_file_exporter/archiver/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@

log = logging.getLogger(__name__)

def generate_root_folder(base_folder_name: str) -> str:
return base_folder_name + "_" + datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

def get_byte_response(url: str, headers: Dict[str, str]) -> bytes:
try:
response = requests.get(url=url, headers=headers)
Expand Down
95 changes: 95 additions & 0 deletions bookstack_file_exporter/exporter/exporter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
from typing import Dict, List, Union

import bookstack_file_exporter.exporter.util as util
from bookstack_file_exporter.exporter.node import Node


# _API_SUFFIX_PATHS = {
# "shelves": "api/shelves",
# "books": "api/books",
# "chapters": "api/chapters",
# "pages": "api/pages"
# }

class NodeExporter():
    """
    NodeExporter provides an interface to create Bookstack resources/nodes
    (pages, books, chapters, shelves) and their parent/child relationships.

    Args:
        api_urls: mapping of resource type name (e.g. "shelves", "books",
            "chapters", "pages") to its Bookstack api url.
        headers: http headers (e.g. authorization) sent with every api request.

    Raises:
        ValueError: if data returned from the bookstack api is empty or not
            in the desired format.
    """
    def __init__(self, api_urls: Dict[str, str], headers: Dict[str, str]):
        self.api_urls = api_urls
        self.headers = headers

    def get_shelf_nodes(self) -> Dict[int, "Node"]:
        """
        Get all shelf Node instances.

        :returns: Dict[int, Node] for all shelf nodes, keyed by shelf id
        :raises ValueError: when the api returns no shelf ids
        """
        base_url = self.api_urls["shelves"]
        all_parents: List[int] = util.get_all_ids(base_url, self.headers)
        if not all_parents:
            raise ValueError(f"No resources returned from Bookstack api url: {base_url}")
        return self._get_parents(base_url, all_parents)

    def _get_parents(self, base_url: str, parent_ids: List[int],
                     path_prefix: Union[str, None] = None) -> Dict[int, "Node"]:
        # fetch full metadata for each parent id and wrap it in a Node
        parent_nodes: Dict[int, "Node"] = {}
        for parent_id in parent_ids:
            parent_url = f"{base_url}/{parent_id}"
            parent_data = util.get_json_response(url=parent_url, headers=self.headers)
            parent_nodes[parent_id] = Node(parent_data, path_prefix=path_prefix)
        return parent_nodes

    def get_chapter_nodes(self, book_nodes: Dict[int, "Node"]) -> Dict[int, "Node"]:
        """
        Get all chapter Node instances.

        Chapters are treated a little differently: they live under books like
        pages do, but have their own children (i.e. they are not terminal nodes).

        :param book_nodes: existing book nodes keyed by book id, used as parents
        :returns: Dict[int, Node] for all chapter nodes, keyed by chapter id
        :raises ValueError: when the api returns no chapter ids
        """
        base_url = self.api_urls["chapters"]
        all_chapters: List[int] = util.get_all_ids(base_url, self.headers)
        if not all_chapters:
            raise ValueError(f"No resources returned from Bookstack api url: {base_url}")
        return self._get_chapters(base_url, all_chapters, book_nodes)

    def _get_chapters(self, base_url: str, all_chapters: List[int],
                      book_nodes: Dict[int, "Node"]) -> Dict[int, "Node"]:
        # each chapter's parent book is looked up via its 'book_id' field
        chapter_nodes: Dict[int, "Node"] = {}
        for chapter_id in all_chapters:
            chapter_url = f"{base_url}/{chapter_id}"
            chapter_data = util.get_json_response(url=chapter_url, headers=self.headers)
            book_id = chapter_data['book_id']
            chapter_nodes[chapter_id] = Node(chapter_data, book_nodes[book_id])
        return chapter_nodes

    def get_child_nodes(self, resource_type: str, parent_nodes: Dict[int, "Node"],
                        filter_empty: bool = True) -> Dict[int, "Node"]:
        """
        Get child Node instances (e.g. pages) for the given parent nodes.

        :param resource_type: key into api_urls for the child resource type
        :param parent_nodes: parents whose 'children' metadata will be expanded
        :param filter_empty: when True (default), skip children flagged as empty
        :returns: Dict[int, Node] of child nodes, keyed by child id
        """
        base_url = self.api_urls[resource_type]
        return self._get_children(base_url, parent_nodes, filter_empty)

    def _get_children(self, base_url: str, parent_nodes: Dict[int, "Node"],
                      filter_empty: bool) -> Dict[int, "Node"]:
        child_nodes: Dict[int, "Node"] = {}
        # keys of parent_nodes are unused here; iterate values directly
        for parent in parent_nodes.values():
            if not parent.children:
                continue
            for child in parent.children:
                child_id = child['id']
                child_url = f"{base_url}/{child_id}"
                child_data = util.get_json_response(url=child_url, headers=self.headers)
                child_node = Node(child_data, parent)
                # optionally drop children with no content
                if filter_empty and child_node.empty:
                    continue
                child_nodes[child_id] = child_node
        return child_nodes

    def get_unassigned_books(self, existing_resources: Dict[int, "Node"],
                             path_prefix: str) -> Dict[int, "Node"]:
        """
        Get book nodes that are not assigned to any shelf.

        Books with no shelf are treated like parent (top-level) resources.

        :param existing_resources: already-known book nodes keyed by book id
        :param path_prefix: file path prefix applied to the unassigned books
        :returns: Dict[int, Node] of unassigned book nodes, keyed by book id
        :raises ValueError: when every book is already assigned to a shelf
        """
        base_url = self.api_urls["books"]
        all_resources: List[int] = util.get_all_ids(url=base_url, headers=self.headers)
        # compare everything the api knows about against current known resources
        unassigned = [resource_id for resource_id in all_resources
                      if resource_id not in existing_resources]
        if not unassigned:
            raise ValueError(f"No unassigned resources found for type: {base_url}")
        return self._get_parents(base_url, unassigned, path_prefix)
9 changes: 4 additions & 5 deletions bookstack_file_exporter/exporter/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

class Node():
"""
Node class provides an interface to create and reference bookstack child/parent relationships for resources like pages, books, chapters, and shelves.
Node class provides an interface to create bookstack child/parent relationships for resources like pages, books, chapters, and shelves.

Args:
metadata: Dict[str, Union[str, int]] (required) = The metadata of the resource from bookstack api
Expand All @@ -23,7 +23,7 @@ class Node():
"""
def __init__(self, meta: Dict[str, Union[str, int]], parent: Union['Node', None] = None, path_prefix: Union[str, None] = None):
self.meta = meta
self.__parent = parent
self._parent = parent
self._path_prefix = path_prefix
self.name: str = ""
self.id: int = 0
Expand All @@ -39,9 +39,8 @@ def _initialize(self):
self.id = self.meta['id']
self._display_name = self.meta['name']
# get base file path from parent if it exists
if self.__parent:
self._file_path = f"{self.__parent.file_path}/{self.name}"
# self._file_path = self.__parent.file_path + '/' + self.name
if self._parent:
self._file_path = f"{self._parent.file_path}/{self.name}"
# normalize path prefix if it does not exist
if not self._path_prefix:
self._path_prefix = ""
Expand Down
48 changes: 4 additions & 44 deletions bookstack_file_exporter/exporter/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,49 +17,9 @@ def get_json_response(url: str, headers: Dict[str, str], verify: bool = True, ti

def get_all_ids(url: str, headers: Dict[str, str]) -> List[int]:
ids_api_meta = get_json_response(url=url, headers=headers)
all_ids = [item['id'] for item in ids_api_meta['data']]
return all_ids

def get_parent_meta(url: str, headers: Dict[str, str], parent_ids: List[int],
path_prefix: Union[str, None] = None) -> Dict[int, Node]:
parent_nodes = {}
for parent_id in parent_ids:
parent_url = f"{url}/{parent_id}"
# parent_url = url + "/" + str(parent_id)
parent_data = get_json_response(url=parent_url, headers=headers)
parent_nodes[parent_id] = Node(parent_data, path_prefix=path_prefix)
return parent_nodes

def get_chapter_meta(url: str, headers: Dict[str, str], chapters: List[int],
books:Dict[int, Node], path_prefix: Union[str, None] = None) -> Dict[int, Node]:
chapter_nodes = {}
for chapter_id in chapters:
chapter_url = f"{url}/{chapter_id}"
# chapter_url = url + "/" + str(chapter_id)
chapter_data = get_json_response(url=chapter_url, headers=headers)
book_id = chapter_data['book_id']
chapter_nodes[chapter_id] = Node(chapter_data, books[book_id], path_prefix=path_prefix)
return chapter_nodes

def get_child_meta(url: str, headers: Dict[str, str], parent_nodes: Dict[int, Node],
filter_empty: bool = False, path_prefix: Union[str, None] = None) -> Dict[int, Node]:
child_nodes = {}
for _, parent in parent_nodes.items():
if parent.children:
for child in parent.children:
child_id = child['id']
child_url = f"{url}/{child_id}"
# child_url = url + "/" + str(child_id)
child_data = get_json_response(url=child_url, headers=headers)
child_node = Node(child_data, parent, path_prefix=path_prefix)
if filter_empty:
if not child_node.empty:
child_nodes[child_id] = child_node
else:
child_nodes[child_id] = child_node
return child_nodes

def get_page_export(url: str, headers: Dict[str, str]):
pass
if ids_api_meta:
return [item['id'] for item in ids_api_meta['data']]
else:
return []


Loading