From 36370673cd65ee98d36386f0b0223bb38dd87e26 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sat, 30 Sep 2023 04:00:07 -0400 Subject: [PATCH 01/20] wip: lint changes --- Dockerfile | 1 - Makefile | 4 +- bookstack_file_exporter/__init__.py | 1 - bookstack_file_exporter/__main__.py | 8 ++-- bookstack_file_exporter/archiver/archiver.py | 36 ++++++++++------ bookstack_file_exporter/exporter/__init__.py | 1 - bookstack_file_exporter/exporter/exporter.py | 45 ++++++++++---------- bookstack_file_exporter/exporter/node.py | 31 +++++++++----- bookstack_file_exporter/exporter/util.py | 6 +-- bookstack_file_exporter/run.py | 19 ++++----- bookstack_file_exporter/run_args.py | 11 +++-- 11 files changed, 90 insertions(+), 73 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2c85e69..d19d8db 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,3 @@ - ARG BASE_IMAGE=python ARG BASE_IMAGE_TAG=3.11-slim-python diff --git a/Makefile b/Makefile index 94de42c..d05cac5 100644 --- a/Makefile +++ b/Makefile @@ -23,9 +23,11 @@ docker_build: docker_push: docker push ${IMAGE_NAME}:${IMAGE_TAG} +# add -i option due to bug in rancher desktop: https://github.com/rancher-sandbox/rancher-desktop/issues/3239 docker_test: - docker run \ + docker run -i \ -e LOG_LEVEL='debug' \ + --user 1000:1000 \ -v ${CURDIR}/local/config.yml:/export/config/config.yml:ro \ -v ${CURDIR}/bkps:/export/dump \ ${IMAGE_NAME}:${IMAGE_TAG} \ No newline at end of file diff --git a/bookstack_file_exporter/__init__.py b/bookstack_file_exporter/__init__.py index 8b13789..e69de29 100644 --- a/bookstack_file_exporter/__init__.py +++ b/bookstack_file_exporter/__init__.py @@ -1 +0,0 @@ - diff --git a/bookstack_file_exporter/__main__.py b/bookstack_file_exporter/__main__.py index c13bfcc..d91a0df 100644 --- a/bookstack_file_exporter/__main__.py +++ b/bookstack_file_exporter/__main__.py @@ -1,18 +1,16 @@ import argparse import logging -from typing import Dict, List, Union from bookstack_file_exporter import run from bookstack_file_exporter import run_args def main(): + """run entrypoint""" args: argparse.Namespace = run_args.get_args() - logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', - level=run_args.get_log_level(args.log_level), datefmt='%Y-%m-%d %H:%M:%S') - + level=run_args.get_log_level(args.log_level), datefmt='%Y-%m-%d %H:%M:%S') run.exporter(args) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index bc01d8a..d1d76ad 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -28,42 +28,50 @@ class Archiver: """ - Archiver pulls all the necessary files from upstream and then pushes them to the specified backup location(s) + Archiver pulls all the necessary files from upstream + and then pushes them to the specified backup location(s) Args: - :root_dir: str (required) = the base directory for which the files will be placed . - :add_meta: bool (required) = whether or not to add metadata json files for each page, book, chapter, and/or shelve. + :root_dir: str (required) = the base directory for + which the files will be placed . + :add_meta: bool (required) = whether or not to add + metadata json files for each page, book, chapter, and/or shelve. :base_page_url: str (required) = the full url and path to get page content. 
:headers: Dict[str, str] (required) = the headers which include the Authorization to use Returns: - Archiver instance with attributes that are accessible for use for file level archival and backup. + Archiver instance with attributes that are + accessible for use for file level archival and backup. """ - def __init__(self, base_dir: str, add_meta: Union[bool, None], base_page_url: str, headers: Dict[str, str]): + def __init__(self, base_dir: str, add_meta: Union[bool, None], + base_page_url: str, headers: Dict[str, str]): self.base_dir = base_dir self.add_meta = add_meta self.base_page_url = base_page_url self._headers = headers self._root_dir = self.generate_root_folder(self.base_dir) - # the tar file will be name of parent export directory, bookstack-, and .tgz extension + # the tar file will be name of + # parent export directory, bookstack-, and .tgz extension self._tar_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}" # remote_system to function mapping self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3} - + # create local tarball first def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]): + """create a .tgz of all page content""" for _, page in page_nodes.items(): for format in export_formats: self._gather(page, format) self._tar_dir() - + # convert to bytes to be agnostic to end destination (future use case?) def _gather(self, page_node: Node, export_format: str): raw_data = self._get_data_format(page_node.id, export_format) log.debug(f"Output directory for exports set to: {self._root_dir}") self._gather_local(page_node.file_path, raw_data, export_format, page_node.meta) - - def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]): + + def _gather_local(self, page_path: str, data: bytes, + export_format: str, meta_data: Union[bytes, None]): # get path to page file_path = f"{self._root_dir}/{page_path}" # add extension to page path @@ -78,7 +86,7 @@ def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]): if remote_targets: for key, value in remote_targets.items(): self._remote_exports[key](value) - + def _tar_dir(self): util.create_tar(self._root_dir, _FILE_EXTENSION_MAP['tar']) @@ -104,10 +112,10 @@ def _clean(self, clean_up_archive: Union[bool, None]): def _get_data_format(self, page_node_id: int, export_format: str) -> bytes: url = self._get_export_url(node_id=page_node_id, export_format=export_format) return util.get_byte_response(url=url, headers=self._headers) - + def _get_export_url(self, node_id: int, export_format: str) -> str: return f"{self.base_page_url}/{node_id}/{_EXPORT_API_PATH}/{export_format}" - + @staticmethod def generate_root_folder(base_folder_name: str) -> str: - return base_folder_name + "_" + datetime.now().strftime(_DATE_STR_FORMAT) \ No newline at end of file + return base_folder_name + "_" + datetime.now().strftime(_DATE_STR_FORMAT) diff --git a/bookstack_file_exporter/exporter/__init__.py b/bookstack_file_exporter/exporter/__init__.py index 8b13789..e69de29 100644 --- a/bookstack_file_exporter/exporter/__init__.py +++ b/bookstack_file_exporter/exporter/__init__.py @@ -1 +0,0 @@ - diff --git a/bookstack_file_exporter/exporter/exporter.py b/bookstack_file_exporter/exporter/exporter.py index 11dc798..4777276 100644 --- a/bookstack_file_exporter/exporter/exporter.py +++ b/bookstack_file_exporter/exporter/exporter.py @@ -1,24 +1,15 @@ -from typing import Dict, List, Union +from typing import Dict, List import logging -import 
bookstack_file_exporter.exporter.util as util +from bookstack_file_exporter.exporter import util from bookstack_file_exporter.exporter.node import Node -from bookstack_file_exporter.config_helper.config_helper import ConfigNode - - log = logging.getLogger(__name__) -# _API_SUFFIX_PATHS = { -# "shelves": "api/shelves", -# "books": "api/books", -# "chapters": "api/chapters", -# "pages": "api/pages" -# } - class NodeExporter(): """ - NodeExporter class provides an interface to help create Bookstack resources/nodes (pages, books, etc) and their relationships. + NodeExporter class provides an interface to help create + Bookstack resources/nodes (pages, books, etc) and their relationships. Raises: @@ -39,16 +30,18 @@ def get_all_shelves(self) -> Dict[int, Node]: log.warning("No shelves found in given Bookstack instance") return {} return self._get_parents(base_url, all_parents) - - def _get_parents(self, base_url: str, parent_ids: List[int], path_prefix: str = "") -> Dict[int, Node]: + + def _get_parents(self, base_url: str, parent_ids: List[int], + path_prefix: str = "") -> Dict[int, Node]: parent_nodes = {} for parent_id in parent_ids: parent_url = f"{base_url}/{parent_id}" parent_data = util.get_json_response(url=parent_url, headers=self.headers) parent_nodes[parent_id] = Node(parent_data, path_prefix=path_prefix) return parent_nodes - + def get_chapter_nodes(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]: + """ get chapter nodes """ # Chapters are treated a little differently # They are under books like pages but have their own children # i.e. not a terminal node @@ -59,7 +52,8 @@ def get_chapter_nodes(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]: return {} return self._get_chapters(base_url, all_chapters, book_nodes) - def _get_chapters(self, base_url: str, all_chapters: List[int], book_nodes: Dict[int, Node]) -> Dict[int, Node]: + def _get_chapters(self, base_url: str, all_chapters: List[int], + book_nodes: Dict[int, Node]) -> Dict[int, Node]: chapter_nodes = {} for chapter_id in all_chapters: chapter_url = f"{base_url}/{chapter_id}" @@ -67,12 +61,15 @@ def _get_chapters(self, base_url: str, all_chapters: List[int], book_nodes: Dict book_id = chapter_data['book_id'] chapter_nodes[chapter_id] = Node(chapter_data, book_nodes[book_id]) return chapter_nodes - - def get_child_nodes(self, resource_type: str, parent_nodes: Dict[int, Node], filter_empty: bool = True): + + def get_child_nodes(self, resource_type: str, parent_nodes: Dict[int, Node], + filter_empty: bool = True) -> Dict[int, Node]: + """get child nodes from a book/chapter/shelf""" base_url = self.api_urls[resource_type] return self._get_children(base_url, parent_nodes, filter_empty) - def _get_children(self, base_url: str, parent_nodes: Dict[int, Node], filter_empty: bool) -> Dict[int, Node]: + def _get_children(self, base_url: str, parent_nodes: Dict[int, Node], + filter_empty: bool) -> Dict[int, Node]: child_nodes = {} for _, parent in parent_nodes.items(): if parent.children: @@ -88,7 +85,9 @@ def _get_children(self, base_url: str, parent_nodes: Dict[int, Node], filter_emp child_nodes[child_id] = child_node return child_nodes - def get_unassigned_books(self, existing_resources: Dict[int, Node], path_prefix: str) -> Dict[int, Node]: + def get_unassigned_books(self, existing_resources: Dict[int, Node], + path_prefix: str) -> Dict[int, Node]: + """get books not under a shelf""" base_url = self.api_urls["books"] all_resources: List[int] = util.get_all_ids(url=base_url, headers=self.headers) unassigned = [] @@ -103,6 
+102,7 @@ def get_unassigned_books(self, existing_resources: Dict[int, Node], path_prefix: # convenience function def get_all_books(self, shelve_nodes: Dict[int, Node], unassigned_dir: str) -> Dict[int, Node]: + """get all books""" book_nodes = {} # get books in shelves if shelve_nodes: @@ -119,9 +119,10 @@ def get_all_books(self, shelve_nodes: Dict[int, Node], unassigned_dir: str) -> D book_nodes[key] = value return book_nodes - + # convenience function def get_all_pages(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]: + """get all pages and their content""" ## chapters (if exists) # chapter nodes are treated a little differently # chapters are children under books diff --git a/bookstack_file_exporter/exporter/node.py b/bookstack_file_exporter/exporter/node.py index aa37d33..413e644 100644 --- a/bookstack_file_exporter/exporter/node.py +++ b/bookstack_file_exporter/exporter/node.py @@ -9,19 +9,26 @@ class Node(): """ - Node class provides an interface to create bookstack child/parent relationships for resources like pages, books, chapters, and shelves. + Node class provides an interface to create bookstack child/parent + relationships for resources like pages, books, chapters, and shelves. Args: - metadata: Dict[str, Union[str, int]] (required) = The metadata of the resource from bookstack api - parent: Union['Node', None] (optional) = The parent resource if any, parent/children are also of the same class 'Node'. - path_prefix: Union[str, None] (optional) = This appends a relative 'root' directory to the child resource path/file_name. - It is mainly used to prepend a shelve level directory for books that are not assigned or under any shelf. + metadata: Dict[str, Union[str, int]] (required) + = The metadata of the resource from bookstack api + parent: Union['Node', None] (optional) + = The parent resource if any, parent/children are also of the same class 'Node'. + path_prefix: Union[str, None] (optional) + = This appends a relative 'root' directory to the child resource path/file_name. + It is mainly used to prepend a shelve level + directory for books that are not assigned or under any shelf. Returns: - Node instance to help create and reference bookstack child/parent relationships for resources like pages, books, chapters, and shelves. + Node instance to help create and reference bookstack child/parent + relationships for resources like pages, books, chapters, and shelves. 
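+
+    Example (an illustrative sketch; assumes `book_meta` and `page_meta` are
+    metadata dicts already fetched from the Bookstack API):
+        book = Node(book_meta)
+        page = Node(page_meta, parent=book)
+        page.file_path  # resolves to something like "book-slug/page-slug"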
""" - def __init__(self, meta: Dict[str, Union[str, int]], parent: Union['Node', None] = None, path_prefix: str = ""): + def __init__(self, meta: Dict[str, Union[str, int]], + parent: Union['Node', None] = None, path_prefix: str = ""): self.meta = meta self._parent = parent self._path_prefix = path_prefix @@ -33,7 +40,6 @@ def __init__(self, meta: Dict[str, Union[str, int]], parent: Union['Node', None] self._children = self._get_children() # if parent self._file_path = self._get_file_path() - def _get_file_path(self) -> str: if self._parent: @@ -48,9 +54,10 @@ def _get_children(self) -> List[Dict[str, Union[str, int]]]: children = self.meta[match] break return children - + @property def file_path(self): + """get the base file path""" # check to see if parent exists if not self._file_path: # return base path + name if no parent @@ -61,10 +68,12 @@ def file_path(self): @property def children(self): + """return all children of a book/chapter/shelf""" return self._children - + @property def empty(self): + """return True if page node lacks content""" if not self.name and self._display_name == _NULL_PAGE_NAME: return True - return False \ No newline at end of file + return False diff --git a/bookstack_file_exporter/exporter/util.py b/bookstack_file_exporter/exporter/util.py index 3cb5ceb..4e5c9f1 100644 --- a/bookstack_file_exporter/exporter/util.py +++ b/bookstack_file_exporter/exporter/util.py @@ -1,17 +1,17 @@ from typing import Dict, Union, List import logging -from bookstack_file_exporter.exporter.node import Node from bookstack_file_exporter.common import util log = logging.getLogger(__name__) def get_json_response(url: str, headers: Dict[str, str]) -> List[Dict[str, Union[str,int]]]: + """get http response data in json format""" response = util.http_get_request(url=url, headers=headers) return response.json() def get_all_ids(url: str, headers: Dict[str, str]) -> List[int]: + """get all ids for a bookstack resource""" ids_api_meta = get_json_response(url=url, headers=headers) if ids_api_meta: return [item['id'] for item in ids_api_meta['data']] - else: - return [] + return [] diff --git a/bookstack_file_exporter/run.py b/bookstack_file_exporter/run.py index 629dd9a..178040f 100644 --- a/bookstack_file_exporter/run.py +++ b/bookstack_file_exporter/run.py @@ -1,22 +1,21 @@ import argparse import sys import logging -from time import sleep from typing import Dict from bookstack_file_exporter.config_helper.config_helper import ConfigNode from bookstack_file_exporter.exporter.node import Node from bookstack_file_exporter.exporter.exporter import NodeExporter -from bookstack_file_exporter.archiver import util as archiver_util from bookstack_file_exporter.archiver.archiver import Archiver log = logging.getLogger(__name__) def exporter(args: argparse.Namespace): + """export bookstack nodes and archive locally and/or remotely""" ## get configuration from helper config = ConfigNode(args) - ## convenience vars + ## convenience vars bookstack_headers = config.headers api_urls = config.urls export_formats = config.user_inputs.formats @@ -29,24 +28,24 @@ def exporter(args: argparse.Namespace): log.info("Beginning export") ## Use exporter class to get all the resources (pages, books, etc.) 
and their relationships
-    exportHelper = NodeExporter(api_urls, bookstack_headers)
+    export_helper = NodeExporter(api_urls, bookstack_headers)
     ## shelves
-    shelve_nodes: Dict[int, Node] = exportHelper.get_all_shelves()
+    shelve_nodes: Dict[int, Node] = export_helper.get_all_shelves()
     ## books
-    book_nodes: Dict[int, Node] = exportHelper.get_all_books(shelve_nodes, unassigned_dir)
+    book_nodes: Dict[int, Node] = export_helper.get_all_books(shelve_nodes, unassigned_dir)
     ## pages
-    page_nodes: Dict[int, Node] = exportHelper.get_all_pages(book_nodes)
+    page_nodes: Dict[int, Node] = export_helper.get_all_pages(book_nodes)
 
     if not page_nodes:
         log.warning("No page data available from given Bookstack instance. Nothing to archive")
         sys.exit(0)
-
     log.info("Beginning archive")
     ## start archive ##
-    archive: Archiver = Archiver(base_export_dir, config.user_inputs.export_meta, page_base_url, bookstack_headers)
+    archive: Archiver = Archiver(base_export_dir, config.user_inputs.export_meta,
+                                 page_base_url, bookstack_headers)
     # create tar
     archive.archive(page_nodes, export_formats)
     # archive to remote targets
     archive.archive_remote(config.object_storage_config)
     # if remote target is specified and clean is true
     # clean up the .tgz archive since it is already uploaded
-    archive.clean_up(config.user_inputs.clean_up)
\ No newline at end of file
+    archive.clean_up(config.user_inputs.clean_up)
diff --git a/bookstack_file_exporter/run_args.py b/bookstack_file_exporter/run_args.py
index a455d3c..7d7b776 100644
--- a/bookstack_file_exporter/run_args.py
+++ b/bookstack_file_exporter/run_args.py
@@ -1,5 +1,4 @@
 import argparse
-from typing import Dict, List
 import logging
 
 LOG_LEVEL = {
@@ -10,24 +9,28 @@
 }
 
 def get_log_level(log_level:str) -> int:
+    """return log level int"""
     return LOG_LEVEL.get(log_level)
 
 def get_args() -> argparse.Namespace:
+    """return user cmd line options"""
     parser = argparse.ArgumentParser(description='BookStack File Exporter')
     parser.add_argument('-c', '--config-file', type=str, default="data/config.yml",
-                    help='Provide a configuration file (full or relative path). See README for more details')
+                    help='''Provide a configuration file (full or relative path).
+                        See README for more details''')
     parser.add_argument('-o', '--output-dir', type=str, default="",
-                    help='Optional, specify an output directory. This can also be specified in the config.yml file')
+                    help='''Optional, specify an output directory.
+                        This can also be specified in the config.yml file''')
     parser.add_argument('-v', '--log-level', type=str.lower, default='info',
                     help='Set verbosity level for logging.', choices=LOG_LEVEL.keys())
-    return parser.parse_args()
\ No newline at end of file
+    return parser.parse_args()

From 69930887cfb27720eb94e0e86cbe7b280b2a303b Mon Sep 17 00:00:00 2001
From: Peter Chang
Date: Sun, 1 Oct 2023 00:29:28 -0400
Subject: [PATCH 02/20] wip

---
 bookstack_file_exporter/archiver/archiver.py | 43 ++++++++++++--------
 bookstack_file_exporter/archiver/util.py     | 36 ++++++++++++----
 2 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py
index d1d76ad..028e20a 100644
--- a/bookstack_file_exporter/archiver/archiver.py
+++ b/bookstack_file_exporter/archiver/archiver.py
@@ -1,5 +1,4 @@
 from typing import List, Dict, Union
-from time import sleep
 from datetime import datetime
 import logging
 
@@ -52,7 +51,7 @@ def __init__(self, base_dir: str, add_meta: Union[bool, None],
         self._root_dir = self.generate_root_folder(self.base_dir)
         # the tar file will be name of
         # parent export directory, bookstack-, and .tgz extension
-        self._tar_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}"
+        self._archive_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}"
         # remote_system to function mapping
         self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3}
 
@@ -60,29 +59,35 @@ def __init__(self, base_dir: str, add_meta: Union[bool, None],
     def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]):
         """create a .tgz of all page content"""
         for _, page in page_nodes.items():
-            for format in export_formats:
-                self._gather(page, format)
-        self._tar_dir()
+            for ex_format in export_formats:
+                self._gather(page, ex_format)
+        # self._tar_dir()
+        self._gzip_tar()
 
     # convert to bytes to be agnostic to end destination (future use case?)
def _gather(self, page_node: Node, export_format: str): raw_data = self._get_data_format(page_node.id, export_format) - log.debug(f"Output directory for exports set to: {self._root_dir}") self._gather_local(page_node.file_path, raw_data, export_format, page_node.meta) def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]): # get path to page - file_path = f"{self._root_dir}/{page_path}" - # add extension to page path - file_full_name = f"{file_path}{_FILE_EXTENSION_MAP[export_format]}" - util.write_bytes(file_path=file_full_name, data=data) - if self.add_meta: - meta_file_name = f"{file_path}{_FILE_EXTENSION_MAP['meta']}" - util.dump_json(file_name=meta_file_name, data=meta_data) + # file_path = f"{self._root_dir}/{page_path}" + # # add extension to page path + # file_full_name = f"{file_path}{_FILE_EXTENSION_MAP[export_format]}" + # log.debug("Output directory for page export set to: %s", file_full_name) + + page_file = f"{page_path}{_FILE_EXTENSION_MAP[export_format]}" + tar_file = f"{self._root_dir}.tar" + util.write_bytes(tar_file, file_path=page_file, data=data) + + # if self.add_meta: + # meta_file_name = f"{file_path}{_FILE_EXTENSION_MAP['meta']}" + # util.dump_json(file_name=meta_file_name, data=meta_data) # send to remote systems def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]): + """for each target, do their respective tasks""" if remote_targets: for key, value in remote_targets.items(): self._remote_exports[key](value) @@ -90,23 +95,28 @@ def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]): def _tar_dir(self): util.create_tar(self._root_dir, _FILE_EXTENSION_MAP['tar']) + def _gzip_tar(self): + tar_file = f"{self._root_dir}.tar" + util.create_gzip(tar_file, self._archive_file) + def _archive_minio(self, config: StorageProviderConfig): minio_archiver = MinioArchiver(config) - minio_archiver.upload_backup(self._tar_file) + minio_archiver.upload_backup(self._archive_file) def _archive_s3(self, config: StorageProviderConfig): pass def clean_up(self, clean_up_archive: Union[bool, None]): + """remove archive after sending to remote target""" self._clean(clean_up_archive) def _clean(self, clean_up_archive: Union[bool, None]): # remove data root directory since we already have the .tgz file now - util.remove_dir(self._root_dir) + # util.remove_dir(self._root_dir) # if user is uploading to object storage # delete the local .tgz archive since we have it there already if clean_up_archive: - util.remove_file(self._tar_file) + util.remove_file(self._archive_file) # convert page data to bytes def _get_data_format(self, page_node_id: int, export_format: str) -> bytes: @@ -118,4 +128,5 @@ def _get_export_url(self, node_id: int, export_format: str) -> str: @staticmethod def generate_root_folder(base_folder_name: str) -> str: + """return base archive name""" return base_folder_name + "_" + datetime.now().strftime(_DATE_STR_FORMAT) diff --git a/bookstack_file_exporter/archiver/util.py b/bookstack_file_exporter/archiver/util.py index c19bc4d..a403175 100644 --- a/bookstack_file_exporter/archiver/util.py +++ b/bookstack_file_exporter/archiver/util.py @@ -5,23 +5,39 @@ import logging import tarfile import shutil +from io import BytesIO +import gzip from bookstack_file_exporter.common import util log = logging.getLogger(__name__) def get_byte_response(url: str, headers: Dict[str, str]) -> bytes: + """get byte response from http request""" response = util.http_get_request(url=url, headers=headers) return 
response.content -def write_bytes(file_path: str, data: bytes): - path_file = Path(file_path) - # create parent directories as needed, ignore already exists errors - path_file.parent.mkdir(parents=True, exist_ok=True) - path_file.write_bytes(data) +# def write_bytes(file_path: str, data: bytes): +# """write byte data to file""" +# path_file = Path(file_path) +# # create parent directories as needed, ignore already exists errors +# path_file.parent.mkdir(parents=True, exist_ok=True) +# path_file.write_bytes(data) + +def write_bytes(base_tar_dir: str, file_path: str, data: bytes): + """write byte data to file""" + log.info("Opening tar file: %s", base_tar_dir) + with tarfile.open(base_tar_dir, "a") as tar: + data_obj = BytesIO(data) + tar_info = tarfile.TarInfo(name=file_path) + tar_info.size = data_obj.getbuffer().nbytes + log.info(tar_info) + log.info(tar_info.size) + tar.addfile(tar_info, fileobj=data_obj) def dump_json(file_name: str, data: Dict[str, Union[str, int]]): - with open(file_name, 'w') as fp: + """dump dict to json file""" + with open(file_name, 'w', encoding="utf-8") as fp: json.dump(data, fp, indent=4) # set as function in case we want to do checks or final actions later @@ -43,4 +59,10 @@ def create_tar(export_path: str, file_extension: str): # create tar file with tarfile.open(tar_path, "w:gz") as tar: # add export directory to dump - tar.add(str(parent_abs_path), arcname='.') \ No newline at end of file + tar.add(str(parent_abs_path), arcname='.') + +def create_gzip(tar_file: str, gzip_file: str): + with open(tar_file, 'rb') as f_in: + with gzip.open(gzip_file, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + remove_file(tar_file) \ No newline at end of file From 432f3c3205605ec4ddb446aa9e6f300e0724020c Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 02:02:27 -0400 Subject: [PATCH 03/20] refactor archive to be more efficient with tgz file --- bookstack_file_exporter/archiver/archiver.py | 44 ++++++++------------ bookstack_file_exporter/archiver/util.py | 40 ++++-------------- 2 files changed, 25 insertions(+), 59 deletions(-) diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 028e20a..9253325 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -9,7 +9,8 @@ log = logging.getLogger(__name__) -_META_FILE_SUFFIX = "_meta" +_META_FILE_SUFFIX = "_meta.json" +_TAR_SUFFIX = ".tar" _TAR_GZ_SUFFIX = ".tgz" _EXPORT_API_PATH = "export" @@ -19,8 +20,9 @@ "html": ".html", "pdf": ".pdf", "plaintext": ".txt", - "meta": f"{_META_FILE_SUFFIX}.json", - "tar": _TAR_GZ_SUFFIX + "meta": _META_FILE_SUFFIX, + "tar": _TAR_SUFFIX, + "tgz": _TAR_GZ_SUFFIX } _DATE_STR_FORMAT = "%Y-%m-%d_%H-%M-%S" @@ -49,9 +51,13 @@ def __init__(self, base_dir: str, add_meta: Union[bool, None], self.base_page_url = base_page_url self._headers = headers self._root_dir = self.generate_root_folder(self.base_dir) - # the tar file will be name of + # the tgz file will be name of # parent export directory, bookstack-, and .tgz extension - self._archive_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}" + self._archive_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tgz']}" + # name of intermediate tar file before gzip + self._tar_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}" + # name of the base folder to use within the tgz archive + self._archive_base_path = self._root_dir.split("/")[-1] # remote_system to function mapping self._remote_exports = {'minio': 
self._archive_minio, 's3': self._archive_s3} @@ -61,7 +67,6 @@ def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]): for _, page in page_nodes.items(): for ex_format in export_formats: self._gather(page, ex_format) - # self._tar_dir() self._gzip_tar() # convert to bytes to be agnostic to end destination (future use case?) @@ -71,19 +76,12 @@ def _gather(self, page_node: Node, export_format: str): def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]): - # get path to page - # file_path = f"{self._root_dir}/{page_path}" - # # add extension to page path - # file_full_name = f"{file_path}{_FILE_EXTENSION_MAP[export_format]}" - # log.debug("Output directory for page export set to: %s", file_full_name) - - page_file = f"{page_path}{_FILE_EXTENSION_MAP[export_format]}" - tar_file = f"{self._root_dir}.tar" - util.write_bytes(tar_file, file_path=page_file, data=data) - - # if self.add_meta: - # meta_file_name = f"{file_path}{_FILE_EXTENSION_MAP['meta']}" - # util.dump_json(file_name=meta_file_name, data=meta_data) + page_file_name = f"{self._archive_base_path}/{page_path}{_FILE_EXTENSION_MAP[export_format]}" + util.write_bytes(self._tar_file, file_path=page_file_name, data=data) + if self.add_meta: + meta_file_name = f"{self._archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}" + bytes_meta = util.get_json_bytes(meta_data) + util.write_bytes(self._tar_file, file_path=meta_file_name, data=bytes_meta) # send to remote systems def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]): @@ -92,12 +90,8 @@ def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]): for key, value in remote_targets.items(): self._remote_exports[key](value) - def _tar_dir(self): - util.create_tar(self._root_dir, _FILE_EXTENSION_MAP['tar']) - def _gzip_tar(self): - tar_file = f"{self._root_dir}.tar" - util.create_gzip(tar_file, self._archive_file) + util.create_gzip(self._tar_file, self._archive_file) def _archive_minio(self, config: StorageProviderConfig): minio_archiver = MinioArchiver(config) @@ -111,8 +105,6 @@ def clean_up(self, clean_up_archive: Union[bool, None]): self._clean(clean_up_archive) def _clean(self, clean_up_archive: Union[bool, None]): - # remove data root directory since we already have the .tgz file now - # util.remove_dir(self._root_dir) # if user is uploading to object storage # delete the local .tgz archive since we have it there already if clean_up_archive: diff --git a/bookstack_file_exporter/archiver/util.py b/bookstack_file_exporter/archiver/util.py index a403175..c2db168 100644 --- a/bookstack_file_exporter/archiver/util.py +++ b/bookstack_file_exporter/archiver/util.py @@ -17,52 +17,26 @@ def get_byte_response(url: str, headers: Dict[str, str]) -> bytes: response = util.http_get_request(url=url, headers=headers) return response.content -# def write_bytes(file_path: str, data: bytes): -# """write byte data to file""" -# path_file = Path(file_path) -# # create parent directories as needed, ignore already exists errors -# path_file.parent.mkdir(parents=True, exist_ok=True) -# path_file.write_bytes(data) - def write_bytes(base_tar_dir: str, file_path: str, data: bytes): - """write byte data to file""" - log.info("Opening tar file: %s", base_tar_dir) + """write byte data to tar file""" with tarfile.open(base_tar_dir, "a") as tar: data_obj = BytesIO(data) tar_info = tarfile.TarInfo(name=file_path) tar_info.size = data_obj.getbuffer().nbytes - log.info(tar_info) - log.info(tar_info.size) + 
log.debug("Adding file: %s with size: %d bytes to tar file", tar_info.name, tar_info.size) tar.addfile(tar_info, fileobj=data_obj) -def dump_json(file_name: str, data: Dict[str, Union[str, int]]): +def get_json_bytes(data: Dict[str, Union[str, int]]) -> bytes: """dump dict to json file""" - with open(file_name, 'w', encoding="utf-8") as fp: - json.dump(data, fp, indent=4) + return json.dumps(data, indent=4).encode('utf-8') # set as function in case we want to do checks or final actions later -def remove_dir(dir_path: str): - shutil.rmtree(dir_path) - def remove_file(file_path: str): os.remove(file_path) -def create_tar(export_path: str, file_extension: str): - # path of the export dir - output_path = Path(export_path) - # create tar in parent of export dir - # get abs path of parent - parent_path = output_path.parent - parent_abs_path = parent_path.resolve() - # set tar file path - tar_path = f"{export_path}{file_extension}" - # create tar file - with tarfile.open(tar_path, "w:gz") as tar: - # add export directory to dump - tar.add(str(parent_abs_path), arcname='.') - -def create_gzip(tar_file: str, gzip_file: str): +def create_gzip(tar_file: str, gzip_file: str, remove_old: bool = True): with open(tar_file, 'rb') as f_in: with gzip.open(gzip_file, 'wb') as f_out: shutil.copyfileobj(f_in, f_out) - remove_file(tar_file) \ No newline at end of file + if remove_old: + remove_file(tar_file) \ No newline at end of file From 314d8125c318e9d53bc0473aa9d1f2d7226bdc81 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:06:29 -0400 Subject: [PATCH 04/20] wip fixed lint warnings for >90% score --- .github/workflows/pylint.yml | 23 + .pylintrc | 633 ++++++++++++++++++ bookstack_file_exporter/archiver/__init__.py | 1 - bookstack_file_exporter/archiver/archiver.py | 7 +- .../archiver/minio_archiver.py | 29 +- bookstack_file_exporter/archiver/util.py | 7 +- bookstack_file_exporter/common/__init__.py | 1 - bookstack_file_exporter/common/util.py | 18 +- .../config_helper/__init__.py | 1 - .../config_helper/config_helper.py | 43 +- .../config_helper/models.py | 7 +- .../config_helper/remote.py | 19 +- bookstack_file_exporter/exporter/exporter.py | 9 +- bookstack_file_exporter/exporter/node.py | 2 +- bookstack_file_exporter/run.py | 2 + 15 files changed, 752 insertions(+), 50 deletions(-) create mode 100644 .github/workflows/pylint.yml create mode 100644 .pylintrc diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml new file mode 100644 index 0000000..10ad95b --- /dev/null +++ b/.github/workflows/pylint.yml @@ -0,0 +1,23 @@ +name: Pylint + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + - name: Analysing the code with pylint + run: | + pylint $(git ls-files '*.py') diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..f0c2702 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,633 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. 
Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.11 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# Add paths to the list of the source roots. Supports globbing patterns. 
The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. +source-roots= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. 
+#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type alias names. If left empty, type +# alias names will be checked with the set naming style. +#typealias-rgx= + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + asyncSetUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + +[FORMAT] + +# Expected format of line ending, e.g. 
empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + missing-module-docstring + + +# Enable the message, report, category or checker with the given id(s). 
You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. No available dictionaries : You need to install +# both the python package and the system dependency for enchant to work.. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. 
+spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. 
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io diff --git a/bookstack_file_exporter/archiver/__init__.py b/bookstack_file_exporter/archiver/__init__.py index 8b13789..e69de29 100644 --- a/bookstack_file_exporter/archiver/__init__.py +++ b/bookstack_file_exporter/archiver/__init__.py @@ -1 +0,0 @@ - diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 9253325..f4e7ca7 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -34,7 +34,7 @@ class Archiver: Args: :root_dir: str (required) = the base directory for - which the files will be placed . + which the archive .tgz will be placed. :add_meta: bool (required) = whether or not to add metadata json files for each page, book, chapter, and/or shelve. :base_page_url: str (required) = the full url and path to get page content. @@ -71,12 +71,13 @@ def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]): # convert to bytes to be agnostic to end destination (future use case?) def _gather(self, page_node: Node, export_format: str): - raw_data = self._get_data_format(page_node.id, export_format) + raw_data = self._get_data_format(page_node.id_, export_format) self._gather_local(page_node.file_path, raw_data, export_format, page_node.meta) def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]): - page_file_name = f"{self._archive_base_path}/{page_path}{_FILE_EXTENSION_MAP[export_format]}" + page_file_name = f"{self._archive_base_path}/" \ + f"{page_path}{_FILE_EXTENSION_MAP[export_format]}" util.write_bytes(self._tar_file, file_path=page_file_name, data=data) if self.add_meta: meta_file_name = f"{self._archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}" diff --git a/bookstack_file_exporter/archiver/minio_archiver.py b/bookstack_file_exporter/archiver/minio_archiver.py index b9a1cf8..3bd3f30 100644 --- a/bookstack_file_exporter/archiver/minio_archiver.py +++ b/bookstack_file_exporter/archiver/minio_archiver.py @@ -1,16 +1,24 @@ from typing import Union - -from bookstack_file_exporter.config_helper.remote import StorageProviderConfig -from bookstack_file_exporter.archiver import util +import logging from minio import Minio -import logging +from bookstack_file_exporter.config_helper.remote import StorageProviderConfig log = logging.getLogger(__name__) - class MinioArchiver: + """ + Class to handle minio object upload and validations. 
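+
+    Example (an illustrative sketch; `storage_config` stands in for a
+    StorageProviderConfig built from user-provided settings):
+        archiver = MinioArchiver(storage_config)
+        archiver.upload_backup("<export-name>.tgz")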
+ + Args: + config = minio configuration + bucket = upload bucket + path (optional) = specify bucket path for upload + + Returns: + MinioArchiver instance for archival use + """ def __init__(self, config: StorageProviderConfig): self._client = Minio( config.host, @@ -25,16 +33,16 @@ def __init__(self, config: StorageProviderConfig): def _validate_bucket(self): if not self._client.bucket_exists(self.bucket): raise ValueError(f"Given bucket does not exist: {self.bucket}") - + def _generate_path(self, path_name: Union[str, None]) -> str: if path_name: if path_name[-1] == '/': return path_name[:-1] - else: - return path_name + return path_name return "" - + def upload_backup(self, local_file_path: str): + """upload archive file to minio bucket""" # this will be the name of the object to upload # only get the file name not path # we are going to use path provided by user for object storage @@ -44,4 +52,5 @@ def upload_backup(self, local_file_path: str): else: object_path = file_name result = self._client.fput_object(self.bucket, object_path, local_file_path) - log.info(f"Created object: {result.object_name} with tag: {result.etag} and version-id: {result.version_id}") \ No newline at end of file + log.info("""Created object: %s with tag: %s and version-id: %s""", + result.object_name, result.etag, result.version_id) diff --git a/bookstack_file_exporter/archiver/util.py b/bookstack_file_exporter/archiver/util.py index c2db168..286a0c8 100644 --- a/bookstack_file_exporter/archiver/util.py +++ b/bookstack_file_exporter/archiver/util.py @@ -1,5 +1,4 @@ from typing import Dict, Union -from pathlib import Path import json import os import logging @@ -18,7 +17,7 @@ def get_byte_response(url: str, headers: Dict[str, str]) -> bytes: return response.content def write_bytes(base_tar_dir: str, file_path: str, data: bytes): - """write byte data to tar file""" + """append byte data to tar file""" with tarfile.open(base_tar_dir, "a") as tar: data_obj = BytesIO(data) tar_info = tarfile.TarInfo(name=file_path) @@ -32,11 +31,13 @@ def get_json_bytes(data: Dict[str, Union[str, int]]) -> bytes: # set as function in case we want to do checks or final actions later def remove_file(file_path: str): + """remove a file""" os.remove(file_path) def create_gzip(tar_file: str, gzip_file: str, remove_old: bool = True): + """create a gzip of an existing tar file and remove it""" with open(tar_file, 'rb') as f_in: with gzip.open(gzip_file, 'wb') as f_out: shutil.copyfileobj(f_in, f_out) if remove_old: - remove_file(tar_file) \ No newline at end of file + remove_file(tar_file) diff --git a/bookstack_file_exporter/common/__init__.py b/bookstack_file_exporter/common/__init__.py index 8b13789..e69de29 100644 --- a/bookstack_file_exporter/common/__init__.py +++ b/bookstack_file_exporter/common/__init__.py @@ -1 +0,0 @@ - diff --git a/bookstack_file_exporter/common/util.py b/bookstack_file_exporter/common/util.py index 99c0ead..cd67e5b 100644 --- a/bookstack_file_exporter/common/util.py +++ b/bookstack_file_exporter/common/util.py @@ -1,26 +1,32 @@ +import logging from typing import Tuple, Dict import requests from requests.adapters import HTTPAdapter, Retry -import logging log = logging.getLogger(__name__) def http_get_request(url: str, headers: Dict[str, str], timeout: int = 30) -> requests.Response: + """make http requests and return response object""" verify, url_prefix = should_verify(url) try: with requests.Session() as session: + # {backoff factor} * (2 ** ({number of previous retries})) + # {raise_on_status} if status falls 
in status_forcelist range + # and retries have been exhausted. + # {status_force_list} 429 is supposed to be included retries = Retry(total=3, - backoff_factor=0.5, # {backoff factor} * (2 ** ({number of previous retries})) - raise_on_status=True, # if status falls in status_forcelist range and retries have been exhausted. - status_forcelist=[ 500, 502, 503, 504 ]) # 429 is supposed to be included + backoff_factor=0.5, + raise_on_status=True, + status_forcelist=[ 500, 502, 503, 504 ]) session.mount(url_prefix, HTTPAdapter(max_retries=retries)) response = session.get(url, headers=headers, verify=verify, timeout=timeout) except Exception as req_err: - log.error(f"Failed to make request for {url}") + log.error("Failed to make request for %s", url) raise req_err return response def should_verify(url: str) -> Tuple[bool, str]: + """check if http or https""" if url.startswith("https://"): return (True, "https://") - return (False, "http://") \ No newline at end of file + return (False, "http://") diff --git a/bookstack_file_exporter/config_helper/__init__.py b/bookstack_file_exporter/config_helper/__init__.py index 8b13789..e69de29 100644 --- a/bookstack_file_exporter/config_helper/__init__.py +++ b/bookstack_file_exporter/config_helper/__init__.py @@ -1 +0,0 @@ - diff --git a/bookstack_file_exporter/config_helper/config_helper.py b/bookstack_file_exporter/config_helper/config_helper.py index b8c6968..697dc2a 100644 --- a/bookstack_file_exporter/config_helper/config_helper.py +++ b/bookstack_file_exporter/config_helper/config_helper.py @@ -1,9 +1,9 @@ import os -import json import argparse -import yaml -import logging from typing import Dict, Tuple +import logging + +import yaml from bookstack_file_exporter.config_helper import models from bookstack_file_exporter.config_helper.remote import StorageProviderConfig @@ -40,7 +40,8 @@ class ConfigNode: Arg parse from user input Returns: - ConfigNode object with attributes that are accessible for use for further downstream processes + ConfigNode object with attributes that are + accessible for use for further downstream processes Raises: YAMLError: if provided configuration file is not valid YAML @@ -61,7 +62,7 @@ def __init__(self, args: argparse.Namespace): def _generate_config(self, config_file: str) -> models.UserInput: if not os.path.isfile(config_file): raise FileNotFoundError(config_file) - with open(config_file, "r") as yaml_stream: + with open(config_file, "r", encoding="utf-8") as yaml_stream: try: yaml_input = yaml.safe_load(yaml_stream) except Exception as load_err: @@ -75,7 +76,7 @@ def _generate_config(self, config_file: str) -> models.UserInput: log.error("Yaml configuration failed schema validation") raise err return user_inputs - + def _generate_credentials(self) -> Tuple[str, str]: # if user provided credentials in config file, load them token_id = "" @@ -83,7 +84,7 @@ def _generate_credentials(self) -> Tuple[str, str]: if self.user_inputs.credentials: token_id = self.user_inputs.credentials.token_id token_secret = self.user_inputs.credentials.token_secret - + # check to see if env var is specified, if so, it takes precedence token_id = self._check_var(_BOOKSTACK_TOKEN_FIELD, token_id) token_secret = self._check_var(_BOOKSTACK_TOKEN_SECRET_FIELD, token_secret) @@ -93,10 +94,12 @@ def _generate_remote_config(self) -> Dict[str, StorageProviderConfig]: object_config = {} # check for optional minio credentials if configuration is set in yaml configuration file if self.user_inputs.minio_config: - minio_access_key = 
self._check_var(_MINIO_ACCESS_KEY_FIELD, self.user_inputs.minio_config.access_key) - minio_secret_key = self._check_var(_MINIO_SECRET_KEY_FIELD, self.user_inputs.minio_config.secret_key) + minio_access_key = self._check_var(_MINIO_ACCESS_KEY_FIELD, + self.user_inputs.minio_config.access_key) + minio_secret_key = self._check_var(_MINIO_SECRET_KEY_FIELD, + self.user_inputs.minio_config.secret_key) object_config["minio"] = StorageProviderConfig(minio_access_key, - minio_secret_key, self.user_inputs.minio_config.bucket, + minio_secret_key, self.user_inputs.minio_config.bucket, host=self.user_inputs.minio_config.host, path=self.user_inputs.minio_config.path, region=self.user_inputs.minio_config.region) @@ -114,7 +117,7 @@ def _generate_headers(self) -> Dict[str, str]: # do not override if user added one already with same key if key not in headers: headers[key] = value - + # do not override user provided one if 'Authorization' not in headers: headers['Authorization'] = f"Token {self._token_id}:{self._token_secret}" @@ -153,23 +156,27 @@ def _set_base_dir(self, cmd_output_dir: str) -> str: else: base_dir = _BASE_DIR_NAME return base_dir - + @property def headers(self) -> Dict[str, str]: + """get generated headers""" return self._headers @property def urls(self) -> Dict[str, str]: + """get generated urls""" return self._urls - + @property def base_dir_name(self) -> str: + """get base dir of output target""" return self._base_dir_name @property def object_storage_config(self) -> Dict[str, StorageProviderConfig]: + """return remote storage configuration""" return self._object_storage_config - + @staticmethod def _check_var(env_key: str, default_val: str) -> str: """ @@ -182,10 +189,12 @@ def _check_var(env_key: str, default_val: str) -> str: env_value = os.environ.get(env_key, "") # env value takes precedence if env_value: - log.debug(f"env key: {env_key} specified. Will override configuration file value if set.") + log.debug("""env key: %s specified. 
+ Will override configuration file value if set.""", env_key) return env_value # check for optional inputs, if env and input is missing if not env_value and not default_val: - raise ValueError(f"{env_key} is not specified in env and is missing from configuration - at least one should be set") + raise ValueError(f"""{env_key} is not specified in env and is + missing from configuration - at least one should be set""") # fall back to configuration file value if present - return default_val \ No newline at end of file + return default_val diff --git a/bookstack_file_exporter/config_helper/models.py b/bookstack_file_exporter/config_helper/models.py index 9b41fd6..1bd9c40 100644 --- a/bookstack_file_exporter/config_helper/models.py +++ b/bookstack_file_exporter/config_helper/models.py @@ -1,7 +1,10 @@ from typing import Dict, Literal, List, Optional from pydantic import BaseModel +# pylint: disable=R0903 + class MinioConfig(BaseModel): + """YAML schema for minio configuration""" host: str access_key: Optional[str] = None secret_key: Optional[str] = None @@ -10,10 +13,12 @@ class MinioConfig(BaseModel): region: str class BookstackAccess(BaseModel): + """YAML schema for bookstack access credentials""" token_id: str token_secret: str class UserInput(BaseModel): + """YAML schema for user provided configuration file""" host: str additional_headers: Optional[Dict[str, str]] = None credentials: Optional[BookstackAccess] = None @@ -21,4 +26,4 @@ class UserInput(BaseModel): output_path: Optional[str] = None export_meta: Optional[bool] = None minio_config: Optional[MinioConfig] = None - clean_up: Optional[bool] = None \ No newline at end of file + clean_up: Optional[bool] = None diff --git a/bookstack_file_exporter/config_helper/remote.py b/bookstack_file_exporter/config_helper/remote.py index 9d79aff..21fc2bf 100644 --- a/bookstack_file_exporter/config_helper/remote.py +++ b/bookstack_file_exporter/config_helper/remote.py @@ -3,7 +3,24 @@ ## convenience class ## able to work for minio, s3, etc. class StorageProviderConfig: - def __init__(self, access_key: str, secret_key: str, bucket: str, host: Union[str, None], path: Union[str, None], region: Union[str, None]): + """ + Convenience class to get dot notation for remote object storage + configuration access. 
+ + Args: + access_key = required token id + secret_key = required secret token + bucket = bucket to upload + host (optionalgit) = if provider requires a host/url + path (optional) = specify bucket path for upload + region (optional) = if provider requires region + + Returns: + StorageProviderConfig instance for dot notation access + """ + def __init__(self, access_key: str, secret_key: str, bucket: str, + host: Union[str, None]=None, path: Union[str, None]=None, + region: Union[str, None]=None): self.host = host self.access_key = access_key self.secret_key = secret_key diff --git a/bookstack_file_exporter/exporter/exporter.py b/bookstack_file_exporter/exporter/exporter.py index 4777276..f340a1a 100644 --- a/bookstack_file_exporter/exporter/exporter.py +++ b/bookstack_file_exporter/exporter/exporter.py @@ -123,15 +123,14 @@ def get_all_books(self, shelve_nodes: Dict[int, Node], unassigned_dir: str) -> D # convenience function def get_all_pages(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]: """get all pages and their content""" - ## chapters (if exists) - # chapter nodes are treated a little differently - # chapters are children under books - chapter_nodes: Dict[int, Node] = self.get_chapter_nodes(book_nodes) - ## pages page_nodes = {} if book_nodes: page_nodes: Dict[int, Node] = self.get_child_nodes("pages", book_nodes) + ## chapters (if exists) + # chapter nodes are treated a little differently + # chapters are children under books + chapter_nodes: Dict[int, Node] = self.get_chapter_nodes(book_nodes) # add chapter node pages # replace existing page node if found with proper chapter parent if chapter_nodes: diff --git a/bookstack_file_exporter/exporter/node.py b/bookstack_file_exporter/exporter/node.py index 413e644..7b8cf41 100644 --- a/bookstack_file_exporter/exporter/node.py +++ b/bookstack_file_exporter/exporter/node.py @@ -34,7 +34,7 @@ def __init__(self, meta: Dict[str, Union[str, int]], self._path_prefix = path_prefix # for convenience/usage for exporter self.name: str = self.meta['slug'] - self.id: int = self.meta['id'] + self.id_: int = self.meta['id'] self._display_name = self.meta['name'] # children self._children = self._get_children() diff --git a/bookstack_file_exporter/run.py b/bookstack_file_exporter/run.py index 178040f..4b4fc8e 100644 --- a/bookstack_file_exporter/run.py +++ b/bookstack_file_exporter/run.py @@ -49,3 +49,5 @@ def exporter(args: argparse.Namespace): # if remote target is specified and clean is true # clean up the .tgz archive since it is already uploaded archive.clean_up(config.user_inputs.clean_up) + + log.info("Completed run") From ac37158583b1e2a07b9440670286089320805bba Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:12:44 -0400 Subject: [PATCH 05/20] wip set pylint to fail if under 9/10 --- .pylintrc | 2 +- bookstack_file_exporter/config_helper/remote.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pylintrc b/.pylintrc index f0c2702..3c66f40 100644 --- a/.pylintrc +++ b/.pylintrc @@ -39,7 +39,7 @@ extension-pkg-whitelist= fail-on= # Specify a score threshold under which the program will exit with error. -fail-under=10 +fail-under=9 # Interpret the stdin as a python script, whose filename needs to be passed as # the module_or_package argument. 
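The `fail-under=9` gate above can be exercised locally before opening a PR. A minimal sketch — assuming pylint >= 2.5 (which introduced `--fail-under`) and that the package directory is the lint target; the exact invocation used by the pylint workflow is not shown in these patches:

```bash
# install the linter, then run the same score gate the CI enforces;
# the command exits non-zero when the score drops below 9/10
python -m pip install pylint
python -m pylint --fail-under=9 bookstack_file_exporter
```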
diff --git a/bookstack_file_exporter/config_helper/remote.py b/bookstack_file_exporter/config_helper/remote.py index 21fc2bf..713c7fd 100644 --- a/bookstack_file_exporter/config_helper/remote.py +++ b/bookstack_file_exporter/config_helper/remote.py @@ -11,7 +11,7 @@ class StorageProviderConfig: access_key = required token id secret_key = required secret token bucket = bucket to upload - host (optionalgit) = if provider requires a host/url + host (optional) = if provider requires a host/url path (optional) = specify bucket path for upload region (optional) = if provider requires region From 8da4f5cd9c4857709b435123ed61f8fd56544d21 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:29:59 -0400 Subject: [PATCH 06/20] add more workflows --- .github/workflows/{ => always}/pylint.yml | 0 .github/workflows/merge/docker-build.yml | 28 +++++++++++++++++++++++ .pylintrc | 1 + 3 files changed, 29 insertions(+) rename .github/workflows/{ => always}/pylint.yml (100%) create mode 100644 .github/workflows/merge/docker-build.yml diff --git a/.github/workflows/pylint.yml b/.github/workflows/always/pylint.yml similarity index 100% rename from .github/workflows/pylint.yml rename to .github/workflows/always/pylint.yml diff --git a/.github/workflows/merge/docker-build.yml b/.github/workflows/merge/docker-build.yml new file mode 100644 index 0000000..5d2836d --- /dev/null +++ b/.github/workflows/merge/docker-build.yml @@ -0,0 +1,28 @@ +name: Test Builds + +on: + push: + pull_request: + branches: [ "main" ] + +jobs: + docker_build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Build the Docker image + run: docker build . --file Dockerfile --tag ${{ vars.DOCKERHUB_REPO }}:${{ github.sha }} + pip_build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Test Install of Pip Module + run: | + pip install . \ No newline at end of file diff --git a/.pylintrc b/.pylintrc index 3c66f40..1e52a9b 100644 --- a/.pylintrc +++ b/.pylintrc @@ -39,6 +39,7 @@ extension-pkg-whitelist= fail-on= # Specify a score threshold under which the program will exit with error. 
+# set to fail if under 9/10 on score fail-under=9 # Interpret the stdin as a python script, whose filename needs to be passed as From 497a659a4af0716fcd0d0f4069f6fc4584013e2d Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:32:30 -0400 Subject: [PATCH 07/20] add more workflows --- .github/workflows/{always/pylint.yml => always.pylint.yml} | 0 .../{merge/docker-build.yml => on_pr_open.docker-build.yml} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{always/pylint.yml => always.pylint.yml} (100%) rename .github/workflows/{merge/docker-build.yml => on_pr_open.docker-build.yml} (100%) diff --git a/.github/workflows/always/pylint.yml b/.github/workflows/always.pylint.yml similarity index 100% rename from .github/workflows/always/pylint.yml rename to .github/workflows/always.pylint.yml diff --git a/.github/workflows/merge/docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml similarity index 100% rename from .github/workflows/merge/docker-build.yml rename to .github/workflows/on_pr_open.docker-build.yml From 13e368a316195c04d154da248a2af400698e7f45 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:35:34 -0400 Subject: [PATCH 08/20] add more workflows --- .github/workflows/on_pr_open.docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index 5d2836d..1e8d9c8 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -11,7 +11,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag ${{ vars.DOCKERHUB_REPO }}:${{ github.sha }} + run: docker build . --file Dockerfile --tag $DOCKERHUB_REPO:${{ github.sha }} pip_build: runs-on: ubuntu-latest strategy: From d670cddec5890102fda8c1b5c36fd17ed771f873 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:38:19 -0400 Subject: [PATCH 09/20] add more workflows --- .github/workflows/on_pr_open.docker-build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index 1e8d9c8..dff2459 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -8,10 +8,12 @@ on: jobs: docker_build: runs-on: ubuntu-latest + env: + DOCKER_REPO: ${{ vars.DOCKERHUB_REPO }} steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag $DOCKERHUB_REPO:${{ github.sha }} + run: docker build . --file Dockerfile --tag $DOCKER_REPO:${{ github.sha }} pip_build: runs-on: ubuntu-latest strategy: From b3e609751b941fc56cdd579d1a7116d70c653469 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:45:16 -0400 Subject: [PATCH 10/20] fix env var in workflow --- .github/workflows/on_pr_open.docker-build.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index dff2459..28c35f0 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -8,12 +8,10 @@ on: jobs: docker_build: runs-on: ubuntu-latest - env: - DOCKER_REPO: ${{ vars.DOCKERHUB_REPO }} steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . 
--file Dockerfile --tag $DOCKER_REPO:${{ github.sha }} + run: docker build . --file Dockerfile --tag ${{ env.DOCKERHUB_REPO }}:${{ GITHUB_RUN_ID }} pip_build: runs-on: ubuntu-latest strategy: From 5adc1d4c24f9e52d33081590d10700ce66a827ef Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:46:13 -0400 Subject: [PATCH 11/20] fix env var in workflow --- .github/workflows/on_pr_open.docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index 28c35f0..d275f0e 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -11,7 +11,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag ${{ env.DOCKERHUB_REPO }}:${{ GITHUB_RUN_ID }} + run: docker build . --file Dockerfile --tag ${{ env.DOCKERHUB_REPO }}:${{ github.run_id }} pip_build: runs-on: ubuntu-latest strategy: From b8b9bdabb3b3a5226e5752f87cdee0075d817d2f Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:49:40 -0400 Subject: [PATCH 12/20] add makefile for test --- .github/workflows/on_pr_open.docker-build.yml | 5 ++--- Makefile | 5 ++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index d275f0e..1a4b6fd 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -11,7 +11,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag ${{ env.DOCKERHUB_REPO }}:${{ github.run_id }} + run: make docker_build pip_build: runs-on: ubuntu-latest strategy: @@ -24,5 +24,4 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Test Install of Pip Module - run: | - pip install . \ No newline at end of file + run: make pip_build \ No newline at end of file diff --git a/Makefile b/Makefile index d05cac5..86a4258 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,10 @@ DOCKER_WORK_DIR=/export DOCKER_CONFIG_DIR=/export/config DOCKER_EXPORT_DIR=/export/dump -test_local: +pip_build: + pip install . + +pip_local_dev: pip install -e . 
docker_build: From 1c51ba1a298c831b23a27a86933db6b482f97e06 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:58:34 -0400 Subject: [PATCH 13/20] fix on pr workflow --- .github/workflows/on_pr_open.docker-build.yml | 3 ++- Makefile | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index 1a4b6fd..ad12e23 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -1,7 +1,6 @@ name: Test Builds on: - push: pull_request: branches: [ "main" ] @@ -10,6 +9,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - name: Prepare Makefile + run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${{github.run_id}}/' Makefile - name: Build the Docker image run: make docker_build pip_build: diff --git a/Makefile b/Makefile index 86a4258..6c29435 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,8 @@ BASE_IMAGE=python BASE_IMAGE_TAG=3.11-slim-bookworm IMAGE_NAME=homeylab/bookstack-file-exporter +# keep this start sequence unique (IMAGE_TAG=) +# github actions will replace this value for later use IMAGE_TAG=test DOCKER_WORK_DIR=/export DOCKER_CONFIG_DIR=/export/config From 0243820cee3b7b1c9091017dc5f234fd2500f092 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 2 Oct 2023 01:40:10 -0400 Subject: [PATCH 14/20] add tag release action --- .github/workflows/on_pr_open.docker-build.yml | 8 +- .github/workflows/on_tag.push.yml | 44 +++++++++ Makefile | 16 +++- README.md | 91 +++++++++++-------- setup.cfg | 3 +- 5 files changed, 117 insertions(+), 45 deletions(-) create mode 100644 .github/workflows/on_tag.push.yml diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index ad12e23..d5607fc 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -24,5 +24,9 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - - name: Test Install of Pip Module - run: make pip_build \ No newline at end of file + - name: Prepare setup.cfg + run: sed -i 's/^version = [^ ]*/version = ${{github.run_id}}/' setup.cfg + - name: Test Build of Package + run: make build + #- name: Upload to TestPypi + #- name: Test install from TestPypi \ No newline at end of file diff --git a/.github/workflows/on_tag.push.yml b/.github/workflows/on_tag.push.yml new file mode 100644 index 0000000..62d9d37 --- /dev/null +++ b/.github/workflows/on_tag.push.yml @@ -0,0 +1,44 @@ +# needs: [tests] # require tests to pass before deploy runs + +name: Build and Push + +on: + push: + # Pattern matched against refs/tags + tags: + - '**' # Push events to every tag including hierarchical tags like v1.0/beta + +jobs: + docker_deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Login to Dockerhub + run: docker login -u ${{ secrets.DOCKERHUB_USER }} -p ${{ secrets.DOCKERHUB_TOKEN }} + - name: Prepare Makefile + run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${{github.ref_name}}/' Makefile + - name: Build the Docker image + run: make docker_build + - name: Push Docker image + run: make docker_push + pypi_deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Update Release tag + run: sed -i 's/^version = [^ 
]*/version = ${{github.ref_name}}/' setup.cfg
+      - name: Build package
+        run: make build
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 6c29435..4afdfde 100644
--- a/Makefile
+++ b/Makefile
@@ -10,10 +10,22 @@ DOCKER_CONFIG_DIR=/export/config
 DOCKER_EXPORT_DIR=/export/dump
 
 pip_build:
-	pip install .
+	python -m pip install .
 
 pip_local_dev:
-	pip install -e .
+	python -m pip install -e .
+
+build:
+	python -m pip install --upgrade build
+	python -m build
+
+upload_testpypi:
+	python -m pip install --upgrade twine
+	python -m twine upload --repository testpypi dist/*
+
+# extra-url is for dependencies using real pypi
+download_testpypi:
+	python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple bookstack-file-exporter
 
 docker_build:
 	docker buildx build \
diff --git a/README.md b/README.md
index 356752a..e7e8b39 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,10 @@
 # bookstack-file-exporter
 
-**WIP** not yet complete.
+_This project is still under active development. Functionality is there and is relatively stable at this time._
 
-_This is project is still under active development but has made significant progress._
+This tool provides a way to export Bookstack pages in a folder-tree layout locally with an option to push to remote object storage locations.
 
-This tool provides a way to export Bookstack pages in a folder-tree layout into object storage and/or locally.
-
-This small project was mainly created to run as a cronjob in k8s but also run locally if needed. This would allow me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
+This small project was mainly created to run as a cron job in k8s but works anywhere. This would allow me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
 
 The main use case is to backup all docs in a folder-tree format to cover the scenarios:
 
@@ -36,9 +34,22 @@ python -m bookstack_file_exporter -c 
 
 ## Using This Application
 
 ### Install via Pip
+Note: This application is tested and developed on Python `3.11.X`. It will probably work for >= `3.8` but is recommended to install (or set up a venv) `3.11.X` versions.
+
 ```
 pip install bookstack-file-exporter
+
+# if you already have python bin directory in your path
+bookstack-file-exporter -c 
+
+# using pip
+python -m bookstack_file_exporter -c 
 ```
+Command line options:
+| option | required | description |
+| ------ | -------- | ----------- |
+|`-c`, `--config-file`|True|Relative or Absolute path to a valid configuration file. 
This configuration file is checked against a schema for validation.|
+|`-v`, `--log-level` |False, default: info|Provide a valid log level: info, debug, warning, error.|
 
 ### Authentication
 **Note visibility of pages is based on user**, so use a user that has access to pages you want to back up
@@ -112,41 +123,6 @@ export_meta: true
 clean_up: true
 ```
 
-### Minio Backups
-When specifying `minio_config` in the configuration file, these fields are required in the file:
-```
-# a host/ip + port combination is also allowed
-# example: "minio.yourdomain.com:8443"
-host: "minio.yourdomain.com"
-
-# this is required since minio api appears to require it
-# set to the region your bucket resides in
-# if unsure, try "us-east-1" first
-region: "us-east-1"
-
-# bucket to upload to
-bucket "mybucket"
-```
-
-These fields are optional:
-```
-# access key for the minio instance
-# optionally set as env variable instead
-access_key: ""
-
-# secret key for the minio instance
-# optionally set as env variable instead
-secret_key: ""
-
-# the path of the backup
-# in example below, the exported archive will appear in: `<bucket_name>:/bookstack/backups/bookstack-<timestamp>.tgz`
-path: "bookstack/backups"
-```
-
-As mentioned you can optionally set access and secret key as env variables. If both are specified, env variable will take precedence.
-- `MINIO_ACCESS_KEY`
-- `MINIO_SECRET_KEY`
-
 ### Backup Behavior
 
 We will use slug names (from Bookstack API) by default, as such certain characters like `!`, `/` will be ignored and spaces replaced.
@@ -189,6 +165,41 @@ Empty/New Pages will be ignored since they have not been modified yet from creat
 ion. You may notice some directories (books) and/or files (pages) in the archive have a random string at the end, example - `nKA`: `user-and-group-management-nKA`. This is expected and is because there were resources with the same name created in another shelve and bookstack adds a string at the end to ensure uniqueness.
 
+### Minio Backups
+When specifying `minio_config` in the configuration file, these fields are required in the file:
+```
+# a host/ip + port combination is also allowed
+# example: "minio.yourdomain.com:8443"
+host: "minio.yourdomain.com"
+
+# this is required since minio api appears to require it
+# set to the region your bucket resides in
+# if unsure, try "us-east-1" first
+region: "us-east-1"
+
+# bucket to upload to
+bucket: "mybucket"
+```
+
+These fields are optional:
+```
+# access key for the minio instance
+# optionally set as env variable instead
+access_key: ""
+
+# secret key for the minio instance
+# optionally set as env variable instead
+secret_key: ""
+
+# the path of the backup
+# in example below, the exported archive will appear in: `<bucket_name>:/bookstack/backups/bookstack-<timestamp>.tgz`
+path: "bookstack/backups"
+```
+
+As mentioned, you can optionally set access and secret key as env variables. If both are specified, the env variable will take precedence.
+- `MINIO_ACCESS_KEY`
+- `MINIO_SECRET_KEY`
+
 ## Future Items
 1. Be able to pull media/photos locally and place in their respective page folders for a more complete file level backup.
 2. Include the exporter in a maintained helm chart as an optional deployment. The helm chart is [here](https://github.com/homeylab/helm-charts/tree/main/charts/bookstack). 
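The credential-precedence rule above (env vars win over `minio_config` values) can be checked end-to-end with a debug run. A sketch, assuming a `config.yml` that also sets `access_key`/`secret_key`; the key values here are placeholders, not real credentials:

```bash
# exported env vars should take precedence over the config file values,
# mirroring the _check_var helper shown earlier in this series
export MINIO_ACCESS_KEY='env-access-key'
export MINIO_SECRET_KEY='env-secret-key'
python -m bookstack_file_exporter -c config.yml -v debug
# expect a debug line noting the env key will override the configuration file value
```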
diff --git a/setup.cfg b/setup.cfg index e28b411..98eb518 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,10 +1,11 @@ [metadata] name = bookstack-file-exporter +# version will be replaced by github actions by release tag version = 0.0.1 author = pchang388 # author_email = your@email.address url = https://github.com/homeylab/bookstack-file-exporter -description = An exporter written in python to export all documents from a bookstack instance with your preferred medium +description = An exporter written in python to export all documents from a bookstack instance in different formats long_description = file: README.md long_description_content_type = text/markdown keywords = bookstack, exporter From 51f77c08039ce1fb4e307382450ccccf768c24a2 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 2 Oct 2023 01:48:44 -0400 Subject: [PATCH 15/20] remove prefix char in release tags --- .github/workflows/on_tag.push.yml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/on_tag.push.yml b/.github/workflows/on_tag.push.yml index 62d9d37..9f7d863 100644 --- a/.github/workflows/on_tag.push.yml +++ b/.github/workflows/on_tag.push.yml @@ -13,10 +13,16 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + # Store the version, stripping any v-prefix + - name: Write release version + run: | + TAG=${{ github.event.release.tag_name }} + echo "VERSION=${TAG#v}" >> $GITHUB_ENV + echo Version: $VERSION - name: Login to Dockerhub run: docker login -u ${{ secrets.DOCKERHUB_USER }} -p ${{ secrets.DOCKERHUB_TOKEN }} - name: Prepare Makefile - run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${{github.ref_name}}/' Makefile + run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${VERSION}/' Makefile - name: Build the Docker image run: make docker_build - name: Push Docker image @@ -33,8 +39,13 @@ jobs: run: | python -m pip install --upgrade pip pip install build + - name: Write release version + run: | + TAG=${{ github.event.release.tag_name }} + echo "VERSION=${TAG#v}" >> $GITHUB_ENV + echo Version: $VERSION - name: Update Release tag - run: sed -i 's/^version = [^ ]*/version = ${{github.ref_name}}/' setup.cfg + run: sed -i 's/^version = [^ ]*/version = ${VERSION}}/' setup.cfg - name: Build package run: make build - name: Publish package From 7dcf84b0ca12fa2da495592065704522359640a6 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 2 Oct 2023 02:08:28 -0400 Subject: [PATCH 16/20] fix workflow --- .github/workflows/on_tag.push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/on_tag.push.yml b/.github/workflows/on_tag.push.yml index 9f7d863..2424be2 100644 --- a/.github/workflows/on_tag.push.yml +++ b/.github/workflows/on_tag.push.yml @@ -16,7 +16,7 @@ jobs: # Store the version, stripping any v-prefix - name: Write release version run: | - TAG=${{ github.event.release.tag_name }} + TAG=${{ github.ref_name }} echo "VERSION=${TAG#v}" >> $GITHUB_ENV echo Version: $VERSION - name: Login to Dockerhub @@ -41,7 +41,7 @@ jobs: pip install build - name: Write release version run: | - TAG=${{ github.event.release.tag_name }} + TAG=${{ github.ref_name }} echo "VERSION=${TAG#v}" >> $GITHUB_ENV echo Version: $VERSION - name: Update Release tag From 0d39bd71211730393e05cc9f8f853bf5bbc216a3 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 2 Oct 2023 03:52:27 -0400 Subject: [PATCH 17/20] add release merge workflow --- .github/workflows/on_pr_merged.yml | 99 ++++++++++++++++++++++++++++++ .github/workflows/on_tag.push.yml | 55 ----------------- 
 Makefile                           |  4 +-
 setup.cfg                          |  2 +-
 4 files changed, 102 insertions(+), 58 deletions(-)
 create mode 100644 .github/workflows/on_pr_merged.yml
 delete mode 100644 .github/workflows/on_tag.push.yml

diff --git a/.github/workflows/on_pr_merged.yml b/.github/workflows/on_pr_merged.yml
new file mode 100644
index 0000000..6e6d6cd
--- /dev/null
+++ b/.github/workflows/on_pr_merged.yml
@@ -0,0 +1,99 @@
+# needs: [tests] # require tests to pass before deploy runs
+
+name: Build and Push
+
+# on:
+#   push:
+#     # Pattern matched against refs/tags
+#     tags:
+#       - '**' # Push events to every tag including hierarchical tags like v1.0/beta
+
+on:
+  pull_request:
+    types:
+      - closed
+    branches:
+      - master
+
+jobs:
+  docker_deploy:
+    if: github.event.pull_request.merged
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Login to Dockerhub
+        run: docker login -u ${{ secrets.DOCKERHUB_USER }} -p ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Build the Docker image
+        run: make docker_build
+      - name: Push Docker image
+        run: make docker_push
+  pypi_deploy:
+    if: github.event.pull_request.merged
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.x'
+      - name: Set tag version
+        run: |
+          TAG=$(grep -E '^IMAGE_TAG=[0-9]+\.[0-9]+\.[0-9]+' Makefile | cut -d '=' -f 2)
+          echo "VERSION=${TAG}" >> $GITHUB_ENV
+          echo "version from Makefile is: ${TAG}"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+      - name: Update Release tag
+        run: sed -i "s/^version = [^ ]*/version = ${VERSION}/" setup.cfg
+      - name: Build package
+        run: make build
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
+  create_tag:
+    if: github.event.pull_request.merged
+    runs-on: ubuntu-latest
+    needs:
+      - docker_deploy
+      - pypi_deploy
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ github.event.pull_request.merge_commit_sha }}
+          fetch-depth: '0'
+      - name: Set tag version
+        run: |
+          TAG=$(grep -E '^IMAGE_TAG=[0-9]+\.[0-9]+\.[0-9]+' Makefile | cut -d '=' -f 2)
+          echo "VERSION=${TAG}" >> $GITHUB_ENV
+          echo "version from Makefile is: ${TAG}"
+      - name: Create tag
+        uses: anothrNick/github-tag-action@1.64.0
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          WITH_V: true
+          PRERELEASE: true
+          CUSTOM_TAG: ${{ env.VERSION }}
+  create_release:
+    if: github.event.pull_request.merged
+    runs-on: ubuntu-latest
+    needs:
+      - create_tag
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set tag version
+        run: |
+          TAG=$(grep -E '^IMAGE_TAG=[0-9]+\.[0-9]+\.[0-9]+' Makefile | cut -d '=' -f 2)
+          echo "VERSION=${TAG}" >> $GITHUB_ENV
+          echo "version from Makefile is: ${TAG}"
+      - uses: ncipollo/release-action@v1
+        with:
+          tag: ${{ env.VERSION }}
+      # docker image tag latest
diff --git a/.github/workflows/on_tag.push.yml b/.github/workflows/on_tag.push.yml
deleted file mode 100644
index 2424be2..0000000
--- a/.github/workflows/on_tag.push.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-# needs: [tests] # require tests to pass before deploy runs
-
-name: Build and Push
-
-on:
-  push:
-    # Pattern matched against refs/tags
-    tags:
-      - '**' # Push events to every tag including hierarchical tags like v1.0/beta
-
-jobs:
-  docker_deploy:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      # Store the version, stripping any v-prefix
-      - name: Write release version
-        run: |
-          TAG=${{ github.ref_name }} 
- echo "VERSION=${TAG#v}" >> $GITHUB_ENV - echo Version: $VERSION - - name: Login to Dockerhub - run: docker login -u ${{ secrets.DOCKERHUB_USER }} -p ${{ secrets.DOCKERHUB_TOKEN }} - - name: Prepare Makefile - run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${VERSION}/' Makefile - - name: Build the Docker image - run: make docker_build - - name: Push Docker image - run: make docker_push - pypi_deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Write release version - run: | - TAG=${{ github.ref_name }} - echo "VERSION=${TAG#v}" >> $GITHUB_ENV - echo Version: $VERSION - - name: Update Release tag - run: sed -i 's/^version = [^ ]*/version = ${VERSION}}/' setup.cfg - - name: Build package - run: make build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/Makefile b/Makefile index 4afdfde..bbf2917 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,8 @@ BASE_IMAGE=python BASE_IMAGE_TAG=3.11-slim-bookworm IMAGE_NAME=homeylab/bookstack-file-exporter # keep this start sequence unique (IMAGE_TAG=) -# github actions will replace this value for later use -IMAGE_TAG=test +# github actions will use this to create a tag +IMAGE_TAG=0.0.1 DOCKER_WORK_DIR=/export DOCKER_CONFIG_DIR=/export/config DOCKER_EXPORT_DIR=/export/dump diff --git a/setup.cfg b/setup.cfg index 98eb518..6bb1832 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = bookstack-file-exporter -# version will be replaced by github actions by release tag +# version will be replaced by IMAGE_TAG in Makefile version = 0.0.1 author = pchang388 # author_email = your@email.address From d319c641cb6bbc1daf768dedca046da5462b1457 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 2 Oct 2023 04:12:44 -0400 Subject: [PATCH 18/20] update readme --- README.md | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e7e8b39..08c0eac 100644 --- a/README.md +++ b/README.md @@ -33,11 +33,11 @@ python -m bookstack_file_exporter -c ## Using This Application -### Install via Pip -Note: This application is tested and developed on Python `3.11.X`. It will probably work for >= `3.8` but is recommended to install (or set up a venv) `3.11.X` versions. +### Run via Pip +Note: This application is tested and developed on Python `3.11.X`. It will probably work for >= `3.8` but is recommended to install (or set up a venv) a `3.11.X` version. -``` -pip install bookstack-file-exporter +```bash +python -m pip install bookstack-file-exporter # if you already have python bin directory in your path bookstack-file-exporter -c @@ -51,6 +51,34 @@ Command line options: |`-c`, `--config-file`|True|Relative or Absolute path to a valid configuration file. 
This configuration file is checked against a schema for validation.|
 |`-v`, `--log-level` |False, default: info|Provide a valid log level: info, debug, warning, error.|
 
+### Run Via Docker
+Example:
+```bash
+docker run \
+    --user ${USER_ID}:${USER_GID} \
+    -v $(pwd)/local/config.yml:/export/config/config.yml:ro \
+    -v $(pwd)/bkps:/export/dump \
+    bookstack-file-exporter:0.0.1
+```
+Required Options:
+| option | description |
+| ------ | ----------- |
+| `config.yml` file mount | Provide a valid configuration file. Specified in example as read only: `-v ${CURDIR}/local/config.yml:/export/config/config.yml:ro`, `${USER_LOCAL_PATH}:${STATIC_DOCKER_PATH}` |
+| `dump` file mount | Directory to place exports. Specified in example: `-v ${CURDIR}/bkps:/export/dump`, `${USER_LOCAL_PATH}:${STATIC_DOCKER_PATH}` |
+
+Tokens and other options can be specified, example:
+```bash
+# '-e' flag for env vars
+# --user flag to override the uid/gid for created files
+docker run -i \
+    -e LOG_LEVEL='debug' \
+    -e BOOKSTACK_TOKEN_ID='xyz' \
+    -e BOOKSTACK_TOKEN_SECRET='xyz' \
+    --user 1000:1000 \
+    -v $(pwd)/local/config.yml:/export/config/config.yml:ro \
+    -v $(pwd):/export/dump \
+    bookstack-file-exporter:0.0.1
+```
+
 ### Authentication
 **Note visibility of pages is based on user**, so use a user that has access to pages you want to back up

From daab5163101bf3a83826d089412622d767550840 Mon Sep 17 00:00:00 2001
From: pchang388
Date: Mon, 2 Oct 2023 04:15:17 -0400
Subject: [PATCH 19/20] update readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 08c0eac..95ea79c 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ Tokens and other options can be specified, example:
 ```bash
 # '-e' flag for env vars
 # --user flag to override the uid/gid for created files
-docker run -i \
+docker run \
     -e LOG_LEVEL='debug' \
     -e BOOKSTACK_TOKEN_ID='xyz' \
     -e BOOKSTACK_TOKEN_SECRET='xyz' \

From 51308bb5e1d370d47877706bff3fbd8ae52b10f0 Mon Sep 17 00:00:00 2001
From: pchang388
Date: Mon, 2 Oct 2023 04:17:39 -0400
Subject: [PATCH 20/20] update readme

---
 .github/workflows/on_pr_merged.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/on_pr_merged.yml b/.github/workflows/on_pr_merged.yml
index 6e6d6cd..9f6ce6a 100644
--- a/.github/workflows/on_pr_merged.yml
+++ b/.github/workflows/on_pr_merged.yml
@@ -96,4 +96,5 @@
       - uses: ncipollo/release-action@v1
         with:
           tag: ${{ env.VERSION }}
+          generateReleaseNotes: true
       # docker image tag latest
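The release workflow's version stamping can be dry-run locally. A sketch, assuming GNU sed and the `Makefile`/`setup.cfg` contents shown in the patches above (the extracted value is illustrative):

```bash
# pull the bare version out of the Makefile: IMAGE_TAG=0.0.1 -> 0.0.1
VERSION=$(grep -E '^IMAGE_TAG=' Makefile | cut -d '=' -f 2)
echo "${VERSION}"

# stamp it into setup.cfg; double quotes so the shell expands ${VERSION}
sed -i "s/^version = [^ ]*/version = ${VERSION}/" setup.cfg
grep '^version' setup.cfg   # -> version = 0.0.1
```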