From 36370673cd65ee98d36386f0b0223bb38dd87e26 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sat, 30 Sep 2023 04:00:07 -0400 Subject: [PATCH 01/20] wip: lint changes --- Dockerfile | 1 - Makefile | 4 +- bookstack_file_exporter/__init__.py | 1 - bookstack_file_exporter/__main__.py | 8 ++-- bookstack_file_exporter/archiver/archiver.py | 36 ++++++++++------ bookstack_file_exporter/exporter/__init__.py | 1 - bookstack_file_exporter/exporter/exporter.py | 45 ++++++++++---------- bookstack_file_exporter/exporter/node.py | 31 +++++++++----- bookstack_file_exporter/exporter/util.py | 6 +-- bookstack_file_exporter/run.py | 19 ++++----- bookstack_file_exporter/run_args.py | 11 +++-- 11 files changed, 90 insertions(+), 73 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2c85e69..d19d8db 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,3 @@ - ARG BASE_IMAGE=python ARG BASE_IMAGE_TAG=3.11-slim-python diff --git a/Makefile b/Makefile index 94de42c..d05cac5 100644 --- a/Makefile +++ b/Makefile @@ -23,9 +23,11 @@ docker_build: docker_push: docker push ${IMAGE_NAME}:${IMAGE_TAG} +# add -i option due to bug in rancher desktop: https://github.com/rancher-sandbox/rancher-desktop/issues/3239 docker_test: - docker run \ + docker run -i \ -e LOG_LEVEL='debug' \ + --user 1000:1000 \ -v ${CURDIR}/local/config.yml:/export/config/config.yml:ro \ -v ${CURDIR}/bkps:/export/dump \ ${IMAGE_NAME}:${IMAGE_TAG} \ No newline at end of file diff --git a/bookstack_file_exporter/__init__.py b/bookstack_file_exporter/__init__.py index 8b13789..e69de29 100644 --- a/bookstack_file_exporter/__init__.py +++ b/bookstack_file_exporter/__init__.py @@ -1 +0,0 @@ - diff --git a/bookstack_file_exporter/__main__.py b/bookstack_file_exporter/__main__.py index c13bfcc..d91a0df 100644 --- a/bookstack_file_exporter/__main__.py +++ b/bookstack_file_exporter/__main__.py @@ -1,18 +1,16 @@ import argparse import logging -from typing import Dict, List, Union from bookstack_file_exporter import run from bookstack_file_exporter import run_args def main(): + """run entrypoint""" args: argparse.Namespace = run_args.get_args() - logging.basicConfig(format='%(asctime)s [%(levelname)s] %(message)s', - level=run_args.get_log_level(args.log_level), datefmt='%Y-%m-%d %H:%M:%S') - + level=run_args.get_log_level(args.log_level), datefmt='%Y-%m-%d %H:%M:%S') run.exporter(args) if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index bc01d8a..d1d76ad 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -28,42 +28,50 @@ class Archiver: """ - Archiver pulls all the necessary files from upstream and then pushes them to the specified backup location(s) + Archiver pulls all the necessary files from upstream + and then pushes them to the specified backup location(s) Args: - :root_dir: str (required) = the base directory for which the files will be placed . - :add_meta: bool (required) = whether or not to add metadata json files for each page, book, chapter, and/or shelve. + :root_dir: str (required) = the base directory for + which the files will be placed . + :add_meta: bool (required) = whether or not to add + metadata json files for each page, book, chapter, and/or shelve. :base_page_url: str (required) = the full url and path to get page content. 
:headers: Dict[str, str] (required) = the headers which include the Authorization to use Returns: - Archiver instance with attributes that are accessible for use for file level archival and backup. + Archiver instance with attributes that are + accessible for use for file level archival and backup. """ - def __init__(self, base_dir: str, add_meta: Union[bool, None], base_page_url: str, headers: Dict[str, str]): + def __init__(self, base_dir: str, add_meta: Union[bool, None], + base_page_url: str, headers: Dict[str, str]): self.base_dir = base_dir self.add_meta = add_meta self.base_page_url = base_page_url self._headers = headers self._root_dir = self.generate_root_folder(self.base_dir) - # the tar file will be name of parent export directory, bookstack-, and .tgz extension + # the tar file will be name of + # parent export directory, bookstack-, and .tgz extension self._tar_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}" # remote_system to function mapping self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3} - + # create local tarball first def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]): + """create a .tgz of all page content""" for _, page in page_nodes.items(): for format in export_formats: self._gather(page, format) self._tar_dir() - + # convert to bytes to be agnostic to end destination (future use case?) def _gather(self, page_node: Node, export_format: str): raw_data = self._get_data_format(page_node.id, export_format) log.debug(f"Output directory for exports set to: {self._root_dir}") self._gather_local(page_node.file_path, raw_data, export_format, page_node.meta) - - def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]): + + def _gather_local(self, page_path: str, data: bytes, + export_format: str, meta_data: Union[bytes, None]): # get path to page file_path = f"{self._root_dir}/{page_path}" # add extension to page path @@ -78,7 +86,7 @@ def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]): if remote_targets: for key, value in remote_targets.items(): self._remote_exports[key](value) - + def _tar_dir(self): util.create_tar(self._root_dir, _FILE_EXTENSION_MAP['tar']) @@ -104,10 +112,10 @@ def _clean(self, clean_up_archive: Union[bool, None]): def _get_data_format(self, page_node_id: int, export_format: str) -> bytes: url = self._get_export_url(node_id=page_node_id, export_format=export_format) return util.get_byte_response(url=url, headers=self._headers) - + def _get_export_url(self, node_id: int, export_format: str) -> str: return f"{self.base_page_url}/{node_id}/{_EXPORT_API_PATH}/{export_format}" - + @staticmethod def generate_root_folder(base_folder_name: str) -> str: - return base_folder_name + "_" + datetime.now().strftime(_DATE_STR_FORMAT) \ No newline at end of file + return base_folder_name + "_" + datetime.now().strftime(_DATE_STR_FORMAT) diff --git a/bookstack_file_exporter/exporter/__init__.py b/bookstack_file_exporter/exporter/__init__.py index 8b13789..e69de29 100644 --- a/bookstack_file_exporter/exporter/__init__.py +++ b/bookstack_file_exporter/exporter/__init__.py @@ -1 +0,0 @@ - diff --git a/bookstack_file_exporter/exporter/exporter.py b/bookstack_file_exporter/exporter/exporter.py index 11dc798..4777276 100644 --- a/bookstack_file_exporter/exporter/exporter.py +++ b/bookstack_file_exporter/exporter/exporter.py @@ -1,24 +1,15 @@ -from typing import Dict, List, Union +from typing import Dict, List import logging -import 
bookstack_file_exporter.exporter.util as util +from bookstack_file_exporter.exporter import util from bookstack_file_exporter.exporter.node import Node -from bookstack_file_exporter.config_helper.config_helper import ConfigNode - - log = logging.getLogger(__name__) -# _API_SUFFIX_PATHS = { -# "shelves": "api/shelves", -# "books": "api/books", -# "chapters": "api/chapters", -# "pages": "api/pages" -# } - class NodeExporter(): """ - NodeExporter class provides an interface to help create Bookstack resources/nodes (pages, books, etc) and their relationships. + NodeExporter class provides an interface to help create + Bookstack resources/nodes (pages, books, etc) and their relationships. Raises: @@ -39,16 +30,18 @@ def get_all_shelves(self) -> Dict[int, Node]: log.warning("No shelves found in given Bookstack instance") return {} return self._get_parents(base_url, all_parents) - - def _get_parents(self, base_url: str, parent_ids: List[int], path_prefix: str = "") -> Dict[int, Node]: + + def _get_parents(self, base_url: str, parent_ids: List[int], + path_prefix: str = "") -> Dict[int, Node]: parent_nodes = {} for parent_id in parent_ids: parent_url = f"{base_url}/{parent_id}" parent_data = util.get_json_response(url=parent_url, headers=self.headers) parent_nodes[parent_id] = Node(parent_data, path_prefix=path_prefix) return parent_nodes - + def get_chapter_nodes(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]: + """ get chapter nodes """ # Chapters are treated a little differently # They are under books like pages but have their own children # i.e. not a terminal node @@ -59,7 +52,8 @@ def get_chapter_nodes(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]: return {} return self._get_chapters(base_url, all_chapters, book_nodes) - def _get_chapters(self, base_url: str, all_chapters: List[int], book_nodes: Dict[int, Node]) -> Dict[int, Node]: + def _get_chapters(self, base_url: str, all_chapters: List[int], + book_nodes: Dict[int, Node]) -> Dict[int, Node]: chapter_nodes = {} for chapter_id in all_chapters: chapter_url = f"{base_url}/{chapter_id}" @@ -67,12 +61,15 @@ def _get_chapters(self, base_url: str, all_chapters: List[int], book_nodes: Dict book_id = chapter_data['book_id'] chapter_nodes[chapter_id] = Node(chapter_data, book_nodes[book_id]) return chapter_nodes - - def get_child_nodes(self, resource_type: str, parent_nodes: Dict[int, Node], filter_empty: bool = True): + + def get_child_nodes(self, resource_type: str, parent_nodes: Dict[int, Node], + filter_empty: bool = True) -> Dict[int, Node]: + """get child nodes from a book/chapter/shelf""" base_url = self.api_urls[resource_type] return self._get_children(base_url, parent_nodes, filter_empty) - def _get_children(self, base_url: str, parent_nodes: Dict[int, Node], filter_empty: bool) -> Dict[int, Node]: + def _get_children(self, base_url: str, parent_nodes: Dict[int, Node], + filter_empty: bool) -> Dict[int, Node]: child_nodes = {} for _, parent in parent_nodes.items(): if parent.children: @@ -88,7 +85,9 @@ def _get_children(self, base_url: str, parent_nodes: Dict[int, Node], filter_emp child_nodes[child_id] = child_node return child_nodes - def get_unassigned_books(self, existing_resources: Dict[int, Node], path_prefix: str) -> Dict[int, Node]: + def get_unassigned_books(self, existing_resources: Dict[int, Node], + path_prefix: str) -> Dict[int, Node]: + """get books not under a shelf""" base_url = self.api_urls["books"] all_resources: List[int] = util.get_all_ids(url=base_url, headers=self.headers) unassigned = [] @@ -103,6 
+102,7 @@ def get_unassigned_books(self, existing_resources: Dict[int, Node], path_prefix: # convenience function def get_all_books(self, shelve_nodes: Dict[int, Node], unassigned_dir: str) -> Dict[int, Node]: + """get all books""" book_nodes = {} # get books in shelves if shelve_nodes: @@ -119,9 +119,10 @@ def get_all_books(self, shelve_nodes: Dict[int, Node], unassigned_dir: str) -> D book_nodes[key] = value return book_nodes - + # convenience function def get_all_pages(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]: + """get all pages and their content""" ## chapters (if exists) # chapter nodes are treated a little differently # chapters are children under books diff --git a/bookstack_file_exporter/exporter/node.py b/bookstack_file_exporter/exporter/node.py index aa37d33..413e644 100644 --- a/bookstack_file_exporter/exporter/node.py +++ b/bookstack_file_exporter/exporter/node.py @@ -9,19 +9,26 @@ class Node(): """ - Node class provides an interface to create bookstack child/parent relationships for resources like pages, books, chapters, and shelves. + Node class provides an interface to create bookstack child/parent + relationships for resources like pages, books, chapters, and shelves. Args: - metadata: Dict[str, Union[str, int]] (required) = The metadata of the resource from bookstack api - parent: Union['Node', None] (optional) = The parent resource if any, parent/children are also of the same class 'Node'. - path_prefix: Union[str, None] (optional) = This appends a relative 'root' directory to the child resource path/file_name. - It is mainly used to prepend a shelve level directory for books that are not assigned or under any shelf. + metadata: Dict[str, Union[str, int]] (required) + = The metadata of the resource from bookstack api + parent: Union['Node', None] (optional) + = The parent resource if any, parent/children are also of the same class 'Node'. + path_prefix: Union[str, None] (optional) + = This appends a relative 'root' directory to the child resource path/file_name. + It is mainly used to prepend a shelve level + directory for books that are not assigned or under any shelf. Returns: - Node instance to help create and reference bookstack child/parent relationships for resources like pages, books, chapters, and shelves. + Node instance to help create and reference bookstack child/parent + relationships for resources like pages, books, chapters, and shelves. 
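+
+    Example (an illustrative sketch; assumes `book_meta` and `page_meta` are
+    metadata dicts already fetched from the Bookstack API):
+        book = Node(book_meta)
+        page = Node(page_meta, parent=book)
+        page.file_path  # resolves to something like "book-slug/page-slug"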
""" - def __init__(self, meta: Dict[str, Union[str, int]], parent: Union['Node', None] = None, path_prefix: str = ""): + def __init__(self, meta: Dict[str, Union[str, int]], + parent: Union['Node', None] = None, path_prefix: str = ""): self.meta = meta self._parent = parent self._path_prefix = path_prefix @@ -33,7 +40,6 @@ def __init__(self, meta: Dict[str, Union[str, int]], parent: Union['Node', None] self._children = self._get_children() # if parent self._file_path = self._get_file_path() - def _get_file_path(self) -> str: if self._parent: @@ -48,9 +54,10 @@ def _get_children(self) -> List[Dict[str, Union[str, int]]]: children = self.meta[match] break return children - + @property def file_path(self): + """get the base file path""" # check to see if parent exists if not self._file_path: # return base path + name if no parent @@ -61,10 +68,12 @@ def file_path(self): @property def children(self): + """return all children of a book/chapter/shelf""" return self._children - + @property def empty(self): + """return True if page node lacks content""" if not self.name and self._display_name == _NULL_PAGE_NAME: return True - return False \ No newline at end of file + return False diff --git a/bookstack_file_exporter/exporter/util.py b/bookstack_file_exporter/exporter/util.py index 3cb5ceb..4e5c9f1 100644 --- a/bookstack_file_exporter/exporter/util.py +++ b/bookstack_file_exporter/exporter/util.py @@ -1,17 +1,17 @@ from typing import Dict, Union, List import logging -from bookstack_file_exporter.exporter.node import Node from bookstack_file_exporter.common import util log = logging.getLogger(__name__) def get_json_response(url: str, headers: Dict[str, str]) -> List[Dict[str, Union[str,int]]]: + """get http response data in json format""" response = util.http_get_request(url=url, headers=headers) return response.json() def get_all_ids(url: str, headers: Dict[str, str]) -> List[int]: + """get all ids for a bookstack resource""" ids_api_meta = get_json_response(url=url, headers=headers) if ids_api_meta: return [item['id'] for item in ids_api_meta['data']] - else: - return [] + return [] diff --git a/bookstack_file_exporter/run.py b/bookstack_file_exporter/run.py index 629dd9a..178040f 100644 --- a/bookstack_file_exporter/run.py +++ b/bookstack_file_exporter/run.py @@ -1,22 +1,21 @@ import argparse import sys import logging -from time import sleep from typing import Dict from bookstack_file_exporter.config_helper.config_helper import ConfigNode from bookstack_file_exporter.exporter.node import Node from bookstack_file_exporter.exporter.exporter import NodeExporter -from bookstack_file_exporter.archiver import util as archiver_util from bookstack_file_exporter.archiver.archiver import Archiver log = logging.getLogger(__name__) def exporter(args: argparse.Namespace): + """export bookstack nodes and archive locally and/or remotely""" ## get configuration from helper config = ConfigNode(args) - ## convenience vars + ## convenience vars bookstack_headers = config.headers api_urls = config.urls export_formats = config.user_inputs.formats @@ -29,24 +28,24 @@ def exporter(args: argparse.Namespace): log.info("Beginning export") ## Use exporter class to get all the resources (pages, books, etc.) 
and their relationships
-    exportHelper = NodeExporter(api_urls, bookstack_headers)
+    export_helper = NodeExporter(api_urls, bookstack_headers)
     ## shelves
-    shelve_nodes: Dict[int, Node] = exportHelper.get_all_shelves()
+    shelve_nodes: Dict[int, Node] = export_helper.get_all_shelves()
     ## books
-    book_nodes: Dict[int, Node] = exportHelper.get_all_books(shelve_nodes, unassigned_dir)
+    book_nodes: Dict[int, Node] = export_helper.get_all_books(shelve_nodes, unassigned_dir)
     ## pages
-    page_nodes: Dict[int, Node] = exportHelper.get_all_pages(book_nodes)
+    page_nodes: Dict[int, Node] = export_helper.get_all_pages(book_nodes)
 
     if not page_nodes:
         log.warning("No page data available from given Bookstack instance. Nothing to archive")
         sys.exit(0)
-
     log.info("Beginning archive")
     ## start archive ##
-    archive: Archiver = Archiver(base_export_dir, config.user_inputs.export_meta, page_base_url, bookstack_headers)
+    archive: Archiver = Archiver(base_export_dir, config.user_inputs.export_meta,
+                                 page_base_url, bookstack_headers)
     # create tar
     archive.archive(page_nodes, export_formats)
     # archive to remote targets
     archive.archive_remote(config.object_storage_config)
     # if remote target is specified and clean is true
     # clean up the .tgz archive since it is already uploaded
-    archive.clean_up(config.user_inputs.clean_up)
\ No newline at end of file
+    archive.clean_up(config.user_inputs.clean_up)
diff --git a/bookstack_file_exporter/run_args.py b/bookstack_file_exporter/run_args.py
index a455d3c..7d7b776 100644
--- a/bookstack_file_exporter/run_args.py
+++ b/bookstack_file_exporter/run_args.py
@@ -1,5 +1,4 @@
 import argparse
-from typing import Dict, List
 import logging
 
 LOG_LEVEL = {
@@ -10,24 +9,28 @@
 }
 
 def get_log_level(log_level:str) -> int:
+    """return log level int"""
     return LOG_LEVEL.get(log_level)
 
 def get_args() -> argparse.Namespace:
+    """return user cmd line options"""
     parser = argparse.ArgumentParser(description='BookStack File Exporter')
     parser.add_argument('-c', '--config-file', type=str, default="data/config.yml",
-                    help='Provide a configuration file (full or relative path). See README for more details')
+                    help='''Provide a configuration file (full or relative path).
+                        See README for more details''')
     parser.add_argument('-o', '--output-dir', type=str, default="",
-                    help='Optional, specify an output directory. This can also be specified in the config.yml file')
+                    help='''Optional, specify an output directory.
+                        This can also be specified in the config.yml file''')
     parser.add_argument('-v', '--log-level', type=str.lower, default='info',
                     help='Set verbosity level for logging.', choices=LOG_LEVEL.keys())
-    return parser.parse_args()
\ No newline at end of file
+    return parser.parse_args()

From 69930887cfb27720eb94e0e86cbe7b280b2a303b Mon Sep 17 00:00:00 2001
From: Peter Chang
Date: Sun, 1 Oct 2023 00:29:28 -0400
Subject: [PATCH 02/20] wip

---
 bookstack_file_exporter/archiver/archiver.py | 43 ++++++++++++--------
 bookstack_file_exporter/archiver/util.py     | 36 ++++++++++++----
 2 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py
index d1d76ad..028e20a 100644
--- a/bookstack_file_exporter/archiver/archiver.py
+++ b/bookstack_file_exporter/archiver/archiver.py
@@ -1,5 +1,4 @@
 from typing import List, Dict, Union
-from time import sleep
 from datetime import datetime
 import logging
 
@@ -52,7 +51,7 @@ def __init__(self, base_dir: str, add_meta: Union[bool, None],
         self._root_dir = self.generate_root_folder(self.base_dir)
         # the tar file will be name of
         # parent export directory, bookstack-, and .tgz extension
-        self._tar_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}"
+        self._archive_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}"
         # remote_system to function mapping
         self._remote_exports = {'minio': self._archive_minio, 's3': self._archive_s3}
 
@@ -60,29 +59,35 @@ def __init__(self, base_dir: str, add_meta: Union[bool, None],
     def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]):
         """create a .tgz of all page content"""
         for _, page in page_nodes.items():
-            for format in export_formats:
-                self._gather(page, format)
-        self._tar_dir()
+            for ex_format in export_formats:
+                self._gather(page, ex_format)
+        # self._tar_dir()
+        self._gzip_tar()
 
     # convert to bytes to be agnostic to end destination (future use case?)
def _gather(self, page_node: Node, export_format: str): raw_data = self._get_data_format(page_node.id, export_format) - log.debug(f"Output directory for exports set to: {self._root_dir}") self._gather_local(page_node.file_path, raw_data, export_format, page_node.meta) def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]): # get path to page - file_path = f"{self._root_dir}/{page_path}" - # add extension to page path - file_full_name = f"{file_path}{_FILE_EXTENSION_MAP[export_format]}" - util.write_bytes(file_path=file_full_name, data=data) - if self.add_meta: - meta_file_name = f"{file_path}{_FILE_EXTENSION_MAP['meta']}" - util.dump_json(file_name=meta_file_name, data=meta_data) + # file_path = f"{self._root_dir}/{page_path}" + # # add extension to page path + # file_full_name = f"{file_path}{_FILE_EXTENSION_MAP[export_format]}" + # log.debug("Output directory for page export set to: %s", file_full_name) + + page_file = f"{page_path}{_FILE_EXTENSION_MAP[export_format]}" + tar_file = f"{self._root_dir}.tar" + util.write_bytes(tar_file, file_path=page_file, data=data) + + # if self.add_meta: + # meta_file_name = f"{file_path}{_FILE_EXTENSION_MAP['meta']}" + # util.dump_json(file_name=meta_file_name, data=meta_data) # send to remote systems def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]): + """for each target, do their respective tasks""" if remote_targets: for key, value in remote_targets.items(): self._remote_exports[key](value) @@ -90,23 +95,28 @@ def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]): def _tar_dir(self): util.create_tar(self._root_dir, _FILE_EXTENSION_MAP['tar']) + def _gzip_tar(self): + tar_file = f"{self._root_dir}.tar" + util.create_gzip(tar_file, self._archive_file) + def _archive_minio(self, config: StorageProviderConfig): minio_archiver = MinioArchiver(config) - minio_archiver.upload_backup(self._tar_file) + minio_archiver.upload_backup(self._archive_file) def _archive_s3(self, config: StorageProviderConfig): pass def clean_up(self, clean_up_archive: Union[bool, None]): + """remove archive after sending to remote target""" self._clean(clean_up_archive) def _clean(self, clean_up_archive: Union[bool, None]): # remove data root directory since we already have the .tgz file now - util.remove_dir(self._root_dir) + # util.remove_dir(self._root_dir) # if user is uploading to object storage # delete the local .tgz archive since we have it there already if clean_up_archive: - util.remove_file(self._tar_file) + util.remove_file(self._archive_file) # convert page data to bytes def _get_data_format(self, page_node_id: int, export_format: str) -> bytes: @@ -118,4 +128,5 @@ def _get_export_url(self, node_id: int, export_format: str) -> str: @staticmethod def generate_root_folder(base_folder_name: str) -> str: + """return base archive name""" return base_folder_name + "_" + datetime.now().strftime(_DATE_STR_FORMAT) diff --git a/bookstack_file_exporter/archiver/util.py b/bookstack_file_exporter/archiver/util.py index c19bc4d..a403175 100644 --- a/bookstack_file_exporter/archiver/util.py +++ b/bookstack_file_exporter/archiver/util.py @@ -5,23 +5,39 @@ import logging import tarfile import shutil +from io import BytesIO +import gzip from bookstack_file_exporter.common import util log = logging.getLogger(__name__) def get_byte_response(url: str, headers: Dict[str, str]) -> bytes: + """get byte response from http request""" response = util.http_get_request(url=url, headers=headers) return 
response.content -def write_bytes(file_path: str, data: bytes): - path_file = Path(file_path) - # create parent directories as needed, ignore already exists errors - path_file.parent.mkdir(parents=True, exist_ok=True) - path_file.write_bytes(data) +# def write_bytes(file_path: str, data: bytes): +# """write byte data to file""" +# path_file = Path(file_path) +# # create parent directories as needed, ignore already exists errors +# path_file.parent.mkdir(parents=True, exist_ok=True) +# path_file.write_bytes(data) + +def write_bytes(base_tar_dir: str, file_path: str, data: bytes): + """write byte data to file""" + log.info("Opening tar file: %s", base_tar_dir) + with tarfile.open(base_tar_dir, "a") as tar: + data_obj = BytesIO(data) + tar_info = tarfile.TarInfo(name=file_path) + tar_info.size = data_obj.getbuffer().nbytes + log.info(tar_info) + log.info(tar_info.size) + tar.addfile(tar_info, fileobj=data_obj) def dump_json(file_name: str, data: Dict[str, Union[str, int]]): - with open(file_name, 'w') as fp: + """dump dict to json file""" + with open(file_name, 'w', encoding="utf-8") as fp: json.dump(data, fp, indent=4) # set as function in case we want to do checks or final actions later @@ -43,4 +59,10 @@ def create_tar(export_path: str, file_extension: str): # create tar file with tarfile.open(tar_path, "w:gz") as tar: # add export directory to dump - tar.add(str(parent_abs_path), arcname='.') \ No newline at end of file + tar.add(str(parent_abs_path), arcname='.') + +def create_gzip(tar_file: str, gzip_file: str): + with open(tar_file, 'rb') as f_in: + with gzip.open(gzip_file, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + remove_file(tar_file) \ No newline at end of file From 432f3c3205605ec4ddb446aa9e6f300e0724020c Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 02:02:27 -0400 Subject: [PATCH 03/20] refactor archive to be more efficient with tgz file --- bookstack_file_exporter/archiver/archiver.py | 44 ++++++++------------ bookstack_file_exporter/archiver/util.py | 40 ++++-------------- 2 files changed, 25 insertions(+), 59 deletions(-) diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 028e20a..9253325 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -9,7 +9,8 @@ log = logging.getLogger(__name__) -_META_FILE_SUFFIX = "_meta" +_META_FILE_SUFFIX = "_meta.json" +_TAR_SUFFIX = ".tar" _TAR_GZ_SUFFIX = ".tgz" _EXPORT_API_PATH = "export" @@ -19,8 +20,9 @@ "html": ".html", "pdf": ".pdf", "plaintext": ".txt", - "meta": f"{_META_FILE_SUFFIX}.json", - "tar": _TAR_GZ_SUFFIX + "meta": _META_FILE_SUFFIX, + "tar": _TAR_SUFFIX, + "tgz": _TAR_GZ_SUFFIX } _DATE_STR_FORMAT = "%Y-%m-%d_%H-%M-%S" @@ -49,9 +51,13 @@ def __init__(self, base_dir: str, add_meta: Union[bool, None], self.base_page_url = base_page_url self._headers = headers self._root_dir = self.generate_root_folder(self.base_dir) - # the tar file will be name of + # the tgz file will be name of # parent export directory, bookstack-, and .tgz extension - self._archive_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}" + self._archive_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tgz']}" + # name of intermediate tar file before gzip + self._tar_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}" + # name of the base folder to use within the tgz archive + self._archive_base_path = self._root_dir.split("/")[-1] # remote_system to function mapping self._remote_exports = {'minio': 
self._archive_minio, 's3': self._archive_s3} @@ -61,7 +67,6 @@ def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]): for _, page in page_nodes.items(): for ex_format in export_formats: self._gather(page, ex_format) - # self._tar_dir() self._gzip_tar() # convert to bytes to be agnostic to end destination (future use case?) @@ -71,19 +76,12 @@ def _gather(self, page_node: Node, export_format: str): def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]): - # get path to page - # file_path = f"{self._root_dir}/{page_path}" - # # add extension to page path - # file_full_name = f"{file_path}{_FILE_EXTENSION_MAP[export_format]}" - # log.debug("Output directory for page export set to: %s", file_full_name) - - page_file = f"{page_path}{_FILE_EXTENSION_MAP[export_format]}" - tar_file = f"{self._root_dir}.tar" - util.write_bytes(tar_file, file_path=page_file, data=data) - - # if self.add_meta: - # meta_file_name = f"{file_path}{_FILE_EXTENSION_MAP['meta']}" - # util.dump_json(file_name=meta_file_name, data=meta_data) + page_file_name = f"{self._archive_base_path}/{page_path}{_FILE_EXTENSION_MAP[export_format]}" + util.write_bytes(self._tar_file, file_path=page_file_name, data=data) + if self.add_meta: + meta_file_name = f"{self._archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}" + bytes_meta = util.get_json_bytes(meta_data) + util.write_bytes(self._tar_file, file_path=meta_file_name, data=bytes_meta) # send to remote systems def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]): @@ -92,12 +90,8 @@ def archive_remote(self, remote_targets: Dict[str, StorageProviderConfig]): for key, value in remote_targets.items(): self._remote_exports[key](value) - def _tar_dir(self): - util.create_tar(self._root_dir, _FILE_EXTENSION_MAP['tar']) - def _gzip_tar(self): - tar_file = f"{self._root_dir}.tar" - util.create_gzip(tar_file, self._archive_file) + util.create_gzip(self._tar_file, self._archive_file) def _archive_minio(self, config: StorageProviderConfig): minio_archiver = MinioArchiver(config) @@ -111,8 +105,6 @@ def clean_up(self, clean_up_archive: Union[bool, None]): self._clean(clean_up_archive) def _clean(self, clean_up_archive: Union[bool, None]): - # remove data root directory since we already have the .tgz file now - # util.remove_dir(self._root_dir) # if user is uploading to object storage # delete the local .tgz archive since we have it there already if clean_up_archive: diff --git a/bookstack_file_exporter/archiver/util.py b/bookstack_file_exporter/archiver/util.py index a403175..c2db168 100644 --- a/bookstack_file_exporter/archiver/util.py +++ b/bookstack_file_exporter/archiver/util.py @@ -17,52 +17,26 @@ def get_byte_response(url: str, headers: Dict[str, str]) -> bytes: response = util.http_get_request(url=url, headers=headers) return response.content -# def write_bytes(file_path: str, data: bytes): -# """write byte data to file""" -# path_file = Path(file_path) -# # create parent directories as needed, ignore already exists errors -# path_file.parent.mkdir(parents=True, exist_ok=True) -# path_file.write_bytes(data) - def write_bytes(base_tar_dir: str, file_path: str, data: bytes): - """write byte data to file""" - log.info("Opening tar file: %s", base_tar_dir) + """write byte data to tar file""" with tarfile.open(base_tar_dir, "a") as tar: data_obj = BytesIO(data) tar_info = tarfile.TarInfo(name=file_path) tar_info.size = data_obj.getbuffer().nbytes - log.info(tar_info) - log.info(tar_info.size) + 
log.debug("Adding file: %s with size: %d bytes to tar file", tar_info.name, tar_info.size) tar.addfile(tar_info, fileobj=data_obj) -def dump_json(file_name: str, data: Dict[str, Union[str, int]]): +def get_json_bytes(data: Dict[str, Union[str, int]]) -> bytes: """dump dict to json file""" - with open(file_name, 'w', encoding="utf-8") as fp: - json.dump(data, fp, indent=4) + return json.dumps(data, indent=4).encode('utf-8') # set as function in case we want to do checks or final actions later -def remove_dir(dir_path: str): - shutil.rmtree(dir_path) - def remove_file(file_path: str): os.remove(file_path) -def create_tar(export_path: str, file_extension: str): - # path of the export dir - output_path = Path(export_path) - # create tar in parent of export dir - # get abs path of parent - parent_path = output_path.parent - parent_abs_path = parent_path.resolve() - # set tar file path - tar_path = f"{export_path}{file_extension}" - # create tar file - with tarfile.open(tar_path, "w:gz") as tar: - # add export directory to dump - tar.add(str(parent_abs_path), arcname='.') - -def create_gzip(tar_file: str, gzip_file: str): +def create_gzip(tar_file: str, gzip_file: str, remove_old: bool = True): with open(tar_file, 'rb') as f_in: with gzip.open(gzip_file, 'wb') as f_out: shutil.copyfileobj(f_in, f_out) - remove_file(tar_file) \ No newline at end of file + if remove_old: + remove_file(tar_file) \ No newline at end of file From 314d8125c318e9d53bc0473aa9d1f2d7226bdc81 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:06:29 -0400 Subject: [PATCH 04/20] wip fixed lint warnings for >90% score --- .github/workflows/pylint.yml | 23 + .pylintrc | 633 ++++++++++++++++++ bookstack_file_exporter/archiver/__init__.py | 1 - bookstack_file_exporter/archiver/archiver.py | 7 +- .../archiver/minio_archiver.py | 29 +- bookstack_file_exporter/archiver/util.py | 7 +- bookstack_file_exporter/common/__init__.py | 1 - bookstack_file_exporter/common/util.py | 18 +- .../config_helper/__init__.py | 1 - .../config_helper/config_helper.py | 43 +- .../config_helper/models.py | 7 +- .../config_helper/remote.py | 19 +- bookstack_file_exporter/exporter/exporter.py | 9 +- bookstack_file_exporter/exporter/node.py | 2 +- bookstack_file_exporter/run.py | 2 + 15 files changed, 752 insertions(+), 50 deletions(-) create mode 100644 .github/workflows/pylint.yml create mode 100644 .pylintrc diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml new file mode 100644 index 0000000..10ad95b --- /dev/null +++ b/.github/workflows/pylint.yml @@ -0,0 +1,23 @@ +name: Pylint + +on: [push] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pylint + - name: Analysing the code with pylint + run: | + pylint $(git ls-files '*.py') diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..f0c2702 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,633 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. 
Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. +fail-under=10 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis). It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Minimum Python version to use for version dependent checks. Will default to +# the version used to run pylint. +py-version=3.11 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# Add paths to the list of the source roots. Supports globbing patterns. 
The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. +source-roots= + +# When enabled, pylint would attempt to guess common misconfiguration and emit +# user-friendly hints instead of false-positive error messages. +suggestion-mode=yes + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. 
+#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. +module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type alias names. If left empty, type +# alias names will be checked with the set naming style. +#typealias-rgx= + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp, + asyncSetUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + +[FORMAT] + +# Expected format of line ending, e.g. 
empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Maximum number of characters on a single line. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. +preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + missing-module-docstring + + +# Enable the message, report, category or checker with the given id(s). 
You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable=c-extension-no-member + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are text, parseable, colorized, json +# and msvs (visual studio). You can also give a reporter class, e.g. +# mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=4 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. No available dictionaries : You need to install +# both the python package and the system dependency for enchant to work.. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. 
+spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. +check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The minimum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. +missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. 
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io diff --git a/bookstack_file_exporter/archiver/__init__.py b/bookstack_file_exporter/archiver/__init__.py index 8b13789..e69de29 100644 --- a/bookstack_file_exporter/archiver/__init__.py +++ b/bookstack_file_exporter/archiver/__init__.py @@ -1 +0,0 @@ - diff --git a/bookstack_file_exporter/archiver/archiver.py b/bookstack_file_exporter/archiver/archiver.py index 9253325..f4e7ca7 100644 --- a/bookstack_file_exporter/archiver/archiver.py +++ b/bookstack_file_exporter/archiver/archiver.py @@ -34,7 +34,7 @@ class Archiver: Args: :root_dir: str (required) = the base directory for - which the files will be placed . + which the archive .tgz will be placed. :add_meta: bool (required) = whether or not to add metadata json files for each page, book, chapter, and/or shelve. :base_page_url: str (required) = the full url and path to get page content. @@ -71,12 +71,13 @@ def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]): # convert to bytes to be agnostic to end destination (future use case?) def _gather(self, page_node: Node, export_format: str): - raw_data = self._get_data_format(page_node.id, export_format) + raw_data = self._get_data_format(page_node.id_, export_format) self._gather_local(page_node.file_path, raw_data, export_format, page_node.meta) def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]): - page_file_name = f"{self._archive_base_path}/{page_path}{_FILE_EXTENSION_MAP[export_format]}" + page_file_name = f"{self._archive_base_path}/" \ + f"{page_path}{_FILE_EXTENSION_MAP[export_format]}" util.write_bytes(self._tar_file, file_path=page_file_name, data=data) if self.add_meta: meta_file_name = f"{self._archive_base_path}/{page_path}{_FILE_EXTENSION_MAP['meta']}" diff --git a/bookstack_file_exporter/archiver/minio_archiver.py b/bookstack_file_exporter/archiver/minio_archiver.py index b9a1cf8..3bd3f30 100644 --- a/bookstack_file_exporter/archiver/minio_archiver.py +++ b/bookstack_file_exporter/archiver/minio_archiver.py @@ -1,16 +1,24 @@ from typing import Union - -from bookstack_file_exporter.config_helper.remote import StorageProviderConfig -from bookstack_file_exporter.archiver import util +import logging from minio import Minio -import logging +from bookstack_file_exporter.config_helper.remote import StorageProviderConfig log = logging.getLogger(__name__) - class MinioArchiver: + """ + Class to handle minio object upload and validations. 
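+
+    Example (an illustrative sketch; `storage_config` stands in for a
+    StorageProviderConfig built from user-provided settings):
+        archiver = MinioArchiver(storage_config)
+        archiver.upload_backup("<export-name>.tgz")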
+ + Args: + config = minio configuration + bucket = upload bucket + path (optional) = specify bucket path for upload + + Returns: + MinioArchiver instance for archival use + """ def __init__(self, config: StorageProviderConfig): self._client = Minio( config.host, @@ -25,16 +33,16 @@ def __init__(self, config: StorageProviderConfig): def _validate_bucket(self): if not self._client.bucket_exists(self.bucket): raise ValueError(f"Given bucket does not exist: {self.bucket}") - + def _generate_path(self, path_name: Union[str, None]) -> str: if path_name: if path_name[-1] == '/': return path_name[:-1] - else: - return path_name + return path_name return "" - + def upload_backup(self, local_file_path: str): + """upload archive file to minio bucket""" # this will be the name of the object to upload # only get the file name not path # we are going to use path provided by user for object storage @@ -44,4 +52,5 @@ def upload_backup(self, local_file_path: str): else: object_path = file_name result = self._client.fput_object(self.bucket, object_path, local_file_path) - log.info(f"Created object: {result.object_name} with tag: {result.etag} and version-id: {result.version_id}") \ No newline at end of file + log.info("""Created object: %s with tag: %s and version-id: %s""", + result.object_name, result.etag, result.version_id) diff --git a/bookstack_file_exporter/archiver/util.py b/bookstack_file_exporter/archiver/util.py index c2db168..286a0c8 100644 --- a/bookstack_file_exporter/archiver/util.py +++ b/bookstack_file_exporter/archiver/util.py @@ -1,5 +1,4 @@ from typing import Dict, Union -from pathlib import Path import json import os import logging @@ -18,7 +17,7 @@ def get_byte_response(url: str, headers: Dict[str, str]) -> bytes: return response.content def write_bytes(base_tar_dir: str, file_path: str, data: bytes): - """write byte data to tar file""" + """append byte data to tar file""" with tarfile.open(base_tar_dir, "a") as tar: data_obj = BytesIO(data) tar_info = tarfile.TarInfo(name=file_path) @@ -32,11 +31,13 @@ def get_json_bytes(data: Dict[str, Union[str, int]]) -> bytes: # set as function in case we want to do checks or final actions later def remove_file(file_path: str): + """remove a file""" os.remove(file_path) def create_gzip(tar_file: str, gzip_file: str, remove_old: bool = True): + """create a gzip of an existing tar file and remove it""" with open(tar_file, 'rb') as f_in: with gzip.open(gzip_file, 'wb') as f_out: shutil.copyfileobj(f_in, f_out) if remove_old: - remove_file(tar_file) \ No newline at end of file + remove_file(tar_file) diff --git a/bookstack_file_exporter/common/__init__.py b/bookstack_file_exporter/common/__init__.py index 8b13789..e69de29 100644 --- a/bookstack_file_exporter/common/__init__.py +++ b/bookstack_file_exporter/common/__init__.py @@ -1 +0,0 @@ - diff --git a/bookstack_file_exporter/common/util.py b/bookstack_file_exporter/common/util.py index 99c0ead..cd67e5b 100644 --- a/bookstack_file_exporter/common/util.py +++ b/bookstack_file_exporter/common/util.py @@ -1,26 +1,32 @@ +import logging from typing import Tuple, Dict import requests from requests.adapters import HTTPAdapter, Retry -import logging log = logging.getLogger(__name__) def http_get_request(url: str, headers: Dict[str, str], timeout: int = 30) -> requests.Response: + """make http requests and return response object""" verify, url_prefix = should_verify(url) try: with requests.Session() as session: + # {backoff factor} * (2 ** ({number of previous retries})) + # {raise_on_status} if status falls 
in status_forcelist range + # and retries have been exhausted. + # {status_force_list} 429 is supposed to be included retries = Retry(total=3, - backoff_factor=0.5, # {backoff factor} * (2 ** ({number of previous retries})) - raise_on_status=True, # if status falls in status_forcelist range and retries have been exhausted. - status_forcelist=[ 500, 502, 503, 504 ]) # 429 is supposed to be included + backoff_factor=0.5, + raise_on_status=True, + status_forcelist=[ 500, 502, 503, 504 ]) session.mount(url_prefix, HTTPAdapter(max_retries=retries)) response = session.get(url, headers=headers, verify=verify, timeout=timeout) except Exception as req_err: - log.error(f"Failed to make request for {url}") + log.error("Failed to make request for %s", url) raise req_err return response def should_verify(url: str) -> Tuple[bool, str]: + """check if http or https""" if url.startswith("https://"): return (True, "https://") - return (False, "http://") \ No newline at end of file + return (False, "http://") diff --git a/bookstack_file_exporter/config_helper/__init__.py b/bookstack_file_exporter/config_helper/__init__.py index 8b13789..e69de29 100644 --- a/bookstack_file_exporter/config_helper/__init__.py +++ b/bookstack_file_exporter/config_helper/__init__.py @@ -1 +0,0 @@ - diff --git a/bookstack_file_exporter/config_helper/config_helper.py b/bookstack_file_exporter/config_helper/config_helper.py index b8c6968..697dc2a 100644 --- a/bookstack_file_exporter/config_helper/config_helper.py +++ b/bookstack_file_exporter/config_helper/config_helper.py @@ -1,9 +1,9 @@ import os -import json import argparse -import yaml -import logging from typing import Dict, Tuple +import logging + +import yaml from bookstack_file_exporter.config_helper import models from bookstack_file_exporter.config_helper.remote import StorageProviderConfig @@ -40,7 +40,8 @@ class ConfigNode: Arg parse from user input Returns: - ConfigNode object with attributes that are accessible for use for further downstream processes + ConfigNode object with attributes that are + accessible for use for further downstream processes Raises: YAMLError: if provided configuration file is not valid YAML @@ -61,7 +62,7 @@ def __init__(self, args: argparse.Namespace): def _generate_config(self, config_file: str) -> models.UserInput: if not os.path.isfile(config_file): raise FileNotFoundError(config_file) - with open(config_file, "r") as yaml_stream: + with open(config_file, "r", encoding="utf-8") as yaml_stream: try: yaml_input = yaml.safe_load(yaml_stream) except Exception as load_err: @@ -75,7 +76,7 @@ def _generate_config(self, config_file: str) -> models.UserInput: log.error("Yaml configuration failed schema validation") raise err return user_inputs - + def _generate_credentials(self) -> Tuple[str, str]: # if user provided credentials in config file, load them token_id = "" @@ -83,7 +84,7 @@ def _generate_credentials(self) -> Tuple[str, str]: if self.user_inputs.credentials: token_id = self.user_inputs.credentials.token_id token_secret = self.user_inputs.credentials.token_secret - + # check to see if env var is specified, if so, it takes precedence token_id = self._check_var(_BOOKSTACK_TOKEN_FIELD, token_id) token_secret = self._check_var(_BOOKSTACK_TOKEN_SECRET_FIELD, token_secret) @@ -93,10 +94,12 @@ def _generate_remote_config(self) -> Dict[str, StorageProviderConfig]: object_config = {} # check for optional minio credentials if configuration is set in yaml configuration file if self.user_inputs.minio_config: - minio_access_key = 
self._check_var(_MINIO_ACCESS_KEY_FIELD, self.user_inputs.minio_config.access_key) - minio_secret_key = self._check_var(_MINIO_SECRET_KEY_FIELD, self.user_inputs.minio_config.secret_key) + minio_access_key = self._check_var(_MINIO_ACCESS_KEY_FIELD, + self.user_inputs.minio_config.access_key) + minio_secret_key = self._check_var(_MINIO_SECRET_KEY_FIELD, + self.user_inputs.minio_config.secret_key) object_config["minio"] = StorageProviderConfig(minio_access_key, - minio_secret_key, self.user_inputs.minio_config.bucket, + minio_secret_key, self.user_inputs.minio_config.bucket, host=self.user_inputs.minio_config.host, path=self.user_inputs.minio_config.path, region=self.user_inputs.minio_config.region) @@ -114,7 +117,7 @@ def _generate_headers(self) -> Dict[str, str]: # do not override if user added one already with same key if key not in headers: headers[key] = value - + # do not override user provided one if 'Authorization' not in headers: headers['Authorization'] = f"Token {self._token_id}:{self._token_secret}" @@ -153,23 +156,27 @@ def _set_base_dir(self, cmd_output_dir: str) -> str: else: base_dir = _BASE_DIR_NAME return base_dir - + @property def headers(self) -> Dict[str, str]: + """get generated headers""" return self._headers @property def urls(self) -> Dict[str, str]: + """get generated urls""" return self._urls - + @property def base_dir_name(self) -> str: + """get base dir of output target""" return self._base_dir_name @property def object_storage_config(self) -> Dict[str, StorageProviderConfig]: + """return remote storage configuration""" return self._object_storage_config - + @staticmethod def _check_var(env_key: str, default_val: str) -> str: """ @@ -182,10 +189,12 @@ def _check_var(env_key: str, default_val: str) -> str: env_value = os.environ.get(env_key, "") # env value takes precedence if env_value: - log.debug(f"env key: {env_key} specified. Will override configuration file value if set.") + log.debug("""env key: %s specified. 
+ Will override configuration file value if set.""", env_key) return env_value # check for optional inputs, if env and input is missing if not env_value and not default_val: - raise ValueError(f"{env_key} is not specified in env and is missing from configuration - at least one should be set") + raise ValueError(f"""{env_key} is not specified in env and is + missing from configuration - at least one should be set""") # fall back to configuration file value if present - return default_val \ No newline at end of file + return default_val diff --git a/bookstack_file_exporter/config_helper/models.py b/bookstack_file_exporter/config_helper/models.py index 9b41fd6..1bd9c40 100644 --- a/bookstack_file_exporter/config_helper/models.py +++ b/bookstack_file_exporter/config_helper/models.py @@ -1,7 +1,10 @@ from typing import Dict, Literal, List, Optional from pydantic import BaseModel +# pylint: disable=R0903 + class MinioConfig(BaseModel): + """YAML schema for minio configuration""" host: str access_key: Optional[str] = None secret_key: Optional[str] = None @@ -10,10 +13,12 @@ class MinioConfig(BaseModel): region: str class BookstackAccess(BaseModel): + """YAML schema for bookstack access credentials""" token_id: str token_secret: str class UserInput(BaseModel): + """YAML schema for user provided configuration file""" host: str additional_headers: Optional[Dict[str, str]] = None credentials: Optional[BookstackAccess] = None @@ -21,4 +26,4 @@ class UserInput(BaseModel): output_path: Optional[str] = None export_meta: Optional[bool] = None minio_config: Optional[MinioConfig] = None - clean_up: Optional[bool] = None \ No newline at end of file + clean_up: Optional[bool] = None diff --git a/bookstack_file_exporter/config_helper/remote.py b/bookstack_file_exporter/config_helper/remote.py index 9d79aff..21fc2bf 100644 --- a/bookstack_file_exporter/config_helper/remote.py +++ b/bookstack_file_exporter/config_helper/remote.py @@ -3,7 +3,24 @@ ## convenience class ## able to work for minio, s3, etc. class StorageProviderConfig: - def __init__(self, access_key: str, secret_key: str, bucket: str, host: Union[str, None], path: Union[str, None], region: Union[str, None]): + """ + Convenience class to get dot notation for remote object storage + configuration access. 
+ + Args: + access_key = required token id + secret_key = required secret token + bucket = bucket to upload + host (optionalgit) = if provider requires a host/url + path (optional) = specify bucket path for upload + region (optional) = if provider requires region + + Returns: + StorageProviderConfig instance for dot notation access + """ + def __init__(self, access_key: str, secret_key: str, bucket: str, + host: Union[str, None]=None, path: Union[str, None]=None, + region: Union[str, None]=None): self.host = host self.access_key = access_key self.secret_key = secret_key diff --git a/bookstack_file_exporter/exporter/exporter.py b/bookstack_file_exporter/exporter/exporter.py index 4777276..f340a1a 100644 --- a/bookstack_file_exporter/exporter/exporter.py +++ b/bookstack_file_exporter/exporter/exporter.py @@ -123,15 +123,14 @@ def get_all_books(self, shelve_nodes: Dict[int, Node], unassigned_dir: str) -> D # convenience function def get_all_pages(self, book_nodes: Dict[int, Node]) -> Dict[int, Node]: """get all pages and their content""" - ## chapters (if exists) - # chapter nodes are treated a little differently - # chapters are children under books - chapter_nodes: Dict[int, Node] = self.get_chapter_nodes(book_nodes) - ## pages page_nodes = {} if book_nodes: page_nodes: Dict[int, Node] = self.get_child_nodes("pages", book_nodes) + ## chapters (if exists) + # chapter nodes are treated a little differently + # chapters are children under books + chapter_nodes: Dict[int, Node] = self.get_chapter_nodes(book_nodes) # add chapter node pages # replace existing page node if found with proper chapter parent if chapter_nodes: diff --git a/bookstack_file_exporter/exporter/node.py b/bookstack_file_exporter/exporter/node.py index 413e644..7b8cf41 100644 --- a/bookstack_file_exporter/exporter/node.py +++ b/bookstack_file_exporter/exporter/node.py @@ -34,7 +34,7 @@ def __init__(self, meta: Dict[str, Union[str, int]], self._path_prefix = path_prefix # for convenience/usage for exporter self.name: str = self.meta['slug'] - self.id: int = self.meta['id'] + self.id_: int = self.meta['id'] self._display_name = self.meta['name'] # children self._children = self._get_children() diff --git a/bookstack_file_exporter/run.py b/bookstack_file_exporter/run.py index 178040f..4b4fc8e 100644 --- a/bookstack_file_exporter/run.py +++ b/bookstack_file_exporter/run.py @@ -49,3 +49,5 @@ def exporter(args: argparse.Namespace): # if remote target is specified and clean is true # clean up the .tgz archive since it is already uploaded archive.clean_up(config.user_inputs.clean_up) + + log.info("Completed run") From ac37158583b1e2a07b9440670286089320805bba Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:12:44 -0400 Subject: [PATCH 05/20] wip set pylint to fail if under 9/10 --- .pylintrc | 2 +- bookstack_file_exporter/config_helper/remote.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pylintrc b/.pylintrc index f0c2702..3c66f40 100644 --- a/.pylintrc +++ b/.pylintrc @@ -39,7 +39,7 @@ extension-pkg-whitelist= fail-on= # Specify a score threshold under which the program will exit with error. -fail-under=10 +fail-under=9 # Interpret the stdin as a python script, whose filename needs to be passed as # the module_or_package argument. 
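The `fail-under=9` gate above can be exercised locally before opening a PR. A minimal sketch — assuming pylint >= 2.5 (which introduced `--fail-under`) and that the package directory is the lint target; the exact invocation used by the pylint workflow is not shown in these patches:

```bash
# install the linter, then run the same score gate the CI enforces;
# the command exits non-zero when the score drops below 9/10
python -m pip install pylint
python -m pylint --fail-under=9 bookstack_file_exporter
```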
diff --git a/bookstack_file_exporter/config_helper/remote.py b/bookstack_file_exporter/config_helper/remote.py index 21fc2bf..713c7fd 100644 --- a/bookstack_file_exporter/config_helper/remote.py +++ b/bookstack_file_exporter/config_helper/remote.py @@ -11,7 +11,7 @@ class StorageProviderConfig: access_key = required token id secret_key = required secret token bucket = bucket to upload - host (optionalgit) = if provider requires a host/url + host (optional) = if provider requires a host/url path (optional) = specify bucket path for upload region (optional) = if provider requires region From 8da4f5cd9c4857709b435123ed61f8fd56544d21 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:29:59 -0400 Subject: [PATCH 06/20] add more workflows --- .github/workflows/{ => always}/pylint.yml | 0 .github/workflows/merge/docker-build.yml | 28 +++++++++++++++++++++++ .pylintrc | 1 + 3 files changed, 29 insertions(+) rename .github/workflows/{ => always}/pylint.yml (100%) create mode 100644 .github/workflows/merge/docker-build.yml diff --git a/.github/workflows/pylint.yml b/.github/workflows/always/pylint.yml similarity index 100% rename from .github/workflows/pylint.yml rename to .github/workflows/always/pylint.yml diff --git a/.github/workflows/merge/docker-build.yml b/.github/workflows/merge/docker-build.yml new file mode 100644 index 0000000..5d2836d --- /dev/null +++ b/.github/workflows/merge/docker-build.yml @@ -0,0 +1,28 @@ +name: Test Builds + +on: + push: + pull_request: + branches: [ "main" ] + +jobs: + docker_build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Build the Docker image + run: docker build . --file Dockerfile --tag ${{ vars.DOCKERHUB_REPO }}:${{ github.sha }} + pip_build: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.11"] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Test Install of Pip Module + run: | + pip install . \ No newline at end of file diff --git a/.pylintrc b/.pylintrc index 3c66f40..1e52a9b 100644 --- a/.pylintrc +++ b/.pylintrc @@ -39,6 +39,7 @@ extension-pkg-whitelist= fail-on= # Specify a score threshold under which the program will exit with error. 
+# set to fail if under 9/10 on score fail-under=9 # Interpret the stdin as a python script, whose filename needs to be passed as From 497a659a4af0716fcd0d0f4069f6fc4584013e2d Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:32:30 -0400 Subject: [PATCH 07/20] add more workflows --- .github/workflows/{always/pylint.yml => always.pylint.yml} | 0 .../{merge/docker-build.yml => on_pr_open.docker-build.yml} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{always/pylint.yml => always.pylint.yml} (100%) rename .github/workflows/{merge/docker-build.yml => on_pr_open.docker-build.yml} (100%) diff --git a/.github/workflows/always/pylint.yml b/.github/workflows/always.pylint.yml similarity index 100% rename from .github/workflows/always/pylint.yml rename to .github/workflows/always.pylint.yml diff --git a/.github/workflows/merge/docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml similarity index 100% rename from .github/workflows/merge/docker-build.yml rename to .github/workflows/on_pr_open.docker-build.yml From 13e368a316195c04d154da248a2af400698e7f45 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:35:34 -0400 Subject: [PATCH 08/20] add more workflows --- .github/workflows/on_pr_open.docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index 5d2836d..1e8d9c8 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -11,7 +11,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag ${{ vars.DOCKERHUB_REPO }}:${{ github.sha }} + run: docker build . --file Dockerfile --tag $DOCKERHUB_REPO:${{ github.sha }} pip_build: runs-on: ubuntu-latest strategy: From d670cddec5890102fda8c1b5c36fd17ed771f873 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:38:19 -0400 Subject: [PATCH 09/20] add more workflows --- .github/workflows/on_pr_open.docker-build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index 1e8d9c8..dff2459 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -8,10 +8,12 @@ on: jobs: docker_build: runs-on: ubuntu-latest + env: + DOCKER_REPO: ${{ vars.DOCKERHUB_REPO }} steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag $DOCKERHUB_REPO:${{ github.sha }} + run: docker build . --file Dockerfile --tag $DOCKER_REPO:${{ github.sha }} pip_build: runs-on: ubuntu-latest strategy: From b3e609751b941fc56cdd579d1a7116d70c653469 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:45:16 -0400 Subject: [PATCH 10/20] fix env var in workflow --- .github/workflows/on_pr_open.docker-build.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index dff2459..28c35f0 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -8,12 +8,10 @@ on: jobs: docker_build: runs-on: ubuntu-latest - env: - DOCKER_REPO: ${{ vars.DOCKERHUB_REPO }} steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . 
--file Dockerfile --tag $DOCKER_REPO:${{ github.sha }} + run: docker build . --file Dockerfile --tag ${{ env.DOCKERHUB_REPO }}:${{ GITHUB_RUN_ID }} pip_build: runs-on: ubuntu-latest strategy: From 5adc1d4c24f9e52d33081590d10700ce66a827ef Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:46:13 -0400 Subject: [PATCH 11/20] fix env var in workflow --- .github/workflows/on_pr_open.docker-build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index 28c35f0..d275f0e 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -11,7 +11,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag ${{ env.DOCKERHUB_REPO }}:${{ GITHUB_RUN_ID }} + run: docker build . --file Dockerfile --tag ${{ env.DOCKERHUB_REPO }}:${{ github.run_id }} pip_build: runs-on: ubuntu-latest strategy: From b8b9bdabb3b3a5226e5752f87cdee0075d817d2f Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:49:40 -0400 Subject: [PATCH 12/20] add makefile for test --- .github/workflows/on_pr_open.docker-build.yml | 5 ++--- Makefile | 5 ++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index d275f0e..1a4b6fd 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -11,7 +11,7 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build the Docker image - run: docker build . --file Dockerfile --tag ${{ env.DOCKERHUB_REPO }}:${{ github.run_id }} + run: make docker_build pip_build: runs-on: ubuntu-latest strategy: @@ -24,5 +24,4 @@ jobs: with: python-version: ${{ matrix.python-version }} - name: Test Install of Pip Module - run: | - pip install . \ No newline at end of file + run: make pip_build \ No newline at end of file diff --git a/Makefile b/Makefile index d05cac5..86a4258 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,10 @@ DOCKER_WORK_DIR=/export DOCKER_CONFIG_DIR=/export/config DOCKER_EXPORT_DIR=/export/dump -test_local: +pip_build: + pip install . + +pip_local_dev: pip install -e . 
docker_build: From 1c51ba1a298c831b23a27a86933db6b482f97e06 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Sun, 1 Oct 2023 20:58:34 -0400 Subject: [PATCH 13/20] fix on pr workflow --- .github/workflows/on_pr_open.docker-build.yml | 3 ++- Makefile | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index 1a4b6fd..ad12e23 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -1,7 +1,6 @@ name: Test Builds on: - push: pull_request: branches: [ "main" ] @@ -10,6 +9,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - name: Prepare Makefile + run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${{github.run_id}}/' Makefile - name: Build the Docker image run: make docker_build pip_build: diff --git a/Makefile b/Makefile index 86a4258..6c29435 100644 --- a/Makefile +++ b/Makefile @@ -2,6 +2,8 @@ BASE_IMAGE=python BASE_IMAGE_TAG=3.11-slim-bookworm IMAGE_NAME=homeylab/bookstack-file-exporter +# keep this start sequence unique (IMAGE_TAG=) +# github actions will replace this value for later use IMAGE_TAG=test DOCKER_WORK_DIR=/export DOCKER_CONFIG_DIR=/export/config From 0243820cee3b7b1c9091017dc5f234fd2500f092 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 2 Oct 2023 01:40:10 -0400 Subject: [PATCH 14/20] add tag release action --- .github/workflows/on_pr_open.docker-build.yml | 8 +- .github/workflows/on_tag.push.yml | 44 +++++++++ Makefile | 16 +++- README.md | 91 +++++++++++-------- setup.cfg | 3 +- 5 files changed, 117 insertions(+), 45 deletions(-) create mode 100644 .github/workflows/on_tag.push.yml diff --git a/.github/workflows/on_pr_open.docker-build.yml b/.github/workflows/on_pr_open.docker-build.yml index ad12e23..d5607fc 100644 --- a/.github/workflows/on_pr_open.docker-build.yml +++ b/.github/workflows/on_pr_open.docker-build.yml @@ -24,5 +24,9 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - - name: Test Install of Pip Module - run: make pip_build \ No newline at end of file + - name: Prepare setup.cfg + run: sed -i 's/^version = [^ ]*/version = ${{github.run_id}}/' setup.cfg + - name: Test Build of Package + run: make build + #- name: Upload to TestPypi + #- name: Test install from TestPypi \ No newline at end of file diff --git a/.github/workflows/on_tag.push.yml b/.github/workflows/on_tag.push.yml new file mode 100644 index 0000000..62d9d37 --- /dev/null +++ b/.github/workflows/on_tag.push.yml @@ -0,0 +1,44 @@ +# needs: [tests] # require tests to pass before deploy runs + +name: Build and Push + +on: + push: + # Pattern matched against refs/tags + tags: + - '**' # Push events to every tag including hierarchical tags like v1.0/beta + +jobs: + docker_deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Login to Dockerhub + run: docker login -u ${{ secrets.DOCKERHUB_USER }} -p ${{ secrets.DOCKERHUB_TOKEN }} + - name: Prepare Makefile + run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${{github.ref_name}}/' Makefile + - name: Build the Docker image + run: make docker_build + - name: Push Docker image + run: make docker_push + pypi_deploy: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Update Release tag + run: sed -i 's/^version = [^ 
]*/version = ${{github.ref_name}}/' setup.cfg
+      - name: Build package
+        run: make build
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 6c29435..4afdfde 100644
--- a/Makefile
+++ b/Makefile
@@ -10,10 +10,22 @@ DOCKER_CONFIG_DIR=/export/config
 DOCKER_EXPORT_DIR=/export/dump
 
 pip_build:
-	pip install .
+	python -m pip install .
 
 pip_local_dev:
-	pip install -e .
+	python -m pip install -e .
+
+build:
+	python -m pip install --upgrade build
+	python -m build
+
+upload_testpypi:
+	python -m pip install --upgrade twine
+	python -m twine upload --repository testpypi dist/*
+
+# extra-url is for dependencies using real pypi
+download_testpypi:
+	python -m pip install --index-url https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple bookstack-file-exporter
 
 docker_build:
 	docker buildx build \
diff --git a/README.md b/README.md
index 356752a..e7e8b39 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,10 @@
 # bookstack-file-exporter
 
-**WIP** not yet complete.
+_This project is still under active development. Functionality is there and is relatively stable at this time._
 
-_This is project is still under active development but has made significant progress._
+This tool provides a way to export Bookstack pages in a folder-tree layout locally with an option to push to remote object storage locations.
 
-This tool provides a way to export Bookstack pages in a folder-tree layout into object storage and/or locally.
-
-This small project was mainly created to run as a cronjob in k8s but also run locally if needed. This would allow me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
+This small project was mainly created to run as a cron job in k8s but works anywhere. This would allow me to export my docs in markdown, or other formats like pdf. I use Bookstack's markdown editor as default instead of WYSIWYG editor and this makes my notes portable anywhere even if offline.
 
 The main use case is to backup all docs in a folder-tree format to cover the scenarios:
 
@@ -36,9 +34,22 @@ python -m bookstack_file_exporter -c 
 
 ## Using This Application
 
 ### Install via Pip
+Note: This application is tested and developed on Python `3.11.X`. It will probably work for >= `3.8` but is recommended to install (or set up a venv) `3.11.X` versions.
+
 ```
 pip install bookstack-file-exporter
+
+# if you already have python bin directory in your path
+bookstack-file-exporter -c 
+
+# using pip
+python -m bookstack_file_exporter -c 
 ```
+Command line options:
+| option | required | description |
+| ------ | -------- | ----------- |
+|`-c`, `--config-file`|True|Relative or Absolute path to a valid configuration file. 
This configuration file is checked against a schema for validation.|
+|`-v`, `--log-level` |False, default: info|Provide a valid log level: info, debug, warning, error.|
 
 ### Authentication
 **Note visibility of pages is based on user**, so use a user that has access to pages you want to back up
@@ -112,41 +123,6 @@ export_meta: true
 clean_up: true
 ```
 
-### Minio Backups
-When specifying `minio_config` in the configuration file, these fields are required in the file:
-```
-# a host/ip + port combination is also allowed
-# example: "minio.yourdomain.com:8443"
-host: "minio.yourdomain.com"
-
-# this is required since minio api appears to require it
-# set to the region your bucket resides in
-# if unsure, try "us-east-1" first
-region: "us-east-1"
-
-# bucket to upload to
-bucket "mybucket"
-```
-
-These fields are optional:
-```
-# access key for the minio instance
-# optionally set as env variable instead
-access_key: ""
-
-# secret key for the minio instance
-# optionally set as env variable instead
-secret_key: ""
-
-# the path of the backup
-# in example below, the exported archive will appear in: `<bucket_name>:/bookstack/backups/bookstack-<timestamp>.tgz`
-path: "bookstack/backups"
-```
-
-As mentioned you can optionally set access and secret key as env variables. If both are specified, env variable will take precedence.
-- `MINIO_ACCESS_KEY`
-- `MINIO_SECRET_KEY`
-
 ### Backup Behavior
 
 We will use slug names (from Bookstack API) by default, as such certain characters like `!`, `/` will be ignored and spaces replaced.
@@ -189,6 +165,41 @@ Empty/New Pages will be ignored since they have not been modified yet from creat
 ion. You may notice some directories (books) and/or files (pages) in the archive have a random string at the end, example - `nKA`: `user-and-group-management-nKA`. This is expected and is because there were resources with the same name created in another shelve and bookstack adds a string at the end to ensure uniqueness.
 
+### Minio Backups
+When specifying `minio_config` in the configuration file, these fields are required in the file:
+```
+# a host/ip + port combination is also allowed
+# example: "minio.yourdomain.com:8443"
+host: "minio.yourdomain.com"
+
+# this is required since minio api appears to require it
+# set to the region your bucket resides in
+# if unsure, try "us-east-1" first
+region: "us-east-1"
+
+# bucket to upload to
+bucket: "mybucket"
+```
+
+These fields are optional:
+```
+# access key for the minio instance
+# optionally set as env variable instead
+access_key: ""
+
+# secret key for the minio instance
+# optionally set as env variable instead
+secret_key: ""
+
+# the path of the backup
+# in example below, the exported archive will appear in: `<bucket_name>:/bookstack/backups/bookstack-<timestamp>.tgz`
+path: "bookstack/backups"
+```
+
+As mentioned, you can optionally set access and secret key as env variables. If both are specified, the env variable will take precedence.
+- `MINIO_ACCESS_KEY`
+- `MINIO_SECRET_KEY`
+
 ## Future Items
 1. Be able to pull media/photos locally and place in their respective page folders for a more complete file level backup.
 2. Include the exporter in a maintained helm chart as an optional deployment. The helm chart is [here](https://github.com/homeylab/helm-charts/tree/main/charts/bookstack). 
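The credential-precedence rule above (env vars win over `minio_config` values) can be checked end-to-end with a debug run. A sketch, assuming a `config.yml` that also sets `access_key`/`secret_key`; the key values here are placeholders, not real credentials:

```bash
# exported env vars should take precedence over the config file values,
# mirroring the _check_var helper shown earlier in this series
export MINIO_ACCESS_KEY='env-access-key'
export MINIO_SECRET_KEY='env-secret-key'
python -m bookstack_file_exporter -c config.yml -v debug
# expect a debug line noting the env key will override the configuration file value
```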
diff --git a/setup.cfg b/setup.cfg index e28b411..98eb518 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,10 +1,11 @@ [metadata] name = bookstack-file-exporter +# version will be replaced by github actions by release tag version = 0.0.1 author = pchang388 # author_email = your@email.address url = https://github.com/homeylab/bookstack-file-exporter -description = An exporter written in python to export all documents from a bookstack instance with your preferred medium +description = An exporter written in python to export all documents from a bookstack instance in different formats long_description = file: README.md long_description_content_type = text/markdown keywords = bookstack, exporter From 51f77c08039ce1fb4e307382450ccccf768c24a2 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 2 Oct 2023 01:48:44 -0400 Subject: [PATCH 15/20] remove prefix char in release tags --- .github/workflows/on_tag.push.yml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/on_tag.push.yml b/.github/workflows/on_tag.push.yml index 62d9d37..9f7d863 100644 --- a/.github/workflows/on_tag.push.yml +++ b/.github/workflows/on_tag.push.yml @@ -13,10 +13,16 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + # Store the version, stripping any v-prefix + - name: Write release version + run: | + TAG=${{ github.event.release.tag_name }} + echo "VERSION=${TAG#v}" >> $GITHUB_ENV + echo Version: $VERSION - name: Login to Dockerhub run: docker login -u ${{ secrets.DOCKERHUB_USER }} -p ${{ secrets.DOCKERHUB_TOKEN }} - name: Prepare Makefile - run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${{github.ref_name}}/' Makefile + run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${VERSION}/' Makefile - name: Build the Docker image run: make docker_build - name: Push Docker image @@ -33,8 +39,13 @@ jobs: run: | python -m pip install --upgrade pip pip install build + - name: Write release version + run: | + TAG=${{ github.event.release.tag_name }} + echo "VERSION=${TAG#v}" >> $GITHUB_ENV + echo Version: $VERSION - name: Update Release tag - run: sed -i 's/^version = [^ ]*/version = ${{github.ref_name}}/' setup.cfg + run: sed -i 's/^version = [^ ]*/version = ${VERSION}}/' setup.cfg - name: Build package run: make build - name: Publish package From 7dcf84b0ca12fa2da495592065704522359640a6 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 2 Oct 2023 02:08:28 -0400 Subject: [PATCH 16/20] fix workflow --- .github/workflows/on_tag.push.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/on_tag.push.yml b/.github/workflows/on_tag.push.yml index 9f7d863..2424be2 100644 --- a/.github/workflows/on_tag.push.yml +++ b/.github/workflows/on_tag.push.yml @@ -16,7 +16,7 @@ jobs: # Store the version, stripping any v-prefix - name: Write release version run: | - TAG=${{ github.event.release.tag_name }} + TAG=${{ github.ref_name }} echo "VERSION=${TAG#v}" >> $GITHUB_ENV echo Version: $VERSION - name: Login to Dockerhub @@ -41,7 +41,7 @@ jobs: pip install build - name: Write release version run: | - TAG=${{ github.event.release.tag_name }} + TAG=${{ github.ref_name }} echo "VERSION=${TAG#v}" >> $GITHUB_ENV echo Version: $VERSION - name: Update Release tag From 0d39bd71211730393e05cc9f8f853bf5bbc216a3 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 2 Oct 2023 03:52:27 -0400 Subject: [PATCH 17/20] add release merge workflow --- .github/workflows/on_pr_merged.yml | 99 ++++++++++++++++++++++++++++++ .github/workflows/on_tag.push.yml | 55 ----------------- 
 Makefile                           |  4 +-
 setup.cfg                          |  2 +-
 4 files changed, 102 insertions(+), 58 deletions(-)
 create mode 100644 .github/workflows/on_pr_merged.yml
 delete mode 100644 .github/workflows/on_tag.push.yml

diff --git a/.github/workflows/on_pr_merged.yml b/.github/workflows/on_pr_merged.yml
new file mode 100644
index 0000000..6e6d6cd
--- /dev/null
+++ b/.github/workflows/on_pr_merged.yml
@@ -0,0 +1,99 @@
+# needs: [tests] # require tests to pass before deploy runs
+
+name: Build and Push
+
+# on:
+#   push:
+#     # Pattern matched against refs/tags
+#     tags:
+#       - '**' # Push events to every tag including hierarchical tags like v1.0/beta
+
+on:
+  pull_request:
+    types:
+      - closed
+    branches:
+      - master
+
+jobs:
+  docker_deploy:
+    if: github.event.pull_request.merged
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Login to Dockerhub
+        run: docker login -u ${{ secrets.DOCKERHUB_USER }} -p ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Build the Docker image
+        run: make docker_build
+      - name: Push Docker image
+        run: make docker_push
+  pypi_deploy:
+    if: github.event.pull_request.merged
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.x'
+      - name: Set tag version
+        run: |
+          TAG=$(grep -E '^IMAGE_TAG=[0-9]+\.[0-9]+\.[0-9]+' Makefile | cut -d '=' -f 2)
+          echo "VERSION=${TAG}" >> $GITHUB_ENV
+          echo "version from Makefile is: ${TAG}"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install build
+      - name: Update Release tag
+        run: sed -i "s/^version = [^ ]*/version = ${VERSION}/" setup.cfg
+      - name: Build package
+        run: make build
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
+  create_tag:
+    if: github.event.pull_request.merged
+    runs-on: ubuntu-latest
+    needs:
+      - docker_deploy
+      - pypi_deploy
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          ref: ${{ github.event.pull_request.merge_commit_sha }}
+          fetch-depth: '0'
+      - name: Set tag version
+        run: |
+          TAG=$(grep -E '^IMAGE_TAG=[0-9]+\.[0-9]+\.[0-9]+' Makefile | cut -d '=' -f 2)
+          echo "VERSION=${TAG}" >> $GITHUB_ENV
+          echo "version from Makefile is: ${TAG}"
+      - name: Create tag
+        uses: anothrNick/github-tag-action@1.64.0
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          WITH_V: true
+          PRERELEASE: true
+          CUSTOM_TAG: ${{ env.VERSION }}
+  create_release:
+    if: github.event.pull_request.merged
+    runs-on: ubuntu-latest
+    needs:
+      - create_tag
+    permissions:
+      contents: write
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set tag version
+        run: |
+          TAG=$(grep -E '^IMAGE_TAG=[0-9]+\.[0-9]+\.[0-9]+' Makefile | cut -d '=' -f 2)
+          echo "VERSION=${TAG}" >> $GITHUB_ENV
+          echo "version from Makefile is: ${TAG}"
+      - uses: ncipollo/release-action@v1
+        with:
+          tag: ${{ env.VERSION }}
+      # docker image tag latest
diff --git a/.github/workflows/on_tag.push.yml b/.github/workflows/on_tag.push.yml
deleted file mode 100644
index 2424be2..0000000
--- a/.github/workflows/on_tag.push.yml
+++ /dev/null
@@ -1,55 +0,0 @@
-# needs: [tests] # require tests to pass before deploy runs
-
-name: Build and Push
-
-on:
-  push:
-    # Pattern matched against refs/tags
-    tags:
-      - '**' # Push events to every tag including hierarchical tags like v1.0/beta
-
-jobs:
-  docker_deploy:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      # Store the version, stripping any v-prefix
-      - name: Write release version
-        run: |
-          TAG=${{ github.ref_name }} 
- echo "VERSION=${TAG#v}" >> $GITHUB_ENV - echo Version: $VERSION - - name: Login to Dockerhub - run: docker login -u ${{ secrets.DOCKERHUB_USER }} -p ${{ secrets.DOCKERHUB_TOKEN }} - - name: Prepare Makefile - run: sed -i 's/^IMAGE_TAG=[^ ]*/IMAGE_TAG=${VERSION}/' Makefile - - name: Build the Docker image - run: make docker_build - - name: Push Docker image - run: make docker_push - pypi_deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v3 - with: - python-version: '3.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install build - - name: Write release version - run: | - TAG=${{ github.ref_name }} - echo "VERSION=${TAG#v}" >> $GITHUB_ENV - echo Version: $VERSION - - name: Update Release tag - run: sed -i 's/^version = [^ ]*/version = ${VERSION}}/' setup.cfg - - name: Build package - run: make build - - name: Publish package - uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 - with: - user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/Makefile b/Makefile index 4afdfde..bbf2917 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,8 @@ BASE_IMAGE=python BASE_IMAGE_TAG=3.11-slim-bookworm IMAGE_NAME=homeylab/bookstack-file-exporter # keep this start sequence unique (IMAGE_TAG=) -# github actions will replace this value for later use -IMAGE_TAG=test +# github actions will use this to create a tag +IMAGE_TAG=0.0.1 DOCKER_WORK_DIR=/export DOCKER_CONFIG_DIR=/export/config DOCKER_EXPORT_DIR=/export/dump diff --git a/setup.cfg b/setup.cfg index 98eb518..6bb1832 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = bookstack-file-exporter -# version will be replaced by github actions by release tag +# version will be replaced by IMAGE_TAG in Makefile version = 0.0.1 author = pchang388 # author_email = your@email.address From d319c641cb6bbc1daf768dedca046da5462b1457 Mon Sep 17 00:00:00 2001 From: pchang388 Date: Mon, 2 Oct 2023 04:12:44 -0400 Subject: [PATCH 18/20] update readme --- README.md | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e7e8b39..08c0eac 100644 --- a/README.md +++ b/README.md @@ -33,11 +33,11 @@ python -m bookstack_file_exporter -c ## Using This Application -### Install via Pip -Note: This application is tested and developed on Python `3.11.X`. It will probably work for >= `3.8` but is recommended to install (or set up a venv) `3.11.X` versions. +### Run via Pip +Note: This application is tested and developed on Python `3.11.X`. It will probably work for >= `3.8` but is recommended to install (or set up a venv) a `3.11.X` version. -``` -pip install bookstack-file-exporter +```bash +python -m pip install bookstack-file-exporter # if you already have python bin directory in your path bookstack-file-exporter -c @@ -51,6 +51,34 @@ Command line options: |`-c`, `--config-file`|True|Relative or Absolute path to a valid configuration file. 
This configuration file is checked against a schema for validation.|
 |`-v`, `--log-level` |False, default: info|Provide a valid log level: info, debug, warning, error.|
 
+### Run Via Docker
+Example:
+```bash
+docker run \
+    --user ${USER_ID}:${USER_GID} \
+    -v $(pwd)/local/config.yml:/export/config/config.yml:ro \
+    -v $(pwd)/bkps:/export/dump \
+    bookstack-file-exporter:0.0.1
+```
+Required Options:
+| option | description |
+| ------ | ----------- |
+| `config.yml` file mount | Provide a valid configuration file. Specified in example as read only: `-v ${CURDIR}/local/config.yml:/export/config/config.yml:ro`, `${USER_LOCAL_PATH}:${STATIC_DOCKER_PATH}` |
+| `dump` file mount | Directory to place exports. Specified in example: `-v ${CURDIR}/bkps:/export/dump`, `${USER_LOCAL_PATH}:${STATIC_DOCKER_PATH}` |
+
+Tokens and other options can be specified, example:
+```bash
+# '-e' flag for env vars
+# --user flag to override the uid/gid for created files
+docker run -i \
+    -e LOG_LEVEL='debug' \
+    -e BOOKSTACK_TOKEN_ID='xyz' \
+    -e BOOKSTACK_TOKEN_SECRET='xyz' \
+    --user 1000:1000 \
+    -v $(pwd)/local/config.yml:/export/config/config.yml:ro \
+    -v $(pwd):/export/dump \
+    bookstack-file-exporter:0.0.1
+```
+
 ### Authentication
 **Note visibility of pages is based on user**, so use a user that has access to pages you want to back up

From daab5163101bf3a83826d089412622d767550840 Mon Sep 17 00:00:00 2001
From: pchang388
Date: Mon, 2 Oct 2023 04:15:17 -0400
Subject: [PATCH 19/20] update readme

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 08c0eac..95ea79c 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ Tokens and other options can be specified, example:
 ```bash
 # '-e' flag for env vars
 # --user flag to override the uid/gid for created files
-docker run -i \
+docker run \
     -e LOG_LEVEL='debug' \
     -e BOOKSTACK_TOKEN_ID='xyz' \
     -e BOOKSTACK_TOKEN_SECRET='xyz' \

From 51308bb5e1d370d47877706bff3fbd8ae52b10f0 Mon Sep 17 00:00:00 2001
From: pchang388
Date: Mon, 2 Oct 2023 04:17:39 -0400
Subject: [PATCH 20/20] update readme

---
 .github/workflows/on_pr_merged.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/on_pr_merged.yml b/.github/workflows/on_pr_merged.yml
index 6e6d6cd..9f6ce6a 100644
--- a/.github/workflows/on_pr_merged.yml
+++ b/.github/workflows/on_pr_merged.yml
@@ -96,4 +96,5 @@
       - uses: ncipollo/release-action@v1
         with:
           tag: ${{ env.VERSION }}
+          generateReleaseNotes: true
       # docker image tag latest
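The release workflow's version stamping can be dry-run locally. A sketch, assuming GNU sed and the `Makefile`/`setup.cfg` contents shown in the patches above (the extracted value is illustrative):

```bash
# pull the bare version out of the Makefile: IMAGE_TAG=0.0.1 -> 0.0.1
VERSION=$(grep -E '^IMAGE_TAG=' Makefile | cut -d '=' -f 2)
echo "${VERSION}"

# stamp it into setup.cfg; double quotes so the shell expands ${VERSION}
sed -i "s/^version = [^ ]*/version = ${VERSION}/" setup.cfg
grep '^version' setup.cfg   # -> version = 0.0.1
```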