Skip to content

Commit

Permalink
Merge pull request #7 from homeylab/clean_up
Browse files Browse the repository at this point in the history
Clean up
  • Loading branch information
pchang388 authored Sep 30, 2023
2 parents 29bc255 + f8e6ff1 commit 9eedfa5
Show file tree
Hide file tree
Showing 16 changed files with 339 additions and 56 deletions.
167 changes: 167 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/


## Local
local/

## test outputs
bkps/
31 changes: 31 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@

# Base image is parameterized so callers (e.g. the Makefile's docker_build
# target) can override it with --build-arg.
ARG BASE_IMAGE=python
# The default tag must exist on the registry; keep it in sync with the
# Makefile's BASE_IMAGE_TAG.
ARG BASE_IMAGE_TAG=3.11-slim-bookworm

FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG}

# Get security updates, install dumb-init, and clean up apt cache for smaller size.
# apt-get (stable CLI for scripts) with -y keeps the build non-interactive.
RUN apt-get update -y && apt-get upgrade -y && \
    apt-get install -y --no-install-recommends dumb-init && \
    rm -rf /var/lib/apt/lists/*

# Directory layout inside the image. The defaults mirror the Makefile values so
# a plain `docker build .` works without any --build-arg; an empty value would
# make WORKDIR and mkdir below fail.
ARG DOCKER_WORK_DIR=/export
ARG DOCKER_CONFIG_DIR=/export/config
ARG DOCKER_EXPORT_DIR=/export/dump

# Expose the config/export locations to the running container as well.
ENV DOCKER_CONFIG_DIR=${DOCKER_CONFIG_DIR}
ENV DOCKER_EXPORT_DIR=${DOCKER_EXPORT_DIR}

WORKDIR ${DOCKER_WORK_DIR}

COPY . .

# --no-cache-dir avoids leaving pip's download cache in the image layer.
RUN pip install --no-cache-dir .

RUN mkdir -p ${DOCKER_CONFIG_DIR} && \
    mkdir -p ${DOCKER_EXPORT_DIR}

# Drop root privileges for the runtime process.
USER nobody

# dumb-init runs as PID 1 and launches the CMD as its child.
ENTRYPOINT ["/usr/bin/dumb-init", "--"]
CMD [ "./entrypoint.sh" ]
31 changes: 30 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,2 +1,31 @@
## DOCKER BUILD VARS
BASE_IMAGE=python
BASE_IMAGE_TAG=3.11-slim-bookworm
IMAGE_NAME=homeylab/bookstack-file-exporter
IMAGE_TAG=test
DOCKER_WORK_DIR=/export
DOCKER_CONFIG_DIR=/export/config
DOCKER_EXPORT_DIR=/export/dump

# None of these targets produce a file with the target's name; declare them
# phony so a stray file/directory with the same name cannot mask them.
.PHONY: test_local docker_build docker_push docker_test

# Install the package in editable mode for local development.
test_local:
	pip install -e .

# Build the image from scratch, forwarding the variables above as build args.
docker_build:
	docker buildx build \
	--build-arg BASE_IMAGE=${BASE_IMAGE} \
	--build-arg BASE_IMAGE_TAG=${BASE_IMAGE_TAG} \
	--build-arg DOCKER_WORK_DIR=${DOCKER_WORK_DIR} \
	--build-arg DOCKER_CONFIG_DIR=${DOCKER_CONFIG_DIR} \
	--build-arg DOCKER_EXPORT_DIR=${DOCKER_EXPORT_DIR} \
	-t ${IMAGE_NAME}:${IMAGE_TAG} \
	--no-cache .

# Push the tagged image to the registry.
docker_push:
	docker push ${IMAGE_NAME}:${IMAGE_TAG}

# Run the image with the local config mounted read-only and ./bkps as the
# export target; mount points reuse the same directories passed at build time.
docker_test:
	docker run \
	-e LOG_LEVEL='debug' \
	-v ${CURDIR}/local/config.yml:${DOCKER_CONFIG_DIR}/config.yml:ro \
	-v ${CURDIR}/bkps:${DOCKER_EXPORT_DIR} \
	${IMAGE_NAME}:${IMAGE_TAG}
1 change: 1 addition & 0 deletions bookstack_file_exporter/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions bookstack_file_exporter/archiver/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

7 changes: 5 additions & 2 deletions bookstack_file_exporter/archiver/archiver.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from typing import List, Dict, Union
from time import sleep
from datetime import datetime
import logging

from bookstack_file_exporter.exporter.node import Node
from bookstack_file_exporter.archiver import util
from bookstack_file_exporter.archiver.minio_archiver import MinioArchiver
from bookstack_file_exporter.config_helper.remote import StorageProviderConfig

log = logging.getLogger(__name__)

_META_FILE_SUFFIX = "_meta"
_TAR_GZ_SUFFIX = ".tgz"

Expand Down Expand Up @@ -36,12 +39,11 @@ class Archiver:
Returns:
Archiver instance with attributes that are accessible for use for file level archival and backup.
"""
def __init__(self, base_dir: str, add_meta: Union[bool, None], base_page_url: str, headers: Dict[str, str], object_storage_config: Dict[str, StorageProviderConfig]):
def __init__(self, base_dir: str, add_meta: Union[bool, None], base_page_url: str, headers: Dict[str, str]):
self.base_dir = base_dir
self.add_meta = add_meta
self.base_page_url = base_page_url
self._headers = headers
self._object_storage_config = object_storage_config
self._root_dir = self.generate_root_folder(self.base_dir)
# the tar file will be name of parent export directory, bookstack-<timestamp>, and .tgz extension
self._tar_file = f"{self._root_dir}{_FILE_EXTENSION_MAP['tar']}"
Expand All @@ -58,6 +60,7 @@ def archive(self, page_nodes: Dict[int, Node], export_formats: List[str]):
# convert to bytes to be agnostic to end destination (future use case?)
def _gather(self, page_node: Node, export_format: str):
    """Fetch one page in ``export_format`` and hand it to the local writer.

    Args:
        page_node: Node whose ``id``, ``file_path`` and ``meta`` are used.
        export_format: Format key passed through to the fetch and write steps.
    """
    # Fetch first, then log, then write -- same order as before.
    page_bytes = self._get_data_format(page_node.id, export_format)
    log.debug(f"Output directory for exports set to: {self._root_dir}")
    self._gather_local(
        page_node.file_path,
        page_bytes,
        export_format,
        page_node.meta,
    )

def _gather_local(self, page_path: str, data: bytes, export_format: str, meta_data: Union[bytes, None]):
Expand Down
1 change: 1 addition & 0 deletions bookstack_file_exporter/common/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

1 change: 1 addition & 0 deletions bookstack_file_exporter/config_helper/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

9 changes: 7 additions & 2 deletions bookstack_file_exporter/config_helper/config_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class ConfigNode:
def __init__(self, args: argparse.Namespace):
self.unassigned_book_dir = _UNASSIGNED_BOOKS_DIR
self.user_inputs = self._generate_config(args.config_file)
self._base_dir_name = self._set_base_dir()
self._base_dir_name = self._set_base_dir(args.output_dir)
self._token_id, self._token_secret = self._generate_credentials()
self._headers = self._generate_headers()
self._urls = self._generate_urls()
Expand Down Expand Up @@ -136,8 +136,12 @@ def _generate_urls(self) -> Dict[str, str]:
urls[key] = f"{url_prefix}{self.user_inputs.host}/{value}"
return urls

def _set_base_dir(self) -> str:
def _set_base_dir(self, cmd_output_dir: str) -> str:
output_dir = self.user_inputs.output_path
# override if command line specified
if cmd_output_dir:
log.debug("Output directory overwritten by command line option")
output_dir = cmd_output_dir
# check if user provided an output path
if output_dir:
# detect trailing slash
Expand Down Expand Up @@ -178,6 +182,7 @@ def _check_var(env_key: str, default_val: str) -> str:
env_value = os.environ.get(env_key, "")
# env value takes precedence
if env_value:
log.debug(f"env key: {env_key} specified. Will override configuration file value if set.")
return env_value
# check for optional inputs, if env and input is missing
if not env_value and not default_val:
Expand Down
1 change: 1 addition & 0 deletions bookstack_file_exporter/exporter/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

Loading

0 comments on commit 9eedfa5

Please sign in to comment.