Skip to content

Commit

Permalink
Only read text files that look like entries when opening folder journ…
Browse files Browse the repository at this point in the history
…al (#1697)

* Add text file that should be ignored to basic test folder journal. Makes tons of tests fail
* Add additional files that should be ignored by FolderJournal
* Ignore all files in folder journal except year/month/day.txt
* Completely remake get_files in FolderJournal:
- move get_files into FolderJournal class and add underscore prefix
- create iterables to get for year/month folders and day files
- make year/month/day file reading strict: only exact expected months and days out of all possible months and days
* Restore accidentally-deleted self.sort() line
* Use match instead of string comparison to be os-agnostic
* Explicitly declare static methods
* Filter with glob first for max performance
* Explicitly check for valid dates in FolderJournal and add unit test
* Remove unneeded jrnl import
* Clean up method comment and add type hints
* Add is_valid_date unit test
* Elucidate comment

Co-authored-by: Jonathan Wren <jonathan@nowandwren.com>
  • Loading branch information
micahellison and wren authored Apr 29, 2023
1 parent 88aa249 commit 95836a7
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 16 deletions.
68 changes: 52 additions & 16 deletions jrnl/journals/FolderJournal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# License: https://www.gnu.org/licenses/gpl-3.0.html

import codecs
import fnmatch
import os
import pathlib
from collections.abc import Iterator
from typing import TYPE_CHECKING

from jrnl import time
Expand All @@ -13,14 +13,11 @@
if TYPE_CHECKING:
from jrnl.journals import Entry


def get_files(journal_config: str) -> list[str]:
    """Collect the path of every ``*.txt`` file in the tree rooted at journal_config.

    The whole directory tree is walked, so any text file at any depth is
    returned, in the order ``os.walk`` visits the directories.
    """
    return [
        os.path.join(folder, txt_name)
        for folder, _subfolders, names in os.walk(journal_config)
        for txt_name in fnmatch.filter(names, "*.txt")
    ]
# glob search patterns for folder/file structure (<year>/<month>/<day>.txt)
# A glob character class that matches exactly one decimal digit.
DIGIT_PATTERN = "[0123456789]"
# Four digits in a row: the name of a year folder.
YEAR_PATTERN = DIGIT_PATTERN * 4
# Two digits, "00" through "19". This is only a coarse pre-filter;
# Folder._get_month_folders narrows it to 1-12.
MONTH_PATTERN = "[01]" + DIGIT_PATTERN
# "00.txt" through "39.txt". This is only a coarse pre-filter;
# Folder._get_day_files narrows it to days that exist in the given month.
DAY_PATTERN = "[0123]" + DIGIT_PATTERN + ".txt"


class Folder(Journal):
Expand All @@ -35,12 +32,15 @@ def __init__(self, name: str = "default", **kwargs):
def open(self) -> "Folder":
filenames = []
self.entries = []
filenames = get_files(self.config["journal"])
for filename in filenames:
with codecs.open(filename, "r", "utf-8") as f:
journal = f.read()
self.entries.extend(self._parse(journal))
self.sort()

if os.path.exists(self.config["journal"]):
filenames = Folder._get_files(self.config["journal"])
for filename in filenames:
with codecs.open(filename, "r", "utf-8") as f:
journal = f.read()
self.entries.extend(self._parse(journal))
self.sort()

return self

def write(self) -> None:
Expand Down Expand Up @@ -81,7 +81,7 @@ def write(self) -> None:
journal_file.write(journal)
# look for and delete empty files
filenames = []
filenames = get_files(self.config["journal"])
filenames = Folder._get_files(self.config["journal"])
for filename in filenames:
if os.stat(filename).st_size <= 0:
os.remove(filename)
Expand Down Expand Up @@ -119,3 +119,39 @@ def parse_editable_str(self, edited: str) -> None:

self.increment_change_counts_by_edit(mod_entries)
self.entries = mod_entries

@staticmethod
def _get_files(journal_path: str) -> Iterator[str]:
    """Lazily yield the path of every file under journal_path that looks like an entry.

    Only files laid out as ``<year>/<month>/<day>.txt`` are produced. The
    three levels are filtered by ``_get_year_folders``,
    ``_get_month_folders`` and ``_get_day_files`` respectively, so any
    other text file in the journal folder is ignored.

    This is a generator: it can be iterated once, and the directories are
    only read as the caller consumes it.
    """
    for year_folder in Folder._get_year_folders(pathlib.Path(journal_path)):
        for month_folder in Folder._get_month_folders(year_folder):
            yield from Folder._get_day_files(month_folder)

@staticmethod
def _get_year_folders(path: pathlib.Path) -> Iterator[pathlib.Path]:
    """Yield each directory directly under path whose name is exactly four digits.

    Entries that match the name pattern but are not directories (for
    example a file called ``2020``) are skipped.
    """
    for child in path.glob(YEAR_PATTERN):
        if child.is_dir():
            yield child

@staticmethod
def _get_month_folders(path: pathlib.Path) -> Iterator[pathlib.Path]:
    """Yield each directory directly under path that is named for a month, "01" to "12".

    path is expected to be a year folder. MONTH_PATTERN is a coarse glob
    that also admits "00" and "13"-"19", so the name is narrowed
    numerically here.
    """
    for child in path.glob(MONTH_PATTERN):
        month = int(child.name)
        # The directory check has to be made on the child itself: testing the
        # parent would let a plain file named like a month through.
        if 1 <= month <= 12 and child.is_dir():
            yield child

@staticmethod
def _get_day_files(path: pathlib.Path) -> Iterator[str]:
    """Yield, as strings, the paths of the day files directly under a month folder.

    path is expected to be ``<year>/<month>``: the year and month are read
    from ``path.parent.name`` and ``path.name``. A file is yielded only when
    its stem is a day that really exists in that month (so ``31.txt`` in
    April or ``29.txt`` in a non-leap February is skipped) and it is a
    regular file.
    """
    for child in path.glob(DAY_PATTERN):
        day = int(child.stem)
        if (
            # Cheap range test first; DAY_PATTERN also admits 00 and 32-39.
            1 <= day <= 31
            and time.is_valid_date(
                year=int(path.parent.name),
                month=int(path.name),
                day=day,
            )
            # File system check last, so it only runs for plausible names.
            and child.is_file()
        ):
            yield str(child)
8 changes: 8 additions & 0 deletions jrnl/time.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,11 @@ def parse(
if dt.days < -28 and not year_present:
date = date.replace(date.year - 1)
return date


def is_valid_date(year: int, month: int, day: int) -> bool:
    """Report whether year, month and day together name a real calendar date.

    Validity is decided by trying to build a ``datetime.datetime``; a
    ``ValueError`` from the constructor means the date does not exist.
    """
    try:
        datetime.datetime(year, month, day)
    except ValueError:
        is_valid = False
    else:
        is_valid = True
    return is_valid
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[2022-03-02 9:25:00 AM] This file should be ignored (month)
This text file is in a folder journal's month directory ("2020/09"), but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.

This file should not ever appear in a test.
4 changes: 4 additions & 0 deletions tests/data/journals/basic_folder/2020/should-be-ignored.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[2022-03-02 9:25:00 AM] This file should be ignored (year)
This text file is in a folder journal's year directory ("2020"), but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.

This file should not ever appear in a test.
4 changes: 4 additions & 0 deletions tests/data/journals/basic_folder/should-be-ignored.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[2022-03-02 9:25:00 AM] This file should be ignored (root)
This text file is in a folder journal's root directory, but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.

This file should not ever appear in a test.
59 changes: 59 additions & 0 deletions tests/unit/test_journals_folder_journal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright © 2012-2023 jrnl contributors
# License: https://www.gnu.org/licenses/gpl-3.0.html

import pathlib
from unittest import mock

import pytest

from jrnl.journals.FolderJournal import Folder


@pytest.mark.parametrize(
    "inputs_and_outputs",
    [
        [
            "/2020/01",
            ["02.txt", "03.txt", "31.txt"],
            ["/2020/01/02.txt", "/2020/01/03.txt", "/2020/01/31.txt"],
        ],
        [
            "/2020/02",  # leap year
            ["02.txt", "03.txt", "28.txt", "29.txt", "31.txt", "39.txt"],
            [
                "/2020/02/02.txt",
                "/2020/02/03.txt",
                "/2020/02/28.txt",
                "/2020/02/29.txt",
            ],
        ],
        [
            "/2100/02",  # not a leap year
            ["01.txt", "28.txt", "29.txt", "39.txt"],
            ["/2100/02/01.txt", "/2100/02/28.txt"],
        ],
        [
            "/2023/04",
            ["29.txt", "30.txt", "31.txt", "39.txt"],
            ["/2023/04/29.txt", "/2023/04/30.txt"],
        ],
    ],
)
def test_get_day_files_expected_filtering(inputs_and_outputs):
    """Check that _get_day_files keeps only days that exist in the given month.

    Each case is (month folder, file names the glob reports, paths expected
    back). The glob and the is_file check are mocked, so no real files are
    needed.
    """
    month_dir_name, candidate_names, wanted_paths = inputs_and_outputs
    month_dir = pathlib.Path(month_dir_name)

    globbed = [month_dir / name for name in candidate_names]
    # Round-trip through PurePath so separators match the current OS.
    wanted = sorted(str(pathlib.PurePath(p)) for p in wanted_paths)

    with (
        mock.patch("pathlib.Path.glob", return_value=globbed),
        mock.patch.object(pathlib.Path, "is_file", return_value=True),
    ):
        found = sorted(Folder._get_day_files(month_dir))

    assert found == wanted
22 changes: 22 additions & 0 deletions tests/unit/test_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import datetime

import pytest

from jrnl import time


Expand All @@ -20,3 +22,23 @@ def test_default_minute_is_added():
default_minute=30,
bracketed=False,
) == datetime.datetime(2020, 6, 20, 0, 30)


@pytest.mark.parametrize(
    "inputs",
    [
        [2000, 2, 29, True],
        [2023, 1, 0, False],
        [2023, 1, 1, True],
        [2023, 4, 31, False],
        [2023, 12, 31, True],
        [2023, 12, 32, False],
        [2023, 13, 1, False],
        [2100, 2, 27, True],
        [2100, 2, 28, True],
        [2100, 2, 29, False],
    ],
)
def test_is_valid_date(inputs):
    """Each row is (year, month, day, expected result of time.is_valid_date)."""
    *date_parts, expected_result = inputs
    assert time.is_valid_date(*date_parts) == expected_result

0 comments on commit 95836a7

Please sign in to comment.