Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Only read text files that look like entries when opening folder journal #1697

Merged
merged 15 commits into from
Apr 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 52 additions & 16 deletions jrnl/journals/FolderJournal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# License: https://www.gnu.org/licenses/gpl-3.0.html

import codecs
import fnmatch
import os
import pathlib
from typing import TYPE_CHECKING

from jrnl import time
Expand All @@ -13,14 +13,11 @@
if TYPE_CHECKING:
from jrnl.journals import Entry


def get_files(journal_config: str) -> list[str]:
    """Walk the tree rooted at journal_config and return the path of every "*.txt" file in it."""
    # os.walk visits every sub directory; fnmatch keeps only the text files
    # found in each one, and the match is joined back onto its directory.
    return [
        os.path.join(folder, text_file)
        for folder, _subfolders, names in os.walk(journal_config)
        for text_file in fnmatch.filter(names, "*.txt")
    ]
# Glob search patterns for the folder/file structure of a folder journal.
# Globs cannot express numeric ranges, so MONTH_PATTERN and DAY_PATTERN also
# match names such as "00", "19" or "39.txt".
DIGIT_PATTERN = "[0123456789]"
YEAR_PATTERN = "".join([DIGIT_PATTERN] * 4)
MONTH_PATTERN = f"[01]{DIGIT_PATTERN}"
DAY_PATTERN = f"[0123]{DIGIT_PATTERN}.txt"


class Folder(Journal):
Expand All @@ -35,12 +32,15 @@ def __init__(self, name: str = "default", **kwargs):
def open(self) -> "Folder":
filenames = []
self.entries = []
filenames = get_files(self.config["journal"])
for filename in filenames:
with codecs.open(filename, "r", "utf-8") as f:
journal = f.read()
self.entries.extend(self._parse(journal))
self.sort()

if os.path.exists(self.config["journal"]):
filenames = Folder._get_files(self.config["journal"])
for filename in filenames:
with codecs.open(filename, "r", "utf-8") as f:
journal = f.read()
self.entries.extend(self._parse(journal))
self.sort()

return self

def write(self) -> None:
Expand Down Expand Up @@ -81,7 +81,7 @@ def write(self) -> None:
journal_file.write(journal)
# look for and delete empty files
filenames = []
filenames = get_files(self.config["journal"])
filenames = Folder._get_files(self.config["journal"])
for filename in filenames:
if os.stat(filename).st_size <= 0:
os.remove(filename)
Expand Down Expand Up @@ -119,3 +119,39 @@ def parse_editable_str(self, edited: str) -> None:

self.increment_change_counts_by_edit(mod_entries)
self.entries = mod_entries

@staticmethod
def _get_files(journal_path: str) -> list[str]:
    """Search the sub directories of journal_path and find all text files that look like entries.

    Only files reached through a year folder, then a month folder, then a
    day file are returned; any other file under journal_path is ignored.

    :param journal_path: root directory of the folder journal
    :return: the path of every matching entry file
    """
    # Collect everything up front: the result then really is the declared
    # list, and callers may remove files while looping over it without
    # disturbing a directory scan that is still in progress.
    entry_files: list[str] = []
    for year_folder in Folder._get_year_folders(pathlib.Path(journal_path)):
        for month_folder in Folder._get_month_folders(year_folder):
            entry_files.extend(Folder._get_day_files(month_folder))
    return entry_files

@staticmethod
def _get_year_folders(path: pathlib.Path) -> list[pathlib.Path]:
    """Return the directories directly inside path whose names match YEAR_PATTERN.

    :param path: root directory of the folder journal
    :return: the matching year directories (files with matching names are skipped)
    """
    # Returned as a real list so the result agrees with the annotation.
    return [child for child in path.glob(YEAR_PATTERN) if child.is_dir()]

@staticmethod
def _get_month_folders(path: pathlib.Path) -> list[pathlib.Path]:
    """Return the directories directly inside path that are named like a month (01-12).

    :param path: a year directory of the folder journal
    :return: the matching month directories
    """
    # The glob guarantees a two-digit name, so int() cannot fail here, but
    # the number still has to be range-checked. The directory test is made
    # on the child itself: checking the parent would let a plain file named
    # e.g. "03" pass as a month folder.
    return [
        child
        for child in path.glob(MONTH_PATTERN)
        if 1 <= int(child.name) <= 12 and child.is_dir()
    ]

@staticmethod
def _get_day_files(path: pathlib.Path) -> list[str]:
    """Return the files directly inside path that are named like a day of that month.

    The year is read from the name of path's parent directory and the month
    from the name of path itself; a file is kept only when its stem is a day
    that exists in that month of that year (so "29.txt" is kept for February
    only in a leap year).

    :param path: a month directory whose parent is a year directory
    :return: the paths of the matching day files, as strings
    """
    day_files: list[str] = []
    for child in path.glob(DAY_PATTERN):
        # The glob guarantees a two-digit stem, so int() cannot fail here.
        day = int(child.stem)
        if (
            0 < day <= 31
            and time.is_valid_date(
                year=int(path.parent.name),
                month=int(path.name),
                day=day,
            )
            and child.is_file()
        ):
            day_files.append(str(child))
    return day_files
8 changes: 8 additions & 0 deletions jrnl/time.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,11 @@ def parse(
if dt.days < -28 and not year_present:
date = date.replace(date.year - 1)
return date


def is_valid_date(year: int, month: int, day: int) -> bool:
    """Tell whether year, month and day together name a date that exists."""
    try:
        datetime.datetime(year, month, day)
    except ValueError:
        # Raised by datetime when a component is out of range for the date.
        return False
    else:
        return True
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[2022-03-02 9:25:00 AM] This file should be ignored (month)
This text file is in a folder journal's month directory ("2020/09"), but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.

This file should not ever appear in a test.
4 changes: 4 additions & 0 deletions tests/data/journals/basic_folder/2020/should-be-ignored.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[2022-03-02 9:25:00 AM] This file should be ignored (year)
This text file is in a folder journal's year directory ("2020"), but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.

This file should not ever appear in a test.
4 changes: 4 additions & 0 deletions tests/data/journals/basic_folder/should-be-ignored.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[2022-03-02 9:25:00 AM] This file should be ignored (root)
This text file is in a folder journal's root directory, but it's not in the file name format used by jrnl for folder journal entries, so it should be ignored.

This file should not ever appear in a test.
59 changes: 59 additions & 0 deletions tests/unit/test_journals_folder_journal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright © 2012-2023 jrnl contributors
# License: https://www.gnu.org/licenses/gpl-3.0.html

import pathlib
from unittest import mock

import pytest

from jrnl.journals.FolderJournal import Folder


@pytest.mark.parametrize(
    "inputs_and_outputs",
    [
        (
            "/2020/01",
            ["02.txt", "03.txt", "31.txt"],
            ["/2020/01/02.txt", "/2020/01/03.txt", "/2020/01/31.txt"],
        ),
        (
            "/2020/02",  # leap year
            ["02.txt", "03.txt", "28.txt", "29.txt", "31.txt", "39.txt"],
            [
                "/2020/02/02.txt",
                "/2020/02/03.txt",
                "/2020/02/28.txt",
                "/2020/02/29.txt",
            ],
        ),
        (
            "/2100/02",  # not a leap year
            ["01.txt", "28.txt", "29.txt", "39.txt"],
            ["/2100/02/01.txt", "/2100/02/28.txt"],
        ),
        (
            "/2023/04",
            ["29.txt", "30.txt", "31.txt", "39.txt"],
            ["/2023/04/29.txt", "/2023/04/30.txt"],
        ),
    ],
)
def test_get_day_files_expected_filtering(inputs_and_outputs):
    """Day files whose number is not a real day of the given month are dropped."""
    month_dir_name, names_from_glob, wanted = inputs_and_outputs
    month_dir = pathlib.Path(month_dir_name)

    # What the patched glob hands back: every candidate name inside the month folder.
    fake_glob_result = [month_dir / name for name in names_from_glob]
    # Normalise the expectations to the path separator of the running platform.
    wanted_paths = sorted(str(pathlib.PurePath(item)) for item in wanted)

    with (
        mock.patch("pathlib.Path.glob", return_value=fake_glob_result),
        mock.patch.object(pathlib.Path, "is_file", return_value=True),
    ):
        found_paths = sorted(Folder._get_day_files(month_dir))

    assert found_paths == wanted_paths
22 changes: 22 additions & 0 deletions tests/unit/test_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

import datetime

import pytest

from jrnl import time


Expand All @@ -20,3 +22,23 @@ def test_default_minute_is_added():
default_minute=30,
bracketed=False,
) == datetime.datetime(2020, 6, 20, 0, 30)


@pytest.mark.parametrize(
    "inputs",
    [
        (2000, 2, 29, True),
        (2023, 1, 0, False),
        (2023, 1, 1, True),
        (2023, 4, 31, False),
        (2023, 12, 31, True),
        (2023, 12, 32, False),
        (2023, 13, 1, False),
        (2100, 2, 27, True),
        (2100, 2, 28, True),
        (2100, 2, 29, False),
    ],
)
def test_is_valid_date(inputs):
    """Each case is (year, month, day, expected validity)."""
    *date_parts, expected_result = inputs
    assert time.is_valid_date(*date_parts) == expected_result