From aadbaedbcdb69bb6196350dd87ce953b619a607f Mon Sep 17 00:00:00 2001 From: betawolfy Date: Mon, 16 Dec 2024 09:00:05 +0100 Subject: [PATCH] Separate functions in pythons files --- .gitignore | 2 + functions/add_footer.py | 9 +++++ functions/clean_text.py | 36 ++++++++++++++++++ index.py | 82 +++++------------------------------------ 4 files changed, 56 insertions(+), 73 deletions(-) create mode 100644 .gitignore create mode 100644 functions/add_footer.py create mode 100644 functions/clean_text.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..39920f0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +functions/__pycache__/add_footer.cpython-311.pyc +functions/__pycache__/clean_text.cpython-311.pyc diff --git a/functions/add_footer.py b/functions/add_footer.py new file mode 100644 index 0000000..3190f4f --- /dev/null +++ b/functions/add_footer.py @@ -0,0 +1,9 @@ +def add_footer(output_file_path): + with open('footer.txt', 'r', encoding='utf-8') as file: + footer = file.readlines() + with open(output_file_path, 'r', encoding='utf-8') as file: + lines = file.readlines() + with open(output_file_path, 'w', encoding='utf-8') as file: + file.writelines(footer) + file.writelines(lines) + print(f'Footer added to {output_file_path}') \ No newline at end of file diff --git a/functions/clean_text.py b/functions/clean_text.py new file mode 100644 index 0000000..24e98c2 --- /dev/null +++ b/functions/clean_text.py @@ -0,0 +1,36 @@ +import re + +def clean_text(input_file_path, output_file_path): + with open(input_file_path, 'r', encoding='utf-8') as file: + lines = file.readlines() + + with open('remove.txt', 'r', encoding='utf-8') as file: + remove_lines = file.readlines() + replace_rules = {} + for line in remove_lines: + if ':' in line: + remove_str, replace_str = line.strip().split(':', 1) + replace_rules[remove_str] = replace_str.strip() + + cleaned_lines = [] + for line in lines: + if not line.strip() == '----': + line = line.strip() + is_dialog = line.lstrip().startswith('d') + if is_dialog: + line = re.sub(r'd\d{2}:\d{2}', '', line) + for remove_str, replace_str in replace_rules.items(): + if remove_str in line: + line = line.replace(remove_str, replace_str + " ") + else: + line = re.sub(r'\b\d{2}:\d{2}\b', '', line) + for remove_str in replace_rules: + if remove_str in line: + line = line.replace(remove_str, '') + line = ' '.join(line.split()) + if line.strip(): + cleaned_lines.append(line + '\n') + + with open(output_file_path, 'w', encoding='utf-8') as file: + file.writelines(cleaned_lines) + print(f'Cleaned text saved to {output_file_path}') \ No newline at end of file diff --git a/index.py b/index.py index e94e194..801916f 100644 --- a/index.py +++ b/index.py @@ -19,82 +19,18 @@ # ---------------------------- -# Importing the required modules -# re module is used for regular expressions -import re -# Function to clean the text -# It takes the input file path and output file path as arguments -def clean_text(input_file_path, output_file_path): - # Reading the input file - with open(input_file_path, 'r', encoding='utf-8') as file: - lines = file.readlines() +# import functions +from functions.clean_text import clean_text +from functions.add_footer import add_footer - # Reading the remove.txt file to get the strings to remove and their replacements - # Basic format of the rule: string_to_remove:replacement_string - # * Note: you can add more rules to the remove.txt file as needed - with open('remove.txt', 'r', encoding='utf-8') as file: - remove_lines = file.readlines() - replace_rules = {} - for line in remove_lines: - if ':' in line: - remove_str, replace_str = line.strip().split(':', 1) - replace_rules[remove_str] = replace_str.strip() +# Input and output file paths +input_file_path = 'lore.txt' # Chemin du fichier d'entrée +output_file_path = 'lore_cleaned.txt' # Chemin du fichier de sortie - # Cleaning the text - # for each line in the input file, the following steps are performed: - cleaned_lines = [] - for line in lines: - # Removing leading/trailing whitespaces unless it's "----" - if not line.strip() == '----': - line = line.strip() - - # Check if the line is a dialogue line (starts with 'd') - is_dialog = line.lstrip().startswith('d') - - # If it is a dialogue line: - if is_dialog: - # Remove timestamps for dialogue lines - line = re.sub(r'd\d{2}:\d{2}', '', line) - - # For dialogue lines, apply the replace rules - for remove_str, replace_str in replace_rules.items(): - if remove_str in line: - line = line.replace(remove_str, replace_str + " ") - # If it is not a dialogue line (i.e., Lore story line): - else: - # Remove timestamps - line = re.sub(r'\b\d{2}:\d{2}\b', '', line) - - # Removeing names - for remove_str in replace_rules: - if remove_str in line: - line = line.replace(remove_str, '') - - # Remove all spaces - line = ' '.join(line.split()) - - # Adding the cleaned line to the list - if line.strip(): - cleaned_lines.append(line + '\n') - - # Saving the cleaned text to the output file - with open(output_file_path, 'w', encoding='utf-8') as file: - file.writelines(cleaned_lines) - print(f'Cleaned text saved to {output_file_path}') - -#Function to add the footer -#Optionally, you can add in footer.txt the name of the event, the date, and the name of the person who participated in the event -#It will put the footer at the beginning of the file -def add_footer(output_file_path): - with open('footer.txt', 'r', encoding='utf-8') as file: - footer = file.readlines() - with open(output_file_path, 'r', encoding='utf-8') as file: - lines = file.readlines() - with open(output_file_path, 'w', encoding='utf-8') as file: - file.writelines(footer) - file.writelines(lines) - print(f'Footer added to {output_file_path}') +# Clean the text and add footer +clean_text(input_file_path, output_file_path) +add_footer(output_file_path) # Main function # The input file path and output file path are specified here