Skip to content

Commit

Permalink
Separate functions in pythons files
Browse files Browse the repository at this point in the history
  • Loading branch information
Betawolfy committed Dec 16, 2024
1 parent 3ee9d96 commit aadbaed
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 73 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
functions/__pycache__/add_footer.cpython-311.pyc
functions/__pycache__/clean_text.cpython-311.pyc
9 changes: 9 additions & 0 deletions functions/add_footer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
def add_footer(output_file_path, footer_path='footer.txt'):
    """Prepend the contents of the footer file to ``output_file_path``.

    Despite its name, the footer text is written *before* the existing
    content of the output file; the output file is rewritten in place.

    Args:
        output_file_path: Path of the UTF-8 text file to rewrite.
        footer_path: Path of the UTF-8 text file whose content is
            prepended. Defaults to ``'footer.txt'``, resolved against the
            current working directory.

    Raises:
        OSError: If either file cannot be opened (for example
            ``FileNotFoundError`` when the footer file is missing).
    """
    with open(footer_path, 'r', encoding='utf-8') as file:
        footer = file.read()
    with open(output_file_path, 'r', encoding='utf-8') as file:
        body = file.read()

    # A footer file saved without a trailing newline would otherwise be
    # glued onto the first line of the existing content.
    if footer and body and not footer.endswith('\n'):
        footer += '\n'

    with open(output_file_path, 'w', encoding='utf-8') as file:
        file.write(footer + body)
    print(f'Footer added to {output_file_path}')
36 changes: 36 additions & 0 deletions functions/clean_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import re

# Timestamp attached to the dialogue marker, e.g. "d12:34".
_DIALOG_TIMESTAMP_RE = re.compile(r'd\d{2}:\d{2}')
# Stand-alone "MM:SS"-shaped timestamp, e.g. "12:34".
_TIMESTAMP_RE = re.compile(r'\b\d{2}:\d{2}\b')


def _load_replace_rules(rules_path):
    """Parse ``pattern:replacement`` lines from ``rules_path`` into a dict."""
    rules = {}
    with open(rules_path, 'r', encoding='utf-8') as file:
        for raw_rule in file:
            # Only the first ':' separates pattern from replacement.
            pattern, separator, replacement = raw_rule.strip().partition(':')
            # Lines without ':' are not rules. An empty pattern must be
            # skipped too: '' is "found" in every string and
            # str.replace('', x) inserts x between every character.
            if not separator or not pattern:
                continue
            rules[pattern] = replacement.strip()
    return rules


def _clean_line(line, replace_rules):
    """Return the cleaned form of one line ('' when nothing is left)."""
    line = line.strip()
    # NOTE(review): every line starting with 'd' is treated as dialogue,
    # exactly as before; confirm that lore lines never start with 'd'.
    if line.startswith('d'):
        line = _DIALOG_TIMESTAMP_RE.sub('', line)
        # Dialogue lines: substitute each pattern by its replacement.
        for remove_str, replace_str in replace_rules.items():
            line = line.replace(remove_str, replace_str + ' ')
    else:
        line = _TIMESTAMP_RE.sub('', line)
        # Non-dialogue lines: the patterns are removed outright.
        for remove_str in replace_rules:
            line = line.replace(remove_str, '')
    # Collapse every run of whitespace into a single space.
    return ' '.join(line.split())


def clean_text(input_file_path, output_file_path, rules_path='remove.txt'):
    """Clean the text of ``input_file_path`` and save it to ``output_file_path``.

    Each input line is stripped, has its timestamps removed, has the rules
    from the rules file applied (substitution on dialogue lines, removal on
    all other lines) and has its whitespace collapsed. Lines that end up
    empty are dropped.

    Args:
        input_file_path: Path of the UTF-8 text file to clean.
        output_file_path: Path the cleaned text is written to.
        rules_path: Path of the UTF-8 rules file, one
            ``string_to_remove:replacement_string`` rule per line.
            Defaults to ``'remove.txt'``, resolved against the current
            working directory.

    Raises:
        OSError: If a file cannot be opened (for example
            ``FileNotFoundError`` for a missing input or rules file).
    """
    with open(input_file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    replace_rules = _load_replace_rules(rules_path)

    cleaned_lines = []
    for line in lines:
        cleaned = _clean_line(line, replace_rules)
        if cleaned:
            cleaned_lines.append(cleaned + '\n')

    with open(output_file_path, 'w', encoding='utf-8') as file:
        file.writelines(cleaned_lines)
    print(f'Cleaned text saved to {output_file_path}')
82 changes: 9 additions & 73 deletions index.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,82 +19,18 @@

# ----------------------------

# Importing the required modules
# re module is used for regular expressions
import re

# Function to clean the text
# It takes the input file path and output file path as arguments
def clean_text(input_file_path, output_file_path):
# Reading the input file
with open(input_file_path, 'r', encoding='utf-8') as file:
lines = file.readlines()
# import functions
from functions.clean_text import clean_text
from functions.add_footer import add_footer

# Reading the remove.txt file to get the strings to remove and their replacements
# Basic format of the rule: string_to_remove:replacement_string
# * Note: you can add more rules to the remove.txt file as needed
with open('remove.txt', 'r', encoding='utf-8') as file:
remove_lines = file.readlines()
replace_rules = {}
for line in remove_lines:
if ':' in line:
remove_str, replace_str = line.strip().split(':', 1)
replace_rules[remove_str] = replace_str.strip()
# Input and output file paths
input_file_path = 'lore.txt' # Path of the input file
output_file_path = 'lore_cleaned.txt' # Path of the output file

# Cleaning the text
# for each line in the input file, the following steps are performed:
cleaned_lines = []
for line in lines:
# Removing leading/trailing whitespaces unless it's "----"
if not line.strip() == '----':
line = line.strip()

# Check if the line is a dialogue line (starts with 'd')
is_dialog = line.lstrip().startswith('d')

# If it is a dialogue line:
if is_dialog:
# Remove timestamps for dialogue lines
line = re.sub(r'd\d{2}:\d{2}', '', line)

# For dialogue lines, apply the replace rules
for remove_str, replace_str in replace_rules.items():
if remove_str in line:
line = line.replace(remove_str, replace_str + " ")
# If it is not a dialogue line (i.e., Lore story line):
else:
# Remove timestamps
line = re.sub(r'\b\d{2}:\d{2}\b', '', line)

# Removing names
for remove_str in replace_rules:
if remove_str in line:
line = line.replace(remove_str, '')

# Collapse runs of whitespace into single spaces
line = ' '.join(line.split())

# Adding the cleaned line to the list
if line.strip():
cleaned_lines.append(line + '\n')

# Saving the cleaned text to the output file
with open(output_file_path, 'w', encoding='utf-8') as file:
file.writelines(cleaned_lines)
print(f'Cleaned text saved to {output_file_path}')

#Function to add the footer
#Optionally, you can add in footer.txt the name of the event, the date, and the name of the person who participated in the event
#It will put the footer at the beginning of the file
def add_footer(output_file_path, footer_path='footer.txt'):
    """Prepend the contents of the footer file to ``output_file_path``.

    Despite its name, the footer text is written *before* the existing
    content of the output file; the output file is rewritten in place.

    Args:
        output_file_path: Path of the UTF-8 text file to rewrite.
        footer_path: Path of the UTF-8 text file whose content is
            prepended. Defaults to ``'footer.txt'``, resolved against the
            current working directory.

    Raises:
        OSError: If either file cannot be opened (for example
            ``FileNotFoundError`` when the footer file is missing).
    """
    with open(footer_path, 'r', encoding='utf-8') as file:
        footer = file.read()
    with open(output_file_path, 'r', encoding='utf-8') as file:
        body = file.read()

    # A footer file saved without a trailing newline would otherwise be
    # glued onto the first line of the existing content.
    if footer and body and not footer.endswith('\n'):
        footer += '\n'

    with open(output_file_path, 'w', encoding='utf-8') as file:
        file.write(footer + body)
    print(f'Footer added to {output_file_path}')
# Clean the input text, then add the footer
# (add_footer writes the footer text at the top of the output file)
clean_text(input_file_path, output_file_path)
add_footer(output_file_path)

# Main function
# The input file path and output file path are specified here
Expand Down

0 comments on commit aadbaed

Please sign in to comment.