diff --git a/Alteryx/Average Grip Strength.yxmd b/Archive/Alteryx/Average Grip Strength.yxmd similarity index 100% rename from Alteryx/Average Grip Strength.yxmd rename to Archive/Alteryx/Average Grip Strength.yxmd diff --git a/Alteryx/Blood Pressure Count.yxmd b/Archive/Alteryx/Blood Pressure Count.yxmd similarity index 100% rename from Alteryx/Blood Pressure Count.yxmd rename to Archive/Alteryx/Blood Pressure Count.yxmd diff --git a/Alteryx/Blood Pressure and Marital Status.yxwz b/Archive/Alteryx/Blood Pressure and Marital Status.yxwz similarity index 100% rename from Alteryx/Blood Pressure and Marital Status.yxwz rename to Archive/Alteryx/Blood Pressure and Marital Status.yxwz diff --git a/Alteryx/Correlation Analysis.yxmd b/Archive/Alteryx/Correlation Analysis.yxmd similarity index 100% rename from Alteryx/Correlation Analysis.yxmd rename to Archive/Alteryx/Correlation Analysis.yxmd diff --git a/Alteryx/CstickETl.bak b/Archive/Alteryx/CstickETl.bak similarity index 100% rename from Alteryx/CstickETl.bak rename to Archive/Alteryx/CstickETl.bak diff --git a/Alteryx/CstickETl.yxmd b/Archive/Alteryx/CstickETl.yxmd similarity index 100% rename from Alteryx/CstickETl.yxmd rename to Archive/Alteryx/CstickETl.yxmd diff --git a/Alteryx/Demographics Data.bak b/Archive/Alteryx/Demographics Data.bak similarity index 100% rename from Alteryx/Demographics Data.bak rename to Archive/Alteryx/Demographics Data.bak diff --git a/Alteryx/Demographics Data.yxdb b/Archive/Alteryx/Demographics Data.yxdb similarity index 100% rename from Alteryx/Demographics Data.yxdb rename to Archive/Alteryx/Demographics Data.yxdb diff --git a/Alteryx/Demographics data.yxmd b/Archive/Alteryx/Demographics data.yxmd similarity index 100% rename from Alteryx/Demographics data.yxmd rename to Archive/Alteryx/Demographics data.yxmd diff --git a/Alteryx/Diet Data.bak b/Archive/Alteryx/Diet Data.bak similarity index 100% rename from Alteryx/Diet Data.bak rename to Archive/Alteryx/Diet Data.bak diff --git a/Alteryx/Diet Data.yxdb b/Archive/Alteryx/Diet Data.yxdb similarity index 100% rename from Alteryx/Diet Data.yxdb rename to Archive/Alteryx/Diet Data.yxdb diff --git a/Alteryx/Diet data.yxmd b/Archive/Alteryx/Diet data.yxmd similarity index 100% rename from Alteryx/Diet data.yxmd rename to Archive/Alteryx/Diet data.yxmd diff --git a/Alteryx/Exploratory Data Analysis.bak b/Archive/Alteryx/Exploratory Data Analysis.bak similarity index 100% rename from Alteryx/Exploratory Data Analysis.bak rename to Archive/Alteryx/Exploratory Data Analysis.bak diff --git a/Alteryx/Exploratory Data Analysis.yxmd b/Archive/Alteryx/Exploratory Data Analysis.yxmd similarity index 100% rename from Alteryx/Exploratory Data Analysis.yxmd rename to Archive/Alteryx/Exploratory Data Analysis.yxmd diff --git a/Alteryx/Histogram.yxmd b/Archive/Alteryx/Histogram.yxmd similarity index 100% rename from Alteryx/Histogram.yxmd rename to Archive/Alteryx/Histogram.yxmd diff --git a/Alteryx/Median Carbohydartes.yxmd b/Archive/Alteryx/Median Carbohydartes.yxmd similarity index 100% rename from Alteryx/Median Carbohydartes.yxmd rename to Archive/Alteryx/Median Carbohydartes.yxmd diff --git a/Alteryx/Median Protein.bak b/Archive/Alteryx/Median Protein.bak similarity index 100% rename from Alteryx/Median Protein.bak rename to Archive/Alteryx/Median Protein.bak diff --git a/Alteryx/Median Protein.yxmd b/Archive/Alteryx/Median Protein.yxmd similarity index 100% rename from Alteryx/Median Protein.yxmd rename to Archive/Alteryx/Median Protein.yxmd diff --git a/Alteryx/Questionnare Data.yxdb b/Archive/Alteryx/Questionnare Data.yxdb similarity index 100% rename from Alteryx/Questionnare Data.yxdb rename to Archive/Alteryx/Questionnare Data.yxdb diff --git a/Alteryx/Questionnare data.yxmd b/Archive/Alteryx/Questionnare data.yxmd similarity index 100% rename from Alteryx/Questionnare data.yxmd rename to Archive/Alteryx/Questionnare data.yxmd diff --git a/Alteryx/Research Report on Alteryx.docx b/Archive/Alteryx/Research Report on Alteryx.docx similarity index 100% rename from Alteryx/Research Report on Alteryx.docx rename to Archive/Alteryx/Research Report on Alteryx.docx diff --git a/Alteryx/Research Report on Alteryx.pdf b/Archive/Alteryx/Research Report on Alteryx.pdf similarity index 100% rename from Alteryx/Research Report on Alteryx.pdf rename to Archive/Alteryx/Research Report on Alteryx.pdf diff --git a/Alteryx/Simple Linear Regression.bak b/Archive/Alteryx/Simple Linear Regression.bak similarity index 100% rename from Alteryx/Simple Linear Regression.bak rename to Archive/Alteryx/Simple Linear Regression.bak diff --git a/Alteryx/Simple Linear Regression.yxmd b/Archive/Alteryx/Simple Linear Regression.yxmd similarity index 100% rename from Alteryx/Simple Linear Regression.yxmd rename to Archive/Alteryx/Simple Linear Regression.yxmd diff --git a/Alteryx/Summarize tool.bak b/Archive/Alteryx/Summarize tool.bak similarity index 100% rename from Alteryx/Summarize tool.bak rename to Archive/Alteryx/Summarize tool.bak diff --git a/Alteryx/Summarize tool.yxmd b/Archive/Alteryx/Summarize tool.yxmd similarity index 100% rename from Alteryx/Summarize tool.yxmd rename to Archive/Alteryx/Summarize tool.yxmd diff --git a/Alteryx/dietETL.bak b/Archive/Alteryx/dietETL.bak similarity index 100% rename from Alteryx/dietETL.bak rename to Archive/Alteryx/dietETL.bak diff --git a/Alteryx/dietETL.yxmd b/Archive/Alteryx/dietETL.yxmd similarity index 100% rename from Alteryx/dietETL.yxmd rename to Archive/Alteryx/dietETL.yxmd diff --git a/Average Grip Strength.yxmd b/Archive/Average Grip Strength.yxmd similarity index 100% rename from Average Grip Strength.yxmd rename to Archive/Average Grip Strength.yxmd diff --git a/Correlation Analysis.yxmd b/Archive/Correlation Analysis.yxmd similarity index 100% rename from Correlation Analysis.yxmd rename to Archive/Correlation Analysis.yxmd diff --git a/Exploratory Data Analysis.yxmd b/Archive/Exploratory Data Analysis.yxmd similarity index 100% rename from Exploratory Data Analysis.yxmd rename to Archive/Exploratory Data Analysis.yxmd diff --git a/Median Carbohydartes.yxmd b/Archive/Median Carbohydartes.yxmd similarity index 100% rename from Median Carbohydartes.yxmd rename to Archive/Median Carbohydartes.yxmd diff --git a/Median Protein.yxmd b/Archive/Median Protein.yxmd similarity index 100% rename from Median Protein.yxmd rename to Archive/Median Protein.yxmd diff --git a/Redback_Operations_-_Requirements_Gathering_&_Options_Paper_v1.3.docx b/Archive/Redback_Operations_-_Requirements_Gathering_&_Options_Paper_v1.3.docx similarity index 100% rename from Redback_Operations_-_Requirements_Gathering_&_Options_Paper_v1.3.docx rename to Archive/Redback_Operations_-_Requirements_Gathering_&_Options_Paper_v1.3.docx diff --git a/Research Report on Alteryx.pdf b/Archive/Research Report on Alteryx.pdf similarity index 100% rename from Research Report on Alteryx.pdf rename to Archive/Research Report on Alteryx.pdf diff --git a/Simple Linear Regression.yxmd b/Archive/Simple Linear Regression.yxmd similarity index 100% rename from Simple Linear Regression.yxmd rename to Archive/Simple Linear Regression.yxmd diff --git a/File Upload Service/streamlitdw/.gitignore b/File Upload Service/.gitignore similarity index 100% rename from File Upload Service/streamlitdw/.gitignore rename to File Upload Service/.gitignore diff --git a/File Upload Service/streamlitdw/File upload service files/data-lakehouse/app/Dockerfile b/File Upload Service/app/Dockerfile similarity index 100% rename from File Upload Service/streamlitdw/File upload service files/data-lakehouse/app/Dockerfile rename to File Upload Service/app/Dockerfile diff --git a/File Upload Service/streamlitdw/File upload service files/data-lakehouse/app/streamlitdw_fe.py b/File Upload Service/app/backup/streamlitdw_fe_backup.py similarity index 96% rename from File Upload Service/streamlitdw/File upload service files/data-lakehouse/app/streamlitdw_fe.py rename to File Upload Service/app/backup/streamlitdw_fe_backup.py index fd93b23..4442cd8 100644 --- a/File Upload Service/streamlitdw/File upload service files/data-lakehouse/app/streamlitdw_fe.py +++ b/File Upload Service/app/backup/streamlitdw_fe_backup.py @@ -1,60 +1,60 @@ -import streamlit as st -from minio import Minio -from minio.error import S3Error -from dotenv import load_dotenv -import io -import os - - -# Load environment variables -load_dotenv() - -# Check the environment variables -access_key = os.getenv('AWS_ACCESS_KEY_ID') -secret_key = os.getenv('AWS_SECRET_ACCESS_KEY') - -# st.write("Access Key ID:", access_key) -# st.write("Secret Access Key:", secret_key) - -# Set up MinIO client using the loaded environment variables -minio_client = Minio( - "10.137.0.149:9000", # MinIO server address - access_key=access_key, - secret_key=secret_key, - secure=False -) - -bucket_name = "file-upload-service-sl" - -def upload_to_minio(file, filename): - try: - # Convert the uploaded file to bytes - data = file.read() - file_stream = io.BytesIO(data) - - # Upload file to file upload service - minio_client.put_object( - bucket_name, filename, file_stream, len(data) - ) - st.success(f"File {filename} uploaded successfully to Data Warehouse.") #try and except block to capture upload issues - except S3Error as e: - st.error(f"Failed to upload {filename} to DataWarehouse: {e}") - -def main(): - st.title("File Upload to Redback Data Warehouse Server") - - # File uploader - uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt", "xlsx","json"]) - - if uploaded_file is not None: - # Display file details - st.write(f"**Filename:** {uploaded_file.name}") - st.write(f"**File type:** {uploaded_file.type}") - st.write(f"**File size:** {uploaded_file.size / (1024 * 1024):.2f} MB") - - # save file option - if st.button("Upload to Data Warehouse"): - upload_to_minio(uploaded_file, uploaded_file.name) - -if __name__ == "__main__": +import streamlit as st +from minio import Minio +from minio.error import S3Error +from dotenv import load_dotenv +import io +import os + + +# Load environment variables +load_dotenv() + +# Check the environment variables +access_key = os.getenv('AWS_ACCESS_KEY_ID') +secret_key = os.getenv('AWS_SECRET_ACCESS_KEY') + +# st.write("Access Key ID:", access_key) +# st.write("Secret Access Key:", secret_key) + +# Set up MinIO client using the loaded environment variables +minio_client = Minio( + "10.137.0.149:9000", # MinIO server address + access_key=access_key, + secret_key=secret_key, + secure=False +) + +bucket_name = "file-upload-service-sl" + +def upload_to_minio(file, filename): + try: + # Convert the uploaded file to bytes + data = file.read() + file_stream = io.BytesIO(data) + + # Upload file to file upload service + minio_client.put_object( + bucket_name, filename, file_stream, len(data) + ) + st.success(f"File {filename} uploaded successfully to Data Warehouse.") #try and except block to capture upload issues + except S3Error as e: + st.error(f"Failed to upload {filename} to DataWarehouse: {e}") + +def main(): + st.title("File Upload to Redback Data Warehouse Server") + + # File uploader + uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt", "xlsx","json"]) + + if uploaded_file is not None: + # Display file details + st.write(f"**Filename:** {uploaded_file.name}") + st.write(f"**File type:** {uploaded_file.type}") + st.write(f"**File size:** {uploaded_file.size / (1024 * 1024):.2f} MB") + + # save file option + if st.button("Upload to Data Warehouse"): + upload_to_minio(uploaded_file, uploaded_file.name) + +if __name__ == "__main__": main() \ No newline at end of file diff --git a/File Upload Service/streamlitdw/File upload service files/data-lakehouse/app/requirements.txt b/File Upload Service/app/requirements.txt similarity index 100% rename from File Upload Service/streamlitdw/File upload service files/data-lakehouse/app/requirements.txt rename to File Upload Service/app/requirements.txt diff --git a/File Upload Service/app/streamlitdw_fe.py b/File Upload Service/app/streamlitdw_fe.py new file mode 100644 index 0000000..93e62f7 --- /dev/null +++ b/File Upload Service/app/streamlitdw_fe.py @@ -0,0 +1,78 @@ +import streamlit as st +from minio import Minio +from minio.error import S3Error +from dotenv import load_dotenv +import io +import os +import datetime + +# Load environment variables +load_dotenv() + +# Check the environment variables +access_key = os.getenv('AWS_ACCESS_KEY_ID') +secret_key = os.getenv('AWS_SECRET_ACCESS_KEY') + +# Set up MinIO client using the loaded environment variables +minio_client = Minio( + "10.137.0.149:9000", # MinIO server address + access_key=access_key, + secret_key=secret_key, + secure=False +) + +bucket_name = "file-upload-service-sl" + +def validate_filename(name): + # Ensure the name is alphanumeric (you can expand this with more rules if needed) + return name.isalnum() + +def generate_custom_filename(project, base_name, original_filename): + # Extract file extension + file_extension = original_filename.split(".")[-1] + # Generate a custom name with the project prefix, base name, and a date (YYYYMMDD) + date_stamp = datetime.datetime.now().strftime("%Y%m%d") + custom_filename = f"{project}/{base_name}_{date_stamp}.{file_extension}" # Use project as folder prefix + return custom_filename + +def upload_to_minio(file, filename): + try: + # Convert the uploaded file to bytes + data = file.read() + file_stream = io.BytesIO(data) + + # Upload file to MinIO, using filename with the project prefix as the object name + minio_client.put_object( + bucket_name, filename, file_stream, len(data) + ) + st.success(f"File {filename} uploaded successfully to Data Warehouse.") + except S3Error as e: + st.error(f"Failed to upload {filename} to Data Warehouse: {e}") + +def main(): + st.title("File Upload to Redback Data Warehouse Server") + + # Project selection dropdown + project = st.selectbox("Select Project", options=["project1", "project2", "project3", "project4", "project5", "other"]) + + # File uploader + uploaded_file = st.file_uploader("Choose a file", type=["csv", "txt", "xlsx", "json"]) + + if uploaded_file is not None: + base_name = st.text_input("Enter base name for the file:") + + if base_name and validate_filename(base_name): + # Generate the custom filename with the project prefix + custom_filename = generate_custom_filename(project, base_name, uploaded_file.name) + # Display file details + st.write(f"**Filename:** {custom_filename}") + st.write(f"**File type:** {uploaded_file.type}") + st.write(f"**File size:** {uploaded_file.size / (1024 * 1024):.2f} MB") + + if st.button("Upload to Data Warehouse"): + upload_to_minio(uploaded_file, custom_filename) + else: + st.warning("Please enter a valid base name. Only alphanumeric characters are allowed.") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/File Upload Service/streamlitdw/File upload service files/data-lakehouse/data-lakehouse2.yml b/File Upload Service/data-lakehouse2.yml similarity index 100% rename from File Upload Service/streamlitdw/File upload service files/data-lakehouse/data-lakehouse2.yml rename to File Upload Service/data-lakehouse2.yml diff --git a/File Upload Service/streamlitdw/File upload service files/data-lakehouse/docker-compose.yml b/File Upload Service/docker-compose.yml similarity index 100% rename from File Upload Service/streamlitdw/File upload service files/data-lakehouse/docker-compose.yml rename to File Upload Service/docker-compose.yml diff --git a/File Upload Service/streamlitdw/File upload service files/data-lakehouse/flask/Dockerfile b/File Upload Service/flask/Dockerfile similarity index 100% rename from File Upload Service/streamlitdw/File upload service files/data-lakehouse/flask/Dockerfile rename to File Upload Service/flask/Dockerfile diff --git a/File Upload Service/streamlitdw/File upload service files/data-lakehouse/flask/flaskapi_dw.py b/File Upload Service/flask/flaskapi_dw.py similarity index 96% rename from File Upload Service/streamlitdw/File upload service files/data-lakehouse/flask/flaskapi_dw.py rename to File Upload Service/flask/flaskapi_dw.py index c9164f5..fd3bbb3 100644 --- a/File Upload Service/streamlitdw/File upload service files/data-lakehouse/flask/flaskapi_dw.py +++ b/File Upload Service/flask/flaskapi_dw.py @@ -1,48 +1,48 @@ -from flask import Flask, jsonify, send_file, Response -from minio import Minio -from minio.error import S3Error -from dotenv import load_dotenv -import os -import io - -app = Flask(__name__) - -# minio details -MINIO_URL = "10.137.0.149:9000" -ACCESS_KEY = os.getenv('AWS_ACCESS_KEY_ID') -SECRET_KEY = os.getenv('AWS_SECRET_ACCESS_KEY') -BUCKET_NAME = "file-upload-service-sl" - -# Initialise Mini -minio_client = Minio( - MINIO_URL, - access_key=ACCESS_KEY, - secret_key=SECRET_KEY, - secure=False -) - -# Endpoint to list files in the bucket -@app.route('/list-files', methods=['GET']) -def list_files(): - try: - objects = minio_client.list_objects(BUCKET_NAME) - files = [obj.object_name for obj in objects] - return jsonify(files) - except S3Error as err: - return jsonify({"error": str(err)}), 500 - -# Endpoint to download a file from the bucket -@app.route('/download-file/', methods=['GET']) -def download_file(filename): - try: - data = minio_client.get_object(BUCKET_NAME, filename) - return send_file( - io.BytesIO(data.read()), - attachment_filename=filename, - as_attachment=True - ) - except S3Error as err: - return jsonify({"error": str(err)}), 500 - -if __name__ == '__main__': +from flask import Flask, jsonify, send_file, Response +from minio import Minio +from minio.error import S3Error +from dotenv import load_dotenv +import os +import io + +app = Flask(__name__) + +# minio details +MINIO_URL = "10.137.0.149:9000" +ACCESS_KEY = os.getenv('AWS_ACCESS_KEY_ID') +SECRET_KEY = os.getenv('AWS_SECRET_ACCESS_KEY') +BUCKET_NAME = "file-upload-service-sl" + +# Initialise Mini +minio_client = Minio( + MINIO_URL, + access_key=ACCESS_KEY, + secret_key=SECRET_KEY, + secure=False +) + +# Endpoint to list files in the bucket +@app.route('/list-files', methods=['GET']) +def list_files(): + try: + objects = minio_client.list_objects(BUCKET_NAME) + files = [obj.object_name for obj in objects] + return jsonify(files) + except S3Error as err: + return jsonify({"error": str(err)}), 500 + +# Endpoint to download a file from the bucket +@app.route('/download-file/', methods=['GET']) +def download_file(filename): + try: + data = minio_client.get_object(BUCKET_NAME, filename) + return send_file( + io.BytesIO(data.read()), + attachment_filename=filename, + as_attachment=True + ) + except S3Error as err: + return jsonify({"error": str(err)}), 500 + +if __name__ == '__main__': app.run(host='0.0.0.0', port=5000) # runnning on 5000 \ No newline at end of file diff --git a/File Upload Service/streamlitdw/File upload service files/data-lakehouse/flask/requirements.txt b/File Upload Service/flask/requirements.txt similarity index 100% rename from File Upload Service/streamlitdw/File upload service files/data-lakehouse/flask/requirements.txt rename to File Upload Service/flask/requirements.txt diff --git a/Tutorial b/Tutorial deleted file mode 160000 index 811d9b5..0000000 --- a/Tutorial +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 811d9b55f79bfab4ea1f74d5413a8418f6f0700b diff --git a/gitignore.txt b/gitignore.txt new file mode 100644 index 0000000..4c49bd7 --- /dev/null +++ b/gitignore.txt @@ -0,0 +1 @@ +.env