diff --git a/README.md b/README.md index 05f7f52..2d475e1 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ export DBPORT=5432 ``` #### Snowflake -To setup the data in snowflake, you would need to have the snowflake cli installed ([instructions](https://docs.snowflake.com/en/user-guide/snowsql-install-config)), and have your credentials configured as per the [docs](https://docs.snowflake.com/en/user-guide/snowsql-config). You can then run the following command to setup the data: +To set up the data in snowflake, you would need to have the snowflake cli installed ([instructions](https://docs.snowflake.com/en/user-guide/snowsql-install-config)), and have your credentials configured as per the [docs](https://docs.snowflake.com/en/user-guide/snowsql-config). You can then run the following command to set up the data: ```sh ./setup_snowflake.sh ``` @@ -24,6 +24,15 @@ This will create 1 database per database in the repo as before, using `public` a Note that the same sql files work for both the postgres and snowflake databases, so you can use the same sql files to setup both databases. +#### BigQuery, MySQL, SQLite, SQL Server +To set up the data in these systems, you would need your credentials to be configured in `utils_dialects`. You can then run the following command to set up the databases: +``` +python translate_ddl_dialect.py +``` +This will create one new SQL file per database per dialect. +For SQLite, the `.db` files will be saved in the folder `sqlite_dbs`. +Note that BigQuery, MySQL and SQLite do not support schemas and hence the SQL files will be modified to skip schema creation. + ### Python Library This is the recommended way to access the schema from the json files in a python environment. 
To use the python library in your code, navigate to this repository and install it using pip: diff --git a/defog_data/advising/advising.sql b/defog_data/advising/advising.sql index d14533b..e5b1521 100644 --- a/defog_data/advising/advising.sql +++ b/defog_data/advising/advising.sql @@ -52,7 +52,7 @@ CREATE TABLE public.course_offering ( has_final_exam boolean, textbook text, class_address text, - allow_audit text DEFAULT 'n'::text + allow_audit text DEFAULT 'false'::text ); @@ -176,21 +176,21 @@ INSERT INTO public.comment_instructor (instructor_id, student_id, score, comment ; INSERT INTO public.course (course_id, name, department, number, credits, advisory_requirement, enforced_requirement, description, num_semesters, num_enrolled, has_discussion, has_lab, has_projects, has_exams, num_reviews, clarity_score, easiness_score, helpfulness_score) VALUES -(1, 'Introduction to Computer Science', 'Computer Science', 'CS101', 3, NULL, NULL, 'This course introduces the basics of computer science.', 2, 2, true, false, true, false, 10, 5, 3, 4), -(2, 'Advanced Calculus', 'Mathematics', 'MATH201', 4, 'CS101', NULL, 'This course covers advanced topics in calculus.', 1, 3, false, false, true, true, 5, 4, 2, 3), -(3, 'Introduction to Physics', 'Physics', 'PHYS101', 3, NULL, 'MATH201', 'This course provides an introduction to physics principles.', 2, 1, true, true, true, true, 8, 4, 3, 5), -(4, 'Distributed Databases', 'Computer Science', 'CS302', 3, NULL, 'CS101', 'This course provides an introduction to distributed databases.', 2, 2, true, true, false, true, 4, 2, 1, 5) +(1, 'Introduction to Computer Science', 'Computer Science', 'CS101', '3', NULL, NULL, 'This course introduces the basics of computer science.', 2, 2, true, false, true, false, 10, 5, 3, 4), +(2, 'Advanced Calculus', 'Mathematics', 'MATH201', '4', 'CS101', NULL, 'This course covers advanced topics in calculus.', 1, 3, false, false, true, true, 5, 4, 2, 3), +(3, 'Introduction to Physics', 'Physics', 'PHYS101', 
'3', NULL, 'MATH201', 'This course provides an introduction to physics principles.', 2, 1, true, true, true, true, 8, 4, 3, 5), +(4, 'Distributed Databases', 'Computer Science', 'CS302', '3', NULL, 'CS101', 'This course provides an introduction to distributed databases.', 2, 2, true, true, false, true, 4, 2, 1, 5) ; INSERT INTO public.course_offering (offering_id, course_id, semester, section_number, start_time, end_time, monday, tuesday, wednesday, thursday, friday, saturday, sunday, has_final_project, has_final_exam, textbook, class_address, allow_audit) VALUES -(1, 1, 1, 1, '08:00:00', '10:00:00', 'John Smith', NULL, NULL, 'Jane Doe', NULL, NULL, NULL, true, false, 'Introduction to Computer Science', '123 Main St', true), -(2, 2, 1, 1, '10:00:00', '12:00:00', NULL, NULL, 'Gilbert Strang', NULL, NULL, NULL, NULL, true, true, 'Advanced Calculus', '456 Elm St', false), -(3, 3, 2, 1, '08:00:00', '10:00:00', 'John Smith', NULL, NULL, 'Jane Doe', NULL, NULL, NULL, false, true, 'Introduction to Physics', '789 Oak St', true), -(4, 4, 2, 1, '16:00:00', '18:00:00', NULL, NULL, 'John Smith', 'Brendan Burns', NULL, NULL, NULL, false, true, 'Distributed Systems', '789 Oak St', true), -(5, 1, 3, 1, '08:00:00', '10:00:00', NULL, 'John Smith', 'Jane Doe', NULL, NULL, NULL, NULL, true, false, 'Introduction to Computer Science', '123 Main St', true), -(6, 2, 3, 1, '10:00:00', '12:00:00', 'Gilbert Strang', NULL, NULL, NULL, NULL, NULL, NULL, true, true, 'Advanced Calculus', '456 Elm St', false), -(7, 3, 4, 1, '14:00:00', '16:00:00', NULL, NULL, 'Jane Doe', NULL, 'John Smith', NULL, NULL, false, true, 'Introduction to Physics', '789 Oak St', true), -(8, 4, 4, 1, '16:00:00', '18:00:00', NULL, NULL, 'John Smith', NULL, 'Brendan Burns', NULL, NULL, false, true, 'Distributed Systems', '789 Oak St', true) +(1, 1, 1, 1, '08:00:00', '10:00:00', 'John Smith', NULL, NULL, 'Jane Doe', NULL, NULL, NULL, true, false, 'Introduction to Computer Science', '123 Main St', 'true'), +(2, 2, 1, 1, 
'10:00:00', '12:00:00', NULL, NULL, 'Gilbert Strang', NULL, NULL, NULL, NULL, true, true, 'Advanced Calculus', '456 Elm St', 'false'), +(3, 3, 2, 1, '08:00:00', '10:00:00', 'John Smith', NULL, NULL, 'Jane Doe', NULL, NULL, NULL, false, true, 'Introduction to Physics', '789 Oak St', 'true'), +(4, 4, 2, 1, '16:00:00', '18:00:00', NULL, NULL, 'John Smith', 'Brendan Burns', NULL, NULL, NULL, false, true, 'Distributed Systems', '789 Oak St', 'true'), +(5, 1, 3, 1, '08:00:00', '10:00:00', NULL, 'John Smith', 'Jane Doe', NULL, NULL, NULL, NULL, true, false, 'Introduction to Computer Science', '123 Main St', 'true'), +(6, 2, 3, 1, '10:00:00', '12:00:00', 'Gilbert Strang', NULL, NULL, NULL, NULL, NULL, NULL, true, true, 'Advanced Calculus', '456 Elm St', 'false'), +(7, 3, 4, 1, '14:00:00', '16:00:00', NULL, NULL, 'Jane Doe', NULL, 'John Smith', NULL, NULL, false, true, 'Introduction to Physics', '789 Oak St', 'true'), +(8, 4, 4, 1, '16:00:00', '18:00:00', NULL, NULL, 'John Smith', NULL, 'Brendan Burns', NULL, NULL, false, true, 'Distributed Systems', '789 Oak St', 'true') ; INSERT INTO public.course_prerequisite (pre_course_id, course_id) VALUES @@ -263,12 +263,12 @@ INSERT INTO public.student (student_id, lastname, firstname, program_id, declare ; INSERT INTO public.student_record (student_id, course_id, semester, grade, how, transfer_source, earn_credit, repeat_term, test_id, offering_id) VALUES -(1, 1, 1, 'A', 'in-person', NULL, 'Yes', NULL, 1, 1), -(1, 2, 1, 'A', 'in-person', NULL, 'Yes', NULL, 1, 2), -(1, 3, 2, 'A', 'in-person', NULL, 'Yes', NULL, 1, 3), -(1, 4, 2, 'A', 'in-person', NULL, 'Yes', NULL, 1, 4), -(2, 2, 1, 'C', 'in-person', NULL, 'Yes', NULL, 1, 2), -(2, 1, 1, 'B', 'online', NULL, 'Yes', NULL, 1, 1), -(3, 2, 1, 'B+', 'in-person', NULL, 'Yes', NULL, 1, 2), -(3, 4, 2, 'B+', 'in-person', NULL, 'Yes', NULL, 1, 4) +(1, 1, 1, 'A', 'in-person', NULL, 'Yes', NULL, '1', 1), +(1, 2, 1, 'A', 'in-person', NULL, 'Yes', NULL, '1', 2), +(1, 3, 2, 'A', 'in-person', NULL, 
'Yes', NULL, '1', 3), +(1, 4, 2, 'A', 'in-person', NULL, 'Yes', NULL, '1', 4), +(2, 2, 1, 'C', 'in-person', NULL, 'Yes', NULL, '1', 2), +(2, 1, 1, 'B', 'online', NULL, 'Yes', NULL, '1', 1), +(3, 2, 1, 'B+', 'in-person', NULL, 'Yes', NULL, '1', 2), +(3, 4, 2, 'B+', 'in-person', NULL, 'Yes', NULL, '1', 4) ; diff --git a/defog_data/atis/atis.sql b/defog_data/atis/atis.sql index 3b14c29..a501abe 100644 --- a/defog_data/atis/atis.sql +++ b/defog_data/atis/atis.sql @@ -327,13 +327,13 @@ INSERT INTO public.compartment_class (compartment, class_type) VALUES ; INSERT INTO public.days (days_code, day_name) VALUES -(1, 'Monday'), -(2, 'Tuesday'), -(3, 'Wednesday'), -(4, 'Thursday'), -(5, 'Friday'), -(6, 'Saturday'), -(7, 'Sunday') +('1', 'Monday'), +('2', 'Tuesday'), +('3', 'Wednesday'), +('4', 'Thursday'), +('5', 'Friday'), +('6', 'Saturday'), +('7', 'Sunday') ; INSERT INTO public.dual_carrier (main_airline, low_flight_number, high_flight_number, dual_airline, service_name) VALUES @@ -350,16 +350,16 @@ INSERT INTO public.dual_carrier (main_airline, low_flight_number, high_flight_nu ; INSERT INTO public.equipment_sequence (aircraft_code_sequence, aircraft_code) VALUES -(1, 'B747'), -(2, 'A320'), -(3, 'B737'), -(4, 'A380'), -(5, 'B777'), -(6, 'A330'), -(7, 'B787'), -(8, 'A350'), -(9, 'E190'), -(10, 'CRJ200') +('1', 'B747'), +('2', 'A320'), +('3', 'B737'), +('4', 'A380'), +('5', 'B777'), +('6', 'A330'), +('7', 'B787'), +('8', 'A350'), +('9', 'E190'), +('10', 'CRJ200') ; INSERT INTO public.fare (fare_id, from_airport, to_airport, fare_basis_code, fare_airline, restriction_code, one_direction_cost, round_trip_cost, round_trip_required) VALUES @@ -376,29 +376,29 @@ INSERT INTO public.fare (fare_id, from_airport, to_airport, fare_basis_code, far ; INSERT INTO public.fare_basis (fare_basis_code, booking_class, class_type, premium, economy, discounted, night, season, basis_days) VALUES -('ABC', 'First', 'First Class', 'Yes', 'No', 'No', 'No', 'Regular', 30), -('DEF', 'Business', 
'Business Class', 'Yes', 'No', 'No', 'No', 'Regular', 30), -('GHI', 'Economy', 'Economy Class', 'No', 'Yes', 'Yes', 'No', 'Regular', 30), -('JKL', 'First', 'First Class', 'Yes', 'No', 'No', 'No', 'Regular', 30), -('MNO', 'Business', 'Business Class', 'Yes', 'No', 'No', 'No', 'Regular', 30), -('PQR', 'Economy', 'Economy Class', 'No', 'Yes', 'Yes', 'No', 'Regular', 30), -('STU', 'First', 'First Class', 'Yes', 'No', 'No', 'No', 'Regular', 30), -('VWX', 'Business', 'Business Class', 'Yes', 'No', 'No', 'No', 'Regular', 30), -('YZ', 'Economy', 'Economy Class', 'No', 'Yes', 'Yes', 'No', 'Regular', 30), -('AAA', 'First', 'First Class', 'Yes', 'No', 'No', 'No', 'Regular', 30) +('ABC', 'First', 'First Class', 'Yes', 'No', 'No', 'No', 'Regular', '30'), +('DEF', 'Business', 'Business Class', 'Yes', 'No', 'No', 'No', 'Regular', '30'), +('GHI', 'Economy', 'Economy Class', 'No', 'Yes', 'Yes', 'No', 'Regular', '30'), +('JKL', 'First', 'First Class', 'Yes', 'No', 'No', 'No', 'Regular', '30'), +('MNO', 'Business', 'Business Class', 'Yes', 'No', 'No', 'No', 'Regular', '30'), +('PQR', 'Economy', 'Economy Class', 'No', 'Yes', 'Yes', 'No', 'Regular', '30'), +('STU', 'First', 'First Class', 'Yes', 'No', 'No', 'No', 'Regular', '30'), +('VWX', 'Business', 'Business Class', 'Yes', 'No', 'No', 'No', 'Regular', '30'), +('YZ', 'Economy', 'Economy Class', 'No', 'Yes', 'Yes', 'No', 'Regular', '30'), +('AAA', 'First', 'First Class', 'Yes', 'No', 'No', 'No', 'Regular', '30') ; INSERT INTO public.flight (flight_id, flight_days, from_airport, to_airport, departure_time, arrival_time, airline_flight, airline_code, flight_number, aircraft_code_sequence, meal_code, stops, connections, dual_carrier, time_elapsed) VALUES -(1, 'mon,wed', 'ORD', 'JFK', 1577836800, 1577840400, 'AA123', 'AA', 'AA123', 1, 'BF', 0, 0, 'AA123', 3600), -(2, 'tue,thu', 'ORD', 'JFK', 1577844000, 1577854000, 'UA456', 'UA', 'UA456', 2, 'LN', 1, 1, 'UA456', 10000), -(3, 'wed', 'ORD', 'JFK', 1577851200, 1577854900, 'AA789', 'AA', 
'AA789', 3, 'DN', 0, 0, 'AA789', 3700), -(4, 'thu', 'ORD', 'JFK', 1577858400, 1577873400, 'WN012', 'WN', 'WN012', 4, 'BS', 1, 1, 'WN012', 15000), -(5, 'fri', 'ORD', 'LAX', 1577865600, 1577869600, 'AS345', 'AS', 'AS345', 5, 'BF', 0, 0, 'AS345', 4000), -(6, 'sat,mon', 'JFK', 'ORD', 1577872800, 1577884800, 'AA124', 'AA', 'AA123', 6, 'LN', 1, 1, 'B678', 12000), -(7, 'sun', 'JFK', 'ORD', 1577880000, 1577883700, 'UA457', 'UA', 'UA457', 7, 'DN', 0, 0, 'UA457', 3700), -(8, 'mon', 'JFK', 'LAX', 1577887200, 1577897200, 'F934', 'F9', 'F934', 8, 'BS', 1, 1, 'F934', 10000), -(9, 'tue', 'LAX', 'ORD', 1577894400, 1577898400, 'HA567', 'HA', 'HA567', 9, 'LS', 0, 0, 'HA567', 4000), -(10, 'wed,mon', 'LAX', 'ORD', 1577901600, 1577921600, 'VX890', 'VX', 'VX890', 10, 'DS', 1, 1, 'VX890', 20000) +(1, 'mon,wed', 'ORD', 'JFK', 1577836800, 1577840400, 'AA123', 'AA', 'AA123', '1', 'BF', 0, 0, 'AA123', 3600), +(2, 'tue,thu', 'ORD', 'JFK', 1577844000, 1577854000, 'UA456', 'UA', 'UA456', '2', 'LN', 1, 1, 'UA456', 10000), +(3, 'wed', 'ORD', 'JFK', 1577851200, 1577854900, 'AA789', 'AA', 'AA789', '3', 'DN', 0, 0, 'AA789', 3700), +(4, 'thu', 'ORD', 'JFK', 1577858400, 1577873400, 'WN012', 'WN', 'WN012', '4', 'BS', 1, 1, 'WN012', 15000), +(5, 'fri', 'ORD', 'LAX', 1577865600, 1577869600, 'AS345', 'AS', 'AS345', '5', 'BF', 0, 0, 'AS345', 4000), +(6, 'sat,mon', 'JFK', 'ORD', 1577872800, 1577884800, 'AA124', 'AA', 'AA123', '6', 'LN', 1, 1, 'B678', 12000), +(7, 'sun', 'JFK', 'ORD', 1577880000, 1577883700, 'UA457', 'UA', 'UA457', '7', 'DN', 0, 0, 'UA457', 3700), +(8, 'mon', 'JFK', 'LAX', 1577887200, 1577897200, 'F934', 'F9', 'F934', '8', 'BS', 1, 1, 'F934', 10000), +(9, 'tue', 'LAX', 'ORD', 1577894400, 1577898400, 'HA567', 'HA', 'HA567', '9', 'LS', 0, 0, 'HA567', 4000), +(10, 'wed,mon', 'LAX', 'ORD', 1577901600, 1577921600, 'VX890', 'VX', 'VX890', '10', 'DS', 1, 1, 'VX890', 20000) ; INSERT INTO public.flight_fare (flight_id, fare_id) VALUES @@ -428,11 +428,11 @@ INSERT INTO public.flight_leg (flight_id, 
leg_number, leg_flight) VALUES ; INSERT INTO public.flight_stop (flight_id, stop_number, stop_days, stop_airport, arrival_time, arrival_airline, arrival_flight_number, departure_time, departure_airline, departure_flight_number, stop_time) VALUES -(2, 1, 2, 'DFW', 1577847600, 'UA', 'UA456', 1577851200, 'UA', 'UA456', 3600), -(4, 1, 4, 'DEN', 1577862000, 'WN', 'WN012', 1577865600, 'WN', 'WN012', 3600), -(6, 1, 6, 'DFW', 1577876400, 'AA', 'AA123', 1577880000, 'AA', 'AA123', 3600), -(8, 1, 1, 'LAX', 1577890800, 'F9', 'F934', 1577894400, 'F9', 'F934', 3600), -(10, 1, 3, 'JFK', 1577905200, 'VX', 'VX890', 1577908800, 'VX', 'VX890', 3600) +(2, 1, '2', 'DFW', 1577847600, 'UA', 'UA456', 1577851200, 'UA', 'UA456', 3600), +(4, 1, '4', 'DEN', 1577862000, 'WN', 'WN012', 1577865600, 'WN', 'WN012', 3600), +(6, 1, '6', 'DFW', 1577876400, 'AA', 'AA123', 1577880000, 'AA', 'AA123', 3600), +(8, 1, '1', 'LAX', 1577890800, 'F9', 'F934', 1577894400, 'F9', 'F934', 3600), +(10, 1, '3', 'JFK', 1577905200, 'VX', 'VX890', 1577908800, 'VX', 'VX890', 3600) ; INSERT INTO public.food_service (meal_code, meal_number, compartment, meal_description) VALUES @@ -471,16 +471,16 @@ INSERT INTO public.month (month_number, month_name) VALUES ; INSERT INTO public.restriction (restriction_code, advance_purchase, stopovers, saturday_stay_required, minimum_stay, maximum_stay, application, no_discounts) VALUES -('NONE', 14, 2, 'No', 7, 30, 'One-Way', 'Yes'), -('NONE', 14, 2, 'No', 7, 30, 'One-Way', 'Yes'), -('NONE', 14, 2, 'No', 7, 30, 'One-Way', 'Yes'), -('NONE', 14, 2, 'No', 7, 30, 'One-Way', 'Yes'), -('NONE', 14, 2, 'No', 7, 30, 'One-Way', 'Yes'), -('NONE', 14, 2, 'No', 7, 30, 'One-Way', 'Yes'), -('NONE', 14, 2, 'No', 7, 30, 'One-Way', 'Yes'), -('NONE', 14, 2, 'No', 7, 30, 'One-Way', 'Yes'), -('NONE', 14, 2, 'No', 7, 30, 'One-Way', 'Yes'), -('NONE', 14, 2, 'No', 7, 30, 'One-Way', 'Yes') +('NONE', 14, '2', 'No', 7, 30, 'One-Way', 'Yes'), +('NONE', 14, '2', 'No', 7, 30, 'One-Way', 'Yes'), +('NONE', 14, '2', 
'No', 7, 30, 'One-Way', 'Yes'), +('NONE', 14, '2', 'No', 7, 30, 'One-Way', 'Yes'), +('NONE', 14, '2', 'No', 7, 30, 'One-Way', 'Yes'), +('NONE', 14, '2', 'No', 7, 30, 'One-Way', 'Yes'), +('NONE', 14, '2', 'No', 7, 30, 'One-Way', 'Yes'), +('NONE', 14, '2', 'No', 7, 30, 'One-Way', 'Yes'), +('NONE', 14, '2', 'No', 7, 30, 'One-Way', 'Yes'), +('NONE', 14, '2', 'No', 7, 30, 'One-Way', 'Yes') ; INSERT INTO public.state (state_code, state_name, country_name) VALUES diff --git a/defog_data/broker/broker.sql b/defog_data/broker/broker.sql index 52a1a98..f697f8f 100644 --- a/defog_data/broker/broker.sql +++ b/defog_data/broker/broker.sql @@ -73,12 +73,12 @@ INSERT INTO sbCustomer (sbCustId, sbCustName, sbCustEmail, sbCustPhone, sbCustAd ('C012', 'Olivia Johnson', 'olivia.johnson@email.com', '555-987-6543', '321 Elm St', 'Newtown', 'NJ', 'USA', '08801', '2023-01-05', 'active'), ('C013', 'Ethan Davis', 'ethan.davis@email.com', '555-246-8135', '654 Oak Ave', 'Someville', 'NY', 'USA', '10002', '2023-02-12', 'active'), ('C014', 'Ava Wilson', 'ava.wilson@email.com', '555-135-7902', '987 Pine Rd', 'Anytown', 'CA', 'USA', '90001', '2023-03-20', 'active'), -('C015', 'Emma Brown', 'emma.brown@email.com', '555-987-6543', '789 Oak St', 'Newtown', 'NJ', 'USA', '08801', DATE_TRUNC('month', NOW()) - INTERVAL '5 months', 'active'), -('C016', 'sophia martinez', 'sophia.martinez@email.com', '555-246-8135', '159 Elm Ave', 'Anytown', 'CA', 'USA', '90001', DATE_TRUNC('month', NOW()) - INTERVAL '4 months', 'active'), -('C017', 'Jacob Taylor', 'jacob.taylor@email.com', '555-135-7902', '753 Pine Rd', 'Someville', 'NY', 'USA', '10002', DATE_TRUNC('month', NOW()) - INTERVAL '3 months', 'active'), -('C018', 'Michael Anderson', 'michael.anderson@email.com', '555-864-2319', '321 Cedar Ln', 'Yourtown', 'CA', 'USA', '92101', DATE_TRUNC('month', NOW()) - INTERVAL '2 months', 'active'), -('C019', 'Isabella Thompson', 'isabella.thompson@email.com', '555-753-1904', '987 Maple Dr', 'Anothertown', 'FL', 'USA', 
'33101', DATE_TRUNC('month', NOW()) - INTERVAL '1 month', 'active'), -('C020', 'Maurice Lee', 'maurice.lee@email.com', '555-370-2648', '654 Oak St', 'Mytown', 'TX', 'USA', '75000', DATE_TRUNC('month', NOW()), 'active'); +('C015', 'Emma Brown', 'emma.brown@email.com', '555-987-6543', '789 Oak St', 'Newtown', 'NJ', 'USA', '08801', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '5 months', 'active'), +('C016', 'sophia martinez', 'sophia.martinez@email.com', '555-246-8135', '159 Elm Ave', 'Anytown', 'CA', 'USA', '90001', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '4 months', 'active'), +('C017', 'Jacob Taylor', 'jacob.taylor@email.com', '555-135-7902', '753 Pine Rd', 'Someville', 'NY', 'USA', '10002', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '3 months', 'active'), +('C018', 'Michael Anderson', 'michael.anderson@email.com', '555-864-2319', '321 Cedar Ln', 'Yourtown', 'CA', 'USA', '92101', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months', 'active'), +('C019', 'Isabella Thompson', 'isabella.thompson@email.com', '555-753-1904', '987 Maple Dr', 'Anothertown', 'FL', 'USA', '33101', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month', 'active'), +('C020', 'Maurice Lee', 'maurice.lee@email.com', '555-370-2648', '654 Oak St', 'Mytown', 'TX', 'USA', '75000', DATE_TRUNC('month', CURRENT_DATE), 'active'); -- sbTicker diff --git a/defog_data/car_dealership/car_dealership.sql b/defog_data/car_dealership/car_dealership.sql index c54f6b0..ce7971c 100644 --- a/defog_data/car_dealership/car_dealership.sql +++ b/defog_data/car_dealership/car_dealership.sql @@ -103,18 +103,18 @@ VALUES -- salespersons INSERT INTO salespersons (first_name, last_name, email, phone, hire_date) VALUES - ('John', 'Doe', 'john.doe@autonation.com', '(555)-123-4567', NOW() - INTERVAL '2 years'), - ('Jane', 'Smith', 'jane.smith@autonation.com', '(415)-987-6543', NOW() - INTERVAL '3 years'), - ('Michael', 'Johnson', 'michael.johnson@autonation.com', '(555)-456-7890', NOW() - INTERVAL '1 year'), - 
('Emily', 'Brown', 'emily.brown@sonicauto.com', '(444)-111-2222', NOW() - INTERVAL '1 year'), - ('David', 'Wilson', 'david.wilson@sonicauto.com', '(444)-333-4444', NOW() - INTERVAL '2 years'), + ('John', 'Doe', 'john.doe@autonation.com', '(555)-123-4567', CURRENT_DATE - INTERVAL '2 years'), + ('Jane', 'Smith', 'jane.smith@autonation.com', '(415)-987-6543', CURRENT_DATE - INTERVAL '3 years'), + ('Michael', 'Johnson', 'michael.johnson@autonation.com', '(555)-456-7890', CURRENT_DATE - INTERVAL '1 year'), + ('Emily', 'Brown', 'emily.brown@sonicauto.com', '(444)-111-2222', CURRENT_DATE - INTERVAL '1 year'), + ('David', 'Wilson', 'david.wilson@sonicauto.com', '(444)-333-4444', CURRENT_DATE - INTERVAL '2 years'), ('Sarah', 'Taylor', 'sarah.taylor@sonicauto.com', '(123)-555-6666', '2018-09-01'), ('Daniel', 'Anderson', 'daniel.anderson@sonicauto.com', '(555)-777-8888', '2021-07-12'), ('Olivia', 'Thomas', 'olivia.thomas@pensake.com', '(333)-415-0000', '2023-01-25'), ('James', 'Jackson', 'james.jackson@pensake.com', '(555)-212-3333', '2019-04-30'), ('Sophia', 'White', 'sophia.white@pensake.com', '(555)-444-5555', '2022-08-18'), - ('Robert', 'Johnson', 'robert.johnson@pensake.com', '(001)-415-5678', NOW() - INTERVAL '15 days'), - ('Jennifer', 'Davis', 'jennifer.davis@directauto.com', '(555)-345-6789', NOW() - INTERVAL '20 days'), + ('Robert', 'Johnson', 'robert.johnson@pensake.com', '(001)-415-5678', CURRENT_DATE - INTERVAL '15 days'), + ('Jennifer', 'Davis', 'jennifer.davis@directauto.com', '(555)-345-6789', CURRENT_DATE - INTERVAL '20 days'), ('Jessica', 'Rodriguez', 'jessica.rodriguez@directauto.com', '(555)-789-0123', '2022-06-01'); -- customers @@ -147,18 +147,18 @@ VALUES (7, 2, 10, 60000.00, '2023-04-10'), (4, 6, 8, 40000.00, '2023-04-12'), (9, 2, 4, 44500.00, '2023-04-15'), - (1, 7, 11, 28000.00, NOW() - INTERVAL '32 days'), - (3, 3, 12, 43500.00, NOW() - INTERVAL '10 days'), - (6, 1, 11, 24000.00, NOW() - INTERVAL '15 days'), - (2, 3, 1, 17200.00, NOW() - INTERVAL '21 
days'), - (8, 6, 12, 37500.00, NOW() - INTERVAL '3 days'), - (10, 4, 2, 26500.00, NOW() - INTERVAL '5 days'), - (3, 2, 3, 115000.00, DATE_TRUNC('week', NOW() - INTERVAL '1 week') + INTERVAL '1 day'), - (3, 2, 7, 115000.00, DATE_TRUNC('week', NOW() - INTERVAL '1 week')), - (3, 2, 10, 115000.00, DATE_TRUNC('week', NOW() - INTERVAL '1 week') - INTERVAL '1 day'), - (4, 1, 3, 115000.00, DATE_TRUNC('week', NOW() - INTERVAL '8 week') + INTERVAL '1 day'), - (4, 1, 7, 115000.00, DATE_TRUNC('week', NOW() - INTERVAL '8 week')), - (4, 1, 10, 115000.00, DATE_TRUNC('week', NOW() - INTERVAL '8 week') - INTERVAL '1 day'); + (1, 7, 11, 28000.00, CURRENT_DATE - INTERVAL '32 days'), + (3, 3, 12, 43500.00, CURRENT_DATE - INTERVAL '10 days'), + (6, 1, 11, 24000.00, CURRENT_DATE - INTERVAL '15 days'), + (2, 3, 1, 17200.00, CURRENT_DATE - INTERVAL '21 days'), + (8, 6, 12, 37500.00, CURRENT_DATE - INTERVAL '3 days'), + (10, 4, 2, 26500.00, CURRENT_DATE - INTERVAL '5 days'), + (3, 2, 3, 115000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' + INTERVAL '1 day'), + (3, 2, 7, 115000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week'), + (3, 2, 10, 115000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' - INTERVAL '1 day'), + (4, 1, 3, 115000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' + INTERVAL '1 day'), + (4, 1, 7, 115000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week'), + (4, 1, 10, 115000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' - INTERVAL '1 day'); -- inventory_snapshots @@ -199,18 +199,18 @@ VALUES (8, '2023-04-10', 60000.00, 'financing'), (9, '2023-04-12', 40000.00, 'check'), (10, '2023-04-15', 44500.00, 'credit_card'), - (11, NOW() - INTERVAL '30 days', 28000.00, 'cash'), - (12, NOW() - INTERVAL '3 days', 43500.00, 'credit_card'), - (13, NOW() - INTERVAL '6 days', 24000.00, 'debit_card'), - (14, NOW() - INTERVAL '1 days', 17200.00, 'financing'), - (15, NOW() - INTERVAL '1 days', 37500.00, 'credit_card'), - (16, NOW() - 
INTERVAL '5 days', 26500.00, 'debit_card'), - (17, DATE_TRUNC('week', NOW() - INTERVAL '1 week') + INTERVAL '1 day', 115000.00, 'financing'), - (18, DATE_TRUNC('week', NOW() - INTERVAL '1 week'), 115000.00, 'credit_card'), - (19, DATE_TRUNC('week', NOW() - INTERVAL '1 week') - INTERVAL '1 day', 115000.00, 'debit_card'), - (20, DATE_TRUNC('week', NOW() - INTERVAL '8 week') + INTERVAL '1 day', 115000.00, 'cash'), - (21, DATE_TRUNC('week', NOW() - INTERVAL '8 week'), 115000.00, 'check'), - (22, DATE_TRUNC('week', NOW() - INTERVAL '8 week') - INTERVAL '1 day', 115000.00, 'credit_card'); + (11, CURRENT_DATE - INTERVAL '30 days', 28000.00, 'cash'), + (12, CURRENT_DATE - INTERVAL '3 days', 43500.00, 'credit_card'), + (13, CURRENT_DATE - INTERVAL '6 days', 24000.00, 'debit_card'), + (14, CURRENT_DATE - INTERVAL '1 days', 17200.00, 'financing'), + (15, CURRENT_DATE - INTERVAL '1 days', 37500.00, 'credit_card'), + (16, CURRENT_DATE - INTERVAL '5 days', 26500.00, 'debit_card'), + (17, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' + INTERVAL '1 day', 115000.00, 'financing'), + (18, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week', 115000.00, 'credit_card'), + (19, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' - INTERVAL '1 day', 115000.00, 'debit_card'), + (20, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' + INTERVAL '1 day', 115000.00, 'cash'), + (21, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week', 115000.00, 'check'), + (22, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' - INTERVAL '1 day', 115000.00, 'credit_card'); -- payments_made INSERT INTO payments_made (vendor_name, payment_date, payment_amount, payment_method, invoice_number, invoice_date, due_date) @@ -227,8 +227,8 @@ VALUES ('Marketing Agency', '2023-04-20', 15000.00, 'credit_card', 'INV-010', '2023-04-15', '2023-05-15'), ('Insurance Provider', '2023-04-25', 5000.00, 'bank_transfer', 'INV-011', '2023-04-20', '2023-05-20'), ('Cleaning Service', '2023-04-30', 2000.00, 'check', 
'INV-012', '2023-04-25', '2023-05-25'), - ('Toyota Auto Parts', NOW() - INTERVAL '5 days', 12500.00, 'bank_transfer', 'INV-013', NOW() - INTERVAL '10 days', NOW() + INTERVAL '20 days'), - ('Honda Manufacturing', NOW() - INTERVAL '3 days', 18000.00, 'check', 'INV-014', NOW() - INTERVAL '8 days', NOW() + INTERVAL '22 days'), - ('Ford Supplier Co', NOW() - INTERVAL '2 days', 22000.00, 'bank_transfer', 'INV-015', NOW() - INTERVAL '7 days', NOW() + INTERVAL '23 days'), - ('Tesla Parts Inc', NOW() - INTERVAL '1 day', 15000.00, 'credit_card', 'INV-016', NOW() - INTERVAL '6 days', NOW() + INTERVAL '24 days'), - ('Chevrolet Auto', NOW(), 20000.00, 'bank_transfer', 'INV-017', NOW() - INTERVAL '5 days', NOW() + INTERVAL '25 days'); \ No newline at end of file + ('Toyota Auto Parts', CURRENT_DATE - INTERVAL '5 days', 12500.00, 'bank_transfer', 'INV-013', CURRENT_DATE - INTERVAL '10 days', CURRENT_DATE + INTERVAL '20 days'), + ('Honda Manufacturing', CURRENT_DATE - INTERVAL '3 days', 18000.00, 'check', 'INV-014', CURRENT_DATE - INTERVAL '8 days', CURRENT_DATE + INTERVAL '22 days'), + ('Ford Supplier Co', CURRENT_DATE - INTERVAL '2 days', 22000.00, 'bank_transfer', 'INV-015', CURRENT_DATE - INTERVAL '7 days', CURRENT_DATE + INTERVAL '23 days'), + ('Tesla Parts Inc', CURRENT_DATE - INTERVAL '1 day', 15000.00, 'credit_card', 'INV-016', CURRENT_DATE - INTERVAL '6 days', CURRENT_DATE + INTERVAL '24 days'), + ('Chevrolet Auto', CURRENT_DATE, 20000.00, 'bank_transfer', 'INV-017', CURRENT_DATE - INTERVAL '5 days', CURRENT_DATE + INTERVAL '25 days'); \ No newline at end of file diff --git a/defog_data/derm_treatment/derm_treatment.sql b/defog_data/derm_treatment/derm_treatment.sql index c15f3de..de66a5b 100644 --- a/defog_data/derm_treatment/derm_treatment.sql +++ b/defog_data/derm_treatment/derm_treatment.sql @@ -181,13 +181,13 @@ VALUES (7, 6, 1, 3, '2023-01-01', '2023-06-30', false, 240, 'mg'), (1, 7, 2, 4, '2023-02-01', '2023-07-31', false, 180, 'mg'), (2, 1, 3, 5, '2023-03-01', 
'2023-08-31', false, 360, 'g'), -(1, 2, 4, 6, DATE_TRUNC('month', CURRENT_DATE - INTERVAL '2 year'), DATE_TRUNC('month', CURRENT_DATE - INTERVAL '2 months'), false, 300, 'mg'), -(2, 5, 1, 8, DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 year'), DATE_TRUNC('month', CURRENT_DATE - INTERVAL '4 months'), false, 80, 'mg'), -(3, 6, 2, 9, DATE_TRUNC('month', CURRENT_DATE - INTERVAL '5 months'), NULL, true, 200, 'mg'), -(1, 7, 3, 10, DATE_TRUNC('month', CURRENT_DATE - INTERVAL '4 months'), NULL, false, 150, 'g'), -(2, 1, 4, 1, DATE_TRUNC('month', CURRENT_DATE - INTERVAL '3 months'), NULL, false, 100, 'mg'), -(3, 2, 5, 2, DATE_TRUNC('month', CURRENT_DATE - INTERVAL '2 months'), NULL, false, 250, 'mg'), -(1, 3, 6, 3, DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month'), NULL, false, 300, 'g'), +(1, 2, 4, 6, DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 year', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months', false, 300, 'mg'), +(2, 5, 1, 8, DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 year', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '4 months', false, 80, 'mg'), +(3, 6, 2, 9, DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '5 months', NULL, true, 200, 'mg'), +(1, 7, 3, 10, DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '4 months', NULL, false, 150, 'g'), +(2, 1, 4, 1, DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '3 months', NULL, false, 100, 'mg'), +(3, 2, 5, 2, DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months', NULL, false, 250, 'mg'), +(1, 3, 6, 3, DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month', NULL, false, 300, 'g'), (2, 4, 1, 4, CURRENT_DATE, NULL, true, 200, 'mg'), (3, 5, 2, 5, CURRENT_DATE, NULL, false, 150, 'mg'), (9, 1, 1, 1, CURRENT_DATE - INTERVAL '6 months', CURRENT_DATE - INTERVAL '3 months', false, 240, 'mg'), @@ -210,8 +210,8 @@ VALUES (13, '2023-01-08', 18, 12, 3, 10.5, 6.1, 1.0, 16.9, 14.3, 11.0, 56, 36, 16, 1.9, 2.9, 4.4), (14, '2023-02-08', 27, 20, 10, 16.2, 11.1, 4.1, 21.0, 17.9, 14.1, 74, 54, 34, 0.5, 1.5, 3.0), (15, 
'2023-03-08', 20, 14, 4, 11.8, 7.3, 1.7, 17.8, 15.2, 11.8, 60, 40, 20, 1.6, 2.6, 4.1), -(16, DATE_TRUNC('month', CURRENT_DATE - INTERVAL '5 months') + INTERVAL '7 days', 24, 18, 8, 14.4, 9.6, 3.2, 20.4, 17.4, 13.7, 70, 50, 30, 0.9, 1.9, 3.4), -(17, DATE_TRUNC('month', CURRENT_DATE - INTERVAL '1 month') + INTERVAL '7 days', 22, 16, NULL, 13.2, 8.8, NULL, 19.1, 16.3, NULL, 65, 45, NULL, 1.3, 2.3, NULL), +(16, DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '5 months' + INTERVAL '7 days', 24, 18, 8, 14.4, 9.6, 3.2, 20.4, 17.4, 13.7, 70, 50, 30, 0.9, 1.9, 3.4), +(17, DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' + INTERVAL '7 days', 22, 16, NULL, 13.2, 8.8, NULL, 19.1, 16.3, NULL, 65, 45, NULL, 1.3, 2.3, NULL), (25, CURRENT_DATE - INTERVAL '6 months' + INTERVAL '7 days', 30, NULL, NULL, 18.0, NULL, NULL, 22.0, NULL, NULL, 80, NULL, NULL, 1.0, NULL, NULL), (25, CURRENT_DATE - INTERVAL '2 months', 30, 18, 10, 18.0, 12.0, 4.0, 22.0, 19.0, 15.0, 80, 60, 40, 1.0, 2.0, 3.0), (26, CURRENT_DATE - INTERVAL '5 months' + INTERVAL '7 days', 25, NULL, NULL, 15.0, NULL, NULL, 20.0, NULL, NULL, 75, NULL, NULL, 0.5, NULL, NULL), diff --git a/defog_data/ewallet/ewallet.json b/defog_data/ewallet/ewallet.json index e54ad80..2eb9407 100644 --- a/defog_data/ewallet/ewallet.json +++ b/defog_data/ewallet/ewallet.json @@ -364,7 +364,7 @@ "column_description": "" }, { - "data_type": "INT", + "data_type": "BIGINT", "column_name": "user_id", "column_description": "" }, diff --git a/defog_data/ewallet/ewallet.sql b/defog_data/ewallet/ewallet.sql index 541d93e..afb8a6c 100644 --- a/defog_data/ewallet/ewallet.sql +++ b/defog_data/ewallet/ewallet.sql @@ -129,8 +129,8 @@ CREATE TABLE consumer_div.user_setting_snapshot ( -- users INSERT INTO consumer_div.users (uid, username, email, phone_number, created_at, user_type, status, country, address_billing, address_delivery, kyc_status) VALUES - (1, 'john_doe', 'john.doe@email.com', '+1234567890', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 
month', 'individual', 'active', 'US', '123 Main St, Anytown US 12345', '123 Main St, Anytown US 12345', 'approved'), - (2, 'jane_smith', 'jane.smith@email.com', '+9876543210', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months', 'individual', 'active', 'CA', '456 Oak Rd, Toronto ON M1M2M2', '456 Oak Rd, Toronto ON M1M2M2', 'approved'), + (1, 'john_doe', 'john.doe@email.com', '+1234567890', DATE_TRUNC('month', CURRENT_TIMESTAMP) - INTERVAL '1 month', 'individual', 'active', 'US', '123 Main St, Anytown US 12345', '123 Main St, Anytown US 12345', 'approved'), + (2, 'jane_smith', 'jane.smith@email.com', '+9876543210', DATE_TRUNC('month', CURRENT_TIMESTAMP) - INTERVAL '2 months', 'individual', 'active', 'CA', '456 Oak Rd, Toronto ON M1M2M2', '456 Oak Rd, Toronto ON M1M2M2', 'approved'), (3, 'bizuser', 'contact@business.co', '+1234509876', '2021-06-01 09:15:00', 'business', 'active', 'FR', '12 Rue Baptiste, Paris 75001', NULL, 'approved'), (4, 'david_miller', 'dave@personal.email', '+4477788899', '2023-03-20 18:45:00', 'individual', 'inactive', 'GB', '25 London Road, Manchester M12 4XY', '25 London Road, Manchester M12 4XY', 'pending'), (5, 'emily_wilson', 'emily.w@gmail.com', '+8091017161', '2021-11-03 22:10:00', 'individual', 'suspended', 'AU', '72 Collins St, Melbourne VIC 3000', '19 Smith St, Brunswick VIC 3056', 'rejected'), @@ -261,10 +261,10 @@ VALUES (8, 'Playtime! 
New games and toys have arrived', 'promotion', 'archived', '2023-06-01 18:00:00', 'email', NULL, 'https://kidzplayhouse.com/new-arrivals'), (9, 'Here''s $10 to start your glow up!', 'promotion', 'unread', '2023-06-01 10:15:00', 'email', NULL, 'https://beautytrending.com/new-customer'), (10, 'Your order #ord_mjs337 is being processed', 'transaction', 'read', '2023-06-04 19:31:30', 'web_app', 'web_d8180kaf', 'https://gamerush.co/orders/32e2b29c'), -(1, 'New promotion: Get 10% off your next order!', 'promotion', 'unread', DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week', 'email', NULL, 'https://techmart.com/promo/TECH10'), -(1, 'Your order #456def has been delivered', 'transaction', 'unread', DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '2 weeks', 'mobile_app', 'mobile_8fh2k1', 'app://orders/456def'), -(2, 'Reminder: Your FitLife membership expires in 7 days', 'general', 'unread', DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '3 weeks', 'email', NULL, 'https://fitlifegear.com/renew'), -(2, 'Weekend Flash Sale: 25% off all activewear!', 'promotion', 'unread', DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' + INTERVAL '2 days', 'mobile_app', 'mobile_yjp08q', 'app://shop/activewear'); +(1, 'New promotion: Get 10% off your next order!', 'promotion', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '1 week', 'email', NULL, 'https://techmart.com/promo/TECH10'), +(1, 'Your order #456def has been delivered', 'transaction', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '2 weeks', 'mobile_app', 'mobile_8fh2k1', 'app://orders/456def'), +(2, 'Reminder: Your FitLife membership expires in 7 days', 'general', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '3 weeks', 'email', NULL, 'https://fitlifegear.com/renew'), +(2, 'Weekend Flash Sale: 25% off all activewear!', 'promotion', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '1 week' + INTERVAL '2 days', 'mobile_app', 'mobile_yjp08q', 'app://shop/activewear'); -- user_sessions 
INSERT INTO consumer_div.user_sessions (user_id, session_start_ts, session_end_ts, device_type, device_id) @@ -285,13 +285,13 @@ VALUES (8, '2023-06-01 17:30:00', '2023-06-01 18:15:35', 'mobile_app', 'mobile_q3mz8n'), (9, '2023-06-04 07:45:30', '2023-06-04 08:15:27', 'mobile_app', 'mobile_g3mjfz'), (10, '2023-06-02 14:10:15', '2023-06-02 14:40:58', 'web_app', 'web_zz91p44l'), -(5, CURRENT_DATE - INTERVAL '31 days', CURRENT_DATE - INTERVAL '31 days' + INTERVAL '15 min', 'web_app', 'web_8902wknz'), -(6, CURRENT_DATE - INTERVAL '8 days', CURRENT_DATE - INTERVAL '8 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), -(7, CURRENT_DATE - INTERVAL '5 days', CURRENT_DATE - INTERVAL '5 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), -(8, CURRENT_DATE - INTERVAL '3 days', CURRENT_DATE - INTERVAL '3 days' + INTERVAL '15 min', 'web_app', 'web_d8180kaf'), -(9, CURRENT_DATE - INTERVAL '1 days', CURRENT_DATE - INTERVAL '1 days' + INTERVAL '15 min', 'mobile_app', 'mobile_g3mjfz'), -(10, CURRENT_DATE - INTERVAL '2 days', CURRENT_DATE - INTERVAL '2 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), -(5, CURRENT_DATE - INTERVAL '2 days', CURRENT_DATE - INTERVAL '2 days' + INTERVAL '15 min', 'web_app', 'web_8902wknz') +(5, CURRENT_TIMESTAMP - INTERVAL '31 days', CURRENT_TIMESTAMP - INTERVAL '31 days' + INTERVAL '15 min', 'web_app', 'web_8902wknz'), +(6, CURRENT_TIMESTAMP - INTERVAL '8 days', CURRENT_TIMESTAMP - INTERVAL '8 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), +(7, CURRENT_TIMESTAMP - INTERVAL '5 days', CURRENT_TIMESTAMP - INTERVAL '5 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), +(8, CURRENT_TIMESTAMP - INTERVAL '3 days', CURRENT_TIMESTAMP - INTERVAL '3 days' + INTERVAL '15 min', 'web_app', 'web_d8180kaf'), +(9, CURRENT_TIMESTAMP - INTERVAL '1 days', CURRENT_TIMESTAMP - INTERVAL '1 days' + INTERVAL '15 min', 'mobile_app', 'mobile_g3mjfz'), +(10, CURRENT_TIMESTAMP - INTERVAL '2 days', CURRENT_TIMESTAMP - INTERVAL '2 days' + INTERVAL '15 min', 
'web_app', 'web_zz91p44l'), +(5, CURRENT_TIMESTAMP - INTERVAL '2 days', CURRENT_TIMESTAMP - INTERVAL '2 days' + INTERVAL '15 min', 'web_app', 'web_8902wknz') ; -- user_setting_snapshot diff --git a/defog_data/geography/geography.sql b/defog_data/geography/geography.sql index e57eb30..3a1ad72 100644 --- a/defog_data/geography/geography.sql +++ b/defog_data/geography/geography.sql @@ -85,16 +85,16 @@ INSERT INTO public.city (city_name, population, country_name, state_name) VALUES ; INSERT INTO public.highlow (state_name, highest_elevation, lowest_point, highest_point, lowest_elevation) VALUES -('California', 4421, 'Death Valley', 'Mount Whitney', -86), -('Texas', 2667, 'Gulf of Mexico', 'Guadalupe Peak', 0), -('Florida', NULL, 'Atlantic Ocean', 'Unnamed location', 0), -('New York', 1629, 'Atlantic Ocean', 'Mount Marcy', 0), -('Ontario', NULL, 'Atlantic Ocean', 'Unnamed location', 0), -('Sao Paulo', NULL, 'Atlantic Ocean', 'Unnamed location', 0), -('Guangdong', NULL, 'South China Sea', 'Unnamed location', 0), -('Maharashtra', NULL, 'Arabian Sea', 'Unnamed location', 0), -('England', 978, 'North Sea', 'Scafell Pike', 0), -('Tokyo', 3776, 'Pacific Ocean', 'Mount Fuji', 0) +('California', '4421', 'Death Valley', 'Mount Whitney', '-86'), +('Texas', '2667', 'Gulf of Mexico', 'Guadalupe Peak', '0'), +('Florida', NULL, 'Atlantic Ocean', 'Unnamed location', '0'), +('New York', '1629', 'Atlantic Ocean', 'Mount Marcy', '0'), +('Ontario', NULL, 'Atlantic Ocean', 'Unnamed location', '0'), +('Sao Paulo', NULL, 'Atlantic Ocean', 'Unnamed location', '0'), +('Guangdong', NULL, 'South China Sea', 'Unnamed location', '0'), +('Maharashtra', NULL, 'Arabian Sea', 'Unnamed location', '0'), +('England', '978', 'North Sea', 'Scafell Pike', '0'), +('Tokyo', '3776', 'Pacific Ocean', 'Mount Fuji', '0') ; INSERT INTO public.lake (lake_name, area, country_name, state_name) VALUES diff --git a/defog_data/supplementary.py b/defog_data/supplementary.py index d96929e..2194498 100644 --- 
a/defog_data/supplementary.py +++ b/defog_data/supplementary.py @@ -228,6 +228,19 @@ def load_embeddings(emb_path: str) -> tuple[dict, dict]: "journal.journalname,text,Name or title of the journal", ], }, + "broker": { + "GPE": [] + }, + "car_dealership": { + "GPE": [] + }, + "derm_treatment": { + "GPE": [] + }, + "ewallet": { + "GPE": [] + + }, } # (pair of tables): list of (column1, column2) tuples that can be joined @@ -532,4 +545,8 @@ def load_embeddings(emb_path: str) -> tuple[dict, dict]: ("paperkeyphrase.paperid", "writes.paperid"), ], }, + "broker": {}, + "car_dealership": {}, + "derm_treatment": {}, + "ewallet": {}, } diff --git a/defog_data/yelp/yelp.sql b/defog_data/yelp/yelp.sql index 166e357..db5891b 100644 --- a/defog_data/yelp/yelp.sql +++ b/defog_data/yelp/yelp.sql @@ -64,13 +64,13 @@ CREATE TABLE public.users ( INSERT INTO public.business (bid, business_id, name, full_address, city, latitude, longitude, review_count, is_open, state) VALUES -(1, 'abc123', 'Joe’s Pizza', '123 Main St', 'San Francisco', 37.7749295, -122.4194155, 2, 0, 'CA'), -(2, 'def456', 'Peter’s Cafe', '456 Elm St', 'New York', 40.712776, -74.005974, 3, 1, 'NY'), -(3, 'ghi789', 'Anna’s Diner', '789 Oak St', 'Los Angeles', 34.052235, -118.243683, 4, 0, 'CA'), -(4, 'jkl012', 'Mark’s Bistro', '012 Maple St', 'San Francisco', 37.7749295, -122.4194155, 4, 1, 'CA'), -(5, 'mno345', 'Lily’s Bakery', '345 Walnut St', 'New York', 40.712776, -74.005974, 3, 1, 'NY'), -(6, 'xyz123', 'Izza’s Pizza', '83 Main St', 'San Francisco', 37.8749295, -122.5194155, 2, 1, 'CA'), -(7, 'uvw456', 'Sashays Cafe', '246 Elm St', 'New York', 40.812776, -74.105974, 2, 1, 'NY') +(1, 'abc123', 'Joe’s Pizza', '123 Main St', 'San Francisco', '37.7749295', '-122.4194155', 2, 0, 'CA'), +(2, 'def456', 'Peter’s Cafe', '456 Elm St', 'New York', '40.712776', '-74.005974', 3, 1, 'NY'), +(3, 'ghi789', 'Anna’s Diner', '789 Oak St', 'Los Angeles', '34.052235', '-118.243683', 4, 0, 'CA'), +(4, 'jkl012', 'Mark’s Bistro', '012 
Maple St', 'San Francisco', '37.7749295', '-122.4194155', 4, 1, 'CA'), +(5, 'mno345', 'Lily’s Bakery', '345 Walnut St', 'New York', '40.712776', '-74.005974', 3, 1, 'NY'), +(6, 'xyz123', 'Izza’s Pizza', '83 Main St', 'San Francisco', '37.8749295', '-122.5194155', 2, 1, 'CA'), +(7, 'uvw456', 'Sashays Cafe', '246 Elm St', 'New York', '40.812776', '-74.105974', 2, 1, 'NY') ; INSERT INTO public.category (id, business_id, category_name) VALUES @@ -109,58 +109,58 @@ INSERT INTO public.neighbourhood (id, business_id, neighbourhood_name) VALUES ; INSERT INTO public.review (rid, business_id, user_id, rating, text, year, month) VALUES -(1, 'abc123', 1, 4.5, 'Great pizza!', 2021, 'January'), -(2, 'def456', 2, 4.2, 'Delicious food.', 2021, 'February'), -(3, 'ghi789', 3, 3.9, 'Average diner.', 2021, 'March'), -(4, 'jkl012', 4, 4.8, 'Amazing bistro.', 2021, 'April'), -(5, 'mno345', 5, 4.6, 'Yummy bakery.', 2021, 'January'), -(6, 'ghi789', 1, 1.2, 'Horrible staff!', 2021, 'April'), -(7, 'def456', 2, 4.9, 'Second visit. 
I’m loving it.', 2021, 'May'), -(8, 'xyz123', 3, 0.5, 'Hate it', 2021, 'June'), -(9, 'uvw456', 4, 4.0, 'Not bad.', 2021, 'July'), -(10, 'abc123', 5, 4.6, 'Very goody.', 2022, 'January'), -(11, 'def456', 1, 3.0, 'Average', 2022, 'February'), -(12, 'ghi789', 2, 4.0, 'Not bad.', 2022, 'March'), -(13, 'jkl012', 3, 4.5, 'Second time here.', 2022, 'April'), -(14, 'mno345', 4, 4.6, 'Third time here.', 2022, 'May'), -(15, 'xyz123', 5, 3.5, 'Wont come again.', 2022, 'June'), -(16, 'uvw456', 1, 4.0, 'Quite good.', 2022, 'July'), -(17, 'mno345', 2, 4.6, 'Superb.', 2022, 'July'), -(18, 'jkl012', 3, 5.0, 'WOwowow.', 2022, 'August'), -(19, 'jkl012', 4, 4.8, 'Lovin it.', 2022, 'September'), -(20, 'ghi789', 5, 1.5, 'Worst experience ever.', 2022, 'October') +(1, 'abc123', '1', 4.5, 'Great pizza!', 2021, 'January'), +(2, 'def456', '2', 4.2, 'Delicious food.', 2021, 'February'), +(3, 'ghi789', '3', 3.9, 'Average diner.', 2021, 'March'), +(4, 'jkl012', '4', 4.8, 'Amazing bistro.', 2021, 'April'), +(5, 'mno345', '5', 4.6, 'Yummy bakery.', 2021, 'January'), +(6, 'ghi789', '1', 1.2, 'Horrible staff!', 2021, 'April'), +(7, 'def456', '2', 4.9, 'Second visit. 
I’m loving it.', 2021, 'May'), +(8, 'xyz123', '3', 0.5, 'Hate it', 2021, 'June'), +(9, 'uvw456', '4', 4.0, 'Not bad.', 2021, 'July'), +(10, 'abc123', '5', 4.6, 'Very goody.', 2022, 'January'), +(11, 'def456', '1', 3.0, 'Average', 2022, 'February'), +(12, 'ghi789', '2', 4.0, 'Not bad.', 2022, 'March'), +(13, 'jkl012', '3', 4.5, 'Second time here.', 2022, 'April'), +(14, 'mno345', '4', 4.6, 'Third time here.', 2022, 'May'), +(15, 'xyz123', '5', 3.5, 'Wont come again.', 2022, 'June'), +(16, 'uvw456', '1', 4.0, 'Quite good.', 2022, 'July'), +(17, 'mno345', '2', 4.6, 'Superb.', 2022, 'July'), +(18, 'jkl012', '3', 5.0, 'WOwowow.', 2022, 'August'), +(19, 'jkl012', '4', 4.8, 'Lovin it.', 2022, 'September'), +(20, 'ghi789', '5', 1.5, 'Worst experience ever.', 2022, 'October') ; INSERT INTO public.tip (tip_id, business_id, text, user_id, likes, year, month) VALUES -(1, 'abc123', 'Try their pepperoni pizza!', 1, NULL, 2021, 'January'), -(2, 'def456', 'Their coffee is amazing.', 2, NULL, 2021, 'February'), -(3, 'ghi789', 'The pancakes are delicious.', 3, NULL, 2021, 'March'), -(4, 'jkl012', 'Highly recommend the steak.', 4, NULL, 2021, 'April'), -(5, 'mno345', 'Their pastries are to die for.', 5, NULL, 2021, 'May'), -(6, 'xyz123', 'Don’t waste your money.', 1, NULL, 2021, 'June'), -(7, 'uvw456', 'Not bad.', 2, NULL, 2021, 'July'), -(8, 'mno345', 'Get the blueberry pancakes!', 1, NULL, 2022, 'January'), -(9, 'abc123', 'Try their pepperoni pizza!', 1, NULL, 2022, 'January'), -(10, 'def456', 'Their coffee is amazing.', 2, NULL, 2022, 'February'), -(11, 'ghi789', 'The pancakes are delicious.', 3, NULL, 2022, 'March'), -(12, 'jkl012', 'Highly recommend the steak.', 4, NULL, 2022, 'April'), -(13, 'mno345', 'Their pastries are to die for.', 5, NULL, 2022, 'May'), -(14, 'xyz123', 'Don’t waste your money.', 1, NULL, 2022, 'June'), -(15, 'uvw456', 'So-so.', 2, NULL, 2022, 'July'), -(16, 'mno345', 'Second time having blueberry pancakes!', 1, NULL, 2022, 'July'), -(17, 'jkl012', 'Great 
happy hour deals.', 5, NULL, 2022, 'August'), -(18, 'jkl012', 'Ask for extra sauce.', 3, NULL, 2022, 'September'), -(19, 'ghi789', 'Friendly staff.', 4, NULL, 2022, 'October'), -(20, 'def456', 'Tasty lattes.', 4, NULL, 2022, 'November'), -(21, 'abc123', 'Fresh ingredients.', 2, NULL, 2022, 'December') +(1, 'abc123', 'Try their pepperoni pizza!', '1', NULL, 2021, 'January'), +(2, 'def456', 'Their coffee is amazing.', '2', NULL, 2021, 'February'), +(3, 'ghi789', 'The pancakes are delicious.', '3', NULL, 2021, 'March'), +(4, 'jkl012', 'Highly recommend the steak.', '4', NULL, 2021, 'April'), +(5, 'mno345', 'Their pastries are to die for.', '5', NULL, 2021, 'May'), +(6, 'xyz123', 'Don’t waste your money.', '1', NULL, 2021, 'June'), +(7, 'uvw456', 'Not bad.', '2', NULL, 2021, 'July'), +(8, 'mno345', 'Get the blueberry pancakes!', '1', NULL, 2022, 'January'), +(9, 'abc123', 'Try their pepperoni pizza!', '1', NULL, 2022, 'January'), +(10, 'def456', 'Their coffee is amazing.', '2', NULL, 2022, 'February'), +(11, 'ghi789', 'The pancakes are delicious.', '3', NULL, 2022, 'March'), +(12, 'jkl012', 'Highly recommend the steak.', '4', NULL, 2022, 'April'), +(13, 'mno345', 'Their pastries are to die for.', '5', NULL, 2022, 'May'), +(14, 'xyz123', 'Don’t waste your money.', '1', NULL, 2022, 'June'), +(15, 'uvw456', 'So-so.', '2', NULL, 2022, 'July'), +(16, 'mno345', 'Second time having blueberry pancakes!', '1', NULL, 2022, 'July'), +(17, 'jkl012', 'Great happy hour deals.', '5', NULL, 2022, 'August'), +(18, 'jkl012', 'Ask for extra sauce.', '3', NULL, 2022, 'September'), +(19, 'ghi789', 'Friendly staff.', '4', NULL, 2022, 'October'), +(20, 'def456', 'Tasty lattes.', '4', NULL, 2022, 'November'), +(21, 'abc123', 'Fresh ingredients.', '2', NULL, 2022, 'December') ; INSERT INTO public.users (uid, user_id, name) VALUES -(1, 1, 'John Doe'), -(2, 2, 'Jane Smith'), -(3, 3, 'David Johnson'), -(4, 4, 'Sarah Williams'), -(5, 5, 'Michael Brown') +(1, '1', 'John Doe'), +(2, '2', 'Jane 
Smith'), +(3, '3', 'David Johnson'), +(4, '4', 'Sarah Williams'), +(5, '5', 'Michael Brown') ; diff --git a/setup_snowflake.sh b/setup_snowflake.sh index 4ea3256..25b2281 100755 --- a/setup_snowflake.sh +++ b/setup_snowflake.sh @@ -3,7 +3,7 @@ set -e # get arguments # if there are no arguments, set them to a default list if [ $# -eq 0 ]; then - set -- academic advising atis geography restaurants scholar yelp + set -- academic advising atis geography restaurants scholar yelp broker car_dealership derm_treatment ewallet fi echo "Databases to init: $@" diff --git a/translate_ddl_dialect.py b/translate_ddl_dialect.py new file mode 100644 index 0000000..cb28ce5 --- /dev/null +++ b/translate_ddl_dialect.py @@ -0,0 +1,84 @@ +# This script reads the DDL statements from the Postgres sql files and translates them to the specified dialect. +# It then creates the databases and tables from the translated DDL statements. +# Finally, it queries the database to check that values were inserted into tables.
+ +import os +import time +from tqdm import tqdm +from utils_dialects import ( + create_bq_db, + create_mysql_db, + create_sqlite_db, + create_tsql_db, + test_query_db, + conv_ddl_to_dialect +) + +# List of databases to create +db_names = [ + "academic", + "advising", + "atis", + "broker", + "car_dealership", + "derm_treatment", + "ewallet", + "geography", + "restaurants", + "scholar", + "yelp", +] +dialects = [ + "bigquery", + "mysql", + "sqlite", + "tsql", +] # Supported dialects: bigquery, mysql, sqlite, tsql +bigquery_proj = os.getenv("BIGQUERY_PROJ") + +# For testing that values were inserted into tables, format: (db_name, table_name) +test_queries = [ + ("academic", "writes"), + ("advising", "student_record"), + ("atis", "time_zone"), + ("broker", "sbTransaction"), + ("car_dealership", "payments_made"), + ("derm_treatment", "concomitant_meds"), + ("ewallet", "consumer_div.user_setting_snapshot"), + ("geography", "state"), + ("restaurants", "restaurant"), + ("scholar", "writes"), + ("yelp", "users"), +] + +# Run the main function +def main(): + for dialect in tqdm(dialects): + print(f"Translating DDL to {dialect} dialect...") + for db_name in tqdm(db_names): + conv_ddl_to_dialect(db_name, dialect) + if dialect == "bigquery": + create_bq_db(bigquery_proj, db_name) + time.sleep(10) + elif dialect == "mysql": + create_mysql_db(db_name) + elif dialect == "sqlite": + create_sqlite_db(db_name) + elif dialect == "tsql": + create_tsql_db(db_name) + tries = 0 + while tries < 20: + try: + test_query_db(db_name, dialect, test_queries) + break + except Exception as e: + if "not found" in str(e): + # print(f"Table not found. 
Retrying...") + tries += 1 + continue + else: + break + + +if __name__ == "__main__": + main() diff --git a/utils_dialects.py b/utils_dialects.py new file mode 100644 index 0000000..d52d812 --- /dev/null +++ b/utils_dialects.py @@ -0,0 +1,551 @@ +import sqlglot +from google.cloud import bigquery +import mysql.connector +from mysql.connector import errorcode +import pyodbc +import sqlite3 +import re +import os +import time + +GOOGLE_APPLICATION_CREDENTIALS = os.getenv("GOOGLE_APPLICATION_CREDENTIALS") +bigquery_proj = os.getenv("BIGQUERY_PROJ") +creds = { + "mysql": { + "user": "root", + "password": "password", + "host": "localhost", + }, + "tsql": { + "server": os.getenv("TSQL_SERVER"), + "user": "test_user", + "password": "password", + "driver": "{ODBC Driver 17 for SQL Server}", + }, +} + + +def fix_ddl_bigquery(translated_ddl): + """ + Fix the translated DDL for BigQuery + """ + translated_ddl = re.sub( + r"NOT NULL DEFAULT CURRENT_TIMESTAMP\(\)", + "DEFAULT CAST(CURRENT_TIMESTAMP() AS DATETIME) NOT NULL", + translated_ddl, + ) + # translated_ddl = re.sub( + # r"DEFAULT\s+('[^']*'|\d+|[a-zA-Z_]+\(\))", "", translated_ddl + # ) + translated_ddl = re.sub( + r"SERIAL(PRIMARY KEY)?", "STRING DEFAULT GENERATE_UUID()", translated_ddl + ) + translated_ddl = translated_ddl.replace("EXTRACT(EPOCH FROM ", "UNIX_SECONDS(") + translated_ddl = re.sub( + r", (TIMESTAMP_TRUNC\(CURRENT_DATE, (DAY|MIN|WEEK|MONTH|YEAR)\))", + lambda match: rf", CAST({match.group(1)} AS DATE)", + translated_ddl, + ) + translated_ddl = re.sub( + r"(TIMESTAMP_TRUNC\(CURRENT_DATE|CURRENT_DATE(?!,))(.+?\s*.+? INTERVAL\s*'\d+'\s*(DAY|MIN|WEEK|MONTH|YEAR))", + lambda match: rf"CAST({match.group(1)}{match.group(2)} AS DATE)", + translated_ddl, + ) + translated_ddl = re.sub( + r"(?