From 06fd72a6f10f9eaa302dd6ee947891ebcd8adddc Mon Sep 17 00:00:00 2001 From: jp Date: Tue, 26 Mar 2024 12:36:30 +0800 Subject: [PATCH] Added 2 new schema - car_dealership - derm_treatment --- defog_data/car_dealership/car_dealership.json | 299 +++++++++++++ defog_data/car_dealership/car_dealership.sql | 188 ++++++++ defog_data/derm_treatment/derm_treatment.json | 421 ++++++++++++++++++ defog_data/derm_treatment/derm_treatment.sql | 219 +++++++++ defog_data/metadata.py | 9 + setup.sh | 4 +- tests.py | 33 +- 7 files changed, 1170 insertions(+), 3 deletions(-) create mode 100644 defog_data/car_dealership/car_dealership.json create mode 100644 defog_data/car_dealership/car_dealership.sql create mode 100644 defog_data/derm_treatment/derm_treatment.json create mode 100644 defog_data/derm_treatment/derm_treatment.sql diff --git a/defog_data/car_dealership/car_dealership.json b/defog_data/car_dealership/car_dealership.json new file mode 100644 index 0000000..762572b --- /dev/null +++ b/defog_data/car_dealership/car_dealership.json @@ -0,0 +1,299 @@ +{ + "table_metadata": { + "cars": [ + { + "data_type": "SERIAL", + "column_name": "id", + "column_description": "Primary key for the cars table" + }, + { + "data_type": "TEXT", + "column_name": "make", + "column_description": "Manufacturer of the car" + }, + { + "data_type": "TEXT", + "column_name": "model", + "column_description": "Model name of the car" + }, + { + "data_type": "INTEGER", + "column_name": "year", + "column_description": "Year of manufacture" + }, + { + "data_type": "TEXT", + "column_name": "color", + "column_description": "Color of the car" + }, + { + "data_type": "VARCHAR(17)", + "column_name": "vin_number", + "column_description": "Vehicle Identification Number" + }, + { + "data_type": "TEXT", + "column_name": "engine_type", + "column_description": "Type of engine (e.g., V6, V8, Electric)" + }, + { + "data_type": "TEXT", + "column_name": "transmission", + "column_description": "Type of transmission (e.g., Automatic, Manual)" + }, + { + "data_type": "NUMERIC(10, 2)", + "column_name": "price", + "column_description": "Selling price of the car" + }, + { + "data_type": "BOOLEAN", + "column_name": "is_sold", + "column_description": "Indicates if the car has been sold" + }, + { + "data_type": "TIMESTAMP", + "column_name": "crtd_ts", + "column_description": "Timestamp when the car was added to the system" + } + ], + "salespersons": [ + { + "data_type": "SERIAL", + "column_name": "id", + "column_description": "Primary key for the salespersons table" + }, + { + "data_type": "TEXT", + "column_name": "first_name", + "column_description": "First name of the salesperson" + }, + { + "data_type": "TEXT", + "column_name": "last_name", + "column_description": "Last name of the salesperson" + }, + { + "data_type": "VARCHAR(255)", + "column_name": "email", + "column_description": "Email address of the salesperson" + }, + { + "data_type": "VARCHAR(20)", + "column_name": "phone", + "column_description": "Phone number of the salesperson" + }, + { + "data_type": "DATE", + "column_name": "hire_date", + "column_description": "Date when the salesperson was hired" + }, + { + "data_type": "DATE", + "column_name": "termination_date", + "column_description": "Date when the salesperson's employment was terminated" + }, + { + "data_type": "TIMESTAMP", + "column_name": "crtd_ts", + "column_description": "Timestamp when the salesperson record was created" + } + ], + "customers": [ + { + "data_type": "SERIAL", + "column_name": "id", + "column_description": "Primary key for the customers table" + }, + { + "data_type": "TEXT", + "column_name": "first_name", + "column_description": "First name of the customer" + }, + { + "data_type": "TEXT", + "column_name": "last_name", + "column_description": "Last name of the customer" + }, + { + "data_type": "VARCHAR(255)", + "column_name": "email", + "column_description": "Email address of the customer" + }, + { + "data_type": "VARCHAR(20)", + "column_name": "phone", + "column_description": "Phone number of the customer" + }, + { + "data_type": "TEXT", + "column_name": "address", + "column_description": "Address of the customer" + }, + { + "data_type": "TEXT", + "column_name": "city", + "column_description": "City of the customer" + }, + { + "data_type": "TEXT", + "column_name": "state", + "column_description": "State of the customer" + }, + { + "data_type": "VARCHAR(10)", + "column_name": "zip_code", + "column_description": "ZIP code of the customer" + }, + { + "data_type": "TIMESTAMP", + "column_name": "crtd_ts", + "column_description": "Timestamp when the customer record was created" + } + ], + "sales": [ + { + "data_type": "SERIAL", + "column_name": "id", + "column_description": "Primary key for the sales table" + }, + { + "data_type": "INTEGER", + "column_name": "car_id", + "column_description": "Foreign key referencing the cars table" + }, + { + "data_type": "INTEGER", + "column_name": "salesperson_id", + "column_description": "Foreign key referencing the salespersons table" + }, + { + "data_type": "INTEGER", + "column_name": "customer_id", + "column_description": "Foreign key referencing the customers table" + }, + { + "data_type": "NUMERIC(10, 2)", + "column_name": "sale_price", + "column_description": "Price at which the car was sold" + }, + { + "data_type": "DATE", + "column_name": "sale_date", + "column_description": "Date when the car was sold" + }, + { + "data_type": "TIMESTAMP", + "column_name": "crtd_ts", + "column_description": "Timestamp when the sale record was created" + } + ], + "inventory_snapshots": [ + { + "data_type": "SERIAL", + "column_name": "id", + "column_description": "Primary key for the inventory_snapshots table" + }, + { + "data_type": "DATE", + "column_name": "snapshot_date", + "column_description": "Date of the inventory snapshot" + }, + { + "data_type": "INTEGER", + "column_name": "car_id", + "column_description": "Foreign key referencing the cars table" + }, + { + "data_type": "BOOLEAN", + "column_name": "is_in_inventory", + "column_description": "Indicates if the car was in inventory on the snapshot date" + }, + { + "data_type": "TIMESTAMP", + "column_name": "crtd_ts", + "column_description": "Timestamp when the inventory snapshot record was created" + } + ], + "payments_received": [ + { + "data_type": "SERIAL", + "column_name": "id", + "column_description": "Primary key for the payments_received table" + }, + { + "data_type": "INTEGER", + "column_name": "sale_id", + "column_description": "Foreign key referencing the sales table" + }, + { + "data_type": "DATE", + "column_name": "payment_date", + "column_description": "Date when the payment was received" + }, + { + "data_type": "NUMERIC(10, 2)", + "column_name": "payment_amount", + "column_description": "Amount of the payment received" + }, + { + "data_type": "TEXT", + "column_name": "payment_method", + "column_description": "Method of payment (e.g., cash, check, card, banktrf, trp01)" + }, + { + "data_type": "TIMESTAMP", + "column_name": "crtd_ts", + "column_description": "Timestamp when the payment received record was created" + } + ], + "payments_made": [ + { + "data_type": "SERIAL", + "column_name": "id", + "column_description": "Primary key for the payments_made table" + }, + { + "data_type": "TEXT", + "column_name": "vendor_name", + "column_description": "Name of the vendor to whom the payment was made" + }, + { + "data_type": "DATE", + "column_name": "payment_date", + "column_description": "Date when the payment was made" + }, + { + "data_type": "NUMERIC(10, 2)", + "column_name": "payment_amount", + "column_description": "Amount of the payment made" + }, + { + "data_type": "TEXT", + "column_name": "payment_method", + "column_description": "Method of payment (e.g., check, bank_transfer, credit_card)" + }, + { + "data_type": "VARCHAR(50)", + "column_name": "invoice_number", + "column_description": "Invoice number associated with the payment" + }, + { + "data_type": "DATE", + "column_name": "invoice_date", + "column_description": "Date of the invoice" + }, + { + "data_type": "DATE", + "column_name": "due_date", + "column_description": "Due date of the invoice" + }, + { + "data_type": "TIMESTAMP", + "column_name": "crtd_ts", + "column_description": "Timestamp when the payment made record was created" + } + ] + }, + "glossary": "- `cars.id` can be joined with `car_id` from `sales` and `inventory_snapshots` tables\n- `salespersons.id` can be joined with `salesperson_id` from `sales` table\n- `customers.id` can be joined with `customer_id` from `sales` table\n- `sales.id` can be joined with `sale_id` from `payments_received` table\n- All string columns should be matched exactly unless specified otherwise\n- `cars.make`, `cars.model`, `cars.color`, `customers.address`, `customers.city`, `customers.state` can be filtered with ILIKE '%%'\n- `cars.vin_number` should be matched case-insensitively\n- Total Sales = SUM(sale_price) from `sales` table\n- Total Payments Received = SUM(payment_amount) from `payments_received` table\n- Total Payments Made = SUM(payment_amount) from `payments_made` table\n- Profit = Total Sales - Total Payments Made" + } \ No newline at end of file diff --git a/defog_data/car_dealership/car_dealership.sql b/defog_data/car_dealership/car_dealership.sql new file mode 100644 index 0000000..c9cd0ba --- /dev/null +++ b/defog_data/car_dealership/car_dealership.sql @@ -0,0 +1,188 @@ +CREATE TABLE cars ( + id SERIAL PRIMARY KEY, + make TEXT NOT NULL, -- manufacturer of the car + model TEXT NOT NULL, -- model name of the car + year INTEGER NOT NULL, -- year of manufacture + color TEXT NOT NULL, -- color of the car + vin_number VARCHAR(17) NOT NULL UNIQUE, -- Vehicle Identification Number + engine_type TEXT NOT NULL, -- type of engine (e.g., V6, V8, Electric) + transmission TEXT NOT NULL, -- type of transmission (e.g., Automatic, Manual) + price NUMERIC(10, 2) NOT NULL, -- selling price of the car + is_sold BOOLEAN NOT NULL DEFAULT FALSE, -- indicates if the car has been sold + crtd_ts TIMESTAMP NOT NULL DEFAULT NOW() -- timestamp when the car was added to the system +); + +CREATE TABLE salespersons ( + id SERIAL PRIMARY KEY, + first_name TEXT NOT NULL, + last_name TEXT NOT NULL, + email VARCHAR(255) NOT NULL UNIQUE, + phone VARCHAR(20) NOT NULL, + hire_date DATE NOT NULL, + termination_date DATE, + crtd_ts TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE TABLE customers ( + id SERIAL PRIMARY KEY, + first_name TEXT NOT NULL, + last_name TEXT NOT NULL, + email VARCHAR(255) NOT NULL UNIQUE, + phone VARCHAR(20) NOT NULL, + address TEXT NOT NULL, + city TEXT NOT NULL, + state TEXT NOT NULL, + zip_code VARCHAR(10) NOT NULL, + crtd_ts TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE TABLE sales ( + id SERIAL PRIMARY KEY, + car_id INTEGER NOT NULL REFERENCES cars(id), + salesperson_id INTEGER NOT NULL REFERENCES salespersons(id), + customer_id INTEGER NOT NULL REFERENCES customers(id), + sale_price NUMERIC(10, 2) NOT NULL, + sale_date DATE NOT NULL, + crtd_ts TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE TABLE inventory_snapshots ( + id SERIAL PRIMARY KEY, + snapshot_date DATE NOT NULL, + car_id INTEGER NOT NULL REFERENCES cars(id), + is_in_inventory BOOLEAN NOT NULL, + crtd_ts TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE TABLE payments_received ( + id SERIAL PRIMARY KEY, + sale_id INTEGER NOT NULL REFERENCES sales(id), + payment_date DATE NOT NULL, + payment_amount NUMERIC(10, 2) NOT NULL, + payment_method TEXT NOT NULL, -- values: cash, check, credit_card, debit_card, financing + crtd_ts TIMESTAMP NOT NULL DEFAULT NOW() +); + +CREATE TABLE payments_made ( + id SERIAL PRIMARY KEY, + vendor_name TEXT NOT NULL, + payment_date DATE NOT NULL, + payment_amount NUMERIC(10, 2) NOT NULL, + payment_method TEXT NOT NULL, -- values: check, bank_transfer, credit_card + invoice_number VARCHAR(50) NOT NULL, + invoice_date DATE NOT NULL, + due_date DATE NOT NULL, + crtd_ts TIMESTAMP NOT NULL DEFAULT NOW() +); + + +-- cars +INSERT INTO cars (make, model, year, color, vin_number, engine_type, transmission, price) +VALUES + ('Toyota', 'Camry', 2022, 'Silver', '4T1BF1FK3CU510984', 'V6', 'Automatic', 28500.00), + ('Honda', 'Civic', 2021, 'Red', '2HGFC2F53MH522780', 'Inline 4', 'CVT', 22000.00), + ('Ford', 'Mustang', 2023, 'Blue', '1FA6P8TH4M5100001', 'V8', 'Manual', 45000.00), + ('Tesla', 'Model 3', 2022, 'White', '5YJ3E1EB7MF123456', 'Electric', 'Automatic', 41000.00), + ('Chevrolet', 'Equinox', 2021, 'Gray', '2GNAXUEV1M6290124', 'Inline 4', 'Automatic', 26500.00), + ('Nissan', 'Altima', 2022, 'Black', '1N4BL4BV4NN123456', 'V6', 'CVT', 25000.00), + ('BMW', 'X5', 2023, 'Silver', '5UXCR6C56M9A12345', 'V8', 'Automatic', 62000.00), + ('Audi', 'A4', 2022, 'Blue', 'WAUBNAF47MA098765', 'Inline 4', 'Automatic', 39000.00), + ('Lexus', 'RX350', 2021, 'White', '2T2BZMCA7MC143210', 'V6', 'Automatic', 45500.00), + ('Subaru', 'Outback', 2022, 'Green', '4S4BSANC2N3246801', 'Boxer 4', 'CVT', 28000.00); + +-- salespersons +INSERT INTO salespersons (first_name, last_name, email, phone, hire_date) +VALUES + ('John', 'Doe', 'john.doe@example.com', '555-123-4567', '2020-01-01'), + ('Jane', 'Smith', 'jane.smith@example.com', '555-987-6543', '2019-06-15'), + ('Michael', 'Johnson', 'michael.johnson@example.com', '555-456-7890', '2021-03-10'), + ('Emily', 'Brown', 'emily.brown@example.com', '555-111-2222', '2022-02-20'), + ('David', 'Wilson', 'david.wilson@example.com', '555-333-4444', '2020-11-05'), + ('Sarah', 'Taylor', 'sarah.taylor@example.com', '555-555-6666', '2018-09-01'), + ('Daniel', 'Anderson', 'daniel.anderson@example.com', '555-777-8888', '2021-07-12'), + ('Olivia', 'Thomas', 'olivia.thomas@example.com', '555-999-0000', '2023-01-25'), + ('James', 'Jackson', 'james.jackson@example.com', '555-222-3333', '2019-04-30'), + ('Sophia', 'White', 'sophia.white@example.com', '555-444-5555', '2022-08-18'); + +-- customers +INSERT INTO customers (first_name, last_name, email, phone, address, city, state, zip_code) +VALUES + ('William', 'Davis', 'william.davis@example.com', '555-888-9999', '123 Main St', 'New York', 'NY', '10001'), + ('Ava', 'Miller', 'ava.miller@example.com', '555-777-6666', '456 Oak Ave', 'Los Angeles', 'CA', '90001'), + ('Benjamin', 'Wilson', 'benjamin.wilson@example.com', '555-666-5555', '789 Elm St', 'Chicago', 'IL', '60007'), + ('Mia', 'Moore', 'mia.moore@example.com', '555-555-4444', '321 Pine Rd', 'Houston', 'TX', '77001'), + ('Henry', 'Taylor', 'henry.taylor@example.com', '555-444-3333', '654 Cedar Ln', 'Phoenix', 'AZ', '85001'), + ('Charlotte', 'Anderson', 'charlotte.anderson@example.com', '555-333-2222', '987 Birch Dr', 'Philadelphia', 'PA', '19019'), + ('Alexander', 'Thomas', 'alexander.thomas@example.com', '555-222-1111', '741 Walnut St', 'San Antonio', 'TX', '78006'), + ('Amelia', 'Jackson', 'amelia.jackson@example.com', '555-111-0000', '852 Maple Ave', 'San Diego', 'CA', '92101'), + ('Daniel', 'White', 'daniel.white@example.com', '555-000-9999', '963 Oak St', 'Dallas', 'TX', '75001'), + ('Abigail', 'Harris', 'abigail.harris@example.com', '555-999-8888', '159 Pine Ave', 'San Jose', 'CA', '95101'); + +-- sales +INSERT INTO sales (car_id, salesperson_id, customer_id, sale_price, sale_date) +VALUES + (1, 2, 3, 27500.00, '2023-03-15'), + (3, 1, 5, 44000.00, '2023-03-20'), + (6, 4, 2, 24500.00, '2023-03-22'), + (8, 7, 9, 38000.00, '2023-03-25'), + (2, 5, 7, 21500.00, '2023-03-28'), + (10, 9, 1, 27000.00, '2023-04-01'), + (5, 3, 6, 26000.00, '2023-04-05'), + (7, 8, 10, 60000.00, '2023-04-10'), + (4, 6, 8, 40000.00, '2023-04-12'), + (9, 10, 4, 44500.00, '2023-04-15'); + +-- inventory_snapshots +INSERT INTO inventory_snapshots (snapshot_date, car_id, is_in_inventory) +VALUES + ('2023-03-15', 1, TRUE), + ('2023-03-15', 2, TRUE), + ('2023-03-15', 3, TRUE), + ('2023-03-15', 4, TRUE), + ('2023-03-15', 5, TRUE), + ('2023-03-15', 6, TRUE), + ('2023-03-15', 7, TRUE), + ('2023-03-15', 8, TRUE), + ('2023-03-15', 9, TRUE), + ('2023-03-15', 10, TRUE), + ('2023-03-20', 1, FALSE), + ('2023-03-20', 3, FALSE), + ('2023-03-22', 6, FALSE), + ('2023-03-25', 8, FALSE), + ('2023-03-28', 2, FALSE), + ('2023-04-01', 10, FALSE), + ('2023-04-05', 5, FALSE), + ('2023-04-10', 7, FALSE), + ('2023-04-12', 4, FALSE), + ('2023-04-15', 9, FALSE); + +-- payments_received +INSERT INTO payments_received (sale_id, payment_date, payment_amount, payment_method) +VALUES + (1, '2023-03-15', 5000.00, 'check'), + (1, '2023-03-20', 22500.00, 'financing'), + (2, '2023-03-20', 44000.00, 'credit_card'), + (3, '2023-03-22', 24500.00, 'debit_card'), + (4, '2023-03-25', 38000.00, 'financing'), + (5, '2023-03-28', 21500.00, 'cash'), + (6, '2023-04-01', 27000.00, 'credit_card'), + (7, '2023-04-05', 26000.00, 'debit_card'), + (8, '2023-04-10', 60000.00, 'financing'), + (9, '2023-04-12', 40000.00, 'check'), + (10, '2023-04-15', 44500.00, 'credit_card'); + +-- payments_made +INSERT INTO payments_made (vendor_name, payment_date, payment_amount, payment_method, invoice_number, invoice_date, due_date) +VALUES + ('Car Manufacturer Inc', '2023-03-01', 150000.00, 'bank_transfer', 'INV-001', '2023-02-25', '2023-03-25'), + ('Auto Parts Supplier', '2023-03-10', 25000.00, 'check', 'INV-002', '2023-03-05', '2023-04-04'), + ('Utility Company', '2023-03-15', 1500.00, 'bank_transfer', 'INV-003', '2023-03-01', '2023-03-31'), + ('Marketing Agency', '2023-03-20', 10000.00, 'credit_card', 'INV-004', '2023-03-15', '2023-04-14'), + ('Insurance Provider', '2023-03-25', 5000.00, 'bank_transfer', 'INV-005', '2023-03-20', '2023-04-19'), + ('Cleaning Service', '2023-03-31', 2000.00, 'check', 'INV-006', '2023-03-25', '2023-04-24'), + ('Car Manufacturer Inc', '2023-04-01', 200000.00, 'bank_transfer', 'INV-007', '2023-03-25', '2023-04-24'), + ('Auto Parts Supplier', '2023-04-10', 30000.00, 'check', 'INV-008', '2023-04-05', '2023-05-05'), + ('Utility Company', '2023-04-15', 1500.00, 'bank_transfer', 'INV-009', '2023-04-01', '2023-04-30'), + ('Marketing Agency', '2023-04-20', 15000.00, 'credit_card', 'INV-010', '2023-04-15', '2023-05-15'), + ('Insurance Provider', '2023-04-25', 5000.00, 'bank_transfer', 'INV-011', '2023-04-20', '2023-05-20'), + ('Cleaning Service', '2023-04-30', 2000.00, 'check', 'INV-012', '2023-04-25', '2023-05-25'); \ No newline at end of file diff --git a/defog_data/derm_treatment/derm_treatment.json b/defog_data/derm_treatment/derm_treatment.json new file mode 100644 index 0000000..6843e1c --- /dev/null +++ b/defog_data/derm_treatment/derm_treatment.json @@ -0,0 +1,421 @@ +{ + "table_metadata": { + "doctors": [ + { + "data_type": "SERIAL", + "column_name": "doc_id", + "column_description": "" + }, + { + "data_type": "VARCHAR(50)", + "column_name": "first_name", + "column_description": "" + }, + { + "data_type": "VARCHAR(50)", + "column_name": "last_name", + "column_description": "" + }, + { + "data_type": "TEXT", + "column_name": "specialty", + "column_description": "possible values: dermatology, immunology, general" + }, + { + "data_type": "INT", + "column_name": "year_reg", + "column_description": "year the doctor was registered and obtained license" + }, + { + "data_type": "VARCHAR(100)", + "column_name": "med_school_name", + "column_description": "" + }, + { + "data_type": "VARCHAR(50)", + "column_name": "loc_city", + "column_description": "" + }, + { + "data_type": "CHAR(2)", + "column_name": "loc_state", + "column_description": "" + }, + { + "data_type": "VARCHAR(10)", + "column_name": "loc_zip", + "column_description": "" + }, + { + "data_type": "VARCHAR(20)", + "column_name": "bd_cert_num", + "column_description": "board certification number" + } + ], + "patients": [ + { + "data_type": "SERIAL", + "column_name": "patient_id", + "column_description": "" + }, + { + "data_type": "VARCHAR(50)", + "column_name": "first_name", + "column_description": "" + }, + { + "data_type": "VARCHAR(50)", + "column_name": "last_name", + "column_description": "" + }, + { + "data_type": "DATE", + "column_name": "date_of_birth", + "column_description": "" + }, + { + "data_type": "VARCHAR(10)", + "column_name": "gender", + "column_description": "" + }, + { + "data_type": "VARCHAR(100)", + "column_name": "email", + "column_description": "" + }, + { + "data_type": "VARCHAR(20)", + "column_name": "phone", + "column_description": "" + }, + { + "data_type": "VARCHAR(100)", + "column_name": "addr_street", + "column_description": "" + }, + { + "data_type": "VARCHAR(50)", + "column_name": "addr_city", + "column_description": "" + }, + { + "data_type": "CHAR(2)", + "column_name": "addr_state", + "column_description": "" + }, + { + "data_type": "VARCHAR(10)", + "column_name": "addr_zip", + "column_description": "" + }, + { + "data_type": "TEXT", + "column_name": "ins_type", + "column_description": "Insurance type. Possible values: private, medicare, medicaid, uninsured" + }, + { + "data_type": "VARCHAR(20)", + "column_name": "ins_policy_num", + "column_description": "Insurance policy number" + }, + { + "data_type": "FLOAT", + "column_name": "height_cm", + "column_description": "" + }, + { + "data_type": "FLOAT", + "column_name": "weight_kg", + "column_description": "" + } + ], + "drugs": [ + { + "data_type": "SERIAL", + "column_name": "drug_id", + "column_description": "" + }, + { + "data_type": "VARCHAR(100)", + "column_name": "drug_name", + "column_description": "" + }, + { + "data_type": "VARCHAR(100)", + "column_name": "manufacturer", + "column_description": "" + }, + { + "data_type": "TEXT", + "column_name": "drug_type", + "column_description": "possible values: biologic, small molecule, topical" + }, + { + "data_type": "TEXT", + "column_name": "moa", + "column_description": "mechanism of action" + }, + { + "data_type": "DATE", + "column_name": "fda_appr_dt", + "column_description": "FDA approval date. NULL if drug is still under trial." + }, + { + "data_type": "TEXT", + "column_name": "admin_route", + "column_description": "possible values: oral, injection, topical" + }, + { + "data_type": "DECIMAL(10,2)", + "column_name": "dos_amt", + "column_description": "recommended dosage amount" + }, + { + "data_type": "VARCHAR(20)", + "column_name": "dos_unit", + "column_description": "recommended dosage unit" + }, + { + "data_type": "INT", + "column_name": "dos_freq_hrs", + "column_description": "recommended number of hours between dosages" + }, + { + "data_type": "VARCHAR(20)", + "column_name": "ndc", + "column_description": "National Drug Code" + } + ], + "diagnoses": [ + { + "data_type": "SERIAL", + "column_name": "diag_id", + "column_description": "" + }, + { + "data_type": "VARCHAR(10)", + "column_name": "diag_code", + "column_description": "" + }, + { + "data_type": "VARCHAR(100)", + "column_name": "diag_name", + "column_description": "" + }, + { + "data_type": "TEXT", + "column_name": "diag_desc", + "column_description": "" + } + ], + "treatments": [ + { + "data_type": "SERIAL", + "column_name": "treatment_id", + "column_description": "" + }, + { + "data_type": "INT", + "column_name": "patient_id", + "column_description": "" + }, + { + "data_type": "INT", + "column_name": "doc_id", + "column_description": "" + }, + { + "data_type": "INT", + "column_name": "drug_id", + "column_description": "" + }, + { + "data_type": "INT", + "column_name": "diag_id", + "column_description": "" + }, + { + "data_type": "DATE", + "column_name": "start_dt", + "column_description": "" + }, + { + "data_type": "DATE", + "column_name": "end_dt", + "column_description": "NULL if treatment is ongoing" + }, + { + "data_type": "BOOLEAN", + "column_name": "is_placebo", + "column_description": "" + }, + { + "data_type": "DECIMAL(10,2)", + "column_name": "tot_drug_amt", + "column_description": "" + }, + { + "data_type": "TEXT", + "column_name": "drug_unit", + "column_description": "possible values: mg, ml, g" + } + ], + "outcomes": [ + { + "data_type": "SERIAL", + "column_name": "outcome_id", + "column_description": "" + }, + { + "data_type": "INT", + "column_name": "treatment_id", + "column_description": "" + }, + { + "data_type": "DATE", + "column_name": "assess_dt", + "column_description": "" + }, + { + "data_type": "INT", + "column_name": "day7_lesion_cnt", + "column_description": "lesion counts on day 7." + }, + { + "data_type": "INT", + "column_name": "day30_lesion_cnt", + "column_description": "" + }, + { + "data_type": "INT", + "column_name": "day100_lesion_cnt", + "column_description": "" + }, + { + "data_type": "DECIMAL(4,1)", + "column_name": "day7_pasi_score", + "column_description": "PASI score range 0-72" + }, + { + "data_type": "DECIMAL(4,1)", + "column_name": "day30_pasi_score", + "column_description": "" + }, + { + "data_type": "DECIMAL(4,1)", + "column_name": "day100_pasi_score", + "column_description": "" + }, + { + "data_type": "DECIMAL(5,2)", + "column_name": "day7_tewl", + "column_description": "in g/m^2/h" + }, + { + "data_type": "DECIMAL(5,2)", + "column_name": "day30_tewl", + "column_description": "" + }, + { + "data_type": "DECIMAL(5,2)", + "column_name": "day100_tewl", + "column_description": "" + }, + { + "data_type": "INT", + "column_name": "day7_itch_vas", + "column_description": "visual analog scale 0-100" + }, + { + "data_type": "INT", + "column_name": "day30_itch_vas", + "column_description": "" + }, + { + "data_type": "INT", + "column_name": "day100_itch_vas", + "column_description": "" + }, + { + "data_type": "DECIMAL(4,1)", + "column_name": "day7_hfg", + "column_description": "hair growth factor range 0-5" + }, + { + "data_type": "DECIMAL(4,1)", + "column_name": "day30_hfg", + "column_description": "" + }, + { + "data_type": "DECIMAL(4,1)", + "column_name": "day100_hfg", + "column_description": "" + } + ], + "adverse_events": [ + { + "data_type": "SERIAL", + "column_name": "id", + "column_description": "1 row per adverse event per treatment_id" + }, + { + "data_type": "INT", + "column_name": "treatment_id", + "column_description": "" + }, + { + "data_type": "DATE", + "column_name": "reported_dt", + "column_description": "" + }, + { + "data_type": "TEXT", + "column_name": "description", + "column_description": "" + } + ], + "concomitant_meds": [ + { + "data_type": "SERIAL", + "column_name": "id", + "column_description": "1 row per med per treatment_id" + }, + { + "data_type": "INT", + "column_name": "treatment_id", + "column_description": "" + }, + { + "data_type": "VARCHAR(100)", + "column_name": "med_name", + "column_description": "" + }, + { + "data_type": "TEXT", + "column_name": "start_dt", + "column_description": "YYYY-MM-DD" + }, + { + "data_type": "TEXT", + "column_name": "end_dt", + "column_description": "YYYY-MM-DD NULL if still taking" + }, + { + "data_type": "DECIMAL(10,2)", + "column_name": "dose_amt", + "column_description": "" + }, + { + "data_type": "TEXT", + "column_name": "dose_unit", + "column_description": "possible values: mg, ml, g" + }, + { + "data_type": "INT", + "column_name": "freq_hrs", + "column_description": "" + } + ] + }, + "glossary": "- All string columns should be matched exactly unless specified otherwise\n- `patients.first_name`, `patients.last_name`, `doctors.first_name`, `doctors.last_name` can be filtered with ILIKE '%%'\n- `drugs.drug_name`, `diagnoses.diag_name` should be matched case insensitively\n- day30_* will be empty if treatment has yet to progress to day 30. same for day100.\n- PASI75D30 (75% reduction in PASI score on day 30) = COUNT(CASE WHEN day30_pasi_score <= 0.25 * day0_pasi_score THEN 1 END) / COUNT(day0_pasi_score)\n- Mean change in TEWL = AVG(day30_tewl) - AVG(day0_tewl) \n- Proportion with improved itch = COUNT(CASE WHEN day30_itch_vas < day0_itch_vas THEN 1 END) / COUNT(*)\n- Mean percent change in hair growth = AVG((day30_hfg - day0_hfg) / day0_hfg * 100)\n\nAdditional outcomes documented:\n- day0_pasi_score: Baseline PASI score before treatment \n- day0_tewl: Baseline TEWL before treatment\n- day0_itch_vas: Baseline itch VAS before treatment \n- day0_hfg: Baseline hair growth factor before treatment" + } \ No newline at end of file diff --git a/defog_data/derm_treatment/derm_treatment.sql b/defog_data/derm_treatment/derm_treatment.sql new file mode 100644 index 0000000..b39430b --- /dev/null +++ b/defog_data/derm_treatment/derm_treatment.sql @@ -0,0 +1,219 @@ +-- doctor dimension table +CREATE TABLE doctors ( + doc_id SERIAL PRIMARY KEY, + first_name VARCHAR(50), + last_name VARCHAR(50), + specialty TEXT, -- possible values: dermatology, immunology, general + year_reg INT, -- year the doctor was registered and obtained license + med_school_name VARCHAR(100), + loc_city VARCHAR(50), + loc_state CHAR(2), + loc_zip VARCHAR(10), + bd_cert_num VARCHAR(20) -- board certification number +); + +-- patient dimension table +CREATE TABLE patients ( + patient_id SERIAL PRIMARY KEY, + first_name VARCHAR(50), + last_name VARCHAR(50), + date_of_birth DATE, + gender VARCHAR(10), + email VARCHAR(100), + phone VARCHAR(20), + addr_street VARCHAR(100), + addr_city VARCHAR(50), + addr_state CHAR(2), + addr_zip VARCHAR(10), + ins_type TEXT, -- possible values: private, medicare, medicaid, uninsured + ins_policy_num VARCHAR(20), + height_cm FLOAT, + weight_kg FLOAT +); + +-- drug dimension table +CREATE TABLE drugs ( + drug_id SERIAL PRIMARY KEY, + drug_name VARCHAR(100), + manufacturer VARCHAR(100), + drug_type TEXT, -- possible values: biologic, small molecule, topical + moa TEXT, -- mechanism of action + fda_appr_dt DATE, -- FDA approval date. NULL if drug is still under trial. + admin_route TEXT, -- possible values: oral, injection, topical + dos_amt DECIMAL(10,2), + dos_unit VARCHAR(20), + dos_freq_hrs INT, + ndc VARCHAR(20) -- National Drug Code +); + +-- diagnosis dimension table +CREATE TABLE diagnoses ( + diag_id SERIAL PRIMARY KEY, + diag_code VARCHAR(10), + diag_name VARCHAR(100), + diag_desc TEXT +); + +-- treatment fact table +CREATE TABLE treatments ( + treatment_id SERIAL PRIMARY KEY, + patient_id INT REFERENCES patients(patient_id), + doc_id INT REFERENCES doctors(doc_id), + drug_id INT REFERENCES drugs(drug_id), + diag_id INT REFERENCES diagnoses(diag_id), + start_dt DATE, + end_dt DATE, -- NULL if treatment is ongoing + is_placebo BOOLEAN, + tot_drug_amt DECIMAL(10,2), + drug_unit TEXT -- possible values: mg, ml, g +); + +-- outcome fact table +CREATE TABLE outcomes ( + outcome_id SERIAL PRIMARY KEY, + treatment_id INT REFERENCES treatments(treatment_id), + assess_dt DATE, + day7_lesion_cnt INT, -- lesion counts on day 7. + day30_lesion_cnt INT, + day100_lesion_cnt INT, + day7_pasi_score DECIMAL(4,1), -- PASI score range 0-72 + day30_pasi_score DECIMAL(4,1), + day100_pasi_score DECIMAL(4,1), + day7_tewl DECIMAL(5,2), -- in g/m^2/h + day30_tewl DECIMAL(5,2), + day100_tewl DECIMAL(5,2), + day7_itch_vas INT, -- visual analog scale 0-100 + day30_itch_vas INT, + day100_itch_vas INT, + day7_hfg DECIMAL(4,1), -- hair growth factor range 0-5 + day30_hfg DECIMAL(4,1), + day100_hfg DECIMAL(4,1) +); + +CREATE TABLE adverse_events ( + id SERIAL PRIMARY KEY, -- 1 row per adverse event per treatment_id + treatment_id INT REFERENCES treatments(treatment_id), + reported_dt DATE, + description TEXT +); + +CREATE TABLE concomitant_meds ( + id SERIAL PRIMARY KEY, -- 1 row per med per treatment_id + treatment_id INT REFERENCES treatments(treatment_id), + med_name VARCHAR(100), + start_dt TEXT, -- YYYY-MM-DD + end_dt TEXT, -- YYYY-MM-DD NULL if still taking + dose_amt DECIMAL(10,2), + dose_unit TEXT, -- possible values: mg, ml, g + freq_hrs INT +); + +-- insert into dimension tables first + +INSERT INTO doctors (first_name, last_name, specialty, year_reg, med_school_name, loc_city, loc_state, loc_zip, bd_cert_num) +VALUES +('John', 'Doe', 'dermatology', 2005, 'Johns Hopkins University', 'Baltimore', 'MD', '21201', 'ABC123'), +('Jane', 'Smith', 'immunology', 2010, 'Harvard Medical School', 'Boston', 'MA', '02115', 'XYZ789'), +('David', 'Johnson', 'general', 1998, 'University of Pennsylvania', 'Philadelphia', 'PA', '19104', 'DEF456'), +('Emily', 'Brown', 'dermatology', 2015, 'Stanford University', 'Palo Alto', 'CA', '94304', 'GHI012'), +('Michael', 'Davis', 'immunology', 2008, 'Duke University', 'Durham', 'NC', '27708', 'JKL345'), +('Sarah', 'Wilson', 'general', 2003, 'University of California, San Francisco', 'San Francisco', 'CA', '94143', 'MNO678'), +('Robert', 'Taylor', 'dermatology', 2012, 'Yale University', 'New Haven', 'CT', '06510', 'PQR901'); + +INSERT INTO patients (first_name, last_name, date_of_birth, gender, email, phone, addr_street, addr_city, addr_state, addr_zip, ins_type, ins_policy_num, height_cm, weight_kg) +VALUES +('Alice', 'Johnson', '1985-03-15', 'Female', 'alice@email.com', '555-123-4567', '123 Main St', 'Anytown', 'CA', '12345', 'private', 'ABC123456', 165, 60), +('Bob', 'Smith', '1978-11-23', 'Male', 'bob@email.com', '555-987-6543', '456 Oak Ave', 'Somecity', 'NY', '54321', 'medicare', 'XYZ789012', 180, 85), +('Carol', 'Davis', '1992-07-08', 'Female', 'carol@email.com', '555-246-8135', '789 Elm Rd', 'Anothercity', 'TX', '67890', 'private', 'DEF345678', 158, 52), +('David', 'Wilson', '1965-09-30', 'Male', 'david@email.com', '555-369-2580', '321 Pine Ln', 'Somewhere', 'FL', '13579', 'medicaid', 'GHI901234', 175, 78), +('Eve', 'Brown', '2000-01-01', 'Female', 'eve@email.com', '555-147-2589', '654 Cedar St', 'Nowhere', 'WA', '97531', 'uninsured', NULL, 160, 55), +('Frank', 'Taylor', '1988-05-12', 'Male', 'frank@email.com', '555-753-9514', '987 Birch Dr', 'Anyplace', 'CO', '24680', 'private', 'JKL567890', 183, 90), +('Grace', 'Anderson', '1975-12-25', 'Female', 'grace@email.com', '555-951-7532', '159 Maple Rd', 'Somewhere', 'OH', '86420', 'medicare', 'MNO246810', 170, 68); + +INSERT INTO drugs (drug_name, manufacturer, drug_type, moa, fda_appr_dt, admin_route, dos_amt, dos_unit, dos_freq_hrs, ndc) +VALUES +('Drugalin', 'Pharma Inc', 'biologic', 'TNF-alpha inhibitor', '2010-01-15', 'injection', 40, 'mg', 336, '12345-678-90'), +('Medicol', 'Acme Pharma', 'small molecule', 'IL-17A inhibitor', '2015-06-30', 'oral', 30, 'mg', 24, '54321-012-34'), +('Topizol', 'BioMed Ltd', 'topical', 'PDE4 inhibitor', '2018-11-01', 'topical', 15, 'g', 12, '98765-432-10'), +('Biologic-X', 'Innova Biologics', 'biologic', 'IL-23 inhibitor', NULL, 'injection', 100, 'mg', 672, '13579-246-80'), +('Smallazine', 'Chem Co', 'small molecule', 'JAK inhibitor', '2020-03-15', 'oral', 5, 'mg', 24, '97531-864-20'), +('Topicort', 'Derma Rx', 'topical', 'Corticosteroid', '2005-09-30', 'topical', 30, 'g', 12, '24680-135-79'); + +INSERT INTO diagnoses (diag_code, diag_name, diag_desc) +VALUES +('L40.0', 'Psoriasis vulgaris', 'Plaque psoriasis, the most common form'), +('L40.1', 'Generalized pustular psoriasis', 'Widespread pustules on top of red skin'), +('L40.4', 'Guttate psoriasis', 'Small, teardrop-shaped lesions'), +('L40.8', 'Other psoriasis', 'Includes flexural, erythrodermic, and other rare types'), +('L40.9', 'Psoriasis, unspecified', 'Psoriasis not further specified'), +('L40.50', 'Arthropathic psoriasis, unspecified', 'Psoriatic arthritis, unspecified'), +('L40.51', 'Distal interphalangeal psoriatic arthropathy', 'Psoriatic arthritis mainly affecting the ends of fingers and toes'), +('L40.52', 'Psoriatic arthritis mutilans', 'Severe, deforming psoriatic arthritis'), +('L40.53', 'Psoriatic spondylitis', 'Psoriatic arthritis of the spine'), +('L40.59', 'Other psoriatic arthropathy', 'Other specified types of psoriatic arthritis'); + +-- insert into fact tables +INSERT INTO treatments (patient_id, doc_id, drug_id, diag_id, start_dt, end_dt, is_placebo, tot_drug_amt, drug_unit) +VALUES +(1, 1, 1, 1, '2022-01-01', '2022-06-30', false, 240, 'mg'), +(2, 2, 2, 2, '2022-02-15', '2022-08-14', false, 180, 'mg'), +(3, 3, 3, 3, '2022-03-10', '2022-09-09', false, 360, 'g'), +(4, 4, 4, 4, '2022-04-01', NULL, false, 200, 'mg'), +(5, 5, 5, 5, '2022-05-01', '2022-10-31', false, 180, 'mg'), +(6, 6, 6, 6, '2022-06-15', '2022-12-14', false, 720, 'g'), +(1, 7, 1, 7, '2022-07-01', '2022-12-31', false, 240, 'mg'), +(2, 1, 2, 8, '2022-08-01', '2023-01-31', false, 180, 'mg'), +(3, 2, 3, 9, '2022-09-01', '2023-02-28', false, 360, 'g'), +(4, 3, 4, 10, '2022-10-01', NULL, true, 0, NULL), +(5, 4, 5, 1, '2022-11-01', '2023-04-30', false, 180, 'mg'), +(6, 5, 6, 2, '2022-12-01', '2023-05-31', false, 720, 'g'), +(7, 6, 1, 3, '2023-01-01', '2023-06-30', false, 240, 'mg'), +(1, 7, 2, 4, '2023-02-01', '2023-07-31', false, 180, 'mg'), +(2, 1, 3, 5, '2023-03-01', '2023-08-31', false, 360, 'g'); + +INSERT INTO outcomes (treatment_id, assess_dt, day7_lesion_cnt, day30_lesion_cnt, day100_lesion_cnt, day7_pasi_score, day30_pasi_score, day100_pasi_score, day7_tewl, day30_tewl, day100_tewl, day7_itch_vas, day30_itch_vas, day100_itch_vas, day7_hfg, day30_hfg, day100_hfg) +VALUES +(1, '2022-01-08', 20, 15, 5, 12.5, 8.2, 2.1, 18.2, 15.6, 12.1, 60, 40, 20, 1.5, 2.5, 4.0), +(2, '2022-02-22', 25, 18, 8, 15.0, 10.1, 3.5, 20.1, 17.2, 13.5, 70, 50, 30, 1.0, 2.0, 3.5), +(3, '2022-03-17', 18, 12, 3, 10.8, 6.4, 1.2, 16.5, 14.0, 10.8, 55, 35, 15, 2.0, 3.0, 4.5), +(4, '2022-04-08', 30, 25, 12, 18.2, 13.9, 5.8, 22.4, 19.1, 15.2, 80, 60, 40, 0.5, 1.5, 3.0), +(5, '2022-05-08', 22, 16, 6, 13.1, 8.7, 2.6, 19.0, 16.3, 12.7, 65, 45, 25, 1.2, 2.2, 3.8), +(6, '2022-06-22', 28, 21, 10, 16.7, 11.5, 4.3, 21.3, 18.1, 14.3, 75, 55, 35, 0.8, 1.8, 3.3), +(7, '2022-07-08', 19, 13, 4, 11.2, 6.9, 1.5, 17.1, 14.5, 11.2, 58, 38, 18, 1.8, 2.8, 4.3), +(8, '2022-08-08', 26, 19, 9, 15.6, 10.6, 3.8, 20.7, 17.6, 13.9, 72, 52, 32, 0.7, 1.7, 3.2), +(9, '2022-09-08', 21, 15, 5, 12.3, 8.0, 2.0, 18.6, 15.9, 12.4, 62, 42, 22, 1.4, 2.4, 3.9), +(10, '2022-10-08', 32, 30, 25, 19.5, 17.8, 14.1, 23.2, 21.4, 18.7, 85, 80, 70, 0.2, 0.4, 0.8), +(11, '2022-11-08', 23, 17, 7, 13.7, 9.2, 2.9, 19.5, 16.8, 13.1, 68, 48, 28, 1.1, 2.1, 3.6), +(12, '2022-12-08', 29, 23, 11, 17.4, 12.3, 4.9, 21.8, 18.7, 14.8, 78, 58, 38, 0.6, 1.6, 3.1), +(13, '2023-01-08', 18, 12, 3, 10.5, 6.1, 1.0, 16.9, 14.3, 11.0, 56, 36, 16, 1.9, 2.9, 4.4), +(14, '2023-02-08', 27, 20, 10, 16.2, 11.1, 4.1, 21.0, 17.9, 14.1, 74, 54, 34, 0.5, 1.5, 3.0), +(15, '2023-03-08', 20, 14, 4, 11.8, 7.3, 1.7, 17.8, 15.2, 11.8, 60, 40, 20, 1.6, 2.6, 4.1); + +INSERT INTO adverse_events (treatment_id, reported_dt, description) +VALUES +(1, '2022-01-15', 'Mild injection site reaction'), +(2, '2022-02-28', 'Headache, nausea'), +(4, '2022-04-10', 'Severe allergic reaction, hospitalization required'), +(5, '2022-05-20', 'Upper respiratory infection'), +(7, '2022-07-22', 'Mild injection site reaction'), +(9, '2022-09-18', 'Diarrhea'), +(11, '2022-11-30', 'Elevated liver enzymes'), +(14, '2023-02-25', 'Mild skin rash'); + +INSERT INTO concomitant_meds (treatment_id, med_name, start_dt, end_dt, dose_amt, dose_unit, freq_hrs) +VALUES +(1, 'Acetaminophen', '2022-01-01', '2022-01-07', 500, 'mg', 6), +(1, 'Ibuprofen', '2022-01-08', '2022-01-14', 200, 'mg', 8), +(2, 'Loratadine', '2022-02-15', '2022-03-15', 10, 'mg', 24), +(3, 'Multivitamin', '2022-03-10', NULL, 1, 'tablet', 24), +(4, 'Epinephrine', '2022-04-10', '2022-04-10', 0.3, 'mg', NULL), +(4, 'Diphenhydramine', '2022-04-10', '2022-04-17', 50, 'mg', 6), +(5, 'Amoxicillin', '2022-05-20', '2022-05-30', 500, 'mg', 8), +(6, 'Calcium supplement', '2022-06-15', NULL, 600, 'mg', 24), +(7, 'Acetaminophen', '2022-07-15', '2022-07-21', 500, 'mg', 6), +(8, 'Cetirizine', '2022-08-01', '2022-08-14', 10, 'mg', 24), +(9, 'Loperamide', '2022-09-18', '2022-09-20', 4, 'mg', 6), +(11, 'Ursodiol', '2022-11-30', '2022-12-30', 300, 'mg', 8), +(12, 'Vitamin D', '2022-12-01', NULL, 1000, 'IU', 24), +(13, 'Acetaminophen', '2023-01-08', '2023-01-14', 500, 'mg', 6), +(14, 'Hydrocortisone cream', '2023-02-25', '2023-03-07', 10, 'g', 12); diff --git a/defog_data/metadata.py b/defog_data/metadata.py index 2b8d237..3427953 100644 --- a/defog_data/metadata.py +++ b/defog_data/metadata.py @@ -14,6 +14,7 @@ def get_db(db_name): return db_schema +# sql-eval datasets academic = get_db("academic") advising = get_db("advising") atis = get_db("atis") @@ -22,7 +23,12 @@ def get_db(db_name): scholar = get_db("scholar") yelp = get_db("yelp") +# sql-eval-instruct datasets +car_dealership = get_db("car_dealership") +derm_treatment = get_db("derm_treatment") + dbs = { + # sql-eval datasets "academic": academic, "advising": advising, "atis": atis, @@ -30,4 +36,7 @@ def get_db(db_name): "restaurants": restaurants, "scholar": scholar, "yelp": yelp, + # sql-eval-instruct datasets + "car_dealership": car_dealership, + "derm_treatment": derm_treatment, } diff --git a/setup.sh b/setup.sh index 60df779..2929f96 100755 --- a/setup.sh +++ b/setup.sh @@ -1,9 +1,9 @@ set -e # get arguments -# if $@ is empty, set it to academic advising atis geography restaurants scholar yelp +# if $@ is empty, set it to all of our db's if [ -z "$@" ]; then - set -- academic advising atis geography restaurants scholar yelp + set -- academic advising atis car_dealership derm_treatment geography restaurants scholar yelp fi # $@ is all arguments passed to the script echo "Databases to init: $@" diff --git a/tests.py b/tests.py index bf85f9a..61cca25 100644 --- a/tests.py +++ b/tests.py @@ -14,6 +14,8 @@ def test_load_all_in_diff_dir(self): "academic", "advising", "atis", + "car_dealership", + "derm_treatment", "geography", "restaurants", "scholar", @@ -27,7 +29,7 @@ def test_load_all_in_diff_dir(self): os.chdir(test_dir) def dbs_exist(self): - assert len(dbs) == 7 + assert len(dbs) == 9 # check that all the tables exist in each db def test_academic(self): @@ -104,6 +106,35 @@ def test_atis(self): "equipment_sequence", ] self.assertEqual(list(db_schema.keys()), expected_tables) + + def test_car_dealership(self): + db_name = "car_dealership" + db_schema = get_db(db_name)["table_metadata"] + expected_tables = [ + "cars", + "salespersons", + "customers", + "sales", + "inventory_snapshots", + "payments_received", + "payments_made", + ] + self.assertEqual(list(db_schema.keys()), expected_tables) + + def test_derm_treatment(self): + db_name = "derm_treatment" + db_schema = get_db(db_name)["table_metadata"] + expected_tables = [ + "doctors", + "patients", + "drugs", + "diagnoses", + "treatments", + "outcomes", + "adverse_events", + "concomitant_meds", + ] + self.assertEqual(list(db_schema.keys()), expected_tables) def test_geography(self): db_name = "geography"