From b30d32df2c53970721c3508b55eca4071e447821 Mon Sep 17 00:00:00 2001 From: joocer Date: Mon, 25 Mar 2024 22:35:51 -0400 Subject: [PATCH] clashing --- requirements.txt | 3 +- src/internals/helpers/paths.py | 4 - src/internals/helpers/search.py | 27 - src/internals/models/search.py | 5 +- src/main.py | 31 +- src/routers/download.py | 73 - src/routers/search.py | 55 +- src/routers/stores.py | 43 - src/routers/users.py | 7 +- src/static/dist/js/colors.js | 8 +- src/static/dist/js/notebook-cell-chart.js | 5 +- src/static/dist/js/notebook-cell-discover.js | 35 + src/static/dist/js/notebook-cell-parameter.js | 4 +- .../dist/js/notebook-cell-sql-execute.js | 6 +- .../plugins/arrow@8.0.0/Arrow.es2015.min.js | 1209 +++++++++++++++++ .../arrow@8.0.0/Arrow.es2015.min.js.map | 1 + src/static/plugins/arrow@8.0.0/table.js | 356 +++++ src/static/plugins/arrow@8.0.0/table.js.map | 1 + tests/data/convert.py | 26 + tests/test_adapter_google_secrets_manager.py | 3 +- tests/test_adapter_http.py | 6 +- tests/test_adapter_http_download.py | 10 +- 22 files changed, 1703 insertions(+), 215 deletions(-) delete mode 100644 src/internals/helpers/search.py delete mode 100644 src/routers/download.py delete mode 100644 src/routers/stores.py create mode 100644 src/static/dist/js/notebook-cell-discover.js create mode 100644 src/static/plugins/arrow@8.0.0/Arrow.es2015.min.js create mode 100644 src/static/plugins/arrow@8.0.0/Arrow.es2015.min.js.map create mode 100644 src/static/plugins/arrow@8.0.0/table.js create mode 100644 src/static/plugins/arrow@8.0.0/table.js.map create mode 100644 tests/data/convert.py diff --git a/requirements.txt b/requirements.txt index e130a4a..d5ba4b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,7 @@ uvicorn fastapi pydantic -mabelbeta orjson -ujson requests google-cloud-storage google-cloud-tasks @@ -14,3 +12,4 @@ jinja2 pyarrow pyjwt pymemcache +opteryx \ No newline at end of file diff --git a/src/internals/helpers/paths.py b/src/internals/helpers/paths.py index 10ee250..1801430 100644 --- a/src/internals/helpers/paths.py +++ b/src/internals/helpers/paths.py @@ -5,14 +5,10 @@ def find_path(path): from here. Otherwise do a broader search which is a little slower. """ import glob - from mabel.logging import get_logger - - logger = get_logger() paths = glob.glob(f"/app/src/**/{path}", recursive=True) if len(paths) == 0: paths = glob.glob(f"**/{path}", recursive=True) for i in paths: if i.endswith(path): - logger.info(f"Found `{path}` at `{i}`") return i diff --git a/src/internals/helpers/search.py b/src/internals/helpers/search.py deleted file mode 100644 index bcb0101..0000000 --- a/src/internals/helpers/search.py +++ /dev/null @@ -1,27 +0,0 @@ -import os -from mabel.data.readers import SqlReader -from internals.models import SearchModel - - -def do_search(search: SearchModel, auth_token=None): - - raw_path = False - inner_reader = None - - # K_SERVICE is a K8s flag, without it we're probably running locally - if os.environ.get("K_SERVICE") is None: - from mabel.adapters.disk import DiskReader - - raw_path = True - inner_reader = DiskReader - - sql_reader = SqlReader( - start_date=search.start_date, - end_date=search.end_date, - sql_statement=search.query, - #multiprocess=True, - raw_path=raw_path, - inner_reader=inner_reader, - project="dcsgva-data-prd", - ) - return sql_reader diff --git a/src/internals/models/search.py b/src/internals/models/search.py index c1c4eca..43d3d1e 100644 --- a/src/internals/models/search.py +++ b/src/internals/models/search.py @@ -1,6 +1,7 @@ -from pydantic import BaseModel -from typing import Optional, Union import datetime +from typing import Optional, Union + +from pydantic import BaseModel class SearchModel(BaseModel): diff --git a/src/main.py b/src/main.py index 97c11e8..a29171c 100644 --- a/src/main.py +++ b/src/main.py @@ -1,29 +1,21 @@ import os import sys -sys.path.insert(0, os.path.join(sys.path[0], "../../mabel_/")) +sys.path.insert(0, os.path.join(sys.path[0], "../../opteryx/")) +#sys.path.insert(0, os.path.join(sys.path[0], "../../mabel@0.6/")) import uvicorn -from fastapi.responses import HTMLResponse from fastapi import FastAPI, Request -from mabel.logging import get_logger, set_log_name -from mabel.utils.common import build_context +from fastapi.responses import HTMLResponse +#from mabel.logging import get_logger, set_log_name from internals.helpers.paths import find_path +from routers import search, users -from routers import search, download, users, stores - -context = build_context() -set_log_name("QUERY") -logger = get_logger() -logger.setLevel(5) - - -RESULT_BATCH = int(context.get("maximum_return", 2000)) or 5000 +RESULT_BATCH = 5000 from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates - # set up API interface os.environ["TZ"] = "UTC" version = os.getenv("SHORT_SHA", "local") @@ -43,11 +35,18 @@ def home(request: Request): """ return templates.TemplateResponse("index.html", {"request": request}) +@application.get("/arrow", response_class=HTMLResponse) +def home(request: Request): + """ + This is a single page app, we deliver a single HTML page and interact + with the backend using APIs. + """ + return templates.TemplateResponse("wow.html", {"request": request}) + application.include_router(search.router) -application.include_router(download.router) application.include_router(users.router) -application.include_router(stores.router) + # tell the server to start if __name__ == "__main__": diff --git a/src/routers/download.py b/src/routers/download.py deleted file mode 100644 index e43a6e5..0000000 --- a/src/routers/download.py +++ /dev/null @@ -1,73 +0,0 @@ -from fastapi import APIRouter, HTTPException -from fastapi.responses import StreamingResponse -from mabel import DictSet -from mabel.logging import get_logger, set_log_name -from mabel.errors import DataNotFoundError -from internals.models import SearchModel -from internals.helpers.search import do_search -from internals.drivers.csv_writer import csv_set - - -router = APIRouter() -set_log_name("QUERY") -logger = get_logger() -logger.setLevel(5) - - -def join_lists(list_a, list_b): - yield from list_a - yield from list_b - - -@router.post("/download/") -def download_results(request: SearchModel): - """ - Bulk download - not severely limited like the search which targets the UI - """ - try: - request = SearchModel( - start_date=request.start_date, - end_date=request.end_date, - query=request.query, - ) - results = do_search(request) - - # this allows us to get the columns from the first 100 records, - # if the data is more hetreogenous than that, it's not going to - # play well with being in a table - - temp_head = results.take(100).collect() - - head_results = DictSet(temp_head) - columns = head_results.keys() - - # add the records back - back_together = join_lists(temp_head, results) - - response = StreamingResponse( - csv_set(back_together, columns), media_type="text/csv" - ) - response.headers["Content-Disposition"] = "attachment; filename=export.csv" - - return response - - except HTTPException: - raise - except DataNotFoundError as err: - # NOT FOUND - raise HTTPException(status_code=404, detail="Dataset not Found") - except Exception as err: - import traceback - - trace = traceback.format_exc() - error_message = {"error": type(err).__name__, "detail": str(err)} - logger.error(f"Error {type(err).__name__} - {err}:\n{trace}") - # I'M A TEAPOT - raise HTTPException(status_code=418, detail=error_message) - except SystemExit as err: - import traceback - - trace = traceback.format_exc() - logger.alert(f"Fatal Error {type(err).__name__} - {err}:\n{trace}") - # ERROR - raise HTTPException(status_code=500, detail=err) diff --git a/src/routers/search.py b/src/routers/search.py index f3b2f47..7361179 100644 --- a/src/routers/search.py +++ b/src/routers/search.py @@ -1,37 +1,39 @@ -import orjson import datetime -from fastapi import APIRouter, HTTPException, Response, Request -from mabel.logging import get_logger, set_log_name -from mabel.errors import DataNotFoundError + +import orjson +from fastapi import APIRouter, HTTPException, Request, Response from internals.models import SearchModel -from internals.helpers.search import do_search +class DataNotFoundError(Exception): + pass + router = APIRouter() -set_log_name("QUERY") -logger = get_logger() -logger.setLevel(5) RESULT_BATCH = 2000 ########################################################################## + def fix_dict(obj: dict) -> dict: def fix_fields(dt): - if isinstance(dt, (datetime.date, datetime.datetime)): + dt_type = type(dt) + if dt_type in (int, float, bool, str): + return dt + if dt_type in (datetime.date, datetime.datetime): return dt.isoformat() - if isinstance(dt, bytes): + if dt_type == dict: + return str(fix_dict(dt)) + if dt_type == bytes: return dt.decode("UTF8") - if hasattr(dt, "mini"): - return dt.mini.decode("UTF8") - if isinstance(dt, dict): - return {k: fix_fields(v) for k, v in dt.items()} return str(dt) if not isinstance(obj, dict): return obj # type:ignore - return {k: fix_fields(v) for k, v in obj.items()} - + + for key in obj.keys(): + obj[key] = fix_fields(obj[key]) + return obj ######################################################################## @@ -45,11 +47,8 @@ def serialize_response(response, max_records): for i, record in enumerate(response): if i > max_records: break - if i < max_records and hasattr(record, "mini"): - # we have a saved minified json string - yield record.mini + b"\n" elif i < max_records: - yield orjson.dumps(fix_dict(record)) + b"\n" + yield record.as_json + b"\n" if i == -1: # UNABLE TO SATISFY RANGE raise HTTPException(status_code=416) @@ -60,14 +59,18 @@ def serialize_response(response, max_records): @router.post("/v1/search") def search(search: SearchModel, request: Request): try: - from internals.helpers.identity import get_jwt, get_identity + from internals.helpers.identity import get_identity, get_jwt encoded_jwt = get_jwt(request) - logger.info({**search.dict(), "user": get_identity(encoded_jwt)}) - results = do_search(search, encoded_jwt) + import opteryx + conn = opteryx.connect( + partition_scheme=[] + ) + cur = conn.cursor() + cur.execute(search.query) - body = b"\n".join(serialize_response(results, RESULT_BATCH)) + body = b"\n".join(serialize_response(cur.fetchmany(100), RESULT_BATCH)) response = Response( body, media_type="application/jsonlines", @@ -85,13 +88,13 @@ def search(search: SearchModel, request: Request): trace = traceback.format_exc() error_message = {"error": type(err).__name__, "detail": str(err)} - logger.error(f"Error {type(err).__name__} - {err}:\n{trace}") + print(f"Error {type(err).__name__} - {err}:\n{trace}") # I'M A TEAPOT raise HTTPException(status_code=418, detail=error_message) except SystemExit as err: import traceback trace = traceback.format_exc() - logger.alert(f"Fatal Error {type(err).__name__} - {err}:\n{trace}") + print(f"Fatal Error {type(err).__name__} - {err}:\n{trace}") # ERROR raise HTTPException(status_code=500, detail=err) diff --git a/src/routers/stores.py b/src/routers/stores.py deleted file mode 100644 index d73ab12..0000000 --- a/src/routers/stores.py +++ /dev/null @@ -1,43 +0,0 @@ -import os -from typing import Optional -from fastapi import APIRouter, HTTPException, Response -from fastapi.responses import UJSONResponse -from mabel.logging import get_logger, set_log_name -from mabel.errors import DataNotFoundError -from mabel.utils.common import build_context - - -context = build_context() -router = APIRouter() -set_log_name("QUERY") -logger = get_logger() -logger.setLevel(5) - - -@router.get("/v1/datastores/list", response_class=UJSONResponse) -def handle_start_request(): - try: - project = os.environ.get("PROJECT_NAME", "LOCAL") - environments = context["environments"][project] - return {"stores": environments["datastores"]} - - except HTTPException: - raise - except DataNotFoundError as err: - # NOT FOUND - raise HTTPException(status_code=404, detail="Dataset not Found") - except Exception as err: - import traceback - - trace = traceback.format_exc() - error_message = {"error": type(err).__name__, "detail": str(err)} - logger.error(f"Error {type(err).__name__} - {err}:\n{trace}") - # I'M A TEAPOT - raise HTTPException(status_code=418, detail=error_message) - except SystemExit as err: - import traceback - - trace = traceback.format_exc() - logger.alert(f"Fatal Error {type(err).__name__} - {err}:\n{trace}") - # ERROR - raise HTTPException(status_code=500, detail=err) diff --git a/src/routers/users.py b/src/routers/users.py index 1184031..52004a2 100644 --- a/src/routers/users.py +++ b/src/routers/users.py @@ -1,12 +1,7 @@ from fastapi import APIRouter, Request from fastapi.responses import UJSONResponse -from mabel.logging import get_logger, set_log_name - router = APIRouter() -set_log_name("QUERY") -logger = get_logger() -logger.setLevel(5) @router.get("/user", response_class=UJSONResponse) @@ -14,7 +9,7 @@ def get_user_informations(request: Request): """ Get User information """ - from internals.helpers.identity import get_jwt, get_identity + from internals.helpers.identity import get_identity, get_jwt encoded_jwt = get_jwt(request) return {"identity": get_identity(encoded_jwt), "saved_queries": []} diff --git a/src/static/dist/js/colors.js b/src/static/dist/js/colors.js index 8ec6588..3ddcecd 100644 --- a/src/static/dist/js/colors.js +++ b/src/static/dist/js/colors.js @@ -35,8 +35,14 @@ MACARONS = [ '#59678c', '#c9ab00', '#7eb00a', '#6f5553', '#c14089' ] +DRACULA = [ + "#44475A", "#FF6E6E", "#282A36", "#21222C", "#A4FFFF", + "#BD93F9", "#8BE9FD", "#FF79C6", "#50FA7B", "#69FF94", + "#F1FA8C", "#6272A4", "#D6ACFF", "#FF92DF" +] + -DEFAULT_COLORS = SPROUT +DEFAULT_COLORS = DRACULA function interpolateColor(c0, c1, f) { c0 = c0.match(/.{1,2}/g).map((oct) => parseInt(oct, 16) * (1 - f)) diff --git a/src/static/dist/js/notebook-cell-chart.js b/src/static/dist/js/notebook-cell-chart.js index 2ad781c..5943200 100644 --- a/src/static/dist/js/notebook-cell-chart.js +++ b/src/static/dist/js/notebook-cell-chart.js @@ -16,9 +16,7 @@ function createNewChartCell(id, cellBlock) { Cell_1 @@ -72,6 +70,7 @@ function createNewChartCell(id, cellBlock) {
  • Sprout
    ${createExampleGradient(SPROUT)}
  • Plasma
    ${createExampleGradient(PLASMA)}
  • Viridis
    ${createExampleGradient(VIRIDIS)}
  • +
  • Dracula
    ${createExampleGradient(DRACULA)}
  • diff --git a/src/static/dist/js/notebook-cell-discover.js b/src/static/dist/js/notebook-cell-discover.js new file mode 100644 index 0000000..0058d88 --- /dev/null +++ b/src/static/dist/js/notebook-cell-discover.js @@ -0,0 +1,35 @@ +function createNewDiscovererCell(id, cellBlock) { + let cell_icon = ''; + let editor_class = 'notebook-cell-editor-parameter'; + + cellBlock.insertAdjacentHTML('beforeend', createCell(id, cell_icon, editor_class)) + + // add the cell specific options to the control bar + document.getElementById(`controls-${id}`).insertAdjacentHTML('beforebegin', ` +
    + + +
    + `); + + // add the actions for the type selectors + document.getElementById(`parameter-text-${id}`).addEventListener("click", function() { set_as_type_text(id); }); + document.getElementById(`parameter-number-${id}`).addEventListener("click", function() { set_as_type_number(id); }); + document.getElementById(`parameter-date-${id}`).addEventListener("click", function() { set_as_type_date(id); }); + + // default to a string + set_as_type_text(id); + +} + + +// add the new Parameter Cell option +const newParameterCellOption = `
  • Parameter cell
  • ` +document.getElementById("notebook-new-cell-selector").insertAdjacentHTML('beforeend', newParameterCellOption) +document.getElementById("new-parameter-cell").addEventListener("click", function() { createNewCell("parameter") }, false) \ No newline at end of file diff --git a/src/static/dist/js/notebook-cell-parameter.js b/src/static/dist/js/notebook-cell-parameter.js index b9560ec..c19a1f6 100644 --- a/src/static/dist/js/notebook-cell-parameter.js +++ b/src/static/dist/js/notebook-cell-parameter.js @@ -1,5 +1,5 @@ function createNewParameterCell(id, cellBlock) { - let cell_icon = ''; + let cell_icon = ''; let editor_class = 'notebook-cell-editor-parameter'; cellBlock.insertAdjacentHTML('beforeend', createCell(id, cell_icon, editor_class)) @@ -8,7 +8,7 @@ function createNewParameterCell(id, cellBlock) { document.getElementById(`controls-${id}`).insertAdjacentHTML('beforebegin', `