From 557efe91a3c0604e4cf0595746390a223d477e15 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Fri, 25 Aug 2023 20:06:55 -0400 Subject: [PATCH 001/103] Initial commit of new directory. --- prism/commands/buckets_commands.py | 217 ++++++++++++++++ prism/commands/dataChanges_commands.py | 80 ++++++ prism/commands/fileContainers_commands.py | 42 +++ prism/commands/raas_commands.py | 14 + prism/commands/tables_commands.py | 300 ++++++++++++++++++++++ prism/commands/util.py | 57 ++++ prism/commands/wql_commands.py | 62 +++++ 7 files changed, 772 insertions(+) create mode 100644 prism/commands/buckets_commands.py create mode 100644 prism/commands/dataChanges_commands.py create mode 100644 prism/commands/fileContainers_commands.py create mode 100644 prism/commands/raas_commands.py create mode 100644 prism/commands/tables_commands.py create mode 100644 prism/commands/util.py create mode 100644 prism/commands/wql_commands.py diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py new file mode 100644 index 0000000..a0338ac --- /dev/null +++ b/prism/commands/buckets_commands.py @@ -0,0 +1,217 @@ +import click +import uuid +import logging +import gzip +import shutil +import json +import os + +logger = logging.getLogger(__name__) + + +def buckets_generate_impl(): + return "cli_" + uuid.uuid4().hex + + +@click.command("generate", help="Generate a unique bucket name.") +def buckets_generate(): + print(buckets_generate_impl()) + + +@click.command("list", help="View the buckets permitted by the security profile of the current user.") +@click.option("-w", "--wid", + help="The Workday ID of the bucket.") +@click.option("-n", "--table_name", + help="The API name of the table to retrieve (see search option).") +@click.option("-l", "--limit", default=None, + help="The maximum number of object data entries included in the response, default=-1 (all).") +@click.option("-o", "--offset", default=None, + help="The offset to the first object in a collection to include in the response.") +@click.option("-t", "--type", "type_", default="summary", show_default=True, + help="How much information to be returned in response JSON.") +@click.option("-s", "--search", is_flag=True, show_default=True, default=False, + help="Use contains search substring for --table_name or --id.") +@click.argument("bucket_name", required=False) +@click.pass_context +def buckets_list(ctx, wid, table_name, limit, offset, type_, search, bucket_name): + p = ctx.obj["p"] + + buckets = p.buckets_list(wid, bucket_name, limit, offset, type_, table_name, search) + + print(json.dumps(buckets, indent=2)) + + +@click.command("create", help="Create a new bucket with the specified name.") +@click.option("-n", "--table_name", default=None, + help="Name of the table to associate with the bucket.") +@click.option("-w", "--table_wid", default=None, + help="Table ID to associate with the table.") +@click.option("-f", "--file", "file_", required=False, default=None, type=click.Path(exists=True), + help="Schema JSON file for the target table.") +@click.option("-o", "--operation", default="TruncateandInsert", show_default=True, + help="Operation to perform on the table.") +@click.argument("bucket_name") +@click.pass_context +def buckets_create(ctx, table_name, table_wid, file_, operation, bucket_name): + p = ctx.obj["p"] + + bucket = buckets_create_impl(p, bucket_name, table_wid, file_, operation) + + print(bucket) + + +@click.command("upload", help="Upload a CSV or gzip file to the specified bucket.") +@click.option("-n", 
"--table_name", default=None, + help="Name of the table to associate with the bucket.") +@click.option("-w", "--table_wid", default=None, + help="Table ID to associate with the table.") +@click.option("-s", "--schema_file", default=None, + help="Schema JSON file for the target table.") +@click.option("-o", "--operation", default="TruncateandInsert", show_default=True, + help="Operation to perform on the table.") +@click.option("-g", "--generate", is_flag=True, default=True, + help="Generate a unique bucket name.") +@click.option("-b", "--bucket", help="Bucket name to create.") +@click.option("-c", "--complete", is_flag=True, default=False, + help="Automatically complete bucket and load the data into the table.") +@click.argument("file", nargs=-1, required=True, type=click.Path(exists=True)) +@click.pass_context +def buckets_upload(ctx, table_name, table_wid, schema_file, operation, generate, bucket, complete, file): + p = ctx.obj["p"] + + # We know we have valid file name. Check to see if we have a gzip file or a CSV + # by checking the extension. + + if file is None: + logger.error("An existing file name is required to upload to a bucket.") + return + + source_file = file[0] + target_file = source_file + + if source_file.lower().endswith(".csv"): + # GZIP the file into the same directory with the appropriate extension. + target_file += ".gz" + + with open(source_file, 'rb') as f_in: + with gzip.open(target_file, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + elif not source_file.lower().endswith(".gz"): + logger.error(f"File {target_file} is not a .gz or .csv file.") + return + + # We think we have a file - we don't test the contents. + # Go ahead and create a new bucket or use an existing. + bucket = buckets_create_impl(p, bucket, table_name, table_wid, schema_file, operation) + + if bucket is None: + logger.error("Invalid bucket for upload operation.") + return + + upload = p.buckets_upload(bucket["id"], target_file) + + if upload is not None and complete: + complete = p.buckets_complete(bucket["id"]) + + +@click.command("complete", help="Complete the specified bucket and load any files in the bucket.") +@click.option("-n", "--bucket_name", + help="Bucket to complete.") +@click.argument("bucket_wid", required=False) +@click.pass_context +def buckets_complete(ctx, bucket_name, bucket_wid): + p = ctx.obj["p"] + + if bucket_wid is None and bucket_name is None: + print("Either a bucket wid or a bucket name must be specified.") + return + + if bucket_wid is not None: + # If the caller passed both a name and WID, then + # use the WID first. + buckets = p.buckets_list(bucket_id=bucket_wid) + else: + # Lookup the bucket by name. + buckets = p.buckets_list(bucket=bucket_name, verbosity="full") + + if buckets["total"] == 0: + logger.error('Bucket not found.') + return + + bucket = buckets["data"][0] + + bucket_state = bucket["state"]["descriptor"] + + if bucket_state != "New": + print(f"Bucket state is \"{bucket_state}\" - only valid state is \"New.\"") + return + + bucket_wid = bucket["id"] + + return p.buckets_complete(bucket_wid) + + +def buckets_create_impl(prism, bucket_name, table_name, table_wid, schema_file, operation): + if bucket_name is not None: + # Let's see if this bucket already exists + buckets = prism.buckets_list(bucket=bucket_name) + + if buckets is not None and buckets["total"] != 0: + logger.warning(f"Bucket {bucket_name} already exists - status: .") + return buckets["data"][0] + else: + # Generate a unique bucket name for this operation. 
+ bucket = buckets_generate_impl() + logger.debug(f"New bucket name: {bucket}") + + # A target table must be named and must exist. + + if table_name is None and table_wid is None: + print("A table name or wid must be specified to create a bucket.") + return None + + if table_name is not None: + tables = prism.tables_list(api_name=table_name, type_="full") + else: + tables = prism.tables_list(wid=table_wid, type_="full") + + if tables["total"] != 1: + print("Table for create bucket not found.") + return + + table = tables["data"][0] + + if schema_file is not None: + schema = prism.table_to_bucket_schema(load_schema(schema_file)) + else: + schema = prism.table_to_bucket_schema(table) + + bucket = prism.buckets_create(bucket, table["id"], schema, operation=operation) + + return bucket + + +def load_schema(filename): + """Load a table schema from a JSON file. + + :param filename: + :return: + """ + if not os.path.isfile(filename): + logger.critical("Schema file not found: {filename}") + return None + + try: + with open(filename) as file: + schema = json.load(file) + + # Check to see if this is a full table definition + # or just a list of fields. + + if type(schema) is list: + schema = {"fields": schema} + except Exception as e: + logger.critical("Invalid schema: %s".format(str(e))) + pass + + return None diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py new file mode 100644 index 0000000..6190540 --- /dev/null +++ b/prism/commands/dataChanges_commands.py @@ -0,0 +1,80 @@ +import click +import pandas as pd + +@click.command("list", + help="View the data change tasks permitted by the security profile of the current user.") +@click.option("-w", "--wid", + help="The dataChangeID to list.") +@click.option("-a", "--activity_wid", + help="A specific activity associated with the data change task.") +@click.option("-l", "--limit", default=-1, + help="The maximum number of object data entries included in the response, default=-1 (all).") +@click.option("-o", "--offset", default=0, + help="The offset to the first object in a collection to include in the response.") +@click.option("-t", "--type", "type_", default="summary", + help="How much information to be returned in response JSON (default=summary).") +@click.option("-f", "--format", + default="full", + help="Format output as full, summary, schema, or CSV.", + type=click.Choice(['full', 'summary', 'schema', 'csv'], case_sensitive=False)) +@click.option("-s", "--search", is_flag=True, help="Use contains search substring for --name or --id (default=false).") +@click.argument("api_name", required=False) +@click.pass_context +def dataChanges_list(ctx, api_name, wid, activity_wid, limit, offset, type_, format, search): + p = ctx.obj["p"] + o = ctx.obj["o"] + + o.dataChanges_query() + + dataChanges = p.dataChanges_list(api_name, wid, activity_wid, limit, offset, type_, search) + dataChanges["data"] = sorted(dataChanges["data"], key=lambda dct: dct["displayName"].lower()) + + # Handle output + for dct in dataChanges["data"]: + print(dct["displayName"]) + + +@click.command("validate", help="Validate the data change specified by name or ID.") +@click.option("-w", "--wid", help="The dataChangeID to list.") +@click.option("-s", "--search", is_flag=True, help="Use contains search substring for --name or --id (default=false).") +@click.argument("api_name", required=False) +@click.pass_context +def dataChanges_validate(ctx, api_name, wid, search): + p = ctx.obj["p"] + + # See if we have any matching data change tasks. 
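# The load_schema helper in buckets_commands.py accepts either a complete table definition
# or a bare list of field definitions, wrapping a bare list as {"fields": [...]}. A small
# sketch of the two accepted shapes; the field values are hypothetical placeholders, not
# real tenant metadata.

fields_only = [
    {"name": "employee_id", "type": {"descriptor": "Text"}, "required": True, "externalId": True},
    {"name": "hire_date", "type": {"descriptor": "Date"}, "parseFormat": "yyyy-MM-dd"},
]

full_definition = {"fields": fields_only}

# Either file shape ends up as a dictionary with a "fields" key:
schema = fields_only if isinstance(fields_only, dict) else {"fields": fields_only}
assert schema == full_definition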
+ dataChanges = p.dataChanges_list( + name=api_name, + wid=wid, + search=search, + refresh=True) + + if dataChanges["total"] == 0: + print("No matching data change task(s) found.") + + if len(dataChanges) == 1: + print(p.dataChanges.activities_post(dataChanges["id"])) + + +@click.command("execute", help="This resource executes a data change.") +@click.argument("api_name", required=True) +@click.argument("fileContainerID", required=False) +@click.pass_context +def dataChanges_execute(ctx, api_name, filecontainerid): + p = ctx.obj["p"] + + # See if we have any matching data change tasks. + # See if we have any matching data change tasks. + dataChanges = p.dataChanges_list( + name=api_name, + refresh=True) + + if dataChanges["total"] != 1: + print("Invalid data change task to execute") + return + + dct_id = dataChanges["data"][0]["id"] + + dataChanges = p.dataChanges_validate(dct_id) + + print(p.dataChanges_activities_post(dct_id, filecontainerid)) diff --git a/prism/commands/fileContainers_commands.py b/prism/commands/fileContainers_commands.py new file mode 100644 index 0000000..6684930 --- /dev/null +++ b/prism/commands/fileContainers_commands.py @@ -0,0 +1,42 @@ +import click +from commands import util as u + + +@click.command("create", help="Use this method to create a new fileContainers.") +@click.pass_context +def fileContainers_create(ctx): + p = ctx.obj["p"] + + fileContainer = p.fileContainers_create() + + if fileContainer is not None: + print(fileContainer["id"]) + else: + print("") + + +@click.command("list", help="This resource returns all files for a file container.") +@click.argument("fileContainerID") +@click.pass_context +def filecontainers_list(ctx, filecontainerid): + p = ctx.obj["p"] + + files = p.filecontainers_list(filecontainerid) + + print(files) + + +@click.command("load", help="This resource loads the file into a file container.") +@click.option("-f", "--fileContainerID", default=None, help="File container ID to load the file into.") +@click.argument("file", nargs=-1, type=click.Path(exists=True)) +@click.pass_context +def filecontainers_load(ctx, filecontainerid, file): + p = ctx.obj["p"] + + fid = u.fileContainers_load_impl(p, filecontainerid, file) + + if fid is None: + print("Error loading fileContainer.") + else: + # Return the file container ID to the command line. + print(fid) diff --git a/prism/commands/raas_commands.py b/prism/commands/raas_commands.py new file mode 100644 index 0000000..cc3135e --- /dev/null +++ b/prism/commands/raas_commands.py @@ -0,0 +1,14 @@ +import click + + +@click.command("run", help="View the buckets permitted by the security profile of the current user.") +@click.option("-u", "--user", default=None, help="Output query results as CSV.") +@click.option("-f", "--format", "format_", default=None, help="Output query results as CSV.") +@click.argument("report", required=True) +@click.pass_context +def run(ctx, user, format_, report): + p = ctx.obj["p"] + + report_output = p.raas_run(report, user, format_) + + print(report_output) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py new file mode 100644 index 0000000..25311ab --- /dev/null +++ b/prism/commands/tables_commands.py @@ -0,0 +1,300 @@ +import click +import json +import pandas as pd + +# Lazy instantiation of sources for Instance type columns (if any) for a create operation. 
+data_sources = None + + +@click.command("list", help="View the tables or datasets permitted by the security profile of the current user.") +@click.option("-w", "--wid", + help="Unique WID for Prism table or dataset.") +@click.option("-l", "--limit", + help="The maximum number of object data entries included in the response, default=all.", + type=int, + default=None) +@click.option("-o", "--offset", + help="The offset to the first object in a collection to include in the response.", + type=int, + default=None) +@click.option("-t", "--type", "type_", + help="How much information to be returned in response JSON.", + type=click.Choice(["summary", "full", "permissions"], case_sensitive=False), + default="summary") +@click.option("-f", "--format", "format_", + default="json", + help="Format output as JSON, summary, schema, or CSV.", + type=click.Choice(['json', 'summary', 'schema', 'csv'], case_sensitive=False)) +@click.option("-s", "--search", + help="Search substring in api name or display name (default=false).", + is_flag=True) +@click.argument("api_name", required=False) +@click.pass_context +def tables_list(ctx, api_name, wid, limit, offset, type_, format_, search): + """tables list TABLENAME + + Prism API TABLENAME of the table to list. + """ + + if type_ in ("summary", "permissions") and format in ("schema", "csv"): + # Summary results cannot generate schema or CSV output. + + print(f"Invalid combination of type {type_} and format {format}.") + return + + p = ctx.obj["p"] + + # Query the tenant... + tables = p.tables_list(api_name, wid, limit, offset, type_, search) + + # The return always has a total tables returned value. + # note: tables_list never fails, it simply returns 0 tables if there is a problem. + if tables["total"] == 0: + return + + # Handle output + if format_ == "json": + # The results could be one or more tables - simply dump the + # returned object. + + print(json.dumps(tables, indent=2)) + elif format_ == "summary": + for table in tables["data"]: + print(f'{table["displayName"]}, Rows: {table["stats"]["rows"]}, Size: {table["stats"]["rows"]}, Refreshed: {table["dateRefreshed"]}') + elif format_ == "csv": + df = pd.json_normalize(tables["data"]) + print(df.to_csv()) + elif format_ == "schema": + # Dump out the fields of the first table in the result in + # a format compatible with a schema used to created or edit + # a table. + table = tables["data"][0] # Only output the first table. + + # Remove the Prism audit columns. + fields = [fld for fld in tables["data"][0]["fields"] if not fld["name"].startswith("WPA_")] + + # Remove tenant specific values - these are not needed + # if the user wants to update a table definition. + + for fld in fields: + if "fieldId" in fld: + if "fieldId" in fld: + del fld["fieldId"] + + if "id" in fld: + del fld["id"] + + print(json.dumps(fields, indent=2)) + else: + table = tables["data"][0] # Only output the first table. 
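# The "schema" output format above strips the WPA_ audit columns and the tenant-specific
# identifiers (id, fieldId) so the printed field list can be fed back into a table create
# or edit. The same clean-up as a standalone sketch:

def schema_fields(table):
    """Return a copy of a table's fields without audit columns or tenant-specific ids."""
    fields = [dict(fld) for fld in table["fields"] if not fld["name"].startswith("WPA_")]

    for fld in fields:
        fld.pop("id", None)
        fld.pop("fieldId", None)

    return fields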
+ fields = [fld for fld in tables["data"][0]["fields"] if not fld["name"].startswith("WPA_")] + + print(csv_from_fields(fields)) + + +@click.command("create", help="Create a new table with the specified name.") +@click.option("-d", "--displayName", help="Specify a display name - defaults to name") +@click.option("-e", "--enabledForAnalysis", is_flag=True, default=False, help="Enable this table for analytics.") +@click.option("-n", "--sourceName", help="The API name of an existing table to copy.") +@click.option("-w", "--sourceWID", help="The WID of an existing table to copy.") +@click.argument("name", required=True) +@click.argument("file", required=False, type=click.Path(exists=True)) +@click.pass_context +def tables_create(ctx, displayname, enabledforanalysis, sourcename, sourcewid, format_, name, file): + p = ctx.obj["p"] + + if file is not None: + if file.lower().endswith(".json"): + schema = json.loads(file.read()) + + # The JSON file could be a complete table definitions (GET:/tables - full) or just + # the list of fields. If we got a list, then we have a list of fields we + # use to start the schema definition. + + if type(schema) is list: + fields = schema + else: + fields = schema["fields"] + elif file.lower().endswith(".csv"): + fields = fields_from_csv(p, file) + else: + print("Invalid file extension - valid extensions are .json or .csv.") + return + else: + if sourcename is None and sourcewid is None: + print("No schema provided and a table to copy (--sourceName or --sourceWID) not specified.") + return + + if sourcewid is not None: + tables = p.tables_list(wid=sourcewid, type_="full") # Exact match on WID - and get the fields + else: + tables = p.tables_list(name=sourcename, type_="full") # Exact match on API Name + + if tables["total"] == 0: + print("Invalid --sourceName or --sourceWID : table not found.") + return + + fields = tables["data"][0]["fields"] + + fields[:] = [fld for fld in fields if "WPA" not in fld["name"]] + + # Initialize a new schema with just the fields. + schema = {"fields": fields} + + # Set the particulars for this table operation. 
+ + schema["enableForAnalysis"] = enabledforanalysis + schema["name"] = name.replace(" ", "_") # Minor clean-up + + if displayname is not None: + schema["displayName"] = displayname + else: + schema["displayName"] = name + + table_def = p.tables_create(schema["name"], schema) + + if table_def is not None: + print(f"Table {name} created.") + + +@click.command("update", help="Edit the schema for an existing table.") +@click.option("-s", "--sourceName", help="The API name of an existing table to copy.") +@click.option("-w", "--sourceWID", help="The ID of an existing table to copy.") +@click.argument("name", required=True) +@click.argument("file", required=False, type=click.Path(exists=True)) +@click.pass_context +def tables_update(ctx, name, filename, source_table, source_id): + p = ctx.obj["p"] + + table = p.tables(name=name) + + if table is not None: + p.tables_put(name, filename) + + print("update") + + +@click.command("upload", + help="Upload a file into the table using a bucket.") +@click.option("-n", "--table_name", + help="Specify a name for the table.") +@click.option("-i", "--table_id", + help="Specify a specific table API ID - this value overrides a name, if specified.") +@click.option("-b", "--bucket", + help="Specify a bucket name - defaults to random bucket name.") +@click.option("-o", "--operation", default="TruncateandInsert", + help="Operation for the bucket - default to TruncateandInsert.") +@click.option("-f", "--filename", + help="File (csv or gzip) to upload.") +@click.pass_context +def tables_upload(ctx, table_name, table_id, bucket, operation, filename): + p = ctx.obj["p"] + + print("upload") + + +def csv_from_fields(fields): + format_str = '{name},"{displayName}",{ordinal},{type},"{businessObject}",{precision},{scale},"{parseFormat}",{required},{externalId}\n' + + csv = "name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId\n" + + for field in fields: + # Suppress Prism auditing fields. + if field["name"].startswith("WPA_"): + continue + + field_def = {"name": field["name"], + "displayName": field["displayName"], + "ordinal": field["ordinal"], + "type": field["type"]["descriptor"], + "businessObject": field["businessObject"]["descriptor"] if "businessObject" in field else "", + "precision": field["precision"] if "precision" in field else "", + "scale": field["scale"] if "scale" in field else "", + "parseFormat": field["parseFormat"] if "parseFormat" in field else "", + "required": field["required"], + "externalId": field["externalId"] + } + + csv += format_str.format_map(field_def) + + return csv + + +def fields_from_csv(prism, file): + global data_sources + + schema = {"fields": []} # Start with an empy schema definition. 
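# fields_from_csv reads a schema CSV whose header row matches the one written by
# csv_from_fields. A hypothetical input file could look like the sample below; only the
# type values text, date, numeric, and instance are mapped explicitly, and anything else
# falls back to Text.
#
#   name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId
#   employee_id,Employee ID,1,text,,,,,True,True
#   hire_date,Hire Date,2,date,,,,yyyy-MM-dd,True,False
#   annual_salary,Annual Salary,3,numeric,,18,2,,False,False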
+ + with open(file, newline='') as csvfile: + reader = csv.DictReader(csvfile) + + ordinal = 1 + + for row in reader: + field = { + "ordinal": ordinal, + "name": row["name"], + "displayName": row["displayName"] if "displayName" in row else row["name"], + "required": row["required"] if "required" in row else False, + "externalId": row["externalId"] if "externalId" in row else False + } + + match row["type"].lower(): + case "text": + field["type"] = { + "id": "fdd7dd26156610006a12d4fd1ea300ce", + "descriptor": "Text" + } + case "date": + field["type"] = { + "id": "fdd7dd26156610006a71e070b08200d6", + "descriptor": "Date" + } + + if "parseFormat" in row: + field["parseFormat"] = row["parseFormat"] + + case "numeric": + field["type"] = { + "id": "32e3fa0dd9ea1000072bac410415127a", + "descriptor": "Numeric" + } + + if "precision" in row: + field["precision"] = row["precision"] + + if "scale" in row: + field["scale"] = row["scale"] + + case "instance": + # We need all the data sources to resolve the business objects + # to include their WID. + if data_sources is None: + data_sources = prism.datasources_list() + + if data_sources is None or data_sources["total"] == 0: + print("Error calling WQL/dataSources") + return + + field["type"] = { + "id": "db9cd1dbf95010000e8fc7c78cd012a9", + "descriptor": "Instance" + } + + # Find the matching businessObject + bo = [ds for ds in data_sources["data"] + if ds["businessObject"]["descriptor"] == row["businessObject"]] + + if len(bo) == 1: + field["businessObject"] = bo[0]["businessObject"] + case _: + # Default to string + field["type"] = { + "id": "fdd7dd26156610006a12d4fd1ea300ce", + "descriptor": "Text" + } + + schema["fields"].append(field) + ordinal += 1 + + return schema diff --git a/prism/commands/util.py b/prism/commands/util.py new file mode 100644 index 0000000..38aaec9 --- /dev/null +++ b/prism/commands/util.py @@ -0,0 +1,57 @@ +import gzip +import shutil + + +def fileContainers_load_impl(prism, filecontainersid, file): + # Because Click verified the file already exists, we know we have valid + # file name. Check to see if we have a gzip file or a CSV + # by checking the extension. + + if file is None or len(file) == 0: + print("An existing file name is required to upload to a file container.") + return None + + # Verify that each file is already a gzip file or a CSV we gzip for them. + + # The CSV contents are not validated here - Prism eventually + # returns an error if the content is invalid. + + target_files = [] + + for f in file: + target_file = file + + if f.lower().endswith(".csv"): + # GZIP the file into the same directory with the appropriate extension. + target_file = f + ".gz" + + with open(f, 'rb') as f_in: + with gzip.open(target_file, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + elif not f.lower().endswith(".gz"): + print(f"File {f} is not a .gz or .csv file.") + return None + + target_files.append(target_file) + + # Assume we have a fID - it can be None right now + # if the user wants to create a fileContainers during + # this operation. + fID = filecontainersid + + for target_file in target_files: + # Load the file and retrieve the fID - this is only + # set by the load on the first file - subsequent + # files are loaded into the same container (fID). + fID = prism.filecontainers_load(fID, target_file, ) + + # If the fID comes back blank, then something is not + # working. Note: any error messages have already + # been displayed by the load operation. 
+        if fID is None:
+            break
+
+    # Return the fID to the caller - this is the value
+    # passed by the caller, or the new fID created by
+    # the load of the first file.
+    return fID
diff --git a/prism/commands/wql_commands.py b/prism/commands/wql_commands.py
new file mode 100644
index 0000000..c97eee3
--- /dev/null
+++ b/prism/commands/wql_commands.py
@@ -0,0 +1,62 @@
+import click
+import csv
+import json
+import sys
+
+
+@click.command("dataSources",
+               help="View the data sources permitted by the security profile of the current user.")
+@click.option("-w", "--wid",
+              help="The Workday ID of the dataSources.")
+@click.option("-l", "--limit", default=None,
+              help="The maximum number of object data entries included in the response, default=-1 (all).")
+@click.option("-o", "--offset", default=None,
+              help="The offset to the first object in a collection to include in the response.")
+@click.option("-s", "--search", is_flag=True, show_default=True, default=False,
+              help="Use contains search substring for the data source name.")
+@click.argument("name", required=False)
+@click.pass_context
+def dataSources(ctx, wid, limit, offset, search, name):
+    p = ctx.obj["p"]
+
+    ds = p.wql_dataSources(wid, limit, offset, name, search)
+
+    print(json.dumps(ds, indent=2))
+
+
+@click.command("data",
+               help="Execute a WQL query and return the results.")
+@click.option("-l", "--limit", default=None,
+              help="The maximum number of object data entries included in the response, default=-1 (all).")
+@click.option("-o", "--offset", default=None,
+              help="The offset to the first object in a collection to include in the response.")
+@click.option("-f", "--file", "file_", default=None, type=click.Path(exists=True),
+              help="Filename of a file containing the WQL query.")
+@click.option("-c", "--csv", "csv_", is_flag=True, show_default=True, default=False,
+              help="Output query results as CSV.")
+@click.argument("query", required=False)
+@click.pass_context
+def data(ctx, limit, offset, file_, csv_, query):
+    p = ctx.obj["p"]
+
+    if file_ is None and query is None:
+        print("No query provided - pass a query string or use --file.")
+        return
+
+    if query is not None:
+        query_resolved = query
+    else:
+        with open(file_) as file:
+            query_resolved = file.read().replace('\n', ' ')
+
+    rows = p.wql_data(query_resolved, limit, offset)
+
+    if rows is not None and rows["total"] != 0:
+        if csv_:
+            headers = rows["data"][0].keys()
+
+            writer = csv.DictWriter(sys.stdout, fieldnames=headers)
+            writer.writeheader()
+            writer.writerows(rows["data"])
+        else:
+            print(json.dumps(rows, indent=2))
+

From 45be5420a9bef0b2bc2d350d2d0a0025d550da65 Mon Sep 17 00:00:00 2001
From: wd-mgreynolds
Date: Fri, 25 Aug 2023 20:11:06 -0400
Subject: [PATCH 002/103] Prism V3 first commit

---
 prism/prism.py | 1006 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 788 insertions(+), 218 deletions(-)

diff --git a/prism/prism.py b/prism/prism.py
index e4c3f9e..8139aaf 100644
--- a/prism/prism.py
+++ b/prism/prism.py
@@ -7,36 +7,26 @@ import logging
 import json
-import random
 import requests
-
-# set up basic logging
-logger = logging.getLogger()
-handler = logging.StreamHandler()
-formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S")
-handler.setFormatter(formatter)
+import time
+import os
+import urllib
+import sys
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.WARNING)
+
+# writing to stdout
+handler = logging.StreamHandler(sys.stdout)
+handler.setLevel(logging.DEBUG)
+log_format = logging.Formatter('[%(asctime)s] [%(levelname)s] - %(message)s')
+handler.setFormatter(log_format) logger.addHandler(handler) -logger.setLevel(logging.INFO) - - -def load_schema(filename): - """Load schema from a JSON file. - - Parameters - ---------- - filename : str - The path to your file. - - Returns - ------- - schema : dict - A dictionary containing the schema for your table. - """ - with open(filename) as f: - schema = json.load(f) - return schema +def log_elapsed(msg, timedelta): + elapsed = timedelta.total_seconds() + logger.debug(f"{msg}: elapsed {elapsed:.5f}") class Prism: @@ -63,19 +53,69 @@ class Prism: The version of the Prism API to use """ - def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_token, version="v2"): + def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_token, version="v3"): """Init the Prism class with required attributes.""" + + # Capture the arguments into the class variables. self.base_url = base_url self.tenant_name = tenant_name self.client_id = client_id self.client_secret = client_secret self.refresh_token = refresh_token - self.token_endpoint = f"{base_url}/ccx/oauth2/{tenant_name}/token" self.version = version + + # Compose the endpoints for authentication and API calls. + self.token_endpoint = f"{base_url}/ccx/oauth2/{tenant_name}/token" self.rest_endpoint = f"{base_url}/ccx/api/{version}/{tenant_name}" - self.prism_endpoint = f"{base_url}/ccx/api/prismAnalytics/{version}/{tenant_name}" + self.prism_endpoint = f"{base_url}/api/prismAnalytics/{version}/{tenant_name}" self.upload_endpoint = f"{base_url}/wday/opa/tenant/{tenant_name}/service/wBuckets" + + # Support URLs for additional Workday API calls. + self.wql_endpoint = f"{base_url}/api/wql/v1/{tenant_name}" + self.raas_endpoint = f"{base_url}/ccx/service/customreport2/{tenant_name}" + + # At creation, there cannot yet be a bearer_token obtained from Workday. self.bearer_token = None + self.bearer_token_timestamp = None + + @staticmethod + def set_log_level(log_level): + logger.setLevel(getattr(logging, log_level)) # Convert the string to the proper log level + logger.debug("set log level: {log_level}") + + def get(self, url, headers=None, params=None, log_tag="generic get"): + if url is None: + logger.warning("http_get: missing URL") + return None + + # Every request requires an authorization header - make it true. + auth_attr = "Authorization" + + if headers is None: + headers = {} + + if auth_attr not in headers: + headers[auth_attr] = "Bearer " + self.get_bearer_token() + + response = requests.get(url, params=params, headers=headers) + log_elapsed("GET: " + log_tag, response.elapsed) + + if response.status_code != 200: + logger.error(f"Invalid HTTP status: {response.status_code}") + + return response + + def validate_schema(self, schema): + if "fields" not in schema or not isinstance(schema["fields"], list) or len(schema["fields"]) == 0: + logger.error("Invalid schema detected!") + return False + + # Add a sequential order (ordinal) on the fields to (en)force + # proper numbering. + for ordinal in range(len(schema["fields"])): + schema["fields"][ordinal]["ordinal"] = ordinal + 1 + + return True def create_bearer_token(self): """Exchange a refresh token for an access token. @@ -89,6 +129,9 @@ def create_bearer_token(self): class. 
""" + + logger.debug("create_bearer_token") + headers = {"Content-Type": "application/x-www-form-urlencoded"} data = { @@ -99,15 +142,155 @@ def create_bearer_token(self): } r = requests.post(self.token_endpoint, headers=headers, data=data) - r.raise_for_status() + log_elapsed("create_bearer_token", r.elapsed) if r.status_code == 200: - logging.info("Successfully obtained bearer token") + logger.debug("successfully obtained bearer token") self.bearer_token = r.json()["access_token"] + self.bearer_token_timestamp = time.time() else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") + logger.warning(f"HTTP status code {r.status_code}: {r.content}") + self.bearer_token = None + + def get_bearer_token(self): + """Get the current bearer token, or create a new one if it doesn't exist, or it's older than 15 minutes.""" + if self.bearer_token is None: + self.create_bearer_token() - def create_table(self, table_name, schema): + if time.time() - self.bearer_token_timestamp > 900: + self.create_bearer_token() + + return self.bearer_token + + def reset_bearer_token(self): + """Remove the current bearer token to force getting a new token on the next API call.""" + self.bearer_token = None + + def tables_list( + self, + name=None, + id=None, + limit=None, + offset=None, + type_="summary", + search=False): + """Obtain details for all tables or a given table(s). + + Parameters + ---------- + name : str + The name of the table to obtain details about. If the default value + of None is specified, details regarding first 100 tables is returned. + + id : str + The ID of a table to obtain details about. When specified, all tables + are searched for the matching id. + + limit : int + The maximum number of tables to be queried, to the maximum of 100. + + offset: int + The offset from zero of tables to return. + + type_ : str + details + + search : bool + Enable substring searching for table names or ids + + Returns + ------- + If the request is successful, a dictionary containing information about + the table is returned. + + """ + operation = "/tables" + url = self.prism_endpoint + operation + + if type_ is None or type_ not in ["full", "summary", "permissions"]: + logger.warning("Invalid return type for tables list operation.") + type_ = "summary" + + # If we are searching, then we have to get everything using + # limits and offsets, i.e., paging of results. + + params = {} + + # See if we want to add table name as a search parameter. + if not search and name is not None: + # Here, the user is not searching, they gave us an exact name. + params["name"] = name.replace(" ", "_") # Minor clean-up + + limit = 1 # Should only be 0 (not found) or 1 (found) tables found. + offset = 0 + + # When searching by name or id, set the maximum limit size to + # reduce the number of individual REST API calls. + if search: + limit = 100 + offset = 0 + + # If we didn't get a limit, set it to the maximum supported by the API + if limit is None: + search = True # Force a search so we get all tables + limit = 100 + + if offset is None: + offset = 0 + + # Always assume we will retrieve more than one page + params["limit"] = limit + params["offset"] = offset + params["type"] = type_ + + # Always return a valid JSON object of results! 
+ return_tables = {"total": 0, "data": []} + + while True: + r = self.get(url, params=params) + + if r.status_code != 200: + logger.error(f"Invalid HTTP return code: {r.status_code}") + break + + tables = r.json() + + if not search and name is not None: # Explicit table name + # We are not searching and we have a specific table - return whatever we got. + return tables + + # If we are not searching, simply append all the results to the return object. + + if name is not None: + # Substring search for matching table names + match_tables = [tab for tab in tables["data"] if name in tab["name"]] + elif id is not None: + # User is looking for a table by ID + match_tables = [tab for tab in tables["data"] if id == tab["id"]] + else: + # Grab all the tables in the result + match_tables = tables["data"] + + return_tables["data"] += match_tables + + # If we get back anything but a full page, we are done + # paging the results. + if len(tables["data"]) < limit: + break + + if search: + # Figure out what to search for on the next page. + offset += limit + params["offset"] = offset + else: + # The caller asked for a specific limit and offset, exit the loop. + break + + # We always return a valid JSON. + return_tables["total"] = len(return_tables["data"]) + return return_tables + + def tables_create(self, table_name, schema): """Create an empty table of type "API". Parameters @@ -125,17 +308,20 @@ def create_table(self, table_name, schema): the new table is returned. """ - url = self.prism_endpoint + "/datasets" + operation = "/tables" + logger.debug(f"POST : {operation}") + + url = self.prism_endpoint + "/tables" + + if not self.validate_schema(schema): + return None headers = { - "Authorization": "Bearer " + self.bearer_token, + "Authorization": "Bearer " + self.get_bearer_token(), "Content-Type": "application/json", } - data = {"name": table_name, "fields": schema} - - r = requests.post(url, headers=headers, data=json.dumps(data)) - r.raise_for_status() + r = requests.post(url, headers=headers, data=json.dumps(schema)) if r.status_code == 201: logging.info("Successfully created an empty API table") @@ -145,7 +331,121 @@ def create_table(self, table_name, schema): else: logging.warning(f"HTTP status code {r.status_code}: {r.content}") - def create_bucket(self, schema, table_id, operation="TruncateandInsert"): + return None + + def tables_update(self, name, schema): + tables = self.tables(name=name) + + # We never fail - if the table doesn't exist, only + # log a warning. + + if tables["total"] == 0: + # Assume we are doing a create + table = self.tables_create(name, schema) + return None + + def tables_patch(self, id, displayName=None, description=None, documentation=None, enableForAnalysis=None, schema=None): + return None + + def buckets_list(self, + wid=None, + bucket_name=None, + limit=None, + offset=None, + type_="summary", + table_name=None, + search=False): + + operation = "/buckets" + url = self.prism_endpoint + operation + + # Start the return object - this routine NEVER fails + # and always returns a valid JSON object. + return_buckets = {"total": 0, "data": []} + + # If we are searching, then we have to get everything first + # so don't add a name to the bucket. + + params = {} + + if not search and bucket_name is not None: + # List a specific bucket name overrides any other + # combination of search/table/bucket name/wid. + params["name"] = bucket_name + + limit = 1 + offset = 0 + else: + # Any other combination of parameters requires a search. 
+ search = True + limit = 100 # Max pagesize to retrieve in the fewest REST calls. + offset = 0 + + if limit is not None: + params["limit"] = limit + params["offset"] = offset if offset is not None else 0 + else: + params["limit"] = 100 + params["offset"] = 0 + + if type_ in ["summary", "full"]: + params["type"] = type_ + else: + params["type"] = "summary" + + while True: + r = self.get(url, params=params, log_tag=operation) + + if r.status_code != 200: + return return_buckets + + buckets = r.json() + + if not search and bucket_name is not None: # Explicit bucket name + # We are not searching, and we have a specific bucket, + # return whatever we got. + return buckets + + # If we are not searching, simply append all the results to the return object. + + if bucket_name is not None: + # Substring search for matching table names + match_buckets = [bck for bck in buckets["data"] if bucket_name in bck["name"]] + elif wid is not None: + # User is looking for a bucket by ID + match_buckets = [bck for bck in buckets["data"] if wid == bck["id"]] + elif table_name is not None: + # Caller is looking for any/all buckets by target table + match_buckets = [bck for bck in buckets["data"] if table_name in bck["targetDataset"]["descriptor"]] + else: + # Grab all the tables in the result - select all buckets. + match_buckets = buckets["data"] + + return_buckets["data"] += match_buckets + return_buckets["total"] = len(return_buckets["data"]) + + # If we get back anything but a full page, we are done + # paging the results. + if len(buckets["data"]) < params["limit"]: + break + + if search: + # Figure out what to search for on the next page. + params["offset"] += params["limit"] + else: + # The caller asked for a specific limit and offset, exit the loop. + break + + # We always return a valid JSON. + return return_buckets + + def buckets_create( + self, + name, + target_id=None, + target_name=None, + schema=None, + operation="TruncateandInsert"): """Create a temporary bucket to upload files. Parameters @@ -153,9 +453,12 @@ def create_bucket(self, schema, table_id, operation="TruncateandInsert"): schema : dict A dictionary containing the schema for your table. - table_id : str + target_id : str The ID of the table that this bucket is to be associated with. + target_name : str + The name of the table that this bucket is to be associated with. + operation : str Required, defaults to "TruncateandInsert" operation Additional Operations - “Insert”, “Update”, “Upsert”, “Delete” @@ -169,8 +472,38 @@ def create_bucket(self, schema, table_id, operation="TruncateandInsert"): If the request is successful, a dictionary containing information about the new bucket is returned. + + https://confluence.workday.com/display/PRISM/Public+API+V2+Endpoints+for+WBuckets """ - url = self.prism_endpoint + "/wBuckets" + + # A target table must be identified by ID or name. + if target_id is None and target_name is None: + logger.error("A table id or table name is required to create a bucket.") + return None + + # The caller didn't include a schema, make a copy of the target table's schema. 
+ if target_id is not None and schema is None: + tables = self.tables_list(table_id=target_id, type_="full") + + if tables["total"] == 0: + logger.error(f"Table ID {target_id} does not exist for bucket operation.") + return None + + schema = tables["data"][0]["fields"] + + if target_id is None: + tables = self.tables_list(api_name=target_name, type_="full") + + if tables["total"] == 0: + logger.error(f"Table {target_name} does not exist for create bucket operation.") + return None + + target_id = tables["data"]["0"]["id"] + + if schema is None: + schema = tables["data"]["0"]["fields"] + + url = self.prism_endpoint + "/buckets" headers = { "Authorization": "Bearer " + self.bearer_token, @@ -178,14 +511,13 @@ def create_bucket(self, schema, table_id, operation="TruncateandInsert"): } data = { - "name": "prism_python_wbucket_" + str(random.randint(1000000, 9999999)), + "name": name, "operation": {"id": "Operation_Type=" + operation}, - "targetDataset": {"id": table_id}, + "targetDataset": {"id": target_id}, "schema": schema, } - r = requests.post(url, headers=headers, data=json.dumps(data)) - r.raise_for_status() + r = requests.post(url, headers=headers, data=data) if r.status_code == 201: logging.info("Successfully created a new wBucket") @@ -195,7 +527,94 @@ def create_bucket(self, schema, table_id, operation="TruncateandInsert"): else: logging.warning(f"HTTP status code {r.status_code}: {r.content}") - def upload_file_to_bucket(self, bucket_id, filename): + return None + + def buckets_complete(self, bucketid): + url = self.prism_endpoint + f"/buckets/{bucketid}/complete" + + headers = { + "Authorization": "Bearer " + self.bearer_token + } + + r = requests.post(url, headers=headers) + + if r.status_code == 201: + logging.info("Successfully created a new wBucket") + return r.json() + + if r.status_code == 400: + logging.warning(r.json()["errors"][0]["error"]) + else: + logging.warning(f"HTTP status code {r.status_code}: {r.content}") + + return None + + def table_to_bucket_schema(self, table): + """Convert schema (derived from describe table) to bucket schema + + Parameters + ---------- + describe_schema: dict + A dictionary containing the describe schema for your dataset. + + Returns + ------- + If the request is successful, a dictionary containing the bucket schema is returned. + The results can then be passed to the create_bucket function + + """ + + # describe_schema is a python dict object and needs to be accessed as such, 'data' is the top level object, + # but this is itself a list (with just one item) so needs the list index, in this case 0. 'fields' is found + # in the dict that is in ['data'][0] + + if table is None or "fields" not in table: + logger.critical("Invalid table passed to table_to_bucket_schema.") + return None + + fields = table["fields"] + + # Create and assign useAsOperationKey field with true/false values based on externalId value + operation_key_false = {"useAsOperationKey": False} + operation_key_true = {"useAsOperationKey": True} + + for i in fields: + if i["externalId"] is True: + i.update(operation_key_true) + else: + i.update(operation_key_false) + + # Get rid of the WPA_ fields... 
+ fields[:] = [x for x in fields if "WPA" not in x["name"]] + + # Now trim our fields data to keep just what we need + for i in fields: + del i["id"] + del i["displayName"] + del i["fieldId"] + del i["required"] + del i["externalId"] + + # The "header" for the load schema + bucket_schema = { + "parseOptions": { + "fieldsDelimitedBy": ",", + "fieldsEnclosedBy": '"', + "headerLinesToIgnore": 1, + "charset": {"id": "Encoding=UTF-8"}, + "type": {"id": "Schema_File_Type=Delimited"}, + } + } + + # The footer for the load schema + schema_version = {"id": "Schema_Version=1.0"} + + bucket_schema["fields"] = fields + bucket_schema["schemaVersion"] = schema_version + + return bucket_schema + + def buckets_upload(self, bucketid, filename): """Upload a file to a given bucket. Parameters @@ -213,262 +632,413 @@ def upload_file_to_bucket(self, bucket_id, filename): None """ - url = self.upload_endpoint + "/" + bucket_id + "/files" + url = self.prism_endpoint + f"/buckets/{bucketid}/files" - headers = {"Authorization": "Bearer " + self.bearer_token} + headers = {"Authorization": "Bearer " + self.get_bearer_token() + } files = {"file": open(filename, "rb")} r = requests.post(url, headers=headers, files=files) - r.raise_for_status() - if r.status_code == 200: + if r.status_code == 201: logging.info("Successfully uploaded file to the bucket") + return r.json() else: logging.warning(f"HTTP status code {r.status_code}: {r.content}") + return None + + def dataChanges_list(self, + name=None, + wid=None, + activity_id=None, + limit=-1, offset=None, + type_="summary", + search=False, + refresh=False): + # We are doing a dataChanges GET operation. + operation = "/dataChanges" + + # If an ID is provided, add it to the URL as part of the path. + if wid is not None and isinstance(wid, str) and len(wid) > 0: + operation += f"/{wid}" + search_by_id = True + else: + search_by_id = False + + # We know what kind of list we want, add in the ability to + # search by name and pages. + if type_ and isinstance(type_, str): + if type_ == "summary": + operation += "?type=summary" + elif type_ == "full": + operation += "?type=full" + else: + operation += "?type=summary" + logger.warning("/dataChanges: invalid verbosity {verbosity} - defaulting to summary.") - def complete_bucket(self, bucket_id): - """Finalize the bucket once all files have been added. + logger.debug(f"dataChanges_activities_get: {operation}") - Parameters - ---------- - bucket_id : str - The ID of the bucket to be marked as complete. + # Start building the full URL for the call + url = self.prism_endpoint + operation - Returns - ------- - None + # Searching by ID is a special case that eliminates all other types + # of search. Ask for the datachange by id and return just this + # result - even blank. - """ - url = self.prism_endpoint + "/wBuckets/" + bucket_id + "/complete" + if search_by_id: + response = self.get(url=url, log_tag="dataChanges") + + if response.status_code == 200: + return response.json() + else: + return None + + # Get a list of tasks by page, with or without searching. 
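# table_to_bucket_schema above wraps the cleaned field list between fixed parse options and
# a schema version stamp. The resulting bucket schema has roughly the shape sketched below;
# the two field entries are hypothetical examples, not values taken from a tenant.

example_bucket_schema = {
    "parseOptions": {
        "fieldsDelimitedBy": ",",
        "fieldsEnclosedBy": '"',
        "headerLinesToIgnore": 1,
        "charset": {"id": "Encoding=UTF-8"},
        "type": {"id": "Schema_File_Type=Delimited"},
    },
    "fields": [
        {"ordinal": 1, "name": "employee_id", "type": {"descriptor": "Text"}, "useAsOperationKey": True},
        {"ordinal": 2, "name": "hire_date", "type": {"descriptor": "Date"}, "useAsOperationKey": False},
    ],
    "schemaVersion": {"id": "Schema_Version=1.0"},
}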
+ + search_limit = 500 # Assume all DCTs should be returned + search_offset = 0 # API default value + + if limit is not None and isinstance(limit, int) and limit > 0: + search_limit = limit + + if offset is not None and isinstance(offset, int) and offset > 0: + search_offset = offset + + searching = False + + if name is not None and isinstance(name, str) and len(name) > 0: + if search is not None and isinstance(search, bool) and search: + # Force a return of ALL data change tasks, so we can search the names. + searching = True + + search_limit = 500 + search_offset = 0 + else: + # Should return at most 1 result. + url += "&name=" + urllib.parse.quote(name) + + searching = False + search_limit = 1 + search_offset = 0 + + # Assume we will be looping based on limit and offset values; however, we may + # execute only once. + + dataChanges = {"total": 0, "data": []} + + while True: + search_url = f"{url}&limit={search_limit}&offset={search_offset}" + logger.debug(f"dataChangesID url: {search_url}") + + response = self.get(url=search_url, log_tag=operation) + + if response.status_code != 200: + break + + retJSON = response.json() + + if searching: + # Only add matching rows + dataChanges["data"] += \ + filter(lambda dtc: dtc["name"].find(name) != -1 or + dtc["displayName"].find(name) != -1, + retJSON["data"]) + else: + # Without searching, simply paste the current page to the list. + dataChanges["data"] += retJSON["data"] + break + + # If we didn't get a full page, then we done. + if len(retJSON["data"]) < search_limit: + break + + # Go to the next page. + offset += search_limit + + dataChanges["total"] = len(dataChanges["data"]) + + return dataChanges + + def dataChanges_activities_get(self, data_change_id, activity_id): + operation = f"/dataChanges/{data_change_id}/activities/{activity_id}" + logger.debug(f"dataChanges_activities_get: {operation}") + + r = self.get(self.prism_endpoint + operation) + + if r.status_code == 200: + return json.loads(r.text) + + return None + + def dataChanges_activities_post(self, data_change_id, fileContainerID=None): + operation = f"/dataChanges/{data_change_id}/activities" + logger.debug(f"dataChanges_activities_post: {operation}") + + url = self.prism_endpoint + operation headers = { "Authorization": "Bearer " + self.bearer_token, "Content-Type": "application/json", } - data = {} + if fileContainerID is None: + logger.debug("no file container ID") - r = requests.post(url, headers=headers, data=json.dumps(data)) - r.raise_for_status() + data = None + else: + logger.debug("with file container ID: {fileContainerID") + + data = json.dumps({"fileContainerWid": fileContainerID}) + + r = requests.post(url, data=data, headers=headers) + log_elapsed(f"POST {operation}", r.elapsed) if r.status_code == 201: - logging.info("Successfully completed the bucket") + activityID = json.loads(r.text)["id"] + + logging.debug(f"Successfully started data load task - id: {activityID}") + return activityID elif r.status_code == 400: logging.warning(r.json()["errors"][0]["error"]) else: logging.warning(f"HTTP status code {r.status_code}: {r.content}") - def list_bucket(self, bucket_id=None): - """Obtain details for all buckets or a given bucket. + return None - Parameters - ---------- - bucket_id : str - The ID of the bucket to obtain details about. If the default value - of None is specified, details regarding all buckets is returned. 
+ def dataChanges_by_name(self, data_change_name): + logger.debug(f"data_changes_by_name: {data_change_name}") - Returns - ------- - If the request is successful, a dictionary containing information about - the bucket is returned. + data_changes_list = self.data_changes_list() - """ - url = self.prism_endpoint + "/wBuckets" + for data_change in data_changes_list: + if data_change.get("displayName") == data_change_name: + # We found the DCT by name, lookup all the details. + data_change_id = data_change.get("id") + logger.debug(f"found {data_change_name}: {data_change_id}") - if bucket_id is not None: - url = url + "/" + bucket_id + return self.data_changes_by_id(data_change_id) - headers = {"Authorization": "Bearer " + self.bearer_token} + logger.debug(f"{data_change_name} was not found!") + + return None + + def dataChanges_by_id(self, data_change_id): + operation = f"/dataChanges/{data_change_id}" + logger.debug(f"dataChanges_by_id: {operation}") + + url = self.prism_endpoint + f"/dataChanges/{data_change_id}" + + headers = {"Authorization": "Bearer " + self.get_bearer_token()} r = requests.get(url, headers=headers) + log_elapsed(logger, operation, r.elapsed) r.raise_for_status() if r.status_code == 200: - logging.info("Successfully obtained information about your buckets") - return r.json() + logger.debug(f"Found data change task: id = {data_change_id}") + + return json.loads(r.text) + elif r.status_code == 400: + logger.warning(r.json()["errors"][0]["error"]) else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") + logger.warning(f"HTTP status code {r.status_code}: {r.content}") - def list_table(self, table_name=None): - """Obtain details for all tables or a given table. + return json.loads(r.text) - Parameters - ---------- - table_name : str - The name of the table to obtain details about. If the default value - of None is specified, details regarding first 100 tables is returned. + def dataChanges_is_valid(self, data_change_id): + dtc = self.dataChanges_validate(data_change_id) - Returns - ------- - If the request is successful, a dictionary containing information about - the table is returned. + if dtc is None: + logger.critical(f"data_change_id {data_change_id} not found!") - """ - url = self.prism_endpoint + "/datasets?" + return False - if table_name is not None: - url = url + "name=" + table_name + if "error" in dtc: + logger.critical(f"data_change_id {data_change_id} is not valid!") - params = {"limit": 100} + return False - headers = {"Authorization": "Bearer " + self.bearer_token} + return True - r = requests.get(url, params=params, headers=headers) - r.raise_for_status() + def dataChanges_validate(self, data_change_id): + operation = f"/dataChanges/{data_change_id}/validate" + logger.debug(f"dataChanges_validate: GET {operation}") + + url = self.prism_endpoint + operation + + r = self.get(url) if r.status_code == 200: - logging.info("Successfully obtained information about your tables") - return r.json() - else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") + return json.loads(r.text) - def describe_table(self, table_id=None): - """Obtain details for for a given table + return None - Parameters - ---------- - table_id : str - The ID of the table to obtain details about. If the default value - of None is specified, details regarding all tables is returned. 
+ def fileContainers_create(self): + operation = "/fileContainers" + logger.debug(f"fileContainer_create: POST {operation}") - Returns - ------- - If the request is successful, a dictionary containing information about - the table is returned. + url = self.prism_endpoint + operation - """ - url = self.prism_endpoint + "/datasets/" + headers = {"Authorization": "Bearer " + self.get_bearer_token()} - if table_id is not None: - url = url + table_id + "/describe" + r = requests.post(url, headers=headers) + log_elapsed(f"POST {operation}", r.elapsed) - headers = {"Authorization": "Bearer " + self.bearer_token} + if r.status_code == 201: + return_json = r.json() - r = requests.get(url, headers=headers) - r.raise_for_status() + fileContainerID = return_json["id"] + logger.debug(f"successfully created file container: {fileContainerID}") - if r.status_code == 200: - logging.info("Successfully obtained information about your datasets") - return r.json() + return return_json + elif r.status_code == 400: + logging.warning(r.json()["errors"][0]["error"]) else: logging.warning(f"HTTP status code {r.status_code}: {r.content}") - def convert_describe_schema_to_bucket_schema(self, describe_schema): - """Convert schema (derived from describe table) to bucket schema + return None - Parameters - ---------- - describe_schema: dict - A dictionary containing the describe schema for your dataset. + def fileContainers_list(self, fileContainerID): + operation = f"/fileContainers/{fileContainerID}/files" + logger.debug(f"fileContainers_list: GET {operation}") - Returns - ------- - If the request is successful, a dictionary containing the bucket schema is returned. - The results can then be passed to the create_bucket function + url = self.prism_endpoint + operation - """ + r = self.get(url) - # describe_schema is a python dict object and needs to be accessed as such, 'data' is the top level object, - # but this is itself a list (with just one item) so needs the list index, in this case 0. 'fields' is found - # in the dict that is in ['data'][0] - fields = describe_schema["data"][0]["fields"] + if r.status_code == 200: + return r.json() - # Create and assign useAsOperationKey field with true/false values based on externalId value - operation_key_false = {"useAsOperationKey": False} - operation_key_true = {"useAsOperationKey": True} + return None - for i in fields: - if i["externalId"] is True: - i.update(operation_key_true) - else: - i.update(operation_key_false) + def fileContainers_load(self, fileContainerID, fqfn): + # Do a sanity check and make sure the fqfn exists and + # has a gzip extension. - # Now trim our fields data to keep just what we need - for i in fields: - del i["id"] - del i["displayName"] - del i["fieldId"] - del i["required"] - del i["externalId"] + if not os.path.isfile(fqfn): + logger.critical("file not found: {fqfn}") + return None - # Get rid of the WPA_ fields... - fields[:] = [x for x in fields if "WPA" not in x["name"]] + # Create the file container and get the ID. We use the + # file container ID to load the file and then return the + # value to the caller for use in a data change call. 
- # The "header" for the load schema - bucket_schema = { - "parseOptions": { - "fieldsDelimitedBy": ",", - "fieldsEnclosedBy": '"', - "headerLinesToIgnore": 1, - "charset": {"id": "Encoding=UTF-8"}, - "type": {"id": "Schema_File_Type=Delimited"}, - } + if fileContainerID is None: + file_container_response = self.fileContainers_create() + + if file_container_response is None: + return None + + fID = file_container_response["id"] + else: + fID = fileContainerID + + print(self.fileContainers_list(fID)) + + # We have our container, load the file + + headers = { + "Authorization": "Bearer " + self.get_bearer_token() } - # The footer for the load schema - schema_version = {"id": "Schema_Version=1.0"} + operation = f"/fileContainers/{fID}/files" + logger.debug(f"fileContainer_load: POST {operation}") - bucket_schema["fields"] = fields - bucket_schema["schemaVersion"] = schema_version + files = {"file": open(fqfn, "rb")} - return bucket_schema + url = self.prism_endpoint + operation + r = requests.post(url, files=files, headers=headers) + log_elapsed(f"POST {operation}", r.elapsed) -def create_table(p, table_name, schema): - """Create a new Prism table. + if r.status_code == 201: + logging.info("successfully loaded fileContainer") - Parameters - ---------- - p : Prism - Instantiated Prism class from prism.Prism() + print(self.fileContainers_list(fID)) - table_name : str - The name of the table to obtain details about. If the default value - of None is specified, details regarding first 100 tables is returned. + return fID + elif r.status_code == 400: + logging.warning(r.json()["errors"][0]["error"]) + else: + logging.warning(f"HTTP status code {r.status_code}: {r.content}") - schema : list - A list of dictionaries containing the schema + return None - Returns - ------- - If the request is successful, a dictionary containing information about - the table is returned. - """ + def wql_dataSources(self, wid=None, limit=100, offset=0, dataSources_name=None, search=False): + operation = "/dataSources" - p.create_bearer_token() - table = p.create_table(table_name, schema=schema) + url = f"{self.wql_endpoint}{operation}" - return table + offset = 0 + return_sources = {"total": 0, "data": []} + while True: + r = self.get(f"{url}?limit=100&offset={offset}") -def upload_file(p, filename, table_id, operation="TruncateandInsert"): - """Create a new Prism table. + if r.status_code == 200: + ds = r.json() + return_sources["data"] += ds["data"] + else: + return None - Parameters - ---------- - p : Prism - Instantiated Prism class from prism.Prism() + if len(ds["data"]) < 100: + break - filename : str - The path to you GZIP compressed file to upload. + offset += 100 - table_id : str - The ID of the Prism table to upload your file to. + return_sources["total"] = len(return_sources["data"]) - operation : str (default = TruncateandInsert) - The table load operation. - Possible options include TruncateandInsert, Insert, Update, Upsert, Delete. + return return_sources - Returns - ------- - If the request is successful, a dictionary containing information about - the table is returned. 
- """ + def wql_data(self, query, limit, offset): + operation = "/data" + + url = f"{self.wql_endpoint}{operation}" + query_safe = urllib.parse.quote(query) + + offset = 0 + data = {"total": 0, "data": []} + + while True: + r = self.get(f"{url}?query={query_safe}&limit=10000&offset={offset}") + + if r.status_code == 200: + ds = r.json() + data["data"] += ds["data"] + else: + return None - p.create_bearer_token() - details = p.describe_table(table_id) - bucket_schema = p.convert_describe_schema_to_bucket_schema(details) - bucket = p.create_bucket(bucket_schema, table_id, operation=operation) - p.upload_file_to_bucket(bucket["id"], filename) - p.complete_bucket(bucket["id"]) + if len(ds["data"]) < 10000: + break + + offset += 100 + + data["total"] = len(data["data"]) + + return data + + def raas_run(self, report, user, format_): + url = f"{self.raas_endpoint}/{user}/{report}?format={format_}" + + if url is None: + raise ValueError("RaaS URL is required") + else: + if url.find("format=") == -1: + output_format = "xml" + else: + output_format = url.split("format=")[1] + + headers = {"Accept": "text/csv"} + r = self.get(url, headers=headers) + + if r.status_code == 200: + # if output_format == "json": + # return r.json()["Report_Entry"] + # elif output_format == "csv": + # return list(csv.reader(io.StringIO(r.content.decode("utf8")))) + # else: + # raise ValueError(f"Output format type {output_format} is unknown") + return r.text + else: + logging.warning("HTTP Error: {}".format(r.content.decode("utf-8"))) From fc8f6f03db37c18d69889e508e51400c1605f71d Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Fri, 25 Aug 2023 20:12:28 -0400 Subject: [PATCH 003/103] Prism V3 update --- prism/cli.py | 254 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 180 insertions(+), 74 deletions(-) diff --git a/prism/cli.py b/prism/cli.py index 2145798..7d192d7 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -1,122 +1,228 @@ import click -import json +import configparser +import os +import sys + import prism +from commands import tables_commands +from commands import buckets_commands +from commands import dataChanges_commands +from commands import fileContainers_commands +from commands import wql_commands +from commands import raas_commands + + +def param_fixup(value, config, config_name, option): + # If already set by an enviroment or by a command line option, do nothing. + if value is not None: + return value + + try: + return config.get(config_name, option) + except configparser.Error: + # Always fail silently. 
+ return None -@click.group() -@click.option("--base_url", envvar="workday_base_url", type=str, required=True, help="The base URL for the API client") + +@click.group(help="CLI for interacting with Workday’s Prism API") +@click.option( + "--base_url", + envvar="workday_base_url", + type=str, + required=False, + help="The base URL for the API client") @click.option( - "--tenant_name", envvar="workday_tenant_name", type=str, required=True, help="The name of your Workday tenant" -) + "--tenant_name", + envvar="workday_tenant_name", + type=str, + required=False, + help="The name of your Workday tenant") +@click.option( + "--username", + envvar="workday_username", + type=str, + required=False, + help="The login username of your Workday user") +@click.option( + "--password", + envvar="workday_password", + type=str, + required=False, + help="The password of your Workday user") @click.option( "--client_id", envvar="prism_client_id", type=str, - required=True, - help="The client ID for your registered API client", -) + required=False, + help="The client ID for your registered API client") @click.option( "--client_secret", envvar="prism_client_secret", type=str, - required=True, - help="The client secret for your registered API client", -) + required=False, + help="The client secret for your registered API client") @click.option( "--refresh_token", envvar="prism_refresh_token", type=str, - required=True, - help="The refresh token for your registered API client", -) + required=False, + help="The refresh token for your registered API client") +@click.option( + "--log_level", + envvar="prism_log_level", + type=str, + required=False, + help="Level of debugging to display - default = warning.") +@click.option( + "--log_file", + envvar="prism_log_file", + type=str, + required=False, + help="Output file for logging - default prism.log.") +@click.option( + "--config_file", + envvar="prism_config", + type=click.Path(exists=True), + required=False, + help="The name of a configuration with parameters for connections and logging.") +@click.option( + "--config_name", + envvar="prism_config", + type=str, + required=False, + default="default", + help="The name of a configuration with parameters for connections and logging.") @click.pass_context -def main(ctx, base_url, tenant_name, client_id, client_secret, refresh_token): - """CLI for interacting with Workday’s Prism API""" +def cli(ctx, + base_url, + tenant_name, + username, password, + client_id, client_secret, refresh_token, + log_level, + log_file, + config_file, config_name): + # Attempt to locate a configuration file - this is not required and is only + # used if the configuration values are not passed on the command line or in + # the environment. + + if config_file is None: + # See if we have a configuration file in the current directory + filename = os.path.join(os.getcwd(), "prism.ini") + else: + filename = config_file + + # If the configuration path exists, then load values - this overrides + # environment variables. + if os.path.isfile(filename): + try: + config = configparser.ConfigParser() + config.read(filename) + + # Check to see if a particular configuration was asked for, it must + # exist in the configuration file otherwise exit with an error. + + if not config.has_section(config_name): + print(f"The specified configuration {config_name} does not exist in the configuration file.") + sys.exit(1) + else: + # Do fix-up on command line args. Priority comes from the command + # line, then environment variables, and finally the config file. 
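+ # As an example, a prism.ini section (hypothetical values) might contain:
+ #
+ #   [default]
+ #   workday_base_url = https://<workday services host>
+ #   workday_tenant_name = mytenant
+ #   prism_client_id = <client id of the registered API client>
+ #   prism_client_secret = <client secret>
+ #   prism_refresh_token = <refresh token>
+ #   prism_log_level = INFO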
+ # Any value not passed and not in the environment arrives here with + # the value "None" - override these with the configuration values. + + base_url = param_fixup(base_url, config, config_name, "workday_base_url") + tenant_name = param_fixup(tenant_name, config, config_name, "workday_tenant_name") + username = param_fixup(username, config, config_name, "workday_username") + password = param_fixup(password, config, config_name, "workday_password") + client_id = param_fixup(client_id, config, config_name, "prism_client_id") + client_secret = param_fixup(client_secret, config, config_name, "prism_client_secret") + refresh_token = param_fixup(refresh_token, config, config_name, "prism_refresh_token") + log_level = param_fixup(log_level, config, config_name, "prism_log_level") + log_file = param_fixup(log_level, config, config_name, "prism_log_file") + except configparser.Error: + print(f"Error accessing configuration file {filename}.") + # If the configuration is not available, exit + exit(1) + + if log_file is None: + log_file = "prism.log" # initialize the prism class with your credentials - p = prism.Prism(base_url, tenant_name, client_id, client_secret, refresh_token, version="v2") - # create the bearer token - p.create_bearer_token() + p = prism.Prism(base_url, tenant_name, client_id, client_secret, refresh_token) + p.set_log_level(log_level) # store the prism object in the context ctx.obj = {"p": p} -@main.command() -@click.option("--name", default=None, type=str, help="The name of the table to obtain details about") +@cli.command("config") +@click.argument("file") @click.pass_context -def list(ctx, name): - """List all tables of type API""" +def config_file(ctx, file): + """Configure command""" - # get the initialized prism class - p = ctx.obj["p"] - # list the tables - status = p.list_table(table_name=name) +@cli.group("tables", help="Commands to list, create, and update Prism tables.") +def tables(): + """Tables Command""" - # print message - if id is None: - click.echo("There are {} API tables".format(status["total"])) - click.echo(json.dumps(status["data"], indent=2, sort_keys=True)) - else: - click.echo(json.dumps(status, indent=2, sort_keys=True)) +tables.add_command(tables_commands.tables_list) +tables.add_command(tables_commands.tables_create) +tables.add_command(tables_commands.tables_update) +tables.add_command(tables_commands.tables_upload) -@main.command() -@click.argument("table_name", type=str) -@click.argument("schema_path", type=click.Path()) -@click.pass_context -def create(ctx, table_name, schema_path): - """Create a new Prism table TABLE_NAME with schema from SCHEMA_PATH - Example: prism create my_table /home/data/schema.json - """ +@cli.group("buckets", help="Bucket operations to list, create and load buckets.") +def buckets(): + """You create a bucket for a specific table, load data into the bucket, and then commit (complete) the bucket. 
""" - # get the initialized prism class - p = ctx.obj["p"] - # read in your table schema - schema = prism.load_schema(schema_path) +buckets.add_command(buckets_commands.buckets_list) +buckets.add_command(buckets_commands.buckets_create) +buckets.add_command(buckets_commands.buckets_generate) +buckets.add_command(buckets_commands.buckets_complete) +buckets.add_command(buckets_commands.buckets_upload) - # clean up the table name - table_name = table_name.replace(" ", "_") - # create an empty API table - table = prism.create_table(p, table_name, schema["fields"]) +@cli.group("dataChanges", help="Data Change Tasks (dataChanges) operations to list, load, and activate.") +def dataChanges(): + """dataChanges.py Command""" - # print message - click.echo(json.dumps(table, indent=2, sort_keys=True)) +dataChanges.add_command(dataChanges_commands.dataChanges_list) +dataChanges.add_command(dataChanges_commands.dataChanges_validate) +dataChanges.add_command(dataChanges_commands.dataChanges_execute) + + +@cli.group("fileContainers", help="File containers (fileContainers) operations to create, load, and list.") +def fileContainers(): + """dataChanges.py Command""" + + +fileContainers.add_command(fileContainers_commands.fileContainers_create) +fileContainers.add_command(fileContainers_commands.filecontainers_list) +fileContainers.add_command(fileContainers_commands.filecontainers_load) + + +@cli.group("wql", help="dataSources operations to list and query WQL sources.") +def wql(): + """dataChanges.py Command""" -@main.command() -@click.argument("gzip_file", type=click.Path()) -@click.argument("table_id", type=str) -@click.option( - "--operation", - type=click.Choice(["TruncateandInsert", "Insert", "Update", "Upsert", "Delete"]), - default="TruncateandInsert", - help="The Table load operation", -) -@click.pass_context -def upload(ctx, gzip_file, table_id, operation): - """Upload GZIP_FILE to TABLE_ID - Example: prism upload /home/data/file.csv.gz bbab30e3018b01a723524ce18010811b - """ +wql.add_command(wql_commands.dataSources) +wql.add_command(wql_commands.data) - # get the initialized prism class - p = ctx.obj["p"] - # upload file to the table - prism.upload_file(p, gzip_file, table_id, operation) +@cli.group("raas", help="Run report!") +def raas(): + """dataChanges.py Command""" - # check the status of the table you just created - status = p.list_table(table_id) - # print message - click.echo(json.dumps(status["data"], indent=2, sort_keys=True)) +raas.add_command(raas_commands.run) if __name__ == "__main__": - main() + cli() From 28c18bc3c97bf103d40d42d6ddf36ebbe220f993 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Fri, 25 Aug 2023 20:14:28 -0400 Subject: [PATCH 004/103] Prism V3 additional requirements. 
--- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index 6e05bdc..daa2546 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ click>=6 requests>=2.20.0,<3 +configparser>=6 +pandas~=2.0.0 From e31a4cbd9707d3308fed8cfc7ba180bf650659ff Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sat, 26 Aug 2023 06:41:41 -0400 Subject: [PATCH 005/103] Update .gitignore Updated to ignore DS_Store --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 35d8f26..676f87e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ # Pycharm .idea/ +# MacOS +.DS_Store + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] From ad955a369eae7f6c0d04f4ac6f9bf6576c4e310d Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Fri, 15 Sep 2023 16:41:24 -0400 Subject: [PATCH 006/103] Update for QA testing --- .gitignore | 2 +- prism/cli.py | 18 +- prism/commands/buckets_commands.py | 214 ++++++-------- prism/commands/dataChanges_commands.py | 101 +++++-- prism/commands/fileContainers_commands.py | 4 +- prism/commands/raas_commands.py | 19 +- prism/commands/tables_commands.py | 339 ++++++++++------------ prism/commands/util.py | 223 +++++++++++++- prism/commands/wql_commands.py | 12 +- prism/data/blank.csv.gz | 0 prism/data/invalid-schema.json | 1 + prism/data/schema.csv | 6 + prism/prism.py | 257 ++++++++++------ tests/buckets-help.sh | 23 ++ tests/test_cli.py | 4 +- 15 files changed, 766 insertions(+), 457 deletions(-) create mode 100644 prism/data/blank.csv.gz create mode 100644 prism/data/invalid-schema.json create mode 100644 prism/data/schema.csv create mode 100755 tests/buckets-help.sh diff --git a/.gitignore b/.gitignore index 676f87e..8cdd94d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,7 @@ # Pycharm .idea/ -# MacOS +# MacOS - Desktop Services Store file(s) .DS_Store # Byte-compiled / optimized / DLL files diff --git a/prism/cli.py b/prism/cli.py index 7d192d7..c69e723 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -95,12 +95,10 @@ def param_fixup(value, config, config_name, option): help="The name of a configuration with parameters for connections and logging.") @click.pass_context def cli(ctx, - base_url, - tenant_name, + base_url, tenant_name, username, password, client_id, client_secret, refresh_token, - log_level, - log_file, + log_level, log_file, config_file, config_name): # Attempt to locate a configuration file - this is not required and is only # used if the configuration values are not passed on the command line or in @@ -164,7 +162,7 @@ def config_file(ctx, file): """Configure command""" -@cli.group("tables", help="Commands to list, create, and update Prism tables.") +@cli.group("tables", help="Commands to list, create, load, and update Prism tables.") def tables(): """Tables Command""" @@ -173,6 +171,7 @@ def tables(): tables.add_command(tables_commands.tables_create) tables.add_command(tables_commands.tables_update) tables.add_command(tables_commands.tables_upload) +tables.add_command(tables_commands.tables_truncate) @cli.group("buckets", help="Bucket operations to list, create and load buckets.") @@ -182,9 +181,10 @@ def buckets(): buckets.add_command(buckets_commands.buckets_list) buckets.add_command(buckets_commands.buckets_create) -buckets.add_command(buckets_commands.buckets_generate) buckets.add_command(buckets_commands.buckets_complete) +buckets.add_command(buckets_commands.buckets_status) buckets.add_command(buckets_commands.buckets_upload) 
+buckets.add_command(buckets_commands.buckets_name) @cli.group("dataChanges", help="Data Change Tasks (dataChanges) operations to list, load, and activate.") @@ -194,7 +194,7 @@ def dataChanges(): dataChanges.add_command(dataChanges_commands.dataChanges_list) dataChanges.add_command(dataChanges_commands.dataChanges_validate) -dataChanges.add_command(dataChanges_commands.dataChanges_execute) +dataChanges.add_command(dataChanges_commands.dataChanges_run) @cli.group("fileContainers", help="File containers (fileContainers) operations to create, load, and list.") @@ -207,7 +207,7 @@ def fileContainers(): fileContainers.add_command(fileContainers_commands.filecontainers_load) -@cli.group("wql", help="dataSources operations to list and query WQL sources.") +@cli.group("wql", help="Operations to list (dataSources) and query WQL sources (data).") def wql(): """dataChanges.py Command""" @@ -216,7 +216,7 @@ def wql(): wql.add_command(wql_commands.data) -@cli.group("raas", help="Run report!") +@cli.group("raas", help="Run custom or Workday delivered report.") def raas(): """dataChanges.py Command""" diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index a0338ac..e801fd2 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -1,66 +1,92 @@ -import click -import uuid -import logging -import gzip -import shutil import json -import os - -logger = logging.getLogger(__name__) - - -def buckets_generate_impl(): - return "cli_" + uuid.uuid4().hex +import logging +import sys +import click +import pandas as pd +from . import util as u -@click.command("generate", help="Generate a unique bucket name.") -def buckets_generate(): - print(buckets_generate_impl()) +logger = logging.getLogger(__name__) -@click.command("list", help="View the buckets permitted by the security profile of the current user.") +@click.command("list") @click.option("-w", "--wid", help="The Workday ID of the bucket.") @click.option("-n", "--table_name", help="The API name of the table to retrieve (see search option).") -@click.option("-l", "--limit", default=None, +@click.option("-l", "--limit", default=None, type=int, help="The maximum number of object data entries included in the response, default=-1 (all).") -@click.option("-o", "--offset", default=None, +@click.option("-o", "--offset", default=None, type=int, help="The offset to the first object in a collection to include in the response.") @click.option("-t", "--type", "type_", default="summary", show_default=True, help="How much information to be returned in response JSON.") @click.option("-s", "--search", is_flag=True, show_default=True, default=False, - help="Use contains search substring for --table_name or --id.") + help="Use contains search substring for --table_name or --wid.") +@click.option("-f", "--format", "format_", + type=click.Choice(['json', 'summary', 'schema', 'csv'], case_sensitive=False), + default="json", + help="Format output as JSON, summary, schema, or CSV.", + ) @click.argument("bucket_name", required=False) @click.pass_context -def buckets_list(ctx, wid, table_name, limit, offset, type_, search, bucket_name): +def buckets_list(ctx, wid, table_name, limit, offset, type_, search, format_, bucket_name): + """ + View the buckets permitted by the security profile of the current user. + + [BUCKET_NAME] explicit name of bucket to list. 
+ """ + p = ctx.obj["p"] buckets = p.buckets_list(wid, bucket_name, limit, offset, type_, table_name, search) - print(json.dumps(buckets, indent=2)) + if buckets["total"] == 0: + return + + if format_ == "summary": + for bucket in buckets["data"]: + display_name = bucket["displayName"] + operation = bucket["operation"]["descriptor"] + target = bucket["targetDataset"]["descriptor"] + state = bucket["state"]["descriptor"] + + print(f"{display_name}, operation: {operation}, target: {target}, state: {state}") + elif format_ == "csv": + df = pd.json_normalize(buckets["data"]) + print(df.to_csv(index=False)) + else: + print(json.dumps(buckets, indent=2)) -@click.command("create", help="Create a new bucket with the specified name.") +@click.command("create", help="") @click.option("-n", "--table_name", default=None, - help="Name of the table to associate with the bucket.") + help="Table name to associate with the bucket.") @click.option("-w", "--table_wid", default=None, help="Table ID to associate with the table.") @click.option("-f", "--file", "file_", required=False, default=None, type=click.Path(exists=True), help="Schema JSON file for the target table.") -@click.option("-o", "--operation", default="TruncateandInsert", show_default=True, +@click.option("-o", "--operation", default="TruncateAndInsert", show_default=True, help="Operation to perform on the table.") -@click.argument("bucket_name") +@click.argument("bucket_name", required=False) @click.pass_context def buckets_create(ctx, table_name, table_wid, file_, operation, bucket_name): + """ + Create a new bucket with the specified name. + + [BUCKET_NAME] explicit bucket name to create otherwise default. + """ p = ctx.obj["p"] - bucket = buckets_create_impl(p, bucket_name, table_wid, file_, operation) + if table_name is None and table_wid is None and file_ is None: + print("A table must be associated with this bucket (-n, -w, or -f must be specified).") + sys.exit(1) + + bucket = p.buckets_create(bucket_name, table_wid, table_name, file_, operation) print(bucket) -@click.command("upload", help="Upload a CSV or gzip file to the specified bucket.") +@click.command("upload") @click.option("-n", "--table_name", default=None, help="Name of the table to associate with the bucket.") @click.option("-w", "--table_wid", default=None, @@ -71,64 +97,61 @@ def buckets_create(ctx, table_name, table_wid, file_, operation, bucket_name): help="Operation to perform on the table.") @click.option("-g", "--generate", is_flag=True, default=True, help="Generate a unique bucket name.") -@click.option("-b", "--bucket", help="Bucket name to create.") +@click.option("-b", "--bucket", help="Bucket name to load files.") @click.option("-c", "--complete", is_flag=True, default=False, help="Automatically complete bucket and load the data into the table.") @click.argument("file", nargs=-1, required=True, type=click.Path(exists=True)) @click.pass_context def buckets_upload(ctx, table_name, table_wid, schema_file, operation, generate, bucket, complete, file): - p = ctx.obj["p"] - - # We know we have valid file name. Check to see if we have a gzip file or a CSV - # by checking the extension. + """ + Upload a CSV or gzip file to the specified bucket - if file is None: - logger.error("An existing file name is required to upload to a bucket.") - return + [FILE] one or more gzip (.gz) or CSV (.csv) files. - source_file = file[0] - target_file = source_file + NOTE: This operation will create ".csv.gz" files for each .csv file. 
+ """ + p = ctx.obj["p"] - if source_file.lower().endswith(".csv"): - # GZIP the file into the same directory with the appropriate extension. - target_file += ".gz" + # Convert the file(s) provided to a list of compressed files. + target_files = u.compress_files(file) - with open(source_file, 'rb') as f_in: - with gzip.open(target_file, "wb") as f_out: - shutil.copyfileobj(f_in, f_out) - elif not source_file.lower().endswith(".gz"): - logger.error(f"File {target_file} is not a .gz or .csv file.") - return + if len(target_files) == 0: + print("No files to upload.") + sys.exit(1) - # We think we have a file - we don't test the contents. + # We think we have a file(s) - we don't test the contents. # Go ahead and create a new bucket or use an existing. - bucket = buckets_create_impl(p, bucket, table_name, table_wid, schema_file, operation) + bucket = p.buckets_create(bucket, table_name, table_wid, schema_file, operation) if bucket is None: logger.error("Invalid bucket for upload operation.") return - upload = p.buckets_upload(bucket["id"], target_file) + upload = p.buckets_upload(bucket["id"], target_files) if upload is not None and complete: complete = p.buckets_complete(bucket["id"]) -@click.command("complete", help="Complete the specified bucket and load any files in the bucket.") +@click.command("complete") @click.option("-n", "--bucket_name", help="Bucket to complete.") @click.argument("bucket_wid", required=False) @click.pass_context def buckets_complete(ctx, bucket_name, bucket_wid): + """ + Complete the specified bucket and perform the specified operation. + + [BUCKET_WID] the Workday ID of the bucket to complete. + """ p = ctx.obj["p"] if bucket_wid is None and bucket_name is None: - print("Either a bucket wid or a bucket name must be specified.") - return + print("A bucket wid or a bucket name must be specified.") + sys.exit(1) if bucket_wid is not None: - # If the caller passed both a name and WID, then - # use the WID first. + # If the caller passed both a name and WID, then use the WID first. buckets = p.buckets_list(bucket_id=bucket_wid) else: # Lookup the bucket by name. @@ -136,82 +159,33 @@ def buckets_complete(ctx, bucket_name, bucket_wid): if buckets["total"] == 0: logger.error('Bucket not found.') - return + sys.exit(1) bucket = buckets["data"][0] bucket_state = bucket["state"]["descriptor"] if bucket_state != "New": - print(f"Bucket state is \"{bucket_state}\" - only valid state is \"New.\"") - return - - bucket_wid = bucket["id"] - - return p.buckets_complete(bucket_wid) - - -def buckets_create_impl(prism, bucket_name, table_name, table_wid, schema_file, operation): - if bucket_name is not None: - # Let's see if this bucket already exists - buckets = prism.buckets_list(bucket=bucket_name) - - if buckets is not None and buckets["total"] != 0: - logger.warning(f"Bucket {bucket_name} already exists - status: .") - return buckets["data"][0] - else: - # Generate a unique bucket name for this operation. - bucket = buckets_generate_impl() - logger.debug(f"New bucket name: {bucket}") + print(f"Bucket state is \"{bucket_state}\" - only \"New.\" buckets can be completed.") + sys.exit(1) - # A target table must be named and must exist. 
+ print(p.buckets_complete(bucket["id"])) - if table_name is None and table_wid is None: - print("A table name or wid must be specified to create a bucket.") - return None - if table_name is not None: - tables = prism.tables_list(api_name=table_name, type_="full") - else: - tables = prism.tables_list(wid=table_wid, type_="full") - - if tables["total"] != 1: - print("Table for create bucket not found.") - return - - table = tables["data"][0] - - if schema_file is not None: - schema = prism.table_to_bucket_schema(load_schema(schema_file)) - else: - schema = prism.table_to_bucket_schema(table) - - bucket = prism.buckets_create(bucket, table["id"], schema, operation=operation) - - return bucket - - -def load_schema(filename): - """Load a table schema from a JSON file. - - :param filename: - :return: - """ - if not os.path.isfile(filename): - logger.critical("Schema file not found: {filename}") - return None +@click.command("status") +@click.option("-w", "--wid", required=False, help="Bucket name to status") +@click.argument("name", required=False) +@click.pass_context +def buckets_status(ctx, name, wid): + p=ctx.obj["p"] - try: - with open(filename) as file: - schema = json.load(file) + buckets=p.buckets_list(wid, bucket_name=name) - # Check to see if this is a full table definition - # or just a list of fields. + if buckets["total"] != 0: + print(buckets["data"][0]["state"]["descriptor"]) - if type(schema) is list: - schema = {"fields": schema} - except Exception as e: - logger.critical("Invalid schema: %s".format(str(e))) - pass - return None +@click.command("name") +@click.pass_context +def buckets_name(ctx): + print(ctx.obj["p"].buckets_gen_name()) \ No newline at end of file diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 6190540..e4bfdb2 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -1,8 +1,9 @@ +import sys import click +import json import pandas as pd -@click.command("list", - help="View the data change tasks permitted by the security profile of the current user.") +@click.command("list") @click.option("-w", "--wid", help="The dataChangeID to list.") @click.option("-a", "--activity_wid", @@ -13,68 +14,104 @@ help="The offset to the first object in a collection to include in the response.") @click.option("-t", "--type", "type_", default="summary", help="How much information to be returned in response JSON (default=summary).") -@click.option("-f", "--format", +@click.option("-f", "--format", "format_", default="full", help="Format output as full, summary, schema, or CSV.", type=click.Choice(['full', 'summary', 'schema', 'csv'], case_sensitive=False)) @click.option("-s", "--search", is_flag=True, help="Use contains search substring for --name or --id (default=false).") -@click.argument("api_name", required=False) +@click.argument("name", required=False) @click.pass_context -def dataChanges_list(ctx, api_name, wid, activity_wid, limit, offset, type_, format, search): +def dataChanges_list(ctx, name, wid, activity_wid, limit, offset, type_, format_, search): + """ + View the data change tasks permitted by the security profile of the current user. + + [NAME] data change task to lists. 
+ """ p = ctx.obj["p"] - o = ctx.obj["o"] - o.dataChanges_query() + data_changes = p.dataChanges_list(name, wid, activity_wid, limit, offset, type_, search) + + if data_changes["total"] == 0: + print("No data change tasks found.") + return - dataChanges = p.dataChanges_list(api_name, wid, activity_wid, limit, offset, type_, search) - dataChanges["data"] = sorted(dataChanges["data"], key=lambda dct: dct["displayName"].lower()) + data_changes["data"] = sorted(data_changes["data"], key=lambda dct: dct["displayName"].lower()) # Handle output - for dct in dataChanges["data"]: - print(dct["displayName"]) + if format_ == "summary": + for dct in data_changes["data"]: + display_name = dct["displayName"] + + source_name = dct["source"]["sourceType"] + source_name += ": " + dct["source"]["name"] if "name" in dct["source"] else "" + + target_name = dct["target"]["name"] + operation = dct["operation"]["operationType"]["descriptor"] + + print(f"{display_name}, source: {source_name}, target: {target_name}, operation: {operation}") + elif format_ == "csv": + df = pd.json_normalize(data_changes["data"]) + print(df.to_csv(index=False)) + else: + print(json.dumps(data_changes["data"], index=2)) @click.command("validate", help="Validate the data change specified by name or ID.") @click.option("-w", "--wid", help="The dataChangeID to list.") @click.option("-s", "--search", is_flag=True, help="Use contains search substring for --name or --id (default=false).") -@click.argument("api_name", required=False) +@click.argument("name", required=False) @click.pass_context -def dataChanges_validate(ctx, api_name, wid, search): +def dataChanges_validate(ctx, name, wid, search): p = ctx.obj["p"] + if name is None and wid is None: + print("A data change task name or a wid must be specified.") + sys.exit(1) + # See if we have any matching data change tasks. - dataChanges = p.dataChanges_list( - name=api_name, + data_changes = p.dataChanges_list( + name=name, wid=wid, search=search, refresh=True) - if dataChanges["total"] == 0: + if data_changes["total"] == 0: print("No matching data change task(s) found.") + sys.exit(1) - if len(dataChanges) == 1: - print(p.dataChanges.activities_post(dataChanges["id"])) + for dct in data_changes["data"]: + validate = p.dataChanges_validate(dct["id"]) + print(validate) -@click.command("execute", help="This resource executes a data change.") -@click.argument("api_name", required=True) +@click.command("run") +@click.argument("name", required=True) @click.argument("fileContainerID", required=False) @click.pass_context -def dataChanges_execute(ctx, api_name, filecontainerid): +def dataChanges_run(ctx, name, filecontainerid): + """ + This resource executes a data change. + + [NAME] Data Change Task name. + [FILECONTAINERID] File container with files to load. + """ + p = ctx.obj["p"] - # See if we have any matching data change tasks. - # See if we have any matching data change tasks. - dataChanges = p.dataChanges_list( - name=api_name, - refresh=True) + # See if we have any matching data change task. 
+ data_changes = p.dataChanges_list(name=name.replace(" ", "_"), type_="full", refresh=True) - if dataChanges["total"] != 1: - print("Invalid data change task to execute") - return + if data_changes["total"] != 1: + print(f"Data change task not found: {name}") + sys.exit(1) - dct_id = dataChanges["data"][0]["id"] + dct_id = data_changes["data"][0]["id"] - dataChanges = p.dataChanges_validate(dct_id) + validate = p.dataChanges_validate(dct_id) - print(p.dataChanges_activities_post(dct_id, filecontainerid)) + if "error" in validate: + print("Invalid DCT: " + validate["errors"][0]["error"] + " - code: " + validate["errors"][0]["code"]) + sys.exit(1) + else: + dct = p.dataChanges_activities_post(dct_id, filecontainerid) + print(dct) diff --git a/prism/commands/fileContainers_commands.py b/prism/commands/fileContainers_commands.py index 6684930..933a179 100644 --- a/prism/commands/fileContainers_commands.py +++ b/prism/commands/fileContainers_commands.py @@ -1,5 +1,5 @@ import click -from commands import util as u +from . import util as u @click.command("create", help="Use this method to create a new fileContainers.") @@ -33,7 +33,7 @@ def filecontainers_list(ctx, filecontainerid): def filecontainers_load(ctx, filecontainerid, file): p = ctx.obj["p"] - fid = u.fileContainers_load_impl(p, filecontainerid, file) + fid = u.fileContainers_load(p, filecontainerid, file) if fid is None: print("Error loading fileContainer.") diff --git a/prism/commands/raas_commands.py b/prism/commands/raas_commands.py index cc3135e..5a23457 100644 --- a/prism/commands/raas_commands.py +++ b/prism/commands/raas_commands.py @@ -1,14 +1,23 @@ import click -@click.command("run", help="View the buckets permitted by the security profile of the current user.") -@click.option("-u", "--user", default=None, help="Output query results as CSV.") +@click.command("run", help="Run RaaS report as system or as a specific user.") +@click.option("-s", "--system", is_flag=True, default=False, help="Run delivered Workday report.") +@click.option("-u", "--user", default=None, help="Run custom report as named user.") @click.option("-f", "--format", "format_", default=None, help="Output query results as CSV.") -@click.argument("report", required=True) +@click.argument("report", nargs=1) +@click.argument('params', nargs=-1) @click.pass_context -def run(ctx, user, format_, report): +def run(ctx, system, user, format_, report, params): p = ctx.obj["p"] - report_output = p.raas_run(report, user, format_) + if system and user is not None: + print("Please specify only system or user, not both.") + return + + if not system and user is None: + print("Please specify either system or user.") + + report_output = p.raas_run(report, system, user, params, format_) print(report_output) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 25311ab..d22905f 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -1,12 +1,14 @@ +import sys +import os + import click import json import pandas as pd -# Lazy instantiation of sources for Instance type columns (if any) for a create operation. -data_sources = None +from . 
import util as u -@click.command("list", help="View the tables or datasets permitted by the security profile of the current user.") +@click.command("list") @click.option("-w", "--wid", help="Unique WID for Prism table or dataset.") @click.option("-l", "--limit", @@ -28,10 +30,10 @@ @click.option("-s", "--search", help="Search substring in api name or display name (default=false).", is_flag=True) -@click.argument("api_name", required=False) +@click.argument("name", required=False) @click.pass_context -def tables_list(ctx, api_name, wid, limit, offset, type_, format_, search): - """tables list TABLENAME +def tables_list(ctx, name, wid, limit, offset, type_, format_, search): + """View the tables or datasets permitted by the security profile of the current user. Prism API TABLENAME of the table to list. """ @@ -40,12 +42,12 @@ def tables_list(ctx, api_name, wid, limit, offset, type_, format_, search): # Summary results cannot generate schema or CSV output. print(f"Invalid combination of type {type_} and format {format}.") - return + sys.exit(1) p = ctx.obj["p"] # Query the tenant... - tables = p.tables_list(api_name, wid, limit, offset, type_, search) + tables = p.tables_list(name, wid, limit, offset, type_, search) # The return always has a total tables returned value. # note: tables_list never fails, it simply returns 0 tables if there is a problem. @@ -54,16 +56,22 @@ def tables_list(ctx, api_name, wid, limit, offset, type_, format_, search): # Handle output if format_ == "json": - # The results could be one or more tables - simply dump the - # returned object. + # The results could be one table or an array of multiple + # tables - simply dump the returned object. print(json.dumps(tables, indent=2)) elif format_ == "summary": for table in tables["data"]: - print(f'{table["displayName"]}, Rows: {table["stats"]["rows"]}, Size: {table["stats"]["rows"]}, Refreshed: {table["dateRefreshed"]}') + display_name = table["displayName"] + rows = table["stats"]["rows"] if "stats" in table and "rows" in table["stats"] else "Null" + size = table["stats"]["size"] if "stats" in table and "size" in table["stats"] else "Null" + refreshed = table["dateRefreshed"] if "dateRefreshed" in table else "unknown" + enabled = table["enableForAnalysis"] if "enableForAnalysis" in table else "Null" + + print(f'{display_name}, Enabled: {enabled}, Rows: {rows}, Size: {size}, Refreshed: {refreshed}') elif format_ == "csv": df = pd.json_normalize(tables["data"]) - print(df.to_csv()) + print(df.to_csv(index=False)) elif format_ == "schema": # Dump out the fields of the first table in the result in # a format compatible with a schema used to created or edit @@ -86,215 +94,162 @@ def tables_list(ctx, api_name, wid, limit, offset, type_, format_, search): print(json.dumps(fields, indent=2)) else: - table = tables["data"][0] # Only output the first table. 
- fields = [fld for fld in tables["data"][0]["fields"] if not fld["name"].startswith("WPA_")] - - print(csv_from_fields(fields)) - - -@click.command("create", help="Create a new table with the specified name.") -@click.option("-d", "--displayName", help="Specify a display name - defaults to name") -@click.option("-e", "--enabledForAnalysis", is_flag=True, default=False, help="Enable this table for analytics.") -@click.option("-n", "--sourceName", help="The API name of an existing table to copy.") -@click.option("-w", "--sourceWID", help="The WID of an existing table to copy.") -@click.argument("name", required=True) + print(u.csv_from_fields(tables["data"][0]["fields"])) + + +@click.command("create") +@click.option("-n", "--name", + help="Table name - overrides schema value.") +@click.option("-d", "--displayName", + help="Specify a display name - defaults to name.") +@click.option("-t", "--tags", multiple=True, + help="Tags to organize the table in the Data Catalog.") +@click.option("-e", "--enableForAnalysis", type=bool, is_flag=True, default=None, + help="Enable this table for analytics.") +@click.option("-n", "--sourceName", + help="The API name of an existing table to copy.") +@click.option("-w", "--sourceWID", + help="The WID of an existing table to copy.") @click.argument("file", required=False, type=click.Path(exists=True)) @click.pass_context -def tables_create(ctx, displayname, enabledforanalysis, sourcename, sourcewid, format_, name, file): - p = ctx.obj["p"] - - if file is not None: - if file.lower().endswith(".json"): - schema = json.loads(file.read()) - - # The JSON file could be a complete table definitions (GET:/tables - full) or just - # the list of fields. If we got a list, then we have a list of fields we - # use to start the schema definition. - - if type(schema) is list: - fields = schema - else: - fields = schema["fields"] - elif file.lower().endswith(".csv"): - fields = fields_from_csv(p, file) - else: - print("Invalid file extension - valid extensions are .json or .csv.") - return - else: - if sourcename is None and sourcewid is None: - print("No schema provided and a table to copy (--sourceName or --sourceWID) not specified.") - return - - if sourcewid is not None: - tables = p.tables_list(wid=sourcewid, type_="full") # Exact match on WID - and get the fields - else: - tables = p.tables_list(name=sourcename, type_="full") # Exact match on API Name - - if tables["total"] == 0: - print("Invalid --sourceName or --sourceWID : table not found.") - return - - fields = tables["data"][0]["fields"] +def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, sourcewid, file): + """ + Create a new table with the specified name. - fields[:] = [fld for fld in fields if "WPA" not in fld["name"]] + [FILE] Optional file containing a schema definition for the table. - # Initialize a new schema with just the fields. - schema = {"fields": fields} + Note: A schema file, --sourceName, or --sourceWID must be specified. + """ + p = ctx.obj["p"] - # Set the particulars for this table operation. + # We can assume a valid schema - get_schema exits if there is a problem. + schema = u.get_schema(p, file, sourcename, sourcewid) - schema["enableForAnalysis"] = enabledforanalysis - schema["name"] = name.replace(" ", "_") # Minor clean-up + # Initialize a new schema with the particulars for this table operation. 
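+ # Name precedence: an explicit --name option overrides any name already in the
+ # schema; if neither is present the command exits with an error.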
+ if name is not None: + # If we got a name, set it in the table schema + schema["name"] = name + elif "name" not in schema: + # The schema doesn't have a name and none was given - exit. + print("Table --name must be specified.") + sys.exit(1) if displayname is not None: + # If we got a display name, set it in the schema schema["displayName"] = displayname - else: + elif "displayName" not in schema: + # Default the display name to the name. schema["displayName"] = name - table_def = p.tables_create(schema["name"], schema) + if enableforanalysis is not None: + schema["enableForAnalysis"] = enableforanalysis + elif "enableForAnalysis" not in schema: + schema["enableForAnalysis"] = False + + table_def = p.tables_create(schema) if table_def is not None: print(f"Table {name} created.") -@click.command("update", help="Edit the schema for an existing table.") +@click.command("update") @click.option("-s", "--sourceName", help="The API name of an existing table to copy.") @click.option("-w", "--sourceWID", help="The ID of an existing table to copy.") +@click.option("-t", "--truncate", is_flag=True, default=False, help="Truncate the table before updating.") @click.argument("name", required=True) @click.argument("file", required=False, type=click.Path(exists=True)) @click.pass_context -def tables_update(ctx, name, filename, source_table, source_id): +def tables_update(ctx, name, file, sourcename, sourcewid, truncate): + """Edit the schema for an existing table. + + NAME The API name of the table to update\b + [FILE] Optional file containing an updated schema definition for the table. + + Note: A schema file, --sourceName, or --sourceWID must be specified. + """ + p = ctx.obj["p"] - table = p.tables(name=name) + # Before doing anything, table name must exist + tables = p.tables_list(name=name) + + if tables["total"] == 0: + print(f"Table \"{name}\" to update not found.") + sys.exit(1) + + table_id = tables["data"][0]["id"] + + fields = u.get_schema(p, file, sourcename, sourcewid) - if table is not None: - p.tables_put(name, filename) + p.tables_update(table_id, file) print("update") -@click.command("upload", - help="Upload a file into the table using a bucket.") -@click.option("-n", "--table_name", - help="Specify a name for the table.") -@click.option("-i", "--table_id", - help="Specify a specific table API ID - this value overrides a name, if specified.") -@click.option("-b", "--bucket", - help="Specify a bucket name - defaults to random bucket name.") +@click.command("upload") @click.option("-o", "--operation", default="TruncateandInsert", - help="Operation for the bucket - default to TruncateandInsert.") -@click.option("-f", "--filename", - help="File (csv or gzip) to upload.") + help="Operation for the table operation - default to TruncateandInsert.") +@click.argument("name", required=True) +@click.argument("file", nargs=-1, type=click.Path(exists=True)) +@click.pass_context +def tables_upload(ctx, name, operation, file): + """ + Upload a file into the table using a bucket. + + NOTE: This operation will create ".csv.gz" files for each .csv file. + """ + p = ctx.obj["p"] + + # Convert the file(s) provided to a list of compressed files. 
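+ # compress_files() gzips each .csv argument into a matching .csv.gz file next
+ # to the original before it is uploaded to the bucket.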
+ target_files = u.compress_files(file) + + if len(target_files) == 0: + print("No files to upload.") + sys.exit(1) + + bucket = p.buckets_create(target_name=name, + operation=operation) + + if bucket is None: + sys.exit(1) + + results = p.buckets_upload(bucket["id"], target_files) + + if len(results) > 0: + p.buckets_complete(bucket["id"]) + + +@click.command("truncate") +@click.argument("name", required=True) @click.pass_context -def tables_upload(ctx, table_name, table_id, bucket, operation, filename): +def tables_truncate(ctx, name): + """ + Truncate the named table. + + :param name: + :return: + """ + # Create an empty bucket with a delete operation p = ctx.obj["p"] - print("upload") - - -def csv_from_fields(fields): - format_str = '{name},"{displayName}",{ordinal},{type},"{businessObject}",{precision},{scale},"{parseFormat}",{required},{externalId}\n' - - csv = "name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId\n" - - for field in fields: - # Suppress Prism auditing fields. - if field["name"].startswith("WPA_"): - continue - - field_def = {"name": field["name"], - "displayName": field["displayName"], - "ordinal": field["ordinal"], - "type": field["type"]["descriptor"], - "businessObject": field["businessObject"]["descriptor"] if "businessObject" in field else "", - "precision": field["precision"] if "precision" in field else "", - "scale": field["scale"] if "scale" in field else "", - "parseFormat": field["parseFormat"] if "parseFormat" in field else "", - "required": field["required"], - "externalId": field["externalId"] - } - - csv += format_str.format_map(field_def) - - return csv - - -def fields_from_csv(prism, file): - global data_sources - - schema = {"fields": []} # Start with an empy schema definition. - - with open(file, newline='') as csvfile: - reader = csv.DictReader(csvfile) - - ordinal = 1 - - for row in reader: - field = { - "ordinal": ordinal, - "name": row["name"], - "displayName": row["displayName"] if "displayName" in row else row["name"], - "required": row["required"] if "required" in row else False, - "externalId": row["externalId"] if "externalId" in row else False - } - - match row["type"].lower(): - case "text": - field["type"] = { - "id": "fdd7dd26156610006a12d4fd1ea300ce", - "descriptor": "Text" - } - case "date": - field["type"] = { - "id": "fdd7dd26156610006a71e070b08200d6", - "descriptor": "Date" - } - - if "parseFormat" in row: - field["parseFormat"] = row["parseFormat"] - - case "numeric": - field["type"] = { - "id": "32e3fa0dd9ea1000072bac410415127a", - "descriptor": "Numeric" - } - - if "precision" in row: - field["precision"] = row["precision"] - - if "scale" in row: - field["scale"] = row["scale"] - - case "instance": - # We need all the data sources to resolve the business objects - # to include their WID. 
- if data_sources is None: - data_sources = prism.datasources_list() - - if data_sources is None or data_sources["total"] == 0: - print("Error calling WQL/dataSources") - return - - field["type"] = { - "id": "db9cd1dbf95010000e8fc7c78cd012a9", - "descriptor": "Instance" - } - - # Find the matching businessObject - bo = [ds for ds in data_sources["data"] - if ds["businessObject"]["descriptor"] == row["businessObject"]] - - if len(bo) == 1: - field["businessObject"] = bo[0]["businessObject"] - case _: - # Default to string - field["type"] = { - "id": "fdd7dd26156610006a12d4fd1ea300ce", - "descriptor": "Text" - } - - schema["fields"].append(field) - ordinal += 1 - - return schema + # A truncate (delete) operations still needs a file. + blank_file = os.path.join(os.path.dirname(os.path.realpath(__file__))) + blank_file = os.path.join(blank_file, "..", "data", "blank.csv.gz") + + # Get a bucket using a generated name and an explicit Delete operation. + bucket = p.buckets_create(target_name=name, operation="Delete") + + if bucket is None: + print(f"Unable to truncate {name} - error getting bucket.") + sys.exit(1) + + bucket_id = bucket["id"] + + # Put the blank file into the bucket. + bucket = p.buckets_upload(bucket_id, blank_file) + + # Ask Prism to run the delete statement by completing the bucket. + bucket = p.buckets_complete(bucket_id) + + if bucket is None: + print(f"Unable to truncate table {name}.") \ No newline at end of file diff --git a/prism/commands/util.py b/prism/commands/util.py index 38aaec9..a49f230 100644 --- a/prism/commands/util.py +++ b/prism/commands/util.py @@ -1,8 +1,191 @@ import gzip +import os.path +import sys import shutil +import json +import csv -def fileContainers_load_impl(prism, filecontainersid, file): +def get_schema(p, file, sourceName, sourceWID): + # Start witha blank schema definition. + schema = {} + + # A file always takes precedence over sourceName and sourceWID + # options, and must contain a valid schema. + + if file is not None: + if file.lower().endswith(".json"): + try: + with open(file) as json_file: + schema = json.load(json_file) + except Exception as e: + print(f"Invalid schema file: {e.msg}.") + sys.exit(1) + + # The JSON file could be a complete table definitions (GET:/tables - full) or just + # the list of fields. If we got a list, then we have a list of fields we + # use to start the schema definition. + + if type(schema) is list: + schema["fields"] = schema + else: + # This should be a full schema, perhaps from a table list command. + if "name" not in schema and "fields" not in schema: + print("Invalid schema - name and fields attribute not found.") + sys.exit(1) + elif file.lower().endswith(".csv"): + schema = schema_from_csv(p, file) + else: + print("Invalid file extension - valid extensions are .json or .csv.") + sys.exit(1) + else: + # No file was specified, check for a source table. + + if sourceName is None and sourceWID is None: + print("No schema provided and a table (--sourceName or --sourceWID) not specified.") + sys.exit(1) + + if sourceWID is not None: + tables = p.tables_list(wid=sourceWID, type_="full") # Exact match on WID - and get the fields + else: + tables = p.tables_list(name=sourceName, type_="full") # Exact match on API Name + + if tables["total"] == 0: + print("Invalid --sourceName or --sourceWID : table not found.") + sys.exit(1) + else: + schema = tables["data"][0] + + return schema + + +def schema_from_csv(prism, file): + global data_sources + + schema = {"fields": []} # Start with an empy schema definition. 
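+ # Only a "name" column is required in the CSV header; displayName, type
+ # (text/date/numeric/instance), precision, scale, parseFormat, businessObject,
+ # required, and externalId are optional. Ordinals are assigned automatically.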
+ + with open(file, newline='') as csvfile: + reader = csv.DictReader(csvfile) + + # Force all the columns names to lowercase to make lookups consistent + # regardless of the actual case of the columns. + reader.fieldnames = [f_name.lower() for f_name in reader.fieldnames] + + # The minimum definition is a name column - exit if not found. No other + # column definition is required to build a valid field list. + if "name" not in reader.fieldnames: + print("CSV file {file} does not contain a name column header in first line.") + sys.exit(1) + + # Prism fields always have an ordinal sequence assigned to each field. + ordinal = 1 + + for row in reader: + field = { + "ordinal": ordinal, + "name": row["name"], + "displayName": row["displayname"] if "displayname" in row else row["name"] + } + + if "required" in row and isinstance(row["required"], str) and row["required"].lower() == "true": + field["required"] = True + else: + field["required"] = False + + if "externalid" in row and isinstance(row["externalid"], str) and row["externalid"].lower() == "true": + field["externalId"] = True + else: + field["externalId"] = False + + match row["type"].lower(): + case "text": + field["type"] = { + "id": "fdd7dd26156610006a12d4fd1ea300ce", + "descriptor": "Text" + } + case "date": + field["type"] = { + "id": "fdd7dd26156610006a71e070b08200d6", + "descriptor": "Date" + } + + if "parseformat" in row and isinstance(row["parseformat"], str) and len(row["parseformat"]) > 0: + field["parseFormat"] = row["parseformat"] + + case "numeric": + field["type"] = { + "id": "32e3fa0dd9ea1000072bac410415127a", + "descriptor": "Numeric" + } + + if "precision" in row: + field["precision"] = row["precision"] + + if "scale" in row: + field["scale"] = row["scale"] + + case "instance": + # We need all the data sources to resolve the business objects + # to include their WID. + if data_sources is None: + data_sources = prism.datasources_list() + + if data_sources is None or data_sources["total"] == 0: + print("Error calling WQL/dataSources") + return + + field["type"] = { + "id": "db9cd1dbf95010000e8fc7c78cd012a9", + "descriptor": "Instance" + } + + # Find the matching businessObject + bo = [ds for ds in data_sources["data"] + if ds["businessObject"]["descriptor"] == row["businessObject"]] + + if len(bo) == 1: + field["businessObject"] = bo[0]["businessObject"] + case _: + # Default to string + field["type"] = { + "id": "fdd7dd26156610006a12d4fd1ea300ce", + "descriptor": "Text" + } + + schema["fields"].append(field) + ordinal += 1 + + return schema + + +def csv_from_fields(fields): + format_str = '{name},"{displayName}",{ordinal},{type},"{businessObject}",{precision},{scale},"{parseFormat}",{required},{externalId}\n' + + csv = "name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId\n" + + for field in fields: + # Suppress Prism auditing fields. 
+ if field["name"].startswith("WPA_"): + continue + + field_def = {"name": field["name"], + "displayName": field["displayName"], + "ordinal": field["ordinal"], + "type": field["type"]["descriptor"], + "businessObject": field["businessObject"]["descriptor"] if "businessObject" in field else "", + "precision": field["precision"] if "precision" in field else "", + "scale": field["scale"] if "scale" in field else "", + "parseFormat": field["parseFormat"] if "parseFormat" in field else "", + "required": field["required"], + "externalId": field["externalId"] + } + + csv += format_str.format_map(field_def) + + return csv + + +def fileContainers_load(prism, filecontainersid, file): # Because Click verified the file already exists, we know we have valid # file name. Check to see if we have a gzip file or a CSV # by checking the extension. @@ -55,3 +238,41 @@ def fileContainers_load_impl(prism, filecontainersid, file): # passed by the caller, or the new fID created by # the load of the first file. return fID + + +def compress_files(files): + target_files = [] + + if files is None: + print("File(s) must be specified.") + return target_files + elif isinstance(files, list) and len(files) == 0: + print("File(s) must be specified.") + return target_files + elif isinstance(files, str) and not files: + print("File(s) must be specified.") + return target_files + + if isinstance(files, str): + files = [ files ] + + for f in files: + if not os.path.exists(f): + print(f"FIle {f} not found - skipping.") + continue + + if f.lower().endswith(".csv"): + # GZIP the file into the same directory with the appropriate extension. + target_file = f + ".gz" + + with open(f, 'rb') as f_in: + with gzip.open(target_file, "wb") as f_out: + shutil.copyfileobj(f_in, f_out) + + target_files.append(target_file) + elif f.lower().endswith(".gz"): + target_files.append(target_file) + else: + print(f"File {f} is not a .gz or .csv file - skipping.") + + return target_files diff --git a/prism/commands/wql_commands.py b/prism/commands/wql_commands.py index c97eee3..be1151e 100644 --- a/prism/commands/wql_commands.py +++ b/prism/commands/wql_commands.py @@ -1,7 +1,6 @@ import click import json -import sys - +import pandas as pd @click.command("dataSources", help="View the buckets permitted by the security profile of the current user.") @@ -52,11 +51,8 @@ def data(ctx, limit, offset, file_, csv_, query): if rows["total"] != 0: if csv_: - headers = rows["data"][0].keys() - - writer = csv.DictWriter(sys.stdout, fieldnames=headers) - writer.writeheader() - writer.writerows(rows["data"]) + df = pd.json_normalize(rows["data"]) + print(df.to_csv(index=False)) else: - print(json.dumps(data, indent=2)) + print(json.dumps(rows, indent=2)) diff --git a/prism/data/blank.csv.gz b/prism/data/blank.csv.gz new file mode 100644 index 0000000..e69de29 diff --git a/prism/data/invalid-schema.json b/prism/data/invalid-schema.json new file mode 100644 index 0000000..f95d6d2 --- /dev/null +++ b/prism/data/invalid-schema.json @@ -0,0 +1 @@ +{ "d" , "mem" } \ No newline at end of file diff --git a/prism/data/schema.csv b/prism/data/schema.csv new file mode 100644 index 0000000..3411907 --- /dev/null +++ b/prism/data/schema.csv @@ -0,0 +1,6 @@ +name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId +text_field, text field,,text +date_field, date field,,date,,,,MM/DD/yy +numeric_field, numeric field,,numeric,,19,3 +required_field,required field,,,,,,,true +external_field, external id,,,,,,,true,true \ No newline at end of file 
diff --git a/prism/prism.py b/prism/prism.py index 8139aaf..c665b6d 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -12,6 +12,7 @@ import os import urllib import sys +import uuid logger = logging.getLogger(__name__) logger.setLevel(logging.WARNING) @@ -72,16 +73,26 @@ def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_toke # Support URLs for additional Workday API calls. self.wql_endpoint = f"{base_url}/api/wql/v1/{tenant_name}" - self.raas_endpoint = f"{base_url}/ccx/service/customreport2/{tenant_name}" + self.raas_endpoint = f"{base_url}/ccx/service" # At creation, there cannot yet be a bearer_token obtained from Workday. self.bearer_token = None self.bearer_token_timestamp = None @staticmethod - def set_log_level(log_level): - logger.setLevel(getattr(logging, log_level)) # Convert the string to the proper log level - logger.debug("set log level: {log_level}") + def set_log_level(log_level=None): + set_level = None + + if log_level is None: + set_level = logging.INFO + else: + set_level = getattr(logging, log_level) + + logger.setLevel(set_level) # Convert the string to the proper log level + logger.debug("set log level: {set_level}") + + def buckets_gen_name(self): + return "cli_" + uuid.uuid4().hex def get(self, url, headers=None, params=None, log_tag="generic get"): if url is None: @@ -169,7 +180,7 @@ def reset_bearer_token(self): def tables_list( self, name=None, - id=None, + wid=None, limit=None, offset=None, type_="summary", @@ -182,7 +193,7 @@ def tables_list( The name of the table to obtain details about. If the default value of None is specified, details regarding first 100 tables is returned. - id : str + wid : str The ID of a table to obtain details about. When specified, all tables are searched for the matching id. @@ -205,6 +216,8 @@ def tables_list( """ operation = "/tables" + logger.debug(f"GET: {operation}") + url = self.prism_endpoint + operation if type_ is None or type_ not in ["full", "summary", "permissions"]: @@ -247,7 +260,7 @@ def tables_list( return_tables = {"total": 0, "data": []} while True: - r = self.get(url, params=params) + r = self.get(url, params=params, log_tag=operation) if r.status_code != 200: logger.error(f"Invalid HTTP return code: {r.status_code}") @@ -264,9 +277,9 @@ def tables_list( if name is not None: # Substring search for matching table names match_tables = [tab for tab in tables["data"] if name in tab["name"]] - elif id is not None: + elif wid is not None: # User is looking for a table by ID - match_tables = [tab for tab in tables["data"] if id == tab["id"]] + match_tables = [tab for tab in tables["data"] if wid == tab["id"]] else: # Grab all the tables in the result match_tables = tables["data"] @@ -290,17 +303,13 @@ def tables_list( return_tables["total"] = len(return_tables["data"]) return return_tables - def tables_create(self, table_name, schema): + def tables_create(self, schema): """Create an empty table of type "API". Parameters ---------- - table_name : str - The table name. The name must be unique and conform to the name - validation rules. 
- schema : list - A list of dictionaries containing the schema + A dictionary containing the schema Returns ------- @@ -324,25 +333,37 @@ def tables_create(self, table_name, schema): r = requests.post(url, headers=headers, data=json.dumps(schema)) if r.status_code == 201: - logging.info("Successfully created an empty API table") return r.json() elif r.status_code == 400: - logging.warning(r.json()["errors"][0]["error"]) + logger.warning(r.json()["errors"][0]["error"]) + logger.warning(r.text) else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") + logger.warning(f"HTTP status code {r.status_code}: {r.content}") return None - def tables_update(self, name, schema): - tables = self.tables(name=name) + def tables_update(self, wid, schema, truncate=False): + """ + Update the schema of an existing table. - # We never fail - if the table doesn't exist, only - # log a warning. + """ - if tables["total"] == 0: - # Assume we are doing a create - table = self.tables_create(name, schema) - return None + operation = f"/tables/{wid}" + logger.debug(f"PUT: {operation}") + url = self.prism_endpoint + operation + + headers = { + "Authorization": "Bearer " + self.get_bearer_token(), + "Content-Type": "application/json", + } + + r = requests.put(url=url, data=schema) + + if r.status_code == 200: + return r.json() + + logger.warning(f"Error updating table {wid} - {r.text}.") + return None def tables_patch(self, id, displayName=None, description=None, documentation=None, enableForAnalysis=None, schema=None): return None @@ -355,8 +376,21 @@ def buckets_list(self, type_="summary", table_name=None, search=False): + """ + + :param wid: + :param bucket_name: + :param limit: + :param offset: + :param type_: + :param table_name: + :param search: + :return: + """ operation = "/buckets" + logger.debug(f"GET: {operation}") + url = self.prism_endpoint + operation # Start the return object - this routine NEVER fails @@ -376,17 +410,15 @@ def buckets_list(self, limit = 1 offset = 0 else: - # Any other combination of parameters requires a search. + # Any other combination of parameters requires a search + # through all the buckets in the tenant. search = True + limit = 100 # Max pagesize to retrieve in the fewest REST calls. offset = 0 - if limit is not None: - params["limit"] = limit - params["offset"] = offset if offset is not None else 0 - else: - params["limit"] = 100 - params["offset"] = 0 + params["limit"] = limit + params["offset"] = offset if offset is not None else 0 if type_ in ["summary", "full"]: params["type"] = type_ @@ -397,16 +429,19 @@ def buckets_list(self, r = self.get(url, params=params, log_tag=operation) if r.status_code != 200: + # We never fail, return whatever we got (if any). + logger.debug("Error listing buckets.") return return_buckets buckets = r.json() if not search and bucket_name is not None: # Explicit bucket name # We are not searching, and we have a specific bucket, - # return whatever we got. + # return whatever we got with this call. return buckets - # If we are not searching, simply append all the results to the return object. + # If we are not searching, simply append this page of results to + # the return object. if bucket_name is not None: # Substring search for matching table names @@ -421,10 +456,11 @@ def buckets_list(self, # Grab all the tables in the result - select all buckets. match_buckets = buckets["data"] + # Add to the results. 
return_buckets["data"] += match_buckets return_buckets["total"] = len(return_buckets["data"]) - # If we get back anything but a full page, we are done + # If we get back a list of buckets fewer than a full page, we are done # paging the results. if len(buckets["data"]) < params["limit"]: break @@ -441,11 +477,11 @@ def buckets_list(self, def buckets_create( self, - name, + name=None, target_id=None, target_name=None, schema=None, - operation="TruncateandInsert"): + operation="TruncateAndInsert"): """Create a temporary bucket to upload files. Parameters @@ -476,56 +512,59 @@ def buckets_create( https://confluence.workday.com/display/PRISM/Public+API+V2+Endpoints+for+WBuckets """ + # If the caller didn't give us a name to use for the bucket, + # create a default name. + if name is None: + bucket_name = self.buckets_gen_name() + else: + bucket_name = name + # A target table must be identified by ID or name. if target_id is None and target_name is None: logger.error("A table id or table name is required to create a bucket.") return None - # The caller didn't include a schema, make a copy of the target table's schema. - if target_id is not None and schema is None: - tables = self.tables_list(table_id=target_id, type_="full") - - if tables["total"] == 0: - logger.error(f"Table ID {target_id} does not exist for bucket operation.") - return None - - schema = tables["data"][0]["fields"] - - if target_id is None: - tables = self.tables_list(api_name=target_name, type_="full") + # The caller gave us a table wid, but didn't include a schema. Make a copy + # of the target table's schema. Note: WID takes precedence over name. + if target_id is not None: + tables = self.tables_list(wid=target_id, type_="full") + else: + tables = self.tables_list(name=target_name, type_="full") - if tables["total"] == 0: - logger.error(f"Table {target_name} does not exist for create bucket operation.") - return None + if tables["total"] == 0: + logger.error(f"Table not found for bucket operation.") + return None - target_id = tables["data"]["0"]["id"] + table_id = tables["data"][0]["id"] - if schema is None: - schema = tables["data"]["0"]["fields"] + if schema is None: + schema = self.table_to_bucket_schema(tables["data"][0]) + logger.debug(f"POST: /buckets") url = self.prism_endpoint + "/buckets" headers = { - "Authorization": "Bearer " + self.bearer_token, + "Authorization": "Bearer " + self.get_bearer_token(), "Content-Type": "application/json", } data = { - "name": name, + "name": bucket_name, "operation": {"id": "Operation_Type=" + operation}, - "targetDataset": {"id": target_id}, + "targetDataset": {"id": table_id}, "schema": schema, } - r = requests.post(url, headers=headers, data=data) + r = requests.post(url, headers=headers, data=json.dumps(data)) if r.status_code == 201: - logging.info("Successfully created a new wBucket") + logger.info("Successfully created a new wBucket") return r.json() elif r.status_code == 400: - logging.warning(r.json()["errors"][0]["error"]) + logger.warning(r.json()["errors"][0]["error"]) + logger.warning(r.text) else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") + logger.warning(f"HTTP status code {r.status_code}: {r.content}") return None @@ -614,7 +653,7 @@ def table_to_bucket_schema(self, table): return bucket_schema - def buckets_upload(self, bucketid, filename): + def buckets_upload(self, bucket_id, file): """Upload a file to a given bucket. 
Parameters @@ -632,21 +671,38 @@ def buckets_upload(self, bucketid, filename): None """ - url = self.prism_endpoint + f"/buckets/{bucketid}/files" + operation = f"/buckets/{bucket_id}/files" + logger.debug("POST: {operation}") - headers = {"Authorization": "Bearer " + self.get_bearer_token() - } + url = self.prism_endpoint + operation - files = {"file": open(filename, "rb")} + headers = {"Authorization": "Bearer " + self.get_bearer_token()} - r = requests.post(url, headers=headers, files=files) + results = [] - if r.status_code == 201: - logging.info("Successfully uploaded file to the bucket") - return r.json() + # Convert a single filename to a list. + if isinstance(file, str): + files = [file] else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") - return None + files = file + + for f in files: + files = {"file": open(f, "rb")} + + r = requests.post(url, headers=headers, files=files) + + if r.status_code == 201: + logging.info(f"Successfully uploaded {f} to the bucket") + + if isinstance(file, str): + # If we got a single file, return the first result. + return r.json() + else: + results.append(r.json()) + else: + logging.warning(f"HTTP status code {r.status_code}: {r.content}") + + return results def dataChanges_list(self, name=None, @@ -675,7 +731,10 @@ def dataChanges_list(self, operation += "?type=full" else: operation += "?type=summary" - logger.warning("/dataChanges: invalid verbosity {verbosity} - defaulting to summary.") + logger.warning(f'/dataChanges: invalid verbosity {type_} - defaulting to summary.') + else: + operation += "?type=summary" + logger.warning("/dataChanges: invalid verbosity - defaulting to summary.") logger.debug(f"dataChanges_activities_get: {operation}") @@ -809,7 +868,7 @@ def dataChanges_activities_post(self, data_change_id, fileContainerID=None): def dataChanges_by_name(self, data_change_name): logger.debug(f"data_changes_by_name: {data_change_name}") - data_changes_list = self.data_changes_list() + data_changes_list = self.dataChanges_list() for data_change in data_changes_list: if data_change.get("displayName") == data_change_name: @@ -817,7 +876,7 @@ def dataChanges_by_name(self, data_change_name): data_change_id = data_change.get("id") logger.debug(f"found {data_change_name}: {data_change_id}") - return self.data_changes_by_id(data_change_id) + return self.dataChanges_by_id(data_change_id) logger.debug(f"{data_change_name} was not found!") @@ -869,10 +928,10 @@ def dataChanges_validate(self, data_change_id): r = self.get(url) - if r.status_code == 200: - return json.loads(r.text) + # If the DCT is invalid, the response will have the errors + # so we return the JSON no matter what. - return None + return json.loads(r.text) def fileContainers_create(self): operation = "/fileContainers" @@ -995,7 +1054,7 @@ def wql_data(self, query, limit, offset): operation = "/data" url = f"{self.wql_endpoint}{operation}" - query_safe = urllib.parse.quote(query) + query_safe = urllib.parse.quote(query.strip()) offset = 0 data = {"total": 0, "data": []} @@ -1007,7 +1066,10 @@ def wql_data(self, query, limit, offset): ds = r.json() data["data"] += ds["data"] else: - return None + logger.error(f"Invalid WQL: {r.status_code}") + logger.error(r.text) + + return data # Return whatever we have... 
if len(ds["data"]) < 10000: break @@ -1018,8 +1080,26 @@ def wql_data(self, query, limit, offset): return data - def raas_run(self, report, user, format_): - url = f"{self.raas_endpoint}/{user}/{report}?format={format_}" + def raas_run(self, report, system, user, params=None, format_=None): + if system: + url = f"{self.raas_endpoint}/systemreport2/{self.tenant_name}/{report}" + else: + url = f"{self.raas_endpoint}/customreport2/{self.tenant_name}/{user}/{report}" + + separator = "?" + if params is not None and len(params) > 0: + query_str = "" + + for param in range(0, len(params), 2): + query_str += separator + params[param] + "=" + params[param + 1] + separator = "&" + + url += query_str + if format: + if "?" in url: + url = f"{url}&format={format_}" + else: + url = f"{url}?format={format_}" if url is None: raise ValueError("RaaS URL is required") @@ -1041,4 +1121,11 @@ def raas_run(self, report, user, format_): # raise ValueError(f"Output format type {output_format} is unknown") return r.text else: - logging.warning("HTTP Error: {}".format(r.content.decode("utf-8"))) + logging.error("HTTP Error: {}".format(r.content.decode("utf-8"))) + + return None + + def is_valid_operation(self, operation): + operation_list = ["insert", "truncateandinsert", "delete", "upsert", "update" ] + + return operation in operation_list diff --git a/tests/buckets-help.sh b/tests/buckets-help.sh new file mode 100755 index 0000000..2c9f2df --- /dev/null +++ b/tests/buckets-help.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +config_file="../../prism.ini" +config_name="default" + +cli="python3 ../prism/cli.py --config_file $config_file --config_name=$config_name" + +$cli buckets --help +echo "---------------------" + +$cli buckets list --help +echo "---------------------" + +$cli buckets create --help +echo "---------------------" + +$cli buckets upload --help +echo "---------------------" + +$cli buckets complete --help +echo "---------------------" + + diff --git a/tests/test_cli.py b/tests/test_cli.py index 49420a6..aa2ee5d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,8 +1,8 @@ from click.testing import CliRunner -from prism.cli import main +from prism.cli import cli def test_cli(): runner = CliRunner() - result = runner.invoke(main, ["--help"]) + result = runner.invoke(cli, ["--help"]) assert result.exit_code == 0 From 8f7cd91b84c196211f921740c2e711fc017fea54 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Fri, 15 Sep 2023 16:49:02 -0400 Subject: [PATCH 007/103] Create tables-help.sh --- tests/tables-help.sh | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100755 tests/tables-help.sh diff --git a/tests/tables-help.sh b/tests/tables-help.sh new file mode 100755 index 0000000..8d4933f --- /dev/null +++ b/tests/tables-help.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +config_file="../../prism.ini" +config_name="default" + +cli="python3 ../prism/cli.py --config_file $config_file --config_name=$config_name" + +$cli tables --help +echo "---------------------" + +$cli tables list --help +echo "---------------------" + +$cli tables create --help +echo "---------------------" + +$cli tables upload --help +echo "---------------------" + +$cli tables update --help +echo "---------------------" + +# Bad table name +$cli tables update bob From 2243cc3f7e00347925b60c4304df65f230ee6fa8 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sat, 16 Sep 2023 10:34:41 -0400 Subject: [PATCH 008/103] Updates to handle .gz files. 
--- prism/commands/util.py | 2 +- prism/prism.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/prism/commands/util.py b/prism/commands/util.py index a49f230..c25c5ca 100644 --- a/prism/commands/util.py +++ b/prism/commands/util.py @@ -271,7 +271,7 @@ def compress_files(files): target_files.append(target_file) elif f.lower().endswith(".gz"): - target_files.append(target_file) + target_files.append(f) else: print(f"File {f} is not a .gz or .csv file - skipping.") diff --git a/prism/prism.py b/prism/prism.py index c665b6d..36f48cb 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -89,7 +89,7 @@ def set_log_level(log_level=None): set_level = getattr(logging, log_level) logger.setLevel(set_level) # Convert the string to the proper log level - logger.debug("set log level: {set_level}") + logger.debug(f"set log level: {set_level}") def buckets_gen_name(self): return "cli_" + uuid.uuid4().hex From 161618224a926620e15b62dce0b3011d13d61884 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 17 Sep 2023 05:58:48 -0400 Subject: [PATCH 009/103] Fixes to bucket create and bucket upload. --- prism/commands/buckets_commands.py | 2 +- prism/prism.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index e801fd2..c733646 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -125,7 +125,7 @@ def buckets_upload(ctx, table_name, table_wid, schema_file, operation, generate, if bucket is None: logger.error("Invalid bucket for upload operation.") - return + sys.exit(1) upload = p.buckets_upload(bucket["id"], target_files) diff --git a/prism/prism.py b/prism/prism.py index 36f48cb..dbff810 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -478,8 +478,8 @@ def buckets_list(self, def buckets_create( self, name=None, - target_id=None, target_name=None, + target_id=None, schema=None, operation="TruncateAndInsert"): """Create a temporary bucket to upload files. From 45692f7045cc6db7af2482a09186dc9b5f5f81f0 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 17 Sep 2023 06:09:57 -0400 Subject: [PATCH 010/103] Fixes to bucket upload. 
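This patch makes the upload command exit when buckets_upload returns nothing and completes the bucket only when --complete is set. After completion, callers often poll the bucket until the load finishes; a rough sketch of that pattern using the buckets_list call and state descriptor seen elsewhere in this series - the polling interval and the non-"New" state names are assumptions, not values confirmed by the API, and buckets_list is assumed to default its other parameters:

import time

def wait_for_bucket(p, bucket_name, interval=10):
    # Poll the named bucket until it leaves an assumed in-flight state.
    while True:
        buckets = p.buckets_list(bucket_name=bucket_name)
        if buckets["total"] == 0:
            return None                                    # bucket no longer visible
        state = buckets["data"][0]["state"]["descriptor"]
        if state not in ("New", "Loading", "Processing"):  # assumed in-flight states
            return state                                   # terminal descriptor
        time.sleep(interval)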
--- prism/commands/buckets_commands.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index c733646..e1bb52e 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -129,9 +129,15 @@ def buckets_upload(ctx, table_name, table_wid, schema_file, operation, generate, upload = p.buckets_upload(bucket["id"], target_files) - if upload is not None and complete: - complete = p.buckets_complete(bucket["id"]) + if upload is None: + logger.error("Upload failed.") + sys.exit(1) + if complete: + complete = p.buckets_complete(bucket["id"]) + print(complete) + else: + print(upload) @click.command("complete") @click.option("-n", "--bucket_name", From e1588029d71c21f631c70e0a577191473175ffed Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 17 Sep 2023 06:54:37 -0400 Subject: [PATCH 011/103] Fixes to dataChanges list --- prism/commands/dataChanges_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index e4bfdb2..ab87aa4 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -53,7 +53,7 @@ def dataChanges_list(ctx, name, wid, activity_wid, limit, offset, type_, format_ df = pd.json_normalize(data_changes["data"]) print(df.to_csv(index=False)) else: - print(json.dumps(data_changes["data"], index=2)) + print(json.dumps(data_changes["data"], indent=2)) @click.command("validate", help="Validate the data change specified by name or ID.") From 37448ea1119a0e3cc87cfbe11ee42e52356b91a8 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 17 Sep 2023 11:19:51 -0400 Subject: [PATCH 012/103] Fixes to dataChanges run to return activity_id --- prism/commands/dataChanges_commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index ab87aa4..ee9e39f 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -113,5 +113,5 @@ def dataChanges_run(ctx, name, filecontainerid): print("Invalid DCT: " + validate["errors"][0]["error"] + " - code: " + validate["errors"][0]["code"]) sys.exit(1) else: - dct = p.dataChanges_activities_post(dct_id, filecontainerid) - print(dct) + activity_id = p.dataChanges_activities_post(dct_id, filecontainerid) + print(activity_id) From 635a8667ac4a910d7f610391f496691162e8556e Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 17 Sep 2023 14:19:06 -0400 Subject: [PATCH 013/103] Added dataChanges activities command. 
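The activities command added in this patch pairs with the run command from the previous one: run posts a new activity for a data change task, and activities reports that activity's status. A short end-to-end sketch using the client methods that appear in this series; dct_id and file_container_id are placeholders assumed to come from earlier lookups:

# dct_id is assumed to come from p.dataChanges_list(...); file_container_id may be None.
activity_id = p.dataChanges_activities_post(dct_id, file_container_id)

if activity_id is not None:
    status = p.dataChanges_activities_get(dct_id, activity_id)
    print(status["state"]["descriptor"])   # current state of the load activity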
--- prism/cli.py | 1 + prism/commands/dataChanges_commands.py | 37 +++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/prism/cli.py b/prism/cli.py index c69e723..159d160 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -195,6 +195,7 @@ def dataChanges(): dataChanges.add_command(dataChanges_commands.dataChanges_list) dataChanges.add_command(dataChanges_commands.dataChanges_validate) dataChanges.add_command(dataChanges_commands.dataChanges_run) +dataChanges.add_command(dataChanges_commands.dataChanges_activities) @cli.group("fileContainers", help="File containers (fileContainers) operations to create, load, and list.") diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index ee9e39f..e793fc5 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -114,4 +114,39 @@ def dataChanges_run(ctx, name, filecontainerid): sys.exit(1) else: activity_id = p.dataChanges_activities_post(dct_id, filecontainerid) - print(activity_id) + + if activity_id is None: + sys.exit(1) + else: + print(activity_id) + + +@click.command("activities") +@click.argument("name", required=True) +@click.argument("activity_id", required=True) +@click.pass_context +def dataChanges_activities(ctx, name, activity_id): + """ + This resource executes a data change. + + [NAME] Data Change Task name. + [FILECONTAINERID] File container with files to load. + """ + + p = ctx.obj["p"] + + # See if we have any matching data change task. + data_changes = p.dataChanges_list(name=name.replace(" ", "_"), type_="full", refresh=True) + + if data_changes["total"] != 1: + print(f"Data change task not found: {name}") + sys.exit(1) + + dct_id = data_changes["data"][0]["id"] + + status = p.dataChanges_activities_get(dct_id, activity_id) + + if status is None: + sys.exit(1) + else: + print(status) \ No newline at end of file From 2e5210be12bd4955f5a979db9c4c073a44617ec9 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 17 Sep 2023 14:34:11 -0400 Subject: [PATCH 014/103] Added status flag to dataChanges activities --- prism/commands/dataChanges_commands.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index e793fc5..fb818ae 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -122,10 +122,12 @@ def dataChanges_run(ctx, name, filecontainerid): @click.command("activities") +@click.option("-s", "--status", is_flag=True, default=False, + help="Return only the status of the activity.") @click.argument("name", required=True) @click.argument("activity_id", required=True) @click.pass_context -def dataChanges_activities(ctx, name, activity_id): +def dataChanges_activities(ctx, status, name, activity_id): """ This resource executes a data change. 
@@ -149,4 +151,7 @@ def dataChanges_activities(ctx, name, activity_id): if status is None: sys.exit(1) else: - print(status) \ No newline at end of file + if status: + print(status["state"]["descriptor"]) + else: + print(status) \ No newline at end of file From 5e7cef0e4511a37df93323b9229c212ccdbddf57 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 17 Sep 2023 14:38:33 -0400 Subject: [PATCH 015/103] Bug fix status flag in dataChanges activities --- prism/commands/dataChanges_commands.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index fb818ae..6781339 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -146,12 +146,12 @@ def dataChanges_activities(ctx, status, name, activity_id): dct_id = data_changes["data"][0]["id"] - status = p.dataChanges_activities_get(dct_id, activity_id) + current_status = p.dataChanges_activities_get(dct_id, activity_id) - if status is None: + if current_status is None: sys.exit(1) else: if status: - print(status["state"]["descriptor"]) + print(current_status["state"]["descriptor"]) else: - print(status) \ No newline at end of file + print(current_status) \ No newline at end of file From 89709a6054ec8e4dda9fd1d24ce3e0c915fe49d0 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 9 Oct 2023 20:36:43 -0400 Subject: [PATCH 016/103] General bug fixes - remove gzip to disk --- prism/cli.py | 204 +++++++++--------- prism/commands/buckets_commands.py | 38 ++-- prism/commands/dataChanges_commands.py | 32 +-- prism/commands/fileContainers_commands.py | 31 ++- prism/commands/raas_commands.py | 6 +- prism/commands/tables_commands.py | 110 +++++----- prism/commands/util.py | 125 ++++------- prism/commands/wql_commands.py | 9 +- prism/data/blank.csv.gz | 0 prism/prism.py | 246 +++++++++++++--------- tests/buckets-help.sh | 23 -- tests/tables-help.sh | 24 --- 12 files changed, 419 insertions(+), 429 deletions(-) delete mode 100644 prism/data/blank.csv.gz delete mode 100755 tests/buckets-help.sh delete mode 100755 tests/tables-help.sh diff --git a/prism/cli.py b/prism/cli.py index 159d160..023614b 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -2,6 +2,7 @@ import configparser import os import sys +import logging import prism @@ -14,7 +15,7 @@ def param_fixup(value, config, config_name, option): - # If already set by an enviroment or by a command line option, do nothing. + # If already set by an environment or by a command line option, do nothing. 
if value is not None: return value @@ -26,88 +27,46 @@ def param_fixup(value, config, config_name, option): @click.group(help="CLI for interacting with Workday’s Prism API") -@click.option( - "--base_url", - envvar="workday_base_url", - type=str, - required=False, - help="The base URL for the API client") -@click.option( - "--tenant_name", - envvar="workday_tenant_name", - type=str, - required=False, - help="The name of your Workday tenant") -@click.option( - "--username", - envvar="workday_username", - type=str, - required=False, - help="The login username of your Workday user") -@click.option( - "--password", - envvar="workday_password", - type=str, - required=False, - help="The password of your Workday user") -@click.option( - "--client_id", - envvar="prism_client_id", - type=str, - required=False, - help="The client ID for your registered API client") -@click.option( - "--client_secret", - envvar="prism_client_secret", - type=str, - required=False, - help="The client secret for your registered API client") -@click.option( - "--refresh_token", - envvar="prism_refresh_token", - type=str, - required=False, - help="The refresh token for your registered API client") -@click.option( - "--log_level", - envvar="prism_log_level", - type=str, - required=False, - help="Level of debugging to display - default = warning.") -@click.option( - "--log_file", - envvar="prism_log_file", - type=str, - required=False, - help="Output file for logging - default prism.log.") -@click.option( - "--config_file", - envvar="prism_config", - type=click.Path(exists=True), - required=False, - help="The name of a configuration with parameters for connections and logging.") -@click.option( - "--config_name", - envvar="prism_config", - type=str, - required=False, - default="default", - help="The name of a configuration with parameters for connections and logging.") +# Tenant specific parameters +@click.option("--base_url", envvar="workday_base_url", type=str, required=False, + help="The base URL for the API client") +@click.option("--tenant_name", envvar="workday_tenant_name", type=str, required=False, + help="The name of your Workday tenant") +# Credentials parameters +@click.option("--username", envvar="workday_username", type=str, required=False, + help="The login username of your Workday user") +@click.option("--password", envvar="workday_password", type=str, required=False, + help="The password of your Workday user") +@click.option("--client_id", envvar="prism_client_id", type=str, required=False, + help="The client ID for your registered API client") +@click.option("--client_secret", envvar="prism_client_secret", type=str, required=False, + help="The client secret for your registered API client") +@click.option("--refresh_token", envvar="prism_refresh_token", type=str, required=False, + help="The refresh token for your registered API client") +# Operational parameters +@click.option("--log_level", envvar="prism_log_level", type=str, required=False, + help="Level of debugging to display - default = INFO") +@click.option("--log_file", envvar="prism_log_file", type=str, required=False, + help="Output file for logging - default = prism.log") +@click.option("--config_file", envvar="prism_config_file", type=click.Path(exists=True), required=False, + help="The name of a configuration with parameters for connections and logging.") +@click.option("--config_name", envvar="prism_config_name", type=str, required=False, default="default", + help="The name of a configuration in the configuration file.") @click.pass_context def 
cli(ctx, base_url, tenant_name, - username, password, - client_id, client_secret, refresh_token, + username, password, client_id, client_secret, refresh_token, log_level, log_file, config_file, config_name): - # Attempt to locate a configuration file - this is not required and is only - # used if the configuration values are not passed on the command line or in - # the environment. + # Attempt to locate a configuration file - this is not required and config + # parameters are only used if the configuration values are not passed on + # the command line or by environment variables. if config_file is None: - # See if we have a configuration file in the current directory + # Assume there might be a configuration file in the current directory filename = os.path.join(os.getcwd(), "prism.ini") else: + # Click already ensured this is a valid file - if specified. filename = config_file # If the configuration path exists, then load values - this overrides @@ -117,13 +76,10 @@ def cli(ctx, config = configparser.ConfigParser() config.read(filename) - # Check to see if a particular configuration was asked for, it must + # Check to see if a particular configuration [name] was asked for, it must # exist in the configuration file otherwise exit with an error. - if not config.has_section(config_name): - print(f"The specified configuration {config_name} does not exist in the configuration file.") - sys.exit(1) - else: + if config.has_section(config_name): # Do fix-up on command line args. Priority comes from the command # line, then environment variables, and finally the config file. # Any value not passed and not in the environment arrives here with @@ -137,34 +93,72 @@ def cli(ctx, client_secret = param_fixup(client_secret, config, config_name, "prism_client_secret") refresh_token = param_fixup(refresh_token, config, config_name, "prism_refresh_token") log_level = param_fixup(log_level, config, config_name, "prism_log_level") - log_file = param_fixup(log_level, config, config_name, "prism_log_file") + log_file = param_fixup(log_file, config, config_name, "prism_log_file") + else: + click.echo(f"The specified configuration [{config_name}] does not exist in the configuration file.") + sys.exit(1) except configparser.Error: - print(f"Error accessing configuration file {filename}.") - # If the configuration is not available, exit - exit(1) + click.echo(f"Error accessing configuration file {filename}.") + # If the configuration is not available or is invalid, exit + sys.exit(1) if log_file is None: + # Assume a file in the PWD of the process, i.e., local directory where invoked. log_file = "prism.log" - # initialize the prism class with your credentials + if log_level is None: + set_level = logging.INFO + else: + set_level = getattr(logging, log_level) + + # Setup logging for CLI operations. + logger = logging.getLogger('prismCLI') + logger.setLevel(set_level) + + # Create a handler as specified by the user (or defaults) + fh = logging.FileHandler(log_file) + fh.setLevel(set_level) + + # Create an explicit console handler with a higher log level + ch = logging.StreamHandler() + ch.setLevel(logging.ERROR) + + # create formatter and add it to the handlers + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + ch.setFormatter(formatter) + fh.setFormatter(formatter) + + # add the handlers to logger + logger.addHandler(ch) + logger.addHandler(fh) + + logger.debug("completed initialization.") + + # initialize the Prism class from our resolved configuration. 
p = prism.Prism(base_url, tenant_name, client_id, client_secret, refresh_token) - p.set_log_level(log_level) + p.set_logging(log_file, log_level) - # store the prism object in the context + # store the prism object in the Click context ctx.obj = {"p": p} @cli.command("config") @click.argument("file") @click.pass_context -def config_file(ctx, file): - """Configure command""" +def config(ctx, file): + """ + Configuration operations to list, create, and modify parameters + """ + + # TBD -@cli.group("tables", help="Commands to list, create, load, and update Prism tables.") +@cli.group("tables") def tables(): - """Tables Command""" + """ + Table operations (/tables) to list, create, load, update, and truncate Prism tables. + """ tables.add_command(tables_commands.tables_list) @@ -174,9 +168,11 @@ def tables(): tables.add_command(tables_commands.tables_truncate) -@cli.group("buckets", help="Bucket operations to list, create and load buckets.") +@cli.group("buckets") def buckets(): - """You create a bucket for a specific table, load data into the bucket, and then commit (complete) the bucket. """ + """ + Bucket operations (/buckets) to list, create and load buckets. + """ buckets.add_command(buckets_commands.buckets_list) @@ -187,9 +183,11 @@ def buckets(): buckets.add_command(buckets_commands.buckets_name) -@cli.group("dataChanges", help="Data Change Tasks (dataChanges) operations to list, load, and activate.") +@cli.group("dataChanges") def dataChanges(): - """dataChanges.py Command""" + """ + Data Change Tasks (/dataChanges) operations to list, load, and activate. + """ dataChanges.add_command(dataChanges_commands.dataChanges_list) @@ -198,9 +196,11 @@ def dataChanges(): dataChanges.add_command(dataChanges_commands.dataChanges_activities) -@cli.group("fileContainers", help="File containers (fileContainers) operations to create, load, and list.") +@cli.group("fileContainers") def fileContainers(): - """dataChanges.py Command""" + """ + File container (/fileContainers) operations to create, load, and list. + """ fileContainers.add_command(fileContainers_commands.fileContainers_create) @@ -208,18 +208,22 @@ def fileContainers(): fileContainers.add_command(fileContainers_commands.filecontainers_load) -@cli.group("wql", help="Operations to list (dataSources) and query WQL sources (data).") +@cli.group("wql") def wql(): - """dataChanges.py Command""" + """ + Operations to list (dataSources) and query WQL sources (data). + """ wql.add_command(wql_commands.dataSources) wql.add_command(wql_commands.data) -@cli.group("raas", help="Run custom or Workday delivered report.") +@cli.group("raas") def raas(): - """dataChanges.py Command""" + """ + Run custom or Workday delivered report. 
+ """ raas.add_command(raas_commands.run) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index e1bb52e..53c9464 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -50,15 +50,15 @@ def buckets_list(ctx, wid, table_name, limit, offset, type_, search, format_, bu target = bucket["targetDataset"]["descriptor"] state = bucket["state"]["descriptor"] - print(f"{display_name}, operation: {operation}, target: {target}, state: {state}") + click.echo(f"{display_name}, operation: {operation}, target: {target}, state: {state}") elif format_ == "csv": df = pd.json_normalize(buckets["data"]) - print(df.to_csv(index=False)) + click.echo(df.to_csv(index=False)) else: - print(json.dumps(buckets, indent=2)) + click.echo(json.dumps(buckets, indent=2)) -@click.command("create", help="") +@click.command("create") @click.option("-n", "--table_name", default=None, help="Table name to associate with the bucket.") @click.option("-w", "--table_wid", default=None, @@ -78,12 +78,12 @@ def buckets_create(ctx, table_name, table_wid, file_, operation, bucket_name): p = ctx.obj["p"] if table_name is None and table_wid is None and file_ is None: - print("A table must be associated with this bucket (-n, -w, or -f must be specified).") + click.echo("A table must be associated with this bucket (-n, -w, or -f must be specified).") sys.exit(1) bucket = p.buckets_create(bucket_name, table_wid, table_name, file_, operation) - print(bucket) + click.echo(bucket) @click.command("upload") @@ -116,7 +116,7 @@ def buckets_upload(ctx, table_name, table_wid, schema_file, operation, generate, target_files = u.compress_files(file) if len(target_files) == 0: - print("No files to upload.") + click.echo("No files to upload.") sys.exit(1) # We think we have a file(s) - we don't test the contents. @@ -135,9 +135,9 @@ def buckets_upload(ctx, table_name, table_wid, schema_file, operation, generate, if complete: complete = p.buckets_complete(bucket["id"]) - print(complete) + click.echo(complete) else: - print(upload) + click.echo(upload) @click.command("complete") @click.option("-n", "--bucket_name", @@ -153,7 +153,7 @@ def buckets_complete(ctx, bucket_name, bucket_wid): p = ctx.obj["p"] if bucket_wid is None and bucket_name is None: - print("A bucket wid or a bucket name must be specified.") + click.echo("A bucket wid or a bucket name must be specified.") sys.exit(1) if bucket_wid is not None: @@ -172,26 +172,34 @@ def buckets_complete(ctx, bucket_name, bucket_wid): bucket_state = bucket["state"]["descriptor"] if bucket_state != "New": - print(f"Bucket state is \"{bucket_state}\" - only \"New.\" buckets can be completed.") + click.echo(f"Bucket state is \"{bucket_state}\" - only \"New.\" buckets can be completed.") sys.exit(1) - print(p.buckets_complete(bucket["id"])) + click.echo(p.buckets_complete(bucket["id"])) @click.command("status") -@click.option("-w", "--wid", required=False, help="Bucket name to status") +@click.option("-w", "--wid", required=False, help="Bucket wid to status") @click.argument("name", required=False) @click.pass_context def buckets_status(ctx, name, wid): + """ + Get the status of a bucket by name or workday ID. + + [NAME] name of bucket. 
+ """ p=ctx.obj["p"] buckets=p.buckets_list(wid, bucket_name=name) if buckets["total"] != 0: - print(buckets["data"][0]["state"]["descriptor"]) + click.echo(buckets["data"][0]["state"]["descriptor"]) @click.command("name") @click.pass_context def buckets_name(ctx): - print(ctx.obj["p"].buckets_gen_name()) \ No newline at end of file + """ + Generate a bucket name to use for other bucket operations. + """ + click.echo(ctx.obj["p"].buckets_gen_name()) \ No newline at end of file diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 6781339..80cae65 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -32,7 +32,7 @@ def dataChanges_list(ctx, name, wid, activity_wid, limit, offset, type_, format_ data_changes = p.dataChanges_list(name, wid, activity_wid, limit, offset, type_, search) if data_changes["total"] == 0: - print("No data change tasks found.") + click.echo("No data change tasks found.") return data_changes["data"] = sorted(data_changes["data"], key=lambda dct: dct["displayName"].lower()) @@ -48,12 +48,12 @@ def dataChanges_list(ctx, name, wid, activity_wid, limit, offset, type_, format_ target_name = dct["target"]["name"] operation = dct["operation"]["operationType"]["descriptor"] - print(f"{display_name}, source: {source_name}, target: {target_name}, operation: {operation}") + click.echo(f"{display_name}, source: {source_name}, target: {target_name}, operation: {operation}") elif format_ == "csv": df = pd.json_normalize(data_changes["data"]) - print(df.to_csv(index=False)) + click.echo(df.to_csv(index=False)) else: - print(json.dumps(data_changes["data"], indent=2)) + click.echo(json.dumps(data_changes["data"], indent=2)) @click.command("validate", help="Validate the data change specified by name or ID.") @@ -65,7 +65,7 @@ def dataChanges_validate(ctx, name, wid, search): p = ctx.obj["p"] if name is None and wid is None: - print("A data change task name or a wid must be specified.") + click.echo("A data change task name or a wid must be specified.") sys.exit(1) # See if we have any matching data change tasks. @@ -76,12 +76,12 @@ def dataChanges_validate(ctx, name, wid, search): refresh=True) if data_changes["total"] == 0: - print("No matching data change task(s) found.") + click.echo("No matching data change task(s) found.") sys.exit(1) for dct in data_changes["data"]: validate = p.dataChanges_validate(dct["id"]) - print(validate) + click.echo(validate) @click.command("run") @@ -90,7 +90,7 @@ def dataChanges_validate(ctx, name, wid, search): @click.pass_context def dataChanges_run(ctx, name, filecontainerid): """ - This resource executes a data change. + Execute the named data change task with an optional file container. [NAME] Data Change Task name. [FILECONTAINERID] File container with files to load. 
@@ -102,7 +102,7 @@ def dataChanges_run(ctx, name, filecontainerid): data_changes = p.dataChanges_list(name=name.replace(" ", "_"), type_="full", refresh=True) if data_changes["total"] != 1: - print(f"Data change task not found: {name}") + click.echo(f"Data change task not found: {name}") sys.exit(1) dct_id = data_changes["data"][0]["id"] @@ -110,7 +110,7 @@ def dataChanges_run(ctx, name, filecontainerid): validate = p.dataChanges_validate(dct_id) if "error" in validate: - print("Invalid DCT: " + validate["errors"][0]["error"] + " - code: " + validate["errors"][0]["code"]) + click.echo("Invalid DCT: " + validate["errors"][0]["error"] + " - code: " + validate["errors"][0]["code"]) sys.exit(1) else: activity_id = p.dataChanges_activities_post(dct_id, filecontainerid) @@ -118,7 +118,7 @@ def dataChanges_run(ctx, name, filecontainerid): if activity_id is None: sys.exit(1) else: - print(activity_id) + click.echo(activity_id) @click.command("activities") @@ -129,10 +129,10 @@ def dataChanges_run(ctx, name, filecontainerid): @click.pass_context def dataChanges_activities(ctx, status, name, activity_id): """ - This resource executes a data change. + Get the status for a specific activity associated with a data change task. [NAME] Data Change Task name. - [FILECONTAINERID] File container with files to load. + [ACTIVITY_ID] File container with files to load. """ p = ctx.obj["p"] @@ -141,7 +141,7 @@ def dataChanges_activities(ctx, status, name, activity_id): data_changes = p.dataChanges_list(name=name.replace(" ", "_"), type_="full", refresh=True) if data_changes["total"] != 1: - print(f"Data change task not found: {name}") + click.echo(f"Data change task not found: {name}") sys.exit(1) dct_id = data_changes["data"][0]["id"] @@ -152,6 +152,6 @@ def dataChanges_activities(ctx, status, name, activity_id): sys.exit(1) else: if status: - print(current_status["state"]["descriptor"]) + click.echo(current_status["state"]["descriptor"]) else: - print(current_status) \ No newline at end of file + click.echo(current_status) \ No newline at end of file diff --git a/prism/commands/fileContainers_commands.py b/prism/commands/fileContainers_commands.py index 933a179..2bad422 100644 --- a/prism/commands/fileContainers_commands.py +++ b/prism/commands/fileContainers_commands.py @@ -1,21 +1,26 @@ import click +import sys + from . import util as u -@click.command("create", help="Use this method to create a new fileContainers.") +@click.command("create") @click.pass_context def fileContainers_create(ctx): + """ + Create a new fileContainers object returning the ID. 
+ """ p = ctx.obj["p"] fileContainer = p.fileContainers_create() if fileContainer is not None: - print(fileContainer["id"]) + click.echo(fileContainer["id"]) else: - print("") + sys.exit(1) -@click.command("list", help="This resource returns all files for a file container.") +@click.command("list", help="List the files for a file container.") @click.argument("fileContainerID") @click.pass_context def filecontainers_list(ctx, filecontainerid): @@ -23,20 +28,26 @@ def filecontainers_list(ctx, filecontainerid): files = p.filecontainers_list(filecontainerid) - print(files) + click.echo(files) -@click.command("load", help="This resource loads the file into a file container.") -@click.option("-f", "--fileContainerID", default=None, help="File container ID to load the file into.") +@click.command("load") +@click.option("-f", "--fileContainerID", default=None, help="Target File container ID, default to a new container.") @click.argument("file", nargs=-1, type=click.Path(exists=True)) @click.pass_context def filecontainers_load(ctx, filecontainerid, file): + """ + Load one or more file into a file container. + + [FILE] one or more files to load. + """ p = ctx.obj["p"] fid = u.fileContainers_load(p, filecontainerid, file) if fid is None: - print("Error loading fileContainer.") + click.echo("Error loading fileContainer.") else: - # Return the file container ID to the command line. - print(fid) + # Return the file container ID to the command line. If a + # filecontainerID was passed, simply return that id. + click.echo(fid) diff --git a/prism/commands/raas_commands.py b/prism/commands/raas_commands.py index 5a23457..845f115 100644 --- a/prism/commands/raas_commands.py +++ b/prism/commands/raas_commands.py @@ -12,12 +12,12 @@ def run(ctx, system, user, format_, report, params): p = ctx.obj["p"] if system and user is not None: - print("Please specify only system or user, not both.") + click.echo("Please specify only system or user, not both.") return if not system and user is None: - print("Please specify either system or user.") + click.echo("Please specify either system or user.") report_output = p.raas_run(report, system, user, params, format_) - print(report_output) + click.echo(report_output) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index d22905f..bb778de 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -1,47 +1,41 @@ import sys import os - +import logging import click import json import pandas as pd from . 
import util as u +logger = logging.getLogger("prismCLI") + @click.command("list") @click.option("-w", "--wid", help="Unique WID for Prism table or dataset.") -@click.option("-l", "--limit", - help="The maximum number of object data entries included in the response, default=all.", - type=int, - default=None) -@click.option("-o", "--offset", - help="The offset to the first object in a collection to include in the response.", - type=int, - default=None) -@click.option("-t", "--type", "type_", - help="How much information to be returned in response JSON.", +@click.option("-l", "--limit", type=int, default=None, + help="The maximum number of object data entries included in the response, default=all.") +@click.option("-o", "--offset", type=int, default=None, + help="The offset to the first object in a collection to include in the response.") +@click.option("-t", "--type", "type_", default="summary", type=click.Choice(["summary", "full", "permissions"], case_sensitive=False), - default="summary") -@click.option("-f", "--format", "format_", - default="json", - help="Format output as JSON, summary, schema, or CSV.", - type=click.Choice(['json', 'summary', 'schema', 'csv'], case_sensitive=False)) -@click.option("-s", "--search", - help="Search substring in api name or display name (default=false).", - is_flag=True) + help="How much information returned for each table.") +@click.option("-f", "--format", "format_", default="json", + type=click.Choice(['json', 'summary', 'schema', 'csv'], case_sensitive=False), + help="Format output as JSON, summary, schema, or CSV.") +@click.option("-s", "--search", is_flag=True, + help="Enable substring search of NAME in api name or display name, default=False (exact match).") @click.argument("name", required=False) @click.pass_context def tables_list(ctx, name, wid, limit, offset, type_, format_, search): - """View the tables or datasets permitted by the security profile of the current user. + """List the tables or datasets permitted by the security profile of the current user. - Prism API TABLENAME of the table to list. + [NAME] Prism table name to list. """ - if type_ in ("summary", "permissions") and format in ("schema", "csv"): - # Summary results cannot generate schema or CSV output. - - print(f"Invalid combination of type {type_} and format {format}.") + if type_ in ("summary", "permissions") and format_ in ("schema", "csv"): + # Summary results cannot generate schema or CSV output since there will be no fields. + logger.critical(f"Invalid combination of type \"{type_}\" and format \"{format_}\".") sys.exit(1) p = ctx.obj["p"] @@ -59,7 +53,7 @@ def tables_list(ctx, name, wid, limit, offset, type_, format_, search): # The results could be one table or an array of multiple # tables - simply dump the returned object. 
- print(json.dumps(tables, indent=2)) + click.echo(json.dumps(tables, indent=2)) elif format_ == "summary": for table in tables["data"]: display_name = table["displayName"] @@ -68,14 +62,15 @@ def tables_list(ctx, name, wid, limit, offset, type_, format_, search): refreshed = table["dateRefreshed"] if "dateRefreshed" in table else "unknown" enabled = table["enableForAnalysis"] if "enableForAnalysis" in table else "Null" - print(f'{display_name}, Enabled: {enabled}, Rows: {rows}, Size: {size}, Refreshed: {refreshed}') + click.echo(f'{display_name}, Enabled: {enabled}, Rows: {rows}, Size: {size}, Refreshed: {refreshed}') elif format_ == "csv": df = pd.json_normalize(tables["data"]) - print(df.to_csv(index=False)) + click.echo(df.to_csv(index=False)) elif format_ == "schema": # Dump out the fields of the first table in the result in # a format compatible with a schema used to created or edit # a table. + table = tables["data"][0] # Only output the first table. # Remove the Prism audit columns. @@ -92,21 +87,21 @@ def tables_list(ctx, name, wid, limit, offset, type_, format_, search): if "id" in fld: del fld["id"] - print(json.dumps(fields, indent=2)) + click.echo(json.dumps(fields, indent=2)) else: - print(u.csv_from_fields(tables["data"][0]["fields"])) + click.echo(u.csv_from_fields(tables["data"][0]["fields"])) @click.command("create") @click.option("-n", "--name", - help="Table name - overrides schema value.") + help="Table name - overrides name from schema.") @click.option("-d", "--displayName", help="Specify a display name - defaults to name.") @click.option("-t", "--tags", multiple=True, help="Tags to organize the table in the Data Catalog.") @click.option("-e", "--enableForAnalysis", type=bool, is_flag=True, default=None, help="Enable this table for analytics.") -@click.option("-n", "--sourceName", +@click.option("-s", "--sourceName", help="The API name of an existing table to copy.") @click.option("-w", "--sourceWID", help="The WID of an existing table to copy.") @@ -122,34 +117,38 @@ def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, s """ p = ctx.obj["p"] - # We can assume a valid schema - get_schema exits if there is a problem. + # We can assume a valid schema - get_schema sys.exits if there is a problem. schema = u.get_schema(p, file, sourcename, sourcewid) # Initialize a new schema with the particulars for this table operation. if name is not None: # If we got a name, set it in the table schema - schema["name"] = name + schema["name"] = name.replace(" ", "_") # Minor clean-up elif "name" not in schema: # The schema doesn't have a name and none was given - exit. - print("Table --name must be specified.") + logger.critical("Table --name must be specified.") sys.exit(1) if displayname is not None: # If we got a display name, set it in the schema schema["displayName"] = displayname elif "displayName" not in schema: - # Default the display name to the name. + # Default the display name to the name if not in the schema. schema["displayName"] = name if enableforanalysis is not None: schema["enableForAnalysis"] = enableforanalysis elif "enableForAnalysis" not in schema: + # Default to False - do not enable. schema["enableForAnalysis"] = False + # Create the table. 
table_def = p.tables_create(schema) if table_def is not None: - print(f"Table {name} created.") + click.echo(f"Table {name} created.") + else: + click.echo(f"Error creating table {name}.") @click.command("update") @@ -170,25 +169,26 @@ def tables_update(ctx, name, file, sourcename, sourcewid, truncate): p = ctx.obj["p"] - # Before doing anything, table name must exist + # Before doing anything, table name must exist. tables = p.tables_list(name=name) if tables["total"] == 0: - print(f"Table \"{name}\" to update not found.") + logger.critical(f"Table \"{name}\" not found.") sys.exit(1) table_id = tables["data"][0]["id"] + # Figure out the new schema either by file or other table. fields = u.get_schema(p, file, sourcename, sourcewid) - p.tables_update(table_id, file) + p.tables_update(wid=table_id, schema=file, truncate=truncate) - print("update") + click.echo("update") @click.command("upload") -@click.option("-o", "--operation", default="TruncateandInsert", - help="Operation for the table operation - default to TruncateandInsert.") +@click.option("-o", "--operation", default="TruncateAndInsert", + help="Operation for the table operation - default to TruncateAndInsert.") @click.argument("name", required=True) @click.argument("file", nargs=-1, type=click.Path(exists=True)) @click.pass_context @@ -196,21 +196,21 @@ def tables_upload(ctx, name, operation, file): """ Upload a file into the table using a bucket. - NOTE: This operation will create ".csv.gz" files for each .csv file. + NOTE: This operation creates ".csv.gz" files for each .csv file. """ p = ctx.obj["p"] # Convert the file(s) provided to a list of compressed files. - target_files = u.compress_files(file) + target_files = u.get_files(file) if len(target_files) == 0: - print("No files to upload.") + logging.getLogger("prismCLI").critical("No files to upload.") sys.exit(1) - bucket = p.buckets_create(target_name=name, - operation=operation) + bucket = p.buckets_create(target_name=name, operation=operation) if bucket is None: + logging.getLogger("prismCLI").critical("Bucket creation failed.") sys.exit(1) results = p.buckets_upload(bucket["id"], target_files) @@ -232,24 +232,20 @@ def tables_truncate(ctx, name): # Create an empty bucket with a delete operation p = ctx.obj["p"] - # A truncate (delete) operations still needs a file. - blank_file = os.path.join(os.path.dirname(os.path.realpath(__file__))) - blank_file = os.path.join(blank_file, "..", "data", "blank.csv.gz") - # Get a bucket using a generated name and an explicit Delete operation. - bucket = p.buckets_create(target_name=name, operation="Delete") + bucket = p.buckets_create(target_name=name, operation="TruncateAndInsert") if bucket is None: - print(f"Unable to truncate {name} - error getting bucket.") + logging.getLogger("prismCLI").critical(f"Unable to truncate {name} - error getting bucket.") sys.exit(1) bucket_id = bucket["id"] - # Put the blank file into the bucket. - bucket = p.buckets_upload(bucket_id, blank_file) + # Don't specify a file to put a zero sized file into the bucket. + bucket = p.buckets_upload(bucket_id) # Ask Prism to run the delete statement by completing the bucket. 
bucket = p.buckets_complete(bucket_id) if bucket is None: - print(f"Unable to truncate table {name}.") \ No newline at end of file + click.echo(f"Unable to truncate table {name}.") diff --git a/prism/commands/util.py b/prism/commands/util.py index c25c5ca..a77c371 100644 --- a/prism/commands/util.py +++ b/prism/commands/util.py @@ -1,13 +1,13 @@ -import gzip import os.path import sys -import shutil import json import csv +import click +import logging def get_schema(p, file, sourceName, sourceWID): - # Start witha blank schema definition. + # Start with a blank schema definition. schema = {} # A file always takes precedence over sourceName and sourceWID @@ -19,7 +19,7 @@ def get_schema(p, file, sourceName, sourceWID): with open(file) as json_file: schema = json.load(json_file) except Exception as e: - print(f"Invalid schema file: {e.msg}.") + click.echo(f"Invalid schema file: {e.msg}.") sys.exit(1) # The JSON file could be a complete table definitions (GET:/tables - full) or just @@ -31,18 +31,18 @@ def get_schema(p, file, sourceName, sourceWID): else: # This should be a full schema, perhaps from a table list command. if "name" not in schema and "fields" not in schema: - print("Invalid schema - name and fields attribute not found.") + click.echo("Invalid schema - name and fields attribute not found.") sys.exit(1) elif file.lower().endswith(".csv"): schema = schema_from_csv(p, file) else: - print("Invalid file extension - valid extensions are .json or .csv.") + click.echo("Invalid file extension - valid extensions are .json or .csv.") sys.exit(1) else: # No file was specified, check for a source table. if sourceName is None and sourceWID is None: - print("No schema provided and a table (--sourceName or --sourceWID) not specified.") + click.echo("No schema provided and a table (--sourceName or --sourceWID) not specified.") sys.exit(1) if sourceWID is not None: @@ -51,7 +51,7 @@ def get_schema(p, file, sourceName, sourceWID): tables = p.tables_list(name=sourceName, type_="full") # Exact match on API Name if tables["total"] == 0: - print("Invalid --sourceName or --sourceWID : table not found.") + click.echo("Invalid --sourceName or --sourceWID : table not found.") sys.exit(1) else: schema = tables["data"][0] @@ -60,8 +60,6 @@ def get_schema(p, file, sourceName, sourceWID): def schema_from_csv(prism, file): - global data_sources - schema = {"fields": []} # Start with an empy schema definition. with open(file, newline='') as csvfile: @@ -74,7 +72,7 @@ def schema_from_csv(prism, file): # The minimum definition is a name column - exit if not found. No other # column definition is required to build a valid field list. if "name" not in reader.fieldnames: - print("CSV file {file} does not contain a name column header in first line.") + click.echo(f"CSV file {file} does not contain a name column header in first line.") sys.exit(1) # Prism fields always have an ordinal sequence assigned to each field. 
@@ -97,27 +95,22 @@ def schema_from_csv(prism, file): else: field["externalId"] = False - match row["type"].lower(): - case "text": - field["type"] = { - "id": "fdd7dd26156610006a12d4fd1ea300ce", - "descriptor": "Text" - } - case "date": - field["type"] = { - "id": "fdd7dd26156610006a71e070b08200d6", - "descriptor": "Date" - } + fld_type = "none" + if "type" in row and row["type"] in ["text", "date", "numeric", "instance"]: + field["type"] = { "id" : f'Schema_Field_Type={row["type"]}'} + fld_type = row["type"].lower() + else: + field["type"] = { "id" : f'Schema_Field_Type=Text'} + + match fld_type: + case "date": if "parseformat" in row and isinstance(row["parseformat"], str) and len(row["parseformat"]) > 0: field["parseFormat"] = row["parseformat"] + else: + field["parseFormat"] = "yyyy-MM-dd" case "numeric": - field["type"] = { - "id": "32e3fa0dd9ea1000072bac410415127a", - "descriptor": "Numeric" - } - if "precision" in row: field["precision"] = row["precision"] @@ -127,17 +120,11 @@ def schema_from_csv(prism, file): case "instance": # We need all the data sources to resolve the business objects # to include their WID. - if data_sources is None: - data_sources = prism.datasources_list() - - if data_sources is None or data_sources["total"] == 0: - print("Error calling WQL/dataSources") - return + data_sources = prism.datasources_list() - field["type"] = { - "id": "db9cd1dbf95010000e8fc7c78cd012a9", - "descriptor": "Instance" - } + if data_sources is None or data_sources["total"] == 0: + click.echo("Error calling WQL/dataSources") + return # Find the matching businessObject bo = [ds for ds in data_sources["data"] @@ -145,12 +132,6 @@ def schema_from_csv(prism, file): if len(bo) == 1: field["businessObject"] = bo[0]["businessObject"] - case _: - # Default to string - field["type"] = { - "id": "fdd7dd26156610006a12d4fd1ea300ce", - "descriptor": "Text" - } schema["fields"].append(field) ordinal += 1 @@ -191,7 +172,7 @@ def fileContainers_load(prism, filecontainersid, file): # by checking the extension. if file is None or len(file) == 0: - print("An existing file name is required to upload to a file container.") + click.echo("An existing file name is required to upload to a file container.") return None # Verify that each file is already a gzip file or a CSV we gzip for them. @@ -199,23 +180,7 @@ def fileContainers_load(prism, filecontainersid, file): # The CSV contents are not validated here - Prism eventually # returns an error if the content is invalid. - target_files = [] - - for f in file: - target_file = file - - if f.lower().endswith(".csv"): - # GZIP the file into the same directory with the appropriate extension. - target_file = f + ".gz" - - with open(f, 'rb') as f_in: - with gzip.open(target_file, "wb") as f_out: - shutil.copyfileobj(f_in, f_out) - elif not f.lower().endswith(".gz"): - print(f"File {f} is not a .gz or .csv file.") - return None - - target_files.append(target_file) + target_files = compress_files(file) # Assume we have a fID - it can be None right now # if the user wants to create a fileContainers during @@ -226,11 +191,13 @@ def fileContainers_load(prism, filecontainersid, file): # Load the file and retrieve the fID - this is only # set by the load on the first file - subsequent # files are loaded into the same container (fID). - fID = prism.filecontainers_load(fID, target_file, ) + fID = prism.filecontainers_load(fID, target_file) # If the fID comes back blank, then something is not # working. 
Note: any error messages have already # been displayed by the load operation. + + # NOTE: this operation never fails, the file is skipped. if fID is None: break @@ -240,39 +207,33 @@ def fileContainers_load(prism, filecontainersid, file): return fID -def compress_files(files): +def get_files(files): target_files = [] if files is None: - print("File(s) must be specified.") + logging.getLogger("prismCLI").warning("File(s) must be specified.") return target_files elif isinstance(files, list) and len(files) == 0: - print("File(s) must be specified.") + logging.getLogger("prismCLI").warning("File(s) must be specified.") return target_files - elif isinstance(files, str) and not files: - print("File(s) must be specified.") + elif isinstance(files, tuple) and len(files) == 0: + logging.getLogger("prismCLI").warning("File(s) must be specified.") return target_files - - if isinstance(files, str): - files = [ files ] + elif isinstance(files, str): + if not files: + logging.getLogger("prismCLI").warning("File(s) must be specified.") + return target_files + else: + files = [ files ] for f in files: if not os.path.exists(f): - print(f"FIle {f} not found - skipping.") + logging.getLogger("prismCLI").warning(f"FIle {f} not found - skipping.") continue - if f.lower().endswith(".csv"): - # GZIP the file into the same directory with the appropriate extension. - target_file = f + ".gz" - - with open(f, 'rb') as f_in: - with gzip.open(target_file, "wb") as f_out: - shutil.copyfileobj(f_in, f_out) - - target_files.append(target_file) - elif f.lower().endswith(".gz"): + if f.lower().endswith(".csv") or f.lower().endswith(".csv.gz"): target_files.append(f) else: - print(f"File {f} is not a .gz or .csv file - skipping.") + logging.getLogger("prismCLI").warning(f"File {f} is not a .gz or .csv file - skipping.") return target_files diff --git a/prism/commands/wql_commands.py b/prism/commands/wql_commands.py index be1151e..87abf1c 100644 --- a/prism/commands/wql_commands.py +++ b/prism/commands/wql_commands.py @@ -2,6 +2,7 @@ import json import pandas as pd + @click.command("dataSources", help="View the buckets permitted by the security profile of the current user.") @click.option("-w", "--wid", @@ -19,7 +20,7 @@ def dataSources(ctx, wid, limit, offset, search, name): ds = p.wql_dataSources(wid, limit, offset, name, search) - print(json.dumps(ds, indent=2)) + click.echo(json.dumps(ds, indent=2)) @click.command("data", @@ -38,7 +39,7 @@ def data(ctx, limit, offset, file_, csv_, query): p = ctx.obj["p"] if file_ is None and query is None: - print("No query provided") + click.echo("No query provided") return if query is not None: @@ -52,7 +53,7 @@ def data(ctx, limit, offset, file_, csv_, query): if rows["total"] != 0: if csv_: df = pd.json_normalize(rows["data"]) - print(df.to_csv(index=False)) + click.echo(df.to_csv(index=False)) else: - print(json.dumps(rows, indent=2)) + click.echo(json.dumps(rows, indent=2)) diff --git a/prism/data/blank.csv.gz b/prism/data/blank.csv.gz deleted file mode 100644 index e69de29..0000000 diff --git a/prism/prism.py b/prism/prism.py index dbff810..cf642ef 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -13,21 +13,13 @@ import urllib import sys import uuid - -logger = logging.getLogger(__name__) -logger.setLevel(logging.WARNING) - -# writing to stdout -handler = logging.StreamHandler(sys.stdout) -handler.setLevel(logging.DEBUG) -log_format = logging.Formatter('[%(asctime)s] [%(levelname)s] - %(message)s') -handler.setFormatter(log_format) -logger.addHandler(handler) +import io 
+import gzip def log_elapsed(msg, timedelta): elapsed = timedelta.total_seconds() - logger.debug(f"{msg}: elapsed {elapsed:.5f}") + logging.getLogger(__name__).debug(f"{msg}: elapsed {elapsed:.5f}") class Prism: @@ -79,24 +71,59 @@ def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_toke self.bearer_token = None self.bearer_token_timestamp = None - @staticmethod - def set_log_level(log_level=None): - set_level = None + # Default a logger - the default may be re-configured + # in the set_logging method. + self.logger = logging.getLogger(__name__) + self.logger.setLevel(logging.WARNING) + # writing to stdout only... + handler = logging.StreamHandler(sys.stdout) + handler.setLevel(logging.WARNING) + log_format = logging.Formatter('[%(asctime)s] [%(levelname)s] - %(message)s') + handler.setFormatter(log_format) + self.logger.addHandler(handler) + + def set_logging(self, log_file=None, log_level="INFO"): + # Resolve the log level - default to info if empty or invalid. if log_level is None: set_level = logging.INFO else: - set_level = getattr(logging, log_level) + # Make sure the caller gave us a valid "name" for logging level. + if hasattr(logging, log_level): + set_level = getattr(logging, log_level) + else: + set_level = getattr(logging, "INFO") + + # If no file was specified, simply loop over any handlers and + # set the logging level. + if log_file is None: + for handler in self.logger.handlers: + handler.setLevel(set_level) + else: + # Setup logging for CLI operations. + for handler in self.logger.handlers: + self.logger.removeHandler(handler) + + self.logger.setLevel(set_level) - logger.setLevel(set_level) # Convert the string to the proper log level - logger.debug(f"set log level: {set_level}") + # Create a handler as specified by the user (or defaults) + fh = logging.FileHandler(log_file) + fh.setLevel(set_level) + + # create formatter and add it to the handlers + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + fh.setFormatter(formatter) + + self.logger.addHandler(fh) + + self.logger.debug(f"set log level: {set_level}") def buckets_gen_name(self): return "cli_" + uuid.uuid4().hex def get(self, url, headers=None, params=None, log_tag="generic get"): if url is None: - logger.warning("http_get: missing URL") + self.logger.warning("http_get: missing URL") return None # Every request requires an authorization header - make it true. 
@@ -112,13 +139,13 @@ def get(self, url, headers=None, params=None, log_tag="generic get"): log_elapsed("GET: " + log_tag, response.elapsed) if response.status_code != 200: - logger.error(f"Invalid HTTP status: {response.status_code}") + self.logger.error(f"Invalid HTTP status: {response.status_code}") return response def validate_schema(self, schema): if "fields" not in schema or not isinstance(schema["fields"], list) or len(schema["fields"]) == 0: - logger.error("Invalid schema detected!") + self.logger.error("Invalid schema detected!") return False # Add a sequential order (ordinal) on the fields to (en)force @@ -141,7 +168,7 @@ def create_bearer_token(self): """ - logger.debug("create_bearer_token") + self.logger.debug("create_bearer_token") headers = {"Content-Type": "application/x-www-form-urlencoded"} @@ -156,33 +183,37 @@ def create_bearer_token(self): log_elapsed("create_bearer_token", r.elapsed) if r.status_code == 200: - logger.debug("successfully obtained bearer token") + self.logger.debug("successfully obtained bearer token") self.bearer_token = r.json()["access_token"] self.bearer_token_timestamp = time.time() - else: - logger.warning(f"HTTP status code {r.status_code}: {r.content}") - self.bearer_token = None - def get_bearer_token(self): - """Get the current bearer token, or create a new one if it doesn't exist, or it's older than 15 minutes.""" - if self.bearer_token is None: - self.create_bearer_token() + return True - if time.time() - self.bearer_token_timestamp > 900: - self.create_bearer_token() + self.logger.error(f"HTTP status code {r.status_code}: {r.content}") + self.bearer_token = None + self.bearer_token_timestamp = None + + return False + + def get_bearer_token(self): + """ + Get the current bearer token, or create a new one if it doesn't exist, or it's older than 15 minutes. + """ + if self.bearer_token is None or (time.time() - self.bearer_token_timestamp) > 900: + if not self.create_bearer_token(): + return "" return self.bearer_token def reset_bearer_token(self): """Remove the current bearer token to force getting a new token on the next API call.""" self.bearer_token = None + self.bearer_token_timestamp = None def tables_list( self, - name=None, - wid=None, - limit=None, - offset=None, + name=None, wid=None, + limit=None, offset=None, type_="summary", search=False): """Obtain details for all tables or a given table(s). @@ -204,7 +235,7 @@ def tables_list( The offset from zero of tables to return. type_ : str - details + Level of detail to return. search : bool Enable substring searching for table names or ids @@ -216,12 +247,12 @@ def tables_list( """ operation = "/tables" - logger.debug(f"GET: {operation}") + self.logger.debug(f"GET: {operation}") url = self.prism_endpoint + operation if type_ is None or type_ not in ["full", "summary", "permissions"]: - logger.warning("Invalid return type for tables list operation.") + self.logger.warning("Invalid type for tables list operation - defaulting to summary.") type_ = "summary" # If we are searching, then we have to get everything using @@ -229,12 +260,13 @@ def tables_list( params = {} - # See if we want to add table name as a search parameter. + # See if we want to add an explicit table name as a search parameter. if not search and name is not None: # Here, the user is not searching, they gave us an exact name. params["name"] = name.replace(" ", "_") # Minor clean-up - limit = 1 # Should only be 0 (not found) or 1 (found) tables found. + # Should only be 0 (not found) or 1 (found) tables found. 
+ limit = 1 offset = 0 # When searching by name or id, set the maximum limit size to @@ -248,31 +280,38 @@ def tables_list( search = True # Force a search so we get all tables limit = 100 - if offset is None: - offset = 0 + offset = offset if offset is not None else 0 - # Always assume we will retrieve more than one page + # Finalized the parameters to the GET:/tables call. params["limit"] = limit params["offset"] = offset params["type"] = type_ - # Always return a valid JSON object of results! + # Always return a valid JSON object of results regardless of + # errors or API responses. return_tables = {"total": 0, "data": []} + # Always assume we will retrieve more than one page. while True: r = self.get(url, params=params, log_tag=operation) if r.status_code != 200: - logger.error(f"Invalid HTTP return code: {r.status_code}") + self.logger.error(f"Invalid HTTP return code: {r.status_code}") + + # Whatever we have captured (perhaps nothing) so far will + # be returned due to unexpected status code. break + # Convert the response to a list of tables. tables = r.json() if not search and name is not None: # Explicit table name - # We are not searching and we have a specific table - return whatever we got. + # We are not searching, and we have a specific table - return + # whatever we got (maybe nothing). return tables - # If we are not searching, simply append all the results to the return object. + # Figure out what of this batch of tables should be part of the + # return results, i.e., search the batch for matches. if name is not None: # Substring search for matching table names @@ -292,7 +331,7 @@ def tables_list( break if search: - # Figure out what to search for on the next page. + # Move on to the next page. offset += limit params["offset"] = offset else: @@ -300,7 +339,7 @@ def tables_list( break # We always return a valid JSON. - return_tables["total"] = len(return_tables["data"]) + return_tables["total"] = len(return_tables["data"]) # Separate step for debugging. 
return return_tables def tables_create(self, schema): @@ -318,11 +357,11 @@ def tables_create(self, schema): """ operation = "/tables" - logger.debug(f"POST : {operation}") - + self.logger.debug(f"POST : {operation}") url = self.prism_endpoint + "/tables" if not self.validate_schema(schema): + self.logger.error("Invalid schema for create operation.") return None headers = { @@ -335,10 +374,10 @@ def tables_create(self, schema): if r.status_code == 201: return r.json() elif r.status_code == 400: - logger.warning(r.json()["errors"][0]["error"]) - logger.warning(r.text) + self.logger.error(r.json()["errors"][0]["error"]) + self.logger.error(r.text) else: - logger.warning(f"HTTP status code {r.status_code}: {r.content}") + self.logger.error(f"HTTP status code {r.status_code}: {r.content}") return None @@ -349,9 +388,13 @@ def tables_update(self, wid, schema, truncate=False): """ operation = f"/tables/{wid}" - logger.debug(f"PUT: {operation}") + self.logger.debug(f"PUT: {operation}") url = self.prism_endpoint + operation + if not self.validate_schema(schema): + self.logger.error("Invalid schema for update operation.") + return None + headers = { "Authorization": "Bearer " + self.get_bearer_token(), "Content-Type": "application/json", @@ -362,10 +405,11 @@ def tables_update(self, wid, schema, truncate=False): if r.status_code == 200: return r.json() - logger.warning(f"Error updating table {wid} - {r.text}.") + self.logger.error(f"Error updating table {wid} - {r.text}.") return None - def tables_patch(self, id, displayName=None, description=None, documentation=None, enableForAnalysis=None, schema=None): + def tables_patch(self, id, displayName=None, description=None, documentation=None, enableForAnalysis=None, + schema=None): return None def buckets_list(self, @@ -389,8 +433,7 @@ def buckets_list(self, """ operation = "/buckets" - logger.debug(f"GET: {operation}") - + self.logger.debug(f"GET: {operation}") url = self.prism_endpoint + operation # Start the return object - this routine NEVER fails @@ -398,7 +441,7 @@ def buckets_list(self, return_buckets = {"total": 0, "data": []} # If we are searching, then we have to get everything first - # so don't add a name to the bucket. + # so don't add a name to the bucket query. params = {} @@ -417,7 +460,7 @@ def buckets_list(self, limit = 100 # Max pagesize to retrieve in the fewest REST calls. offset = 0 - params["limit"] = limit + params["limit"] = limit if limit is not None else 100 params["offset"] = offset if offset is not None else 0 if type_ in ["summary", "full"]: @@ -430,7 +473,7 @@ def buckets_list(self, if r.status_code != 200: # We never fail, return whatever we got (if any). - logger.debug("Error listing buckets.") + self.logger.debug("Error listing buckets.") return return_buckets buckets = r.json() @@ -467,7 +510,7 @@ def buckets_list(self, if search: # Figure out what to search for on the next page. - params["offset"] += params["limit"] + params["offset"] += limit else: # The caller asked for a specific limit and offset, exit the loop. break @@ -510,6 +553,7 @@ def buckets_create( https://confluence.workday.com/display/PRISM/Public+API+V2+Endpoints+for+WBuckets + :param name: """ # If the caller didn't give us a name to use for the bucket, @@ -521,18 +565,20 @@ def buckets_create( # A target table must be identified by ID or name. 
if target_id is None and target_name is None: - logger.error("A table id or table name is required to create a bucket.") + self.logger.error("A table id or table name is required to create a bucket.") return None # The caller gave us a table wid, but didn't include a schema. Make a copy # of the target table's schema. Note: WID takes precedence over name. + # Use type_=full to get back the schema definition. + if target_id is not None: tables = self.tables_list(wid=target_id, type_="full") else: tables = self.tables_list(name=target_name, type_="full") if tables["total"] == 0: - logger.error(f"Table not found for bucket operation.") + self.logger.error(f"Table not found for bucket operation.") return None table_id = tables["data"][0]["id"] @@ -540,7 +586,7 @@ def buckets_create( if schema is None: schema = self.table_to_bucket_schema(tables["data"][0]) - logger.debug(f"POST: /buckets") + self.logger.debug(f"POST: /buckets") url = self.prism_endpoint + "/buckets" headers = { @@ -558,38 +604,40 @@ def buckets_create( r = requests.post(url, headers=headers, data=json.dumps(data)) if r.status_code == 201: - logger.info("Successfully created a new wBucket") + self.logger.info("successfully created a new wBucket") return r.json() elif r.status_code == 400: - logger.warning(r.json()["errors"][0]["error"]) - logger.warning(r.text) + self.logger.error(r.json()["errors"][0]["error"]) + self.logger.error(r.text) else: - logger.warning(f"HTTP status code {r.status_code}: {r.content}") + self.logger.error(f"HTTP status code {r.status_code}: {r.content}") return None def buckets_complete(self, bucketid): - url = self.prism_endpoint + f"/buckets/{bucketid}/complete" + operation = f"/buckets/{bucketid}/complete" + self.logger.debug(f"POST: {operation}") + url = self.prism_endpoint + operation headers = { - "Authorization": "Bearer " + self.bearer_token + "Authorization": "Bearer " + self.get_bearer_token() } r = requests.post(url, headers=headers) if r.status_code == 201: - logging.info("Successfully created a new wBucket") + self.logger.info(f"Successfully completed wBucket {bucketid}") return r.json() if r.status_code == 400: - logging.warning(r.json()["errors"][0]["error"]) + self.logger.error(r.json()["errors"][0]["error"]) else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") + self.logger.error(f"HTTP status code {r.status_code}: {r.content}") return None def table_to_bucket_schema(self, table): - """Convert schema (derived from describe table) to bucket schema + """Convert schema derived from list table to a bucket schema. Parameters ---------- @@ -608,11 +656,14 @@ def table_to_bucket_schema(self, table): # in the dict that is in ['data'][0] if table is None or "fields" not in table: - logger.critical("Invalid table passed to table_to_bucket_schema.") + self.logger.error("Invalid table passed to table_to_bucket_schema.") return None fields = table["fields"] + # Get rid of the WPA_ fields... + fields[:] = [x for x in fields if "WPA" not in x["name"]] + # Create and assign useAsOperationKey field with true/false values based on externalId value operation_key_false = {"useAsOperationKey": False} operation_key_true = {"useAsOperationKey": True} @@ -623,9 +674,6 @@ def table_to_bucket_schema(self, table): else: i.update(operation_key_false) - # Get rid of the WPA_ fields... 
- fields[:] = [x for x in fields if "WPA" not in x["name"]] - # Now trim our fields data to keep just what we need for i in fields: del i["id"] @@ -653,7 +701,7 @@ def table_to_bucket_schema(self, table): return bucket_schema - def buckets_upload(self, bucket_id, file): + def buckets_upload(self, bucket_id, file=None): """Upload a file to a given bucket. Parameters @@ -661,7 +709,7 @@ def buckets_upload(self, bucket_id, file): bucket_id : str The ID of the bucket that the file should be added to. - filename : str + file : str The path to your file to upload to the bucket. The file must be gzip compressed delimited and the file must conform to the file size limits. @@ -672,8 +720,7 @@ def buckets_upload(self, bucket_id, file): """ operation = f"/buckets/{bucket_id}/files" - logger.debug("POST: {operation}") - + self.logger.debug("POST: {operation}") url = self.prism_endpoint + operation headers = {"Authorization": "Bearer " + self.get_bearer_token()} @@ -681,18 +728,25 @@ def buckets_upload(self, bucket_id, file): results = [] # Convert a single filename to a list. - if isinstance(file, str): - files = [file] - else: + if isinstance(file, list): files = file + else: + files = [file] # Convert to list... for f in files: - files = {"file": open(f, "rb")} + # It is legal to upload an empty file - see the table truncate method. + if f is None: + new_file = {"file": ("dummy", io.BytesIO())} + elif f.lower().endswith(".csv.gz"): + new_file = {"file": open(f, "rb")} + elif f.lower().endswith(".csv"): + with open(f, "rb") as in_file: + new_file = {"file": (f + ".gz", gzip.compress(in_file.read()))} - r = requests.post(url, headers=headers, files=files) + r = requests.post(url, headers=headers, files=new_file) if r.status_code == 201: - logging.info(f"Successfully uploaded {f} to the bucket") + self.logger.info(f"Successfully uploaded {f} to the bucket") if isinstance(file, str): # If we got a single file, return the first result. @@ -700,7 +754,7 @@ def buckets_upload(self, bucket_id, file): else: results.append(r.json()) else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") + self.logger.error(f"HTTP status code {r.status_code}: {r.content}") return results @@ -802,7 +856,7 @@ def dataChanges_list(self, dataChanges["data"] += \ filter(lambda dtc: dtc["name"].find(name) != -1 or dtc["displayName"].find(name) != -1, - retJSON["data"]) + retJSON["data"]) else: # Without searching, simply paste the current page to the list. 
dataChanges["data"] += retJSON["data"] @@ -837,7 +891,7 @@ def dataChanges_activities_post(self, data_change_id, fileContainerID=None): url = self.prism_endpoint + operation headers = { - "Authorization": "Bearer " + self.bearer_token, + "Authorization": "Bearer " + self.get_bearer_token(), "Content-Type": "application/json", } @@ -1056,11 +1110,13 @@ def wql_data(self, query, limit, offset): url = f"{self.wql_endpoint}{operation}" query_safe = urllib.parse.quote(query.strip()) + query_limit = limit if limit is not None else 100 + offset = 0 data = {"total": 0, "data": []} while True: - r = self.get(f"{url}?query={query_safe}&limit=10000&offset={offset}") + r = self.get(f"{url}?query={query_safe}&limit={query_limit}&offset={offset}") if r.status_code == 200: ds = r.json() @@ -1126,6 +1182,6 @@ def raas_run(self, report, system, user, params=None, format_=None): return None def is_valid_operation(self, operation): - operation_list = ["insert", "truncateandinsert", "delete", "upsert", "update" ] + operation_list = ["insert", "truncateandinsert", "delete", "upsert", "update"] return operation in operation_list diff --git a/tests/buckets-help.sh b/tests/buckets-help.sh deleted file mode 100755 index 2c9f2df..0000000 --- a/tests/buckets-help.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -config_file="../../prism.ini" -config_name="default" - -cli="python3 ../prism/cli.py --config_file $config_file --config_name=$config_name" - -$cli buckets --help -echo "---------------------" - -$cli buckets list --help -echo "---------------------" - -$cli buckets create --help -echo "---------------------" - -$cli buckets upload --help -echo "---------------------" - -$cli buckets complete --help -echo "---------------------" - - diff --git a/tests/tables-help.sh b/tests/tables-help.sh deleted file mode 100755 index 8d4933f..0000000 --- a/tests/tables-help.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -config_file="../../prism.ini" -config_name="default" - -cli="python3 ../prism/cli.py --config_file $config_file --config_name=$config_name" - -$cli tables --help -echo "---------------------" - -$cli tables list --help -echo "---------------------" - -$cli tables create --help -echo "---------------------" - -$cli tables upload --help -echo "---------------------" - -$cli tables update --help -echo "---------------------" - -# Bad table name -$cli tables update bob From d8349932ac95f25413e96354b5e572cc7de63219 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Wed, 18 Oct 2023 14:20:23 -0400 Subject: [PATCH 017/103] General bug fixes - remove gzip to disk --- .gitignore | 1 + prism/__init__.py | 4 +- prism/cli.py | 93 +- prism/commands/buckets_commands.py | 179 ++- prism/commands/dataChanges_commands.py | 283 ++-- prism/commands/dataExport_commands.py | 35 + prism/commands/fileContainers_commands.py | 64 +- prism/commands/raas_commands.py | 31 +- prism/commands/tables_commands.py | 533 +++++-- prism/commands/util.py | 239 --- prism/commands/wql_commands.py | 34 +- prism/prism.py | 1632 +++++++++++++-------- 12 files changed, 1871 insertions(+), 1257 deletions(-) create mode 100644 prism/commands/dataExport_commands.py delete mode 100644 prism/commands/util.py diff --git a/.gitignore b/.gitignore index 8cdd94d..44aaf04 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ __pycache__/ # Distribution / packaging .Python env/ +venv/ build/ develop-eggs/ dist/ diff --git a/prism/__init__.py b/prism/__init__.py index 41c5de7..939eef2 100644 --- a/prism/__init__.py +++ b/prism/__init__.py @@ -1,8 +1,8 @@ 
-from prism.prism import Prism, load_schema, create_table, upload_file +from prism.prism import Prism, set_logging from ._version import get_versions __version__ = get_versions()["version"] del get_versions -__all__ = ["load_schema", "Prism", "create_table", "upload_file"] +__all__ = ["Prism","set_logging"] diff --git a/prism/cli.py b/prism/cli.py index 023614b..6c8dd30 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -6,12 +6,13 @@ import prism -from commands import tables_commands -from commands import buckets_commands -from commands import dataChanges_commands -from commands import fileContainers_commands -from commands import wql_commands -from commands import raas_commands +import commands.tables_commands as t_commands +import commands.buckets_commands as b_commands +import commands.dataChanges_commands as d_commands +import commands.dataExport_commands as e_commands +import commands.fileContainers_commands as f_commands +import commands.wql_commands as w_commands +import commands.raas_commands as r_commands def param_fixup(value, config, config_name, option): @@ -102,10 +103,6 @@ def cli(ctx, # If the configuration is not available or is invalid, exit sys.exit(1) - if log_file is None: - # Assume a file in the PWD of the process, i.e., local directory where invoked. - log_file = "prism.log" - if log_level is None: set_level = logging.INFO else: @@ -116,28 +113,31 @@ def cli(ctx, logger.setLevel(set_level) # Create a handler as specified by the user (or defaults) - fh = logging.FileHandler(log_file) - fh.setLevel(set_level) + + if log_file is not None: + fh = logging.FileHandler(log_file) + + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + fh.setFormatter(formatter) + + fh.setLevel(set_level) + logger.addHandler(fh) # Create an explicit console handler with a higher log level ch = logging.StreamHandler() - ch.setLevel(logging.ERROR) - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter('%(message)s') ch.setFormatter(formatter) - fh.setFormatter(formatter) - # add the handlers to logger + ch.setLevel(logging.INFO) logger.addHandler(ch) - logger.addHandler(fh) logger.debug("completed initialization.") # initialize the Prism class from our resolved configuration. 
p = prism.Prism(base_url, tenant_name, client_id, client_secret, refresh_token) - p.set_logging(log_file, log_level) + prism.set_logging(log_file, log_level) # store the prism object in the Click context ctx.obj = {"p": p} @@ -161,11 +161,11 @@ def tables(): """ -tables.add_command(tables_commands.tables_list) -tables.add_command(tables_commands.tables_create) -tables.add_command(tables_commands.tables_update) -tables.add_command(tables_commands.tables_upload) -tables.add_command(tables_commands.tables_truncate) +tables.add_command(t_commands.tables_get) +tables.add_command(t_commands.tables_create) +tables.add_command(t_commands.tables_edit) +tables.add_command(t_commands.tables_upload) +tables.add_command(t_commands.tables_truncate) @cli.group("buckets") @@ -175,12 +175,13 @@ def buckets(): """ -buckets.add_command(buckets_commands.buckets_list) -buckets.add_command(buckets_commands.buckets_create) -buckets.add_command(buckets_commands.buckets_complete) -buckets.add_command(buckets_commands.buckets_status) -buckets.add_command(buckets_commands.buckets_upload) -buckets.add_command(buckets_commands.buckets_name) +buckets.add_command(b_commands.buckets_get) +buckets.add_command(b_commands.buckets_create) +buckets.add_command(b_commands.buckets_complete) +buckets.add_command(b_commands.buckets_status) +buckets.add_command(b_commands.buckets_files) +buckets.add_command(b_commands.buckets_errorFile) +buckets.add_command(b_commands.buckets_name) @cli.group("dataChanges") @@ -190,10 +191,20 @@ def dataChanges(): """ -dataChanges.add_command(dataChanges_commands.dataChanges_list) -dataChanges.add_command(dataChanges_commands.dataChanges_validate) -dataChanges.add_command(dataChanges_commands.dataChanges_run) -dataChanges.add_command(dataChanges_commands.dataChanges_activities) +dataChanges.add_command(d_commands.dataChanges_get) +dataChanges.add_command(d_commands.dataChanges_validate) +dataChanges.add_command(d_commands.dataChanges_run) +dataChanges.add_command(d_commands.dataChanges_activities) +dataChanges.add_command(d_commands.dataChanges_upload) + +@cli.group("dataExport") +def dataExport(): + """ + Data Change Tasks (/dataChanges) operations to list, load, and activate. + """ + + +dataExport.add_command(e_commands.dataExport_get) @cli.group("fileContainers") @@ -203,20 +214,20 @@ def fileContainers(): """ -fileContainers.add_command(fileContainers_commands.fileContainers_create) -fileContainers.add_command(fileContainers_commands.filecontainers_list) -fileContainers.add_command(fileContainers_commands.filecontainers_load) +fileContainers.add_command(f_commands.fileContainers_create) +fileContainers.add_command(f_commands.fileContainers_get) +fileContainers.add_command(f_commands.fileContainers_load) @cli.group("wql") def wql(): """ - Operations to list (dataSources) and query WQL sources (data). + Operations to list (/dataSources) and run WQL queries (/data). """ -wql.add_command(wql_commands.dataSources) -wql.add_command(wql_commands.data) +wql.add_command(w_commands.dataSources) +wql.add_command(w_commands.data) @cli.group("raas") @@ -226,7 +237,7 @@ def raas(): """ -raas.add_command(raas_commands.run) +raas.add_command(r_commands.run) if __name__ == "__main__": diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index 53c9464..ce3e4f5 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -4,16 +4,14 @@ import click import pandas as pd -from . 
import util as u +logger = logging.getLogger('prismCLI') -logger = logging.getLogger(__name__) - -@click.command("list") -@click.option("-w", "--wid", +@click.command("get") +@click.option("-i", "--id", "id", help="The Workday ID of the bucket.") @click.option("-n", "--table_name", - help="The API name of the table to retrieve (see search option).") + help="The display name of the table to retrieve (see search option).") @click.option("-l", "--limit", default=None, type=int, help="The maximum number of object data entries included in the response, default=-1 (all).") @click.option("-o", "--offset", default=None, type=int, @@ -27,18 +25,18 @@ default="json", help="Format output as JSON, summary, schema, or CSV.", ) -@click.argument("bucket_name", required=False) +@click.argument("name", required=False) @click.pass_context -def buckets_list(ctx, wid, table_name, limit, offset, type_, search, format_, bucket_name): +def buckets_get(ctx, id, table_name, limit, offset, type_, search, format_, name): """ View the buckets permitted by the security profile of the current user. - [BUCKET_NAME] explicit name of bucket to list. + [NAME] explicit name of bucket to list. """ p = ctx.obj["p"] - buckets = p.buckets_list(wid, bucket_name, limit, offset, type_, table_name, search) + buckets = p.buckets_get(id, name, limit, offset, type_, table_name, search) if buckets["total"] == 0: return @@ -53,121 +51,113 @@ def buckets_list(ctx, wid, table_name, limit, offset, type_, search, format_, bu click.echo(f"{display_name}, operation: {operation}, target: {target}, state: {state}") elif format_ == "csv": df = pd.json_normalize(buckets["data"]) - click.echo(df.to_csv(index=False)) + logger.info(df.to_csv(index=False)) else: - click.echo(json.dumps(buckets, indent=2)) + logger.info(json.dumps(buckets, indent=2)) @click.command("create") -@click.option("-n", "--table_name", default=None, +@click.option("-n", "--target_name", default=None, help="Table name to associate with the bucket.") -@click.option("-w", "--table_wid", default=None, +@click.option("-i", "--target_id", default=None, help="Table ID to associate with the table.") @click.option("-f", "--file", "file_", required=False, default=None, type=click.Path(exists=True), help="Schema JSON file for the target table.") @click.option("-o", "--operation", default="TruncateAndInsert", show_default=True, help="Operation to perform on the table.") -@click.argument("bucket_name", required=False) +@click.argument("name", required=False) @click.pass_context -def buckets_create(ctx, table_name, table_wid, file_, operation, bucket_name): +def buckets_create(ctx, target_name, target_id, file_, operation, name): """ Create a new bucket with the specified name. - [BUCKET_NAME] explicit bucket name to create otherwise default. + [NAME] explicit bucket name to create otherwise default. 
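+
+    Example (illustrative; replace "prism" with however the CLI is invoked,
+    table and bucket names are placeholders):
+        prism buckets create -n My_Table my_new_bucket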
""" p = ctx.obj["p"] - if table_name is None and table_wid is None and file_ is None: - click.echo("A table must be associated with this bucket (-n, -w, or -f must be specified).") + if target_name is None and target_id is None and file_ is None: + logger.error("A table must be associated with this bucket (-n, -i, or -f must be specified).") sys.exit(1) - bucket = p.buckets_create(bucket_name, table_wid, table_name, file_, operation) + bucket = p.buckets_create(name=name, target_id=target_id, target_name=target_name, + schema=file_, operation=operation) - click.echo(bucket) + if bucket is not None: + logger.info(json.dumps(bucket, indent=2)) + else: + sys.exit(1) -@click.command("upload") -@click.option("-n", "--table_name", default=None, +@click.command("files") +@click.option("-n", "--target_name", default=None, help="Name of the table to associate with the bucket.") -@click.option("-w", "--table_wid", default=None, +@click.option("-i", "--target_id", default=None, help="Table ID to associate with the table.") -@click.option("-s", "--schema_file", default=None, +@click.option("-f", "--file", default=None, help="Schema JSON file for the target table.") -@click.option("-o", "--operation", default="TruncateandInsert", show_default=True, +@click.option("-o", "--operation", default="TruncateAndInsert", show_default=True, help="Operation to perform on the table.") -@click.option("-g", "--generate", is_flag=True, default=True, - help="Generate a unique bucket name.") -@click.option("-b", "--bucket", help="Bucket name to load files.") +@click.option("-b", "--bucket", help="Bucket name to load files.", default=None) @click.option("-c", "--complete", is_flag=True, default=False, help="Automatically complete bucket and load the data into the table.") -@click.argument("file", nargs=-1, required=True, type=click.Path(exists=True)) +@click.argument("files", nargs=-1, required=True, type=click.Path(exists=True)) @click.pass_context -def buckets_upload(ctx, table_name, table_wid, schema_file, operation, generate, bucket, complete, file): +def buckets_files(ctx, target_name, target_id, file, operation, bucket, complete, files): """ - Upload a CSV or gzip file to the specified bucket + Upload one or more CSV or gzip files to the specified bucket - [FILE] one or more gzip (.gz) or CSV (.csv) files. + [FILES] one or more gzip (.gz) or CSV (.csv) files. NOTE: This operation will create ".csv.gz" files for each .csv file. """ p = ctx.obj["p"] - # Convert the file(s) provided to a list of compressed files. - target_files = u.compress_files(file) - - if len(target_files) == 0: - click.echo("No files to upload.") - sys.exit(1) - # We think we have a file(s) - we don't test the contents. # Go ahead and create a new bucket or use an existing. 
- bucket = p.buckets_create(bucket, table_name, table_wid, schema_file, operation) + bucket = p.buckets_create(bucket, target_name, target_id, file, operation) if bucket is None: logger.error("Invalid bucket for upload operation.") sys.exit(1) - upload = p.buckets_upload(bucket["id"], target_files) + results = p.buckets_files(bucket["id"], files) - if upload is None: - logger.error("Upload failed.") - sys.exit(1) - - if complete: + if results['total'] > 0 and complete: complete = p.buckets_complete(bucket["id"]) - click.echo(complete) + logger.info(complete) else: - click.echo(upload) + logger.info(json.dumps(results, indent=2)) + @click.command("complete") -@click.option("-n", "--bucket_name", - help="Bucket to complete.") -@click.argument("bucket_wid", required=False) +@click.option("-n", "name", + help="Bucket name to complete.") +@click.argument("id", required=False) @click.pass_context -def buckets_complete(ctx, bucket_name, bucket_wid): +def buckets_complete(ctx, name, id): """ Complete the specified bucket and perform the specified operation. - [BUCKET_WID] the Workday ID of the bucket to complete. + [ID] A reference to a Prism Analytics bucket. """ p = ctx.obj["p"] - if bucket_wid is None and bucket_name is None: + if id is None and name is None: click.echo("A bucket wid or a bucket name must be specified.") sys.exit(1) - if bucket_wid is not None: + if id is not None: # If the caller passed both a name and WID, then use the WID first. - buckets = p.buckets_list(bucket_id=bucket_wid) + bucket = p.buckets_list(bucket_id=id) else: # Lookup the bucket by name. - buckets = p.buckets_list(bucket=bucket_name, verbosity="full") + buckets = p.buckets_list(bucket=name, verbosity="full") - if buckets["total"] == 0: - logger.error('Bucket not found.') - sys.exit(1) + if buckets["total"] == 0: + logger.error('Bucket not found.') + sys.exit(1) - bucket = buckets["data"][0] + bucket = buckets["data"][0] bucket_state = bucket["state"]["descriptor"] @@ -178,22 +168,67 @@ def buckets_complete(ctx, bucket_name, bucket_wid): click.echo(p.buckets_complete(bucket["id"])) +@click.command("errorFile") +@click.option("-n", "name", + help="Bucket name.") +@click.argument("id", required=False) +@click.pass_context +def buckets_errorFile(ctx, name, id): + """ + Return the error file for a bucket. + + [ID] A reference to a Prism Analytics bucket. + """ + p = ctx.obj["p"] + + if id is not None: + # If the caller passed both a name and WID, then use the WID first. + error_file = p.buckets_errorFile(id=id) + else: + # Lookup the bucket by name. + buckets = p.buckets_get(name=name) + + if buckets["total"] == 0: + logger.error('Bucket not found.') + sys.exit(1) + + error_file = p.buckets_errorFile(id=buckets['data'][0]['id']) + + logger.info(error_file) + + @click.command("status") -@click.option("-w", "--wid", required=False, help="Bucket wid to status") -@click.argument("name", required=False) +@click.option("-n", "--name", required=False, help="Bucket name to status") +@click.argument("id", required=False) @click.pass_context -def buckets_status(ctx, name, wid): +def buckets_status(ctx, name, id): """ - Get the status of a bucket by name or workday ID. + Get the status of a bucket by ID or name. - [NAME] name of bucket. + [ID] A reference to a Prism Analytics bucket. 
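+
+    Example (illustrative bucket name; "prism" stands for however the CLI is invoked):
+        prism buckets complete -n my_new_bucket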
""" - p=ctx.obj["p"] + p = ctx.obj["p"] + + if id is None and name is None: + logger.error('Please specify the ID or name of a bucket.') + sys.exit(1) + + if id is not None: + bucket = p.buckets_get(id) + + if bucket is None: + logger.error(f'Bucket {id} not found.') + sys.exit(1) + else: + buckets = p.buckets_get(id, name=name) + + if buckets["total"] == 0: + logger.error(f'Bucket name {name} not found.') + sys.exit(1) - buckets=p.buckets_list(wid, bucket_name=name) + bucket = buckets['data'][0] - if buckets["total"] != 0: - click.echo(buckets["data"][0]["state"]["descriptor"]) + logger.info(bucket["state"]["descriptor"]) @click.command("name") @@ -202,4 +237,4 @@ def buckets_name(ctx): """ Generate a bucket name to use for other bucket operations. """ - click.echo(ctx.obj["p"].buckets_gen_name()) \ No newline at end of file + click.echo(ctx.obj["p"].buckets_gen_name()) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 80cae65..14f8fb8 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -1,13 +1,16 @@ import sys import click import json +import logging +import time import pandas as pd -@click.command("list") -@click.option("-w", "--wid", - help="The dataChangeID to list.") -@click.option("-a", "--activity_wid", - help="A specific activity associated with the data change task.") +logger = logging.getLogger('prismCLI') + + +@click.command("get") +@click.option("-n", "--name", + help="The name of a dataChangeID to list.") @click.option("-l", "--limit", default=-1, help="The maximum number of object data entries included in the response, default=-1 (all).") @click.option("-o", "--offset", default=0, @@ -18,140 +21,256 @@ default="full", help="Format output as full, summary, schema, or CSV.", type=click.Choice(['full', 'summary', 'schema', 'csv'], case_sensitive=False)) -@click.option("-s", "--search", is_flag=True, help="Use contains search substring for --name or --id (default=false).") -@click.argument("name", required=False) +@click.option("-s", "--search", is_flag=True, default=False, + help="Use contains search substring for --name or --id (default=false).") +@click.argument("id", required=False) @click.pass_context -def dataChanges_list(ctx, name, wid, activity_wid, limit, offset, type_, format_, search): +def dataChanges_get(ctx, name, id, limit, offset, type_, format_, search): """ View the data change tasks permitted by the security profile of the current user. - [NAME] data change task to lists. + [ID] data change task to lists. 
""" p = ctx.obj["p"] - data_changes = p.dataChanges_list(name, wid, activity_wid, limit, offset, type_, search) + def output_dct(dct): + display_name = dct["displayName"] - if data_changes["total"] == 0: - click.echo("No data change tasks found.") - return + source_name = dct["source"]["sourceType"] + source_name += ": " + dct["source"]["name"] if "name" in dct["source"] else "" - data_changes["data"] = sorted(data_changes["data"], key=lambda dct: dct["displayName"].lower()) + target_name = dct["target"]["name"] + operation = dct["operation"]["operationType"]["descriptor"] - # Handle output - if format_ == "summary": - for dct in data_changes["data"]: - display_name = dct["displayName"] + logger.info(f"{display_name}, source: {source_name}, target: {target_name}, operation: {operation}") - source_name = dct["source"]["sourceType"] - source_name += ": " + dct["source"]["name"] if "name" in dct["source"] else "" + # Separate the get calls because an ID lookup returns a dict and a name lookup + # always returns an object/list structure with zero or more matching DCTs. + if id is not None: + data_change_task = p.dataChanges_get(id=id, limit=limit, offset=offset, type_=type_) - target_name = dct["target"]["name"] - operation = dct["operation"]["operationType"]["descriptor"] + if data_change_task is None: + logger.error(f'Data change task {id} not found.') + sys.exit(1) - click.echo(f"{display_name}, source: {source_name}, target: {target_name}, operation: {operation}") - elif format_ == "csv": - df = pd.json_normalize(data_changes["data"]) - click.echo(df.to_csv(index=False)) + if format_ == 'summary': + output_dct(data_change_task) + elif format_ == 'csv': + df = pd.json_normalize(data_change_task) + logger.info(df.to_csv(index=False)) + else: + logger.info(json.dumps(data_change_task)) else: - click.echo(json.dumps(data_changes["data"], indent=2)) + data_change_tasks = p.dataChanges_get(name=name, limit=limit, offset=offset, search=search, type=type_) + + if data_change_tasks["total"] == 0: + logger.info("No data change task(s) found.") + sys.exit(1) + + # For display purposes, sort by display name (case-insensitive) + data_change_tasks["data"] = sorted(data_change_tasks["data"], key=lambda dct: dct["displayName"].lower()) + + # Handle output + if format_ == "summary": + for dct in data_change_tasks["data"]: + output_dct(dct) + elif format_ == "csv": + df = pd.json_normalize(data_change_tasks["data"]) + logger.info(df.to_csv(index=False)) + else: + logger.info(json.dumps(data_change_tasks["data"], indent=2)) -@click.command("validate", help="Validate the data change specified by name or ID.") -@click.option("-w", "--wid", help="The dataChangeID to list.") -@click.option("-s", "--search", is_flag=True, help="Use contains search substring for --name or --id (default=false).") -@click.argument("name", required=False) +@click.command("validate") +@click.option("-n", "--isName", default=False, is_flag=True, + help="Flag to treat the dct argument as a name.") +@click.option("-s", "--search", is_flag=True, + help="Use contains search substring for --name.") +@click.argument("dct", required=True) @click.pass_context -def dataChanges_validate(ctx, name, wid, search): - p = ctx.obj["p"] +def dataChanges_validate(ctx, isname, dct, search): + """ + Validate the data change specified by name or ID. - if name is None and wid is None: - click.echo("A data change task name or a wid must be specified.") - sys.exit(1) + [DCT] A reference to a Prism Analytics Data Change Task. 
+    """
+
+    p = ctx.obj["p"]

     # See if we have any matching data change tasks.
-    data_changes = p.dataChanges_list(
-        name=name,
-        wid=wid,
-        search=search,
-        refresh=True)
-
-    if data_changes["total"] == 0:
-        click.echo("No matching data change task(s) found.")
-        sys.exit(1)
+    # Note: dataChanges_get never fails - errors may appear in the log.
+    if not isname:
+        validate = p.dataChanges_validate(dct)
+        logger.info(json.dumps(validate, indent=2))
+    else:
+        data_change_tasks = p.dataChanges_get(name=dct, search=search)
+
+        if data_change_tasks["total"] == 0:
+            logger.error("No matching data change task(s) found.")
+            sys.exit(1)
+
+        results = []

-    for dct in data_changes["data"]:
-        validate = p.dataChanges_validate(dct["id"])
-        click.echo(validate)
+        for dct in data_change_tasks["data"]:
+            validate = p.dataChanges_validate(dct["id"])
+
+            if 'error' in validate:
+                validate['id'] = dct['id']
+                validate['descriptor'] = dct['displayName']
+
+            results.append(validate)
+
+        logger.info(json.dumps(results, indent=2))


 @click.command("run")
+@click.option("-n", "--isName", default=False, is_flag=True,
+              help="Flag to treat the dct argument as a name.")
+@click.argument('dct', required=True)
+@click.argument('fid', required=False)
 @click.pass_context
-def dataChanges_run(ctx, name, filecontainerid):
+def dataChanges_run(ctx, dct, fid, isname):
     """
     Execute the named data change task with an optional file container.

-    [NAME] Data Change Task name.
-    [FILECONTAINERID] File container with files to load.
+    [DCT] A reference to a Prism Analytics data change.
+    [FID] An optional reference to a Prism Analytics file container.
     """
     p = ctx.obj["p"]

-    # See if we have any matching data change task.
-    data_changes = p.dataChanges_list(name=name.replace(" ", "_"), type_="full", refresh=True)
+    if isname:
+        # See if we have any matching data change task.
+        data_changes = p.dataChanges_get(name=dct.replace(" ", "_"))

-    if data_changes["total"] != 1:
-        click.echo(f"Data change task not found: {name}")
-        sys.exit(1)
+        if data_changes["total"] != 1:
+            click.echo(f"Data change task not found: {dct}")
+            sys.exit(1)

-    dct_id = data_changes["data"][0]["id"]
+        dct_id = data_changes["data"][0]["id"]
+    else:
+        dct_id = dct

-    validate = p.dataChanges_validate(dct_id)
+    # It is valid to run a data change task without a fileContainerID value.
+ activity = p.dataChanges_activities_post(dct_id, fid) - if "error" in validate: - click.echo("Invalid DCT: " + validate["errors"][0]["error"] + " - code: " + validate["errors"][0]["code"]) + if activity is None: + logger.error("Failed to run data change task - please review the log.") sys.exit(1) - else: - activity_id = p.dataChanges_activities_post(dct_id, filecontainerid) - if activity_id is None: - sys.exit(1) - else: - click.echo(activity_id) + if 'error' in activity: + logger.error("Error running data change task.") + else: + logger.info(activity) @click.command("activities") +@click.option("-n", "--isName", default=False, is_flag=True, + help="Flag to treat the dct argument as a name.") @click.option("-s", "--status", is_flag=True, default=False, help="Return only the status of the activity.") -@click.argument("name", required=True) -@click.argument("activity_id", required=True) +@click.argument("dct", required=True) +@click.argument("activityID", required=True) @click.pass_context -def dataChanges_activities(ctx, status, name, activity_id): +def dataChanges_activities(ctx, dct, activityid, status, isname): """ Get the status for a specific activity associated with a data change task. - [NAME] Data Change Task name. - [ACTIVITY_ID] File container with files to load. + [ID] A reference to a data change task. + [ACTIVITYID] A reference to a data change task activity. """ p = ctx.obj["p"] - # See if we have any matching data change task. - data_changes = p.dataChanges_list(name=name.replace(" ", "_"), type_="full", refresh=True) + if isname: + # See if we have any matching data change task. + data_changes = p.dataChanges_list(name=dct.replace(" ", "_")) - if data_changes["total"] != 1: - click.echo(f"Data change task not found: {name}") - sys.exit(1) + if data_changes["total"] != 1: + click.echo(f"Data change task not found: {dct}") + sys.exit(1) - dct_id = data_changes["data"][0]["id"] + dct_id = data_changes["data"][0]["id"] + else: + dct_id = dct - current_status = p.dataChanges_activities_get(dct_id, activity_id) + current_status = p.dataChanges_activities_get(dct_id, activityid) if current_status is None: + logger.info("Activity for DCT not found.") sys.exit(1) else: if status: - click.echo(current_status["state"]["descriptor"]) + logger.info(current_status["state"]["descriptor"]) else: - click.echo(current_status) \ No newline at end of file + logger.info(current_status) + + +@click.command("upload") +@click.option("-n", "--isName", default=False, is_flag=True, + help="Flag to treat the dct argument as a name.") +@click.option("-w", "--wait", default=False, is_flag=True, + help="Wait for the data change task to complete.") +@click.option("-v", "--verbose", default=False, is_flag=True, + help="Display additional information.") +@click.argument("dct", required=True) +@click.argument("file", nargs=-1, type=click.Path(exists=True)) +@click.pass_context +def dataChanges_upload(ctx, isname, dct, file, wait, verbose): + """ + Validate the data change specified by name or ID. + + [dct] A reference to a Prism Analytics Data Change Task. 
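+
+    Example (illustrative; -n treats the argument as a name, -w waits for completion):
+        prism dataChanges upload -n -w My_Data_Change_Task data.csv.gz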
+ """ + + p = ctx.obj["p"] + + if len(file) == 0: + logger.error('One or more files must be specified.') + + if isname: + data_change_tasks = p.dataChanges_get(name=dct) + + if data_change_tasks['total'] == 0: + logger.error('Data change task not found.') + sys.exit(1) + + dct_id = data_change_tasks['data'][0]['id'] + else: + dct_id = dct + + file_container = p.fileContainers_load(id=None, file=file) + + if file_container['total'] == 0: + logger.error('Error loading file container.') + sys.exit(1) + + fid = file_container['id'] + + activity = p.dataChanges_activities_post(id=dct_id, fileContainerID=fid) + + if not wait: + logger.info(json.dumps(activity, indent=2)) + else: + if 'errors' in activity: + activity['id'] = dct_id + logger.error(json.dumps(activity, indent=2)) + + sys.exit(1) + + activity_id = activity['id'] + + while True: + time.sleep(10) + + activity = p.dataChanges_activities_get(id=dct_id, activityID=activity_id) + + status = activity['state']['descriptor'] + + if verbose: + logger.info(f'Status: {status}') + + if status not in ['New', 'Queued', 'Processing', 'Loading']: + break + + logger.info(json.dumps(activity, indent=2)) diff --git a/prism/commands/dataExport_commands.py b/prism/commands/dataExport_commands.py new file mode 100644 index 0000000..59a5f48 --- /dev/null +++ b/prism/commands/dataExport_commands.py @@ -0,0 +1,35 @@ +import click +import json +import logging + +logger = logging.getLogger('prismCLI') + +@click.command('get') +@click.option('-l', '--limit', type=int, default=None, + help='The maximum number of object data entries included in the response, default=all.') +@click.option('-o', '--offset', type=int, default=None, + help='The offset to the first object in a collection to include in the response.') +@click.option('-t', '--type', 'type_', default='summary', + type=click.Choice(['summary', 'full'], case_sensitive=False), + help='How much information returned for each table.') +@click.option('-f', '--format', 'format_', default='json', + type=click.Choice(['json', 'summary', 'schema', 'csv'], case_sensitive=False), + help='Format output as JSON, summary, schema, or CSV.') +@click.pass_context +def dataExport_get(ctx, limit, offset, type_, format_): + """List the tables or datasets permitted by the security profile of the current user. + + [NAME] Prism table name to list. + """ + + p = ctx.obj['p'] + + data_export_list = p.dataExport_get(limit=limit, offset=offset, type_=type_) + + logger.info(json.dumps(data_export_list, indent=2)) + + +@click.command('create') +@click.pass_context +def dataExport_create(ctx): + logger.info("here") \ No newline at end of file diff --git a/prism/commands/fileContainers_commands.py b/prism/commands/fileContainers_commands.py index 2bad422..5ac9588 100644 --- a/prism/commands/fileContainers_commands.py +++ b/prism/commands/fileContainers_commands.py @@ -1,53 +1,77 @@ import click import sys +import json +import logging -from . import util as u +logger = logging.getLogger('prismCLI') @click.command("create") @click.pass_context def fileContainers_create(ctx): - """ - Create a new fileContainers object returning the ID. + """Create a new fileContainers object returning the ID. + + Returns + ------- + str + File container ID of the new container. 
""" p = ctx.obj["p"] - fileContainer = p.fileContainers_create() + file_container = p.fileContainers_create() - if fileContainer is not None: - click.echo(fileContainer["id"]) + if file_container is not None: + logger.info(file_container["id"]) else: sys.exit(1) -@click.command("list", help="List the files for a file container.") -@click.argument("fileContainerID") +@click.command("get") +@click.argument("id", required=True) @click.pass_context -def filecontainers_list(ctx, filecontainerid): +def fileContainers_get(ctx, id): + """List the files in a file container. + + [ID] File container ID to list. + """ + p = ctx.obj["p"] - files = p.filecontainers_list(filecontainerid) + files_list = p.fileContainers_get(id) - click.echo(files) + logger.info(json.dumps(files_list, indent=2)) @click.command("load") -@click.option("-f", "--fileContainerID", default=None, help="Target File container ID, default to a new container.") +@click.option("-i", "--id", default=None, + help="Target File container ID - defaults to a new container.") @click.argument("file", nargs=-1, type=click.Path(exists=True)) @click.pass_context -def filecontainers_load(ctx, filecontainerid, file): +def fileContainers_load(ctx, id, file): """ - Load one or more file into a file container. + Load one or more files into a file container returning the container ID. - [FILE] one or more files to load. + [FILE] one or more CSV or GZipped CSV files to load. """ + + if len(file) == 0: # Click gives a tuple - even if no files included + logger.error("One or more files must be specified.") + p = ctx.obj["p"] - fid = u.fileContainers_load(p, filecontainerid, file) + # Load the file and retrieve the ID - a new fID is + # created if the command line ID was not specified. + # Subsequent files are loaded into the same container (fID). + results = p.fileContainers_load(id=id, file=file) - if fid is None: - click.echo("Error loading fileContainer.") + # If the fID comes back blank, then something is not + # working. Note: any error messages have already + # been logged by the load operation. + + if results['total'] == 0: + logger.error("A file container id is required to load a file.") + sys.exit(1) else: # Return the file container ID to the command line. If a - # filecontainerID was passed, simply return that id. - click.echo(fid) + # fileContainerID was passed, simply return that id. 
+ logger.info(json.dumps(results, indent=2)) diff --git a/prism/commands/raas_commands.py b/prism/commands/raas_commands.py index 845f115..3d8eff8 100644 --- a/prism/commands/raas_commands.py +++ b/prism/commands/raas_commands.py @@ -1,23 +1,26 @@ import click +import logging +logger = logging.getLogger('prismCLI') -@click.command("run", help="Run RaaS report as system or as a specific user.") -@click.option("-s", "--system", is_flag=True, default=False, help="Run delivered Workday report.") -@click.option("-u", "--user", default=None, help="Run custom report as named user.") + +@click.command("run", help="Run a system or custom RaaS report.") +@click.option("-u", "--user", default=None, help="Run custom report as named user - default to delivered reports.") @click.option("-f", "--format", "format_", default=None, help="Output query results as CSV.") -@click.argument("report", nargs=1) -@click.argument('params', nargs=-1) +@click.argument("report", nargs=1, required=True) +@click.argument('params', nargs=-1, required=False) @click.pass_context -def run(ctx, system, user, format_, report, params): - p = ctx.obj["p"] +def run(ctx, user, format_, report, params): + """ + Run a Workday report. - if system and user is not None: - click.echo("Please specify only system or user, not both.") - return - - if not system and user is None: - click.echo("Please specify either system or user.") + [REPORT] Report name to run. + [PARAMS] Parameters expected by the report as list. + """ + p = ctx.obj["p"] - report_output = p.raas_run(report, system, user, params, format_) + # Return to a variable for easy debugging. + report_output = p.raas_run(report, user, params, format_) + # Don't log the output - pusht click.echo(report_output) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index bb778de..a61229a 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -1,164 +1,189 @@ +import json +import logging import sys import os -import logging +import csv import click -import json import pandas as pd -from . 
import util as u +logger = logging.getLogger('prismCLI') -logger = logging.getLogger("prismCLI") - -@click.command("list") -@click.option("-w", "--wid", - help="Unique WID for Prism table or dataset.") -@click.option("-l", "--limit", type=int, default=None, - help="The maximum number of object data entries included in the response, default=all.") -@click.option("-o", "--offset", type=int, default=None, - help="The offset to the first object in a collection to include in the response.") -@click.option("-t", "--type", "type_", default="summary", - type=click.Choice(["summary", "full", "permissions"], case_sensitive=False), - help="How much information returned for each table.") -@click.option("-f", "--format", "format_", default="json", +@click.command('get') +@click.option('-n', '--name', + help='Specific WID of Prism table or dataset to list.') +@click.option('-l', '--limit', type=int, default=None, + help='The maximum number of object data entries included in the response, default=all.') +@click.option('-o', '--offset', type=int, default=None, + help='The offset to the first object in a collection to include in the response.') +@click.option('-t', '--type', 'type_', default='summary', + type=click.Choice(['summary', 'full', 'permissions'], case_sensitive=False), + help='How much information returned for each table.') +@click.option('-f', '--format', 'format_', default='json', type=click.Choice(['json', 'summary', 'schema', 'csv'], case_sensitive=False), - help="Format output as JSON, summary, schema, or CSV.") -@click.option("-s", "--search", is_flag=True, - help="Enable substring search of NAME in api name or display name, default=False (exact match).") -@click.argument("name", required=False) + help='Format output as JSON, summary, schema, or CSV.') +@click.option('-s', '--search', is_flag=True, + help='Enable substring search of NAME in api name or display name, default=False (exact match).') +@click.argument('id', required=False) @click.pass_context -def tables_list(ctx, name, wid, limit, offset, type_, format_, search): +def tables_get(ctx, name, id, limit, offset, type_, format_, search): """List the tables or datasets permitted by the security profile of the current user. [NAME] Prism table name to list. """ - if type_ in ("summary", "permissions") and format_ in ("schema", "csv"): + if type_ in ('summary', 'permissions') and format_ in ('schema', 'csv'): # Summary results cannot generate schema or CSV output since there will be no fields. - logger.critical(f"Invalid combination of type \"{type_}\" and format \"{format_}\".") + logger.error(f'Invalid combination of type "{type_}" and format "{format_}".') sys.exit(1) - p = ctx.obj["p"] + p = ctx.obj['p'] # Query the tenant... - tables = p.tables_list(name, wid, limit, offset, type_, search) + tables = p.tables_get(name, id, limit, offset, type_, search) + + if id is not None: + if tables is None: + logger.error(f"Table ID {id} not found.") + sys.exit(1) + else: + # When using ID, the returned object is NOT an + # array of tables - dump the single table object. + logger.info(json.dumps(tables, indent=2)) - # The return always has a total tables returned value. - # note: tables_list never fails, it simply returns 0 tables if there is a problem. - if tables["total"] == 0: + return + + # For any other type of GET:/tables, the return ALWAYS has a total + # tables returned value. 
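+    # Sketch of the list result returned by tables_get (built in prism.py):
+    #   {"total": <count>, "data": [{"id": ..., "name": ..., "displayName": ..., ...}]}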
+ if tables['total'] == 0: return # Handle output - if format_ == "json": + if format_ == 'json': # The results could be one table or an array of multiple # tables - simply dump the returned object. - click.echo(json.dumps(tables, indent=2)) - elif format_ == "summary": - for table in tables["data"]: - display_name = table["displayName"] - rows = table["stats"]["rows"] if "stats" in table and "rows" in table["stats"] else "Null" - size = table["stats"]["size"] if "stats" in table and "size" in table["stats"] else "Null" - refreshed = table["dateRefreshed"] if "dateRefreshed" in table else "unknown" - enabled = table["enableForAnalysis"] if "enableForAnalysis" in table else "Null" - - click.echo(f'{display_name}, Enabled: {enabled}, Rows: {rows}, Size: {size}, Refreshed: {refreshed}') - elif format_ == "csv": - df = pd.json_normalize(tables["data"]) - click.echo(df.to_csv(index=False)) - elif format_ == "schema": + logger.info(json.dumps(tables, indent=2)) + elif format_ == 'summary': + for table in tables['data']: + display_name = table['displayName'] + rows = table['stats']['rows'] if 'stats' in table and 'rows' in table['stats'] else 'Null' + size = table['stats']['size'] if 'stats' in table and 'size' in table['stats'] else 'Null' + refreshed = table['dateRefreshed'] if 'dateRefreshed' in table else 'unknown' + enabled = table['enableForAnalysis'] if 'enableForAnalysis' in table else 'Null' + + logger.info(f'{display_name}, Enabled: {enabled}, Rows: {rows}, Size: {size}, Refreshed: {refreshed}') + elif format_ == 'csv': + df = pd.json_normalize(tables['data']) + logger.info(df.to_csv(index=False)) + elif format_ == 'schema': # Dump out the fields of the first table in the result in # a format compatible with a schema used to created or edit # a table. - table = tables["data"][0] # Only output the first table. + table = tables['data'][0] # Only output the first table. # Remove the Prism audit columns. - fields = [fld for fld in tables["data"][0]["fields"] if not fld["name"].startswith("WPA_")] + fields = [fld for fld in table['fields'] if not fld['name'].startswith('WPA_')] # Remove tenant specific values - these are not needed # if the user wants to update a table definition. for fld in fields: - if "fieldId" in fld: - if "fieldId" in fld: - del fld["fieldId"] - - if "id" in fld: - del fld["id"] - - click.echo(json.dumps(fields, indent=2)) - else: - click.echo(u.csv_from_fields(tables["data"][0]["fields"])) - - -@click.command("create") -@click.option("-n", "--name", - help="Table name - overrides name from schema.") -@click.option("-d", "--displayName", - help="Specify a display name - defaults to name.") -@click.option("-t", "--tags", multiple=True, - help="Tags to organize the table in the Data Catalog.") -@click.option("-e", "--enableForAnalysis", type=bool, is_flag=True, default=None, - help="Enable this table for analytics.") -@click.option("-s", "--sourceName", - help="The API name of an existing table to copy.") -@click.option("-w", "--sourceWID", - help="The WID of an existing table to copy.") -@click.argument("file", required=False, type=click.Path(exists=True)) + if 'fieldId' in fld: + del fld['fieldId'] + + if 'id' in fld: + del fld['id'] + + if 'type' in fld: + if 'descriptor' in fld['type']: + # Convert the descriptor to shorten the Prism type syntax. 
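+                    # For example, a type of {"descriptor": "Text"} becomes
+                    # {"id": "Schema_Field_Type=Text"}, which can be fed back to create/edit.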
+ fld['type']['id'] = f"Schema_Field_Type={fld['type']['descriptor']}" + del fld['type']['descriptor'] + + logger.info(json.dumps(fields, indent=2)) + + +@click.command('create') +@click.option('-n', '--name', + help='Table name - overrides name from schema.') +@click.option('-d', '--displayName', + help='Specify a display name - defaults to name.') +@click.option('-t', '--tags', multiple=True, + help='Tags to organize the table in the Data Catalog.') +@click.option('-e', '--enableForAnalysis', type=bool, is_flag=True, default=None, + help='Enable this table for analytics.') +@click.option('-s', '--sourceName', + help='The API name of an existing table to copy.') +@click.option('-w', '--sourceWID', + help='The WID of an existing table to copy.') +@click.argument('file', required=False, type=click.Path(exists=True)) @click.pass_context def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, sourcewid, file): """ Create a new table with the specified name. - [FILE] Optional file containing a schema definition for the table. + [FILE] Optional file containing a Prism schema definition for the new table. Note: A schema file, --sourceName, or --sourceWID must be specified. """ - p = ctx.obj["p"] + p = ctx.obj['p'] # We can assume a valid schema - get_schema sys.exits if there is a problem. - schema = u.get_schema(p, file, sourcename, sourcewid) + schema = resolve_schema(p, file, sourcename, sourcewid) # Initialize a new schema with the particulars for this table operation. if name is not None: # If we got a name, set it in the table schema - schema["name"] = name.replace(" ", "_") # Minor clean-up - elif "name" not in schema: + schema['name'] = name.replace(' ', '_') # Minor clean-up + elif 'name' not in schema: # The schema doesn't have a name and none was given - exit. - logger.critical("Table --name must be specified.") + logger.error('Table --name must be specified.') sys.exit(1) if displayname is not None: # If we got a display name, set it in the schema - schema["displayName"] = displayname - elif "displayName" not in schema: + schema['displayName'] = displayname + elif 'displayName' not in schema: # Default the display name to the name if not in the schema. - schema["displayName"] = name + schema['displayName'] = name if enableforanalysis is not None: - schema["enableForAnalysis"] = enableforanalysis - elif "enableForAnalysis" not in schema: + schema['enableForAnalysis'] = enableforanalysis + elif 'enableForAnalysis' not in schema: # Default to False - do not enable. - schema["enableForAnalysis"] = False + schema['enableForAnalysis'] = False # Create the table. 
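+    # At this point the schema is a plain dict ready for the POST, e.g. (illustrative
+    # values only):
+    #   {"name": "My_Table", "displayName": "My Table", "enableForAnalysis": False,
+    #    "fields": [{"ordinal": 1, "name": "id", "type": {"id": "Schema_Field_Type=Text"}}]}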
- table_def = p.tables_create(schema) + table_def = p.tables_post(schema) if table_def is not None: - click.echo(f"Table {name} created.") + logger.info(f'Table {name} created.') else: - click.echo(f"Error creating table {name}.") - - -@click.command("update") -@click.option("-s", "--sourceName", help="The API name of an existing table to copy.") -@click.option("-w", "--sourceWID", help="The ID of an existing table to copy.") -@click.option("-t", "--truncate", is_flag=True, default=False, help="Truncate the table before updating.") -@click.argument("name", required=True) -@click.argument("file", required=False, type=click.Path(exists=True)) + logger.error(f'Error creating table {name}.') + + +@click.command('edit') +@click.option('-n', '--name', default=None, + help='Table name - overrides name from schema.') +@click.option('-i', '--id', default=None, + help='Prism table ID.') +@click.option('-t', '--truncate', is_flag=True, default=False, + help='Truncate the table before updating.') +@click.option('--displayName', is_flag=False, flag_value="*-clear-*", default=None, + help='Set the display name for an existing table.') +@click.option('--description', is_flag=False, flag_value="*-clear-*", default=None, + help='Set the display name for an existing table.') +@click.option('--documentation', is_flag=False, flag_value="*-clear-*", default=None, + help='Set the documentation for an existing table.') +@click.option('--enableForAnalysis', is_flag=False, default=None, required=False, + type=click.Choice(['true', 'false'], case_sensitive=False)) +@click.option('-f', '--file', type=click.Path(exists=True, dir_okay=False, readable=True)) +@click.argument('id', required=False, type=str) @click.pass_context -def tables_update(ctx, name, file, sourcename, sourcewid, truncate): +def tables_edit(ctx, name, id, file, truncate, + displayname, description, documentation, enableforanalysis): """Edit the schema for an existing table. NAME The API name of the table to update\b @@ -167,85 +192,329 @@ def tables_update(ctx, name, file, sourcename, sourcewid, truncate): Note: A schema file, --sourceName, or --sourceWID must be specified. """ - p = ctx.obj["p"] + p = ctx.obj['p'] - # Before doing anything, table name must exist. - tables = p.tables_list(name=name) - - if tables["total"] == 0: - logger.critical(f"Table \"{name}\" not found.") + # Figure out the new schema either by file or other table. + schema = None + resolved_id = None + + # The user can specify a GET:/tables output file containing + # the ID and other attributes that could be passed on the + # command line. + if file is not None: + schema = resolve_schema(p, file) + + # If we got a file name, do a quick sanity check. + if 'id' not in schema or 'fields' not in schema: + logger.error(f'Specify a valid table schema file.') + sys.exit(1) + + resolved_id = schema['id'] + + # See if the user is overriding the ID we may have from + # a specified schema file. + if id is not None: + # No verification, simply assume the ID is valid. + resolved_id = id + elif name is not None: + # Before doing anything, table name must exist. + tables = p.tables_get(name=name) # Exact match + + if tables['total'] == 0: + logger.error(f'Table name "{name}" not found.') + sys.exit(1) + + resolved_id = tables['data'][0]['id'] + + if resolved_id is None: + logger.error('Specify a schema file, ID or name to update.') sys.exit(1) - table_id = tables["data"][0]["id"] + # If the caller sent specified attributes, do a patch not put. 
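+    # Sketch of the patch body assembled below; the '*-clear-*' sentinel becomes an
+    # empty string so the attribute is cleared on the server, e.g.:
+    #   {"displayName": "New name", "description": ""}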
+ patch_data = {} - # Figure out the new schema either by file or other table. - fields = u.get_schema(p, file, sourcename, sourcewid) + def set_patch_value(attr, value): + """Utility function to set or clear a table attribute.""" + if value == '*-clear-*': + patch_data[attr] = '' + else: + patch_data[attr] = value + + if displayname is not None: # Specified on CLI + set_patch_value('displayName', displayname) - p.tables_update(wid=table_id, schema=file, truncate=truncate) + if description is not None: + set_patch_value('description', description) - click.echo("update") + if documentation is not None: + set_patch_value('documentation', documentation) + if enableforanalysis is not None: + if enableforanalysis.lower() == 'true': + patch_data['enableForAnalysis'] = 'true' + else: + patch_data['enableForAnalysis'] = 'false' + + if len(patch_data) == 0 and file is None: + logger.error("Specify values to update or a schema file with updates.") + sys.exit(1) -@click.command("upload") -@click.option("-o", "--operation", default="TruncateAndInsert", - help="Operation for the table operation - default to TruncateAndInsert.") -@click.argument("name", required=True) -@click.argument("file", nargs=-1, type=click.Path(exists=True)) + if len(patch_data) > 0: + table = p.tables_patch(id=resolved_id, patch=patch_data) + else: + table = p.tables_put(id=resolved_id, schema=schema, truncate=truncate) + + if table is None: + logger.error(f'Error updating table ID {resolved_id}') + else: + logger.debug(json.dumps(table, indent=2)) + logger.info(f'Table {resolved_id} updated.') + + +@click.command('upload') +@click.option('-o', '--operation', default='TruncateAndInsert', + help='Operation for the table operation - default to TruncateAndInsert.') +@click.argument('name', required=True) +@click.argument('file', nargs=-1, type=click.Path(exists=True)) @click.pass_context def tables_upload(ctx, name, operation, file): """ Upload a file into the table using a bucket. - - NOTE: This operation creates ".csv.gz" files for each .csv file. """ - p = ctx.obj["p"] + + p = ctx.obj['p'] # Convert the file(s) provided to a list of compressed files. - target_files = u.get_files(file) - if len(target_files) == 0: - logging.getLogger("prismCLI").critical("No files to upload.") + if len(file) == 0: + logger.error('No files to upload.') sys.exit(1) bucket = p.buckets_create(target_name=name, operation=operation) if bucket is None: - logging.getLogger("prismCLI").critical("Bucket creation failed.") + logger.error('Bucket creation failed.') sys.exit(1) - results = p.buckets_upload(bucket["id"], target_files) + results = p.buckets_upload(bucket['id'], file) if len(results) > 0: - p.buckets_complete(bucket["id"]) + p.buckets_complete(bucket['id']) -@click.command("truncate") -@click.argument("name", required=True) +@click.command('truncate') +@click.argument('name', required=True) @click.pass_context def tables_truncate(ctx, name): """ Truncate the named table. - :param name: - :return: + [NAME] The API name of the Prism table to truncate. """ - # Create an empty bucket with a delete operation - p = ctx.obj["p"] + p = ctx.obj['p'] + msg = f'Unable to truncate table "{name}" - see log for details.' - # Get a bucket using a generated name and an explicit Delete operation. - bucket = p.buckets_create(target_name=name, operation="TruncateAndInsert") + # To do a truncate, we still need a bucket with a truncate operation. 
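+    # The flow below: create a TruncateAndInsert bucket, upload nothing into it, then
+    # complete the bucket so Prism truncates the table with no rows to insert.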
+ bucket = p.buckets_create(target_name=name, operation='TruncateAndInsert') if bucket is None: - logging.getLogger("prismCLI").critical(f"Unable to truncate {name} - error getting bucket.") + logger.error(msg) sys.exit(1) - bucket_id = bucket["id"] + bucket_id = bucket['id'] # Don't specify a file to put a zero sized file into the bucket. - bucket = p.buckets_upload(bucket_id) + p.buckets_upload(bucket_id) # Ask Prism to run the delete statement by completing the bucket. bucket = p.buckets_complete(bucket_id) if bucket is None: - click.echo(f"Unable to truncate table {name}.") + logger.error(msg) + sys.exit(1) + + +def schema_from_csv(prism, file): + """Convert a CSV list of fields into a proper Prism schema JSON object""" + + if not os.path.exists(file): + logger.error(f'FIle {file} not found - skipping.') + sys.exit(1) + + schema = {'fields': []} # Start with an empy schema definition. + + with open(file, newline='') as csvfile: + reader = csv.DictReader(csvfile) + + # Force all the columns names from the CSV to lowercase to make + # lookups consistent regardless of the actual case of the columns. + reader.fieldnames = [f_name.lower() for f_name in reader.fieldnames] + + # The minimum definition is a name column - exit if not found. No other + # column definition is required to build a valid field list. + if 'name' not in reader.fieldnames: + logger.error(f'CSV file {file} does not contain a name column header in first line.') + sys.exit(1) + + # Prism fields always have an ordinal sequence assigned to each field. + ordinal = 1 + + for row in reader: + if len(row['name']) == 0: + logger.error('Missing column name in CSV file.') + sys.exit(1) + + # Start the new field definition with what we know so far. + field = { + 'ordinal': ordinal, + 'name': row['name'], + 'displayName': row['displayname'] if 'displayname' in row else row['name'] + } + + # The following two items may not be in the CSV, the columns are not required and may not be present. + + if 'required' in row and isinstance(row['required'], str) and row['required'].lower() == 'true': + field['required'] = True + else: + field['required'] = False + + if 'externalid' in row and isinstance(row['externalid'], str) and row['externalid'].lower() == 'true': + field['externalId'] = True + else: + field['externalId'] = False + + fld_type = 'none' + + prism_data_types = ['boolean', 'integer', 'text', 'date', 'long', 'decimal', + 'numeric', 'instance', 'currency', 'multi_instance'] + + if 'type' in row and row['type'].lower() in prism_data_types: + field['type'] = {'id': f'Schema_Field_Type={row["type"]}'} + fld_type = row['type'].lower() + else: + # Default all "un-typed" fields to text. + field['type'] = {'id': 'Schema_Field_Type=Text'} + + match fld_type: + case 'date': + if 'parseformat' in row and isinstance(row['parseformat'], str) and len(row['parseformat']) > 0: + field['parseFormat'] = row['parseformat'] + else: + field['parseFormat'] = 'yyyy-MM-dd' + + case 'numeric': + if 'precision' in row: + field['precision'] = row['precision'] + + if 'scale' in row: + field['scale'] = row['scale'] + + case 'instance': + # We need all the data sources to resolve the business objects + # to include their WID. 
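+                # Each data source entry carries the business object reference (including
+                # its WID); the lookup below matches the CSV's business object descriptor
+                # against that list so the field can reference the object by ID.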
+ data_sources = prism.datasources_list() + + if data_sources is None or data_sources['total'] == 0: + click.echo('Error calling WQL/dataSources') + return + + # Find the matching businessObject + bo = [ds for ds in data_sources['data'] + if ds['businessObject']['descriptor'] == row['businessObject']] + + if len(bo) == 1: + field['businessObject'] = bo[0]['businessObject'] + + schema['fields'].append(field) + ordinal += 1 + + return schema + + +def csv_from_fields(fields): + """Convert a Prism field list to CSV representation.""" + + format_str = '{name},"{displayName}",{ordinal},{type},"{businessObject}",{precision},{scale},"{parseFormat}",{required},{externalId}\n' + + # Start with the CSV column headings. + csv_str = 'name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId\n' + + for field in fields: + # Suppress the Prism audit columns. + if field['name'].startswith('WPA_'): + continue + + field_def = {'name': field['name'], + 'displayName': field['displayName'], + 'ordinal': field['ordinal'], + 'type': field['type']['descriptor'], + 'businessObject': field['businessObject']['descriptor'] if 'businessObject' in field else '', + 'precision': field['precision'] if 'precision' in field else '', + 'scale': field['scale'] if 'scale' in field else '', + 'parseFormat': field['parseFormat'] if 'parseFormat' in field else '', + 'required': field['required'], + 'externalId': field['externalId'] + } + + # Add the new field to the CSV text. + csv_str += format_str.format_map(field_def) + + return csv_str + + +def resolve_schema(p, file, source_name=None, source_id=None): + """Get or extract a schema from a file or existing Prism table.""" + + # Start with a blank schema definition. + schema = {} + + # A file always takes precedence over sourceName and sourceWID + # options, and must BE, or contain a valid schema. + + if file is not None: + if file.lower().endswith('.json'): + try: + with open(file) as json_file: + schema = json.load(json_file) + except Exception as e: + logger.error(f'Invalid schema file: {e}.') + sys.exit(1) + + # The JSON file could be a complete table definitions (GET:/tables - full) or just + # the list of fields. If we got a list, then we have a list of fields we + # use to start the schema definition. + + if type(schema) is list: + schema['fields'] = schema + else: + # This should be a full schema, perhaps from a table list command. + if 'name' not in schema and 'fields' not in schema: + logger.error('Invalid schema - name and fields attribute not found.') + sys.exit(1) + elif file.lower().endswith('.csv'): + schema = schema_from_csv(p, file) + else: + logger.error('Invalid file extension - valid extensions are .json or .csv.') + sys.exit(1) + else: + # No file was specified, check for a Prism source table. 
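+        # Sketch of the resolution order below: an explicit source_id is looked up
+        # directly; otherwise the table is resolved by exact API name and the first
+        # match supplies the full schema.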
+ if source_name is None and source_id is None: + logger.error('No schema file provided and a table (--sourceName or --sourceId) not specified.') + sys.exit(1) + + if source_id is not None: + schema = p.tables_list(id=source_id, type_='full') # Exact match on WID - and get the fields (full) + + if schema is None: + logger.error(f'Invalid --sourceId {source_id} : table not found.') + sys.exit(1) + else: + tables = p.tables_list(name=source_name, type_='full') # Exact match on API Name + + if tables['total'] == 0: + logger.error(f'Invalid --sourceName {source_name} : table not found.') + sys.exit(1) + + schema = tables['data'][0] + + return schema diff --git a/prism/commands/util.py b/prism/commands/util.py deleted file mode 100644 index a77c371..0000000 --- a/prism/commands/util.py +++ /dev/null @@ -1,239 +0,0 @@ -import os.path -import sys -import json -import csv -import click -import logging - - -def get_schema(p, file, sourceName, sourceWID): - # Start with a blank schema definition. - schema = {} - - # A file always takes precedence over sourceName and sourceWID - # options, and must contain a valid schema. - - if file is not None: - if file.lower().endswith(".json"): - try: - with open(file) as json_file: - schema = json.load(json_file) - except Exception as e: - click.echo(f"Invalid schema file: {e.msg}.") - sys.exit(1) - - # The JSON file could be a complete table definitions (GET:/tables - full) or just - # the list of fields. If we got a list, then we have a list of fields we - # use to start the schema definition. - - if type(schema) is list: - schema["fields"] = schema - else: - # This should be a full schema, perhaps from a table list command. - if "name" not in schema and "fields" not in schema: - click.echo("Invalid schema - name and fields attribute not found.") - sys.exit(1) - elif file.lower().endswith(".csv"): - schema = schema_from_csv(p, file) - else: - click.echo("Invalid file extension - valid extensions are .json or .csv.") - sys.exit(1) - else: - # No file was specified, check for a source table. - - if sourceName is None and sourceWID is None: - click.echo("No schema provided and a table (--sourceName or --sourceWID) not specified.") - sys.exit(1) - - if sourceWID is not None: - tables = p.tables_list(wid=sourceWID, type_="full") # Exact match on WID - and get the fields - else: - tables = p.tables_list(name=sourceName, type_="full") # Exact match on API Name - - if tables["total"] == 0: - click.echo("Invalid --sourceName or --sourceWID : table not found.") - sys.exit(1) - else: - schema = tables["data"][0] - - return schema - - -def schema_from_csv(prism, file): - schema = {"fields": []} # Start with an empy schema definition. - - with open(file, newline='') as csvfile: - reader = csv.DictReader(csvfile) - - # Force all the columns names to lowercase to make lookups consistent - # regardless of the actual case of the columns. - reader.fieldnames = [f_name.lower() for f_name in reader.fieldnames] - - # The minimum definition is a name column - exit if not found. No other - # column definition is required to build a valid field list. - if "name" not in reader.fieldnames: - click.echo(f"CSV file {file} does not contain a name column header in first line.") - sys.exit(1) - - # Prism fields always have an ordinal sequence assigned to each field. 
- ordinal = 1 - - for row in reader: - field = { - "ordinal": ordinal, - "name": row["name"], - "displayName": row["displayname"] if "displayname" in row else row["name"] - } - - if "required" in row and isinstance(row["required"], str) and row["required"].lower() == "true": - field["required"] = True - else: - field["required"] = False - - if "externalid" in row and isinstance(row["externalid"], str) and row["externalid"].lower() == "true": - field["externalId"] = True - else: - field["externalId"] = False - - fld_type = "none" - - if "type" in row and row["type"] in ["text", "date", "numeric", "instance"]: - field["type"] = { "id" : f'Schema_Field_Type={row["type"]}'} - fld_type = row["type"].lower() - else: - field["type"] = { "id" : f'Schema_Field_Type=Text'} - - match fld_type: - case "date": - if "parseformat" in row and isinstance(row["parseformat"], str) and len(row["parseformat"]) > 0: - field["parseFormat"] = row["parseformat"] - else: - field["parseFormat"] = "yyyy-MM-dd" - - case "numeric": - if "precision" in row: - field["precision"] = row["precision"] - - if "scale" in row: - field["scale"] = row["scale"] - - case "instance": - # We need all the data sources to resolve the business objects - # to include their WID. - data_sources = prism.datasources_list() - - if data_sources is None or data_sources["total"] == 0: - click.echo("Error calling WQL/dataSources") - return - - # Find the matching businessObject - bo = [ds for ds in data_sources["data"] - if ds["businessObject"]["descriptor"] == row["businessObject"]] - - if len(bo) == 1: - field["businessObject"] = bo[0]["businessObject"] - - schema["fields"].append(field) - ordinal += 1 - - return schema - - -def csv_from_fields(fields): - format_str = '{name},"{displayName}",{ordinal},{type},"{businessObject}",{precision},{scale},"{parseFormat}",{required},{externalId}\n' - - csv = "name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId\n" - - for field in fields: - # Suppress Prism auditing fields. - if field["name"].startswith("WPA_"): - continue - - field_def = {"name": field["name"], - "displayName": field["displayName"], - "ordinal": field["ordinal"], - "type": field["type"]["descriptor"], - "businessObject": field["businessObject"]["descriptor"] if "businessObject" in field else "", - "precision": field["precision"] if "precision" in field else "", - "scale": field["scale"] if "scale" in field else "", - "parseFormat": field["parseFormat"] if "parseFormat" in field else "", - "required": field["required"], - "externalId": field["externalId"] - } - - csv += format_str.format_map(field_def) - - return csv - - -def fileContainers_load(prism, filecontainersid, file): - # Because Click verified the file already exists, we know we have valid - # file name. Check to see if we have a gzip file or a CSV - # by checking the extension. - - if file is None or len(file) == 0: - click.echo("An existing file name is required to upload to a file container.") - return None - - # Verify that each file is already a gzip file or a CSV we gzip for them. - - # The CSV contents are not validated here - Prism eventually - # returns an error if the content is invalid. - - target_files = compress_files(file) - - # Assume we have a fID - it can be None right now - # if the user wants to create a fileContainers during - # this operation. 
- fID = filecontainersid - - for target_file in target_files: - # Load the file and retrieve the fID - this is only - # set by the load on the first file - subsequent - # files are loaded into the same container (fID). - fID = prism.filecontainers_load(fID, target_file) - - # If the fID comes back blank, then something is not - # working. Note: any error messages have already - # been displayed by the load operation. - - # NOTE: this operation never fails, the file is skipped. - if fID is None: - break - - # Return the fID to the caller - this is the value - # passed by the caller, or the new fID created by - # the load of the first file. - return fID - - -def get_files(files): - target_files = [] - - if files is None: - logging.getLogger("prismCLI").warning("File(s) must be specified.") - return target_files - elif isinstance(files, list) and len(files) == 0: - logging.getLogger("prismCLI").warning("File(s) must be specified.") - return target_files - elif isinstance(files, tuple) and len(files) == 0: - logging.getLogger("prismCLI").warning("File(s) must be specified.") - return target_files - elif isinstance(files, str): - if not files: - logging.getLogger("prismCLI").warning("File(s) must be specified.") - return target_files - else: - files = [ files ] - - for f in files: - if not os.path.exists(f): - logging.getLogger("prismCLI").warning(f"FIle {f} not found - skipping.") - continue - - if f.lower().endswith(".csv") or f.lower().endswith(".csv.gz"): - target_files.append(f) - else: - logging.getLogger("prismCLI").warning(f"File {f} is not a .gz or .csv file - skipping.") - - return target_files diff --git a/prism/commands/wql_commands.py b/prism/commands/wql_commands.py index 87abf1c..0b096f0 100644 --- a/prism/commands/wql_commands.py +++ b/prism/commands/wql_commands.py @@ -1,12 +1,12 @@ import click import json +import sys import pandas as pd -@click.command("dataSources", - help="View the buckets permitted by the security profile of the current user.") +@click.command("dataSources") @click.option("-w", "--wid", - help="The Workday ID of the dataSources.") + help="The Workday ID of the data source.") @click.option("-l", "--limit", default=None, help="The maximum number of object data entries included in the response, default=-1 (all).") @click.option("-o", "--offset", default=None, @@ -16,6 +16,7 @@ @click.argument("name", required=False) @click.pass_context def dataSources(ctx, wid, limit, offset, search, name): + """Returns a collection of data sources (/dataSources) for use in a WQL query.""" p = ctx.obj["p"] ds = p.wql_dataSources(wid, limit, offset, name, search) @@ -23,37 +24,40 @@ def dataSources(ctx, wid, limit, offset, search, name): click.echo(json.dumps(ds, indent=2)) -@click.command("data", - help="View the buckets permitted by the security profile of the current user.") +@click.command("data") @click.option("-l", "--limit", default=None, help="The maximum number of object data entries included in the response, default=-1 (all).") @click.option("-o", "--offset", default=None, help="The offset to the first object in a collection to include in the response.") -@click.option("-f", "--file", "file_", default=None, type=click.Path(exists=True), - help="Filename of a query") -@click.option("-c", "--csv", "csv_", is_flag=True, show_default=True, default=False, +@click.option("-f", "--file", "wql_file", default=None, type=click.Path(exists=True), + help="Filename containing a WQL query.") +@click.option("-c", "--as_csv", "as_csv", is_flag=True, show_default=True, default=False, 
help="Output query results as CSV.") @click.argument("query", required=False) @click.pass_context -def data(ctx, limit, offset, file_, csv_, query): +def data(ctx, limit, offset, wql_file, as_csv, query): + """ + Returns the data from a WQL query. + + [QUERY] WQL query string to execute (/data). + """ p = ctx.obj["p"] - if file_ is None and query is None: + if wql_file is None and query is None: click.echo("No query provided") - return + sys.exit(1) if query is not None: query_resolved = query else: - with open(file_) as file: - query_resolved = file.read().replace('\n',' ') + with open(wql_file) as file: + query_resolved = file.read().replace('\n', ' ') rows = p.wql_data(query_resolved, limit, offset) if rows["total"] != 0: - if csv_: + if as_csv: df = pd.json_normalize(rows["data"]) click.echo(df.to_csv(index=False)) else: click.echo(json.dumps(rows, indent=2)) - diff --git a/prism/prism.py b/prism/prism.py index cf642ef..1cf9ecb 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -3,6 +3,8 @@ The Prism API provides a flexible, secure and scalable way to load data into Workday Prism Analytics. + +DocString style: https://www.sphinx-doc.org/en/master/usage/extensions/example_numpy.html """ import logging @@ -10,40 +12,207 @@ import requests import time import os -import urllib import sys import uuid import io import gzip +import inspect + +from urllib import parse as urlparse + +# Default a logger - the default may be re-configured in the set_logging method. +logger = logging.getLogger(__name__) +logger.setLevel(logging.WARNING) + +# writing to stdout only... +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(logging.WARNING) +log_format = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S") +handler.setFormatter(log_format) +logger.addHandler(handler) + + +def set_logging(log_file=None, log_level="INFO"): + """ + + :param log_file: + :param log_level: + :return: + """ + # Resolve the log level - default to info if empty or invalid. + if log_level is None: + set_level = logging.INFO + else: + # Make sure the caller gave us a valid "name" for logging level. + if hasattr(logging, log_level): + set_level = getattr(logging, log_level) + else: + set_level = getattr(logging, "INFO") + + # If no file was specified, simply loop over any handlers and + # set the logging level. + if log_file is None: + for log_handler in logger.handlers: + log_handler.setLevel(set_level) + else: + # Setup logging for CLI operations. + for log_handler in logger.handlers: + logger.removeHandler(log_handler) + + logger.setLevel(set_level) + + # Create a handler as specified by the user (or defaults) + fh = logging.FileHandler(log_file) + fh.setLevel(set_level) + + # create formatter and add it to the handlers + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + fh.setFormatter(formatter) + + logger.addHandler(fh) + + logger.debug(f"set log level: {set_level}") def log_elapsed(msg, timedelta): + """Log the elapsed time of a get/post/put/patch HTTP operation.""" elapsed = timedelta.total_seconds() - logging.getLogger(__name__).debug(f"{msg}: elapsed {elapsed:.5f}") + logger.debug(f"{msg}: elapsed {elapsed:.5f}") -class Prism: - """Base class for interacting with the Workday Prism API. 
+def buckets_gen_name(): + bucket_name = "cli_" + uuid.uuid4().hex + logger.debug(f"buckets_gen_name: created bucket name: {bucket_name}") + + return bucket_name + + +def schema_fixup(schema): + """Utility function to revise a schema for a bucket operations.""" + + if schema is None: + logger.error("schema_fixup: schema cannot be None.") + return False + + if not isinstance(schema, dict): + logger.error("schema_fixup: schema is not a dictionary.") + return False + + def is_valid_string(attr): + if attr not in schema or not isinstance(schema[attr], str) or len(schema[attr]) == 0: + return False + + return True + + def is_valid_list(attr): + if attr not in schema or not isinstance(schema[attr], list): + return False + + return True + + if not is_valid_string('id'): + logger.error("id attribute missing") + return False + + if not is_valid_list('fields'): + logger.error("fields attribute missing from schema!") + return False + + # Remove Prism managed fields "WPA_*" + schema['fields'] = [fld for fld in schema['fields'] if not fld['name'].startswith('WPA_')] + + # Add a sequential order (ordinal) on the fields to (en)force + # required sequencing of fields. + for ordinal in range(len(schema["fields"])): + fld = schema["fields"][ordinal] + fld["ordinal"] = ordinal + 1 + + keys = list(schema.keys()) + + for k in keys: + if k not in ['name', 'id', 'fields', 'tags', 'displayName', 'description', 'documentation', + 'enableForAnalysis']: + del schema[k] + + return True - Attributes + +def table_to_bucket_schema(table): + """Convert schema derived from list table to a bucket schema. + + Parameters ---------- - base_url : str - The URL for the API client + table: dict + A dictionary containing the schema definition for your dataset. - tenant_name : str - The name of your Workday tenant + Returns + ------- + If the request is successful, a dictionary containing the bucket schema is returned. + The results can then be passed to the create_bucket function + + """ + + # describe_schema is a python dict object and needs to be accessed as such, 'data' is the top level object, + # but this is itself a list (with just one item) so needs the list index, in this case 0. 'fields' is found + # in the dict that is in ['data'][0] + + if table is None or "fields" not in table: + logger.error("Invalid table passed to table_to_bucket_schema.") + return None - client_id : str - The Client ID for your registered API client + bucket_schema = { + "schemaVersion": {"id": "Schema_Version=1.0"}, + } + + fields = table["fields"] + + # Get rid of any WPA_ fields... 
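+    # (Fields prefixed "WPA_" are the Prism-managed audit columns removed elsewhere in
+    # this module; they are stripped here so the bucket schema only carries user fields.)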
+ fields[:] = [x for x in fields if "WPA" not in x["name"]] + + # Create and assign useAsOperationKey field with true/false values based on externalId value + operation_key_false = {"useAsOperationKey": False} + operation_key_true = {"useAsOperationKey": True} + + for fld in fields: + if fld["externalId"] is True: + fld.update(operation_key_true) + else: + fld.update(operation_key_false) + + # Now trim our field attributes to keep just what we need + for fld in fields: + for attr in ['id', 'displayName', 'fieldId', 'required', 'externalId']: + if attr in fld: + del fld[attr] + + if 'parseOptions' in table: + bucket_schema['parseOptions'] = table['parseOptions'] + else: + bucket_schema['parseOptions'] = { + "fieldsDelimitedBy": ",", + "fieldsEnclosedBy": '"', + "headerLinesToIgnore": 1, + "charset": {"id": "Encoding=UTF-8"}, + "type": {"id": "Schema_File_Type=Delimited"}, + } - client_secret : str - The Client Secret for your registered API client + # Build the final bucket definition. + bucket_schema['fields'] = fields - refresh_token : str - The Refresh Token for your registered API client + return bucket_schema - version : str - The version of the Prism API to use + +class Prism: + """Class for interacting with the Workday Prism API. + + + Attributes: + base_url (str): URL for the Workday API client + tenant_name (str): Workday tenant name + client_id (str): Client ID for the registered API client + client_secret (str): Client Secret for the registered API client + refresh_token (str): Refresh Token for the Workday user + version (str): Version of the Prism API to use """ def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_token, version="v3"): @@ -65,95 +234,149 @@ def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_toke # Support URLs for additional Workday API calls. self.wql_endpoint = f"{base_url}/api/wql/v1/{tenant_name}" + """str: Workday Report as a Service (raas) endpoint.""" + self.raas_endpoint = f"{base_url}/ccx/service" + """str: Workday Report as a Service (raas) endpoint.""" # At creation, there cannot yet be a bearer_token obtained from Workday. self.bearer_token = None + """str: Active bearer token for the session.""" + self.bearer_token_timestamp = None + """time.time: Last bearer token time.""" + + # Helper constants. + self.CONTENT_APP_JSON = {"Content-Type": "application/json"} + self.CONTENT_FORM = {"Content-Type": "application/x-www-form-urlencoded"} - # Default a logger - the default may be re-configured - # in the set_logging method. - self.logger = logging.getLogger(__name__) - self.logger.setLevel(logging.WARNING) - - # writing to stdout only... - handler = logging.StreamHandler(sys.stdout) - handler.setLevel(logging.WARNING) - log_format = logging.Formatter('[%(asctime)s] [%(levelname)s] - %(message)s') - handler.setFormatter(log_format) - self.logger.addHandler(handler) - - def set_logging(self, log_file=None, log_level="INFO"): - # Resolve the log level - default to info if empty or invalid. - if log_level is None: - set_level = logging.INFO + def http_get(self, url, headers=None, params=None): + """Pass the headers and params to the URL to retrieve + + :param url: + :param headers: + :param params: + :return: + """ + caller = inspect.stack()[1][3] + logger.debug(f"get: called by {caller}") + + if url is None or not isinstance(url, str) or len(url) == 0: + # Create a fake response object for standard error handling. 
+ msg = "get: missing URL" + + response = {"status_code": 600, + "text": msg, + "errors": [{"error": msg}]} else: - # Make sure the caller gave us a valid "name" for logging level. - if hasattr(logging, log_level): - set_level = getattr(logging, log_level) - else: - set_level = getattr(logging, "INFO") + logger.debug(f"get: {url}") + + # Every request requires an authorization header - make it true. + if headers is None: + headers = {} - # If no file was specified, simply loop over any handlers and - # set the logging level. - if log_file is None: - for handler in self.logger.handlers: - handler.setLevel(set_level) + if "Authorization" not in headers: + headers["Authorization"] = "Bearer " + self.get_bearer_token() + + response = requests.get(url, params=params, headers=headers) + log_elapsed(f"get: {caller}", response.elapsed) + + if response.status_code != 200: + logger.error(f"Invalid HTTP status: {response.status_code}") + logger.error(f"Reason: {response.reason}") + logger.error(f"Text: {response.text}") + + return response + + def http_post(self, url, headers=None, data=None, files=None): + caller = inspect.stack()[1][3] + logger.debug(f"post: called by {caller}") + + if url is None or not isinstance(url, str) or len(url) == 0: + # Create a fake response object for standard error handling. + msg = "POST: missing URL" + + response = {"status_code": 600, + "text": msg, + "errors": [{"error": msg}]} else: - # Setup logging for CLI operations. - for handler in self.logger.handlers: - self.logger.removeHandler(handler) + logger.debug(f"post: {url}") - self.logger.setLevel(set_level) + # Every request requires an authorization header - make it true. + if headers is None: + headers = {} - # Create a handler as specified by the user (or defaults) - fh = logging.FileHandler(log_file) - fh.setLevel(set_level) + if "Authorization" not in headers and caller != "create_bearer_token": + headers["Authorization"] = "Bearer " + self.get_bearer_token() - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - fh.setFormatter(formatter) + response = requests.post(url, headers=headers, data=data, files=files) + log_elapsed(f"put: {caller}", response.elapsed) - self.logger.addHandler(fh) + if response.status_code > 299: + logger.error(response.text) - self.logger.debug(f"set log level: {set_level}") + return response - def buckets_gen_name(self): - return "cli_" + uuid.uuid4().hex + def http_patch(self, url, headers=None, data=None): + caller = inspect.stack()[1][3] + logger.debug(f"patch: called by {caller}") - def get(self, url, headers=None, params=None, log_tag="generic get"): - if url is None: - self.logger.warning("http_get: missing URL") - return None + if url is None or not isinstance(url, str) or len(url) == 0: + # Create a fake response object for standard error handling. + msg = "PATCH: missing URL" - # Every request requires an authorization header - make it true. - auth_attr = "Authorization" + response = {"status_code": 600, + "text": msg, + "errors": [{"error": msg}]} + else: + logger.debug(f"patch: {url}") - if headers is None: - headers = {} + # Every request requires an authorization header - make it true. 
+ if headers is None: + headers = {} - if auth_attr not in headers: - headers[auth_attr] = "Bearer " + self.get_bearer_token() + if "Authorization" not in headers and caller != "create_bearer_token": + headers["Authorization"] = "Bearer " + self.get_bearer_token() - response = requests.get(url, params=params, headers=headers) - log_elapsed("GET: " + log_tag, response.elapsed) + response = requests.patch(url, headers=headers, data=json.dumps(data)) + log_elapsed(f"patch: {caller}", response.elapsed) - if response.status_code != 200: - self.logger.error(f"Invalid HTTP status: {response.status_code}") + if response.status_code > 299: + logger.error(response.text) return response - def validate_schema(self, schema): - if "fields" not in schema or not isinstance(schema["fields"], list) or len(schema["fields"]) == 0: - self.logger.error("Invalid schema detected!") - return False + def http_put(self, url, headers=None, data=None): + caller = inspect.stack()[1][3] + logger.debug(f"put: called by {caller}") - # Add a sequential order (ordinal) on the fields to (en)force - # proper numbering. - for ordinal in range(len(schema["fields"])): - schema["fields"][ordinal]["ordinal"] = ordinal + 1 + if url is None or not isinstance(url, str) or len(url) == 0: + # Create a fake response object for standard error handling. + msg = "PUT: missing URL" - return True + response = {"status_code": 600, + "text": msg, + "errors": [{"error": msg}]} + else: + logger.debug(f"put: {url}") + + # Every request requires an authorization header - make it true. + if headers is None: + headers = {} + + if "Authorization" not in headers and caller != "create_bearer_token": + headers["Authorization"] = "Bearer " + self.get_bearer_token() + + if "Content-Type" not in headers: + headers["Content-Type"] = 'application/json' + + response = requests.put(url, headers=headers, data=json.dumps(data)) + log_elapsed(f"put: {caller}", response.elapsed) + + if response.status_code > 299: + logger.error(response.text) + + return response def create_bearer_token(self): """Exchange a refresh token for an access token. @@ -168,10 +391,6 @@ def create_bearer_token(self): """ - self.logger.debug("create_bearer_token") - - headers = {"Content-Type": "application/x-www-form-urlencoded"} - data = { "grant_type": "refresh_token", "refresh_token": self.refresh_token, @@ -179,127 +398,139 @@ def create_bearer_token(self): "client_secret": self.client_secret, } - r = requests.post(self.token_endpoint, headers=headers, data=data) - log_elapsed("create_bearer_token", r.elapsed) + r = self.http_post(url=self.token_endpoint, headers=self.CONTENT_FORM, data=data) if r.status_code == 200: - self.logger.debug("successfully obtained bearer token") + logger.debug("successfully obtained bearer token") self.bearer_token = r.json()["access_token"] self.bearer_token_timestamp = time.time() + else: + logger.error(f"create bearer token failed: HTTP status code.") + self.bearer_token = None + self.bearer_token_timestamp = None - return True - - self.logger.error(f"HTTP status code {r.status_code}: {r.content}") - self.bearer_token = None - self.bearer_token_timestamp = None + def get_bearer_token(self): + """Get the current bearer token, or create a new one - return False + Note: + If the token doesn't exist, or it's older than 15 minutes create + a new token. - def get_bearer_token(self): - """ - Get the current bearer token, or create a new one if it doesn't exist, or it's older than 15 minutes. + Returns: + Workday bearer token. 
""" if self.bearer_token is None or (time.time() - self.bearer_token_timestamp) > 900: - if not self.create_bearer_token(): - return "" + self.create_bearer_token() + + if self.bearer_token is None: + return "" # Only return strings return self.bearer_token def reset_bearer_token(self): - """Remove the current bearer token to force getting a new token on the next API call.""" + """Reset the current bearer token to none. + + Note: Use this to force getting a new token on the next API call. + """ self.bearer_token = None self.bearer_token_timestamp = None - def tables_list( + def tables_get( self, - name=None, wid=None, + name=None, id=None, limit=None, offset=None, type_="summary", search=False): """Obtain details for all tables or a given table(s). + Notes + ----- + This method never fails and always returns a valid Dict. + Parameters ---------- name : str The name of the table to obtain details about. If the default value - of None is specified, details regarding first 100 tables is returned. - - wid : str + of None is specified. + id : str The ID of a table to obtain details about. When specified, all tables are searched for the matching id. - limit : int - The maximum number of tables to be queried, to the maximum of 100. - + The maximum number of tables to be queried, if None all tables are returned. offset: int The offset from zero of tables to return. - type_ : str Level of detail to return. - search : bool - Enable substring searching for table names or ids + Enable contains searching for table names ans display names. Returns ------- - If the request is successful, a dictionary containing information about - the table is returned. - + dict + For an ID query, return the table information as a dict. For any other + table list query, return a total attribute of the number of tables found and data + attribute containing the list tables. """ operation = "/tables" - self.logger.debug(f"GET: {operation}") - url = self.prism_endpoint + operation + if type_ is None or type_.lower() not in ["full", "summary", "permissions"]: + logger.warning("Invalid output type for tables list operation - defaulting to summary.") + output_type = "summary" + else: + output_type = type_.lower() + + # If we got a WID, then do a direct query by ID - no paging or searching required. + if id is not None: + operation = f"{operation}/{id}?format={output_type}" + logger.debug(f"get: {operation}") + url = self.prism_endpoint + operation + + response = self.http_get(url) - if type_ is None or type_ not in ["full", "summary", "permissions"]: - self.logger.warning("Invalid type for tables list operation - defaulting to summary.") - type_ = "summary" + if response.status_code == 200: + return response.json() + else: + return None - # If we are searching, then we have to get everything using - # limits and offsets, i.e., paging of results. + # We are doing a query by attributes other than ID. + logger.debug(f"get: {operation}") + url = self.prism_endpoint + operation - params = {} + # Always return a valid JSON object of results regardless of + # errors or API responses. THIS METHOD NEVER FAILS. + return_tables = {"total": 0, "data": []} + + # Start setting up the API call parameters. + params = { + 'limit': limit if limit is not None else 100, + 'offset': offset if offset is not None else 0, + 'type': output_type + } # See if we want to add an explicit table name as a search parameter. if not search and name is not None: - # Here, the user is not searching, they gave us an exact name. 
+ # Here, the caller is not searching, they gave us an exact name. params["name"] = name.replace(" ", "_") # Minor clean-up # Should only be 0 (not found) or 1 (found) tables found. - limit = 1 - offset = 0 + params['limit'] = 1 + params['offset'] = 0 - # When searching by name or id, set the maximum limit size to - # reduce the number of individual REST API calls. - if search: - limit = 100 - offset = 0 - - # If we didn't get a limit, set it to the maximum supported by the API + # If we didn't get a limit, turn on searching to retrieve all tables. if limit is None: search = True # Force a search so we get all tables - limit = 100 - - offset = offset if offset is not None else 0 - # Finalized the parameters to the GET:/tables call. - params["limit"] = limit - params["offset"] = offset - params["type"] = type_ - - # Always return a valid JSON object of results regardless of - # errors or API responses. - return_tables = {"total": 0, "data": []} + params["limit"] = 100 # Max pagesize to retrieve in the fewest REST calls. + params["offset"] = 0 # Always assume we will retrieve more than one page. while True: - r = self.get(url, params=params, log_tag=operation) + r = self.http_get(url, params=params) if r.status_code != 200: - self.logger.error(f"Invalid HTTP return code: {r.status_code}") - - # Whatever we have captured (perhaps nothing) so far will - # be returned due to unexpected status code. + # Whatever we have captured (perhaps zero tables) so far + # will be returned due to unexpected status code. Break + # and do final clean-up on exit. break # Convert the response to a list of tables. @@ -310,15 +541,11 @@ def tables_list( # whatever we got (maybe nothing). return tables - # Figure out what of this batch of tables should be part of the - # return results, i.e., search the batch for matches. - + # Figure out what tables of this batch of tables should be part of the + # return results, i.e., search the this batch for matches. if name is not None: - # Substring search for matching table names - match_tables = [tab for tab in tables["data"] if name in tab["name"]] - elif wid is not None: - # User is looking for a table by ID - match_tables = [tab for tab in tables["data"] if wid == tab["id"]] + # Substring search for matching table names, display names + match_tables = [tab for tab in tables["data"] if name in tab["name"] or name in tab["displayName"]] else: # Grab all the tables in the result match_tables = tables["data"] @@ -327,181 +554,231 @@ def tables_list( # If we get back anything but a full page, we are done # paging the results. - if len(tables["data"]) < limit: + if len(tables["data"]) < params['limit']: break if search: # Move on to the next page. - offset += limit - params["offset"] = offset + params['offset'] += params['limit'] else: # The caller asked for a specific limit and offset, exit the loop. break - # We always return a valid JSON. - return_tables["total"] = len(return_tables["data"]) # Separate step for debugging. + # We always return a dict with the total tables found. + return_tables['total'] = len(return_tables['data']) # Separate step for debugging. return return_tables - def tables_create(self, schema): + def tables_post(self, schema): """Create an empty table of type "API". Parameters ---------- - schema : list + schema : dict A dictionary containing the schema Returns ------- - If the request is successful, a dictionary containing information about - the new table is returned. 
- + dict + If the request is successful, a dictionary containing information about + the new table is returned, otherwise None. """ operation = "/tables" - self.logger.debug(f"POST : {operation}") + logger.debug(f"POST : {operation}") url = self.prism_endpoint + "/tables" - if not self.validate_schema(schema): - self.logger.error("Invalid schema for create operation.") + if not schema_fixup(schema): + logger.error("Invalid schema for create operation.") return None - headers = { - "Authorization": "Bearer " + self.get_bearer_token(), - "Content-Type": "application/json", - } - - r = requests.post(url, headers=headers, data=json.dumps(schema)) + response = self.http_post(url=url, headers=self.CONTENT_APP_JSON, data=json.dumps(schema)) - if r.status_code == 201: - return r.json() - elif r.status_code == 400: - self.logger.error(r.json()["errors"][0]["error"]) - self.logger.error(r.text) - else: - self.logger.error(f"HTTP status code {r.status_code}: {r.content}") + if response.status_code == 201: + return response.json() return None - def tables_update(self, wid, schema, truncate=False): - """ - Update the schema of an existing table. + def tables_put(self, id, schema, truncate=False): + """Update an existing table using a full schema definition. - """ + Notes + ----- + For certain changes, e.g., changing a data type, the table cannot + have any data. + + Parameters + ---------- + id : str + Prism Table ID of an existing table. + + schema : dict + A dictionary containing the schema - operation = f"/tables/{wid}" - self.logger.debug(f"PUT: {operation}") + truncate : bool + True to automatically truncate the table before + applying the new schema. + + Returns + ------- + dict + If the request is successful, a dictionary containing information about + the new table is returned, otherwise None. + """ + operation = f"/tables/{id}" + logger.debug(f"PUT: {operation}") url = self.prism_endpoint + operation - if not self.validate_schema(schema): - self.logger.error("Invalid schema for update operation.") + if not schema_fixup(schema): + logger.error("Invalid schema for update operation.") return None - headers = { - "Authorization": "Bearer " + self.get_bearer_token(), - "Content-Type": "application/json", - } + response = self.http_put(url=url, data=schema) - r = requests.put(url=url, data=schema) + if response.status_code == 200: + return response.json() - if r.status_code == 200: - return r.json() - - self.logger.error(f"Error updating table {wid} - {r.text}.") return None - def tables_patch(self, id, displayName=None, description=None, documentation=None, enableForAnalysis=None, - schema=None): - return None + def tables_patch(self, id, patch): + """Patch the table with specified values. - def buckets_list(self, - wid=None, - bucket_name=None, - limit=None, - offset=None, - type_="summary", - table_name=None, - search=False): - """ + Notes + ----- + Patching only changes a short list of table + level attributes. - :param wid: - :param bucket_name: - :param limit: - :param offset: - :param type_: - :param table_name: - :param search: - :return: + Parameters + ---------- + id : str + Prism Table ID of an existing table. + + patch : dict + One or more table attributes to update. + + Returns + ------- + dict + If the request is successful, a dictionary containing information about + the new table is returned, otherwise None. 
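+
+        Examples
+        --------
+        A minimal sketch - assumes an authenticated client p and an existing
+        table ID; description is one of the patchable table-level attributes.
+
+        >>> p.tables_patch(id='<table id>', patch={'description': 'Loaded by API'})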
""" + operation = f'/tables/{id}' + logger.debug(f'PATCH: {operation}') + url = self.prism_endpoint + operation + + response = self.http_patch(url=url, headers=self.CONTENT_APP_JSON, data=patch) + + if response.status_code == 200: + return response.json() + + return None + + def buckets_get(self, + id=None, name=None, + limit=None, offset=None, + type_="summary", + table_name=None, search=False): + """Get a one or more bucket definitions. + + Parameters + ---------- + id : str + The ID of an existing bucket. + name : str + The name of an existing bucket. + limit : int + The maximum number of tables to be queried, if None all tables are returned. + offset: int + The offset from zero of tables to return. + type_ : str + Level of detail to return. + table_name : str + List all/any buckets for associated with the table name. + search : bool + Enable contains searching for bucket names and display names. + Returns + ------- + dict + For an ID query, return the bucket information as a dict. For any other + bucket query, return a total attribute of the number of buckets found and data + attribute containing the list buckets. + """ operation = "/buckets" - self.logger.debug(f"GET: {operation}") + + output_type = type_.lower() if type_.lower() in ['full', 'summary'] else 'summary' + + # If we got an ID, then do a direct query by ID - no paging or searching required. + if id is not None: + operation = f"{operation}/{id}?format={output_type}" + logger.debug(f"get: {operation}") + url = self.prism_endpoint + operation + + response = self.http_get(url) + + if response.status_code == 200: + return response.json() + else: + return None + + logger.debug(f"get: {operation}") url = self.prism_endpoint + operation # Start the return object - this routine NEVER fails - # and always returns a valid JSON object. + # and always returns a valid dict object. return_buckets = {"total": 0, "data": []} - # If we are searching, then we have to get everything first - # so don't add a name to the bucket query. - - params = {} + params = { + 'limit': limit if limit is not None else 100, + 'offset': offset if offset is not None else 0, + 'type': output_type + } - if not search and bucket_name is not None: + if not search and name is not None: # List a specific bucket name overrides any other # combination of search/table/bucket name/wid. - params["name"] = bucket_name + params['name'] = name - limit = 1 - offset = 0 + params['limit'] = 1 # Can ONLY be one matching bucket. + params['offset'] = 0 else: # Any other combination of parameters requires a search # through all the buckets in the tenant. search = True - limit = 100 # Max pagesize to retrieve in the fewest REST calls. - offset = 0 - - params["limit"] = limit if limit is not None else 100 - params["offset"] = offset if offset is not None else 0 - - if type_ in ["summary", "full"]: - params["type"] = type_ - else: - params["type"] = "summary" + params['limit'] = 100 # Max pagesize to retrieve in the fewest REST calls. + params['offset'] = 0 while True: - r = self.get(url, params=params, log_tag=operation) + r = self.http_get(url, params=params) if r.status_code != 200: - # We never fail, return whatever we got (if any). - self.logger.debug("Error listing buckets.") - return return_buckets + # This routine never fails, return whatever we got (if any). 
+ break buckets = r.json() - if not search and bucket_name is not None: # Explicit bucket name + if not search and name is not None: # Explicit bucket name # We are not searching, and we have a specific bucket, - # return whatever we got with this call. + # return whatever we got with this call (it will be in + # the necessary dict structure). return buckets - # If we are not searching, simply append this page of results to - # the return object. - - if bucket_name is not None: + if name is not None: # We are searching at this point. # Substring search for matching table names - match_buckets = [bck for bck in buckets["data"] if bucket_name in bck["name"]] - elif wid is not None: - # User is looking for a bucket by ID - match_buckets = [bck for bck in buckets["data"] if wid == bck["id"]] + match_buckets = [bck for bck in buckets["data"] if + name in bck["name"] or name in bck["displayName"]] elif table_name is not None: # Caller is looking for any/all buckets by target table - match_buckets = [bck for bck in buckets["data"] if table_name in bck["targetDataset"]["descriptor"]] + match_buckets = [ + bck for bck in buckets["data"] + if table_name == bck["targetDataset"]["descriptor"] + or (search and table_name.lower() in bck["targetDataset"]["descriptor"].lower()) + ] else: # Grab all the tables in the result - select all buckets. match_buckets = buckets["data"] # Add to the results. return_buckets["data"] += match_buckets - return_buckets["total"] = len(return_buckets["data"]) # If we get back a list of buckets fewer than a full page, we are done # paging the results. @@ -509,13 +786,15 @@ def buckets_list(self, break if search: - # Figure out what to search for on the next page. - params["offset"] += limit + # Move on to the next page... + params["offset"] += params["limit"] else: # The caller asked for a specific limit and offset, exit the loop. break - # We always return a valid JSON. + # We always return a valid count of buckets found. + return_buckets["total"] = len(return_buckets["data"]) + return return_buckets def buckets_create( @@ -525,291 +804,279 @@ def buckets_create( target_id=None, schema=None, operation="TruncateAndInsert"): - """Create a temporary bucket to upload files. + """Create a Prism bucket to upload files. + + Notes + ----- + A table name (without a table id) retrieves the table id. + + Default operation is TruncateAndInsert, valid operations include + “Insert”, “Update”, “Upsert” and “Delete” + + For Update/Upsert/Delete operations, one field in the table must have the + ‘useAsOperationKey’ attribute set to True. Only fields marked as ExternalID + or WPA_RowID or WPA_LoadId on Table schema can be used as operation keys + during loads into the table. Parameters ---------- - schema : dict - A dictionary containing the schema for your table. - + name : str + Name of the bucket to create, default to a new generated name. target_id : str - The ID of the table that this bucket is to be associated with. - + The ID of the table for this bucket. target_name : str - The name of the table that this bucket is to be associated with. - + The name of the table for bucket. + schema : dict + A dictionary containing the schema for your table. operation : str - Required, defaults to "TruncateandInsert" operation - Additional Operations - “Insert”, “Update”, “Upsert”, “Delete” - When you use Update/Upsert/Delete operation you must specify which field to use - as the matching key by setting the ‘useAsOperationKey’ attribute on that field as True. 
- Only fields marked as ExternalID or WPA_RowID or WPA_LoadId on Table schema can be used - as operation keys during loads into the table. + Required, defaults to "TruncateAndInsert" operation Returns ------- - If the request is successful, a dictionary containing information about - the new bucket is returned. - - - https://confluence.workday.com/display/PRISM/Public+API+V2+Endpoints+for+WBuckets - :param name: + dict + Information about the new bucket, or None if there was a problem. """ # If the caller didn't give us a name to use for the bucket, # create a default name. if name is None: - bucket_name = self.buckets_gen_name() + bucket_name = buckets_gen_name() else: bucket_name = name - # A target table must be identified by ID or name. - if target_id is None and target_name is None: - self.logger.error("A table id or table name is required to create a bucket.") - return None + table_schema = None + bucket_schema = None + + if schema is not None: + if isinstance(schema, dict): + table_schema = schema + elif isinstance(schema, str): + try: + with open(schema) as schema_file: + table_schema = json.load(schema_file) + except Exception as e: + logger.error(e) + return None + else: + logger.error('invalid schema expecting dict or file name.') + return None - # The caller gave us a table wid, but didn't include a schema. Make a copy - # of the target table's schema. Note: WID takes precedence over name. - # Use type_=full to get back the schema definition. + # Resolve the target table; if specified. + if target_id is None and target_name is None: + if table_schema is None: + logger.error("schema, target id or target name is required to create a bucket.") + return None - if target_id is not None: - tables = self.tables_list(wid=target_id, type_="full") + if 'id' not in table_schema or 'fields' not in table_schema: + logger.error('schema missing "id" or "fields" attribute.') + return None else: - tables = self.tables_list(name=target_name, type_="full") + if target_id is not None: # Always use ID if provided. + table = self.tables_get(id=target_id, type_="full") # Full=include fields object - if tables["total"] == 0: - self.logger.error(f"Table not found for bucket operation.") - return None + if table is None: + logger.error(f'table ID {target_id} not found.') + return None + else: + tables = self.tables_get(name=target_name, type_="full") - table_id = tables["data"][0]["id"] + if tables["total"] == 0: + logger.error(f"table not found for bucket operation.") + return None - if schema is None: - schema = self.table_to_bucket_schema(tables["data"][0]) + table = tables['data'][0] - self.logger.debug(f"POST: /buckets") - url = self.prism_endpoint + "/buckets" + if table_schema is None: + table_schema = table + else: + # Override the definition of the table in the schema. + table_schema['id'] = table['id'] - headers = { - "Authorization": "Bearer " + self.get_bearer_token(), - "Content-Type": "application/json", - } + # We have the table and the user didn't include a schema. Make a copy + # of the target table's schema. 
+ if not schema_fixup(table_schema): + logger.error('Invalid schema for bucket operation.') + return None + + bucket_schema = table_to_bucket_schema(table_schema) + + logger.debug(f"post: /buckets") + url = self.prism_endpoint + "/buckets" data = { "name": bucket_name, "operation": {"id": "Operation_Type=" + operation}, - "targetDataset": {"id": table_id}, - "schema": schema, + "targetDataset": {"id": table_schema["id"]}, + "schema": bucket_schema, } - r = requests.post(url, headers=headers, data=json.dumps(data)) + response = self.http_post(url, headers=self.CONTENT_APP_JSON, data=json.dumps(data)) - if r.status_code == 201: - self.logger.info("successfully created a new wBucket") - return r.json() - elif r.status_code == 400: - self.logger.error(r.json()["errors"][0]["error"]) - self.logger.error(r.text) - else: - self.logger.error(f"HTTP status code {r.status_code}: {r.content}") + if response.status_code == 201: + logger.info("successfully created a new wBucket") + return response.json() return None - def buckets_complete(self, bucketid): - operation = f"/buckets/{bucketid}/complete" - self.logger.debug(f"POST: {operation}") - url = self.prism_endpoint + operation + def buckets_complete(self, id): + """ + Commit the data contained in the bucket to the associated table. - headers = { - "Authorization": "Bearer " + self.get_bearer_token() - } + Parameters + ---------- + id : str + The ID of an existing bucket with a "New" status. + + Returns + ------- + dict + Information about the completed bucket, or None if there was a problem. + """ + operation = f'/buckets/{id}/complete' + logger.debug(f'post: {operation}') + url = self.prism_endpoint + operation - r = requests.post(url, headers=headers) + r = self.http_post(url) if r.status_code == 201: - self.logger.info(f"Successfully completed wBucket {bucketid}") + logger.info(f'successfully completed wBucket {id}.') return r.json() - if r.status_code == 400: - self.logger.error(r.json()["errors"][0]["error"]) - else: - self.logger.error(f"HTTP status code {r.status_code}: {r.content}") - return None - def table_to_bucket_schema(self, table): - """Convert schema derived from list table to a bucket schema. + def buckets_files(self, id, file=None): + """Upload a file to a given bucket. + + Notes + ----- + The file may be a single file or a list of files having + and extension of .CSV or .CSV.GZ (lowercase). + + When a .CSV file is encountered, automatically GZIP before + uploading. Parameters ---------- - describe_schema: dict - A dictionary containing the describe schema for your dataset. + id : str + Upload the file to the bucket identified by ID. + + file : str | list(str) + The file(s) to upload to the bucket. Each file must conform + to the file size limits. Returns ------- - If the request is successful, a dictionary containing the bucket schema is returned. - The results can then be passed to the create_bucket function - + Upload information or None if there was a problem. When uploading + multiple files, an array of upload information with information for + each file. """ + operation = f"/buckets/{id}/files" + logger.debug("post: {operation}") + url = self.prism_endpoint + operation - # describe_schema is a python dict object and needs to be accessed as such, 'data' is the top level object, - # but this is itself a list (with just one item) so needs the list index, in this case 0. 
'fields' is found - # in the dict that is in ['data'][0] - - if table is None or "fields" not in table: - self.logger.error("Invalid table passed to table_to_bucket_schema.") - return None + results = {'total': 0, 'data': []} # Always return a valid list - regardless of files - fields = table["fields"] + if file is None: + # It is legal to upload an empty file - see the table truncate command. + target_files = [None] # Provide one empty file to iterate over. + else: + target_files = resolve_file_list(file) - # Get rid of the WPA_ fields... - fields[:] = [x for x in fields if "WPA" not in x["name"]] + for target_file in target_files: + if target_file is None: + new_file = {"file": ("empty", io.BytesIO())} + elif target_file.lower().endswith(".csv.gz"): + new_file = {"file": open(target_file, "rb")} + elif target_file.lower().endswith(".csv"): + upload_filename = os.path.basename(target_file) + upload_filename += ".gz" - # Create and assign useAsOperationKey field with true/false values based on externalId value - operation_key_false = {"useAsOperationKey": False} - operation_key_true = {"useAsOperationKey": True} + # Buckets can only load gzip files - do it. + with open(target_file, "rb") as in_file: + new_file = {"file": (upload_filename, gzip.compress(in_file.read()))} - for i in fields: - if i["externalId"] is True: - i.update(operation_key_true) - else: - i.update(operation_key_false) - - # Now trim our fields data to keep just what we need - for i in fields: - del i["id"] - del i["displayName"] - del i["fieldId"] - del i["required"] - del i["externalId"] - - # The "header" for the load schema - bucket_schema = { - "parseOptions": { - "fieldsDelimitedBy": ",", - "fieldsEnclosedBy": '"', - "headerLinesToIgnore": 1, - "charset": {"id": "Encoding=UTF-8"}, - "type": {"id": "Schema_File_Type=Delimited"}, - } - } + response = self.http_post(url, files=new_file) - # The footer for the load schema - schema_version = {"id": "Schema_Version=1.0"} + if response.status_code == 201: + logger.debug(f"successfully uploaded {target_file} to the bucket") - bucket_schema["fields"] = fields - bucket_schema["schemaVersion"] = schema_version + results['data'].append(response.json()) # Add this file's info to the return list - return bucket_schema + results['total'] = len(results['data']) + return results - def buckets_upload(self, bucket_id, file=None): - """Upload a file to a given bucket. + def buckets_errorFile(self, id): + """Get a list of all rows that failed to load into the table Parameters ---------- - bucket_id : str - The ID of the bucket that the file should be added to. - - file : str - The path to your file to upload to the bucket. The file must be - gzip compressed delimited and the file must conform to the file - size limits. + id : str + A reference to a Prism Analytics bucket. Returns ------- - None - + str """ - operation = f"/buckets/{bucket_id}/files" - self.logger.debug("POST: {operation}") - url = self.prism_endpoint + operation - headers = {"Authorization": "Bearer " + self.get_bearer_token()} + if id is None: + logger.error('bucket id is required.') + return None - results = [] + operation = f"/buckets/{id}/errorFile" + logger.debug("post: {operation}") + url = self.prism_endpoint + operation - # Convert a single filename to a list. - if isinstance(file, list): - files = file - else: - files = [file] # Convert to list... + response = self.http_get(url) - for f in files: - # It is legal to upload an empty file - see the table truncate method. 
- if f is None: - new_file = {"file": ("dummy", io.BytesIO())} - elif f.lower().endswith(".csv.gz"): - new_file = {"file": open(f, "rb")} - elif f.lower().endswith(".csv"): - with open(f, "rb") as in_file: - new_file = {"file": (f + ".gz", gzip.compress(in_file.read()))} - - r = requests.post(url, headers=headers, files=new_file) - - if r.status_code == 201: - self.logger.info(f"Successfully uploaded {f} to the bucket") - - if isinstance(file, str): - # If we got a single file, return the first result. - return r.json() - else: - results.append(r.json()) - else: - self.logger.error(f"HTTP status code {r.status_code}: {r.content}") + if response.status_code == 200: + return response.text - return results + return None - def dataChanges_list(self, - name=None, - wid=None, - activity_id=None, - limit=-1, offset=None, - type_="summary", - search=False, - refresh=False): + def dataChanges_get(self, + name=None, id=None, + limit=None, offset=None, + type_='summary', search=False, + refresh=False): + """ + + :param name: + :param wid: + :param activity_id: + :param limit: + :param offset: + :param type_: + :param search: + :param refresh: + :return: + """ # We are doing a dataChanges GET operation. operation = "/dataChanges" - # If an ID is provided, add it to the URL as part of the path. - if wid is not None and isinstance(wid, str) and len(wid) > 0: - operation += f"/{wid}" - search_by_id = True - else: - search_by_id = False - - # We know what kind of list we want, add in the ability to - # search by name and pages. - if type_ and isinstance(type_, str): - if type_ == "summary": - operation += "?type=summary" - elif type_ == "full": - operation += "?type=full" - else: - operation += "?type=summary" - logger.warning(f'/dataChanges: invalid verbosity {type_} - defaulting to summary.') - else: - operation += "?type=summary" - logger.warning("/dataChanges: invalid verbosity - defaulting to summary.") - - logger.debug(f"dataChanges_activities_get: {operation}") - - # Start building the full URL for the call - url = self.prism_endpoint + operation + # Make sure output type is valid. + output_type = type_.lower() if type_.lower() in ['summary', 'full'] else 'summary' # Searching by ID is a special case that eliminates all other types # of search. Ask for the datachange by id and return just this # result - even blank. + if id is not None and isinstance(id, str) and len(id) > 0: + operation = f"{operation}/{id}?type={output_type}" + logger.debug(f'dataChanges_get: {operation}') + url = self.prism_endpoint + operation - if search_by_id: - response = self.get(url=url, log_tag="dataChanges") + response = self.http_get(url) if response.status_code == 200: return response.json() - else: - return None + + return None + + logger.debug(f"dataChanges_get: {operation}") + url = self.prism_endpoint + operation # Get a list of tasks by page, with or without searching. - search_limit = 500 # Assume all DCTs should be returned + search_limit = 500 # Assume all DCTs should be returned - max API limit search_offset = 0 # API default value if limit is not None and isinstance(limit, int) and limit > 0: @@ -823,271 +1090,314 @@ def dataChanges_list(self, if name is not None and isinstance(name, str) and len(name) > 0: if search is not None and isinstance(search, bool) and search: # Force a return of ALL data change tasks, so we can search the names. + name_param = "" searching = True search_limit = 500 search_offset = 0 else: - # Should return at most 1 result. 
- url += "&name=" + urllib.parse.quote(name) - + # With an explicit name, we should return at most 1 result. + name_param = "&name=" + urlparse.quote(name) searching = False + search_limit = 1 search_offset = 0 # Assume we will be looping based on limit and offset values; however, we may - # execute only once. + # execute only once. NOTE: this routine NEVER fails, but may return zero + # data change tasks. - dataChanges = {"total": 0, "data": []} + data_changes = {"total": 0, "data": []} while True: - search_url = f"{url}&limit={search_limit}&offset={search_offset}" + search_url = f"{url}?type={output_type}&limit={search_limit}&offset={search_offset}{name_param}" logger.debug(f"dataChangesID url: {search_url}") - response = self.get(url=search_url, log_tag=operation) + response = self.http_get(url=search_url) if response.status_code != 200: break - retJSON = response.json() + return_json = response.json() if searching: # Only add matching rows - dataChanges["data"] += \ + data_changes["data"] += \ filter(lambda dtc: dtc["name"].find(name) != -1 or dtc["displayName"].find(name) != -1, - retJSON["data"]) + return_json["data"]) else: # Without searching, simply paste the current page to the list. - dataChanges["data"] += retJSON["data"] + data_changes["data"] += return_json["data"] break - # If we didn't get a full page, then we done. - if len(retJSON["data"]) < search_limit: + # If we didn't get a full page, then we are done. + if len(return_json["data"]) < search_limit: break # Go to the next page. offset += search_limit - dataChanges["total"] = len(dataChanges["data"]) + data_changes["total"] = len(data_changes["data"]) + + return data_changes - return dataChanges + def dataChanges_activities_get(self, id, activityID): + """Returns details of the activity specified by activityID. - def dataChanges_activities_get(self, data_change_id, activity_id): - operation = f"/dataChanges/{data_change_id}/activities/{activity_id}" + Parameters + ---------- + id : str + A reference to a Prism Analytics data change.""" + operation = f"/dataChanges/{id}/activities/{activityID}" logger.debug(f"dataChanges_activities_get: {operation}") + url = self.prism_endpoint + operation - r = self.get(self.prism_endpoint + operation) + r = self.http_get(url) if r.status_code == 200: - return json.loads(r.text) + return r.json() return None - def dataChanges_activities_post(self, data_change_id, fileContainerID=None): - operation = f"/dataChanges/{data_change_id}/activities" - logger.debug(f"dataChanges_activities_post: {operation}") + def dataChanges_activities_post(self, id, fileContainerID=None): + """Execute a data change task. - url = self.prism_endpoint + operation + Parameters + ---------- + id : str + A reference to a Prism Analytics data change. + fileContainerID : str + A reference to a Prism Analytics File Container. 
- headers = { - "Authorization": "Bearer " + self.get_bearer_token(), - "Content-Type": "application/json", - } + Returns + ------- + """ + operation = f"/dataChanges/{id}/activities" + logger.debug(f"post: {operation}") + url = self.prism_endpoint + operation if fileContainerID is None: logger.debug("no file container ID") - data = None else: - logger.debug("with file container ID: {fileContainerID") + logger.debug('with file container ID: {fileContainerID}') + # NOTE: the name is NOT correct based on the API definition data = json.dumps({"fileContainerWid": fileContainerID}) - r = requests.post(url, data=data, headers=headers) - log_elapsed(f"POST {operation}", r.elapsed) + r = self.http_post(url, headers=self.CONTENT_APP_JSON, data=data) if r.status_code == 201: - activityID = json.loads(r.text)["id"] + return_json = r.json() + activity_id = return_json["id"] - logging.debug(f"Successfully started data load task - id: {activityID}") - return activityID + logger.debug(f"successfully started data load task - id: {activity_id}") + return return_json elif r.status_code == 400: - logging.warning(r.json()["errors"][0]["error"]) - else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") - - return None - - def dataChanges_by_name(self, data_change_name): - logger.debug(f"data_changes_by_name: {data_change_name}") - - data_changes_list = self.dataChanges_list() - - for data_change in data_changes_list: - if data_change.get("displayName") == data_change_name: - # We found the DCT by name, lookup all the details. - data_change_id = data_change.get("id") - logger.debug(f"found {data_change_name}: {data_change_id}") - - return self.dataChanges_by_id(data_change_id) - - logger.debug(f"{data_change_name} was not found!") + logger.debug(f'error running data change task.') + return r.json() return None - def dataChanges_by_id(self, data_change_id): - operation = f"/dataChanges/{data_change_id}" - logger.debug(f"dataChanges_by_id: {operation}") + def dataChanges_is_valid(self, id): + """Utility method to return the validation status of a data change task. - url = self.prism_endpoint + f"/dataChanges/{data_change_id}" - - headers = {"Authorization": "Bearer " + self.get_bearer_token()} - - r = requests.get(url, headers=headers) - log_elapsed(logger, operation, r.elapsed) - r.raise_for_status() + Parameters + ---------- + id : str + A reference to a Prism Analytics data change. - if r.status_code == 200: - logger.debug(f"Found data change task: id = {data_change_id}") + Returns + ------- + bool + True if data change task is valid or False if the task does not + exist or is not valid. + """ + dct = self.dataChanges_validate(id) - return json.loads(r.text) - elif r.status_code == 400: - logger.warning(r.json()["errors"][0]["error"]) - else: - logger.warning(f"HTTP status code {r.status_code}: {r.content}") + if dct is None: + logger.error(f"data_change_id {id} not found!") + return False - return json.loads(r.text) + if "error" in dct: + logger.critical(f"data_change_id {id} is not valid!") + return False - def dataChanges_is_valid(self, data_change_id): - dtc = self.dataChanges_validate(data_change_id) + return True - if dtc is None: - logger.critical(f"data_change_id {data_change_id} not found!") + def dataChanges_validate(self, id): + """validates the data change specified by dataChangeID - return False + Parameters + ---------- + id : str + The data change task ID to validate. 
- if "error" in dtc: - logger.critical(f"data_change_id {data_change_id} is not valid!") + Returns + ------- + """ + operation = f"/dataChanges/{id}/validate" + logger.debug(f"dataChanges_validate: get {operation}") + url = self.prism_endpoint + operation - return False + r = self.http_get(url) - return True + if r.status_code in [ 200, 400, 404]: + return r.json() - def dataChanges_validate(self, data_change_id): - operation = f"/dataChanges/{data_change_id}/validate" - logger.debug(f"dataChanges_validate: GET {operation}") + return None + def dataExport_get(self, limit=None, offset=None, type_=None): + operation = '/dataExport' + logger.debug(f"dataExport_get: get {operation}") url = self.prism_endpoint + operation - r = self.get(url) + r = self.http_get(url) - # If the DCT is invalid, the response will have the errors - # so we return the JSON no matter what. + if r.status_code == 200: + return r.json() - return json.loads(r.text) + return None def fileContainers_create(self): - operation = "/fileContainers" - logger.debug(f"fileContainer_create: POST {operation}") + """Create a new file container. + Returns + ------- + Dict object with an "id" attribute or None if there was a problem. + """ + operation = "/fileContainers" + logger.debug(f"fileContainer_create: post {operation}") url = self.prism_endpoint + operation - headers = {"Authorization": "Bearer " + self.get_bearer_token()} - - r = requests.post(url, headers=headers) - log_elapsed(f"POST {operation}", r.elapsed) + r = self.http_post(url) if r.status_code == 201: return_json = r.json() - fileContainerID = return_json["id"] - logger.debug(f"successfully created file container: {fileContainerID}") + file_container_id = return_json["id"] + logger.debug(f"successfully created file container: {file_container_id}") return return_json - elif r.status_code == 400: - logging.warning(r.json()["errors"][0]["error"]) - else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") return None - def fileContainers_list(self, fileContainerID): - operation = f"/fileContainers/{fileContainerID}/files" - logger.debug(f"fileContainers_list: GET {operation}") + def fileContainers_get(self, id): + """Return all files for a file container. - url = self.prism_endpoint + operation + Parameters + ---------- + id : str + File container ID to list. - r = self.get(url) + Returns + ------- + Dictionary of found files having a "total" attribute with the count + of files uploaded and a data attribute with an array of file metadata + for each file in the container. + """ + operation = f"/fileContainers/{id}/files" + logger.debug(f"fileContainers_list: get {operation}") + url = self.prism_endpoint + operation - if r.status_code == 200: - return r.json() + response = self.http_get(url) - return None + if response.status_code == 200: + return_json = response.json() - def fileContainers_load(self, fileContainerID, fqfn): - # Do a sanity check and make sure the fqfn exists and - # has a gzip extension. + return {'total': len(return_json), 'data': return_json} - if not os.path.isfile(fqfn): - logger.critical("file not found: {fqfn}") - return None + if response.status_code == 404: + logger.warning('verify: Self-Service: Prism File Container domain in the Prism Analytics functional area.') - # Create the file container and get the ID. We use the - # file container ID to load the file and then return the - # value to the caller for use in a data change call. + return {"total": 0, 'data' : []} # Always return a list. 
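+
+    # Typical flow for the fileContainers_* methods (illustrative sketch only;
+    # p is an assumed, authenticated client instance):
+    #
+    #   container = p.fileContainers_create()               # dict with an "id" attribute
+    #   p.fileContainers_load(container["id"], "data.csv")  # .csv files are gzipped on upload
+    #   files = p.fileContainers_get(container["id"])       # {"total": n, "data": [...]}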
- if fileContainerID is None: - file_container_response = self.fileContainers_create() + def fileContainers_load(self, id, file): + """ + Load one or more files to a fileContainer. - if file_container_response is None: - return None + Parameters + ---------- + id : str + File container ID of target container. + file : str|list + File name(s) to load into the container - fID = file_container_response["id"] - else: - fID = fileContainerID + Returns + ------- + For a single file, the upload results are returned as a + dict. For multiple files, an array of results is returned. + """ - print(self.fileContainers_list(fID)) + # Create the specified fID - a new ID is created if None. + resolved_fid = id - # We have our container, load the file + target_files = resolve_file_list(file) - headers = { - "Authorization": "Bearer " + self.get_bearer_token() + results = { + 'id': None, + 'total': 0, + 'data': [] } - operation = f"/fileContainers/{fID}/files" - logger.debug(f"fileContainer_load: POST {operation}") + for target_file in target_files: + # It is legal to upload an empty file - see the table truncate method. + if target_file is None: + new_file = {"file": ("dummy", io.BytesIO())} + elif target_file.lower().endswith(".csv.gz"): + new_file = {"file": open(target_file, "rb")} + elif target_file.lower().endswith(".csv"): + upload_filename = os.path.basename(target_file) + upload_filename += ".gz" - files = {"file": open(fqfn, "rb")} + with open(target_file, "rb") as in_file: + new_file = {"file": (upload_filename, gzip.compress(in_file.read()))} - url = self.prism_endpoint + operation + # Create the file container and get the ID. We use the + # file container ID to load the file and then return the + # value to the caller for use in a data change call. - r = requests.post(url, files=files, headers=headers) - log_elapsed(f"POST {operation}", r.elapsed) + if resolved_fid is None: + # The caller is asking us to create a new container. 
+ file_container_response = self.fileContainers_create() - if r.status_code == 201: - logging.info("successfully loaded fileContainer") + if file_container_response is None: + logger.error("Unable to create fileContainer") + return None - print(self.fileContainers_list(fID)) + resolved_fid = file_container_response["id"] - return fID - elif r.status_code == 400: - logging.warning(r.json()["errors"][0]["error"]) - else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") + results['id'] = resolved_fid - return None + logger.debug(f"resolved fID: {resolved_fid}") - def wql_dataSources(self, wid=None, limit=100, offset=0, dataSources_name=None, search=False): - operation = "/dataSources" + # We have our container, load the file + + operation = f"/fileContainers/{resolved_fid}/files" + logger.debug(f"fileContainer_load: POST {operation}") + url = self.prism_endpoint + operation + + response = self.http_post(url, files=new_file) + + if response.status_code == 201: + logger.debug(f"successfully loaded file: {file}") + results['data'].append(response.json()) + + results['total'] = len(results['data']) + + return results + def wql_dataSources(self, id=None, limit=100, offset=0, name=None, search=False): + operation = "/dataSources" + logger.debug("wql_dataSources: {operation}") url = f"{self.wql_endpoint}{operation}" offset = 0 return_sources = {"total": 0, "data": []} while True: - r = self.get(f"{url}?limit=100&offset={offset}") + r = self.http_get(f"{url}?limit=100&offset={offset}") if r.status_code == 200: ds = r.json() @@ -1108,38 +1418,57 @@ def wql_data(self, query, limit, offset): operation = "/data" url = f"{self.wql_endpoint}{operation}" - query_safe = urllib.parse.quote(query.strip()) + query_safe = urlparse.quote(query.strip()) - query_limit = limit if limit is not None else 100 + if limit is None or not isinstance(limit, int) or limit > 10000: + query_limit = 10000 + offset = 0 + else: + query_limit = limit + + offset = offset if offset is not None and isinstance(offset, int) else 0 - offset = 0 data = {"total": 0, "data": []} while True: - r = self.get(f"{url}?query={query_safe}&limit={query_limit}&offset={offset}") + r = self.http_get(f"{url}?query={query_safe}&limit={query_limit}&offset={offset}") if r.status_code == 200: - ds = r.json() - data["data"] += ds["data"] + page = r.json() + data["data"] += page["data"] else: - logger.error(f"Invalid WQL: {r.status_code}") - logger.error(r.text) - return data # Return whatever we have... - if len(ds["data"]) < 10000: + if len(page["data"]) < query_limit: break - offset += 100 + offset += query_limit + # Set the final row count. data["total"] = len(data["data"]) return data - def raas_run(self, report, system, user, params=None, format_=None): - if system: + def raas_run(self, report, user, params=None, format_='XML'): + """ + Run a Workday system or custom report. + + Parameters + ---------- + report : str + Name of the Workday report to run. + user : str + Username to include on URL + params : list + Array of parameter/value pairs to include on the URL + format_ : str + Output format, i.e., XML, JSON, CSV + """ + if user is None or not isinstance(user, str) or len(user) == 0: + logger.warning("generating delivered report (systemreport2).") url = f"{self.raas_endpoint}/systemreport2/{self.tenant_name}/{report}" else: + logger.debug(f"generating report as {user}.") url = f"{self.raas_endpoint}/customreport2/{self.tenant_name}/{user}/{report}" separator = "?" 
@@ -1151,37 +1480,60 @@ def raas_run(self, report, system, user, params=None, format_=None): separator = "&" url += query_str - if format: - if "?" in url: - url = f"{url}&format={format_}" - else: - url = f"{url}?format={format_}" - if url is None: - raise ValueError("RaaS URL is required") - else: - if url.find("format=") == -1: - output_format = "xml" - else: - output_format = url.split("format=")[1] + if format_: + url = f"{url}{separator}format={format_}" - headers = {"Accept": "text/csv"} - r = self.get(url, headers=headers) + r = self.http_get(url) if r.status_code == 200: - # if output_format == "json": - # return r.json()["Report_Entry"] - # elif output_format == "csv": - # return list(csv.reader(io.StringIO(r.content.decode("utf8")))) - # else: - # raise ValueError(f"Output format type {output_format} is unknown") return r.text - else: - logging.error("HTTP Error: {}".format(r.content.decode("utf-8"))) + logging.error("HTTP Error: {}".format(r.content.decode("utf-8"))) return None - def is_valid_operation(self, operation): - operation_list = ["insert", "truncateandinsert", "delete", "upsert", "update"] - return operation in operation_list +def resolve_file_list(files): + """Evaluate file name(s)s and return the list of supported files. + + Parameters + ---------- + files : str|list + One (str) or more (list) file names. + + Returns + ------- + list + List of files that can be uploaded. + """ + # At a minimum, an empty list will always be returned. + target_files = [] + + if files is None: + logger.warning("File(s) must be specified.") + return target_files + elif isinstance(files, list) and len(files) == 0: + logger.warning("File(s) must be specified.") + return target_files + elif isinstance(files, tuple) and len(files) == 0: + logger.warning("File(s) must be specified.") + return target_files + elif isinstance(files, str): + if not files: + logger.warning("File(s) must be specified.") + return target_files + else: + files = [files] + + # Check the extension of each file in the list. 
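+    # Note: only .csv and .csv.gz extensions are accepted; missing files and any
+    # other extension are skipped with a warning rather than aborting the upload.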
+ for f in files: + if not os.path.exists(f): + logger.warning(f"File {f} not found - skipping.") + continue + + if f.lower().endswith(".csv") or f.lower().endswith(".csv.gz"): + target_files.append(f) + else: + logger.warning(f"File {f} is not a .csv.gz or .csv file - skipping.") + + return target_files From 74dc4b34fce9463490664349d6380576fb2064c4 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Wed, 18 Oct 2023 14:31:58 -0400 Subject: [PATCH 018/103] First release of Prism V3 --- .gitignore | 1 + docs/Makefile | 20 + docs/make.bat | 35 + docs/source/conf.py | 36 + docs/source/index.rst | 23 + docs/source/modules.rst | 7 + docs/source/prism.rst | 29 + prism/__init__.py | 4 +- prism/cli.py | 81 ++- prism/commands/buckets_commands.py | 7 +- prism/commands/dataChanges_commands.py | 26 +- prism/commands/fileContainers_commands.py | 46 +- prism/commands/raas_commands.py | 28 +- prism/commands/tables_commands.py | 400 ++++++++--- prism/commands/util.py | 210 +----- prism/commands/wql_commands.py | 34 +- prism/prism.py | 786 +++++++++++----------- 17 files changed, 962 insertions(+), 811 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/make.bat create mode 100644 docs/source/conf.py create mode 100644 docs/source/index.rst create mode 100644 docs/source/modules.rst create mode 100644 docs/source/prism.rst diff --git a/.gitignore b/.gitignore index 8cdd94d..44aaf04 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ __pycache__/ # Distribution / packaging .Python env/ +venv/ build/ develop-eggs/ dist/ diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d0c3cbf --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..747ffb7 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 0000000..8893e9a --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,36 @@ +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = 'Prism-Python' +copyright = '2023, Mark Greynolds' +author = 'Mark Greynolds' +release = '0.1' + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ['sphinx_click'] + +templates_path = ['_templates'] +exclude_patterns = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx_click' + ] + + + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'alabaster' +html_static_path = ['_static'] diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 0000000..a0bf9f9 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,23 @@ +.. Prism-Python documentation master file, created by + sphinx-quickstart on Fri Oct 13 08:31:20 2023. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Prism-Python's documentation! +======================================== + +.. toctree:: + :maxdepth: 2 + :caption: Contents: + +.. click:: prism:cli + :prog: prism-python + :nested: full + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 0000000..032735b --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +prism +===== + +.. toctree:: + :maxdepth: 4 + + prism diff --git a/docs/source/prism.rst b/docs/source/prism.rst new file mode 100644 index 0000000..6abdf80 --- /dev/null +++ b/docs/source/prism.rst @@ -0,0 +1,29 @@ +prism package +============= + +Submodules +---------- + +prism.cli module +---------------- + +.. automodule:: prism.cli + :members: + :undoc-members: + :show-inheritance: + +prism.prism module +------------------ + +.. automodule:: prism.prism + :members: + :undoc-members: + :show-inheritance: + +Module contents +--------------- + +.. 
automodule:: prism + :members: + :undoc-members: + :show-inheritance: diff --git a/prism/__init__.py b/prism/__init__.py index 41c5de7..939eef2 100644 --- a/prism/__init__.py +++ b/prism/__init__.py @@ -1,8 +1,8 @@ -from prism.prism import Prism, load_schema, create_table, upload_file +from prism.prism import Prism, set_logging from ._version import get_versions __version__ = get_versions()["version"] del get_versions -__all__ = ["load_schema", "Prism", "create_table", "upload_file"] +__all__ = ["Prism","set_logging"] diff --git a/prism/cli.py b/prism/cli.py index 023614b..c254798 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -6,12 +6,12 @@ import prism -from commands import tables_commands -from commands import buckets_commands -from commands import dataChanges_commands -from commands import fileContainers_commands -from commands import wql_commands -from commands import raas_commands +import commands.tables_commands as t_commands +import commands.buckets_commands as b_commands +import commands.dataChanges_commands as d_commands +import commands.fileContainers_commands as f_commands +import commands.wql_commands as w_commands +import commands.raas_commands as r_commands def param_fixup(value, config, config_name, option): @@ -102,10 +102,6 @@ def cli(ctx, # If the configuration is not available or is invalid, exit sys.exit(1) - if log_file is None: - # Assume a file in the PWD of the process, i.e., local directory where invoked. - log_file = "prism.log" - if log_level is None: set_level = logging.INFO else: @@ -116,28 +112,31 @@ def cli(ctx, logger.setLevel(set_level) # Create a handler as specified by the user (or defaults) - fh = logging.FileHandler(log_file) - fh.setLevel(set_level) + + if log_file is not None: + fh = logging.FileHandler(log_file) + + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + fh.setFormatter(formatter) + + fh.setLevel(set_level) + logger.addHandler(fh) # Create an explicit console handler with a higher log level ch = logging.StreamHandler() - ch.setLevel(logging.ERROR) - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter('%(message)s') ch.setFormatter(formatter) - fh.setFormatter(formatter) - # add the handlers to logger + ch.setLevel(logging.INFO) logger.addHandler(ch) - logger.addHandler(fh) logger.debug("completed initialization.") # initialize the Prism class from our resolved configuration. 
p = prism.Prism(base_url, tenant_name, client_id, client_secret, refresh_token) - p.set_logging(log_file, log_level) + prism.set_logging(log_file, log_level) # store the prism object in the Click context ctx.obj = {"p": p} @@ -161,11 +160,11 @@ def tables(): """ -tables.add_command(tables_commands.tables_list) -tables.add_command(tables_commands.tables_create) -tables.add_command(tables_commands.tables_update) -tables.add_command(tables_commands.tables_upload) -tables.add_command(tables_commands.tables_truncate) +tables.add_command(t_commands.tables_list) +tables.add_command(t_commands.tables_create) +tables.add_command(t_commands.tables_update) +tables.add_command(t_commands.tables_upload) +tables.add_command(t_commands.tables_truncate) @cli.group("buckets") @@ -175,12 +174,12 @@ def buckets(): """ -buckets.add_command(buckets_commands.buckets_list) -buckets.add_command(buckets_commands.buckets_create) -buckets.add_command(buckets_commands.buckets_complete) -buckets.add_command(buckets_commands.buckets_status) -buckets.add_command(buckets_commands.buckets_upload) -buckets.add_command(buckets_commands.buckets_name) +buckets.add_command(b_commands.buckets_list) +buckets.add_command(b_commands.buckets_create) +buckets.add_command(b_commands.buckets_complete) +buckets.add_command(b_commands.buckets_status) +buckets.add_command(b_commands.buckets_upload) +buckets.add_command(b_commands.buckets_name) @cli.group("dataChanges") @@ -190,10 +189,10 @@ def dataChanges(): """ -dataChanges.add_command(dataChanges_commands.dataChanges_list) -dataChanges.add_command(dataChanges_commands.dataChanges_validate) -dataChanges.add_command(dataChanges_commands.dataChanges_run) -dataChanges.add_command(dataChanges_commands.dataChanges_activities) +dataChanges.add_command(d_commands.dataChanges_list) +dataChanges.add_command(d_commands.dataChanges_validate) +dataChanges.add_command(d_commands.dataChanges_run) +dataChanges.add_command(d_commands.dataChanges_activities) @cli.group("fileContainers") @@ -203,20 +202,20 @@ def fileContainers(): """ -fileContainers.add_command(fileContainers_commands.fileContainers_create) -fileContainers.add_command(fileContainers_commands.filecontainers_list) -fileContainers.add_command(fileContainers_commands.filecontainers_load) +fileContainers.add_command(f_commands.fileContainers_create) +fileContainers.add_command(f_commands.filecontainers_list) +fileContainers.add_command(f_commands.filecontainers_load) @cli.group("wql") def wql(): """ - Operations to list (dataSources) and query WQL sources (data). + Operations to list (/dataSources) and run WQL queries (/data). 
""" -wql.add_command(wql_commands.dataSources) -wql.add_command(wql_commands.data) +wql.add_command(w_commands.dataSources) +wql.add_command(w_commands.data) @cli.group("raas") @@ -226,7 +225,7 @@ def raas(): """ -raas.add_command(raas_commands.run) +raas.add_command(r_commands.run) if __name__ == "__main__": diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index 53c9464..8d05c75 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -81,9 +81,10 @@ def buckets_create(ctx, table_name, table_wid, file_, operation, bucket_name): click.echo("A table must be associated with this bucket (-n, -w, or -f must be specified).") sys.exit(1) - bucket = p.buckets_create(bucket_name, table_wid, table_name, file_, operation) + bucket = p.buckets_create(name=bucket_name, target_wid=table_wid, target_name=table_name, schema=file_, operation=operation) - click.echo(bucket) + if bucket is not None: + click.echo(json.dumps(bucket,indent=2)) @click.command("upload") @@ -192,7 +193,7 @@ def buckets_status(ctx, name, wid): buckets=p.buckets_list(wid, bucket_name=name) - if buckets["total"] != 0: + if buckets["total"] == 1: click.echo(buckets["data"][0]["state"]["descriptor"]) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 80cae65..7f7a3ab 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -31,10 +31,13 @@ def dataChanges_list(ctx, name, wid, activity_wid, limit, offset, type_, format_ data_changes = p.dataChanges_list(name, wid, activity_wid, limit, offset, type_, search) + # Regardless of success, the list operation always returns + # a valid object. Error messages will appear in the log. if data_changes["total"] == 0: click.echo("No data change tasks found.") return + # For display purposes, sort by display name (case-insensitive) data_changes["data"] = sorted(data_changes["data"], key=lambda dct: dct["displayName"].lower()) # Handle output @@ -56,19 +59,26 @@ def dataChanges_list(ctx, name, wid, activity_wid, limit, offset, type_, format_ click.echo(json.dumps(data_changes["data"], indent=2)) -@click.command("validate", help="Validate the data change specified by name or ID.") +@click.command("validate") @click.option("-w", "--wid", help="The dataChangeID to list.") @click.option("-s", "--search", is_flag=True, help="Use contains search substring for --name or --id (default=false).") @click.argument("name", required=False) @click.pass_context def dataChanges_validate(ctx, name, wid, search): + """ + Validate the data change specified by name or ID. + + [NAME] The API name of the data change task to validate + """ + p = ctx.obj["p"] if name is None and wid is None: - click.echo("A data change task name or a wid must be specified.") + click.echo("A data change task name or wid must be specified.") sys.exit(1) # See if we have any matching data change tasks. 
+ # Note: datachanges_list never fails - errors may appear in the log data_changes = p.dataChanges_list( name=name, wid=wid, @@ -77,11 +87,10 @@ def dataChanges_validate(ctx, name, wid, search): if data_changes["total"] == 0: click.echo("No matching data change task(s) found.") - sys.exit(1) - - for dct in data_changes["data"]: - validate = p.dataChanges_validate(dct["id"]) - click.echo(validate) + else: + for dct in data_changes["data"]: + validate = p.dataChanges_validate(dct["id"]) + click.echo(validate) @click.command("run") @@ -113,9 +122,11 @@ def dataChanges_run(ctx, name, filecontainerid): click.echo("Invalid DCT: " + validate["errors"][0]["error"] + " - code: " + validate["errors"][0]["code"]) sys.exit(1) else: + # It is valid to run a data change task without a fileContainerID value. activity_id = p.dataChanges_activities_post(dct_id, filecontainerid) if activity_id is None: + click.echo("Failed to run data change task - please review the log.") sys.exit(1) else: click.echo(activity_id) @@ -149,6 +160,7 @@ def dataChanges_activities(ctx, status, name, activity_id): current_status = p.dataChanges_activities_get(dct_id, activity_id) if current_status is None: + click.echo("Activity for DCT not found.") sys.exit(1) else: if status: diff --git a/prism/commands/fileContainers_commands.py b/prism/commands/fileContainers_commands.py index 2bad422..e0955cf 100644 --- a/prism/commands/fileContainers_commands.py +++ b/prism/commands/fileContainers_commands.py @@ -1,5 +1,6 @@ import click import sys +import json from . import util as u @@ -7,47 +8,64 @@ @click.command("create") @click.pass_context def fileContainers_create(ctx): - """ - Create a new fileContainers object returning the ID. - """ + """Create a new fileContainers object returning the ID.""" p = ctx.obj["p"] - fileContainer = p.fileContainers_create() + file_container = p.fileContainers_create() - if fileContainer is not None: - click.echo(fileContainer["id"]) + if file_container is not None: + click.echo(file_container["id"]) else: sys.exit(1) -@click.command("list", help="List the files for a file container.") +@click.command("list") @click.argument("fileContainerID") @click.pass_context def filecontainers_list(ctx, filecontainerid): + """ + List the files in a file container. + + [fileContainerID] Container ID to list loaded files. + """ + p = ctx.obj["p"] - files = p.filecontainers_list(filecontainerid) + files = p.fileContainers_list(filecontainerid) - click.echo(files) + click.echo(json.dumps(files,indent=2)) @click.command("load") -@click.option("-f", "--fileContainerID", default=None, help="Target File container ID, default to a new container.") +@click.option("-f", "--fileContainerID", default=None, + help="Target File container ID - defaults to a new container.") @click.argument("file", nargs=-1, type=click.Path(exists=True)) @click.pass_context def filecontainers_load(ctx, filecontainerid, file): """ - Load one or more file into a file container. + Load one or more files into a file container returning the container ID. - [FILE] one or more files to load. + [FILE] one or more CSV or GZipped CSV files to load. """ + + if len(file) == 0: + click.echo("One or more files must be specified.") + p = ctx.obj["p"] - fid = u.fileContainers_load(p, filecontainerid, file) + # Load the file and retrieve the fID - a new fID is + # created if the command line fID is not specified. + # Subsequent files are loaded into the same container (fID). 
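+    # u.get_files() normalizes the command line argument(s) into a list of
+    # files ready for upload (CSV files are gzip-compressed first).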
+ fid = p.fileContainers_load(filecontainerid, u.get_files(file)) + + # If the fID comes back blank, then something is not + # working. Note: any error messages have already + # been logged by the load operation. if fid is None: click.echo("Error loading fileContainer.") + sys.exit(1) else: # Return the file container ID to the command line. If a - # filecontainerID was passed, simply return that id. + # fileContainerID was passed, simply return that id. click.echo(fid) diff --git a/prism/commands/raas_commands.py b/prism/commands/raas_commands.py index 845f115..bb9cac1 100644 --- a/prism/commands/raas_commands.py +++ b/prism/commands/raas_commands.py @@ -1,23 +1,23 @@ import click +import sys +import logging - -@click.command("run", help="Run RaaS report as system or as a specific user.") -@click.option("-s", "--system", is_flag=True, default=False, help="Run delivered Workday report.") -@click.option("-u", "--user", default=None, help="Run custom report as named user.") +@click.command("run", help="Run a system or custom RaaS report.") +@click.option("-u", "--user", default=None, help="Run custom report as named user - default to delivered reports.") @click.option("-f", "--format", "format_", default=None, help="Output query results as CSV.") -@click.argument("report", nargs=1) -@click.argument('params', nargs=-1) +@click.argument("report", nargs=1, required=True) +@click.argument('params', nargs=-1, required=False) @click.pass_context def run(ctx, system, user, format_, report, params): - p = ctx.obj["p"] + """ + Run a Workday report. - if system and user is not None: - click.echo("Please specify only system or user, not both.") - return - - if not system and user is None: - click.echo("Please specify either system or user.") + [REPORT] Report name to run. + [PARAMS] Parameters expected by the report as list. + """ + p = ctx.obj["p"] - report_output = p.raas_run(report, system, user, params, format_) + # Return to a variable for easy debugging. + report_output = p.raas_run(report, user, params, format_) click.echo(report_output) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index bb778de..b90131d 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -1,31 +1,32 @@ +import json +import logging import sys import os -import logging +import csv import click -import json import pandas as pd from . 
import util as u -logger = logging.getLogger("prismCLI") +logger = logging.getLogger('prismCLI') -@click.command("list") -@click.option("-w", "--wid", - help="Unique WID for Prism table or dataset.") -@click.option("-l", "--limit", type=int, default=None, - help="The maximum number of object data entries included in the response, default=all.") -@click.option("-o", "--offset", type=int, default=None, - help="The offset to the first object in a collection to include in the response.") -@click.option("-t", "--type", "type_", default="summary", - type=click.Choice(["summary", "full", "permissions"], case_sensitive=False), - help="How much information returned for each table.") -@click.option("-f", "--format", "format_", default="json", +@click.command('list') +@click.option('-w', '--wid', + help='Specific WID of Prism table or dataset to list.') +@click.option('-l', '--limit', type=int, default=None, + help='The maximum number of object data entries included in the response, default=all.') +@click.option('-o', '--offset', type=int, default=None, + help='The offset to the first object in a collection to include in the response.') +@click.option('-t', '--type', 'type_', default='summary', + type=click.Choice(['summary', 'full', 'permissions'], case_sensitive=False), + help='How much information returned for each table.') +@click.option('-f', '--format', 'format_', default='json', type=click.Choice(['json', 'summary', 'schema', 'csv'], case_sensitive=False), - help="Format output as JSON, summary, schema, or CSV.") -@click.option("-s", "--search", is_flag=True, - help="Enable substring search of NAME in api name or display name, default=False (exact match).") -@click.argument("name", required=False) + help='Format output as JSON, summary, schema, or CSV.') +@click.option('-s', '--search', is_flag=True, + help='Enable substring search of NAME in api name or display name, default=False (exact match).') +@click.argument('name', required=False) @click.pass_context def tables_list(ctx, name, wid, limit, offset, type_, format_, search): """List the tables or datasets permitted by the security profile of the current user. @@ -33,130 +34,133 @@ def tables_list(ctx, name, wid, limit, offset, type_, format_, search): [NAME] Prism table name to list. """ - if type_ in ("summary", "permissions") and format_ in ("schema", "csv"): + if type_ in ('summary', 'permissions') and format_ in ('schema', 'csv'): # Summary results cannot generate schema or CSV output since there will be no fields. - logger.critical(f"Invalid combination of type \"{type_}\" and format \"{format_}\".") + logger.error(f'Invalid combination of type "{type_}" and format "{format_}".') sys.exit(1) - p = ctx.obj["p"] + p = ctx.obj['p'] # Query the tenant... tables = p.tables_list(name, wid, limit, offset, type_, search) # The return always has a total tables returned value. # note: tables_list never fails, it simply returns 0 tables if there is a problem. - if tables["total"] == 0: + if tables['total'] == 0: return # Handle output - if format_ == "json": + if format_ == 'json': # The results could be one table or an array of multiple # tables - simply dump the returned object. 
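+        # Every list call returns an envelope of the form
+        # {"total": <count>, "data": [ <table>, ... ]}.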
click.echo(json.dumps(tables, indent=2)) - elif format_ == "summary": - for table in tables["data"]: - display_name = table["displayName"] - rows = table["stats"]["rows"] if "stats" in table and "rows" in table["stats"] else "Null" - size = table["stats"]["size"] if "stats" in table and "size" in table["stats"] else "Null" - refreshed = table["dateRefreshed"] if "dateRefreshed" in table else "unknown" - enabled = table["enableForAnalysis"] if "enableForAnalysis" in table else "Null" + elif format_ == 'summary': + for table in tables['data']: + display_name = table['displayName'] + rows = table['stats']['rows'] if 'stats' in table and 'rows' in table['stats'] else 'Null' + size = table['stats']['size'] if 'stats' in table and 'size' in table['stats'] else 'Null' + refreshed = table['dateRefreshed'] if 'dateRefreshed' in table else 'unknown' + enabled = table['enableForAnalysis'] if 'enableForAnalysis' in table else 'Null' click.echo(f'{display_name}, Enabled: {enabled}, Rows: {rows}, Size: {size}, Refreshed: {refreshed}') - elif format_ == "csv": - df = pd.json_normalize(tables["data"]) + elif format_ == 'csv': + df = pd.json_normalize(tables['data']) click.echo(df.to_csv(index=False)) - elif format_ == "schema": + elif format_ == 'schema': # Dump out the fields of the first table in the result in # a format compatible with a schema used to created or edit # a table. - table = tables["data"][0] # Only output the first table. + table = tables['data'][0] # Only output the first table. # Remove the Prism audit columns. - fields = [fld for fld in tables["data"][0]["fields"] if not fld["name"].startswith("WPA_")] + fields = [fld for fld in table['fields'] if not fld['name'].startswith('WPA_')] # Remove tenant specific values - these are not needed # if the user wants to update a table definition. for fld in fields: - if "fieldId" in fld: - if "fieldId" in fld: - del fld["fieldId"] + if 'fieldId' in fld: + del fld['fieldId'] - if "id" in fld: - del fld["id"] + if 'id' in fld: + del fld['id'] + + if 'type' in fld: + if 'descriptor' in fld['type']: + # Shorten the Prism type syntax to remove the GUID id value. 
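+                    # For example, {"id": "<guid>", "descriptor": "Text"}
+                    # becomes {"id": "Schema_Field_Type=Text"}.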
+ fld['type']['id'] = f"Schema_Field_Type={fld['type']['descriptor']}" + del fld['type']['descriptor'] click.echo(json.dumps(fields, indent=2)) - else: - click.echo(u.csv_from_fields(tables["data"][0]["fields"])) - - -@click.command("create") -@click.option("-n", "--name", - help="Table name - overrides name from schema.") -@click.option("-d", "--displayName", - help="Specify a display name - defaults to name.") -@click.option("-t", "--tags", multiple=True, - help="Tags to organize the table in the Data Catalog.") -@click.option("-e", "--enableForAnalysis", type=bool, is_flag=True, default=None, - help="Enable this table for analytics.") -@click.option("-s", "--sourceName", - help="The API name of an existing table to copy.") -@click.option("-w", "--sourceWID", - help="The WID of an existing table to copy.") -@click.argument("file", required=False, type=click.Path(exists=True)) + + +@click.command('create') +@click.option('-n', '--name', + help='Table name - overrides name from schema.') +@click.option('-d', '--displayName', + help='Specify a display name - defaults to name.') +@click.option('-t', '--tags', multiple=True, + help='Tags to organize the table in the Data Catalog.') +@click.option('-e', '--enableForAnalysis', type=bool, is_flag=True, default=None, + help='Enable this table for analytics.') +@click.option('-s', '--sourceName', + help='The API name of an existing table to copy.') +@click.option('-w', '--sourceWID', + help='The WID of an existing table to copy.') +@click.argument('file', required=False, type=click.Path(exists=True)) @click.pass_context def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, sourcewid, file): """ Create a new table with the specified name. - [FILE] Optional file containing a schema definition for the table. + [FILE] Optional file containing a Prism schema definition for the new table. Note: A schema file, --sourceName, or --sourceWID must be specified. """ - p = ctx.obj["p"] + p = ctx.obj['p'] # We can assume a valid schema - get_schema sys.exits if there is a problem. - schema = u.get_schema(p, file, sourcename, sourcewid) + schema = get_schema(p, file, sourcename, sourcewid) # Initialize a new schema with the particulars for this table operation. if name is not None: # If we got a name, set it in the table schema - schema["name"] = name.replace(" ", "_") # Minor clean-up - elif "name" not in schema: + schema['name'] = name.replace(' ', '_') # Minor clean-up + elif 'name' not in schema: # The schema doesn't have a name and none was given - exit. - logger.critical("Table --name must be specified.") + logger.error('Table --name must be specified.') sys.exit(1) if displayname is not None: # If we got a display name, set it in the schema - schema["displayName"] = displayname - elif "displayName" not in schema: + schema['displayName'] = displayname + elif 'displayName' not in schema: # Default the display name to the name if not in the schema. - schema["displayName"] = name + schema['displayName'] = name if enableforanalysis is not None: - schema["enableForAnalysis"] = enableforanalysis - elif "enableForAnalysis" not in schema: + schema['enableForAnalysis'] = enableforanalysis + elif 'enableForAnalysis' not in schema: # Default to False - do not enable. - schema["enableForAnalysis"] = False + schema['enableForAnalysis'] = False # Create the table. 
table_def = p.tables_create(schema) if table_def is not None: - click.echo(f"Table {name} created.") + click.echo(f'Table {name} created.') else: - click.echo(f"Error creating table {name}.") + logger.error(f'Error creating table {name}.') -@click.command("update") -@click.option("-s", "--sourceName", help="The API name of an existing table to copy.") -@click.option("-w", "--sourceWID", help="The ID of an existing table to copy.") -@click.option("-t", "--truncate", is_flag=True, default=False, help="Truncate the table before updating.") -@click.argument("name", required=True) -@click.argument("file", required=False, type=click.Path(exists=True)) +@click.command('update') +@click.option('-s', '--sourceName', help='The API name of an existing table to copy.') +@click.option('-w', '--sourceWID', help='The ID of an existing table to copy.') +@click.option('-t', '--truncate', is_flag=True, default=False, help='Truncate the table before updating.') +@click.argument('name', required=True) +@click.argument('file', required=False, type=click.Path(exists=True)) @click.pass_context def tables_update(ctx, name, file, sourcename, sourcewid, truncate): """Edit the schema for an existing table. @@ -167,30 +171,30 @@ def tables_update(ctx, name, file, sourcename, sourcewid, truncate): Note: A schema file, --sourceName, or --sourceWID must be specified. """ - p = ctx.obj["p"] + p = ctx.obj['p'] # Before doing anything, table name must exist. tables = p.tables_list(name=name) - if tables["total"] == 0: - logger.critical(f"Table \"{name}\" not found.") + if tables['total'] == 0: + logger.error(f'Table \"{name}\" not found.') sys.exit(1) - table_id = tables["data"][0]["id"] + table_id = tables['data'][0]['id'] # Figure out the new schema either by file or other table. - fields = u.get_schema(p, file, sourcename, sourcewid) + fields = get_schema(p, file, sourcename, sourcewid) - p.tables_update(wid=table_id, schema=file, truncate=truncate) + p.tables_update(wid=table_id, schema=fields, truncate=truncate) - click.echo("update") + click.echo(f'Table {name} updated.') -@click.command("upload") -@click.option("-o", "--operation", default="TruncateAndInsert", - help="Operation for the table operation - default to TruncateAndInsert.") -@click.argument("name", required=True) -@click.argument("file", nargs=-1, type=click.Path(exists=True)) +@click.command('upload') +@click.option('-o', '--operation', default='TruncateAndInsert', + help='Operation for the table operation - default to TruncateAndInsert.') +@click.argument('name', required=True) +@click.argument('file', nargs=-1, type=click.Path(exists=True)) @click.pass_context def tables_upload(ctx, name, operation, file): """ @@ -198,54 +202,236 @@ def tables_upload(ctx, name, operation, file): NOTE: This operation creates ".csv.gz" files for each .csv file. """ - p = ctx.obj["p"] + p = ctx.obj['p'] # Convert the file(s) provided to a list of compressed files. 
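+    # The upload is a three-step process: create a bucket for the target
+    # table, upload the compressed file(s) into it, then complete the
+    # bucket to start the load.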
target_files = u.get_files(file) if len(target_files) == 0: - logging.getLogger("prismCLI").critical("No files to upload.") + logger.error('No files to upload.') sys.exit(1) bucket = p.buckets_create(target_name=name, operation=operation) if bucket is None: - logging.getLogger("prismCLI").critical("Bucket creation failed.") + logger.error('Bucket creation failed.') sys.exit(1) - results = p.buckets_upload(bucket["id"], target_files) + results = p.buckets_upload(bucket['id'], target_files) if len(results) > 0: - p.buckets_complete(bucket["id"]) + p.buckets_complete(bucket['id']) -@click.command("truncate") -@click.argument("name", required=True) +@click.command('truncate') +@click.argument('name', required=True) @click.pass_context def tables_truncate(ctx, name): """ Truncate the named table. - :param name: - :return: + [NAME] The API name of the Prism table to truncate. """ - # Create an empty bucket with a delete operation - p = ctx.obj["p"] + p = ctx.obj['p'] + msg = f'Unable to truncate table "{name}" - see log for details.' - # Get a bucket using a generated name and an explicit Delete operation. - bucket = p.buckets_create(target_name=name, operation="TruncateAndInsert") + # To do a truncate, we still need a bucket with a truncate operation. + bucket = p.buckets_create(target_name=name, operation='TruncateAndInsert') if bucket is None: - logging.getLogger("prismCLI").critical(f"Unable to truncate {name} - error getting bucket.") + logger.error(msg) sys.exit(1) - bucket_id = bucket["id"] + bucket_id = bucket['id'] # Don't specify a file to put a zero sized file into the bucket. - bucket = p.buckets_upload(bucket_id) + p.buckets_upload(bucket_id) # Ask Prism to run the delete statement by completing the bucket. bucket = p.buckets_complete(bucket_id) if bucket is None: - click.echo(f"Unable to truncate table {name}.") + logger.error(msg) + sys.exit(1) + + +def schema_from_csv(prism, file): + """Convert a CSV list of fields into a proper Prism schema JSON object""" + + if not os.path.exists(file): + logger.error(f'FIle {file} not found - skipping.') + sys.exit(1) + + schema = {'fields': []} # Start with an empy schema definition. + + with open(file, newline='') as csvfile: + reader = csv.DictReader(csvfile) + + # Force all the columns names from the CSV to lowercase to make + # lookups consistent regardless of the actual case of the columns. + reader.fieldnames = [f_name.lower() for f_name in reader.fieldnames] + + # The minimum definition is a name column - exit if not found. No other + # column definition is required to build a valid field list. + if 'name' not in reader.fieldnames: + logger.error(f'CSV file {file} does not contain a name column header in first line.') + sys.exit(1) + + # Prism fields always have an ordinal sequence assigned to each field. + ordinal = 1 + + for row in reader: + if len(row['name']) == 0: + logger.error('Missing column name in CSV file.') + sys.exit(1) + + # Start the new field definition with what we know so far. + field = { + 'ordinal': ordinal, + 'name': row['name'], + 'displayName': row['displayname'] if 'displayname' in row else row['name'] + } + + # The following two items may not be in the CSV, the columns are not required and may not be present. 
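+            # As an illustration only, a minimal input CSV might look like
+            # (only the name column is mandatory):
+            #   name,displayname,type,required,externalid
+            #   employee_id,Employee ID,text,true,true
+            #   hire_date,Hire Date,date,false,false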
+ + if 'required' in row and isinstance(row['required'], str) and row['required'].lower() == 'true': + field['required'] = True + else: + field['required'] = False + + if 'externalid' in row and isinstance(row['externalid'], str) and row['externalid'].lower() == 'true': + field['externalId'] = True + else: + field['externalId'] = False + + fld_type = 'none' + + prism_data_types = ['boolean', 'integer', 'text', 'date', 'long', 'decimal', + 'numeric', 'instance', 'currency', 'multi_instance'] + + if 'type' in row and row['type'].lower() in prism_data_types: + field['type'] = {'id': f'Schema_Field_Type={row["type"]}'} + fld_type = row['type'].lower() + else: + # Default all "un-typed" fields to text. + field['type'] = {'id': 'Schema_Field_Type=Text'} + + match fld_type: + case 'date': + if 'parseformat' in row and isinstance(row['parseformat'], str) and len(row['parseformat']) > 0: + field['parseFormat'] = row['parseformat'] + else: + field['parseFormat'] = 'yyyy-MM-dd' + + case 'numeric': + if 'precision' in row: + field['precision'] = row['precision'] + + if 'scale' in row: + field['scale'] = row['scale'] + + case 'instance': + # We need all the data sources to resolve the business objects + # to include their WID. + data_sources = prism.datasources_list() + + if data_sources is None or data_sources['total'] == 0: + click.echo('Error calling WQL/dataSources') + return + + # Find the matching businessObject + bo = [ds for ds in data_sources['data'] + if ds['businessObject']['descriptor'] == row['businessObject']] + + if len(bo) == 1: + field['businessObject'] = bo[0]['businessObject'] + + schema['fields'].append(field) + ordinal += 1 + + return schema + + +def csv_from_fields(fields): + """Convert a Prism field list to CSV representation.""" + + format_str = '{name},"{displayName}",{ordinal},{type},"{businessObject}",{precision},{scale},"{parseFormat}",{required},{externalId}\n' + + # Start with the CSV column headings. + csv_str = 'name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId\n' + + for field in fields: + # Suppress the Prism audit columns. + if field['name'].startswith('WPA_'): + continue + + field_def = {'name': field['name'], + 'displayName': field['displayName'], + 'ordinal': field['ordinal'], + 'type': field['type']['descriptor'], + 'businessObject': field['businessObject']['descriptor'] if 'businessObject' in field else '', + 'precision': field['precision'] if 'precision' in field else '', + 'scale': field['scale'] if 'scale' in field else '', + 'parseFormat': field['parseFormat'] if 'parseFormat' in field else '', + 'required': field['required'], + 'externalId': field['externalId'] + } + + # Add the new field to the CSV text. + csv_str += format_str.format_map(field_def) + + return csv_str + + +def resolve_schema(p, file, source_name, source_wid): + """Get or extract a schema from a file or existing Prism table.""" + + # Start with a blank schema definition. + schema = {} + + # A file always takes precedence over sourceName and sourceWID + # options, and must BE, or contain a valid schema. + + if file is not None: + if file.lower().endswith('.json'): + try: + with open(file) as json_file: + schema = json.load(json_file) + except Exception as e: + logger.error(f'Invalid schema file: {e}.') + sys.exit(1) + + # The JSON file could be a complete table definitions (GET:/tables - full) or just + # the list of fields. If we got a list, then we have a list of fields we + # use to start the schema definition. 
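+            # Illustrative examples of the two accepted shapes:
+            #   [ {"name": "field1", ...}, {"name": "field2", ...} ]
+            #   {"name": "my_table", "fields": [ {"name": "field1", ...} ]}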
+ + if type(schema) is list: + schema['fields'] = schema + else: + # This should be a full schema, perhaps from a table list command. + if 'name' not in schema and 'fields' not in schema: + logger.error('Invalid schema - name and fields attribute not found.') + sys.exit(1) + elif file.lower().endswith('.csv'): + schema = schema_from_csv(p, file) + else: + logger.error('Invalid file extension - valid extensions are .json or .csv.') + sys.exit(1) + else: + # No file was specified, check for a Prism source table. + if source_name is None and source_wid is None: + logger.error('No schema file provided and a table (--sourceName or --sourceWID) not specified.') + sys.exit(1) + + if source_wid is not None: + tables = p.tables_list(wid=source_wid, type_='full') # Exact match on WID - and get the fields (full) + else: + tables = p.tables_list(name=source_name, type_='full') # Exact match on API Name + + if tables['total'] == 0: + logger.error('Invalid --sourceName or --sourceWID : table not found.') + sys.exit(1) + else: + schema = tables['data'][0] + + return schema diff --git a/prism/commands/util.py b/prism/commands/util.py index a77c371..88484ad 100644 --- a/prism/commands/util.py +++ b/prism/commands/util.py @@ -1,213 +1,11 @@ -import os.path -import sys -import json -import csv -import click import logging - - -def get_schema(p, file, sourceName, sourceWID): - # Start with a blank schema definition. - schema = {} - - # A file always takes precedence over sourceName and sourceWID - # options, and must contain a valid schema. - - if file is not None: - if file.lower().endswith(".json"): - try: - with open(file) as json_file: - schema = json.load(json_file) - except Exception as e: - click.echo(f"Invalid schema file: {e.msg}.") - sys.exit(1) - - # The JSON file could be a complete table definitions (GET:/tables - full) or just - # the list of fields. If we got a list, then we have a list of fields we - # use to start the schema definition. - - if type(schema) is list: - schema["fields"] = schema - else: - # This should be a full schema, perhaps from a table list command. - if "name" not in schema and "fields" not in schema: - click.echo("Invalid schema - name and fields attribute not found.") - sys.exit(1) - elif file.lower().endswith(".csv"): - schema = schema_from_csv(p, file) - else: - click.echo("Invalid file extension - valid extensions are .json or .csv.") - sys.exit(1) - else: - # No file was specified, check for a source table. - - if sourceName is None and sourceWID is None: - click.echo("No schema provided and a table (--sourceName or --sourceWID) not specified.") - sys.exit(1) - - if sourceWID is not None: - tables = p.tables_list(wid=sourceWID, type_="full") # Exact match on WID - and get the fields - else: - tables = p.tables_list(name=sourceName, type_="full") # Exact match on API Name - - if tables["total"] == 0: - click.echo("Invalid --sourceName or --sourceWID : table not found.") - sys.exit(1) - else: - schema = tables["data"][0] - - return schema - - -def schema_from_csv(prism, file): - schema = {"fields": []} # Start with an empy schema definition. - - with open(file, newline='') as csvfile: - reader = csv.DictReader(csvfile) - - # Force all the columns names to lowercase to make lookups consistent - # regardless of the actual case of the columns. - reader.fieldnames = [f_name.lower() for f_name in reader.fieldnames] - - # The minimum definition is a name column - exit if not found. No other - # column definition is required to build a valid field list. 
- if "name" not in reader.fieldnames: - click.echo(f"CSV file {file} does not contain a name column header in first line.") - sys.exit(1) - - # Prism fields always have an ordinal sequence assigned to each field. - ordinal = 1 - - for row in reader: - field = { - "ordinal": ordinal, - "name": row["name"], - "displayName": row["displayname"] if "displayname" in row else row["name"] - } - - if "required" in row and isinstance(row["required"], str) and row["required"].lower() == "true": - field["required"] = True - else: - field["required"] = False - - if "externalid" in row and isinstance(row["externalid"], str) and row["externalid"].lower() == "true": - field["externalId"] = True - else: - field["externalId"] = False - - fld_type = "none" - - if "type" in row and row["type"] in ["text", "date", "numeric", "instance"]: - field["type"] = { "id" : f'Schema_Field_Type={row["type"]}'} - fld_type = row["type"].lower() - else: - field["type"] = { "id" : f'Schema_Field_Type=Text'} - - match fld_type: - case "date": - if "parseformat" in row and isinstance(row["parseformat"], str) and len(row["parseformat"]) > 0: - field["parseFormat"] = row["parseformat"] - else: - field["parseFormat"] = "yyyy-MM-dd" - - case "numeric": - if "precision" in row: - field["precision"] = row["precision"] - - if "scale" in row: - field["scale"] = row["scale"] - - case "instance": - # We need all the data sources to resolve the business objects - # to include their WID. - data_sources = prism.datasources_list() - - if data_sources is None or data_sources["total"] == 0: - click.echo("Error calling WQL/dataSources") - return - - # Find the matching businessObject - bo = [ds for ds in data_sources["data"] - if ds["businessObject"]["descriptor"] == row["businessObject"]] - - if len(bo) == 1: - field["businessObject"] = bo[0]["businessObject"] - - schema["fields"].append(field) - ordinal += 1 - - return schema - - -def csv_from_fields(fields): - format_str = '{name},"{displayName}",{ordinal},{type},"{businessObject}",{precision},{scale},"{parseFormat}",{required},{externalId}\n' - - csv = "name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId\n" - - for field in fields: - # Suppress Prism auditing fields. - if field["name"].startswith("WPA_"): - continue - - field_def = {"name": field["name"], - "displayName": field["displayName"], - "ordinal": field["ordinal"], - "type": field["type"]["descriptor"], - "businessObject": field["businessObject"]["descriptor"] if "businessObject" in field else "", - "precision": field["precision"] if "precision" in field else "", - "scale": field["scale"] if "scale" in field else "", - "parseFormat": field["parseFormat"] if "parseFormat" in field else "", - "required": field["required"], - "externalId": field["externalId"] - } - - csv += format_str.format_map(field_def) - - return csv - - -def fileContainers_load(prism, filecontainersid, file): - # Because Click verified the file already exists, we know we have valid - # file name. Check to see if we have a gzip file or a CSV - # by checking the extension. - - if file is None or len(file) == 0: - click.echo("An existing file name is required to upload to a file container.") - return None - - # Verify that each file is already a gzip file or a CSV we gzip for them. - - # The CSV contents are not validated here - Prism eventually - # returns an error if the content is invalid. 
- - target_files = compress_files(file) - - # Assume we have a fID - it can be None right now - # if the user wants to create a fileContainers during - # this operation. - fID = filecontainersid - - for target_file in target_files: - # Load the file and retrieve the fID - this is only - # set by the load on the first file - subsequent - # files are loaded into the same container (fID). - fID = prism.filecontainers_load(fID, target_file) - - # If the fID comes back blank, then something is not - # working. Note: any error messages have already - # been displayed by the load operation. - - # NOTE: this operation never fails, the file is skipped. - if fID is None: - break - - # Return the fID to the caller - this is the value - # passed by the caller, or the new fID created by - # the load of the first file. - return fID +import os.path def get_files(files): + """Evaluate one (str) or more (list) file names and return a valid list for load operations.""" + + # At a minimum, an empty list will always be returned. target_files = [] if files is None: diff --git a/prism/commands/wql_commands.py b/prism/commands/wql_commands.py index 87abf1c..0b096f0 100644 --- a/prism/commands/wql_commands.py +++ b/prism/commands/wql_commands.py @@ -1,12 +1,12 @@ import click import json +import sys import pandas as pd -@click.command("dataSources", - help="View the buckets permitted by the security profile of the current user.") +@click.command("dataSources") @click.option("-w", "--wid", - help="The Workday ID of the dataSources.") + help="The Workday ID of the data source.") @click.option("-l", "--limit", default=None, help="The maximum number of object data entries included in the response, default=-1 (all).") @click.option("-o", "--offset", default=None, @@ -16,6 +16,7 @@ @click.argument("name", required=False) @click.pass_context def dataSources(ctx, wid, limit, offset, search, name): + """Returns a collection of data sources (/dataSources) for use in a WQL query.""" p = ctx.obj["p"] ds = p.wql_dataSources(wid, limit, offset, name, search) @@ -23,37 +24,40 @@ def dataSources(ctx, wid, limit, offset, search, name): click.echo(json.dumps(ds, indent=2)) -@click.command("data", - help="View the buckets permitted by the security profile of the current user.") +@click.command("data") @click.option("-l", "--limit", default=None, help="The maximum number of object data entries included in the response, default=-1 (all).") @click.option("-o", "--offset", default=None, help="The offset to the first object in a collection to include in the response.") -@click.option("-f", "--file", "file_", default=None, type=click.Path(exists=True), - help="Filename of a query") -@click.option("-c", "--csv", "csv_", is_flag=True, show_default=True, default=False, +@click.option("-f", "--file", "wql_file", default=None, type=click.Path(exists=True), + help="Filename containing a WQL query.") +@click.option("-c", "--as_csv", "as_csv", is_flag=True, show_default=True, default=False, help="Output query results as CSV.") @click.argument("query", required=False) @click.pass_context -def data(ctx, limit, offset, file_, csv_, query): +def data(ctx, limit, offset, wql_file, as_csv, query): + """ + Returns the data from a WQL query. + + [QUERY] WQL query string to execute (/data). 
+ """ p = ctx.obj["p"] - if file_ is None and query is None: + if wql_file is None and query is None: click.echo("No query provided") - return + sys.exit(1) if query is not None: query_resolved = query else: - with open(file_) as file: - query_resolved = file.read().replace('\n',' ') + with open(wql_file) as file: + query_resolved = file.read().replace('\n', ' ') rows = p.wql_data(query_resolved, limit, offset) if rows["total"] != 0: - if csv_: + if as_csv: df = pd.json_normalize(rows["data"]) click.echo(df.to_csv(index=False)) else: click.echo(json.dumps(rows, indent=2)) - diff --git a/prism/prism.py b/prism/prism.py index cf642ef..fa33014 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -10,11 +10,60 @@ import requests import time import os -import urllib import sys import uuid import io import gzip +import inspect + +from urllib import parse as urlparse + +# Default a logger - the default may be re-configured in the set_logging method. +logger = logging.getLogger(__name__) +logger.setLevel(logging.WARNING) + +# writing to stdout only... +handler = logging.StreamHandler(sys.stdout) +handler.setLevel(logging.WARNING) +log_format = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S") +handler.setFormatter(log_format) +logger.addHandler(handler) + + +def set_logging(log_file=None, log_level="INFO"): + # Resolve the log level - default to info if empty or invalid. + if log_level is None: + set_level = logging.INFO + else: + # Make sure the caller gave us a valid "name" for logging level. + if hasattr(logging, log_level): + set_level = getattr(logging, log_level) + else: + set_level = getattr(logging, "INFO") + + # If no file was specified, simply loop over any handlers and + # set the logging level. + if log_file is None: + for log_handler in logger.handlers: + log_handler.setLevel(set_level) + else: + # Setup logging for CLI operations. + for log_handler in logger.handlers: + logger.removeHandler(log_handler) + + logger.setLevel(set_level) + + # Create a handler as specified by the user (or defaults) + fh = logging.FileHandler(log_file) + fh.setLevel(set_level) + + # create formatter and add it to the handlers + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + fh.setFormatter(formatter) + + logger.addHandler(fh) + + logger.debug(f"set log level: {set_level}") def log_elapsed(msg, timedelta): @@ -22,6 +71,88 @@ def log_elapsed(msg, timedelta): logging.getLogger(__name__).debug(f"{msg}: elapsed {elapsed:.5f}") +def buckets_gen_name(): + bucket_name = "cli_" + uuid.uuid4().hex + logger.debug(f"buckets_gen_name: created bucket name: {bucket_name}") + + return bucket_name + + +def validate_schema(schema): + if "fields" not in schema or not isinstance(schema["fields"], list) or len(schema["fields"]) == 0: + logger.error("fields attribute missing from schema!") + return False + + # Add a sequential order (ordinal) on the fields to (en)force + # required sequencing of fields. + for ordinal in range(len(schema["fields"])): + schema["fields"][ordinal]["ordinal"] = ordinal + 1 + + return True + + +def table_to_bucket_schema(table): + """Convert schema derived from list table to a bucket schema. + + Parameters + ---------- + table: dict + A dictionary containing the schema definition for your dataset. + + Returns + ------- + If the request is successful, a dictionary containing the bucket schema is returned. 
+ The results can then be passed to the create_bucket function + + """ + + # describe_schema is a python dict object and needs to be accessed as such, 'data' is the top level object, + # but this is itself a list (with just one item) so needs the list index, in this case 0. 'fields' is found + # in the dict that is in ['data'][0] + + if table is None or "fields" not in table: + logger.error("Invalid table passed to table_to_bucket_schema.") + return None + + fields = table["fields"] + + # Get rid of any WPA_ fields... + fields[:] = [x for x in fields if "WPA" not in x["name"]] + + # Create and assign useAsOperationKey field with true/false values based on externalId value + operation_key_false = {"useAsOperationKey": False} + operation_key_true = {"useAsOperationKey": True} + + for fld in fields: + if fld["externalId"] is True: + fld.update(operation_key_true) + else: + fld.update(operation_key_false) + + # Now trim our fields data to keep just what we need + for fld in fields: + del fld["id"] + del fld["displayName"] + del fld["fieldId"] + del fld["required"] + del fld["externalId"] + + # Build the final bucket definition. + bucket_schema = { + "parseOptions": { + "fieldsDelimitedBy": ",", + "fieldsEnclosedBy": '"', + "headerLinesToIgnore": 1, + "charset": {"id": "Encoding=UTF-8"}, + "type": {"id": "Schema_File_Type=Delimited"}, + }, + "schemaVersion": {"id": "Schema_Version=1.0"}, + "fields": fields + } + + return bucket_schema + + class Prism: """Base class for interacting with the Workday Prism API. @@ -71,89 +202,68 @@ def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_toke self.bearer_token = None self.bearer_token_timestamp = None - # Default a logger - the default may be re-configured - # in the set_logging method. - self.logger = logging.getLogger(__name__) - self.logger.setLevel(logging.WARNING) - - # writing to stdout only... - handler = logging.StreamHandler(sys.stdout) - handler.setLevel(logging.WARNING) - log_format = logging.Formatter('[%(asctime)s] [%(levelname)s] - %(message)s') - handler.setFormatter(log_format) - self.logger.addHandler(handler) - - def set_logging(self, log_file=None, log_level="INFO"): - # Resolve the log level - default to info if empty or invalid. - if log_level is None: - set_level = logging.INFO - else: - # Make sure the caller gave us a valid "name" for logging level. - if hasattr(logging, log_level): - set_level = getattr(logging, log_level) - else: - set_level = getattr(logging, "INFO") - - # If no file was specified, simply loop over any handlers and - # set the logging level. - if log_file is None: - for handler in self.logger.handlers: - handler.setLevel(set_level) - else: - # Setup logging for CLI operations. - for handler in self.logger.handlers: - self.logger.removeHandler(handler) + self.CONTENT_APP_JSON = {"Content-Type": "application/json"} + self.CONTENT_FORM = {"Content-Type": "application/x-www-form-urlencoded"} - self.logger.setLevel(set_level) + def http_get(self, url, headers=None, params=None): + caller = inspect.stack()[1][3] + logger.debug(f"get: called by {caller}") - # Create a handler as specified by the user (or defaults) - fh = logging.FileHandler(log_file) - fh.setLevel(set_level) + if url is None or not isinstance(url, str) or len(url) == 0: + # Create a fake response object for standard error handling. 
+ msg = "get: missing URL" - # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') - fh.setFormatter(formatter) + response = {"status_code": 600, + "text": msg, + "errors": [{"error": msg}]} + else: + logger.debug(f"get: {url}") - self.logger.addHandler(fh) + # Every request requires an authorization header - make it true. + if headers is None: + headers = {} - self.logger.debug(f"set log level: {set_level}") + if "Authorization" not in headers: + headers["Authorization"] = "Bearer " + self.get_bearer_token() - def buckets_gen_name(self): - return "cli_" + uuid.uuid4().hex + response = requests.get(url, params=params, headers=headers) + log_elapsed(f"get: {caller}", response.elapsed) - def get(self, url, headers=None, params=None, log_tag="generic get"): - if url is None: - self.logger.warning("http_get: missing URL") - return None + if response.status_code != 200: + logger.error(f"Invalid HTTP status: {response.status_code}") + logger.error(f"Reason: {response.reason}") + logger.error(f"Text: {response.text}") - # Every request requires an authorization header - make it true. - auth_attr = "Authorization" + return response - if headers is None: - headers = {} + def http_post(self, url, headers=None, data=None, files=None): + caller = inspect.stack()[1][3] + logger.debug(f"post: called by {caller}") - if auth_attr not in headers: - headers[auth_attr] = "Bearer " + self.get_bearer_token() + if url is None or not isinstance(url, str) or len(url) == 0: + # Create a fake response object for standard error handling. + msg = "POST: missing URL" - response = requests.get(url, params=params, headers=headers) - log_elapsed("GET: " + log_tag, response.elapsed) + response = {"status_code": 600, + "text": msg, + "errors": [{"error": msg}]} + else: + logger.debug(f"post: {url}") - if response.status_code != 200: - self.logger.error(f"Invalid HTTP status: {response.status_code}") + # Every request requires an authorization header - make it true. + if headers is None: + headers = {} - return response + if "Authorization" not in headers and caller != "create_bearer_token": + headers["Authorization"] = "Bearer " + self.get_bearer_token() - def validate_schema(self, schema): - if "fields" not in schema or not isinstance(schema["fields"], list) or len(schema["fields"]) == 0: - self.logger.error("Invalid schema detected!") - return False + response = requests.post(url, headers=headers, data=data, files=files) + log_elapsed(f"get: {caller}", response.elapsed) - # Add a sequential order (ordinal) on the fields to (en)force - # proper numbering. - for ordinal in range(len(schema["fields"])): - schema["fields"][ordinal]["ordinal"] = ordinal + 1 + if response.status_code > 299: + logger.error(response.text) - return True + return response def create_bearer_token(self): """Exchange a refresh token for an access token. @@ -168,10 +278,6 @@ def create_bearer_token(self): """ - self.logger.debug("create_bearer_token") - - headers = {"Content-Type": "application/x-www-form-urlencoded"} - data = { "grant_type": "refresh_token", "refresh_token": self.refresh_token, @@ -179,29 +285,27 @@ def create_bearer_token(self): "client_secret": self.client_secret, } - r = requests.post(self.token_endpoint, headers=headers, data=data) - log_elapsed("create_bearer_token", r.elapsed) + # Call requests directly for this one get operation. 
+ r = self.http_post(url=self.token_endpoint, headers=self.CONTENT_FORM, data=data) if r.status_code == 200: - self.logger.debug("successfully obtained bearer token") + logger.debug("successfully obtained bearer token") self.bearer_token = r.json()["access_token"] self.bearer_token_timestamp = time.time() - - return True - - self.logger.error(f"HTTP status code {r.status_code}: {r.content}") - self.bearer_token = None - self.bearer_token_timestamp = None - - return False + else: + logger.error(f"create bearer token failed: HTTP status code {r.status_code}: {r.content}") + self.bearer_token = None + self.bearer_token_timestamp = None def get_bearer_token(self): """ Get the current bearer token, or create a new one if it doesn't exist, or it's older than 15 minutes. """ if self.bearer_token is None or (time.time() - self.bearer_token_timestamp) > 900: - if not self.create_bearer_token(): - return "" + self.create_bearer_token() + + if self.bearer_token is None: + return "" return self.bearer_token @@ -247,22 +351,20 @@ def tables_list( """ operation = "/tables" - self.logger.debug(f"GET: {operation}") + logger.debug(f"get: {operation}") url = self.prism_endpoint + operation if type_ is None or type_ not in ["full", "summary", "permissions"]: - self.logger.warning("Invalid type for tables list operation - defaulting to summary.") + logger.warning("Invalid type for tables list operation - defaulting to summary.") type_ = "summary" - # If we are searching, then we have to get everything using - # limits and offsets, i.e., paging of results. - + # Start setting up the API call parameters. params = {} # See if we want to add an explicit table name as a search parameter. if not search and name is not None: - # Here, the user is not searching, they gave us an exact name. + # Here, the caller is not searching, they gave us an exact name. params["name"] = name.replace(" ", "_") # Minor clean-up # Should only be 0 (not found) or 1 (found) tables found. @@ -278,7 +380,8 @@ def tables_list( # If we didn't get a limit, set it to the maximum supported by the API if limit is None: search = True # Force a search so we get all tables - limit = 100 + limit = 100 # The caller didn't say + offset = 0 # Offset cannot be anything other than zero offset = offset if offset is not None else 0 @@ -293,10 +396,10 @@ def tables_list( # Always assume we will retrieve more than one page. while True: - r = self.get(url, params=params, log_tag=operation) + r = self.http_get(url, params=params) if r.status_code != 200: - self.logger.error(f"Invalid HTTP return code: {r.status_code}") + logger.error(f"Tables list - invalid HTTP return code: {r.status_code}") # Whatever we have captured (perhaps nothing) so far will # be returned due to unexpected status code. @@ -310,12 +413,12 @@ def tables_list( # whatever we got (maybe nothing). return tables - # Figure out what of this batch of tables should be part of the - # return results, i.e., search the batch for matches. + # Figure out what tables of this batch of tables should be part of the + # return results, i.e., search the this batch for matches. 
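+        # A table is kept when the requested name appears as a substring of
+        # either its API name or its display name.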
if name is not None: - # Substring search for matching table names - match_tables = [tab for tab in tables["data"] if name in tab["name"]] + # Substring search for matching table names, display names + match_tables = [tab for tab in tables["data"] if name in tab["name"] or name in tab["displayName"]] elif wid is not None: # User is looking for a table by ID match_tables = [tab for tab in tables["data"] if wid == tab["id"]] @@ -357,27 +460,17 @@ def tables_create(self, schema): """ operation = "/tables" - self.logger.debug(f"POST : {operation}") + logger.debug(f"POST : {operation}") url = self.prism_endpoint + "/tables" - if not self.validate_schema(schema): - self.logger.error("Invalid schema for create operation.") + if not validate_schema(schema): + logger.error("Invalid schema for create operation.") return None - headers = { - "Authorization": "Bearer " + self.get_bearer_token(), - "Content-Type": "application/json", - } - - r = requests.post(url, headers=headers, data=json.dumps(schema)) + r = self.http_post(url=url, headers=self.CONTENT_APP_JSON, data=json.dumps(schema)) if r.status_code == 201: return r.json() - elif r.status_code == 400: - self.logger.error(r.json()["errors"][0]["error"]) - self.logger.error(r.text) - else: - self.logger.error(f"HTTP status code {r.status_code}: {r.content}") return None @@ -388,11 +481,11 @@ def tables_update(self, wid, schema, truncate=False): """ operation = f"/tables/{wid}" - self.logger.debug(f"PUT: {operation}") + logger.debug(f"PUT: {operation}") url = self.prism_endpoint + operation - if not self.validate_schema(schema): - self.logger.error("Invalid schema for update operation.") + if not validate_schema(schema): + logger.error("Invalid schema for update operation.") return None headers = { @@ -405,21 +498,19 @@ def tables_update(self, wid, schema, truncate=False): if r.status_code == 200: return r.json() - self.logger.error(f"Error updating table {wid} - {r.text}.") + logger.error(f"Error updating table {wid} - {r.text}.") return None - def tables_patch(self, id, displayName=None, description=None, documentation=None, enableForAnalysis=None, + def tables_patch(self, wid, displayName=None, description=None, documentation=None, enableForAnalysis=None, schema=None): + x = self.tenant_name return None def buckets_list(self, - wid=None, - bucket_name=None, - limit=None, - offset=None, + wid=None, bucket_name=None, + limit=None, offset=None, type_="summary", - table_name=None, - search=False): + table_name=None, search=False): """ :param wid: @@ -433,7 +524,7 @@ def buckets_list(self, """ operation = "/buckets" - self.logger.debug(f"GET: {operation}") + logger.debug(f"get: {operation}") url = self.prism_endpoint + operation # Start the return object - this routine NEVER fails @@ -443,25 +534,23 @@ def buckets_list(self, # If we are searching, then we have to get everything first # so don't add a name to the bucket query. - params = {} + params = {"limit": limit if limit is not None else 100, + "offset": offset if offset is not None else 0} if not search and bucket_name is not None: # List a specific bucket name overrides any other # combination of search/table/bucket name/wid. params["name"] = bucket_name - limit = 1 - offset = 0 + params["limit"] = 1 + params["offset"] = 0 else: # Any other combination of parameters requires a search # through all the buckets in the tenant. search = True - limit = 100 # Max pagesize to retrieve in the fewest REST calls. 
- offset = 0 - - params["limit"] = limit if limit is not None else 100 - params["offset"] = offset if offset is not None else 0 + params["limit"] = 100 # Max pagesize to retrieve in the fewest REST calls. + params["offset"] = 0 if type_ in ["summary", "full"]: params["type"] = type_ @@ -469,11 +558,11 @@ def buckets_list(self, params["type"] = "summary" while True: - r = self.get(url, params=params, log_tag=operation) + r = self.http_get(url, params=params) if r.status_code != 200: # We never fail, return whatever we got (if any). - self.logger.debug("Error listing buckets.") + logger.error("error listing buckets.") return return_buckets buckets = r.json() @@ -488,7 +577,8 @@ def buckets_list(self, if bucket_name is not None: # Substring search for matching table names - match_buckets = [bck for bck in buckets["data"] if bucket_name in bck["name"]] + match_buckets = [bck for bck in buckets["data"] if + bucket_name in bck["name"] or bucket_name in bck["displayName"]] elif wid is not None: # User is looking for a bucket by ID match_buckets = [bck for bck in buckets["data"] if wid == bck["id"]] @@ -522,17 +612,20 @@ def buckets_create( self, name=None, target_name=None, - target_id=None, + target_wid=None, schema=None, operation="TruncateAndInsert"): """Create a temporary bucket to upload files. Parameters ---------- + name : str + Name of the bucket to create. + schema : dict A dictionary containing the schema for your table. - target_id : str + target_wid : str The ID of the table that this bucket is to be associated with. target_name : str @@ -551,49 +644,42 @@ def buckets_create( If the request is successful, a dictionary containing information about the new bucket is returned. - https://confluence.workday.com/display/PRISM/Public+API+V2+Endpoints+for+WBuckets - :param name: """ # If the caller didn't give us a name to use for the bucket, # create a default name. if name is None: - bucket_name = self.buckets_gen_name() + bucket_name = buckets_gen_name() else: bucket_name = name # A target table must be identified by ID or name. - if target_id is None and target_name is None: - self.logger.error("A table id or table name is required to create a bucket.") + if target_wid is None and target_name is None: + logger.error("A table id or table name is required to create a bucket.") return None # The caller gave us a table wid, but didn't include a schema. Make a copy # of the target table's schema. Note: WID takes precedence over name. # Use type_=full to get back the schema definition. 
- if target_id is not None: - tables = self.tables_list(wid=target_id, type_="full") + if target_wid is not None: + tables = self.tables_list(wid=target_wid, type_="full") else: tables = self.tables_list(name=target_name, type_="full") if tables["total"] == 0: - self.logger.error(f"Table not found for bucket operation.") + logger.error(f"Table not found for bucket operation.") return None table_id = tables["data"][0]["id"] if schema is None: - schema = self.table_to_bucket_schema(tables["data"][0]) + schema = table_to_bucket_schema(tables["data"][0]) - self.logger.debug(f"POST: /buckets") + logger.debug(f"post: /buckets") url = self.prism_endpoint + "/buckets" - headers = { - "Authorization": "Bearer " + self.get_bearer_token(), - "Content-Type": "application/json", - } - data = { "name": bucket_name, "operation": {"id": "Operation_Type=" + operation}, @@ -601,112 +687,33 @@ def buckets_create( "schema": schema, } - r = requests.post(url, headers=headers, data=json.dumps(data)) + r = self.http_post(url, headers=self.CONTENT_APP_JSON, data=json.dumps(data)) if r.status_code == 201: - self.logger.info("successfully created a new wBucket") + logger.info("successfully created a new wBucket") return r.json() - elif r.status_code == 400: - self.logger.error(r.json()["errors"][0]["error"]) - self.logger.error(r.text) - else: - self.logger.error(f"HTTP status code {r.status_code}: {r.content}") return None def buckets_complete(self, bucketid): operation = f"/buckets/{bucketid}/complete" - self.logger.debug(f"POST: {operation}") + logger.debug(f"post: {operation}") url = self.prism_endpoint + operation - headers = { - "Authorization": "Bearer " + self.get_bearer_token() - } - - r = requests.post(url, headers=headers) + r = self.http_post(url) if r.status_code == 201: - self.logger.info(f"Successfully completed wBucket {bucketid}") + logger.info(f"successfully completed wBucket {bucketid}") return r.json() - if r.status_code == 400: - self.logger.error(r.json()["errors"][0]["error"]) - else: - self.logger.error(f"HTTP status code {r.status_code}: {r.content}") - return None - def table_to_bucket_schema(self, table): - """Convert schema derived from list table to a bucket schema. - - Parameters - ---------- - describe_schema: dict - A dictionary containing the describe schema for your dataset. - - Returns - ------- - If the request is successful, a dictionary containing the bucket schema is returned. - The results can then be passed to the create_bucket function - - """ - - # describe_schema is a python dict object and needs to be accessed as such, 'data' is the top level object, - # but this is itself a list (with just one item) so needs the list index, in this case 0. 'fields' is found - # in the dict that is in ['data'][0] - - if table is None or "fields" not in table: - self.logger.error("Invalid table passed to table_to_bucket_schema.") - return None - - fields = table["fields"] - - # Get rid of the WPA_ fields... 
- fields[:] = [x for x in fields if "WPA" not in x["name"]] - - # Create and assign useAsOperationKey field with true/false values based on externalId value - operation_key_false = {"useAsOperationKey": False} - operation_key_true = {"useAsOperationKey": True} - - for i in fields: - if i["externalId"] is True: - i.update(operation_key_true) - else: - i.update(operation_key_false) - - # Now trim our fields data to keep just what we need - for i in fields: - del i["id"] - del i["displayName"] - del i["fieldId"] - del i["required"] - del i["externalId"] - - # The "header" for the load schema - bucket_schema = { - "parseOptions": { - "fieldsDelimitedBy": ",", - "fieldsEnclosedBy": '"', - "headerLinesToIgnore": 1, - "charset": {"id": "Encoding=UTF-8"}, - "type": {"id": "Schema_File_Type=Delimited"}, - } - } - - # The footer for the load schema - schema_version = {"id": "Schema_Version=1.0"} - - bucket_schema["fields"] = fields - bucket_schema["schemaVersion"] = schema_version - - return bucket_schema - - def buckets_upload(self, bucket_id, file=None): + def buckets_upload(self, bucketid, file=None): """Upload a file to a given bucket. Parameters ---------- - bucket_id : str + bucketid : str The ID of the bucket that the file should be added to. file : str @@ -719,19 +726,17 @@ def buckets_upload(self, bucket_id, file=None): None """ - operation = f"/buckets/{bucket_id}/files" - self.logger.debug("POST: {operation}") + operation = f"/buckets/{bucketid}/files" + logger.debug("post: {operation}") url = self.prism_endpoint + operation - headers = {"Authorization": "Bearer " + self.get_bearer_token()} - results = [] - # Convert a single filename to a list. + # Convert a single filename to a list, so we can loop. if isinstance(file, list): files = file else: - files = [file] # Convert to list... + files = [file] # Convert to list... for f in files: # It is legal to upload an empty file - see the table truncate method. @@ -743,18 +748,16 @@ def buckets_upload(self, bucket_id, file=None): with open(f, "rb") as in_file: new_file = {"file": (f + ".gz", gzip.compress(in_file.read()))} - r = requests.post(url, headers=headers, files=new_file) + r = requests.post(url, files=new_file) if r.status_code == 201: - self.logger.info(f"Successfully uploaded {f} to the bucket") + logger.debug(f"successfully uploaded {f} to the bucket") if isinstance(file, str): # If we got a single file, return the first result. return r.json() else: results.append(r.json()) - else: - self.logger.error(f"HTTP status code {r.status_code}: {r.content}") return results @@ -762,7 +765,7 @@ def dataChanges_list(self, name=None, wid=None, activity_id=None, - limit=-1, offset=None, + limit=None, offset=None, type_="summary", search=False, refresh=False): @@ -776,19 +779,21 @@ def dataChanges_list(self, else: search_by_id = False - # We know what kind of list we want, add in the ability to - # search by name and pages. + logger.debug(f"get: {operation}") + + # We know what kind of list (all or specific DCT) we want, add in the + # ability to search by name and pages. 
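+    # The type query parameter controls verbosity: "summary" (the default)
+    # or "full"; anything else falls back to summary with a warning.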
if type_ and isinstance(type_, str): if type_ == "summary": operation += "?type=summary" elif type_ == "full": operation += "?type=full" else: - operation += "?type=summary" logger.warning(f'/dataChanges: invalid verbosity {type_} - defaulting to summary.') + operation += "?type=summary" else: - operation += "?type=summary" logger.warning("/dataChanges: invalid verbosity - defaulting to summary.") + operation += "?type=summary" logger.debug(f"dataChanges_activities_get: {operation}") @@ -800,7 +805,7 @@ def dataChanges_list(self, # result - even blank. if search_by_id: - response = self.get(url=url, log_tag="dataChanges") + response = self.http_get(url=url) if response.status_code == 200: return response.json() @@ -809,7 +814,7 @@ def dataChanges_list(self, # Get a list of tasks by page, with or without searching. - search_limit = 500 # Assume all DCTs should be returned + search_limit = 500 # Assume all DCTs should be returned - max API limit search_offset = 0 # API default value if limit is not None and isinstance(limit, int) and limit > 0: @@ -828,101 +833,92 @@ def dataChanges_list(self, search_limit = 500 search_offset = 0 else: - # Should return at most 1 result. - url += "&name=" + urllib.parse.quote(name) + # With an explicit name, we should return at most 1 result. + url += "&name=" + urlparse.quote(name) searching = False search_limit = 1 search_offset = 0 # Assume we will be looping based on limit and offset values; however, we may - # execute only once. + # execute only once. NOTE: this routine NEVER fails, but may return zero + # data change tasks. - dataChanges = {"total": 0, "data": []} + data_changes = {"total": 0, "data": []} while True: search_url = f"{url}&limit={search_limit}&offset={search_offset}" logger.debug(f"dataChangesID url: {search_url}") - response = self.get(url=search_url, log_tag=operation) + response = self.http_get(url=search_url) if response.status_code != 200: break - retJSON = response.json() + return_json = response.json() if searching: # Only add matching rows - dataChanges["data"] += \ + data_changes["data"] += \ filter(lambda dtc: dtc["name"].find(name) != -1 or dtc["displayName"].find(name) != -1, - retJSON["data"]) + return_json["data"]) else: # Without searching, simply paste the current page to the list. - dataChanges["data"] += retJSON["data"] + data_changes["data"] += return_json["data"] break - # If we didn't get a full page, then we done. - if len(retJSON["data"]) < search_limit: + # If we didn't get a full page, then we are done. + if len(return_json["data"]) < search_limit: break # Go to the next page. 
offset += search_limit - dataChanges["total"] = len(dataChanges["data"]) + data_changes["total"] = len(data_changes["data"]) - return dataChanges + return data_changes - def dataChanges_activities_get(self, data_change_id, activity_id): + def dataChanges_activitietrs_get(self, data_change_id, activity_id): operation = f"/dataChanges/{data_change_id}/activities/{activity_id}" logger.debug(f"dataChanges_activities_get: {operation}") + url = self.prism_endpoint + operation - r = self.get(self.prism_endpoint + operation) + r = self.http_get(url) if r.status_code == 200: - return json.loads(r.text) + return r.json() return None def dataChanges_activities_post(self, data_change_id, fileContainerID=None): operation = f"/dataChanges/{data_change_id}/activities" - logger.debug(f"dataChanges_activities_post: {operation}") - + logger.debug(f"post: {operation}") url = self.prism_endpoint + operation - headers = { - "Authorization": "Bearer " + self.get_bearer_token(), - "Content-Type": "application/json", - } - if fileContainerID is None: logger.debug("no file container ID") - data = None else: logger.debug("with file container ID: {fileContainerID") + # NOTE: the name is NOT correct based on the API definition data = json.dumps({"fileContainerWid": fileContainerID}) - r = requests.post(url, data=data, headers=headers) - log_elapsed(f"POST {operation}", r.elapsed) + r = self.http_post(url, headers=self.CONTENT_APP_JSON, data=data) if r.status_code == 201: - activityID = json.loads(r.text)["id"] + activity_id = r.json()["id"] - logging.debug(f"Successfully started data load task - id: {activityID}") - return activityID - elif r.status_code == 400: - logging.warning(r.json()["errors"][0]["error"]) - else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") + logger.debug(f"Successfully started data load task - id: {activity_id}") + return activity_id return None def dataChanges_by_name(self, data_change_name): logger.debug(f"data_changes_by_name: {data_change_name}") - data_changes_list = self.dataChanges_list() + data_changes_list = self.dataChanges_list(name=data_change_name) for data_change in data_changes_list: if data_change.get("displayName") == data_change_name: @@ -944,139 +940,139 @@ def dataChanges_by_id(self, data_change_id): headers = {"Authorization": "Bearer " + self.get_bearer_token()} - r = requests.get(url, headers=headers) - log_elapsed(logger, operation, r.elapsed) - r.raise_for_status() + r = self.http_get(url, headers=headers) if r.status_code == 200: logger.debug(f"Found data change task: id = {data_change_id}") return json.loads(r.text) - elif r.status_code == 400: - logger.warning(r.json()["errors"][0]["error"]) - else: - logger.warning(f"HTTP status code {r.status_code}: {r.content}") - return json.loads(r.text) + return None def dataChanges_is_valid(self, data_change_id): - dtc = self.dataChanges_validate(data_change_id) - - if dtc is None: - logger.critical(f"data_change_id {data_change_id} not found!") + dct = self.dataChanges_validate(data_change_id) + if dct is None: + logger.error(f"data_change_id {data_change_id} not found!") return False - if "error" in dtc: + if "error" in dct: logger.critical(f"data_change_id {data_change_id} is not valid!") - return False return True def dataChanges_validate(self, data_change_id): operation = f"/dataChanges/{data_change_id}/validate" - logger.debug(f"dataChanges_validate: GET {operation}") - + logger.debug(f"dataChanges_validate: get {operation}") url = self.prism_endpoint + operation - r = self.get(url) + r = 
self.http_get(url) - # If the DCT is invalid, the response will have the errors - # so we return the JSON no matter what. + if r.status_code == 200: + return r.json() - return json.loads(r.text) + return None def fileContainers_create(self): operation = "/fileContainers" - logger.debug(f"fileContainer_create: POST {operation}") - + logger.debug(f"fileContainer_create: post {operation}") url = self.prism_endpoint + operation - headers = {"Authorization": "Bearer " + self.get_bearer_token()} - - r = requests.post(url, headers=headers) - log_elapsed(f"POST {operation}", r.elapsed) + r = self.http_post(url) if r.status_code == 201: return_json = r.json() - fileContainerID = return_json["id"] - logger.debug(f"successfully created file container: {fileContainerID}") + file_container_id = return_json["id"] + logger.debug(f"successfully created file container: {file_container_id}") return return_json - elif r.status_code == 400: - logging.warning(r.json()["errors"][0]["error"]) - else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") return None - def fileContainers_list(self, fileContainerID): - operation = f"/fileContainers/{fileContainerID}/files" - logger.debug(f"fileContainers_list: GET {operation}") - + def fileContainers_list(self, filecontainerid): + """ + + :param filecontainerid: + :return: + """ + operation = f"/fileContainers/{filecontainerid}/files" + logger.debug(f"fileContainers_list: get {operation}") url = self.prism_endpoint + operation - r = self.get(url) + r = self.http_get(url) if r.status_code == 200: return r.json() - return None + if r.status_code == 404: + logger.warning("verify: Self-Service: Prism File Container domain in the Prism Analytics functional area") - def fileContainers_load(self, fileContainerID, fqfn): - # Do a sanity check and make sure the fqfn exists and - # has a gzip extension. - - if not os.path.isfile(fqfn): - logger.critical("file not found: {fqfn}") - return None + return [] # Always return a list. - # Create the file container and get the ID. We use the - # file container ID to load the file and then return the - # value to the caller for use in a data change call. + def fileContainers_load(self, fileContainerID, file): + """ + Load one or more files to a fileContainer. - if fileContainerID is None: - file_container_response = self.fileContainers_create() + :param fileContainerID: + :param file: + :return: + """ - if file_container_response is None: - return None + fid = fileContainerID # Create target fID from param - fID = file_container_response["id"] + # Convert a single filename to a list, so we can loop. + if isinstance(file, list): + files = file else: - fID = fileContainerID + files = [file] - print(self.fileContainers_list(fID)) + for file in files: + # Do a sanity check and make sure the fi exists and + # has a gzip extension. - # We have our container, load the file + if not os.path.isfile(file): + logger.warning(f"skipping - file not found: {file}") + continue # Next file... - headers = { - "Authorization": "Bearer " + self.get_bearer_token() - } + # It is legal to upload an empty file - see the table truncate method. 
+ if file is None: + new_file = {"file": ("dummy", io.BytesIO())} + elif file.lower().endswith(".csv.gz"): + new_file = {"file": open(file, "rb")} + elif file.lower().endswith(".csv"): + with open(file, "rb") as in_file: + new_file = {"file": (file + ".gz", gzip.compress(in_file.read()))} - operation = f"/fileContainers/{fID}/files" - logger.debug(f"fileContainer_load: POST {operation}") + # Create the file container and get the ID. We use the + # file container ID to load the file and then return the + # value to the caller for use in a data change call. - files = {"file": open(fqfn, "rb")} + if fid is None: + # The caller is asking us to create a new container. + file_container_response = self.fileContainers_create() - url = self.prism_endpoint + operation + if file_container_response is None: + logger.error("Unable to create fileContainer") + return None - r = requests.post(url, files=files, headers=headers) - log_elapsed(f"POST {operation}", r.elapsed) + fid = file_container_response["id"] - if r.status_code == 201: - logging.info("successfully loaded fileContainer") + logger.debug(f"resolved fID: {fid}") - print(self.fileContainers_list(fID)) + # We have our container, load the file - return fID - elif r.status_code == 400: - logging.warning(r.json()["errors"][0]["error"]) - else: - logging.warning(f"HTTP status code {r.status_code}: {r.content}") + operation = f"/fileContainers/{fid}/files" + logger.debug(f"fileContainer_load: POST {operation}") + url = self.prism_endpoint + operation - return None + r = self.http_post(url, files=new_file) + + if r.status_code == 201: + logger.info(f"successfully loaded file to container: {file}") + + return fid def wql_dataSources(self, wid=None, limit=100, offset=0, dataSources_name=None, search=False): operation = "/dataSources" @@ -1087,7 +1083,7 @@ def wql_dataSources(self, wid=None, limit=100, offset=0, dataSources_name=None, return_sources = {"total": 0, "data": []} while True: - r = self.get(f"{url}?limit=100&offset={offset}") + r = self.http_get(f"{url}?limit=100&offset={offset}") if r.status_code == 200: ds = r.json() @@ -1108,38 +1104,46 @@ def wql_data(self, query, limit, offset): operation = "/data" url = f"{self.wql_endpoint}{operation}" - query_safe = urllib.parse.quote(query.strip()) + query_safe = urlparse.quote(query.strip()) - query_limit = limit if limit is not None else 100 + if limit is None or not isinstance(limit, int) or limit > 10000: + query_limit = 10000 + offset = 0 + else: + query_limit = limit + + offset = offset if offset is not None and isinstance(offset, int) else 0 - offset = 0 data = {"total": 0, "data": []} while True: - r = self.get(f"{url}?query={query_safe}&limit={query_limit}&offset={offset}") + r = self.http_get(f"{url}?query={query_safe}&limit={query_limit}&offset={offset}") if r.status_code == 200: - ds = r.json() - data["data"] += ds["data"] + page = r.json() + data["data"] += page["data"] else: - logger.error(f"Invalid WQL: {r.status_code}") - logger.error(r.text) - return data # Return whatever we have... - if len(ds["data"]) < 10000: + if len(page["data"]) < query_limit: break - offset += 100 + offset += query_limit + # Set the final row count. data["total"] = len(data["data"]) return data - def raas_run(self, report, system, user, params=None, format_=None): - if system: + def raas_run(self, report, user, params=None, format_=None): + """ + Run a Workday system or custom report. 
+ """ + if user is None or not isinstance(user, str) or len(user) == 0: + logger.warning("generating delivered report (systemreport2).") url = f"{self.raas_endpoint}/systemreport2/{self.tenant_name}/{report}" else: + logger.debug(f"generating report as {user}.") url = f"{self.raas_endpoint}/customreport2/{self.tenant_name}/{user}/{report}" separator = "?" @@ -1151,37 +1155,15 @@ def raas_run(self, report, system, user, params=None, format_=None): separator = "&" url += query_str - if format: - if "?" in url: - url = f"{url}&format={format_}" - else: - url = f"{url}?format={format_}" - if url is None: - raise ValueError("RaaS URL is required") - else: - if url.find("format=") == -1: - output_format = "xml" - else: - output_format = url.split("format=")[1] + if format_: + url = f"{url}{separator}format={format_}" - headers = {"Accept": "text/csv"} - r = self.get(url, headers=headers) + r = self.http_get(url) if r.status_code == 200: - # if output_format == "json": - # return r.json()["Report_Entry"] - # elif output_format == "csv": - # return list(csv.reader(io.StringIO(r.content.decode("utf8")))) - # else: - # raise ValueError(f"Output format type {output_format} is unknown") return r.text else: logging.error("HTTP Error: {}".format(r.content.decode("utf-8"))) return None - - def is_valid_operation(self, operation): - operation_list = ["insert", "truncateandinsert", "delete", "upsert", "update"] - - return operation in operation_list From cd21a03d96cf784e1737a1066140e6f6d579e2ba Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Thu, 19 Oct 2023 12:50:47 -0400 Subject: [PATCH 019/103] Clean-up of DataChanges commands. --- prism/commands/dataChanges_commands.py | 133 ++++++++++++------------- prism/prism.py | 68 ++++++++++--- 2 files changed, 118 insertions(+), 83 deletions(-) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 14f8fb8..bc01835 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -9,76 +9,78 @@ @click.command("get") -@click.option("-n", "--name", - help="The name of a dataChangeID to list.") +@click.option("-n", "--isName", default=False, is_flag=True, + help="Flag to treat the dct argument as a name.") @click.option("-l", "--limit", default=-1, - help="The maximum number of object data entries included in the response, default=-1 (all).") + help="The maximum number of object data entries included in the response.") @click.option("-o", "--offset", default=0, help="The offset to the first object in a collection to include in the response.") @click.option("-t", "--type", "type_", default="summary", help="How much information to be returned in response JSON (default=summary).") -@click.option("-f", "--format", "format_", - default="full", +@click.option("-f", "--format", "format_", default="full", help="Format output as full, summary, schema, or CSV.", type=click.Choice(['full', 'summary', 'schema', 'csv'], case_sensitive=False)) @click.option("-s", "--search", is_flag=True, default=False, help="Use contains search substring for --name or --id (default=false).") -@click.argument("id", required=False) +@click.argument("dct", required=False) @click.pass_context -def dataChanges_get(ctx, name, id, limit, offset, type_, format_, search): - """ - View the data change tasks permitted by the security profile of the current user. +def dataChanges_get(ctx, isname, dct, limit, offset, type_, format_, search): + """View the data change tasks permitted by the security profile of the current user. 
- [ID] data change task to lists. + [dct] A reference to a Prism Analytics Data Change Task. """ p = ctx.obj["p"] - def output_dct(dct): - display_name = dct["displayName"] + def output_summary_dct(current_dct): + """Display the one-line summary version of a DCT.""" + display_name = current_dct["displayName"] - source_name = dct["source"]["sourceType"] - source_name += ": " + dct["source"]["name"] if "name" in dct["source"] else "" + source_name = current_dct["source"]["sourceType"] + source_name += ": " + current_dct["source"]["name"] if "name" in current_dct["source"] else "" - target_name = dct["target"]["name"] - operation = dct["operation"]["operationType"]["descriptor"] + target_name = current_dct["target"]["name"] + operation = current_dct["operation"]["operationType"]["descriptor"] logger.info(f"{display_name}, source: {source_name}, target: {target_name}, operation: {operation}") # Separate the get calls because an ID lookup returns a dict and a name lookup # always returns an object/list structure with zero or more matching DCTs. - if id is not None: - data_change_task = p.dataChanges_get(id=id, limit=limit, offset=offset, type_=type_) + if isname: + data_change_task = p.dataChanges_get(name=dct, limit=limit, offset=offset, search=search, type=type_) - if data_change_task is None: - logger.error(f'Data change task {id} not found.') + if data_change_task["total"] == 0: + logger.warning("No data change task(s) found.") sys.exit(1) - if format_ == 'summary': - output_dct(data_change_task) - elif format_ == 'csv': - df = pd.json_normalize(data_change_task) - logger.info(df.to_csv(index=False)) - else: - logger.info(json.dumps(data_change_task)) + # For display purposes, sort by display name (case-insensitive) + data_change_task["data"] = sorted( + data_change_task["data"], + key=lambda dct_srt: dct_srt["displayName"].lower()) else: - data_change_tasks = p.dataChanges_get(name=name, limit=limit, offset=offset, search=search, type=type_) + data_change_task = p.dataChanges_get(id=dct, limit=limit, offset=offset, type_=type_) - if data_change_tasks["total"] == 0: - logger.info("No data change task(s) found.") + if data_change_task is None: + logger.error(f'Data change task {dct} not found.') sys.exit(1) - # For display purposes, sort by display name (case-insensitive) - data_change_tasks["data"] = sorted(data_change_tasks["data"], key=lambda dct: dct["displayName"].lower()) - - # Handle output - if format_ == "summary": - for dct in data_change_tasks["data"]: - output_dct(dct) - elif format_ == "csv": - df = pd.json_normalize(data_change_tasks["data"]) - logger.info(df.to_csv(index=False)) + if format_ == "summary": + if 'total' in data_change_task: + for dct_item in data_change_task["data"]: + output_summary_dct(dct_item) + else: + output_summary_dct(data_change_task) + elif format_ == "csv": + if 'data' in data_change_task: + df = pd.json_normalize(data_change_task["data"]) + else: + df = pd.json_normalize(data_change_task) + + logger.info(df.to_csv(index=False)) + else: + if 'data' in data_change_task: + logger.info(json.dumps(data_change_task["data"], indent=2)) else: - logger.info(json.dumps(data_change_tasks["data"], indent=2)) + logger.info(json.dumps(data_change_task, indent=2)) @click.command("validate") @@ -97,8 +99,6 @@ def dataChanges_validate(ctx, isname, dct, search): p = ctx.obj["p"] - # See if we have any matching data change tasks. 
- # Note: datachanges_get never fails - errors may appear in the log if not isname: validate = p.dataChanges_validate(id) logger.info(json.dumps(validate, indent=2)) @@ -115,6 +115,7 @@ def dataChanges_validate(ctx, isname, dct, search): validate = p.dataChanges_validate(dct["id"]) if 'error' in validate: + # Add identifying attributes to the error message. validate['id'] = dct['id'] validate['descriptor'] = dct['displayName'] @@ -130,8 +131,7 @@ def dataChanges_validate(ctx, isname, dct, search): @click.argument('fid', required=False) @click.pass_context def dataChanges_run(ctx, dct, fid, isname): - """ - Execute the named data change task with an optional file container. + """Execute the named data change task with an optional file container. [DCT] A reference to a Prism Analytics data change. [FID] An optional reference to a Prism Analytics file container. @@ -140,7 +140,7 @@ def dataChanges_run(ctx, dct, fid, isname): p = ctx.obj["p"] if isname: - # See if we have any matching data change task. + # See if we have any matching data change task by name (with minor clean-up). data_changes = p.dataChanges_get(name=dct.replace(" ", "_")) if data_changes["total"] != 1: @@ -158,10 +158,8 @@ def dataChanges_run(ctx, dct, fid, isname): logger.error("Failed to run data change task - please review the log.") sys.exit(1) - if 'error' in activity: - logger.error("Error running data change task.") - else: - logger.info(activity) + # Output the results - could be the new activity id or an error message. + logger.info(json.dumps(activity, indent=2)) @click.command("activities") @@ -173,8 +171,7 @@ def dataChanges_run(ctx, dct, fid, isname): @click.argument("activityID", required=True) @click.pass_context def dataChanges_activities(ctx, dct, activityid, status, isname): - """ - Get the status for a specific activity associated with a data change task. + """Get the status for a specific activity associated with a data change task. [ID] A reference to a data change task. [ACTIVITYID] A reference to a data change task activity. @@ -197,13 +194,13 @@ def dataChanges_activities(ctx, dct, activityid, status, isname): current_status = p.dataChanges_activities_get(dct_id, activityid) if current_status is None: - logger.info("Activity for DCT not found.") + logger.error("Activity for DCT not found.") sys.exit(1) else: if status: logger.info(current_status["state"]["descriptor"]) else: - logger.info(current_status) + logger.info(json.dumps(current_status, indent=2)) @click.command("upload") @@ -214,20 +211,17 @@ def dataChanges_activities(ctx, dct, activityid, status, isname): @click.option("-v", "--verbose", default=False, is_flag=True, help="Display additional information.") @click.argument("dct", required=True) -@click.argument("file", nargs=-1, type=click.Path(exists=True)) +@click.argument("file", required=True, nargs=-1, type=click.Path(exists=True)) @click.pass_context def dataChanges_upload(ctx, isname, dct, file, wait, verbose): - """ - Validate the data change specified by name or ID. + """Execute a data change task using the provided file(s). - [dct] A reference to a Prism Analytics Data Change Task. + [DCT] A reference to a Prism Analytics Data Change Task. + [FILE] One or more .CSV or .CSV.GZ files. 
""" p = ctx.obj["p"] - if len(file) == 0: - logger.error('One or more files must be specified.') - if isname: data_change_tasks = p.dataChanges_get(name=dct) @@ -239,6 +233,7 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): else: dct_id = dct + # Specifying None for the ID to create a new file container. file_container = p.fileContainers_load(id=None, file=file) if file_container['total'] == 0: @@ -247,17 +242,20 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): fid = file_container['id'] + # Execute the DCT. activity = p.dataChanges_activities_post(id=dct_id, fileContainerID=fid) + if 'errors' in activity: + # Add the ID of the DCT for easy identification. + activity['id'] = dct_id + + logger.error(json.dumps(activity, indent=2)) + + sys.exit(1) + if not wait: logger.info(json.dumps(activity, indent=2)) else: - if 'errors' in activity: - activity['id'] = dct_id - logger.error(json.dumps(activity, indent=2)) - - sys.exit(1) - activity_id = activity['id'] while True: @@ -273,4 +271,5 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): if status not in ['New', 'Queued', 'Processing', 'Loading']: break + # Output the final status of the activity. logger.info(json.dumps(activity, indent=2)) diff --git a/prism/prism.py b/prism/prism.py index 1cf9ecb..5e111e2 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -1388,36 +1388,70 @@ def fileContainers_load(self, id, file): return results - def wql_dataSources(self, id=None, limit=100, offset=0, name=None, search=False): - operation = "/dataSources" - logger.debug("wql_dataSources: {operation}") - url = f"{self.wql_endpoint}{operation}" + def wql_dataSources(self, id=None, alias=None, limit=100, offset=0, search=False): + operation = '/dataSources' + + if id is not None: + operation = f'{operation}/{id}' + logger.debug('wql_dataSources: {operation}') + url = f'{self.prism_endpoint}{operation}' + + response = self.http_get(url) + + return response.json() + + if limit is None: + logger.debug('wql_dataSources: {operation}') + url = f'{self.wql_endpoint}{operation}' + + # Always return a valid list - even if empty. + return_sources = {'total': 0, 'data': []} offset = 0 - return_sources = {"total": 0, "data": []} while True: - r = self.http_get(f"{url}?limit=100&offset={offset}") + r = self.http_get(f'{url}?limit=100&offset={offset}') if r.status_code == 200: ds = r.json() - return_sources["data"] += ds["data"] + + # Add this page to the final output. + return_sources['data'] += ds['data'] else: return None - if len(ds["data"]) < 100: + if len(ds['data']) < 100: + # A page size less than the limit means we are done. break offset += 100 + # Fix-up the final total of sources. return_sources["total"] = len(return_sources["data"]) return return_sources def wql_data(self, query, limit, offset): - operation = "/data" + """Returns the data from a WQL query. + + Parameters + ---------- + query : str + The WQL query that retrieves the data. + limit: int + The maximum number of objects in a single response - maximum 10,000. + offset: int + The zero-based index of the first object in a response collection. + + Returns + ------- + dict + Returned dict with a "total" row count attribute and a "data" + array of rows. 
+ """ + operation = '/data' - url = f"{self.wql_endpoint}{operation}" + url = f'{self.wql_endpoint}{operation}' query_safe = urlparse.quote(query.strip()) if limit is None or not isinstance(limit, int) or limit > 10000: @@ -1428,24 +1462,26 @@ def wql_data(self, query, limit, offset): offset = offset if offset is not None and isinstance(offset, int) else 0 - data = {"total": 0, "data": []} + # Always return a valid object - even if no rows are returned. + data = {'total': 0, 'data': []} while True: - r = self.http_get(f"{url}?query={query_safe}&limit={query_limit}&offset={offset}") + r = self.http_get(f'{url}?query={query_safe}&limit={query_limit}&offset={offset}') if r.status_code == 200: page = r.json() - data["data"] += page["data"] + data['data'] += page['data'] else: - return data # Return whatever we have... + # There was a problem, return whatever we have... + return data - if len(page["data"]) < query_limit: + if len(page['data']) < query_limit: break offset += query_limit # Set the final row count. - data["total"] = len(data["data"]) + data['total'] = len(data['data']) return data From f3c5401ea15bff9f96ce1aba437f80062af10660 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Fri, 20 Oct 2023 08:55:56 -0400 Subject: [PATCH 020/103] Add/enhance WQL - select * expansion --- prism/cli.py | 1 + prism/commands/wql_commands.py | 147 +++++++++++++++++++++++++++++---- prism/prism.py | 133 +++++++++++++++++++++++++---- 3 files changed, 249 insertions(+), 32 deletions(-) diff --git a/prism/cli.py b/prism/cli.py index 6c8dd30..2deed62 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -227,6 +227,7 @@ def wql(): wql.add_command(w_commands.dataSources) +wql.add_command(w_commands.dataSources_fields) wql.add_command(w_commands.data) diff --git a/prism/commands/wql_commands.py b/prism/commands/wql_commands.py index 0b096f0..4fd1838 100644 --- a/prism/commands/wql_commands.py +++ b/prism/commands/wql_commands.py @@ -1,27 +1,91 @@ import click import json import sys +import logging +import re import pandas as pd +logger = logging.getLogger('prismCLI') + @click.command("dataSources") -@click.option("-w", "--wid", - help="The Workday ID of the data source.") -@click.option("-l", "--limit", default=None, +@click.option("-a", "--alias", default=None, type=str, + help="The alias of the data source.") +@click.option("-s", "--searchString", default=None, type=str, + help="The string to be searched in case insensitive manner within the descriptors of the data sources.") +@click.option("-l", "--limit", default=None, type=int, help="The maximum number of object data entries included in the response, default=-1 (all).") -@click.option("-o", "--offset", default=None, +@click.option("-o", "--offset", default=None, type=int, help="The offset to the first object in a collection to include in the response.") -@click.option("-s", "--search", is_flag=True, show_default=True, default=False, - help="Use contains search substring for --table_name or --id.") -@click.argument("name", required=False) +@click.option('-f', '--format', 'format_', default='json', + type=click.Choice(['json', 'tabular'], case_sensitive=True), + help='Gets the fields you have access to in the given data source.') +@click.argument("dataSource", required=False) @click.pass_context -def dataSources(ctx, wid, limit, offset, search, name): - """Returns a collection of data sources (/dataSources) for use in a WQL query.""" +def dataSources(ctx, alias, searchstring, limit, offset, format_, datasource): + """Returns a collection of data sources 
(/dataSources) for use in a WQL query. + + [DATASOURCE] The Workday ID of the resource. + """ p = ctx.obj["p"] - ds = p.wql_dataSources(wid, limit, offset, name, search) + if datasource is not None: + data_sources = p.wql_dataSources(id=datasource, limit=limit, offset=offset) + else: + data_sources = p.wql_dataSources(alias=alias, searchString=searchstring, limit=limit, offset=offset) + + if format_ == 'json': + logger.info(json.dumps(data_sources, indent=2)) + else: + df = pd.json_normalize(data_sources['data']) + logger.info(df.to_csv(index=False)) + + +@click.command("fields") +@click.option("-d", "--sourceSearch", is_flag=True, default=False, + help="The alias of the data source.") +@click.option("-a", "--alias", default=None, type=str, + help="The alias of the data source.") +@click.option("-s", "--searchString", default=None, type=str, + help="The string to be searched in case insensitive manner within the descriptors of the data sources.") +@click.option("-l", "--limit", default=None, type=int, + help="The maximum number of object data entries included in the response.") +@click.option("-o", "--offset", default=None, type=int, + help="The offset to the first object in a collection to include in the response.") +@click.option('-f', '--format', 'format_', default='json', + type=click.Choice(['json', 'tabular'], case_sensitive=True), + help='Gets the fields you have access to in the given data source.') +@click.argument("dataSource", required=True) +@click.pass_context +def dataSources_fields(ctx, sourcesearch, alias, searchstring, limit, offset, format_, datasource): + """Returns a collection of data sources (/dataSources) for use in a WQL query. + + [DATASOURCE] The Workday ID of the resource. + """ + p = ctx.obj["p"] + + if sourcesearch: + data_sources = p.wql_dataSources(alias=datasource, searchString=datasource, limit=1, offset=0) + + if data_sources['total'] != 1: + logger.error(f'Unexpected number of data sources: {data_sources["total"]}') + sys.exit(1) + + ds_id = data_sources['data'][0]['id'] + else: + ds_id = datasource + + fields = p.wql_dataSources_fields(id=ds_id, alias=alias, searchString=searchstring, limit=limit, offset=offset) - click.echo(json.dumps(ds, indent=2)) + if fields['total'] == 0: + logger.error('No WQL fields found.') + sys.exit(1) + + if format_ == 'json': + logger.info(json.dumps(fields, indent=2)) + else: + df = pd.json_normalize(fields['data']) + logger.info(df.to_csv(index=False)) @click.command("data") @@ -29,34 +93,83 @@ def dataSources(ctx, wid, limit, offset, search, name): help="The maximum number of object data entries included in the response, default=-1 (all).") @click.option("-o", "--offset", default=None, help="The offset to the first object in a collection to include in the response.") -@click.option("-f", "--file", "wql_file", default=None, type=click.Path(exists=True), +@click.option("-w", "--wql-file", "wql_file", default=None, type=click.Path(exists=True), help="Filename containing a WQL query.") -@click.option("-c", "--as_csv", "as_csv", is_flag=True, show_default=True, default=False, - help="Output query results as CSV.") +@click.option('-f', '--format', 'format_', default='json', + type=click.Choice(['json', 'tabular'], case_sensitive=True), + help='Gets the fields you have access to in the given data source.') @click.argument("query", required=False) @click.pass_context -def data(ctx, limit, offset, wql_file, as_csv, query): +def data(ctx, limit, offset, wql_file, format_, query): """ Returns the data from a WQL query. 
[QUERY] WQL query string to execute (/data). + + Note: specify "select *" to automatically expand the column list. """ p = ctx.obj["p"] if wql_file is None and query is None: - click.echo("No query provided") + click.echo("No query provided.") sys.exit(1) if query is not None: + # Passed as an explicit string. query_resolved = query else: + # Passed as a file name. with open(wql_file) as file: query_resolved = file.read().replace('\n', ' ') + query_resolved = query_resolved.strip() + + # If the WQL statements starts with exactly "select *", attempt + # to replace the asterick with the field list. + + if query_resolved.lower().startswith('select *'): + # Locate the "FROM {ds}" clause to get the data source name. + + # To query data from a data source: + # FROM dataSourceAlias + # To query data from a data source with a data source filter: + # FROM dataSourceAlias(dataSourceFilter=filterAlias, filterPrompt1=value1, filterPrompt2=value2) + # To query data from a data source using entry and effective date filters: + # FROM dataSourceAlias(effectiveAsOfDate=date, entryMoment=dateTime) + + from_regex = re.compile(r'\s+from[\s+|\(](\w+)', flags=re.IGNORECASE) + from_clause = from_regex.search(query_resolved) + + ds_alias = query_resolved[from_clause.start(1):from_clause.end(1)] + logger.debug(f'Detected data source: {ds_alias}.') + + ds = p.wql_dataSources(alias=ds_alias, limit=1) + + if ds['total'] != 1: + logger.error(f'Data source {ds_alias} not found.') + sys.exit(1) + + ds_id = ds['data'][0]['id'] + + fields = p.wql_dataSources_fields(id=ds_id) # No limit gets all fields. + + if fields['total'] == 0: + logger.error(f'No fields found for {ds_alias}.') + sys.exit(1) + + columns = '' + comma = '' + + for field in fields['data']: + columns += comma + field['alias'] + comma = ',' + + query_resolved = query_resolved.replace('*', columns, 1) + rows = p.wql_data(query_resolved, limit, offset) if rows["total"] != 0: - if as_csv: + if format_ == 'tabular': df = pd.json_normalize(rows["data"]) click.echo(df.to_csv(index=False)) else: diff --git a/prism/prism.py b/prism/prism.py index 5e111e2..8bc08f7 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -1388,49 +1388,152 @@ def fileContainers_load(self, id, file): return results - def wql_dataSources(self, id=None, alias=None, limit=100, offset=0, search=False): + def wql_dataSources(self, id=None, alias=None, searchString=None, limit=None, offset=None): operation = '/dataSources' if id is not None: operation = f'{operation}/{id}' logger.debug('wql_dataSources: {operation}') - url = f'{self.prism_endpoint}{operation}' + url = f'{self.wql_endpoint}{operation}' response = self.http_get(url) return response.json() - if limit is None: - logger.debug('wql_dataSources: {operation}') - url = f'{self.wql_endpoint}{operation}' + url_separator = '?' + + if alias is not None: + operation += f'?alias={urlparse.quote(alias)}' + url_separator = '&' + elif searchString is not None: + operation += f'?searchString={urlparse.quote(searchString)}' + url_separator = '&' + + logger.debug('wql_dataSources: {operation}') + url = f'{self.wql_endpoint}{operation}' # Always return a valid list - even if empty. 
return_sources = {'total': 0, 'data': []} - offset = 0 + if limit is not None and isinstance(limit, int) and 0 < limit <= 100: + return_all = False + + query_limit = limit + + if offset is not None and isinstance(offset, int) and offset > 0: + query_offset = offset + else: + query_offset = 0 + else: + return_all = True + query_limit = 100 + query_offset = 0 + + # Assume we'll loop over more than one page. while True: - r = self.http_get(f'{url}?limit=100&offset={offset}') + r = self.http_get(f'{url}{url_separator}limit={query_limit}&offset={query_offset}') - if r.status_code == 200: - ds = r.json() + if r.status_code != 200: + break - # Add this page to the final output. - return_sources['data'] += ds['data'] - else: - return None + ds = r.json() + + # Add this page to the final output. + return_sources['data'] += ds['data'] + + if not return_all: + break - if len(ds['data']) < 100: + if len(ds['data']) < query_limit: # A page size less than the limit means we are done. break - offset += 100 + query_offset += query_limit # Fix-up the final total of sources. return_sources["total"] = len(return_sources["data"]) return return_sources + def wql_dataSources_fields(self, id=None, alias=None, searchString=None, limit=None, offset=None): + """Retrieves a field of the data source instance. + + Parameters + ---------- + id : str + The Workday ID of the resource. + alias : str + The alias of the data source field. + searchString : str + The string to be searched in case-insensitive manner within the descriptors of the data source fields. + limit : int + The maximum number of objects in a single response. The default is 20, the maximum is 100, and None is all. + offset : int + The zero-based index of the first object in a response collection. + operation = '/dataSources' + """ + + if id is None: + return None + + operation = f'/dataSources/{id}/fields' + logger.debug('wql_dataSources_fields: {operation}') + url = f'{self.wql_endpoint}{operation}' + + url_separator = '?' + + if alias is not None: + operation += f'?alias={urlparse.quote(alias)}' + url_separator = '&' + + if searchString is not None: + operation += f'{url_separator}searchString={urlparse.quote(searchString)}' + url_separator = '&' + + if limit is not None and isinstance(limit, int) and 0 < limit <= 100: + return_all = False + + query_limit = limit + + if offset is not None and isinstance(offset, int) and offset > 0: + query_offset = offset + else: + query_offset = 0 + else: + return_all = True + + query_limit = 100 + query_offset = 0 + + return_fields = {'total':0, 'data':[]} + + while True: + url = f'{url}{url_separator}limit={query_limit}&offset={query_offset}' + + response = self.http_get(url) + + if response.status_code != 200: + break + + fields = response.json() + + # Add this page of fields to the final output. + return_fields['data'] += fields['data'] + + if not return_all: + break + + if len(fields['data']) < query_limit: + # A page size less than the limit means we are done. + break + + query_offset += query_limit + + return_fields['total'] = len(return_fields['data']) + + return return_fields + def wql_data(self, query, limit, offset): """Returns the data from a WQL query. 
From a8462be0188966fb3acf2cad0a3cf448ec1b4c69 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Fri, 20 Oct 2023 09:05:23 -0400 Subject: [PATCH 021/103] Docstring updates --- prism/commands/wql_commands.py | 4 ++-- prism/prism.py | 27 ++++++++++++++------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/prism/commands/wql_commands.py b/prism/commands/wql_commands.py index 4fd1838..233c0ed 100644 --- a/prism/commands/wql_commands.py +++ b/prism/commands/wql_commands.py @@ -125,7 +125,7 @@ def data(ctx, limit, offset, wql_file, format_, query): query_resolved = query_resolved.strip() # If the WQL statements starts with exactly "select *", attempt - # to replace the asterick with the field list. + # to replace the asterisk with the field list. if query_resolved.lower().startswith('select *'): # Locate the "FROM {ds}" clause to get the data source name. @@ -137,7 +137,7 @@ def data(ctx, limit, offset, wql_file, format_, query): # To query data from a data source using entry and effective date filters: # FROM dataSourceAlias(effectiveAsOfDate=date, entryMoment=dateTime) - from_regex = re.compile(r'\s+from[\s+|\(](\w+)', flags=re.IGNORECASE) + from_regex = re.compile(r'\s+from[\s*|(](\w+)', flags=re.IGNORECASE) from_clause = from_regex.search(query_resolved) ds_alias = query_resolved[from_clause.start(1):from_clause.end(1)] diff --git a/prism/prism.py b/prism/prism.py index 8bc08f7..dc367be 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -1039,16 +1039,6 @@ def dataChanges_get(self, type_='summary', search=False, refresh=False): """ - - :param name: - :param wid: - :param activity_id: - :param limit: - :param offset: - :param type_: - :param search: - :param refresh: - :return: """ # We are doing a dataChanges GET operation. operation = "/dataChanges" @@ -1124,7 +1114,7 @@ def dataChanges_get(self, # Only add matching rows data_changes["data"] += \ filter(lambda dtc: dtc["name"].find(name) != -1 or - dtc["displayName"].find(name) != -1, + dtc["displayName"].find(name) != -1, return_json["data"]) else: # Without searching, simply paste the current page to the list. @@ -1148,7 +1138,8 @@ def dataChanges_activities_get(self, id, activityID): Parameters ---------- id : str - A reference to a Prism Analytics data change.""" + A reference to a Prism Analytics data change. + """ operation = f"/dataChanges/{id}/activities/{activityID}" logger.debug(f"dataChanges_activities_get: {operation}") url = self.prism_endpoint + operation @@ -1389,6 +1380,16 @@ def fileContainers_load(self, id, file): return results def wql_dataSources(self, id=None, alias=None, searchString=None, limit=None, offset=None): + """Returns a collection of data sources for use in a WQL query. + + Parameters + ---------- + id : str + The ID of a Workday data source. + alias : str + Filters by alias match + searchString : str + """ operation = '/dataSources' if id is not None: @@ -1549,7 +1550,7 @@ def wql_data(self, query, limit, offset): Returns ------- dict - Returned dict with a "total" row count attribute and a "data" + Returns a dict with a "total" row count attribute and a "data" array of rows. """ operation = '/data' From deca3a68f44f9c995e7ec44399243aaa4dccf24c Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sat, 21 Oct 2023 15:42:05 -0400 Subject: [PATCH 022/103] Updates for logging and install. 
--- prism/cli.py | 35 ++++++++++++++--------- prism/commands/__init__.py | 0 prism/commands/buckets_commands.py | 2 +- prism/commands/dataChanges_commands.py | 12 +++++--- prism/commands/fileContainers_commands.py | 16 ++++------- prism/commands/raas_commands.py | 2 +- prism/commands/wql_commands.py | 13 +++++++-- prism/prism.py | 21 ++++++++------ 8 files changed, 61 insertions(+), 40 deletions(-) create mode 100644 prism/commands/__init__.py diff --git a/prism/cli.py b/prism/cli.py index 2deed62..da3c638 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -106,32 +106,41 @@ def cli(ctx, if log_level is None: set_level = logging.INFO else: - set_level = getattr(logging, log_level) + set_level = getattr(logging, log_level) # Translate text level to level value. # Setup logging for CLI operations. logger = logging.getLogger('prismCLI') logger.setLevel(set_level) + # Create an explicit console handler to handle just INFO message, i.e., + # script output. + formatter = logging.Formatter('%(message)s') + + ch = logging.StreamHandler() + ch.setFormatter(formatter) + ch.setLevel(logging.INFO) + logger.addHandler(ch) + + # If the log level is not INFO, create a separate stream + # for logging additional levels. + logging_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + formatter = logging.Formatter(logging_format) + + if set_level != logging.INFO: + other_handler = logging.StreamHandler() + other_handler.setFormatter(formatter) + other_handler.setLevel(set_level) + logger.addHandler(other_handler) + # Create a handler as specified by the user (or defaults) if log_file is not None: + # If a log file was specified, log EVERYTHING to the log. fh = logging.FileHandler(log_file) - - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') fh.setFormatter(formatter) - fh.setLevel(set_level) logger.addHandler(fh) - # Create an explicit console handler with a higher log level - ch = logging.StreamHandler() - - formatter = logging.Formatter('%(message)s') - ch.setFormatter(formatter) - - ch.setLevel(logging.INFO) - logger.addHandler(ch) - logger.debug("completed initialization.") # initialize the Prism class from our resolved configuration. 
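The logging hunk above splits normal CLI output from diagnostics. A minimal standalone sketch of that arrangement follows; the 'prismCLI' logger name and format strings come from the patch, while the helper function and its parameters are illustrative only, not part of the library:

import logging

def configure_cli_logging(log_level=logging.INFO, log_file=None):
    # Sketch of the handler layout wired up by prism/cli.py in this patch.
    logger = logging.getLogger('prismCLI')
    logger.setLevel(log_level)

    # Plain console handler: INFO messages are the command's normal output.
    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter('%(message)s'))
    console.setLevel(logging.INFO)
    logger.addHandler(console)

    detailed = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    # A second stream handler carries the extra detail when the caller
    # asked for a level other than INFO (for example DEBUG).
    if log_level != logging.INFO:
        verbose = logging.StreamHandler()
        verbose.setFormatter(detailed)
        verbose.setLevel(log_level)
        logger.addHandler(verbose)

    # An optional log file records everything the logger emits, with timestamps.
    if log_file is not None:
        fh = logging.FileHandler(log_file)
        fh.setFormatter(detailed)
        logger.addHandler(fh)

    return logger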
diff --git a/prism/commands/__init__.py b/prism/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index ce3e4f5..be853e0 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -48,7 +48,7 @@ def buckets_get(ctx, id, table_name, limit, offset, type_, search, format_, name target = bucket["targetDataset"]["descriptor"] state = bucket["state"]["descriptor"] - click.echo(f"{display_name}, operation: {operation}, target: {target}, state: {state}") + logger.info(f"{display_name}, operation: {operation}, target: {target}, state: {state}") elif format_ == "csv": df = pd.json_normalize(buckets["data"]) logger.info(df.to_csv(index=False)) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index bc01835..57fb2a7 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -144,10 +144,11 @@ def dataChanges_run(ctx, dct, fid, isname): data_changes = p.dataChanges_get(name=dct.replace(" ", "_")) if data_changes["total"] != 1: - click.echo(f"Data change task not found: {dct}") + logger.error(f"Data change task not found: {dct}") sys.exit(1) dct_id = data_changes["data"][0]["id"] + logger.debug(f'resolved ID: {dct_id}') else: dct_id = dct @@ -173,7 +174,7 @@ def dataChanges_run(ctx, dct, fid, isname): def dataChanges_activities(ctx, dct, activityid, status, isname): """Get the status for a specific activity associated with a data change task. - [ID] A reference to a data change task. + [DCT] A reference to a data change task. [ACTIVITYID] A reference to a data change task activity. """ @@ -184,10 +185,11 @@ def dataChanges_activities(ctx, dct, activityid, status, isname): data_changes = p.dataChanges_list(name=dct.replace(" ", "_")) if data_changes["total"] != 1: - click.echo(f"Data change task not found: {dct}") + logger.error(f"Data change task not found: {dct}") sys.exit(1) dct_id = data_changes["data"][0]["id"] + logger.debug(f'resolved ID: {dct_id}') else: dct_id = dct @@ -216,7 +218,7 @@ def dataChanges_activities(ctx, dct, activityid, status, isname): def dataChanges_upload(ctx, isname, dct, file, wait, verbose): """Execute a data change task using the provided file(s). - [DCT] A reference to a Prism Analytics Data Change Task. + [DCT] A reference to a Prism Analytics Data Change Task. [FILE] One or more .CSV or .CSV.GZ files. """ @@ -230,6 +232,7 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): sys.exit(1) dct_id = data_change_tasks['data'][0]['id'] + logger.debug(f'resolved ID: {dct_id}') else: dct_id = dct @@ -241,6 +244,7 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): sys.exit(1) fid = file_container['id'] + logger.debug(f'new file container ID: {fid}') # Execute the DCT. activity = p.dataChanges_activities_post(id=dct_id, fileContainerID=fid) diff --git a/prism/commands/fileContainers_commands.py b/prism/commands/fileContainers_commands.py index 5ac9588..bd920a8 100644 --- a/prism/commands/fileContainers_commands.py +++ b/prism/commands/fileContainers_commands.py @@ -9,20 +9,16 @@ @click.command("create") @click.pass_context def fileContainers_create(ctx): - """Create a new fileContainers object returning the ID. + """Create a new fileContainers object returning the ID.""" - Returns - ------- - str - File container ID of the new container. 
- """ p = ctx.obj["p"] file_container = p.fileContainers_create() if file_container is not None: - logger.info(file_container["id"]) + logger.info(json.dumps(file_container, indent=2)) else: + logger.error('Error creating file container.') sys.exit(1) @@ -48,14 +44,14 @@ def fileContainers_get(ctx, id): @click.argument("file", nargs=-1, type=click.Path(exists=True)) @click.pass_context def fileContainers_load(ctx, id, file): - """ - Load one or more files into a file container returning the container ID. + """Load one or more files into a file container returning the container ID. - [FILE] one or more CSV or GZipped CSV files to load. + [FILE] one or more CSV or GZipped CSV (.csv.gz) files to load. """ if len(file) == 0: # Click gives a tuple - even if no files included logger.error("One or more files must be specified.") + sys.exit(1) p = ctx.obj["p"] diff --git a/prism/commands/raas_commands.py b/prism/commands/raas_commands.py index 3d8eff8..e3567a9 100644 --- a/prism/commands/raas_commands.py +++ b/prism/commands/raas_commands.py @@ -23,4 +23,4 @@ def run(ctx, user, format_, report, params): report_output = p.raas_run(report, user, params, format_) # Don't log the output - pusht - click.echo(report_output) + logger.info(report_output) diff --git a/prism/commands/wql_commands.py b/prism/commands/wql_commands.py index 233c0ed..b046cba 100644 --- a/prism/commands/wql_commands.py +++ b/prism/commands/wql_commands.py @@ -111,7 +111,7 @@ def data(ctx, limit, offset, wql_file, format_, query): p = ctx.obj["p"] if wql_file is None and query is None: - click.echo("No query provided.") + logger.error("No query provided.") sys.exit(1) if query is not None: @@ -123,13 +123,18 @@ def data(ctx, limit, offset, wql_file, format_, query): query_resolved = file.read().replace('\n', ' ') query_resolved = query_resolved.strip() + logger.debug(f'resolved query: {query_resolved}') # If the WQL statements starts with exactly "select *", attempt # to replace the asterisk with the field list. if query_resolved.lower().startswith('select *'): + logger.debug('wql "select *" detected.') + # Locate the "FROM {ds}" clause to get the data source name. + # Notes from documentation: + # # To query data from a data source: # FROM dataSourceAlias # To query data from a data source with a data source filter: @@ -150,6 +155,7 @@ def data(ctx, limit, offset, wql_file, format_, query): sys.exit(1) ds_id = ds['data'][0]['id'] + logger.debug(f'alias {ds_alias} resolved: {ds_id}') fields = p.wql_dataSources_fields(id=ds_id) # No limit gets all fields. @@ -165,12 +171,13 @@ def data(ctx, limit, offset, wql_file, format_, query): comma = ',' query_resolved = query_resolved.replace('*', columns, 1) + logger.debug(f'resolved WQL: {query_resolved}') rows = p.wql_data(query_resolved, limit, offset) if rows["total"] != 0: if format_ == 'tabular': df = pd.json_normalize(rows["data"]) - click.echo(df.to_csv(index=False)) + logger.info(df.to_csv(index=False)) else: - click.echo(json.dumps(rows, indent=2)) + logger.info(json.dumps(rows, indent=2)) diff --git a/prism/prism.py b/prism/prism.py index dc367be..700bfef 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -43,15 +43,17 @@ def set_logging(log_file=None, log_level="INFO"): if log_level is None: set_level = logging.INFO else: - # Make sure the caller gave us a valid "name" for logging level. + # Make sure the caller gave us a valid "name" (INFO/DEBUG/etc) for logging level. 
if hasattr(logging, log_level): set_level = getattr(logging, log_level) else: - set_level = getattr(logging, "INFO") + set_level = logging.INFO # If no file was specified, simply loop over any handlers and # set the logging level. if log_file is None: + logger.setLevel(set_level) + for log_handler in logger.handlers: log_handler.setLevel(set_level) else: @@ -1186,8 +1188,8 @@ def dataChanges_activities_post(self, id, fileContainerID=None): logger.debug(f"successfully started data load task - id: {activity_id}") return return_json elif r.status_code == 400: - logger.debug(f'error running data change task.') - return r.json() + logger.error(f'error running data change task.') + return r.json() # This is still valid JSON with the error. return None @@ -1212,9 +1214,11 @@ def dataChanges_is_valid(self, id): return False if "error" in dct: - logger.critical(f"data_change_id {id} is not valid!") + logger.error(f"data_change_id {id} is not valid!") return False + # There is no specific status value to check, we simply get + # a small JSON object with the ID of the DCT if it is valid. return True def dataChanges_validate(self, id): @@ -1234,7 +1238,8 @@ def dataChanges_validate(self, id): r = self.http_get(url) - if r.status_code in [ 200, 400, 404]: + if r.status_code in [200, 400, 404]: + # For these status codes, simply return what we got. return r.json() return None @@ -1394,7 +1399,7 @@ def wql_dataSources(self, id=None, alias=None, searchString=None, limit=None, of if id is not None: operation = f'{operation}/{id}' - logger.debug('wql_dataSources: {operation}') + logger.debug(f'wql_dataSources: {operation}') url = f'{self.wql_endpoint}{operation}' response = self.http_get(url) @@ -1410,7 +1415,7 @@ def wql_dataSources(self, id=None, alias=None, searchString=None, limit=None, of operation += f'?searchString={urlparse.quote(searchString)}' url_separator = '&' - logger.debug('wql_dataSources: {operation}') + logger.debug(f'wql_dataSources: {operation}') url = f'{self.wql_endpoint}{operation}' # Always return a valid list - even if empty. From 7c7d4643e8b5645e446c8ac184753f6a44577ae0 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sat, 21 Oct 2023 15:59:20 -0400 Subject: [PATCH 023/103] Updates for install. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e2ed2a1..b058c03 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ author_email="CurtLHampton@gmail.com", url="https://github.com/Workday/prism-python", packages=["prism"], - package_data={"prism": ["data/*"]}, + package_data={"prism": ["data/*", "commands/*"]}, entry_points={"console_scripts": ["prism=prism.cli:main"]}, install_requires=requirements, extras_require={"dev": ["pytest"]}, From 43247b146f6c664547b58950eb218d9c36e33a4e Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sat, 21 Oct 2023 19:01:33 -0400 Subject: [PATCH 024/103] Updates to command imports. 
--- prism/cli.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/prism/cli.py b/prism/cli.py index da3c638..3835a57 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -6,13 +6,13 @@ import prism -import commands.tables_commands as t_commands -import commands.buckets_commands as b_commands -import commands.dataChanges_commands as d_commands -import commands.dataExport_commands as e_commands -import commands.fileContainers_commands as f_commands -import commands.wql_commands as w_commands -import commands.raas_commands as r_commands +from commands import tables_commands as t_commands +from commands import buckets_commands as b_commands +from commands import dataChanges_commands as d_commands +from commands import dataExport_commands as e_commands +from commands import fileContainers_commands as f_commands +from commands import wql_commands as w_commands +from commands import raas_commands as r_commands def param_fixup(value, config, config_name, option): From 4f9ce6095db566ceed9cb35bc759de076fa2136a Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sat, 21 Oct 2023 19:07:22 -0400 Subject: [PATCH 025/103] Updates to command imports. --- prism/cli.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/prism/cli.py b/prism/cli.py index 3835a57..18124dc 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -6,13 +6,13 @@ import prism -from commands import tables_commands as t_commands -from commands import buckets_commands as b_commands -from commands import dataChanges_commands as d_commands -from commands import dataExport_commands as e_commands -from commands import fileContainers_commands as f_commands -from commands import wql_commands as w_commands -from commands import raas_commands as r_commands +from .commands import tables_commands as t_commands +from .commands import buckets_commands as b_commands +from .commands import dataChanges_commands as d_commands +from .commands import dataExport_commands as e_commands +from .commands import fileContainers_commands as f_commands +from .commands import wql_commands as w_commands +from .commands import raas_commands as r_commands def param_fixup(value, config, config_name, option): From 196776e5b254a75b92eb9d7b9074983fa17617ee Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sat, 21 Oct 2023 19:12:13 -0400 Subject: [PATCH 026/103] Downgrade to match 3.9. --- prism/commands/tables_commands.py | 55 +++++++++++++++---------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index a61229a..3e4e2e2 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -395,35 +395,32 @@ def schema_from_csv(prism, file): # Default all "un-typed" fields to text. field['type'] = {'id': 'Schema_Field_Type=Text'} - match fld_type: - case 'date': - if 'parseformat' in row and isinstance(row['parseformat'], str) and len(row['parseformat']) > 0: - field['parseFormat'] = row['parseformat'] - else: - field['parseFormat'] = 'yyyy-MM-dd' - - case 'numeric': - if 'precision' in row: - field['precision'] = row['precision'] - - if 'scale' in row: - field['scale'] = row['scale'] - - case 'instance': - # We need all the data sources to resolve the business objects - # to include their WID. 
- data_sources = prism.datasources_list() - - if data_sources is None or data_sources['total'] == 0: - click.echo('Error calling WQL/dataSources') - return - - # Find the matching businessObject - bo = [ds for ds in data_sources['data'] - if ds['businessObject']['descriptor'] == row['businessObject']] - - if len(bo) == 1: - field['businessObject'] = bo[0]['businessObject'] + if fld_type == 'date': + if 'parseformat' in row and isinstance(row['parseformat'], str) and len(row['parseformat']) > 0: + field['parseFormat'] = row['parseformat'] + else: + field['parseFormat'] = 'yyyy-MM-dd' + elif fld_type == 'numeric': + if 'precision' in row: + field['precision'] = row['precision'] + + if 'scale' in row: + field['scale'] = row['scale'] + elif fld_type == 'instance': + # We need all the data sources to resolve the business objects + # to include their WID. + data_sources = prism.datasources_list() + + if data_sources is None or data_sources['total'] == 0: + click.echo('Error calling WQL/dataSources') + return + + # Find the matching businessObject + bo = [ds for ds in data_sources['data'] + if ds['businessObject']['descriptor'] == row['businessObject']] + + if len(bo) == 1: + field['businessObject'] = bo[0]['businessObject'] schema['fields'].append(field) ordinal += 1 From c7c36d587cd6c03ebbb471c79ac98717872b0b7e Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sat, 21 Oct 2023 19:19:43 -0400 Subject: [PATCH 027/103] Change entry point to cli. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b058c03..c330997 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ url="https://github.com/Workday/prism-python", packages=["prism"], package_data={"prism": ["data/*", "commands/*"]}, - entry_points={"console_scripts": ["prism=prism.cli:main"]}, + entry_points={"console_scripts": ["prism=prism.cli:cli"]}, install_requires=requirements, extras_require={"dev": ["pytest"]}, keywords="prism", From 2d1d45b850119ce2640860e17a2721b11af6c336 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sat, 21 Oct 2023 19:40:04 -0400 Subject: [PATCH 028/103] Tables get - fixed case-insensitive search. --- prism/prism.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/prism/prism.py b/prism/prism.py index 700bfef..50659cc 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -547,7 +547,8 @@ def tables_get( # return results, i.e., search the this batch for matches. if name is not None: # Substring search for matching table names, display names - match_tables = [tab for tab in tables["data"] if name in tab["name"] or name in tab["displayName"]] + match_tables = [tab for tab in tables["data"] + if name.lower() in tab["name"].lower() or name.lower() in tab["displayName"].lower()] else: # Grab all the tables in the result match_tables = tables["data"] @@ -1116,7 +1117,7 @@ def dataChanges_get(self, # Only add matching rows data_changes["data"] += \ filter(lambda dtc: dtc["name"].find(name) != -1 or - dtc["displayName"].find(name) != -1, + dtc["displayName"].find(name) != -1, return_json["data"]) else: # Without searching, simply paste the current page to the list. @@ -1307,7 +1308,7 @@ def fileContainers_get(self, id): if response.status_code == 404: logger.warning('verify: Self-Service: Prism File Container domain in the Prism Analytics functional area.') - return {"total": 0, 'data' : []} # Always return a list. + return {"total": 0, 'data': []} # Always return a list. 
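
The tables_get hunk in the patch above makes the client-side name filter case-insensitive. The same matching rule in isolation, with made-up sample rows:

tables = {"data": [
    {"name": "Employee_Table", "displayName": "Employee Table"},
    {"name": "COST_CENTER", "displayName": "Cost Center"},
]}
name = "employee"

# Substring match against both the API name and the display name, ignoring case.
match_tables = [tab for tab in tables["data"]
                if name.lower() in tab["name"].lower() or name.lower() in tab["displayName"].lower()]
print([tab["name"] for tab in match_tables])  # ['Employee_Table']
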
def fileContainers_load(self, id, file): """ @@ -1512,7 +1513,7 @@ def wql_dataSources_fields(self, id=None, alias=None, searchString=None, limit=N query_limit = 100 query_offset = 0 - return_fields = {'total':0, 'data':[]} + return_fields = {'total': 0, 'data': []} while True: url = f'{url}{url_separator}limit={query_limit}&offset={query_offset}' From d5f0b36f20f9d831ba2f7fd05a826b47257ed935 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 22 Oct 2023 10:49:04 -0400 Subject: [PATCH 029/103] Update table commands to use isName nomenclature. --- prism/cli.py | 3 +- prism/commands/tables_commands.py | 300 +++++++++++++++++------------- prism/prism.py | 10 +- 3 files changed, 179 insertions(+), 134 deletions(-) diff --git a/prism/cli.py b/prism/cli.py index 18124dc..cb7ef40 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -172,7 +172,8 @@ def tables(): tables.add_command(t_commands.tables_get) tables.add_command(t_commands.tables_create) -tables.add_command(t_commands.tables_edit) +tables.add_command(t_commands.tables_put) +tables.add_command(t_commands.tables_patch) tables.add_command(t_commands.tables_upload) tables.add_command(t_commands.tables_truncate) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 3e4e2e2..1c61511 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -10,8 +10,8 @@ @click.command('get') -@click.option('-n', '--name', - help='Specific WID of Prism table or dataset to list.') +@click.option('-n', '--isName', is_flag=True, default=False, + help='Flag to treat the table argument as a name.') @click.option('-l', '--limit', type=int, default=None, help='The maximum number of object data entries included in the response, default=all.') @click.option('-o', '--offset', type=int, default=None, @@ -20,89 +20,65 @@ type=click.Choice(['summary', 'full', 'permissions'], case_sensitive=False), help='How much information returned for each table.') @click.option('-f', '--format', 'format_', default='json', - type=click.Choice(['json', 'summary', 'schema', 'csv'], case_sensitive=False), - help='Format output as JSON, summary, schema, or CSV.') + type=click.Choice(['json', 'tabular', 'schema'], case_sensitive=False), + help='Format output as JSON, tabular, or bucket schema.') @click.option('-s', '--search', is_flag=True, help='Enable substring search of NAME in api name or display name, default=False (exact match).') -@click.argument('id', required=False) +@click.argument('table', required=False) @click.pass_context -def tables_get(ctx, name, id, limit, offset, type_, format_, search): +def tables_get(ctx, isname, table, limit, offset, type_, format_, search): """List the tables or datasets permitted by the security profile of the current user. [NAME] Prism table name to list. """ - if type_ in ('summary', 'permissions') and format_ in ('schema', 'csv'): - # Summary results cannot generate schema or CSV output since there will be no fields. + if type_ in ('summary', 'permissions') and format_ == 'schema': + # Summary results cannot generate schema since there will be no fields. logger.error(f'Invalid combination of type "{type_}" and format "{format_}".') sys.exit(1) p = ctx.obj['p'] - # Query the tenant... - tables = p.tables_get(name, id, limit, offset, type_, search) + # Query the tenant...see if the caller said to treat the + # table as a name, AND that a table was provided. 
+ if not isname and table is not None: + # When using an ID, the get operation returns a simple + # dictionary of the table definition. + table = p.tables_get(id=table, type_=type_) - if id is not None: - if tables is None: - logger.error(f"Table ID {id} not found.") + if table is None: + logger.error(f"Table ID {table} not found.") sys.exit(1) - else: - # When using ID, the returned object is NOT an - # array of tables - dump the single table object. - logger.info(json.dumps(tables, indent=2)) - - return - - # For any other type of GET:/tables, the return ALWAYS has a total - # tables returned value. - if tables['total'] == 0: - return - - # Handle output - if format_ == 'json': - # The results could be one table or an array of multiple - # tables - simply dump the returned object. - - logger.info(json.dumps(tables, indent=2)) - elif format_ == 'summary': - for table in tables['data']: - display_name = table['displayName'] - rows = table['stats']['rows'] if 'stats' in table and 'rows' in table['stats'] else 'Null' - size = table['stats']['size'] if 'stats' in table and 'size' in table['stats'] else 'Null' - refreshed = table['dateRefreshed'] if 'dateRefreshed' in table else 'unknown' - enabled = table['enableForAnalysis'] if 'enableForAnalysis' in table else 'Null' - - logger.info(f'{display_name}, Enabled: {enabled}, Rows: {rows}, Size: {size}, Refreshed: {refreshed}') - elif format_ == 'csv': - df = pd.json_normalize(tables['data']) - logger.info(df.to_csv(index=False)) - elif format_ == 'schema': - # Dump out the fields of the first table in the result in - # a format compatible with a schema used to created or edit - # a table. - table = tables['data'][0] # Only output the first table. - - # Remove the Prism audit columns. - fields = [fld for fld in table['fields'] if not fld['name'].startswith('WPA_')] - - # Remove tenant specific values - these are not needed - # if the user wants to update a table definition. + if format_ == 'schema': + logger.info(json.dumps(get_fields(table), indent=2)) + elif format_ == 'tabular': + df = pd.json_normalize(table) + logger.info(df.to_csv(index=False)) + else: + logger.info(json.dumps(table, indent=2)) + else: + # When querying by name, the get operation returns a + # dict with a count of found tables and a list of + # tables. + tables = p.tables_get(name=table, limit=limit, offset=offset, search=search) - for fld in fields: - if 'fieldId' in fld: - del fld['fieldId'] + if tables['total'] == 0: + logger.error(f"Table ID {table} not found.") + return - if 'id' in fld: - del fld['id'] + if format_ == 'json': + logger.info(json.dumps(tables, indent=2)) + elif format_ == 'tabular': + df = pd.json_normalize(tables['data']) + logger.info(df.to_csv(index=False)) + elif format_ == 'schema': + fields = [] - if 'type' in fld: - if 'descriptor' in fld['type']: - # Convert the descriptor to shorten the Prism type syntax. - fld['type']['id'] = f"Schema_Field_Type={fld['type']['descriptor']}" - del fld['type']['descriptor'] + for tab in tables['data']: + fields.append(get_fields(tab)) - logger.info(json.dumps(fields, indent=2)) + logger.info(json.dumps(fields, indent=2)) @click.command('create') @@ -130,15 +106,17 @@ def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, s """ p = ctx.obj['p'] - # We can assume a valid schema - get_schema sys.exits if there is a problem. + # We can assume a schema was found/built - get_schema sys.exits if there is a problem. 
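
As the rewritten tables_get command above shows, the client call returns two different shapes: a lookup by ID yields the table definition itself, while a lookup by name yields a paged wrapper with "total" and "data". A usage sketch, assuming p is an already-authenticated prism.Prism client:

def show_tables(p, table):
    # By ID the client returns the table definition itself (or None when not found).
    by_id = p.tables_get(id=table, type_="full")
    if by_id is not None:
        print(by_id["displayName"])

    # By name the client returns a paged wrapper; iterate the "data" list.
    by_name = p.tables_get(name=table, limit=10, offset=0, search=True)
    for tab in by_name["data"]:
        print(tab["id"], tab["name"])
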
schema = resolve_schema(p, file, sourcename, sourcewid) # Initialize a new schema with the particulars for this table operation. if name is not None: # If we got a name, set it in the table schema schema['name'] = name.replace(' ', '_') # Minor clean-up + logger.debug(f'setting table name to {schema["name"]}') elif 'name' not in schema: # The schema doesn't have a name and none was given - exit. + # Note: this could be true if we have a schema of only fields. logger.error('Table --name must be specified.') sys.exit(1) @@ -148,90 +126,98 @@ def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, s elif 'displayName' not in schema: # Default the display name to the name if not in the schema. schema['displayName'] = name + logger.debug(f'defaulting displayName to {schema["displayName"]}') if enableforanalysis is not None: schema['enableForAnalysis'] = enableforanalysis elif 'enableForAnalysis' not in schema: # Default to False - do not enable. schema['enableForAnalysis'] = False + logger.debug('defaulting enableForAnalysis to False.') # Create the table. table_def = p.tables_post(schema) if table_def is not None: - logger.info(f'Table {name} created.') + logger.info(json.dumps(table_def, indent=2)) else: logger.error(f'Error creating table {name}.') + sys.exit(1) -@click.command('edit') -@click.option('-n', '--name', default=None, - help='Table name - overrides name from schema.') -@click.option('-i', '--id', default=None, - help='Prism table ID.') +@click.command('put') @click.option('-t', '--truncate', is_flag=True, default=False, help='Truncate the table before updating.') +@click.argument('file', required=True, type=click.Path(exists=True, dir_okay=False, readable=True)) +@click.pass_context +def tables_put(ctx, file, truncate): + """Edit the schema for an existing table. + + [FILE] File containing an updated schema definition for the table. + """ + p = ctx.obj['p'] + + # The user can specify a GET:/tables output file containing + # the ID and other attributes that could be passed on the + # command line. 
+ schema = resolve_schema(file=file) + + table = p.tables_put(schema, truncate=truncate) + + if table is None: + logger.error(f'Error updating table.') + else: + logger.info(json.dumps(table, indent=2)) + + +@click.command('patch') +@click.option('-n', '--isName', + help='Flag to treat the table argument as a name.') @click.option('--displayName', is_flag=False, flag_value="*-clear-*", default=None, help='Set the display name for an existing table.') @click.option('--description', is_flag=False, flag_value="*-clear-*", default=None, help='Set the display name for an existing table.') @click.option('--documentation', is_flag=False, flag_value="*-clear-*", default=None, help='Set the documentation for an existing table.') -@click.option('--enableForAnalysis', is_flag=False, default=None, required=False, +@click.option('--enableForAnalysis', is_flag=False, default=None, type=click.Choice(['true', 'false'], case_sensitive=False)) -@click.option('-f', '--file', type=click.Path(exists=True, dir_okay=False, readable=True)) -@click.argument('id', required=False, type=str) +@click.argument('table', required=True, type=str) +@click.argument('file', required=False, type=click.Path(exists=True, dir_okay=False, readable=True)) @click.pass_context -def tables_edit(ctx, name, id, file, truncate, - displayname, description, documentation, enableforanalysis): +def tables_patch(ctx, isname, table, file, + displayname, description, documentation, enableforanalysis): """Edit the schema for an existing table. - NAME The API name of the table to update\b - [FILE] Optional file containing an updated schema definition for the table. - - Note: A schema file, --sourceName, or --sourceWID must be specified. + TABLE The ID or API name (use -n option) of the table to patch + [FILE] Optional file containing patch values for the table. """ p = ctx.obj['p'] # Figure out the new schema either by file or other table. - schema = None - resolved_id = None + patch_data = {} # The user can specify a GET:/tables output file containing # the ID and other attributes that could be passed on the # command line. if file is not None: - schema = resolve_schema(p, file) - - # If we got a file name, do a quick sanity check. - if 'id' not in schema or 'fields' not in schema: - logger.error(f'Specify a valid table schema file.') + try: + with open(file, "r") as patch_file: + patch_data = json.load(patch_file) + except Exception as e: + logger.error(e) sys.exit(1) - resolved_id = schema['id'] - - # See if the user is overriding the ID we may have from - # a specified schema file. - if id is not None: - # No verification, simply assume the ID is valid. - resolved_id = id - elif name is not None: - # Before doing anything, table name must exist. - tables = p.tables_get(name=name) # Exact match - - if tables['total'] == 0: - logger.error(f'Table name "{name}" not found.') + if not isinstance(patch_data, dict): + logger.error('invalid patch file - should be a dictionary') sys.exit(1) - resolved_id = tables['data'][0]['id'] - - if resolved_id is None: - logger.error('Specify a schema file, ID or name to update.') - sys.exit(1) + valid_attributes = ['displayName', 'description', 'enableForAnalysis', 'documentation'] - # If the caller sent specified attributes, do a patch not put. 
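
A sketch of what a patch file for the new tables patch command could contain, limited to the four attributes the command validates. The file name and values are hypothetical; enableForAnalysis is shown as a string to match the CLI's true/false choice.

import json

patch = {
    "displayName": "All Workers",
    "description": "Nightly load from HCM",
    "documentation": "Owned by the People Analytics team",
    "enableForAnalysis": "true",
}

with open("table_patch.json", "w") as fh:
    json.dump(patch, fh, indent=2)
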
- patch_data = {} + for patch_attr in patch_data.keys(): + if patch_attr not in valid_attributes: + logger.error(f'unexpected attribute {patch_attr} in patch file') + sys.exit(1) def set_patch_value(attr, value): """Utility function to set or clear a table attribute.""" @@ -240,6 +226,11 @@ def set_patch_value(attr, value): else: patch_data[attr] = value + # See if the user creating new patch variables or overriding + # values from the patch file. + + # Note: specifying the option without a value creates a + # patch value to clear the value in the table def. if displayname is not None: # Specified on CLI set_patch_value('displayName', displayname) @@ -255,31 +246,47 @@ def set_patch_value(attr, value): else: patch_data['enableForAnalysis'] = 'false' - if len(patch_data) == 0 and file is None: - logger.error("Specify values to update or a schema file with updates.") + # The caller must be asking for something to change! + if len(patch_data) == 0: + logger.error("Specify at least one schema value to update.") sys.exit(1) - if len(patch_data) > 0: - table = p.tables_patch(id=resolved_id, patch=patch_data) + # Identify the existing table we are about to patch. + if not isname: + # No verification, simply assume the ID is valid. + resolved_id = table else: - table = p.tables_put(id=resolved_id, schema=schema, truncate=truncate) + # Before doing anything, table name must exist. + tables = p.tables_get(name=table, limit=1, search=False) # Exact match + + if tables['total'] == 0: + logger.error(f'Table name "{table}" not found.') + sys.exit(1) + + resolved_id = tables['data'][0]['id'] + + table = p.tables_patch(id=resolved_id, patch=patch_data) if table is None: logger.error(f'Error updating table ID {resolved_id}') else: - logger.debug(json.dumps(table, indent=2)) - logger.info(f'Table {resolved_id} updated.') + logger.info(json.dumps(table, indent=2)) @click.command('upload') +@click.option('-n', '--isName', is_flag=True, default=False, + help='Flag to treat the table argument as a name.') @click.option('-o', '--operation', default='TruncateAndInsert', help='Operation for the table operation - default to TruncateAndInsert.') -@click.argument('name', required=True) +@click.argument('table', required=True) @click.argument('file', nargs=-1, type=click.Path(exists=True)) @click.pass_context -def tables_upload(ctx, name, operation, file): +def tables_upload(ctx, table, isname, operation, file): """ Upload a file into the table using a bucket. + + [TABLE] A Prism Table identifier. + [FILE] One or more CSV or GZIP.CSV files. """ p = ctx.obj['p'] @@ -290,7 +297,10 @@ def tables_upload(ctx, name, operation, file): logger.error('No files to upload.') sys.exit(1) - bucket = p.buckets_create(target_name=name, operation=operation) + if isname: + bucket = p.buckets_create(target_id=table, operation=operation) + else: + bucket = p.buckets_create(target_name=table, operation=operation) if bucket is None: logger.error('Bucket creation failed.') @@ -303,19 +313,24 @@ def tables_upload(ctx, name, operation, file): @click.command('truncate') -@click.argument('name', required=True) +@click.option('-n', '--isName', is_flag=True, default=False, + help='Flag to treat the table argument as a name.') +@click.argument('table', required=True) @click.pass_context -def tables_truncate(ctx, name): +def tables_truncate(ctx, table, isname): """ Truncate the named table. - [NAME] The API name of the Prism table to truncate. + [TABLE] The Prism Table ID or API name of the table to truncate. 
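
The set_patch_value helper above relies on the flag_value sentinel "*-clear-*": specifying an option without a value stores None so the attribute gets blanked on the server. The same behavior in a runnable miniature:

patch_data = {}

def set_patch_value(attr, value):
    # "*-clear-*" is the CLI's sentinel for "blank out this attribute";
    # anything else is stored as the new value.
    if value == "*-clear-*":
        patch_data[attr] = None
    else:
        patch_data[attr] = value

set_patch_value("description", "*-clear-*")
set_patch_value("displayName", "All Workers")
print(patch_data)  # {'description': None, 'displayName': 'All Workers'}
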
""" p = ctx.obj['p'] - msg = f'Unable to truncate table "{name}" - see log for details.' + msg = f'Unable to truncate table "{table}" - see log for details.' # To do a truncate, we still need a bucket with a truncate operation. - bucket = p.buckets_create(target_name=name, operation='TruncateAndInsert') + if isname: + bucket = p.buckets_create(target_name=table, operation='TruncateAndInsert') + else: + bucket = p.buckets_create(target_id=table, operation='TruncateAndInsert') if bucket is None: logger.error(msg) @@ -341,12 +356,12 @@ def schema_from_csv(prism, file): logger.error(f'FIle {file} not found - skipping.') sys.exit(1) - schema = {'fields': []} # Start with an empy schema definition. + schema = {'fields': []} # Start with an empty schema definition. with open(file, newline='') as csvfile: reader = csv.DictReader(csvfile) - # Force all the columns names from the CSV to lowercase to make + # Force all the columns names (first row) from the CSV to lowercase to make # lookups consistent regardless of the actual case of the columns. reader.fieldnames = [f_name.lower() for f_name in reader.fieldnames] @@ -371,7 +386,7 @@ def schema_from_csv(prism, file): 'displayName': row['displayname'] if 'displayname' in row else row['name'] } - # The following two items may not be in the CSV, the columns are not required and may not be present. + # The following two columns are not required and may not be present. if 'required' in row and isinstance(row['required'], str) and row['required'].lower() == 'true': field['required'] = True @@ -431,7 +446,8 @@ def schema_from_csv(prism, file): def csv_from_fields(fields): """Convert a Prism field list to CSV representation.""" - format_str = '{name},"{displayName}",{ordinal},{type},"{businessObject}",{precision},{scale},"{parseFormat}",{required},{externalId}\n' + format_str = '{name},"{displayName}",{ordinal},{type},"{businessObject}",' + format_str += '{precision},{scale},"{parseFormat}",{required},{externalId}\n' # Start with the CSV column headings. csv_str = 'name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId\n' @@ -459,7 +475,7 @@ def csv_from_fields(fields): return csv_str -def resolve_schema(p, file, source_name=None, source_id=None): +def resolve_schema(p=None, file=None, source_name=None, source_id=None): """Get or extract a schema from a file or existing Prism table.""" # Start with a blank schema definition. @@ -481,7 +497,7 @@ def resolve_schema(p, file, source_name=None, source_id=None): # the list of fields. If we got a list, then we have a list of fields we # use to start the schema definition. - if type(schema) is list: + if isinstance(schema, list): schema['fields'] = schema else: # This should be a full schema, perhaps from a table list command. @@ -515,3 +531,29 @@ def resolve_schema(p, file, source_name=None, source_id=None): schema = tables['data'][0] return schema + + +def get_fields(table): + if 'fields' not in table: + logger.error('get_fields: table object does not contain fields attribute.') + return None + + # Remove the Prism audit columns. + fields = [fld for fld in table['fields'] if not fld['name'].startswith('WPA_')] + + # Remove tenant specific values - these are not needed + # if the user wants to update a table definition. + for fld in fields: + if 'fieldId' in fld: + del fld['fieldId'] + + if 'id' in fld: + del fld['id'] + + if 'type' in fld: + if 'descriptor' in fld['type']: + # Convert the descriptor to the shortened Prism type syntax. 
+ fld['type']['id'] = f"Schema_Field_Type={fld['type']['descriptor']}" + del fld['type']['descriptor'] + + return fields diff --git a/prism/prism.py b/prism/prism.py index 50659cc..d362037 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -464,7 +464,7 @@ def tables_get( type_ : str Level of detail to return. search : bool - Enable contains searching for table names ans display names. + Enable contains searching for table names and display names. Returns ------- @@ -773,8 +773,8 @@ def buckets_get(self, # Caller is looking for any/all buckets by target table match_buckets = [ bck for bck in buckets["data"] - if table_name == bck["targetDataset"]["descriptor"] - or (search and table_name.lower() in bck["targetDataset"]["descriptor"].lower()) + if table_name == bck["targetDataset"]["descriptor"] or + (search and table_name.lower() in bck["targetDataset"]["descriptor"].lower()) ] else: # Grab all the tables in the result - select all buckets. @@ -985,6 +985,7 @@ def buckets_files(self, id, file=None): else: target_files = resolve_file_list(file) + target_file: str for target_file in target_files: if target_file is None: new_file = {"file": ("empty", io.BytesIO())} @@ -1079,6 +1080,7 @@ def dataChanges_get(self, search_offset = offset searching = False + name_param = "" if name is not None and isinstance(name, str) and len(name) > 0: if search is not None and isinstance(search, bool) and search: @@ -1117,7 +1119,7 @@ def dataChanges_get(self, # Only add matching rows data_changes["data"] += \ filter(lambda dtc: dtc["name"].find(name) != -1 or - dtc["displayName"].find(name) != -1, + dtc["displayName"].find(name) != -1, return_json["data"]) else: # Without searching, simply paste the current page to the list. From 1dc03362bb67aa60224997334ed7de1bbb8f21ad Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 05:49:10 -0400 Subject: [PATCH 030/103] Update table commands to use tabular nomenclature. --- prism/commands/tables_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 1c61511..ff60cdf 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -61,7 +61,7 @@ def tables_get(ctx, isname, table, limit, offset, type_, format_, search): # When querying by name, the get operation returns a # dict with a count of found tables and a list of # tables. - tables = p.tables_get(name=table, limit=limit, offset=offset, search=search) + tables = p.tables_get(name=table, limit=limit, offset=offset, type_=type_, search=search) if tables['total'] == 0: logger.error(f"Table ID {table} not found.") From a220df89db7e2e6dd8a643915b6b74bea7b57adf Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 06:00:10 -0400 Subject: [PATCH 031/103] Set output to stdout for INFO logging. --- prism/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/cli.py b/prism/cli.py index cb7ef40..025d99c 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -116,7 +116,7 @@ def cli(ctx, # script output. formatter = logging.Formatter('%(message)s') - ch = logging.StreamHandler() + ch = logging.StreamHandler(sys.stdout) ch.setFormatter(formatter) ch.setLevel(logging.INFO) logger.addHandler(ch) From be29d71cae01e5a768e82894d3a161a1a2338411 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 06:05:45 -0400 Subject: [PATCH 032/103] Update patch docstring. 
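
The StreamHandler change in patch 031 routes INFO-level command output to stdout so it can be piped or redirected like normal script output, while the logger itself keeps its plain message formatter. A self-contained sketch of that handler setup:

import logging
import sys

logger = logging.getLogger("prismCLI")
logger.setLevel(logging.INFO)

handler = logging.StreamHandler(sys.stdout)          # INFO output goes to stdout, not stderr
handler.setFormatter(logging.Formatter("%(message)s"))
handler.setLevel(logging.INFO)
logger.addHandler(handler)

logger.info("command output")  # written to stdout, so `prism ... > out.json` works
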
--- prism/commands/tables_commands.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index ff60cdf..01671c8 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -186,7 +186,10 @@ def tables_put(ctx, file, truncate): @click.pass_context def tables_patch(ctx, isname, table, file, displayname, description, documentation, enableforanalysis): - """Edit the schema for an existing table. + """Edit the specified attributes of an existing table with the specified id (or name). + + If an attribute is not provided in the request, it will not be changed. To set an + attribute to blank (empty), include the attribute without specifying a value. TABLE The ID or API name (use -n option) of the table to patch [FILE] Optional file containing patch values for the table. From f085f15daef0faf34571b8d26dc4e42cb7b91ae3 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 06:34:36 -0400 Subject: [PATCH 033/103] Bug fixes to put:/tables update. --- prism/prism.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/prism/prism.py b/prism/prism.py index d362037..3a2b8c3 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -107,6 +107,7 @@ def is_valid_string(attr): return True def is_valid_list(attr): + """Ensure the attribute exists in the schema.""" if attr not in schema or not isinstance(schema[attr], list): return False @@ -132,7 +133,8 @@ def is_valid_list(attr): keys = list(schema.keys()) for k in keys: - if k not in ['name', 'id', 'fields', 'tags', 'displayName', 'description', 'documentation', + if k not in ['name', 'id', 'fields', 'tags', 'categories', + 'displayName', 'description', 'documentation', 'enableForAnalysis']: del schema[k] @@ -600,7 +602,7 @@ def tables_post(self, schema): return None - def tables_put(self, id, schema, truncate=False): + def tables_put(self, schema, truncate=False): """Update an existing table using a full schema definition. Notes @@ -610,9 +612,6 @@ def tables_put(self, id, schema, truncate=False): Parameters ---------- - id : str - Prism Table ID of an existing table. - schema : dict A dictionary containing the schema @@ -626,14 +625,16 @@ def tables_put(self, id, schema, truncate=False): If the request is successful, a dictionary containing information about the new table is returned, otherwise None. """ - operation = f"/tables/{id}" - logger.debug(f"PUT: {operation}") - url = self.prism_endpoint + operation - if not schema_fixup(schema): logger.error("Invalid schema for update operation.") return None + table_id = schema['id'] + + operation = f"/tables/{table_id}" + logger.debug(f"PUT: {operation}") + url = self.prism_endpoint + operation + response = self.http_put(url=url, data=schema) if response.status_code == 200: From d1a75c3356d44df73511657288714e19f5e797d3 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 06:47:40 -0400 Subject: [PATCH 034/103] Remove ID test from schema_fixup. 
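
After patch 033, tables_put no longer takes an explicit ID argument; it reads the target table ID from the schema itself. A usage sketch of the round trip, assuming p is an authenticated prism.Prism client and the table ID is known:

def rename_table(p, table_id, new_display_name):
    # Fetch the full definition, change an attribute, and PUT it back;
    # tables_put takes the target ID from the schema's own "id" field.
    schema = p.tables_get(id=table_id, type_="full")
    if schema is None:
        return None
    schema["displayName"] = new_display_name
    return p.tables_put(schema, truncate=False)
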
--- prism/prism.py | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/prism/prism.py b/prism/prism.py index 3a2b8c3..13746c3 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -100,24 +100,7 @@ def schema_fixup(schema): logger.error("schema_fixup: schema is not a dictionary.") return False - def is_valid_string(attr): - if attr not in schema or not isinstance(schema[attr], str) or len(schema[attr]) == 0: - return False - - return True - - def is_valid_list(attr): - """Ensure the attribute exists in the schema.""" - if attr not in schema or not isinstance(schema[attr], list): - return False - - return True - - if not is_valid_string('id'): - logger.error("id attribute missing") - return False - - if not is_valid_list('fields'): + if 'fields' not in schema or not isinstance(schema['fields'], list): logger.error("fields attribute missing from schema!") return False From 4ba7f8bf3c26681b4e6cf7ca945f8b8076b04df8 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 07:14:27 -0400 Subject: [PATCH 035/103] Bug fixes to tables_upload. --- prism/commands/tables_commands.py | 21 ++++++++++++++++----- prism/prism.py | 14 +++++++++----- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 01671c8..a2354be 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -301,18 +301,29 @@ def tables_upload(ctx, table, isname, operation, file): sys.exit(1) if isname: - bucket = p.buckets_create(target_id=table, operation=operation) - else: bucket = p.buckets_create(target_name=table, operation=operation) + else: + bucket = p.buckets_create(target_id=table, operation=operation) if bucket is None: logger.error('Bucket creation failed.') sys.exit(1) - results = p.buckets_upload(bucket['id'], file) + logger.debug(json.dumps(bucket, indent=2)) + bucket_id = bucket['id'] - if len(results) > 0: - p.buckets_complete(bucket['id']) + file_results = p.buckets_files(bucket_id, file) + + if file_results['total'] > 0: + results = p.buckets_complete(bucket_id) + + # Add the file upload results to the bucket + # info returned to the caller. + results['files'] = file_results + + logger.info(json.dumps(results, indent=2)) + else: + logger.info('No files uploaded to table.') @click.command('truncate') diff --git a/prism/prism.py b/prism/prism.py index 13746c3..b7f0770 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -814,7 +814,7 @@ def buckets_create( target_name : str The name of the table for bucket. schema : dict - A dictionary containing the schema for your table. + A dictionary containing the schema fields describing the file. operation : str Required, defaults to "TruncateAndInsert" operation @@ -850,6 +850,8 @@ def buckets_create( # Resolve the target table; if specified. if target_id is None and target_name is None: + # The caller expects the schema to come from the + # passed schema - do a quick sanity check. if table_schema is None: logger.error("schema, target id or target name is required to create a bucket.") return None @@ -858,7 +860,7 @@ def buckets_create( logger.error('schema missing "id" or "fields" attribute.') return None else: - if target_id is not None: # Always use ID if provided. + if target_id is not None: # Always use ID if provided - has precedence. 
table = self.tables_get(id=target_id, type_="full") # Full=include fields object if table is None: @@ -900,8 +902,10 @@ def buckets_create( response = self.http_post(url, headers=self.CONTENT_APP_JSON, data=json.dumps(data)) if response.status_code == 201: - logger.info("successfully created a new wBucket") - return response.json() + response_json = response.json() + + logger.debug(f'successfully created a new wBucket: {response_json["id"]}') + return response_json return None @@ -926,7 +930,7 @@ def buckets_complete(self, id): r = self.http_post(url) if r.status_code == 201: - logger.info(f'successfully completed wBucket {id}.') + logger.debug(f'successfully completed wBucket {id}.') return r.json() return None From 66820bd8924c6f5a4e410700c434924c8eaf4cd2 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 07:20:36 -0400 Subject: [PATCH 036/103] Bug fixes to buckets_complete to handle http 400 errors. --- prism/prism.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/prism/prism.py b/prism/prism.py index b7f0770..53dc712 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -932,6 +932,9 @@ def buckets_complete(self, id): if r.status_code == 201: logger.debug(f'successfully completed wBucket {id}.') return r.json() + elif r.status_code == 400: + logger.debug(f'error completing bucket') + return r.json() return None From 8065034a577f473c95c06c4e451f8268aea99344 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 07:55:22 -0400 Subject: [PATCH 037/103] Update to tables upload to include bucket info in result. --- prism/commands/tables_commands.py | 1 + 1 file changed, 1 insertion(+) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index a2354be..dfb7a16 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -320,6 +320,7 @@ def tables_upload(ctx, table, isname, operation, file): # Add the file upload results to the bucket # info returned to the caller. results['files'] = file_results + results['bucket'] = bucket # Ensure bucket info is present. logger.info(json.dumps(results, indent=2)) else: From f53a317a3a7e890b099ca7804ed387c5be7e6225 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 10:59:50 -0400 Subject: [PATCH 038/103] Updated schema_fixup to remove tenant specific attributes. 
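
Patches 035 through 037 above settle the upload flow the CLI follows: create a bucket targeting the table, push the files, then complete the bucket to start the load, attaching the file and bucket details to the returned results. A condensed sketch, assuming p is an authenticated prism.Prism client and files is a list of CSV or gzip paths:

def upload_to_table(p, table_name, files, operation="TruncateAndInsert"):
    bucket = p.buckets_create(target_name=table_name, operation=operation)
    if bucket is None:
        return None

    file_results = p.buckets_files(bucket["id"], files)
    if file_results["total"] == 0:
        return None

    results = p.buckets_complete(bucket["id"])
    if results is not None:
        results["files"] = file_results
        results["bucket"] = bucket
    return results
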
--- prism/__init__.py | 4 ++-- prism/commands/buckets_commands.py | 2 +- prism/commands/tables_commands.py | 21 +++++++++++++++------ prism/prism.py | 14 ++++++++++++++ 4 files changed, 32 insertions(+), 9 deletions(-) diff --git a/prism/__init__.py b/prism/__init__.py index 939eef2..347796c 100644 --- a/prism/__init__.py +++ b/prism/__init__.py @@ -1,8 +1,8 @@ -from prism.prism import Prism, set_logging +from prism.prism import Prism, set_logging, schema_fixup from ._version import get_versions __version__ = get_versions()["version"] del get_versions -__all__ = ["Prism","set_logging"] +__all__ = ["Prism", "set_logging", "schema_fixup"] diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index be853e0..2b40037 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -11,7 +11,7 @@ @click.option("-i", "--id", "id", help="The Workday ID of the bucket.") @click.option("-n", "--table_name", - help="The display name of the table to retrieve (see search option).") + help="The API name of the table to retrieve (see search option).") @click.option("-l", "--limit", default=None, type=int, help="The maximum number of object data entries included in the response, default=-1 (all).") @click.option("-o", "--offset", default=None, type=int, diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index dfb7a16..8ebc1a5 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -6,6 +6,8 @@ import click import pandas as pd +from prism import schema_fixup + logger = logging.getLogger('prismCLI') @@ -21,7 +23,7 @@ help='How much information returned for each table.') @click.option('-f', '--format', 'format_', default='json', type=click.Choice(['json', 'tabular', 'schema'], case_sensitive=False), - help='Format output as JSON, tabular, or bucket schema.') + help='Format output as JSON, tabular, or simplified table schema.') @click.option('-s', '--search', is_flag=True, help='Enable substring search of NAME in api name or display name, default=False (exact match).') @click.argument('table', required=False) @@ -51,7 +53,13 @@ def tables_get(ctx, isname, table, limit, offset, type_, format_, search): sys.exit(1) if format_ == 'schema': - logger.info(json.dumps(get_fields(table), indent=2)) + # Same as JSON, but with extraneous attributes removed. + if schema_fixup(table): + logger.info(json.dumps(schema_fixup(table), indent=2)) + else: + # This should never happen. + logger.error('invalid schema detected.') + sys.exit(1) elif format_ == 'tabular': df = pd.json_normalize(table) logger.info(df.to_csv(index=False)) @@ -73,12 +81,13 @@ def tables_get(ctx, isname, table, limit, offset, type_, format_, search): df = pd.json_normalize(tables['data']) logger.info(df.to_csv(index=False)) elif format_ == 'schema': - fields = [] - + # Slim down all the tables we got back. 
for tab in tables['data']: - fields.append(get_fields(tab)) + if not schema_fixup(tab): + logger.error('unexpected error in schema_fixup.') + sys.exit() - logger.info(json.dumps(fields, indent=2)) + logger.info(json.dumps(tables, indent=2)) @click.command('create') diff --git a/prism/prism.py b/prism/prism.py index 53dc712..7a843a9 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -113,6 +113,20 @@ def schema_fixup(schema): fld = schema["fields"][ordinal] fld["ordinal"] = ordinal + 1 + if 'fieldId' in fld: + del fld['fieldId'] + + if 'id' in fld: + del fld['id'] + + if 'type' in fld: + if 'descriptor' in fld['type']: + # Convert the descriptor to the shortened Prism type syntax. + fld['type']['id'] = f"Schema_Field_Type={fld['type']['descriptor']}" + del fld['type']['descriptor'] + + # Remove all attributes from the schema that cannot be specified on + # a post or put operation. keys = list(schema.keys()) for k in keys: From edccda21ef91583b26ade0db4c046bdd3e80fcc9 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 11:02:21 -0400 Subject: [PATCH 039/103] Bug fix tables_get. --- prism/commands/tables_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 8ebc1a5..20faa3f 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -55,7 +55,7 @@ def tables_get(ctx, isname, table, limit, offset, type_, format_, search): if format_ == 'schema': # Same as JSON, but with extraneous attributes removed. if schema_fixup(table): - logger.info(json.dumps(schema_fixup(table), indent=2)) + logger.info(json.dumps(table, indent=2)) else: # This should never happen. logger.error('invalid schema detected.') From 6b6a9f848aa593a3d0b135ca4ca473ee75ed1e75 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 11:12:21 -0400 Subject: [PATCH 040/103] Remove get_fields, migrated to prism.schema_fixup. --- prism/commands/tables_commands.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 20faa3f..3b86cd8 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -555,29 +555,3 @@ def resolve_schema(p=None, file=None, source_name=None, source_id=None): schema = tables['data'][0] return schema - - -def get_fields(table): - if 'fields' not in table: - logger.error('get_fields: table object does not contain fields attribute.') - return None - - # Remove the Prism audit columns. - fields = [fld for fld in table['fields'] if not fld['name'].startswith('WPA_')] - - # Remove tenant specific values - these are not needed - # if the user wants to update a table definition. - for fld in fields: - if 'fieldId' in fld: - del fld['fieldId'] - - if 'id' in fld: - del fld['id'] - - if 'type' in fld: - if 'descriptor' in fld['type']: - # Convert the descriptor to the shortened Prism type syntax. - fld['type']['id'] = f"Schema_Field_Type={fld['type']['descriptor']}" - del fld['type']['descriptor'] - - return fields From 6c349272710805d6e766161ae21680579127c969 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 11:14:53 -0400 Subject: [PATCH 041/103] Bug fix truncate_tables. 
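
The schema_fixup changes in patch 038 strip the tenant-specific field attributes and shorten the type descriptor before a schema is posted or put. The same per-field cleanup shown on one illustrative field (the IDs are made up):

field = {
    "ordinal": 3,
    "name": "hire_date",
    "fieldId": "tenant-field-id",              # tenant specific - removed
    "id": "tenant-wid",                        # tenant specific - removed
    "type": {"id": "type-wid", "descriptor": "Date"},
}

for key in ("fieldId", "id"):
    field.pop(key, None)

if "descriptor" in field.get("type", {}):
    # Convert to the shortened Prism type syntax.
    field["type"]["id"] = f"Schema_Field_Type={field['type']['descriptor']}"
    del field["type"]["descriptor"]

print(field)  # {'ordinal': 3, 'name': 'hire_date', 'type': {'id': 'Schema_Field_Type=Date'}}
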
--- prism/commands/tables_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 3b86cd8..f48e825 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -363,7 +363,7 @@ def tables_truncate(ctx, table, isname): bucket_id = bucket['id'] # Don't specify a file to put a zero sized file into the bucket. - p.buckets_upload(bucket_id) + p.buckets_files(bucket_id) # Ask Prism to run the delete statement by completing the bucket. bucket = p.buckets_complete(bucket_id) From b6e9ebba0f46c6f51ef9c031d7fd6f89ff442940 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 13:58:01 -0400 Subject: [PATCH 042/103] Bug fix bucket_files upload of empty file. --- prism/prism.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/prism.py b/prism/prism.py index 7a843a9..9b3b248 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -993,7 +993,7 @@ def buckets_files(self, id, file=None): target_file: str for target_file in target_files: if target_file is None: - new_file = {"file": ("empty", io.BytesIO())} + new_file = {"file": ("empty.csv.gz", gzip.compress(bytearray()))} elif target_file.lower().endswith(".csv.gz"): new_file = {"file": open(target_file, "rb")} elif target_file.lower().endswith(".csv"): From f50891014c8d9a68a3c70fc099c1089246618bea Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 14:00:10 -0400 Subject: [PATCH 043/103] Bug fix file_containers_load upload of empty file. --- prism/prism.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/prism.py b/prism/prism.py index 9b3b248..d7b65d8 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -1348,7 +1348,7 @@ def fileContainers_load(self, id, file): for target_file in target_files: # It is legal to upload an empty file - see the table truncate method. if target_file is None: - new_file = {"file": ("dummy", io.BytesIO())} + new_file = {"file": ("empty.csv.gz", gzip.compress(bytearray()))} elif target_file.lower().endswith(".csv.gz"): new_file = {"file": open(target_file, "rb")} elif target_file.lower().endswith(".csv"): From 150b6ecf8b240a38dd1b57ba1c7b42c763af16cf Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 16:10:47 -0400 Subject: [PATCH 044/103] Update for Python 3.9+ and bucket get fixes. 
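
Patches 042 and 043 replace the empty BytesIO upload with a real, zero-length gzip member, which is what lets a TruncateAndInsert bucket complete with no rows (the mechanism behind the truncate command). A quick standalone check of that payload:

import gzip

# An empty-but-valid gzip member: only header and trailer bytes, no payload.
empty_gz = gzip.compress(bytearray())

print(len(empty_gz))                     # a few dozen bytes of gzip framing
print(gzip.decompress(empty_gz) == b"")  # True - decompresses to nothing
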
--- prism/commands/buckets_commands.py | 73 ++++++++++++++++++++---------- prism/prism.py | 33 ++++++++------ setup.py | 2 +- 3 files changed, 70 insertions(+), 38 deletions(-) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index 2b40037..acac442 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -8,10 +8,8 @@ @click.command("get") -@click.option("-i", "--id", "id", - help="The Workday ID of the bucket.") -@click.option("-n", "--table_name", - help="The API name of the table to retrieve (see search option).") +@click.option('-n', '--isName', is_flag=True, default=False, + help='Flag to treat the bucket or table argument as a name.') @click.option("-l", "--limit", default=None, type=int, help="The maximum number of object data entries included in the response, default=-1 (all).") @click.option("-o", "--offset", default=None, type=int, @@ -19,41 +17,68 @@ @click.option("-t", "--type", "type_", default="summary", show_default=True, help="How much information to be returned in response JSON.") @click.option("-s", "--search", is_flag=True, show_default=True, default=False, - help="Use contains search substring for --table_name or --wid.") + help="Use substring search bucket or table.") @click.option("-f", "--format", "format_", - type=click.Choice(['json', 'summary', 'schema', 'csv'], case_sensitive=False), + type=click.Choice(['json', 'tabular', 'schema'], case_sensitive=False), default="json", - help="Format output as JSON, summary, schema, or CSV.", + help="Format output as JSON, tabular, or CSV.", ) -@click.argument("name", required=False) +@click.option("--table", + help="The id or name of a Prism table to list all buckets.") +@click.argument("bucket", required=False) @click.pass_context -def buckets_get(ctx, id, table_name, limit, offset, type_, search, format_, name): +def buckets_get(ctx, bucket, table, isname, + limit, offset, type_, search, format_): """ View the buckets permitted by the security profile of the current user. - [NAME] explicit name of bucket to list. + [BUCKET] ID or name of a Prism bucket. + + NOTE: For table name searching, this will be the Display Name not + the API Name. """ p = ctx.obj["p"] - buckets = p.buckets_get(id, name, limit, offset, type_, table_name, search) + if isname and bucket is None and table is None: + # It's invalid to add the --isName switch without providing a bucket name. + logger.error('To get buckets by name, please provide a bucket name.') + sys.exit(1) + + if not isname and bucket is not None: + # This should be a bucket ID - ignore all other options. + bucket = p.buckets_get(id=bucket, type_=type_) + + if format_ == "tabular": + df = pd.json_normalize(bucket) + logger.info(df.to_csv(index=False)) + else: + logger.info(json.dumps(bucket, indent=2)) - if buckets["total"] == 0: return - if format_ == "summary": - for bucket in buckets["data"]: - display_name = bucket["displayName"] - operation = bucket["operation"]["descriptor"] - target = bucket["targetDataset"]["descriptor"] - state = bucket["state"]["descriptor"] - - logger.info(f"{display_name}, operation: {operation}, target: {target}, state: {state}") - elif format_ == "csv": - df = pd.json_normalize(buckets["data"]) - logger.info(df.to_csv(index=False)) + # We are doing some form of search. + + if isname and bucket is not None: + # This should be a search by bucket name. + buckets = p.buckets_get(name=bucket, type_=type_, search=search) + else: + # Search by table ID or name. 
+ if isname: + buckets = p.buckets_get(table_name=table, search=search, + limit=limit, offset=offset, type_=type_) + else: + buckets = p.buckets_get(table_id=table, + limit=limit, offset=offset, type_=type_) + + if buckets['total'] == 0: + logger.info('No buckets found.') else: - logger.info(json.dumps(buckets, indent=2)) + if format_ == "tabular": + df = pd.json_normalize(buckets["data"]) + logger.info(df.to_csv(index=False)) + else: + logger.info(json.dumps(buckets, indent=2)) @click.command("create") diff --git a/prism/prism.py b/prism/prism.py index d7b65d8..bc40b1c 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -673,10 +673,9 @@ def tables_patch(self, id, patch): return None def buckets_get(self, - id=None, name=None, - limit=None, offset=None, - type_="summary", - table_name=None, search=False): + id=None, name=None, search=False, + limit=None, offset=None, type_="summary", + table_id=None, table_name=None): """Get a one or more bucket definitions. Parameters @@ -691,6 +690,8 @@ def buckets_get(self, The offset from zero of tables to return. type_ : str Level of detail to return. + table_id : str + List all/any buckets for associated with the table id. table_name : str List all/any buckets for associated with the table name. search : bool @@ -707,7 +708,8 @@ def buckets_get(self, output_type = type_.lower() if type_.lower() in ['full', 'summary'] else 'summary' - # If we got an ID, then do a direct query by ID - no paging or searching required. + # If we got an ID, then do a direct query by ID - no paging or + # searching required. if id is not None: operation = f"{operation}/{id}?format={output_type}" logger.debug(f"get: {operation}") @@ -723,7 +725,7 @@ def buckets_get(self, logger.debug(f"get: {operation}") url = self.prism_endpoint + operation - # Start the return object - this routine NEVER fails + # Start the return object - this method NEVER fails # and always returns a valid dict object. return_buckets = {"total": 0, "data": []} @@ -736,13 +738,13 @@ def buckets_get(self, if not search and name is not None: # List a specific bucket name overrides any other # combination of search/table/bucket name/wid. - params['name'] = name + params['name'] = urlparse.quote(name) params['limit'] = 1 # Can ONLY be one matching bucket. params['offset'] = 0 else: # Any other combination of parameters requires a search - # through all the buckets in the tenant. + # through all the buckets in the data catalog. search = True params['limit'] = 100 # Max pagesize to retrieve in the fewest REST calls. @@ -757,25 +759,30 @@ def buckets_get(self, buckets = r.json() - if not search and name is not None: # Explicit bucket name + if not search and name is not None: # exact bucket name # We are not searching, and we have a specific bucket, - # return whatever we got with this call (it will be in - # the necessary dict structure). + # return whatever we got with this call even if no buckets + # were found (it will be in the necessary dict structure). return buckets if name is not None: # We are searching at this point. 
# Substring search for matching table names match_buckets = [bck for bck in buckets["data"] if name in bck["name"] or name in bck["displayName"]] + elif table_id is not None: + match_buckets = [ + bck for bck in buckets["data"] + if table_id == bck["targetDataset"]["id"] + ] elif table_name is not None: - # Caller is looking for any/all buckets by target table + # Caller is looking for any/all buckets by target table(s) match_buckets = [ bck for bck in buckets["data"] if table_name == bck["targetDataset"]["descriptor"] or (search and table_name.lower() in bck["targetDataset"]["descriptor"].lower()) ] else: - # Grab all the tables in the result - select all buckets. + # No search in progress, grab all the buckets in this page. match_buckets = buckets["data"] # Add to the results. diff --git a/setup.py b/setup.py index c330997..54a5c01 100644 --- a/setup.py +++ b/setup.py @@ -17,5 +17,5 @@ install_requires=requirements, extras_require={"dev": ["pytest"]}, keywords="prism", - classifiers=["Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7"], + classifiers=["Programming Language :: Python :: 3.9"], ) From 03f39d31aefb8d29254fdff584ed1a09f369a809 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 16:14:56 -0400 Subject: [PATCH 045/103] Remove Python 3.7 and 3.8 --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 2bd543e..a12f634 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9] + python-version: [3.9] steps: - uses: actions/checkout@v2 From 8356ceb83f77b8b15298d71329088c33dd517848 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 20:17:38 -0400 Subject: [PATCH 046/103] Remove WQL and RaaS commands. --- prism/cli.py | 24 ----- prism/commands/raas_commands.py | 26 ----- prism/commands/wql_commands.py | 183 -------------------------------- requirements.txt | 1 - 4 files changed, 234 deletions(-) delete mode 100644 prism/commands/raas_commands.py delete mode 100644 prism/commands/wql_commands.py diff --git a/prism/cli.py b/prism/cli.py index 025d99c..90b70c4 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -11,8 +11,6 @@ from .commands import dataChanges_commands as d_commands from .commands import dataExport_commands as e_commands from .commands import fileContainers_commands as f_commands -from .commands import wql_commands as w_commands -from .commands import raas_commands as r_commands def param_fixup(value, config, config_name, option): @@ -229,27 +227,5 @@ def fileContainers(): fileContainers.add_command(f_commands.fileContainers_load) -@cli.group("wql") -def wql(): - """ - Operations to list (/dataSources) and run WQL queries (/data). - """ - - -wql.add_command(w_commands.dataSources) -wql.add_command(w_commands.dataSources_fields) -wql.add_command(w_commands.data) - - -@cli.group("raas") -def raas(): - """ - Run custom or Workday delivered report. 
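
The buckets_get matching above now distinguishes an exact target-table ID from a substring search on the target table's descriptor. The descriptor matching in isolation, with made-up bucket rows:

buckets = {"data": [
    {"name": "cli_1", "targetDataset": {"id": "t1", "descriptor": "Employees"}},
    {"name": "cli_2", "targetDataset": {"id": "t2", "descriptor": "Cost Centers"}},
]}
table_name = "employees"
search = True

match_buckets = [
    bck for bck in buckets["data"]
    if table_name == bck["targetDataset"]["descriptor"] or
    (search and table_name.lower() in bck["targetDataset"]["descriptor"].lower())
]
print([bck["name"] for bck in match_buckets])  # ['cli_1']
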
- """ - - -raas.add_command(r_commands.run) - - if __name__ == "__main__": cli() diff --git a/prism/commands/raas_commands.py b/prism/commands/raas_commands.py deleted file mode 100644 index e3567a9..0000000 --- a/prism/commands/raas_commands.py +++ /dev/null @@ -1,26 +0,0 @@ -import click -import logging - -logger = logging.getLogger('prismCLI') - - -@click.command("run", help="Run a system or custom RaaS report.") -@click.option("-u", "--user", default=None, help="Run custom report as named user - default to delivered reports.") -@click.option("-f", "--format", "format_", default=None, help="Output query results as CSV.") -@click.argument("report", nargs=1, required=True) -@click.argument('params', nargs=-1, required=False) -@click.pass_context -def run(ctx, user, format_, report, params): - """ - Run a Workday report. - - [REPORT] Report name to run. - [PARAMS] Parameters expected by the report as list. - """ - p = ctx.obj["p"] - - # Return to a variable for easy debugging. - report_output = p.raas_run(report, user, params, format_) - - # Don't log the output - pusht - logger.info(report_output) diff --git a/prism/commands/wql_commands.py b/prism/commands/wql_commands.py deleted file mode 100644 index b046cba..0000000 --- a/prism/commands/wql_commands.py +++ /dev/null @@ -1,183 +0,0 @@ -import click -import json -import sys -import logging -import re -import pandas as pd - -logger = logging.getLogger('prismCLI') - - -@click.command("dataSources") -@click.option("-a", "--alias", default=None, type=str, - help="The alias of the data source.") -@click.option("-s", "--searchString", default=None, type=str, - help="The string to be searched in case insensitive manner within the descriptors of the data sources.") -@click.option("-l", "--limit", default=None, type=int, - help="The maximum number of object data entries included in the response, default=-1 (all).") -@click.option("-o", "--offset", default=None, type=int, - help="The offset to the first object in a collection to include in the response.") -@click.option('-f', '--format', 'format_', default='json', - type=click.Choice(['json', 'tabular'], case_sensitive=True), - help='Gets the fields you have access to in the given data source.') -@click.argument("dataSource", required=False) -@click.pass_context -def dataSources(ctx, alias, searchstring, limit, offset, format_, datasource): - """Returns a collection of data sources (/dataSources) for use in a WQL query. - - [DATASOURCE] The Workday ID of the resource. 
- """ - p = ctx.obj["p"] - - if datasource is not None: - data_sources = p.wql_dataSources(id=datasource, limit=limit, offset=offset) - else: - data_sources = p.wql_dataSources(alias=alias, searchString=searchstring, limit=limit, offset=offset) - - if format_ == 'json': - logger.info(json.dumps(data_sources, indent=2)) - else: - df = pd.json_normalize(data_sources['data']) - logger.info(df.to_csv(index=False)) - - -@click.command("fields") -@click.option("-d", "--sourceSearch", is_flag=True, default=False, - help="The alias of the data source.") -@click.option("-a", "--alias", default=None, type=str, - help="The alias of the data source.") -@click.option("-s", "--searchString", default=None, type=str, - help="The string to be searched in case insensitive manner within the descriptors of the data sources.") -@click.option("-l", "--limit", default=None, type=int, - help="The maximum number of object data entries included in the response.") -@click.option("-o", "--offset", default=None, type=int, - help="The offset to the first object in a collection to include in the response.") -@click.option('-f', '--format', 'format_', default='json', - type=click.Choice(['json', 'tabular'], case_sensitive=True), - help='Gets the fields you have access to in the given data source.') -@click.argument("dataSource", required=True) -@click.pass_context -def dataSources_fields(ctx, sourcesearch, alias, searchstring, limit, offset, format_, datasource): - """Returns a collection of data sources (/dataSources) for use in a WQL query. - - [DATASOURCE] The Workday ID of the resource. - """ - p = ctx.obj["p"] - - if sourcesearch: - data_sources = p.wql_dataSources(alias=datasource, searchString=datasource, limit=1, offset=0) - - if data_sources['total'] != 1: - logger.error(f'Unexpected number of data sources: {data_sources["total"]}') - sys.exit(1) - - ds_id = data_sources['data'][0]['id'] - else: - ds_id = datasource - - fields = p.wql_dataSources_fields(id=ds_id, alias=alias, searchString=searchstring, limit=limit, offset=offset) - - if fields['total'] == 0: - logger.error('No WQL fields found.') - sys.exit(1) - - if format_ == 'json': - logger.info(json.dumps(fields, indent=2)) - else: - df = pd.json_normalize(fields['data']) - logger.info(df.to_csv(index=False)) - - -@click.command("data") -@click.option("-l", "--limit", default=None, - help="The maximum number of object data entries included in the response, default=-1 (all).") -@click.option("-o", "--offset", default=None, - help="The offset to the first object in a collection to include in the response.") -@click.option("-w", "--wql-file", "wql_file", default=None, type=click.Path(exists=True), - help="Filename containing a WQL query.") -@click.option('-f', '--format', 'format_', default='json', - type=click.Choice(['json', 'tabular'], case_sensitive=True), - help='Gets the fields you have access to in the given data source.') -@click.argument("query", required=False) -@click.pass_context -def data(ctx, limit, offset, wql_file, format_, query): - """ - Returns the data from a WQL query. - - [QUERY] WQL query string to execute (/data). - - Note: specify "select *" to automatically expand the column list. - """ - p = ctx.obj["p"] - - if wql_file is None and query is None: - logger.error("No query provided.") - sys.exit(1) - - if query is not None: - # Passed as an explicit string. - query_resolved = query - else: - # Passed as a file name. 
- with open(wql_file) as file: - query_resolved = file.read().replace('\n', ' ') - - query_resolved = query_resolved.strip() - logger.debug(f'resolved query: {query_resolved}') - - # If the WQL statements starts with exactly "select *", attempt - # to replace the asterisk with the field list. - - if query_resolved.lower().startswith('select *'): - logger.debug('wql "select *" detected.') - - # Locate the "FROM {ds}" clause to get the data source name. - - # Notes from documentation: - # - # To query data from a data source: - # FROM dataSourceAlias - # To query data from a data source with a data source filter: - # FROM dataSourceAlias(dataSourceFilter=filterAlias, filterPrompt1=value1, filterPrompt2=value2) - # To query data from a data source using entry and effective date filters: - # FROM dataSourceAlias(effectiveAsOfDate=date, entryMoment=dateTime) - - from_regex = re.compile(r'\s+from[\s*|(](\w+)', flags=re.IGNORECASE) - from_clause = from_regex.search(query_resolved) - - ds_alias = query_resolved[from_clause.start(1):from_clause.end(1)] - logger.debug(f'Detected data source: {ds_alias}.') - - ds = p.wql_dataSources(alias=ds_alias, limit=1) - - if ds['total'] != 1: - logger.error(f'Data source {ds_alias} not found.') - sys.exit(1) - - ds_id = ds['data'][0]['id'] - logger.debug(f'alias {ds_alias} resolved: {ds_id}') - - fields = p.wql_dataSources_fields(id=ds_id) # No limit gets all fields. - - if fields['total'] == 0: - logger.error(f'No fields found for {ds_alias}.') - sys.exit(1) - - columns = '' - comma = '' - - for field in fields['data']: - columns += comma + field['alias'] - comma = ',' - - query_resolved = query_resolved.replace('*', columns, 1) - logger.debug(f'resolved WQL: {query_resolved}') - - rows = p.wql_data(query_resolved, limit, offset) - - if rows["total"] != 0: - if format_ == 'tabular': - df = pd.json_normalize(rows["data"]) - logger.info(df.to_csv(index=False)) - else: - logger.info(json.dumps(rows, indent=2)) diff --git a/requirements.txt b/requirements.txt index daa2546..8570659 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ click>=6 requests>=2.20.0,<3 -configparser>=6 pandas~=2.0.0 From 3bcf7e9129079453a5d7d32e0432d1fab79e0c9c Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 20:24:47 -0400 Subject: [PATCH 047/103] Remove Pandas dependency. 
--- prism/commands/buckets_commands.py | 11 ++++++----- prism/commands/dataChanges_commands.py | 12 ++++++------ prism/commands/tables_commands.py | 11 ++++++----- requirements.txt | 1 - 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index acac442..1381464 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -2,7 +2,6 @@ import logging import sys import click -import pandas as pd logger = logging.getLogger('prismCLI') @@ -50,8 +49,9 @@ def buckets_get(ctx, bucket, table, isname, bucket = p.buckets_get(id=bucket, type_=type_) if format_ == "tabular": - df = pd.json_normalize(bucket) - logger.info(df.to_csv(index=False)) + pass + # df = pd.json_normalize(bucket) + # logger.info(df.to_csv(index=False)) else: logger.info(json.dumps(bucket, indent=2)) @@ -75,8 +75,9 @@ def buckets_get(ctx, bucket, table, isname, logger.info('No buckets found.') else: if format_ == "tabular": - df = pd.json_normalize(buckets["data"]) - logger.info(df.to_csv(index=False)) + pass + # df = pd.json_normalize(buckets["data"]) + # logger.info(df.to_csv(index=False)) else: logger.info(json.dumps(buckets, indent=2)) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 57fb2a7..251e1d4 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -3,7 +3,6 @@ import json import logging import time -import pandas as pd logger = logging.getLogger('prismCLI') @@ -70,12 +69,13 @@ def output_summary_dct(current_dct): else: output_summary_dct(data_change_task) elif format_ == "csv": - if 'data' in data_change_task: - df = pd.json_normalize(data_change_task["data"]) - else: - df = pd.json_normalize(data_change_task) + pass + #if 'data' in data_change_task: + # df = pd.json_normalize(data_change_task["data"]) + #else: + # df = pd.json_normalize(data_change_task) - logger.info(df.to_csv(index=False)) + #logger.info(df.to_csv(index=False)) else: if 'data' in data_change_task: logger.info(json.dumps(data_change_task["data"], indent=2)) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index f48e825..27e4700 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -4,7 +4,6 @@ import os import csv import click -import pandas as pd from prism import schema_fixup @@ -61,8 +60,9 @@ def tables_get(ctx, isname, table, limit, offset, type_, format_, search): logger.error('invalid schema detected.') sys.exit(1) elif format_ == 'tabular': - df = pd.json_normalize(table) - logger.info(df.to_csv(index=False)) + pass + # df = pd.json_normalize(table) + # logger.info(df.to_csv(index=False)) else: logger.info(json.dumps(table, indent=2)) else: @@ -78,8 +78,9 @@ def tables_get(ctx, isname, table, limit, offset, type_, format_, search): if format_ == 'json': logger.info(json.dumps(tables, indent=2)) elif format_ == 'tabular': - df = pd.json_normalize(tables['data']) - logger.info(df.to_csv(index=False)) + pass + # df = pd.json_normalize(tables['data']) + # logger.info(df.to_csv(index=False)) elif format_ == 'schema': # Slim down all the tables we got back. 
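With pandas removed, the tabular output paths above are stubbed out with pass. One way the same CSV output could be rebuilt with only the standard library is sketched below; the helper name and the sample row are illustrative and not part of this patch, and unlike pandas.json_normalize this sketch does not flatten nested objects.

    import csv
    import io


    def rows_to_csv(rows):
        # Flatten a Prism-style {'total': n, 'data': [...]} response into CSV text.
        if not rows or not rows.get('data'):
            return ''

        # Use the union of keys across all rows so no column is silently dropped.
        fieldnames = []
        for row in rows['data']:
            for key in row:
                if key not in fieldnames:
                    fieldnames.append(key)

        buffer = io.StringIO()
        writer = csv.DictWriter(buffer, fieldnames=fieldnames, extrasaction='ignore', restval='')
        writer.writeheader()
        writer.writerows(rows['data'])

        return buffer.getvalue()


    # Hypothetical data, purely for illustration:
    print(rows_to_csv({'total': 1, 'data': [{'name': 'table1', 'id': 'abc123'}]}))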
for tab in tables['data']: diff --git a/requirements.txt b/requirements.txt index 8570659..6e05bdc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,2 @@ click>=6 requests>=2.20.0,<3 -pandas~=2.0.0 From 89caf74e84a6b30d7395ea07c34ce52ac72cf5cf Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 23 Oct 2023 20:53:27 -0400 Subject: [PATCH 048/103] Restored Python 3.8 --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index a12f634..490c627 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.9] + python-version: [3.8, 3.9] steps: - uses: actions/checkout@v2 From cfc10814b143a76ab1e273440a6717552696c12d Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 05:07:00 -0400 Subject: [PATCH 049/103] Replace format option with compact. --- prism/__init__.py | 4 +-- prism/commands/tables_commands.py | 55 +++++++++---------------------- prism/prism.py | 22 ++++++++----- 3 files changed, 30 insertions(+), 51 deletions(-) diff --git a/prism/__init__.py b/prism/__init__.py index 347796c..e8e6984 100644 --- a/prism/__init__.py +++ b/prism/__init__.py @@ -1,8 +1,8 @@ -from prism.prism import Prism, set_logging, schema_fixup +from prism.prism import Prism, set_logging, schema_compact from ._version import get_versions __version__ = get_versions()["version"] del get_versions -__all__ = ["Prism", "set_logging", "schema_fixup"] +__all__ = ["Prism", "set_logging", "schema_compact"] diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 27e4700..fd22aa9 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -5,7 +5,7 @@ import csv import click -from prism import schema_fixup +from prism import schema_compact logger = logging.getLogger('prismCLI') @@ -20,30 +20,24 @@ @click.option('-t', '--type', 'type_', default='summary', type=click.Choice(['summary', 'full', 'permissions'], case_sensitive=False), help='How much information returned for each table.') -@click.option('-f', '--format', 'format_', default='json', - type=click.Choice(['json', 'tabular', 'schema'], case_sensitive=False), - help='Format output as JSON, tabular, or simplified table schema.') +@click.option('-c', '--compact', is_flag=True, default=False, + help='Compact the table schema for use in edit operations.') @click.option('-s', '--search', is_flag=True, help='Enable substring search of NAME in api name or display name, default=False (exact match).') @click.argument('table', required=False) @click.pass_context -def tables_get(ctx, isname, table, limit, offset, type_, format_, search): +def tables_get(ctx, isname, table, limit, offset, type_, compact, search): """List the tables or datasets permitted by the security profile of the current user. - [NAME] Prism table name to list. + [TABLE] Prism table ID or name (--isName flag) to list. """ - if type_ in ('summary', 'permissions') and format_ == 'schema': - # Summary results cannot generate schema since there will be no fields. - logger.error(f'Invalid combination of type "{type_}" and format "{format_}".') - sys.exit(1) - p = ctx.obj['p'] # Query the tenant...see if the caller said to treat the # table as a name, AND that a table was provided. 
if not isname and table is not None: - # When using an ID, the get operation returns a simple + # When using an ID, the GET:/tables operation returns a simple # dictionary of the table definition. table = p.tables_get(id=table, type_=type_) @@ -51,20 +45,10 @@ def tables_get(ctx, isname, table, limit, offset, type_, format_, search): logger.error(f"Table ID {table} not found.") sys.exit(1) - if format_ == 'schema': - # Same as JSON, but with extraneous attributes removed. - if schema_fixup(table): - logger.info(json.dumps(table, indent=2)) - else: - # This should never happen. - logger.error('invalid schema detected.') - sys.exit(1) - elif format_ == 'tabular': - pass - # df = pd.json_normalize(table) - # logger.info(df.to_csv(index=False)) - else: - logger.info(json.dumps(table, indent=2)) + if compact: + table = schema_compact(table) + + logger.info(json.dumps(table, indent=2)) else: # When querying by name, the get operation returns a # dict with a count of found tables and a list of @@ -75,20 +59,11 @@ def tables_get(ctx, isname, table, limit, offset, type_, format_, search): logger.error(f"Table ID {table} not found.") return - if format_ == 'json': - logger.info(json.dumps(tables, indent=2)) - elif format_ == 'tabular': - pass - # df = pd.json_normalize(tables['data']) - # logger.info(df.to_csv(index=False)) - elif format_ == 'schema': - # Slim down all the tables we got back. + if compact: for tab in tables['data']: - if not schema_fixup(tab): - logger.error('unexpected error in schema_fixup.') - sys.exit() + tab = schema_compact(tab) - logger.info(json.dumps(tables, indent=2)) + logger.info(json.dumps(tables, indent=2)) @click.command('create') @@ -155,12 +130,12 @@ def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, s sys.exit(1) -@click.command('put') +@click.command('edit') @click.option('-t', '--truncate', is_flag=True, default=False, help='Truncate the table before updating.') @click.argument('file', required=True, type=click.Path(exists=True, dir_okay=False, readable=True)) @click.pass_context -def tables_put(ctx, file, truncate): +def tables_edit(ctx, file, truncate): """Edit the schema for an existing table. [FILE] File containing an updated schema definition for the table. diff --git a/prism/prism.py b/prism/prism.py index bc40b1c..343a12d 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -17,6 +17,7 @@ import io import gzip import inspect +import copy from urllib import parse as urlparse @@ -89,27 +90,30 @@ def buckets_gen_name(): return bucket_name -def schema_fixup(schema): +def schema_compact(schema): """Utility function to revise a schema for a bucket operations.""" if schema is None: logger.error("schema_fixup: schema cannot be None.") - return False + return None if not isinstance(schema, dict): logger.error("schema_fixup: schema is not a dictionary.") - return False + return None if 'fields' not in schema or not isinstance(schema['fields'], list): logger.error("fields attribute missing from schema!") - return False + return None + + compact_schema = copy.deepcopy(schema) # Remove Prism managed fields "WPA_*" - schema['fields'] = [fld for fld in schema['fields'] if not fld['name'].startswith('WPA_')] + compact_schema['fields'] = [fld for fld in compact_schema['fields'] + if not fld['name'].startswith('WPA_')] # Add a sequential order (ordinal) on the fields to (en)force # required sequencing of fields. 
- for ordinal in range(len(schema["fields"])): + for ordinal in range(len(compact_schema["fields"])): fld = schema["fields"][ordinal] fld["ordinal"] = ordinal + 1 @@ -127,15 +131,15 @@ def schema_fixup(schema): # Remove all attributes from the schema that cannot be specified on # a post or put operation. - keys = list(schema.keys()) + keys = list(compact_schema.keys()) for k in keys: if k not in ['name', 'id', 'fields', 'tags', 'categories', 'displayName', 'description', 'documentation', 'enableForAnalysis']: - del schema[k] + del compact_schema[k] - return True + return compact_schema def table_to_bucket_schema(table): From c8b9bd384cf230643912b6fb0721cb8f339d39b1 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 05:12:04 -0400 Subject: [PATCH 050/103] Replace schema_fixup with schema_compact. --- prism/prism.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/prism/prism.py b/prism/prism.py index 343a12d..8102ffd 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -94,15 +94,15 @@ def schema_compact(schema): """Utility function to revise a schema for a bucket operations.""" if schema is None: - logger.error("schema_fixup: schema cannot be None.") + logger.error("schema_compact: schema cannot be None.") return None if not isinstance(schema, dict): - logger.error("schema_fixup: schema is not a dictionary.") + logger.error("schema_compact: schema is not a dictionary.") return None if 'fields' not in schema or not isinstance(schema['fields'], list): - logger.error("fields attribute missing from schema!") + logger.error("schema_compact: fields attribute missing from schema!") return None compact_schema = copy.deepcopy(schema) @@ -592,11 +592,13 @@ def tables_post(self, schema): logger.debug(f"POST : {operation}") url = self.prism_endpoint + "/tables" - if not schema_fixup(schema): + compact_schema = schema_compact(schema) + + if compact_schema is None: logger.error("Invalid schema for create operation.") return None - response = self.http_post(url=url, headers=self.CONTENT_APP_JSON, data=json.dumps(schema)) + response = self.http_post(url=url, headers=self.CONTENT_APP_JSON, data=json.dumps(compact_schema)) if response.status_code == 201: return response.json() @@ -626,17 +628,19 @@ def tables_put(self, schema, truncate=False): If the request is successful, a dictionary containing information about the new table is returned, otherwise None. """ - if not schema_fixup(schema): + compact_schema = schema_compact(schema) + + if compact_schema is None: logger.error("Invalid schema for update operation.") return None - table_id = schema['id'] + table_id = compact_schema['id'] operation = f"/tables/{table_id}" logger.debug(f"PUT: {operation}") url = self.prism_endpoint + operation - response = self.http_put(url=url, data=schema) + response = self.http_put(url=url, data=compact_schema) if response.status_code == 200: return response.json() @@ -908,11 +912,14 @@ def buckets_create( # We have the table and the user didn't include a schema. Make a copy # of the target table's schema. 
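Taken together, the compact-schema changes suggest a get, edit, put cycle for table definitions. A minimal sketch under that assumption, written like the repository's own helpers as a function that receives an instantiated Prism client p; the function name, table ID, and display name are placeholders, and the keyword arguments follow the parameter names as they exist at this point in the series (they are renamed in later patches).

    def rename_table_display(p, table_id, new_display_name):
        # Fetch the complete table definition, including fields.
        table = p.tables_get(id=table_id, type_='full')

        if table is None:
            return None

        # Change one editable attribute and write the definition back;
        # tables_put compacts the schema before sending it to Workday.
        table['displayName'] = new_display_name

        return p.tables_put(table)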
- if not schema_fixup(table_schema): + + compact_schema = schema_compact(table_schema) + + if compact_schema is None: logger.error('Invalid schema for bucket operation.') return None - bucket_schema = table_to_bucket_schema(table_schema) + bucket_schema = table_to_bucket_schema(compact_schema) logger.debug(f"post: /buckets") url = self.prism_endpoint + "/buckets" From 915c7edf2e8eab9016960df400bbf5030c44bbad Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 05:14:31 -0400 Subject: [PATCH 051/103] Remove --format option. --- prism/commands/buckets_commands.py | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index 1381464..f140623 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -17,17 +17,12 @@ help="How much information to be returned in response JSON.") @click.option("-s", "--search", is_flag=True, show_default=True, default=False, help="Use substring search bucket or table.") -@click.option("-f", "--format", "format_", - type=click.Choice(['json', 'tabular', 'schema'], case_sensitive=False), - default="json", - help="Format output as JSON, tabular, or CSV.", - ) @click.option("--table", help="The id or name of a Prism table to list all buckets.") @click.argument("bucket", required=False) @click.pass_context def buckets_get(ctx, bucket, table, isname, - limit, offset, type_, search, format_): + limit, offset, type_, search): """ View the buckets permitted by the security profile of the current user. @@ -47,13 +42,7 @@ def buckets_get(ctx, bucket, table, isname, if not isname and bucket is not None: # This should be a bucket ID - ignore all other options. bucket = p.buckets_get(id=bucket, type_=type_) - - if format_ == "tabular": - pass - # df = pd.json_normalize(bucket) - # logger.info(df.to_csv(index=False)) - else: - logger.info(json.dumps(bucket, indent=2)) + logger.info(json.dumps(bucket, indent=2)) return @@ -74,12 +63,7 @@ def buckets_get(ctx, bucket, table, isname, if buckets['total'] == 0: logger.info('No buckets found.') else: - if format_ == "tabular": - pass - # df = pd.json_normalize(buckets["data"]) - # logger.info(df.to_csv(index=False)) - else: - logger.info(json.dumps(buckets, indent=2)) + logger.info(json.dumps(buckets, indent=2)) @click.command("create") From 3a58a7d5bd46495fa08a39f1d6e83b7ea0b417a1 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 05:17:08 -0400 Subject: [PATCH 052/103] Remove --format option. 
--- prism/commands/dataChanges_commands.py | 35 +------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 251e1d4..b09ac10 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -16,9 +16,6 @@ help="The offset to the first object in a collection to include in the response.") @click.option("-t", "--type", "type_", default="summary", help="How much information to be returned in response JSON (default=summary).") -@click.option("-f", "--format", "format_", default="full", - help="Format output as full, summary, schema, or CSV.", - type=click.Choice(['full', 'summary', 'schema', 'csv'], case_sensitive=False)) @click.option("-s", "--search", is_flag=True, default=False, help="Use contains search substring for --name or --id (default=false).") @click.argument("dct", required=False) @@ -30,18 +27,6 @@ def dataChanges_get(ctx, isname, dct, limit, offset, type_, format_, search): """ p = ctx.obj["p"] - def output_summary_dct(current_dct): - """Display the one-line summary version of a DCT.""" - display_name = current_dct["displayName"] - - source_name = current_dct["source"]["sourceType"] - source_name += ": " + current_dct["source"]["name"] if "name" in current_dct["source"] else "" - - target_name = current_dct["target"]["name"] - operation = current_dct["operation"]["operationType"]["descriptor"] - - logger.info(f"{display_name}, source: {source_name}, target: {target_name}, operation: {operation}") - # Separate the get calls because an ID lookup returns a dict and a name lookup # always returns an object/list structure with zero or more matching DCTs. if isname: @@ -62,25 +47,7 @@ def output_summary_dct(current_dct): logger.error(f'Data change task {dct} not found.') sys.exit(1) - if format_ == "summary": - if 'total' in data_change_task: - for dct_item in data_change_task["data"]: - output_summary_dct(dct_item) - else: - output_summary_dct(data_change_task) - elif format_ == "csv": - pass - #if 'data' in data_change_task: - # df = pd.json_normalize(data_change_task["data"]) - #else: - # df = pd.json_normalize(data_change_task) - - #logger.info(df.to_csv(index=False)) - else: - if 'data' in data_change_task: - logger.info(json.dumps(data_change_task["data"], indent=2)) - else: - logger.info(json.dumps(data_change_task, indent=2)) + logger.info(json.dumps(data_change_task, indent=2)) @click.command("validate") From 59f824da6aec92e0c12cbe6746323a00ce74181a Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 05:19:11 -0400 Subject: [PATCH 053/103] Remove RaaS and WQL methods. --- prism/prism.py | 260 ------------------------------------------------- 1 file changed, 260 deletions(-) diff --git a/prism/prism.py b/prism/prism.py index 8102ffd..08454b8 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -237,13 +237,6 @@ def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_toke self.prism_endpoint = f"{base_url}/api/prismAnalytics/{version}/{tenant_name}" self.upload_endpoint = f"{base_url}/wday/opa/tenant/{tenant_name}/service/wBuckets" - # Support URLs for additional Workday API calls. - self.wql_endpoint = f"{base_url}/api/wql/v1/{tenant_name}" - """str: Workday Report as a Service (raas) endpoint.""" - - self.raas_endpoint = f"{base_url}/ccx/service" - """str: Workday Report as a Service (raas) endpoint.""" - # At creation, there cannot yet be a bearer_token obtained from Workday. 
self.bearer_token = None """str: Active bearer token for the session.""" @@ -1410,259 +1403,6 @@ def fileContainers_load(self, id, file): return results - def wql_dataSources(self, id=None, alias=None, searchString=None, limit=None, offset=None): - """Returns a collection of data sources for use in a WQL query. - - Parameters - ---------- - id : str - The ID of a Workday data source. - alias : str - Filters by alias match - searchString : str - """ - operation = '/dataSources' - - if id is not None: - operation = f'{operation}/{id}' - logger.debug(f'wql_dataSources: {operation}') - url = f'{self.wql_endpoint}{operation}' - - response = self.http_get(url) - - return response.json() - - url_separator = '?' - - if alias is not None: - operation += f'?alias={urlparse.quote(alias)}' - url_separator = '&' - elif searchString is not None: - operation += f'?searchString={urlparse.quote(searchString)}' - url_separator = '&' - - logger.debug(f'wql_dataSources: {operation}') - url = f'{self.wql_endpoint}{operation}' - - # Always return a valid list - even if empty. - return_sources = {'total': 0, 'data': []} - - if limit is not None and isinstance(limit, int) and 0 < limit <= 100: - return_all = False - - query_limit = limit - - if offset is not None and isinstance(offset, int) and offset > 0: - query_offset = offset - else: - query_offset = 0 - else: - return_all = True - - query_limit = 100 - query_offset = 0 - - # Assume we'll loop over more than one page. - while True: - r = self.http_get(f'{url}{url_separator}limit={query_limit}&offset={query_offset}') - - if r.status_code != 200: - break - - ds = r.json() - - # Add this page to the final output. - return_sources['data'] += ds['data'] - - if not return_all: - break - - if len(ds['data']) < query_limit: - # A page size less than the limit means we are done. - break - - query_offset += query_limit - - # Fix-up the final total of sources. - return_sources["total"] = len(return_sources["data"]) - - return return_sources - - def wql_dataSources_fields(self, id=None, alias=None, searchString=None, limit=None, offset=None): - """Retrieves a field of the data source instance. - - Parameters - ---------- - id : str - The Workday ID of the resource. - alias : str - The alias of the data source field. - searchString : str - The string to be searched in case-insensitive manner within the descriptors of the data source fields. - limit : int - The maximum number of objects in a single response. The default is 20, the maximum is 100, and None is all. - offset : int - The zero-based index of the first object in a response collection. - operation = '/dataSources' - """ - - if id is None: - return None - - operation = f'/dataSources/{id}/fields' - logger.debug('wql_dataSources_fields: {operation}') - url = f'{self.wql_endpoint}{operation}' - - url_separator = '?' 
- - if alias is not None: - operation += f'?alias={urlparse.quote(alias)}' - url_separator = '&' - - if searchString is not None: - operation += f'{url_separator}searchString={urlparse.quote(searchString)}' - url_separator = '&' - - if limit is not None and isinstance(limit, int) and 0 < limit <= 100: - return_all = False - - query_limit = limit - - if offset is not None and isinstance(offset, int) and offset > 0: - query_offset = offset - else: - query_offset = 0 - else: - return_all = True - - query_limit = 100 - query_offset = 0 - - return_fields = {'total': 0, 'data': []} - - while True: - url = f'{url}{url_separator}limit={query_limit}&offset={query_offset}' - - response = self.http_get(url) - - if response.status_code != 200: - break - - fields = response.json() - - # Add this page of fields to the final output. - return_fields['data'] += fields['data'] - - if not return_all: - break - - if len(fields['data']) < query_limit: - # A page size less than the limit means we are done. - break - - query_offset += query_limit - - return_fields['total'] = len(return_fields['data']) - - return return_fields - - def wql_data(self, query, limit, offset): - """Returns the data from a WQL query. - - Parameters - ---------- - query : str - The WQL query that retrieves the data. - limit: int - The maximum number of objects in a single response - maximum 10,000. - offset: int - The zero-based index of the first object in a response collection. - - Returns - ------- - dict - Returns a dict with a "total" row count attribute and a "data" - array of rows. - """ - operation = '/data' - - url = f'{self.wql_endpoint}{operation}' - query_safe = urlparse.quote(query.strip()) - - if limit is None or not isinstance(limit, int) or limit > 10000: - query_limit = 10000 - offset = 0 - else: - query_limit = limit - - offset = offset if offset is not None and isinstance(offset, int) else 0 - - # Always return a valid object - even if no rows are returned. - data = {'total': 0, 'data': []} - - while True: - r = self.http_get(f'{url}?query={query_safe}&limit={query_limit}&offset={offset}') - - if r.status_code == 200: - page = r.json() - data['data'] += page['data'] - else: - # There was a problem, return whatever we have... - return data - - if len(page['data']) < query_limit: - break - - offset += query_limit - - # Set the final row count. - data['total'] = len(data['data']) - - return data - - def raas_run(self, report, user, params=None, format_='XML'): - """ - Run a Workday system or custom report. - - Parameters - ---------- - report : str - Name of the Workday report to run. - user : str - Username to include on URL - params : list - Array of parameter/value pairs to include on the URL - format_ : str - Output format, i.e., XML, JSON, CSV - """ - if user is None or not isinstance(user, str) or len(user) == 0: - logger.warning("generating delivered report (systemreport2).") - url = f"{self.raas_endpoint}/systemreport2/{self.tenant_name}/{report}" - else: - logger.debug(f"generating report as {user}.") - url = f"{self.raas_endpoint}/customreport2/{self.tenant_name}/{user}/{report}" - - separator = "?" 
-        if params is not None and len(params) > 0:
-            query_str = ""
-
-            for param in range(0, len(params), 2):
-                query_str += separator + params[param] + "=" + params[param + 1]
-                separator = "&"
-
-            url += query_str
-
-        if format_:
-            url = f"{url}{separator}format={format_}"
-
-        r = self.http_get(url)
-
-        if r.status_code == 200:
-            return r.text
-
-        logging.error("HTTP Error: {}".format(r.content.decode("utf-8")))
-        return None
-
 def resolve_file_list(files):
     """Evaluate file name(s) and return the list of supported files.

From 33d999698c56fd894db5778b067b2a1fc690e117 Mon Sep 17 00:00:00 2001
From: wd-mgreynolds
Date: Tue, 24 Oct 2023 05:38:54 -0400
Subject: [PATCH 054/103] Add table_upload_file utility function.

---
 prism/__init__.py                 |  4 +--
 prism/commands/tables_commands.py | 26 +++--------------
 prism/prism.py                    | 47 +++++++++++++++++++++++++++++++
 3 files changed, 53 insertions(+), 24 deletions(-)

diff --git a/prism/__init__.py b/prism/__init__.py
index e8e6984..39bb069 100644
--- a/prism/__init__.py
+++ b/prism/__init__.py
@@ -1,8 +1,8 @@
-from prism.prism import Prism, set_logging, schema_compact
+from prism.prism import Prism, set_logging, schema_compact, table_upload_file
 from ._version import get_versions
 __version__ = get_versions()["version"]
 del get_versions
-__all__ = ["Prism", "set_logging", "schema_compact"]
+__all__ = ["Prism", "set_logging", "schema_compact", "table_upload_file"]
diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py
index fd22aa9..93af086 100644
--- a/prism/commands/tables_commands.py
+++ b/prism/commands/tables_commands.py
@@ -6,6 +6,7 @@ import click
 from prism import schema_compact
+from prism import table_upload_file
 logger = logging.getLogger('prismCLI')
@@ -286,30 +287,11 @@ def tables_upload(ctx, table, isname, operation, file):
         sys.exit(1)
     if isname:
-        bucket = p.buckets_create(target_name=table, operation=operation)
+        results = table_upload_file(p, file, table_name=table, operation=operation)
     else:
-        bucket = p.buckets_create(target_id=table, operation=operation)
+        results = table_upload_file(p, file, table_id=table, operation=operation)
-
-    if bucket is None:
-        logger.error('Bucket creation failed.')
-        sys.exit(1)
-
-    logger.debug(json.dumps(bucket, indent=2))
-    bucket_id = bucket['id']
-
-    file_results = p.buckets_files(bucket_id, file)
-
-    if file_results['total'] > 0:
-        results = p.buckets_complete(bucket_id)
-
-        # Add the file upload results to the bucket
-        # info returned to the caller.
-        results['files'] = file_results
-        results['bucket'] = bucket  # Ensure bucket info is present.
-
-        logger.info(json.dumps(results, indent=2))
-    else:
-        logger.info('No files uploaded to table.')
+    logger.debug(json.dumps(results, indent=2))
 @click.command('truncate')
diff --git a/prism/prism.py b/prism/prism.py
index 08454b8..ca7bfbc 100644
--- a/prism/prism.py
+++ b/prism/prism.py
@@ -1448,3 +1448,50 @@ def resolve_file_list(files):
             logger.warning(f"File {f} is not a .csv.gz or .csv file - skipping.")
     return target_files
+
+
+def table_upload_file(p, file, table_id=None, table_name=None, operation="TruncateAndInsert"):
+    """Upload a file to an existing Prism table.
+
+    Parameters
+    ----------
+    p : Prism
+        Instantiated Prism class from prism.Prism()
+
+    file : str | list
+        The path to CSV or GZIP compressed file(s) to upload.
+
+    table_id : str
+        The ID of the Prism table to upload your file to.
+
+    table_name : str
+        The API name of the Prism table to upload your file to.
+
+    operation : str (default = TruncateandInsert)
+        The table load operation.
+ Possible options include TruncateandInsert, Insert, Update, Upsert, Delete. + + Returns + ------- + If the request is successful, a dictionary containing information about + the table is returned. + """ + + bucket = p.buckets_create(target_id=table_id, target_name=table_name, operation=operation) + + if bucket is None: + return None + + file_results = p.buckets_files(bucket["id"], file) + + if file_results['total'] > 0: + results = p.buckets_complete(bucket["id"]) + + # Add the file upload results to the bucket + # info returned to the caller. + results['files'] = file_results + results['bucket'] = bucket # Ensure bucket info is present. + + return results + else: + return file_results From 981e9c63b80c99a32c6797bffdbbb8c7930f136b Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 06:22:46 -0400 Subject: [PATCH 055/103] Added resolve_schema utility function. --- prism/__init__.py | 5 +- prism/commands/tables_commands.py | 191 +----------------------------- prism/prism.py | 56 ++++++++- 3 files changed, 61 insertions(+), 191 deletions(-) diff --git a/prism/__init__.py b/prism/__init__.py index 39bb069..c6c181c 100644 --- a/prism/__init__.py +++ b/prism/__init__.py @@ -1,8 +1,9 @@ -from prism.prism import Prism, set_logging, schema_compact, table_upload_file +from prism.prism import Prism, set_logging, \ + schema_compact, table_upload_file, resolve_schema from ._version import get_versions __version__ = get_versions()["version"] del get_versions -__all__ = ["Prism", "set_logging", "schema_compact", "table_upload_file"] +__all__ = ["Prism", "set_logging", "schema_compact", "table_upload_file", "resolve_schema"] diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 93af086..d7fc142 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -1,12 +1,9 @@ import json import logging import sys -import os -import csv import click -from prism import schema_compact -from prism import table_upload_file +from prism import * logger = logging.getLogger('prismCLI') @@ -22,9 +19,9 @@ type=click.Choice(['summary', 'full', 'permissions'], case_sensitive=False), help='How much information returned for each table.') @click.option('-c', '--compact', is_flag=True, default=False, - help='Compact the table schema for use in edit operations.') + help='Compact the table schema for use in edit (put) operations.') @click.option('-s', '--search', is_flag=True, - help='Enable substring search of NAME in api name or display name, default=False (exact match).') + help='Enable substring search of NAME in api name or display name.') @click.argument('table', required=False) @click.pass_context def tables_get(ctx, isname, table, limit, offset, type_, compact, search): @@ -331,185 +328,3 @@ def tables_truncate(ctx, table, isname): sys.exit(1) -def schema_from_csv(prism, file): - """Convert a CSV list of fields into a proper Prism schema JSON object""" - - if not os.path.exists(file): - logger.error(f'FIle {file} not found - skipping.') - sys.exit(1) - - schema = {'fields': []} # Start with an empty schema definition. - - with open(file, newline='') as csvfile: - reader = csv.DictReader(csvfile) - - # Force all the columns names (first row) from the CSV to lowercase to make - # lookups consistent regardless of the actual case of the columns. - reader.fieldnames = [f_name.lower() for f_name in reader.fieldnames] - - # The minimum definition is a name column - exit if not found. 
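A usage sketch for the new table_upload_file helper; the file path and table name below are placeholders rather than values from the patch, and p is assumed to be an authenticated prism.Prism client.

    import json
    import prism


    def upload_csv(p):
        # Placeholder file and table names, purely for illustration.
        results = prism.table_upload_file(
            p,
            file='employees.csv.gz',
            table_name='EMPLOYEE_DATA',
            operation='TruncateAndInsert')

        # When files were loaded, the result carries the bucket info plus
        # per-file upload details.
        print(json.dumps(results, indent=2))

        return results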
No other - # column definition is required to build a valid field list. - if 'name' not in reader.fieldnames: - logger.error(f'CSV file {file} does not contain a name column header in first line.') - sys.exit(1) - - # Prism fields always have an ordinal sequence assigned to each field. - ordinal = 1 - - for row in reader: - if len(row['name']) == 0: - logger.error('Missing column name in CSV file.') - sys.exit(1) - - # Start the new field definition with what we know so far. - field = { - 'ordinal': ordinal, - 'name': row['name'], - 'displayName': row['displayname'] if 'displayname' in row else row['name'] - } - - # The following two columns are not required and may not be present. - - if 'required' in row and isinstance(row['required'], str) and row['required'].lower() == 'true': - field['required'] = True - else: - field['required'] = False - - if 'externalid' in row and isinstance(row['externalid'], str) and row['externalid'].lower() == 'true': - field['externalId'] = True - else: - field['externalId'] = False - - fld_type = 'none' - - prism_data_types = ['boolean', 'integer', 'text', 'date', 'long', 'decimal', - 'numeric', 'instance', 'currency', 'multi_instance'] - - if 'type' in row and row['type'].lower() in prism_data_types: - field['type'] = {'id': f'Schema_Field_Type={row["type"]}'} - fld_type = row['type'].lower() - else: - # Default all "un-typed" fields to text. - field['type'] = {'id': 'Schema_Field_Type=Text'} - - if fld_type == 'date': - if 'parseformat' in row and isinstance(row['parseformat'], str) and len(row['parseformat']) > 0: - field['parseFormat'] = row['parseformat'] - else: - field['parseFormat'] = 'yyyy-MM-dd' - elif fld_type == 'numeric': - if 'precision' in row: - field['precision'] = row['precision'] - - if 'scale' in row: - field['scale'] = row['scale'] - elif fld_type == 'instance': - # We need all the data sources to resolve the business objects - # to include their WID. - data_sources = prism.datasources_list() - - if data_sources is None or data_sources['total'] == 0: - click.echo('Error calling WQL/dataSources') - return - - # Find the matching businessObject - bo = [ds for ds in data_sources['data'] - if ds['businessObject']['descriptor'] == row['businessObject']] - - if len(bo) == 1: - field['businessObject'] = bo[0]['businessObject'] - - schema['fields'].append(field) - ordinal += 1 - - return schema - - -def csv_from_fields(fields): - """Convert a Prism field list to CSV representation.""" - - format_str = '{name},"{displayName}",{ordinal},{type},"{businessObject}",' - format_str += '{precision},{scale},"{parseFormat}",{required},{externalId}\n' - - # Start with the CSV column headings. - csv_str = 'name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId\n' - - for field in fields: - # Suppress the Prism audit columns. - if field['name'].startswith('WPA_'): - continue - - field_def = {'name': field['name'], - 'displayName': field['displayName'], - 'ordinal': field['ordinal'], - 'type': field['type']['descriptor'], - 'businessObject': field['businessObject']['descriptor'] if 'businessObject' in field else '', - 'precision': field['precision'] if 'precision' in field else '', - 'scale': field['scale'] if 'scale' in field else '', - 'parseFormat': field['parseFormat'] if 'parseFormat' in field else '', - 'required': field['required'], - 'externalId': field['externalId'] - } - - # Add the new field to the CSV text. 
- csv_str += format_str.format_map(field_def) - - return csv_str - - -def resolve_schema(p=None, file=None, source_name=None, source_id=None): - """Get or extract a schema from a file or existing Prism table.""" - - # Start with a blank schema definition. - schema = {} - - # A file always takes precedence over sourceName and sourceWID - # options, and must BE, or contain a valid schema. - - if file is not None: - if file.lower().endswith('.json'): - try: - with open(file) as json_file: - schema = json.load(json_file) - except Exception as e: - logger.error(f'Invalid schema file: {e}.') - sys.exit(1) - - # The JSON file could be a complete table definitions (GET:/tables - full) or just - # the list of fields. If we got a list, then we have a list of fields we - # use to start the schema definition. - - if isinstance(schema, list): - schema['fields'] = schema - else: - # This should be a full schema, perhaps from a table list command. - if 'name' not in schema and 'fields' not in schema: - logger.error('Invalid schema - name and fields attribute not found.') - sys.exit(1) - elif file.lower().endswith('.csv'): - schema = schema_from_csv(p, file) - else: - logger.error('Invalid file extension - valid extensions are .json or .csv.') - sys.exit(1) - else: - # No file was specified, check for a Prism source table. - if source_name is None and source_id is None: - logger.error('No schema file provided and a table (--sourceName or --sourceId) not specified.') - sys.exit(1) - - if source_id is not None: - schema = p.tables_list(id=source_id, type_='full') # Exact match on WID - and get the fields (full) - - if schema is None: - logger.error(f'Invalid --sourceId {source_id} : table not found.') - sys.exit(1) - else: - tables = p.tables_list(name=source_name, type_='full') # Exact match on API Name - - if tables['total'] == 0: - logger.error(f'Invalid --sourceName {source_name} : table not found.') - sys.exit(1) - - schema = tables['data'][0] - - return schema diff --git a/prism/prism.py b/prism/prism.py index ca7bfbc..959d69a 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -14,7 +14,7 @@ import os import sys import uuid -import io +import csv import gzip import inspect import copy @@ -1495,3 +1495,57 @@ def table_upload_file(p, file, table_id=None, table_name=None, operation="Trunca return results else: return file_results + + +def resolve_schema(p=None, file=None, source_name=None, source_id=None): + """Get or extract a schema from a file or existing Prism table.""" + + # Start with a blank schema definition. + schema = {} + + # A file always takes precedence over sourceName and sourceWID + # options, and must BE a valid schema. + + if file is not None: + if not os.path.isfile(file): + logger.error('File not found.') + return None + + # We can expect either a JSON file or a CSV file. + try: + with open(file) as json_file: + schema = json.load(json_file) + + if isinstance(schema, list): + # Convert a list of fields into a basic schema. + schema['fields'] = schema + else: + # This should be a full schema, perhaps from a table list command. + if 'name' not in schema and 'fields' not in schema: + logger.error('Invalid schema - name and fields attribute not found.') + return None + except Exception as e: + logger.error(e) + return None + else: + # No file was specified, check for a Prism source table. 
+ if source_name is None and source_id is None: + logger.error('No schema file provided and a table (--sourceName or --sourceId) not specified.') + return None + + if source_id is not None: + schema = p.tables_list(id=source_id, type_='full') # Exact match on WID - and get the fields (full) + + if schema is None: + logger.error(f'Invalid --sourceId {source_id} : table not found.') + return None + else: + tables = p.tables_list(name=source_name, type_='full') # Exact match on API Name + + if tables['total'] == 0: + logger.error(f'Invalid --sourceName {source_name} : table not found.') + return None + + schema = tables['data'][0] + + return schema From 98f2e6361a82595156e804bda0571fdb57e2915e Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 06:24:32 -0400 Subject: [PATCH 056/103] Rename tables_put to tables_edit. --- prism/cli.py | 2 +- prism/commands/tables_commands.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/prism/cli.py b/prism/cli.py index 90b70c4..6bc5f83 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -170,7 +170,7 @@ def tables(): tables.add_command(t_commands.tables_get) tables.add_command(t_commands.tables_create) -tables.add_command(t_commands.tables_put) +tables.add_command(t_commands.tables_edit) tables.add_command(t_commands.tables_patch) tables.add_command(t_commands.tables_upload) tables.add_command(t_commands.tables_truncate) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index d7fc142..1c5b36f 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -326,5 +326,3 @@ def tables_truncate(ctx, table, isname): if bucket is None: logger.error(msg) sys.exit(1) - - From cd2e7176e5e0305e78b734baa365931c248a2bbb Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 06:28:07 -0400 Subject: [PATCH 057/103] Bug fix compact_schema. --- prism/prism.py | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/prism/prism.py b/prism/prism.py index 959d69a..2d0cfb3 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -101,33 +101,31 @@ def schema_compact(schema): logger.error("schema_compact: schema is not a dictionary.") return None - if 'fields' not in schema or not isinstance(schema['fields'], list): - logger.error("schema_compact: fields attribute missing from schema!") - return None - compact_schema = copy.deepcopy(schema) - # Remove Prism managed fields "WPA_*" - compact_schema['fields'] = [fld for fld in compact_schema['fields'] - if not fld['name'].startswith('WPA_')] - # Add a sequential order (ordinal) on the fields to (en)force - # required sequencing of fields. - for ordinal in range(len(compact_schema["fields"])): - fld = schema["fields"][ordinal] - fld["ordinal"] = ordinal + 1 - - if 'fieldId' in fld: - del fld['fieldId'] - - if 'id' in fld: - del fld['id'] - - if 'type' in fld: - if 'descriptor' in fld['type']: - # Convert the descriptor to the shortened Prism type syntax. - fld['type']['id'] = f"Schema_Field_Type={fld['type']['descriptor']}" - del fld['type']['descriptor'] + # required sequencing of fields. Note: for summary tables + # there will not be a fields attribute. 
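A usage sketch for the resolve_schema helper introduced above; the wrapper function and its argument names are hypothetical, and only the JSON-file and existing-table paths are exercised.

    from prism import resolve_schema


    def schema_for_new_table(p, schema_file=None, template_table=None):
        # Prefer an explicit JSON schema file when one is given...
        if schema_file is not None:
            return resolve_schema(file=schema_file)

        # ...otherwise copy the full definition of an existing table
        # (exact API name match).
        return resolve_schema(p=p, source_name=template_table)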
+ if 'fields' in compact_schema: + # Remove Prism managed fields "WPA_*" + compact_schema['fields'] = [fld for fld in compact_schema['fields'] + if not fld['name'].startswith('WPA_')] + + for ordinal in range(len(compact_schema["fields"])): + fld = schema["fields"][ordinal] + fld["ordinal"] = ordinal + 1 + + if 'fieldId' in fld: + del fld['fieldId'] + + if 'id' in fld: + del fld['id'] + + if 'type' in fld: + if 'descriptor' in fld['type']: + # Convert the descriptor to the shortened Prism type syntax. + fld['type']['id'] = f"Schema_Field_Type={fld['type']['descriptor']}" + del fld['type']['descriptor'] # Remove all attributes from the schema that cannot be specified on # a post or put operation. From 6b6a2876a75dfa4a2e5371f38dfa7da72a50891c Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 07:22:13 -0400 Subject: [PATCH 058/103] Clean-up of --isName option. --- prism/commands/buckets_commands.py | 109 ++++++++++++++--------------- 1 file changed, 51 insertions(+), 58 deletions(-) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index f140623..f9656d3 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -21,14 +21,13 @@ help="The id or name of a Prism table to list all buckets.") @click.argument("bucket", required=False) @click.pass_context -def buckets_get(ctx, bucket, table, isname, - limit, offset, type_, search): +def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): """ View the buckets permitted by the security profile of the current user. [BUCKET] ID or name of a Prism bucket. - NOTE: For table name searching, this will be the Display Name not + NOTE: For table name searching, the Display Name is searched not the API Name. """ @@ -60,10 +59,7 @@ def buckets_get(ctx, bucket, table, isname, buckets = p.buckets_get(table_id=table, limit=limit, offset=offset, type_=type_) - if buckets['total'] == 0: - logger.info('No buckets found.') - else: - logger.info(json.dumps(buckets, indent=2)) + logger.info(json.dumps(buckets, indent=2)) @click.command("create") @@ -71,13 +67,13 @@ def buckets_get(ctx, bucket, table, isname, help="Table name to associate with the bucket.") @click.option("-i", "--target_id", default=None, help="Table ID to associate with the table.") -@click.option("-f", "--file", "file_", required=False, default=None, type=click.Path(exists=True), +@click.option("-f", "--file", "file", required=False, default=None, type=click.Path(exists=True), help="Schema JSON file for the target table.") @click.option("-o", "--operation", default="TruncateAndInsert", show_default=True, help="Operation to perform on the table.") @click.argument("name", required=False) @click.pass_context -def buckets_create(ctx, target_name, target_id, file_, operation, name): +def buckets_create(ctx, target_name, target_id, file, operation, name): """ Create a new bucket with the specified name. 
@@ -85,16 +81,17 @@ def buckets_create(ctx, target_name, target_id, file_, operation, name): """ p = ctx.obj["p"] - if target_name is None and target_id is None and file_ is None: + if target_name is None and target_id is None and file is None: logger.error("A table must be associated with this bucket (-n, -i, or -f must be specified).") sys.exit(1) bucket = p.buckets_create(name=name, target_id=target_id, target_name=target_name, - schema=file_, operation=operation) + schema=file, operation=operation) if bucket is not None: logger.info(json.dumps(bucket, indent=2)) else: + logger.error('Error creating bucket.') sys.exit(1) @@ -140,78 +137,78 @@ def buckets_files(ctx, target_name, target_id, file, operation, bucket, complete @click.command("complete") -@click.option("-n", "name", - help="Bucket name to complete.") -@click.argument("id", required=False) +@click.option('-n', '--isName', is_flag=True, default=False, + help='Flag to treat the bucket argument as a name.') +@click.argument("bucket", required=True) @click.pass_context -def buckets_complete(ctx, name, id): +def buckets_complete(ctx, isname, bucket): """ Complete the specified bucket and perform the specified operation. - [ID] A reference to a Prism Analytics bucket. + [BUCKET] A reference to a Prism Analytics bucket. """ p = ctx.obj["p"] - if id is None and name is None: - click.echo("A bucket wid or a bucket name must be specified.") - sys.exit(1) + if isname: + buckets = p.buckets_list(bucket=bucket, verbosity="full") - if id is not None: + if buckets["total"] == 0: + bucket = None + else: + bucket = buckets["data"][0] + else: # If the caller passed both a name and WID, then use the WID first. bucket = p.buckets_list(bucket_id=id) - else: - # Lookup the bucket by name. - buckets = p.buckets_list(bucket=name, verbosity="full") - - if buckets["total"] == 0: - logger.error('Bucket not found.') - sys.exit(1) - bucket = buckets["data"][0] + if bucket is None: + logger.error(f'Bucket {bucket} not found.') + sys.exit(1) bucket_state = bucket["state"]["descriptor"] if bucket_state != "New": - click.echo(f"Bucket state is \"{bucket_state}\" - only \"New.\" buckets can be completed.") + logger.error(f"Bucket state is \"{bucket_state}\" - only \"New.\" buckets can be completed.") sys.exit(1) - click.echo(p.buckets_complete(bucket["id"])) + logger.info(p.buckets_complete(bucket["id"])) @click.command("errorFile") -@click.option("-n", "name", - help="Bucket name.") -@click.argument("id", required=False) +@click.option('-n', '--isName', is_flag=True, default=False, + help='Flag to treat the bucket argument as a name.') +@click.argument("bucket", required=True) @click.pass_context -def buckets_errorFile(ctx, name, id): +def buckets_errorFile(ctx, isname, bucket): """ Return the error file for a bucket. - [ID] A reference to a Prism Analytics bucket. + [BUCKET] A reference to a Prism Analytics bucket. """ p = ctx.obj["p"] - if id is not None: - # If the caller passed both a name and WID, then use the WID first. - error_file = p.buckets_errorFile(id=id) - else: + if isname: # Lookup the bucket by name. 
- buckets = p.buckets_get(name=name) + buckets = p.buckets_get(name=bucket) if buckets["total"] == 0: - logger.error('Bucket not found.') + logger.error(f'Bucket {bucket} not found.') sys.exit(1) + else: + bucket_id = buckets['data'][0]['id'] + else: + bucket_id = bucket - error_file = p.buckets_errorFile(id=buckets['data'][0]['id']) + error_file = p.buckets_errorFile(id=buckets['data'][0]['id']) logger.info(error_file) @click.command("status") -@click.option("-n", "--name", required=False, help="Bucket name to status") -@click.argument("id", required=False) +@click.option("-n", "--isName", is_flag=True, default=False, + help="Bucket name to status") +@click.argument("bucket", required=True) @click.pass_context -def buckets_status(ctx, name, id): +def buckets_status(ctx, isname, bucket): """ Get the status of a bucket by ID or name. @@ -219,24 +216,20 @@ def buckets_status(ctx, name, id): """ p = ctx.obj["p"] - if id is None and name is None: - logger.error('Please specify the ID or name of a bucket.') - sys.exit(1) - - if id is not None: - bucket = p.buckets_get(id) - - if bucket is None: - logger.error(f'Bucket {id} not found.') - sys.exit(1) - else: - buckets = p.buckets_get(id, name=name) + if isname: + buckets = p.buckets_get(id, name=bucket) if buckets["total"] == 0: - logger.error(f'Bucket name {name} not found.') + logger.error(f'Bucket name {bucket} not found.') sys.exit(1) bucket = buckets['data'][0] + else: + bucket = p.buckets_get(id) + + if bucket is None: + logger.error(f'Bucket {bucket} not found.') + sys.exit(1) logger.info(bucket["state"]["descriptor"]) From 4204100c36323c0110ef88ab54938954bb61098b Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 08:31:29 -0400 Subject: [PATCH 059/103] Buckets get use bucket_id and bucket_name. --- prism/commands/buckets_commands.py | 4 ++-- prism/prism.py | 19 +++++++++---------- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index f9656d3..0b633f6 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -40,7 +40,7 @@ def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): if not isname and bucket is not None: # This should be a bucket ID - ignore all other options. - bucket = p.buckets_get(id=bucket, type_=type_) + bucket = p.buckets_get(bucket_id=bucket, type_=type_) logger.info(json.dumps(bucket, indent=2)) return @@ -49,7 +49,7 @@ def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): if isname and bucket is not None: # This should be a search by bucket name. - buckets = p.buckets_get(name=bucket, type_=type_, search=search) + buckets = p.buckets_get(bucket_name=bucket, type_=type_, search=search) else: # Search by table ID or name. if isname: diff --git a/prism/prism.py b/prism/prism.py index 2d0cfb3..9e2d34b 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -14,7 +14,6 @@ import os import sys import uuid -import csv import gzip import inspect import copy @@ -672,16 +671,16 @@ def tables_patch(self, id, patch): return None def buckets_get(self, - id=None, name=None, search=False, + bucket_id=None, bucket_name=None, search=False, limit=None, offset=None, type_="summary", table_id=None, table_name=None): """Get a one or more bucket definitions. Parameters ---------- - id : str + bucket_id : str The ID of an existing bucket. - name : str + bucket_name : str The name of an existing bucket. 
limit : int The maximum number of tables to be queried, if None all tables are returned. @@ -710,7 +709,7 @@ def buckets_get(self, # If we got an ID, then do a direct query by ID - no paging or # searching required. if id is not None: - operation = f"{operation}/{id}?format={output_type}" + operation = f"{operation}/{bucket_id}?format={output_type}" logger.debug(f"get: {operation}") url = self.prism_endpoint + operation @@ -734,10 +733,10 @@ def buckets_get(self, 'type': output_type } - if not search and name is not None: + if not search and bucket_name is not None: # List a specific bucket name overrides any other # combination of search/table/bucket name/wid. - params['name'] = urlparse.quote(name) + params['name'] = urlparse.quote(bucket_name) params['limit'] = 1 # Can ONLY be one matching bucket. params['offset'] = 0 @@ -758,16 +757,16 @@ def buckets_get(self, buckets = r.json() - if not search and name is not None: # exact bucket name + if not search and bucket_name is not None: # exact bucket name # We are not searching, and we have a specific bucket, # return whatever we got with this call even if no buckets # were found (it will be in the necessary dict structure). return buckets - if name is not None: # We are searching at this point. + if bucket_name is not None: # We are searching at this point. # Substring search for matching table names match_buckets = [bck for bck in buckets["data"] if - name in bck["name"] or name in bck["displayName"]] + bucket_name in bck["name"] or bucket_name in bck["displayName"]] elif table_id is not None: match_buckets = [ bck for bck in buckets["data"] From 8c3127b66eeb911369c99c6dca383ad9afad7dbf Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 08:33:14 -0400 Subject: [PATCH 060/103] Buckets get bug fix. --- prism/prism.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/prism.py b/prism/prism.py index 9e2d34b..323b0fa 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -708,7 +708,7 @@ def buckets_get(self, # If we got an ID, then do a direct query by ID - no paging or # searching required. - if id is not None: + if bucket_id is not None: operation = f"{operation}/{bucket_id}?format={output_type}" logger.debug(f"get: {operation}") url = self.prism_endpoint + operation From a4a566a0c59c703752afe5270f1670734429ec0d Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 08:47:39 -0400 Subject: [PATCH 061/103] Replace id with bucket_id. --- prism/commands/buckets_commands.py | 16 ++++++++-------- prism/prism.py | 30 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index 0b633f6..4fab57e 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -34,7 +34,7 @@ def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): p = ctx.obj["p"] if isname and bucket is None and table is None: - # It's invalid to add the --isName switch without providing a bucket name. + # It's invalid to add the --isName switch without providing a bucket id or table name. 
logger.error('To get buckets by name, please provide a bucket name.') sys.exit(1) @@ -85,7 +85,7 @@ def buckets_create(ctx, target_name, target_id, file, operation, name): logger.error("A table must be associated with this bucket (-n, -i, or -f must be specified).") sys.exit(1) - bucket = p.buckets_create(name=name, target_id=target_id, target_name=target_name, + bucket = p.buckets_create(bucket_name=name, target_id=target_id, target_name=target_name, schema=file, operation=operation) if bucket is not None: @@ -150,7 +150,7 @@ def buckets_complete(ctx, isname, bucket): p = ctx.obj["p"] if isname: - buckets = p.buckets_list(bucket=bucket, verbosity="full") + buckets = p.buckets_get(bucket_name=bucket, verbosity="full") if buckets["total"] == 0: bucket = None @@ -158,7 +158,7 @@ def buckets_complete(ctx, isname, bucket): bucket = buckets["data"][0] else: # If the caller passed both a name and WID, then use the WID first. - bucket = p.buckets_list(bucket_id=id) + bucket = p.buckets_list(bucket_id=bucket) if bucket is None: logger.error(f'Bucket {bucket} not found.') @@ -188,7 +188,7 @@ def buckets_errorFile(ctx, isname, bucket): if isname: # Lookup the bucket by name. - buckets = p.buckets_get(name=bucket) + buckets = p.buckets_get(bucket_name=bucket) if buckets["total"] == 0: logger.error(f'Bucket {bucket} not found.') @@ -198,7 +198,7 @@ def buckets_errorFile(ctx, isname, bucket): else: bucket_id = bucket - error_file = p.buckets_errorFile(id=buckets['data'][0]['id']) + error_file = p.buckets_errorFile(bucket_id=buckets['data'][0]['id']) logger.info(error_file) @@ -217,7 +217,7 @@ def buckets_status(ctx, isname, bucket): p = ctx.obj["p"] if isname: - buckets = p.buckets_get(id, name=bucket) + buckets = p.buckets_get(bucket_name=bucket) if buckets["total"] == 0: logger.error(f'Bucket name {bucket} not found.') @@ -225,7 +225,7 @@ def buckets_status(ctx, isname, bucket): bucket = buckets['data'][0] else: - bucket = p.buckets_get(id) + bucket = p.buckets_get(bucket_id=bucket) if bucket is None: logger.error(f'Bucket {bucket} not found.') diff --git a/prism/prism.py b/prism/prism.py index 323b0fa..daebb69 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -805,7 +805,7 @@ def buckets_get(self, def buckets_create( self, - name=None, + bucket_name=None, target_name=None, target_id=None, schema=None, @@ -826,7 +826,7 @@ def buckets_create( Parameters ---------- - name : str + bucket_name : str Name of the bucket to create, default to a new generated name. target_id : str The ID of the table for this bucket. @@ -845,10 +845,10 @@ def buckets_create( # If the caller didn't give us a name to use for the bucket, # create a default name. - if name is None: + if bucket_name is None: bucket_name = buckets_gen_name() else: - bucket_name = name + bucket_name = bucket_name table_schema = None bucket_schema = None @@ -931,13 +931,13 @@ def buckets_create( return None - def buckets_complete(self, id): + def buckets_complete(self, bucket_id): """ Commit the data contained in the bucket to the associated table. Parameters ---------- - id : str + bucket_id : str The ID of an existing bucket with a "New" status. Returns @@ -945,14 +945,14 @@ def buckets_complete(self, id): dict Information about the completed bucket, or None if there was a problem. 
""" - operation = f'/buckets/{id}/complete' + operation = f'/buckets/{bucket_id}/complete' logger.debug(f'post: {operation}') url = self.prism_endpoint + operation r = self.http_post(url) if r.status_code == 201: - logger.debug(f'successfully completed wBucket {id}.') + logger.debug(f'successfully completed wBucket {bucket_id}.') return r.json() elif r.status_code == 400: logger.debug(f'error completing bucket') @@ -960,7 +960,7 @@ def buckets_complete(self, id): return None - def buckets_files(self, id, file=None): + def buckets_files(self, bucket_id, file=None): """Upload a file to a given bucket. Notes @@ -973,7 +973,7 @@ def buckets_files(self, id, file=None): Parameters ---------- - id : str + bucket_id : str Upload the file to the bucket identified by ID. file : str | list(str) @@ -986,7 +986,7 @@ def buckets_files(self, id, file=None): multiple files, an array of upload information with information for each file. """ - operation = f"/buckets/{id}/files" + operation = f"/buckets/{bucket_id}/files" logger.debug("post: {operation}") url = self.prism_endpoint + operation @@ -1022,12 +1022,12 @@ def buckets_files(self, id, file=None): results['total'] = len(results['data']) return results - def buckets_errorFile(self, id): + def buckets_errorFile(self, bucket_id): """Get a list of all rows that failed to load into the table Parameters ---------- - id : str + bucket_id : str A reference to a Prism Analytics bucket. Returns @@ -1035,11 +1035,11 @@ def buckets_errorFile(self, id): str """ - if id is None: + if bucket_id is None: logger.error('bucket id is required.') return None - operation = f"/buckets/{id}/errorFile" + operation = f"/buckets/{bucket_id}/errorFile" logger.debug("post: {operation}") url = self.prism_endpoint + operation From 2a3c81499e1a187655a9ce88f962fdac09d5ce94 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 08:52:26 -0400 Subject: [PATCH 062/103] Replace id/name with table_id/table_name. --- prism/commands/tables_commands.py | 4 ++-- prism/prism.py | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 1c5b36f..0d90d09 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -37,7 +37,7 @@ def tables_get(ctx, isname, table, limit, offset, type_, compact, search): if not isname and table is not None: # When using an ID, the GET:/tables operation returns a simple # dictionary of the table definition. - table = p.tables_get(id=table, type_=type_) + table = p.tables_get(table_id=table, type_=type_) if table is None: logger.error(f"Table ID {table} not found.") @@ -51,7 +51,7 @@ def tables_get(ctx, isname, table, limit, offset, type_, compact, search): # When querying by name, the get operation returns a # dict with a count of found tables and a list of # tables. 
- tables = p.tables_get(name=table, limit=limit, offset=offset, type_=type_, search=search) + tables = p.tables_get(table_name=table, limit=limit, offset=offset, type_=type_, search=search) if tables['total'] == 0: logger.error(f"Table ID {table} not found.") diff --git a/prism/prism.py b/prism/prism.py index daebb69..66dd179 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -432,7 +432,7 @@ def reset_bearer_token(self): def tables_get( self, - name=None, id=None, + table_name=None, table_id=None, limit=None, offset=None, type_="summary", search=False): @@ -444,10 +444,10 @@ def tables_get( Parameters ---------- - name : str + table_name : str The name of the table to obtain details about. If the default value of None is specified. - id : str + table_id : str The ID of a table to obtain details about. When specified, all tables are searched for the matching id. limit : int @@ -475,8 +475,8 @@ def tables_get( output_type = type_.lower() # If we got a WID, then do a direct query by ID - no paging or searching required. - if id is not None: - operation = f"{operation}/{id}?format={output_type}" + if table_id is not None: + operation = f"{operation}/{table_id}?format={output_type}" logger.debug(f"get: {operation}") url = self.prism_endpoint + operation @@ -503,9 +503,9 @@ def tables_get( } # See if we want to add an explicit table name as a search parameter. - if not search and name is not None: + if not search and table_name is not None: # Here, the caller is not searching, they gave us an exact name. - params["name"] = name.replace(" ", "_") # Minor clean-up + params["name"] = table_name.replace(" ", "_") # Minor clean-up # Should only be 0 (not found) or 1 (found) tables found. params['limit'] = 1 @@ -531,17 +531,17 @@ def tables_get( # Convert the response to a list of tables. tables = r.json() - if not search and name is not None: # Explicit table name + if not search and table_name is not None: # Explicit table name # We are not searching, and we have a specific table - return # whatever we got (maybe nothing). return tables # Figure out what tables of this batch of tables should be part of the # return results, i.e., search the this batch for matches. - if name is not None: + if table_name is not None: # Substring search for matching table names, display names match_tables = [tab for tab in tables["data"] - if name.lower() in tab["name"].lower() or name.lower() in tab["displayName"].lower()] + if table_name.lower() in tab["name"].lower() or table_name.lower() in tab["displayName"].lower()] else: # Grab all the tables in the result match_tables = tables["data"] @@ -637,7 +637,7 @@ def tables_put(self, schema, truncate=False): return None - def tables_patch(self, id, patch): + def tables_patch(self, table_id, patch): """Patch the table with specified values. Notes @@ -647,7 +647,7 @@ def tables_patch(self, id, patch): Parameters ---------- - id : str + table_id : str Prism Table ID of an existing table. patch : dict @@ -659,7 +659,7 @@ def tables_patch(self, id, patch): If the request is successful, a dictionary containing information about the new table is returned, otherwise None. """ - operation = f'/tables/{id}' + operation = f'/tables/{table_id}' logger.debug(f'PATCH: {operation}') url = self.prism_endpoint + operation From b9d870f2a2d24288254de1cc3bad4ea244c42488 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 08:55:03 -0400 Subject: [PATCH 063/103] Replace id/name with table_id/table_name. 
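
Note: with this rename a caller picks an exact ID lookup or a name lookup purely by keyword. A minimal sketch, assuming an already-authenticated prism.Prism client p; the table ID and name values are placeholders:

    # Direct lookup by table ID - returns a single table definition (dict), or None if not found.
    table = p.tables_get(table_id="0123456789abcdef", type_="full")

    # Lookup by API name - returns {"total": n, "data": [...]}; search=True enables substring matching.
    tables = p.tables_get(table_name="My_Table", search=True)
    if tables["total"] > 0:
        table = tables["data"][0]
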
--- prism/commands/tables_commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 0d90d09..26f9719 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -243,7 +243,7 @@ def set_patch_value(attr, value): resolved_id = table else: # Before doing anything, table name must exist. - tables = p.tables_get(name=table, limit=1, search=False) # Exact match + tables = p.tables_get(table_name=table) # Exact match if tables['total'] == 0: logger.error(f'Table name "{table}" not found.') @@ -251,7 +251,7 @@ def set_patch_value(attr, value): resolved_id = tables['data'][0]['id'] - table = p.tables_patch(id=resolved_id, patch=patch_data) + table = p.tables_patch(table_id=resolved_id, patch=patch_data) if table is None: logger.error(f'Error updating table ID {resolved_id}') From 00428b31c68133b893fecc704e8de8978fcb8806 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 10:56:35 -0400 Subject: [PATCH 064/103] Fixup on table commands and added truncate_table to prism. --- prism/__init__.py | 5 ++- prism/commands/tables_commands.py | 72 +++++++++++++------------------ prism/prism.py | 26 ++++++++++- 3 files changed, 58 insertions(+), 45 deletions(-) diff --git a/prism/__init__.py b/prism/__init__.py index c6c181c..f6ee6d5 100644 --- a/prism/__init__.py +++ b/prism/__init__.py @@ -1,9 +1,10 @@ from prism.prism import Prism, set_logging, \ - schema_compact, table_upload_file, resolve_schema + schema_compact, upload_file, load_schema, truncate_table from ._version import get_versions __version__ = get_versions()["version"] del get_versions -__all__ = ["Prism", "set_logging", "schema_compact", "table_upload_file", "resolve_schema"] +__all__ = ["Prism", "set_logging", "schema_compact", "upload_file", + "load_schema", "truncate_table"] diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 26f9719..f22b77f 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -65,12 +65,10 @@ def tables_get(ctx, isname, table, limit, offset, type_, compact, search): @click.command('create') -@click.option('-n', '--name', +@click.option('-n', '--table_name', help='Table name - overrides name from schema.') @click.option('-d', '--displayName', help='Specify a display name - defaults to name.') -@click.option('-t', '--tags', multiple=True, - help='Tags to organize the table in the Data Catalog.') @click.option('-e', '--enableForAnalysis', type=bool, is_flag=True, default=None, help='Enable this table for analytics.') @click.option('-s', '--sourceName', @@ -79,7 +77,7 @@ def tables_get(ctx, isname, table, limit, offset, type_, compact, search): help='The WID of an existing table to copy.') @click.argument('file', required=False, type=click.Path(exists=True)) @click.pass_context -def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, sourcewid, file): +def tables_create(ctx, table_name, displayname, enableforanalysis, sourcename, sourcewid, file): """ Create a new table with the specified name. @@ -90,12 +88,12 @@ def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, s p = ctx.obj['p'] # We can assume a schema was found/built - get_schema sys.exits if there is a problem. - schema = resolve_schema(p, file, sourcename, sourcewid) + schema = load_schema(p, file, sourcename, sourcewid) # Initialize a new schema with the particulars for this table operation. 
- if name is not None: + if table_name is not None: # If we got a name, set it in the table schema - schema['name'] = name.replace(' ', '_') # Minor clean-up + schema['name'] = table_name.replace(' ', '_') # Minor clean-up logger.debug(f'setting table name to {schema["name"]}') elif 'name' not in schema: # The schema doesn't have a name and none was given - exit. @@ -108,7 +106,7 @@ def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, s schema['displayName'] = displayname elif 'displayName' not in schema: # Default the display name to the name if not in the schema. - schema['displayName'] = name + schema['displayName'] = table_name logger.debug(f'defaulting displayName to {schema["displayName"]}') if enableforanalysis is not None: @@ -124,7 +122,7 @@ def tables_create(ctx, name, displayname, tags, enableforanalysis, sourcename, s if table_def is not None: logger.info(json.dumps(table_def, indent=2)) else: - logger.error(f'Error creating table {name}.') + logger.error(f'Error creating table {schema["name"]}.') sys.exit(1) @@ -143,7 +141,7 @@ def tables_edit(ctx, file, truncate): # The user can specify a GET:/tables output file containing # the ID and other attributes that could be passed on the # command line. - schema = resolve_schema(file=file) + schema = load_schema(file=file) table = p.tables_put(schema, truncate=truncate) @@ -174,7 +172,7 @@ def tables_patch(ctx, isname, table, file, If an attribute is not provided in the request, it will not be changed. To set an attribute to blank (empty), include the attribute without specifying a value. - TABLE The ID or API name (use -n option) of the table to patch + [TABLE] The ID or API name (use -n option) of the table to patch [FILE] Optional file containing patch values for the table. """ @@ -183,9 +181,8 @@ def tables_patch(ctx, isname, table, file, # Figure out the new schema either by file or other table. patch_data = {} - # The user can specify a GET:/tables output file containing - # the ID and other attributes that could be passed on the - # command line. + # If a file is specified, there can only be patch values and + # cannot be a full Prism schema. if file is not None: try: with open(file, "r") as patch_file: @@ -206,7 +203,11 @@ def tables_patch(ctx, isname, table, file, sys.exit(1) def set_patch_value(attr, value): - """Utility function to set or clear a table attribute.""" + """Utility function to set or clear a table attribute. + + If the user specifies an attribute but does not provide a value, + add a patch value to clears/null the value + """ if value == '*-clear-*': patch_data[attr] = '' else: @@ -216,7 +217,9 @@ def set_patch_value(attr, value): # values from the patch file. # Note: specifying the option without a value creates a - # patch value to clear the value in the table def. + # patch value to clear the value in the table def. The + # caller can override the values from the patch file using + # command line arguments. if displayname is not None: # Specified on CLI set_patch_value('displayName', displayname) @@ -234,14 +237,11 @@ def set_patch_value(attr, value): # The caller must be asking for something to change! if len(patch_data) == 0: - logger.error("Specify at least one schema value to update.") + logger.error("Specify at least one table schema value to update.") sys.exit(1) # Identify the existing table we are about to patch. if not isname: - # No verification, simply assume the ID is valid. - resolved_id = table - else: # Before doing anything, table name must exist. 
tables = p.tables_get(table_name=table) # Exact match @@ -250,6 +250,9 @@ def set_patch_value(attr, value): sys.exit(1) resolved_id = tables['data'][0]['id'] + else: + # No verification needed, simply assume the ID is valid. + resolved_id = table table = p.tables_patch(table_id=resolved_id, patch=patch_data) @@ -284,9 +287,9 @@ def tables_upload(ctx, table, isname, operation, file): sys.exit(1) if isname: - results = table_upload_file(p, table_name=table, operation=operation) + results = upload_file(p, table_name=table, operation=operation) else: - results = table_upload_file(p, table_id=table, operation=operation) + results = upload_file(p, table_id=table, operation=operation) logger.debug(json.dumps(results, indent=2)) @@ -303,26 +306,13 @@ def tables_truncate(ctx, table, isname): [TABLE] The Prism Table ID or API name of the table to truncate. """ p = ctx.obj['p'] - msg = f'Unable to truncate table "{table}" - see log for details.' - # To do a truncate, we still need a bucket with a truncate operation. if isname: - bucket = p.buckets_create(target_name=table, operation='TruncateAndInsert') + result = truncate_table(p, table_name=table) else: - bucket = p.buckets_create(target_id=table, operation='TruncateAndInsert') + result = truncate_table(p, table_id=table) - if bucket is None: - logger.error(msg) - sys.exit(1) - - bucket_id = bucket['id'] - - # Don't specify a file to put a zero sized file into the bucket. - p.buckets_files(bucket_id) - - # Ask Prism to run the delete statement by completing the bucket. - bucket = p.buckets_complete(bucket_id) - - if bucket is None: - logger.error(msg) - sys.exit(1) + if result is None: + logger.warning("Table was not truncated.") + else: + logger.info(json.dumps(result, indent=2)) diff --git a/prism/prism.py b/prism/prism.py index 66dd179..45ea7cc 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -1447,7 +1447,7 @@ def resolve_file_list(files): return target_files -def table_upload_file(p, file, table_id=None, table_name=None, operation="TruncateAndInsert"): +def upload_file(p, file, table_id=None, table_name=None, operation="TruncateAndInsert"): """Create a new Prism table. Parameters @@ -1494,7 +1494,29 @@ def table_upload_file(p, file, table_id=None, table_name=None, operation="Trunca return file_results -def resolve_schema(p=None, file=None, source_name=None, source_id=None): +def truncate_table(p, table_id=None, table_name=None): + # To do a truncate, we still need a bucket with a truncate operation. + if table_id is not None: + bucket = p.buckets_create(target_id=table_id, operation='TruncateAndInsert') + else: + bucket = p.buckets_create(target_name=table_name, operation='TruncateAndInsert') + + if bucket is None: + logger.error(f'Unable to truncate table - see log for details.') + return None + + bucket_id = bucket['id'] + + # Don't specify a file to put a zero sized file into the bucket. + p.buckets_files(bucket_id) + + # Ask Prism to run the delete statement by completing the bucket. + bucket = p.buckets_complete(bucket_id) + + return bucket + + +def load_schema(p=None, file=None, source_name=None, source_id=None): """Get or extract a schema from a file or existing Prism table.""" # Start with a blank schema definition. 
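
Note: the module-level helpers renamed above (load_schema, upload_file, truncate_table) can be called directly as well as through the CLI. A minimal sketch, assuming an already-authenticated prism.Prism client p; the file and table names are placeholders:

    import prism

    # Build a schema dict from a local JSON file (or from an existing table via source_name/source_id).
    schema = prism.load_schema(p, file="my_table_schema.json")

    # Load one or more CSV/gzip files into an existing table with the requested load operation.
    results = prism.upload_file(p, ["part1.csv.gz"], table_name="My_Table", operation="TruncateAndInsert")

    # Empty a table by completing a TruncateAndInsert bucket that contains no files.
    bucket = prism.truncate_table(p, table_name="My_Table")
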
From b1976d5bba199edd486c7e6932f59d2ae1c4836f Mon Sep 17 00:00:00 2001
From: wd-mgreynolds
Date: Tue, 24 Oct 2023 10:59:37 -0400
Subject: [PATCH 065/103] Bug fix table_name in tables_get by name

---
 prism/prism.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/prism/prism.py b/prism/prism.py
index 45ea7cc..c7ab152 100644
--- a/prism/prism.py
+++ b/prism/prism.py
@@ -880,13 +880,13 @@ def buckets_create(
                 return None
         else:
             if target_id is not None:  # Always use ID if provided - has precedence.
-                table = self.tables_get(id=target_id, type_="full")  # Full=include fields object
+                table = self.tables_get(table_id=target_id, type_="full")  # Full=include fields object
 
                 if table is None:
                     logger.error(f'table ID {target_id} not found.')
                     return None
             else:
-                tables = self.tables_get(name=target_name, type_="full")
+                tables = self.tables_get(table_name=target_name, type_="full")
 
                 if tables["total"] == 0:
                     logger.error(f"table not found for bucket operation.")

From b0b67b2586bad4a6190fcb458b2e87ce27b5f6ee Mon Sep 17 00:00:00 2001
From: wd-mgreynolds
Date: Tue, 24 Oct 2023 11:04:38 -0400
Subject: [PATCH 066/103] Remove username and password parameters.

---
 prism/cli.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/prism/cli.py b/prism/cli.py
index 6bc5f83..6bbdc0a 100644
--- a/prism/cli.py
+++ b/prism/cli.py
@@ -86,8 +86,6 @@ def cli(ctx,
 
                 base_url = param_fixup(base_url, config, config_name, "workday_base_url")
                 tenant_name = param_fixup(tenant_name, config, config_name, "workday_tenant_name")
-                username = param_fixup(username, config, config_name, "workday_username")
-                password = param_fixup(password, config, config_name, "workday_password")
                 client_id = param_fixup(client_id, config, config_name, "prism_client_id")
                 client_secret = param_fixup(client_secret, config, config_name, "prism_client_secret")
                 refresh_token = param_fixup(refresh_token, config, config_name, "prism_refresh_token")

From 9d61ee829ae01b60e056df4178457e23b5a95439 Mon Sep 17 00:00:00 2001
From: wd-mgreynolds
Date: Tue, 24 Oct 2023 11:09:31 -0400
Subject: [PATCH 067/103] Fixed bucket post messages.

---
 prism/prism.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/prism/prism.py b/prism/prism.py
index c7ab152..e3be0e4 100644
--- a/prism/prism.py
+++ b/prism/prism.py
@@ -987,7 +987,7 @@ def buckets_files(self, bucket_id, file=None):
             each file.
         """
         operation = f"/buckets/{bucket_id}/files"
-        logger.debug("post: {operation}")
+        logger.debug(f"post: {operation}")
         url = self.prism_endpoint + operation
 
         results = {'total': 0, 'data': []}  # Always return a valid list - regardless of files
@@ -1040,7 +1040,7 @@ def buckets_errorFile(self, bucket_id):
             return None
 
         operation = f"/buckets/{bucket_id}/errorFile"
-        logger.debug("post: {operation}")
+        logger.debug(f"post: {operation}")
         url = self.prism_endpoint + operation
 
         response = self.http_get(url)

From 80134a17e96fa8e9b0b8d6fbff12c321662cea3a Mon Sep 17 00:00:00 2001
From: wd-mgreynolds
Date: Tue, 24 Oct 2023 11:17:01 -0400
Subject: [PATCH 068/103] Fixed bug in tables_upload to pass file list.

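
Note: the fix below simply forwards the Click file argument (a tuple of one or more paths) through to upload_file. A short sketch of the equivalent direct call, with placeholder values and an assumed authenticated client p:

    results = upload_file(p, table_name="My_Table", file=["2023_q1.csv.gz", "2023_q2.csv.gz"],
                          operation="TruncateAndInsert")
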
--- prism/commands/tables_commands.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index f22b77f..bfb6b59 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -287,9 +287,9 @@ def tables_upload(ctx, table, isname, operation, file): sys.exit(1) if isname: - results = upload_file(p, table_name=table, operation=operation) + results = upload_file(p, table_name=table, file=file, operation=operation) else: - results = upload_file(p, table_id=table, operation=operation) + results = upload_file(p, table_id=table, file=file, operation=operation) logger.debug(json.dumps(results, indent=2)) From 047cad8569767ff1a10eb3d6b051eff2566494f8 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 12:47:22 -0400 Subject: [PATCH 069/103] Fixed bug in tables_upload to pass file list. --- prism/commands/buckets_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index 4fab57e..5ae37c6 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -198,7 +198,7 @@ def buckets_errorFile(ctx, isname, bucket): else: bucket_id = bucket - error_file = p.buckets_errorFile(bucket_id=buckets['data'][0]['id']) + error_file = p.buckets_errorFile(bucket_id=bucket_id) logger.info(error_file) From 70ebd03654cd96104a23efeaebd3d44c7a087919 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 13:14:30 -0400 Subject: [PATCH 070/103] Updated datachange and filecontainers methods to use datachange_id and filecontainer_id. --- prism/commands/buckets_commands.py | 20 ++++++------ prism/commands/dataChanges_commands.py | 18 +++++------ prism/prism.py | 44 +++++++++++++------------- 3 files changed, 41 insertions(+), 41 deletions(-) diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index 5ae37c6..0c7311a 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -30,11 +30,11 @@ def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): NOTE: For table name searching, the Display Name is searched not the API Name. """ - p = ctx.obj["p"] if isname and bucket is None and table is None: - # It's invalid to add the --isName switch without providing a bucket id or table name. + # It's invalid to add the --isName switch without providing + # a bucket id or table name. logger.error('To get buckets by name, please provide a bucket name.') sys.exit(1) @@ -71,21 +71,22 @@ def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): help="Schema JSON file for the target table.") @click.option("-o", "--operation", default="TruncateAndInsert", show_default=True, help="Operation to perform on the table.") -@click.argument("name", required=False) +@click.argument("bucket", required=False) @click.pass_context -def buckets_create(ctx, target_name, target_id, file, operation, name): +def buckets_create(ctx, target_name, target_id, file, operation, bucket): """ Create a new bucket with the specified name. - [NAME] explicit bucket name to create otherwise default. + [BUCKET] explicit bucket name to create otherwise default. 
""" p = ctx.obj["p"] if target_name is None and target_id is None and file is None: - logger.error("A table must be associated with this bucket (-n, -i, or -f must be specified).") + logger.error("A table must be associated with this bucket.") sys.exit(1) - bucket = p.buckets_create(bucket_name=name, target_id=target_id, target_name=target_name, + bucket = p.buckets_create(bucket_name=bucket, + target_id=target_id, target_name=target_name, schema=file, operation=operation) if bucket is not None: @@ -157,7 +158,6 @@ def buckets_complete(ctx, isname, bucket): else: bucket = buckets["data"][0] else: - # If the caller passed both a name and WID, then use the WID first. bucket = p.buckets_list(bucket_id=bucket) if bucket is None: @@ -166,8 +166,8 @@ def buckets_complete(ctx, isname, bucket): bucket_state = bucket["state"]["descriptor"] - if bucket_state != "New": - logger.error(f"Bucket state is \"{bucket_state}\" - only \"New.\" buckets can be completed.") + if bucket_state != 'New': + logger.error(f'Bucket state is "{bucket_state}" - only "New" buckets can be completed.') sys.exit(1) logger.info(p.buckets_complete(bucket["id"])) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index b09ac10..73075af 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -20,7 +20,7 @@ help="Use contains search substring for --name or --id (default=false).") @click.argument("dct", required=False) @click.pass_context -def dataChanges_get(ctx, isname, dct, limit, offset, type_, format_, search): +def dataChanges_get(ctx, isname, dct, limit, offset, type_, search): """View the data change tasks permitted by the security profile of the current user. [dct] A reference to a Prism Analytics Data Change Task. @@ -30,7 +30,7 @@ def dataChanges_get(ctx, isname, dct, limit, offset, type_, format_, search): # Separate the get calls because an ID lookup returns a dict and a name lookup # always returns an object/list structure with zero or more matching DCTs. if isname: - data_change_task = p.dataChanges_get(name=dct, limit=limit, offset=offset, search=search, type=type_) + data_change_task = p.dataChanges_get(datachange_name=dct, limit=limit, offset=offset, search=search, type=type_) if data_change_task["total"] == 0: logger.warning("No data change task(s) found.") @@ -41,7 +41,7 @@ def dataChanges_get(ctx, isname, dct, limit, offset, type_, format_, search): data_change_task["data"], key=lambda dct_srt: dct_srt["displayName"].lower()) else: - data_change_task = p.dataChanges_get(id=dct, limit=limit, offset=offset, type_=type_) + data_change_task = p.dataChanges_get(datachange_id=dct, limit=limit, offset=offset, type_=type_) if data_change_task is None: logger.error(f'Data change task {dct} not found.') @@ -192,7 +192,7 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): p = ctx.obj["p"] if isname: - data_change_tasks = p.dataChanges_get(name=dct) + data_change_tasks = p.dataChanges_get(datachange_name=dct) if data_change_tasks['total'] == 0: logger.error('Data change task not found.') @@ -204,17 +204,17 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): dct_id = dct # Specifying None for the ID to create a new file container. 
- file_container = p.fileContainers_load(id=None, file=file) + file_container = p.fileContainers_load(filecontainer_id=None, file=file) if file_container['total'] == 0: logger.error('Error loading file container.') sys.exit(1) - fid = file_container['id'] - logger.debug(f'new file container ID: {fid}') + filecontainer_id = file_container['id'] + logger.debug(f'new file container ID: {filecontainer_id}') # Execute the DCT. - activity = p.dataChanges_activities_post(id=dct_id, fileContainerID=fid) + activity = p.dataChanges_activities_post(datachange_id=dct_id, fileContainer_id=filecontainer_id) if 'errors' in activity: # Add the ID of the DCT for easy identification. @@ -232,7 +232,7 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): while True: time.sleep(10) - activity = p.dataChanges_activities_get(id=dct_id, activityID=activity_id) + activity = p.dataChanges_activities_get(datachange_id=dct_id, activityID=activity_id) status = activity['state']['descriptor'] diff --git a/prism/prism.py b/prism/prism.py index e3be0e4..9da1758 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -1150,7 +1150,7 @@ def dataChanges_get(self, return data_changes - def dataChanges_activities_get(self, id, activityID): + def dataChanges_activities_get(self, datachange_id, activity_id): """Returns details of the activity specified by activityID. Parameters @@ -1158,7 +1158,7 @@ def dataChanges_activities_get(self, id, activityID): id : str A reference to a Prism Analytics data change. """ - operation = f"/dataChanges/{id}/activities/{activityID}" + operation = f"/dataChanges/{datachange_id}/activities/{activity_id}" logger.debug(f"dataChanges_activities_get: {operation}") url = self.prism_endpoint + operation @@ -1169,31 +1169,31 @@ def dataChanges_activities_get(self, id, activityID): return None - def dataChanges_activities_post(self, id, fileContainerID=None): + def dataChanges_activities_post(self, datachange_id, filecontainer_id=None): """Execute a data change task. Parameters ---------- - id : str + datachange_id : str A reference to a Prism Analytics data change. - fileContainerID : str + filecontainer_id : str A reference to a Prism Analytics File Container. Returns ------- """ - operation = f"/dataChanges/{id}/activities" + operation = f"/dataChanges/{datachange_id}/activities" logger.debug(f"post: {operation}") url = self.prism_endpoint + operation - if fileContainerID is None: + if filecontainer_id is None: logger.debug("no file container ID") data = None else: - logger.debug('with file container ID: {fileContainerID}') + logger.debug('with file container ID: {filecontainer_id}') # NOTE: the name is NOT correct based on the API definition - data = json.dumps({"fileContainerWid": fileContainerID}) + data = json.dumps({"fileContainerWid": filecontainer_id}) r = self.http_post(url, headers=self.CONTENT_APP_JSON, data=data) @@ -1209,12 +1209,12 @@ def dataChanges_activities_post(self, id, fileContainerID=None): return None - def dataChanges_is_valid(self, id): + def dataChanges_is_valid(self, datachange_id): """Utility method to return the validation status of a data change task. Parameters ---------- - id : str + datachange_id : str A reference to a Prism Analytics data change. 
Returns @@ -1226,29 +1226,29 @@ def dataChanges_is_valid(self, id): dct = self.dataChanges_validate(id) if dct is None: - logger.error(f"data_change_id {id} not found!") + logger.error(f"data_change_id {datachange_id} not found!") return False if "error" in dct: - logger.error(f"data_change_id {id} is not valid!") + logger.error(f"data_change_id {datachange_id} is not valid!") return False # There is no specific status value to check, we simply get # a small JSON object with the ID of the DCT if it is valid. return True - def dataChanges_validate(self, id): + def dataChanges_validate(self, datachange_id): """validates the data change specified by dataChangeID Parameters ---------- - id : str + datachange_id : str The data change task ID to validate. Returns ------- """ - operation = f"/dataChanges/{id}/validate" + operation = f"/dataChanges/{datachange_id}/validate" logger.debug(f"dataChanges_validate: get {operation}") url = self.prism_endpoint + operation @@ -1288,14 +1288,14 @@ def fileContainers_create(self): if r.status_code == 201: return_json = r.json() - file_container_id = return_json["id"] - logger.debug(f"successfully created file container: {file_container_id}") + filecontainer_id = return_json["id"] + logger.debug(f"successfully created file container: {filecontainer_id}") return return_json return None - def fileContainers_get(self, id): + def fileContainers_get(self, filecontainer_id): """Return all files for a file container. Parameters @@ -1309,7 +1309,7 @@ def fileContainers_get(self, id): of files uploaded and a data attribute with an array of file metadata for each file in the container. """ - operation = f"/fileContainers/{id}/files" + operation = f"/fileContainers/{filecontainer_id}/files" logger.debug(f"fileContainers_list: get {operation}") url = self.prism_endpoint + operation @@ -1325,13 +1325,13 @@ def fileContainers_get(self, id): return {"total": 0, 'data': []} # Always return a list. - def fileContainers_load(self, id, file): + def fileContainers_load(self, filecontainer_id, file): """ Load one or more files to a fileContainer. Parameters ---------- - id : str + filecontainer_id : str File container ID of target container. file : str|list File name(s) to load into the container From 9004bdb1396147517d4a4c585778f4682ec24297 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 13:22:33 -0400 Subject: [PATCH 071/103] Bug fix dataChanges_get. --- prism/prism.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/prism/prism.py b/prism/prism.py index 9da1758..4377625 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -1051,7 +1051,7 @@ def buckets_errorFile(self, bucket_id): return None def dataChanges_get(self, - name=None, id=None, + datachange_name=None, datachange_id=None, limit=None, offset=None, type_='summary', search=False, refresh=False): @@ -1066,8 +1066,8 @@ def dataChanges_get(self, # Searching by ID is a special case that eliminates all other types # of search. Ask for the datachange by id and return just this # result - even blank. 
- if id is not None and isinstance(id, str) and len(id) > 0: - operation = f"{operation}/{id}?type={output_type}" + if datachange_id is not None and isinstance(datachange_id, str) and len(datachange_id) > 0: + operation = f"{operation}/{datachange_id}?type={output_type}" logger.debug(f'dataChanges_get: {operation}') url = self.prism_endpoint + operation @@ -1095,17 +1095,17 @@ def dataChanges_get(self, searching = False name_param = "" - if name is not None and isinstance(name, str) and len(name) > 0: + if datachange_name is not None and isinstance(datachange_name, str) and len(datachange_name) > 0: if search is not None and isinstance(search, bool) and search: # Force a return of ALL data change tasks, so we can search the names. - name_param = "" + name_param = "" # Added to the query params searching = True search_limit = 500 search_offset = 0 else: # With an explicit name, we should return at most 1 result. - name_param = "&name=" + urlparse.quote(name) + name_param = "&name=" + urlparse.quote(datachange_name) searching = False search_limit = 1 @@ -1131,8 +1131,8 @@ def dataChanges_get(self, if searching: # Only add matching rows data_changes["data"] += \ - filter(lambda dtc: dtc["name"].find(name) != -1 or - dtc["displayName"].find(name) != -1, + filter(lambda dtc: dtc["name"].find(datachange_name) != -1 or + dtc["displayName"].find(datachange_name) != -1, return_json["data"]) else: # Without searching, simply paste the current page to the list. @@ -1144,7 +1144,7 @@ def dataChanges_get(self, break # Go to the next page. - offset += search_limit + search_offset += search_limit data_changes["total"] = len(data_changes["data"]) @@ -1155,8 +1155,10 @@ def dataChanges_activities_get(self, datachange_id, activity_id): Parameters ---------- - id : str + datachange_id : str A reference to a Prism Analytics data change. + activity_id : str + A reference to a Prism Analytics activity. """ operation = f"/dataChanges/{datachange_id}/activities/{activity_id}" logger.debug(f"dataChanges_activities_get: {operation}") From 723bf2c8a975da4aed8cdf0d5e991e5fd7270172 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 13:24:04 -0400 Subject: [PATCH 072/103] Bug fix dataChanges get. --- prism/commands/dataChanges_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 73075af..4f43459 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -30,7 +30,7 @@ def dataChanges_get(ctx, isname, dct, limit, offset, type_, search): # Separate the get calls because an ID lookup returns a dict and a name lookup # always returns an object/list structure with zero or more matching DCTs. if isname: - data_change_task = p.dataChanges_get(datachange_name=dct, limit=limit, offset=offset, search=search, type=type_) + data_change_task = p.dataChanges_get(datachange_name=dct, limit=limit, offset=offset, search=search, type_=type_) if data_change_task["total"] == 0: logger.warning("No data change task(s) found.") From 737c4d621904603d51f876c0c3496b8b75ad755b Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 15:18:01 -0400 Subject: [PATCH 073/103] Bug fix dataChanges get case-insensitive search. 
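
Note: after this fix a name search matches case-insensitively against both the API name and the display name of each task. A minimal sketch, assuming an authenticated client p; the task name is a placeholder:

    # search=True scans all data change tasks and substring-matches the name, ignoring case.
    dcts = p.dataChanges_get(datachange_name="monthly headcount", search=True)
    for dct in dcts["data"]:
        print(dct["id"], dct["displayName"])
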
--- prism/commands/dataChanges_commands.py | 3 ++- prism/prism.py | 20 ++++++++------------ 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 4f43459..20929a7 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -30,7 +30,8 @@ def dataChanges_get(ctx, isname, dct, limit, offset, type_, search): # Separate the get calls because an ID lookup returns a dict and a name lookup # always returns an object/list structure with zero or more matching DCTs. if isname: - data_change_task = p.dataChanges_get(datachange_name=dct, limit=limit, offset=offset, search=search, type_=type_) + data_change_task = p.dataChanges_get(datachange_name=dct, limit=limit, + offset=offset, search=search, type_=type_) if data_change_task["total"] == 0: logger.warning("No data change task(s) found.") diff --git a/prism/prism.py b/prism/prism.py index 4377625..2b32dce 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -541,7 +541,8 @@ def tables_get( if table_name is not None: # Substring search for matching table names, display names match_tables = [tab for tab in tables["data"] - if table_name.lower() in tab["name"].lower() or table_name.lower() in tab["displayName"].lower()] + if table_name.lower() in tab["name"].lower() or + table_name.lower() in tab["displayName"].lower()] else: # Grab all the tables in the result match_tables = tables["data"] @@ -595,7 +596,7 @@ def tables_post(self, schema): return None - def tables_put(self, schema, truncate=False): + def tables_put(self, schema): """Update an existing table using a full schema definition. Notes @@ -608,10 +609,6 @@ def tables_put(self, schema, truncate=False): schema : dict A dictionary containing the schema - truncate : bool - True to automatically truncate the table before - applying the new schema. - Returns ------- dict @@ -851,7 +848,6 @@ def buckets_create( bucket_name = bucket_name table_schema = None - bucket_schema = None if schema is not None: if isinstance(schema, dict): @@ -1129,10 +1125,10 @@ def dataChanges_get(self, return_json = response.json() if searching: - # Only add matching rows + # Only add matching rows - check name and displayName data_changes["data"] += \ - filter(lambda dtc: dtc["name"].find(datachange_name) != -1 or - dtc["displayName"].find(datachange_name) != -1, + filter(lambda dtc: dtc["name"].lower().find(datachange_name.lower()) != -1 or + dtc["displayName"].lower().find(datachange_name.lower()) != -1, return_json["data"]) else: # Without searching, simply paste the current page to the list. @@ -1302,7 +1298,7 @@ def fileContainers_get(self, filecontainer_id): Parameters ---------- - id : str + filecontainer_id : str File container ID to list. Returns @@ -1345,7 +1341,7 @@ def fileContainers_load(self, filecontainer_id, file): """ # Create the specified fID - a new ID is created if None. - resolved_fid = id + resolved_fid = filecontainer_id # No testing here, just use it. target_files = resolve_file_list(file) From 86a3bb4eabbff4881cb448fa9e91f2a7753c4215 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 15:29:51 -0400 Subject: [PATCH 074/103] Bug fix dataChanges validate. 
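
Note: validation takes the resolved task ID; when the task is valid the service returns a small JSON object with that ID, otherwise the payload carries an "error" entry (see dataChanges_is_valid above). A short sketch with a placeholder ID and an assumed authenticated client p:

    result = p.dataChanges_validate("0123456789abcdef")
    if result is not None and "error" not in result:
        print("data change task is valid")
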
--- prism/commands/dataChanges_commands.py | 2 +- prism/prism.py | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 20929a7..7181df1 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -68,7 +68,7 @@ def dataChanges_validate(ctx, isname, dct, search): p = ctx.obj["p"] if not isname: - validate = p.dataChanges_validate(id) + validate = p.dataChanges_validate(dct) logger.info(json.dumps(validate, indent=2)) else: data_change_tasks = p.dataChanges_get(name=dct, search=search) diff --git a/prism/prism.py b/prism/prism.py index 2b32dce..3694c16 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -1074,7 +1074,7 @@ def dataChanges_get(self, return None - logger.debug(f"dataChanges_get: {operation}") + logger.debug(f'dataChanges_get: {operation}') url = self.prism_endpoint + operation # Get a list of tasks by page, with or without searching. @@ -1101,7 +1101,7 @@ def dataChanges_get(self, search_offset = 0 else: # With an explicit name, we should return at most 1 result. - name_param = "&name=" + urlparse.quote(datachange_name) + name_param = f'&name={urlparse.quote(datachange_name)}' searching = False search_limit = 1 @@ -1114,8 +1114,7 @@ def dataChanges_get(self, data_changes = {"total": 0, "data": []} while True: - search_url = f"{url}?type={output_type}&limit={search_limit}&offset={search_offset}{name_param}" - logger.debug(f"dataChangesID url: {search_url}") + search_url = f'{url}?type={output_type}&limit={search_limit}&offset={search_offset}{name_param}' response = self.http_get(url=search_url) From 69ceea41c774cbb3f52d14daf76e2f63c691c8dd Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 24 Oct 2023 15:31:28 -0400 Subject: [PATCH 075/103] Bug fix dataChanges validate. --- prism/commands/dataChanges_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 7181df1..beceae8 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -71,7 +71,7 @@ def dataChanges_validate(ctx, isname, dct, search): validate = p.dataChanges_validate(dct) logger.info(json.dumps(validate, indent=2)) else: - data_change_tasks = p.dataChanges_get(name=dct, search=search) + data_change_tasks = p.dataChanges_get(datachange_name=dct, search=search) if data_change_tasks["total"] == 0: logger.error("No matching data change task(s) found.") From af5c7a217d62ad40a118eba0ff60857a4b1dd182 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Wed, 25 Oct 2023 08:02:34 -0400 Subject: [PATCH 076/103] Reformat to PEP8 using Black. 
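
Note: this patch adds no Black configuration file, so presumably the default settings (88-character lines, double-quote normalization) were used. Roughly, via Black's Python API, the reformat below is equivalent to:

    # Illustrative only - formats a one-line snippet the way this patch reformats the package sources.
    import black
    print(black.format_str("def cli(ctx, base_url, tenant_name): pass", mode=black.Mode()))
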
--- prism/__init__.py | 20 +- prism/cli.py | 141 +++++--- prism/commands/buckets_commands.py | 189 +++++++--- prism/commands/dataChanges_commands.py | 178 +++++++--- prism/commands/dataExport_commands.py | 53 ++- prism/commands/fileContainers_commands.py | 14 +- prism/commands/tables_commands.py | 274 +++++++++------ prism/prism.py | 398 +++++++++++++--------- 8 files changed, 842 insertions(+), 425 deletions(-) diff --git a/prism/__init__.py b/prism/__init__.py index f6ee6d5..a0770d5 100644 --- a/prism/__init__.py +++ b/prism/__init__.py @@ -1,10 +1,22 @@ -from prism.prism import Prism, set_logging, \ - schema_compact, upload_file, load_schema, truncate_table +from prism.prism import ( + Prism, + set_logging, + schema_compact, + upload_file, + load_schema, + truncate_table, +) from ._version import get_versions __version__ = get_versions()["version"] del get_versions -__all__ = ["Prism", "set_logging", "schema_compact", "upload_file", - "load_schema", "truncate_table"] +__all__ = [ + "Prism", + "set_logging", + "schema_compact", + "upload_file", + "load_schema", + "truncate_table", +] diff --git a/prism/cli.py b/prism/cli.py index 6bbdc0a..091ae75 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -27,36 +27,86 @@ def param_fixup(value, config, config_name, option): @click.group(help="CLI for interacting with Workday’s Prism API") # Tenant specific parameters -@click.option("--base_url", envvar="workday_base_url", type=str, required=False, - help="The base URL for the API client") -@click.option("--tenant_name", envvar="workday_tenant_name", type=str, required=False, - help="The name of your Workday tenant") -# Credentials parameters -@click.option("--username", envvar="workday_username", type=str, required=False, - help="The login username of your Workday user") -@click.option("--password", envvar="workday_password", type=str, required=False, - help="The password of your Workday user") -@click.option("--client_id", envvar="prism_client_id", type=str, required=False, - help="The client ID for your registered API client") -@click.option("--client_secret", envvar="prism_client_secret", type=str, required=False, - help="The client secret for your registered API client") -@click.option("--refresh_token", envvar="prism_refresh_token", type=str, required=False, - help="The refresh token for your registered API client") +@click.option( + "--base_url", + envvar="workday_base_url", + type=str, + required=False, + help="The base URL for the API client", +) +@click.option( + "--tenant_name", + envvar="workday_tenant_name", + type=str, + required=False, + help="The name of your Workday tenant", +) +@click.option( + "--client_id", + envvar="prism_client_id", + type=str, + required=False, + help="The client ID for your registered API client", +) +@click.option( + "--client_secret", + envvar="prism_client_secret", + type=str, + required=False, + help="The client secret for your registered API client", +) +@click.option( + "--refresh_token", + envvar="prism_refresh_token", + type=str, + required=False, + help="The refresh token for your registered API client", +) # Operational parameters -@click.option("--log_level", envvar="prism_log_level", type=str, required=False, - help="Level of debugging to display - default = INFO") -@click.option("--log_file", envvar="prism_log_file", type=str, required=False, - help="Output file for logging - default = prism.log") -@click.option("--config_file", envvar="prism_config_file", type=click.Path(exists=True), required=False, - help="The name of a configuration with 
parameters for connections and logging.") -@click.option("--config_name", envvar="prism_config_name", type=str, required=False, default="default", - help="The name of a configuration in the configuration file.") +@click.option( + "--log_level", + envvar="prism_log_level", + type=str, + required=False, + help="Level of debugging to display - default = INFO", +) +@click.option( + "--log_file", + envvar="prism_log_file", + type=str, + required=False, + help="Output file for logging - default = prism.log", +) +@click.option( + "--config_file", + envvar="prism_config_file", + type=click.Path(exists=True), + required=False, + help="The name of a configuration with parameters for connections and logging.", +) +@click.option( + "--config_name", + envvar="prism_config_name", + type=str, + required=False, + default="default", + help="The name of a configuration in the configuration file.", +) @click.pass_context -def cli(ctx, - base_url, tenant_name, - username, password, client_id, client_secret, refresh_token, - log_level, log_file, - config_file, config_name): +def cli( + ctx, + base_url, + tenant_name, + username, + password, + client_id, + client_secret, + refresh_token, + log_level, + log_file, + config_file, + config_name, +): # Attempt to locate a configuration file - this is not required and config # parameters are only used if the configuration values are not passed on # the command line or by environment variables. @@ -84,15 +134,29 @@ def cli(ctx, # Any value not passed and not in the environment arrives here with # the value "None" - override these with the configuration values. - base_url = param_fixup(base_url, config, config_name, "workday_base_url") - tenant_name = param_fixup(tenant_name, config, config_name, "workday_tenant_name") - client_id = param_fixup(client_id, config, config_name, "prism_client_id") - client_secret = param_fixup(client_secret, config, config_name, "prism_client_secret") - refresh_token = param_fixup(refresh_token, config, config_name, "prism_refresh_token") - log_level = param_fixup(log_level, config, config_name, "prism_log_level") + base_url = param_fixup( + base_url, config, config_name, "workday_base_url" + ) + tenant_name = param_fixup( + tenant_name, config, config_name, "workday_tenant_name" + ) + client_id = param_fixup( + client_id, config, config_name, "prism_client_id" + ) + client_secret = param_fixup( + client_secret, config, config_name, "prism_client_secret" + ) + refresh_token = param_fixup( + refresh_token, config, config_name, "prism_refresh_token" + ) + log_level = param_fixup( + log_level, config, config_name, "prism_log_level" + ) log_file = param_fixup(log_file, config, config_name, "prism_log_file") else: - click.echo(f"The specified configuration [{config_name}] does not exist in the configuration file.") + click.echo( + f"The specified configuration [{config_name}] does not exist in the configuration file." + ) sys.exit(1) except configparser.Error: click.echo(f"Error accessing configuration file {filename}.") @@ -105,12 +169,12 @@ def cli(ctx, set_level = getattr(logging, log_level) # Translate text level to level value. # Setup logging for CLI operations. - logger = logging.getLogger('prismCLI') + logger = logging.getLogger("prismCLI") logger.setLevel(set_level) # Create an explicit console handler to handle just INFO message, i.e., # script output. 
- formatter = logging.Formatter('%(message)s') + formatter = logging.Formatter("%(message)s") ch = logging.StreamHandler(sys.stdout) ch.setFormatter(formatter) @@ -119,7 +183,7 @@ def cli(ctx, # If the log level is not INFO, create a separate stream # for logging additional levels. - logging_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' + logging_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s" formatter = logging.Formatter(logging_format) if set_level != logging.INFO: @@ -203,6 +267,7 @@ def dataChanges(): dataChanges.add_command(d_commands.dataChanges_activities) dataChanges.add_command(d_commands.dataChanges_upload) + @cli.group("dataExport") def dataExport(): """ diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index 0c7311a..e711018 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -3,22 +3,48 @@ import sys import click -logger = logging.getLogger('prismCLI') +logger = logging.getLogger("prismCLI") @click.command("get") -@click.option('-n', '--isName', is_flag=True, default=False, - help='Flag to treat the bucket or table argument as a name.') -@click.option("-l", "--limit", default=None, type=int, - help="The maximum number of object data entries included in the response, default=-1 (all).") -@click.option("-o", "--offset", default=None, type=int, - help="The offset to the first object in a collection to include in the response.") -@click.option("-t", "--type", "type_", default="summary", show_default=True, - help="How much information to be returned in response JSON.") -@click.option("-s", "--search", is_flag=True, show_default=True, default=False, - help="Use substring search bucket or table.") -@click.option("--table", - help="The id or name of a Prism table to list all buckets.") +@click.option( + "-n", + "--isName", + is_flag=True, + default=False, + help="Flag to treat the bucket or table argument as a name.", +) +@click.option( + "-l", + "--limit", + default=None, + type=int, + help="The maximum number of object data entries included in the response, default=-1 (all).", +) +@click.option( + "-o", + "--offset", + default=None, + type=int, + help="The offset to the first object in a collection to include in the response.", +) +@click.option( + "-t", + "--type", + "type_", + default="summary", + show_default=True, + help="How much information to be returned in response JSON.", +) +@click.option( + "-s", + "--search", + is_flag=True, + show_default=True, + default=False, + help="Use substring search bucket or table.", +) +@click.option("--table", help="The id or name of a Prism table to list all buckets.") @click.argument("bucket", required=False) @click.pass_context def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): @@ -35,7 +61,7 @@ def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): if isname and bucket is None and table is None: # It's invalid to add the --isName switch without providing # a bucket id or table name. - logger.error('To get buckets by name, please provide a bucket name.') + logger.error("To get buckets by name, please provide a bucket name.") sys.exit(1) if not isname and bucket is not None: @@ -53,24 +79,40 @@ def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): else: # Search by table ID or name. 
if isname: - buckets = p.buckets_get(table_name=table, search=search, - limit=limit, offset=offset, type_=type_) + buckets = p.buckets_get( + table_name=table, search=search, limit=limit, offset=offset, type_=type_ + ) else: - buckets = p.buckets_get(table_id=table, - limit=limit, offset=offset, type_=type_) + buckets = p.buckets_get( + table_id=table, limit=limit, offset=offset, type_=type_ + ) logger.info(json.dumps(buckets, indent=2)) @click.command("create") -@click.option("-n", "--target_name", default=None, - help="Table name to associate with the bucket.") -@click.option("-i", "--target_id", default=None, - help="Table ID to associate with the table.") -@click.option("-f", "--file", "file", required=False, default=None, type=click.Path(exists=True), - help="Schema JSON file for the target table.") -@click.option("-o", "--operation", default="TruncateAndInsert", show_default=True, - help="Operation to perform on the table.") +@click.option( + "-n", "--target_name", default=None, help="Table name to associate with the bucket." +) +@click.option( + "-i", "--target_id", default=None, help="Table ID to associate with the table." +) +@click.option( + "-f", + "--file", + "file", + required=False, + default=None, + type=click.Path(exists=True), + help="Schema JSON file for the target table.", +) +@click.option( + "-o", + "--operation", + default="TruncateAndInsert", + show_default=True, + help="Operation to perform on the table.", +) @click.argument("bucket", required=False) @click.pass_context def buckets_create(ctx, target_name, target_id, file, operation, bucket): @@ -85,32 +127,54 @@ def buckets_create(ctx, target_name, target_id, file, operation, bucket): logger.error("A table must be associated with this bucket.") sys.exit(1) - bucket = p.buckets_create(bucket_name=bucket, - target_id=target_id, target_name=target_name, - schema=file, operation=operation) + bucket = p.buckets_create( + bucket_name=bucket, + target_id=target_id, + target_name=target_name, + schema=file, + operation=operation, + ) if bucket is not None: logger.info(json.dumps(bucket, indent=2)) else: - logger.error('Error creating bucket.') + logger.error("Error creating bucket.") sys.exit(1) @click.command("files") -@click.option("-n", "--target_name", default=None, - help="Name of the table to associate with the bucket.") -@click.option("-i", "--target_id", default=None, - help="Table ID to associate with the table.") -@click.option("-f", "--file", default=None, - help="Schema JSON file for the target table.") -@click.option("-o", "--operation", default="TruncateAndInsert", show_default=True, - help="Operation to perform on the table.") +@click.option( + "-n", + "--target_name", + default=None, + help="Name of the table to associate with the bucket.", +) +@click.option( + "-i", "--target_id", default=None, help="Table ID to associate with the table." +) +@click.option( + "-f", "--file", default=None, help="Schema JSON file for the target table." 
+) +@click.option( + "-o", + "--operation", + default="TruncateAndInsert", + show_default=True, + help="Operation to perform on the table.", +) @click.option("-b", "--bucket", help="Bucket name to load files.", default=None) -@click.option("-c", "--complete", is_flag=True, default=False, - help="Automatically complete bucket and load the data into the table.") +@click.option( + "-c", + "--complete", + is_flag=True, + default=False, + help="Automatically complete bucket and load the data into the table.", +) @click.argument("files", nargs=-1, required=True, type=click.Path(exists=True)) @click.pass_context -def buckets_files(ctx, target_name, target_id, file, operation, bucket, complete, files): +def buckets_files( + ctx, target_name, target_id, file, operation, bucket, complete, files +): """ Upload one or more CSV or gzip files to the specified bucket @@ -130,7 +194,7 @@ def buckets_files(ctx, target_name, target_id, file, operation, bucket, complete results = p.buckets_files(bucket["id"], files) - if results['total'] > 0 and complete: + if results["total"] > 0 and complete: complete = p.buckets_complete(bucket["id"]) logger.info(complete) else: @@ -138,8 +202,13 @@ def buckets_files(ctx, target_name, target_id, file, operation, bucket, complete @click.command("complete") -@click.option('-n', '--isName', is_flag=True, default=False, - help='Flag to treat the bucket argument as a name.') +@click.option( + "-n", + "--isName", + is_flag=True, + default=False, + help="Flag to treat the bucket argument as a name.", +) @click.argument("bucket", required=True) @click.pass_context def buckets_complete(ctx, isname, bucket): @@ -161,21 +230,28 @@ def buckets_complete(ctx, isname, bucket): bucket = p.buckets_list(bucket_id=bucket) if bucket is None: - logger.error(f'Bucket {bucket} not found.') + logger.error(f"Bucket {bucket} not found.") sys.exit(1) bucket_state = bucket["state"]["descriptor"] - if bucket_state != 'New': - logger.error(f'Bucket state is "{bucket_state}" - only "New" buckets can be completed.') + if bucket_state != "New": + logger.error( + f'Bucket state is "{bucket_state}" - only "New" buckets can be completed.' 
+ ) sys.exit(1) logger.info(p.buckets_complete(bucket["id"])) @click.command("errorFile") -@click.option('-n', '--isName', is_flag=True, default=False, - help='Flag to treat the bucket argument as a name.') +@click.option( + "-n", + "--isName", + is_flag=True, + default=False, + help="Flag to treat the bucket argument as a name.", +) @click.argument("bucket", required=True) @click.pass_context def buckets_errorFile(ctx, isname, bucket): @@ -191,10 +267,10 @@ def buckets_errorFile(ctx, isname, bucket): buckets = p.buckets_get(bucket_name=bucket) if buckets["total"] == 0: - logger.error(f'Bucket {bucket} not found.') + logger.error(f"Bucket {bucket} not found.") sys.exit(1) else: - bucket_id = buckets['data'][0]['id'] + bucket_id = buckets["data"][0]["id"] else: bucket_id = bucket @@ -204,8 +280,9 @@ def buckets_errorFile(ctx, isname, bucket): @click.command("status") -@click.option("-n", "--isName", is_flag=True, default=False, - help="Bucket name to status") +@click.option( + "-n", "--isName", is_flag=True, default=False, help="Bucket name to status" +) @click.argument("bucket", required=True) @click.pass_context def buckets_status(ctx, isname, bucket): @@ -220,15 +297,15 @@ def buckets_status(ctx, isname, bucket): buckets = p.buckets_get(bucket_name=bucket) if buckets["total"] == 0: - logger.error(f'Bucket name {bucket} not found.') + logger.error(f"Bucket name {bucket} not found.") sys.exit(1) - bucket = buckets['data'][0] + bucket = buckets["data"][0] else: bucket = p.buckets_get(bucket_id=bucket) if bucket is None: - logger.error(f'Bucket {bucket} not found.') + logger.error(f"Bucket {bucket} not found.") sys.exit(1) logger.info(bucket["state"]["descriptor"]) diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index beceae8..b6ca71d 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -4,20 +4,43 @@ import logging import time -logger = logging.getLogger('prismCLI') +logger = logging.getLogger("prismCLI") @click.command("get") -@click.option("-n", "--isName", default=False, is_flag=True, - help="Flag to treat the dct argument as a name.") -@click.option("-l", "--limit", default=-1, - help="The maximum number of object data entries included in the response.") -@click.option("-o", "--offset", default=0, - help="The offset to the first object in a collection to include in the response.") -@click.option("-t", "--type", "type_", default="summary", - help="How much information to be returned in response JSON (default=summary).") -@click.option("-s", "--search", is_flag=True, default=False, - help="Use contains search substring for --name or --id (default=false).") +@click.option( + "-n", + "--isName", + default=False, + is_flag=True, + help="Flag to treat the dct argument as a name.", +) +@click.option( + "-l", + "--limit", + default=-1, + help="The maximum number of object data entries included in the response.", +) +@click.option( + "-o", + "--offset", + default=0, + help="The offset to the first object in a collection to include in the response.", +) +@click.option( + "-t", + "--type", + "type_", + default="summary", + help="How much information to be returned in response JSON (default=summary).", +) +@click.option( + "-s", + "--search", + is_flag=True, + default=False, + help="Use contains search substring for --name or --id (default=false).", +) @click.argument("dct", required=False) @click.pass_context def dataChanges_get(ctx, isname, dct, limit, offset, type_, search): @@ -30,8 +53,9 @@ def 
dataChanges_get(ctx, isname, dct, limit, offset, type_, search): # Separate the get calls because an ID lookup returns a dict and a name lookup # always returns an object/list structure with zero or more matching DCTs. if isname: - data_change_task = p.dataChanges_get(datachange_name=dct, limit=limit, - offset=offset, search=search, type_=type_) + data_change_task = p.dataChanges_get( + datachange_name=dct, limit=limit, offset=offset, search=search, type_=type_ + ) if data_change_task["total"] == 0: logger.warning("No data change task(s) found.") @@ -39,23 +63,31 @@ def dataChanges_get(ctx, isname, dct, limit, offset, type_, search): # For display purposes, sort by display name (case-insensitive) data_change_task["data"] = sorted( - data_change_task["data"], - key=lambda dct_srt: dct_srt["displayName"].lower()) + data_change_task["data"], key=lambda dct_srt: dct_srt["displayName"].lower() + ) else: - data_change_task = p.dataChanges_get(datachange_id=dct, limit=limit, offset=offset, type_=type_) + data_change_task = p.dataChanges_get( + datachange_id=dct, limit=limit, offset=offset, type_=type_ + ) if data_change_task is None: - logger.error(f'Data change task {dct} not found.') + logger.error(f"Data change task {dct} not found.") sys.exit(1) logger.info(json.dumps(data_change_task, indent=2)) @click.command("validate") -@click.option("-n", "--isName", default=False, is_flag=True, - help="Flag to treat the dct argument as a name.") -@click.option("-s", "--search", is_flag=True, - help="Use contains search substring for --name.") +@click.option( + "-n", + "--isName", + default=False, + is_flag=True, + help="Flag to treat the dct argument as a name.", +) +@click.option( + "-s", "--search", is_flag=True, help="Use contains search substring for --name." +) @click.argument("dct", required=True) @click.pass_context def dataChanges_validate(ctx, isname, dct, search): @@ -82,10 +114,10 @@ def dataChanges_validate(ctx, isname, dct, search): for dct in data_change_tasks["data"]: validate = p.dataChanges_validate(dct["id"]) - if 'error' in validate: + if "error" in validate: # Add identifying attributes to the error message. - validate['id'] = dct['id'] - validate['descriptor'] = dct['displayName'] + validate["id"] = dct["id"] + validate["descriptor"] = dct["displayName"] results.append(validate) @@ -93,10 +125,15 @@ def dataChanges_validate(ctx, isname, dct, search): @click.command("run") -@click.option("-n", "--isName", default=False, is_flag=True, - help="Flag to treat the dct argument as a name.") -@click.argument('dct', required=True) -@click.argument('fid', required=False) +@click.option( + "-n", + "--isName", + default=False, + is_flag=True, + help="Flag to treat the dct argument as a name.", +) +@click.argument("dct", required=True) +@click.argument("fid", required=False) @click.pass_context def dataChanges_run(ctx, dct, fid, isname): """Execute the named data change task with an optional file container. 
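
The data change commands in this module wrap a short client-side flow: resolve the task by name, stage the input files in a new file container, start an activity, then check its state. A minimal sketch, assuming p is an authenticated Prism client and the task and file names are placeholders:

    tasks = p.dataChanges_get(datachange_name="My_DCT")
    dct_id = tasks["data"][0]["id"]

    # A container ID of None creates a new file container for the upload.
    container = p.fileContainers_load(filecontainer_id=None, file=["load.csv.gz"])

    # Start the task against the container, then check the activity state.
    activity = p.dataChanges_activities_post(dct_id, container["id"])
    status = p.dataChanges_activities_get(datachange_id=dct_id, activityID=activity["id"])
    print(status["state"]["descriptor"])
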
@@ -116,7 +153,7 @@ def dataChanges_run(ctx, dct, fid, isname): sys.exit(1) dct_id = data_changes["data"][0]["id"] - logger.debug(f'resolved ID: {dct_id}') + logger.debug(f"resolved ID: {dct_id}") else: dct_id = dct @@ -132,10 +169,20 @@ def dataChanges_run(ctx, dct, fid, isname): @click.command("activities") -@click.option("-n", "--isName", default=False, is_flag=True, - help="Flag to treat the dct argument as a name.") -@click.option("-s", "--status", is_flag=True, default=False, - help="Return only the status of the activity.") +@click.option( + "-n", + "--isName", + default=False, + is_flag=True, + help="Flag to treat the dct argument as a name.", +) +@click.option( + "-s", + "--status", + is_flag=True, + default=False, + help="Return only the status of the activity.", +) @click.argument("dct", required=True) @click.argument("activityID", required=True) @click.pass_context @@ -157,7 +204,7 @@ def dataChanges_activities(ctx, dct, activityid, status, isname): sys.exit(1) dct_id = data_changes["data"][0]["id"] - logger.debug(f'resolved ID: {dct_id}') + logger.debug(f"resolved ID: {dct_id}") else: dct_id = dct @@ -174,12 +221,27 @@ def dataChanges_activities(ctx, dct, activityid, status, isname): @click.command("upload") -@click.option("-n", "--isName", default=False, is_flag=True, - help="Flag to treat the dct argument as a name.") -@click.option("-w", "--wait", default=False, is_flag=True, - help="Wait for the data change task to complete.") -@click.option("-v", "--verbose", default=False, is_flag=True, - help="Display additional information.") +@click.option( + "-n", + "--isName", + default=False, + is_flag=True, + help="Flag to treat the dct argument as a name.", +) +@click.option( + "-w", + "--wait", + default=False, + is_flag=True, + help="Wait for the data change task to complete.", +) +@click.option( + "-v", + "--verbose", + default=False, + is_flag=True, + help="Display additional information.", +) @click.argument("dct", required=True) @click.argument("file", required=True, nargs=-1, type=click.Path(exists=True)) @click.pass_context @@ -195,31 +257,33 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): if isname: data_change_tasks = p.dataChanges_get(datachange_name=dct) - if data_change_tasks['total'] == 0: - logger.error('Data change task not found.') + if data_change_tasks["total"] == 0: + logger.error("Data change task not found.") sys.exit(1) - dct_id = data_change_tasks['data'][0]['id'] - logger.debug(f'resolved ID: {dct_id}') + dct_id = data_change_tasks["data"][0]["id"] + logger.debug(f"resolved ID: {dct_id}") else: dct_id = dct # Specifying None for the ID to create a new file container. file_container = p.fileContainers_load(filecontainer_id=None, file=file) - if file_container['total'] == 0: - logger.error('Error loading file container.') + if file_container["total"] == 0: + logger.error("Error loading file container.") sys.exit(1) - filecontainer_id = file_container['id'] - logger.debug(f'new file container ID: {filecontainer_id}') + filecontainer_id = file_container["id"] + logger.debug(f"new file container ID: {filecontainer_id}") # Execute the DCT. - activity = p.dataChanges_activities_post(datachange_id=dct_id, fileContainer_id=filecontainer_id) + activity = p.dataChanges_activities_post( + datachange_id=dct_id, fileContainer_id=filecontainer_id + ) - if 'errors' in activity: + if "errors" in activity: # Add the ID of the DCT for easy identification. 
- activity['id'] = dct_id + activity["id"] = dct_id logger.error(json.dumps(activity, indent=2)) @@ -228,19 +292,21 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): if not wait: logger.info(json.dumps(activity, indent=2)) else: - activity_id = activity['id'] + activity_id = activity["id"] while True: time.sleep(10) - activity = p.dataChanges_activities_get(datachange_id=dct_id, activityID=activity_id) + activity = p.dataChanges_activities_get( + datachange_id=dct_id, activityID=activity_id + ) - status = activity['state']['descriptor'] + status = activity["state"]["descriptor"] if verbose: - logger.info(f'Status: {status}') + logger.info(f"Status: {status}") - if status not in ['New', 'Queued', 'Processing', 'Loading']: + if status not in ["New", "Queued", "Processing", "Loading"]: break # Output the final status of the activity. diff --git a/prism/commands/dataExport_commands.py b/prism/commands/dataExport_commands.py index 59a5f48..7aa6586 100644 --- a/prism/commands/dataExport_commands.py +++ b/prism/commands/dataExport_commands.py @@ -2,19 +2,40 @@ import json import logging -logger = logging.getLogger('prismCLI') - -@click.command('get') -@click.option('-l', '--limit', type=int, default=None, - help='The maximum number of object data entries included in the response, default=all.') -@click.option('-o', '--offset', type=int, default=None, - help='The offset to the first object in a collection to include in the response.') -@click.option('-t', '--type', 'type_', default='summary', - type=click.Choice(['summary', 'full'], case_sensitive=False), - help='How much information returned for each table.') -@click.option('-f', '--format', 'format_', default='json', - type=click.Choice(['json', 'summary', 'schema', 'csv'], case_sensitive=False), - help='Format output as JSON, summary, schema, or CSV.') +logger = logging.getLogger("prismCLI") + + +@click.command("get") +@click.option( + "-l", + "--limit", + type=int, + default=None, + help="The maximum number of object data entries included in the response, default=all.", +) +@click.option( + "-o", + "--offset", + type=int, + default=None, + help="The offset to the first object in a collection to include in the response.", +) +@click.option( + "-t", + "--type", + "type_", + default="summary", + type=click.Choice(["summary", "full"], case_sensitive=False), + help="How much information returned for each table.", +) +@click.option( + "-f", + "--format", + "format_", + default="json", + type=click.Choice(["json", "summary", "schema", "csv"], case_sensitive=False), + help="Format output as JSON, summary, schema, or CSV.", +) @click.pass_context def dataExport_get(ctx, limit, offset, type_, format_): """List the tables or datasets permitted by the security profile of the current user. @@ -22,14 +43,14 @@ def dataExport_get(ctx, limit, offset, type_, format_): [NAME] Prism table name to list. 
""" - p = ctx.obj['p'] + p = ctx.obj["p"] data_export_list = p.dataExport_get(limit=limit, offset=offset, type_=type_) logger.info(json.dumps(data_export_list, indent=2)) -@click.command('create') +@click.command("create") @click.pass_context def dataExport_create(ctx): - logger.info("here") \ No newline at end of file + logger.info("here") diff --git a/prism/commands/fileContainers_commands.py b/prism/commands/fileContainers_commands.py index bd920a8..775ac03 100644 --- a/prism/commands/fileContainers_commands.py +++ b/prism/commands/fileContainers_commands.py @@ -3,7 +3,7 @@ import json import logging -logger = logging.getLogger('prismCLI') +logger = logging.getLogger("prismCLI") @click.command("create") @@ -18,7 +18,7 @@ def fileContainers_create(ctx): if file_container is not None: logger.info(json.dumps(file_container, indent=2)) else: - logger.error('Error creating file container.') + logger.error("Error creating file container.") sys.exit(1) @@ -39,8 +39,12 @@ def fileContainers_get(ctx, id): @click.command("load") -@click.option("-i", "--id", default=None, - help="Target File container ID - defaults to a new container.") +@click.option( + "-i", + "--id", + default=None, + help="Target File container ID - defaults to a new container.", +) @click.argument("file", nargs=-1, type=click.Path(exists=True)) @click.pass_context def fileContainers_load(ctx, id, file): @@ -64,7 +68,7 @@ def fileContainers_load(ctx, id, file): # working. Note: any error messages have already # been logged by the load operation. - if results['total'] == 0: + if results["total"] == 0: logger.error("A file container id is required to load a file.") sys.exit(1) else: diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index bfb6b59..aa4282a 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -5,24 +5,53 @@ from prism import * -logger = logging.getLogger('prismCLI') - - -@click.command('get') -@click.option('-n', '--isName', is_flag=True, default=False, - help='Flag to treat the table argument as a name.') -@click.option('-l', '--limit', type=int, default=None, - help='The maximum number of object data entries included in the response, default=all.') -@click.option('-o', '--offset', type=int, default=None, - help='The offset to the first object in a collection to include in the response.') -@click.option('-t', '--type', 'type_', default='summary', - type=click.Choice(['summary', 'full', 'permissions'], case_sensitive=False), - help='How much information returned for each table.') -@click.option('-c', '--compact', is_flag=True, default=False, - help='Compact the table schema for use in edit (put) operations.') -@click.option('-s', '--search', is_flag=True, - help='Enable substring search of NAME in api name or display name.') -@click.argument('table', required=False) +logger = logging.getLogger("prismCLI") + + +@click.command("get") +@click.option( + "-n", + "--isName", + is_flag=True, + default=False, + help="Flag to treat the table argument as a name.", +) +@click.option( + "-l", + "--limit", + type=int, + default=None, + help="The maximum number of object data entries included in the response, default=all.", +) +@click.option( + "-o", + "--offset", + type=int, + default=None, + help="The offset to the first object in a collection to include in the response.", +) +@click.option( + "-t", + "--type", + "type_", + default="summary", + type=click.Choice(["summary", "full", "permissions"], case_sensitive=False), + help="How much information returned 
for each table.", +) +@click.option( + "-c", + "--compact", + is_flag=True, + default=False, + help="Compact the table schema for use in edit (put) operations.", +) +@click.option( + "-s", + "--search", + is_flag=True, + help="Enable substring search of NAME in api name or display name.", +) +@click.argument("table", required=False) @click.pass_context def tables_get(ctx, isname, table, limit, offset, type_, compact, search): """List the tables or datasets permitted by the security profile of the current user. @@ -30,7 +59,7 @@ def tables_get(ctx, isname, table, limit, offset, type_, compact, search): [TABLE] Prism table ID or name (--isName flag) to list. """ - p = ctx.obj['p'] + p = ctx.obj["p"] # Query the tenant...see if the caller said to treat the # table as a name, AND that a table was provided. @@ -51,33 +80,39 @@ def tables_get(ctx, isname, table, limit, offset, type_, compact, search): # When querying by name, the get operation returns a # dict with a count of found tables and a list of # tables. - tables = p.tables_get(table_name=table, limit=limit, offset=offset, type_=type_, search=search) + tables = p.tables_get( + table_name=table, limit=limit, offset=offset, type_=type_, search=search + ) - if tables['total'] == 0: + if tables["total"] == 0: logger.error(f"Table ID {table} not found.") return if compact: - for tab in tables['data']: + for tab in tables["data"]: tab = schema_compact(tab) logger.info(json.dumps(tables, indent=2)) -@click.command('create') -@click.option('-n', '--table_name', - help='Table name - overrides name from schema.') -@click.option('-d', '--displayName', - help='Specify a display name - defaults to name.') -@click.option('-e', '--enableForAnalysis', type=bool, is_flag=True, default=None, - help='Enable this table for analytics.') -@click.option('-s', '--sourceName', - help='The API name of an existing table to copy.') -@click.option('-w', '--sourceWID', - help='The WID of an existing table to copy.') -@click.argument('file', required=False, type=click.Path(exists=True)) +@click.command("create") +@click.option("-n", "--table_name", help="Table name - overrides name from schema.") +@click.option("-d", "--displayName", help="Specify a display name - defaults to name.") +@click.option( + "-e", + "--enableForAnalysis", + type=bool, + is_flag=True, + default=None, + help="Enable this table for analytics.", +) +@click.option("-s", "--sourceName", help="The API name of an existing table to copy.") +@click.option("-w", "--sourceWID", help="The WID of an existing table to copy.") +@click.argument("file", required=False, type=click.Path(exists=True)) @click.pass_context -def tables_create(ctx, table_name, displayname, enableforanalysis, sourcename, sourcewid, file): +def tables_create( + ctx, table_name, displayname, enableforanalysis, sourcename, sourcewid, file +): """ Create a new table with the specified name. @@ -85,7 +120,7 @@ def tables_create(ctx, table_name, displayname, enableforanalysis, sourcename, s Note: A schema file, --sourceName, or --sourceWID must be specified. """ - p = ctx.obj['p'] + p = ctx.obj["p"] # We can assume a schema was found/built - get_schema sys.exits if there is a problem. schema = load_schema(p, file, sourcename, sourcewid) @@ -93,28 +128,28 @@ def tables_create(ctx, table_name, displayname, enableforanalysis, sourcename, s # Initialize a new schema with the particulars for this table operation. 
if table_name is not None: # If we got a name, set it in the table schema - schema['name'] = table_name.replace(' ', '_') # Minor clean-up + schema["name"] = table_name.replace(" ", "_") # Minor clean-up logger.debug(f'setting table name to {schema["name"]}') - elif 'name' not in schema: + elif "name" not in schema: # The schema doesn't have a name and none was given - exit. # Note: this could be true if we have a schema of only fields. - logger.error('Table --name must be specified.') + logger.error("Table --name must be specified.") sys.exit(1) if displayname is not None: # If we got a display name, set it in the schema - schema['displayName'] = displayname - elif 'displayName' not in schema: + schema["displayName"] = displayname + elif "displayName" not in schema: # Default the display name to the name if not in the schema. - schema['displayName'] = table_name + schema["displayName"] = table_name logger.debug(f'defaulting displayName to {schema["displayName"]}') if enableforanalysis is not None: - schema['enableForAnalysis'] = enableforanalysis - elif 'enableForAnalysis' not in schema: + schema["enableForAnalysis"] = enableforanalysis + elif "enableForAnalysis" not in schema: # Default to False - do not enable. - schema['enableForAnalysis'] = False - logger.debug('defaulting enableForAnalysis to False.') + schema["enableForAnalysis"] = False + logger.debug("defaulting enableForAnalysis to False.") # Create the table. table_def = p.tables_post(schema) @@ -126,17 +161,24 @@ def tables_create(ctx, table_name, displayname, enableforanalysis, sourcename, s sys.exit(1) -@click.command('edit') -@click.option('-t', '--truncate', is_flag=True, default=False, - help='Truncate the table before updating.') -@click.argument('file', required=True, type=click.Path(exists=True, dir_okay=False, readable=True)) +@click.command("edit") +@click.option( + "-t", + "--truncate", + is_flag=True, + default=False, + help="Truncate the table before updating.", +) +@click.argument( + "file", required=True, type=click.Path(exists=True, dir_okay=False, readable=True) +) @click.pass_context def tables_edit(ctx, file, truncate): """Edit the schema for an existing table. [FILE] File containing an updated schema definition for the table. 
""" - p = ctx.obj['p'] + p = ctx.obj["p"] # The user can specify a GET:/tables output file containing # the ID and other attributes that could be passed on the @@ -146,27 +188,48 @@ def tables_edit(ctx, file, truncate): table = p.tables_put(schema, truncate=truncate) if table is None: - logger.error(f'Error updating table.') + logger.error(f"Error updating table.") else: logger.info(json.dumps(table, indent=2)) -@click.command('patch') -@click.option('-n', '--isName', - help='Flag to treat the table argument as a name.') -@click.option('--displayName', is_flag=False, flag_value="*-clear-*", default=None, - help='Set the display name for an existing table.') -@click.option('--description', is_flag=False, flag_value="*-clear-*", default=None, - help='Set the display name for an existing table.') -@click.option('--documentation', is_flag=False, flag_value="*-clear-*", default=None, - help='Set the documentation for an existing table.') -@click.option('--enableForAnalysis', is_flag=False, default=None, - type=click.Choice(['true', 'false'], case_sensitive=False)) -@click.argument('table', required=True, type=str) -@click.argument('file', required=False, type=click.Path(exists=True, dir_okay=False, readable=True)) +@click.command("patch") +@click.option("-n", "--isName", help="Flag to treat the table argument as a name.") +@click.option( + "--displayName", + is_flag=False, + flag_value="*-clear-*", + default=None, + help="Set the display name for an existing table.", +) +@click.option( + "--description", + is_flag=False, + flag_value="*-clear-*", + default=None, + help="Set the display name for an existing table.", +) +@click.option( + "--documentation", + is_flag=False, + flag_value="*-clear-*", + default=None, + help="Set the documentation for an existing table.", +) +@click.option( + "--enableForAnalysis", + is_flag=False, + default=None, + type=click.Choice(["true", "false"], case_sensitive=False), +) +@click.argument("table", required=True, type=str) +@click.argument( + "file", required=False, type=click.Path(exists=True, dir_okay=False, readable=True) +) @click.pass_context -def tables_patch(ctx, isname, table, file, - displayname, description, documentation, enableforanalysis): +def tables_patch( + ctx, isname, table, file, displayname, description, documentation, enableforanalysis +): """Edit the specified attributes of an existing table with the specified id (or name). If an attribute is not provided in the request, it will not be changed. To set an @@ -176,7 +239,7 @@ def tables_patch(ctx, isname, table, file, [FILE] Optional file containing patch values for the table. """ - p = ctx.obj['p'] + p = ctx.obj["p"] # Figure out the new schema either by file or other table. 
patch_data = {} @@ -192,14 +255,19 @@ def tables_patch(ctx, isname, table, file, sys.exit(1) if not isinstance(patch_data, dict): - logger.error('invalid patch file - should be a dictionary') + logger.error("invalid patch file - should be a dictionary") sys.exit(1) - valid_attributes = ['displayName', 'description', 'enableForAnalysis', 'documentation'] + valid_attributes = [ + "displayName", + "description", + "enableForAnalysis", + "documentation", + ] for patch_attr in patch_data.keys(): if patch_attr not in valid_attributes: - logger.error(f'unexpected attribute {patch_attr} in patch file') + logger.error(f"unexpected attribute {patch_attr} in patch file") sys.exit(1) def set_patch_value(attr, value): @@ -208,8 +276,8 @@ def set_patch_value(attr, value): If the user specifies an attribute but does not provide a value, add a patch value to clears/null the value """ - if value == '*-clear-*': - patch_data[attr] = '' + if value == "*-clear-*": + patch_data[attr] = "" else: patch_data[attr] = value @@ -221,19 +289,19 @@ def set_patch_value(attr, value): # caller can override the values from the patch file using # command line arguments. if displayname is not None: # Specified on CLI - set_patch_value('displayName', displayname) + set_patch_value("displayName", displayname) if description is not None: - set_patch_value('description', description) + set_patch_value("description", description) if documentation is not None: - set_patch_value('documentation', documentation) + set_patch_value("documentation", documentation) if enableforanalysis is not None: - if enableforanalysis.lower() == 'true': - patch_data['enableForAnalysis'] = 'true' + if enableforanalysis.lower() == "true": + patch_data["enableForAnalysis"] = "true" else: - patch_data['enableForAnalysis'] = 'false' + patch_data["enableForAnalysis"] = "false" # The caller must be asking for something to change! if len(patch_data) == 0: @@ -245,11 +313,11 @@ def set_patch_value(attr, value): # Before doing anything, table name must exist. tables = p.tables_get(table_name=table) # Exact match - if tables['total'] == 0: + if tables["total"] == 0: logger.error(f'Table name "{table}" not found.') sys.exit(1) - resolved_id = tables['data'][0]['id'] + resolved_id = tables["data"][0]["id"] else: # No verification needed, simply assume the ID is valid. 
resolved_id = table @@ -257,18 +325,27 @@ def set_patch_value(attr, value): table = p.tables_patch(table_id=resolved_id, patch=patch_data) if table is None: - logger.error(f'Error updating table ID {resolved_id}') + logger.error(f"Error updating table ID {resolved_id}") else: logger.info(json.dumps(table, indent=2)) -@click.command('upload') -@click.option('-n', '--isName', is_flag=True, default=False, - help='Flag to treat the table argument as a name.') -@click.option('-o', '--operation', default='TruncateAndInsert', - help='Operation for the table operation - default to TruncateAndInsert.') -@click.argument('table', required=True) -@click.argument('file', nargs=-1, type=click.Path(exists=True)) +@click.command("upload") +@click.option( + "-n", + "--isName", + is_flag=True, + default=False, + help="Flag to treat the table argument as a name.", +) +@click.option( + "-o", + "--operation", + default="TruncateAndInsert", + help="Operation for the table operation - default to TruncateAndInsert.", +) +@click.argument("table", required=True) +@click.argument("file", nargs=-1, type=click.Path(exists=True)) @click.pass_context def tables_upload(ctx, table, isname, operation, file): """ @@ -278,12 +355,12 @@ def tables_upload(ctx, table, isname, operation, file): [FILE] One or more CSV or GZIP.CSV files. """ - p = ctx.obj['p'] + p = ctx.obj["p"] # Convert the file(s) provided to a list of compressed files. if len(file) == 0: - logger.error('No files to upload.') + logger.error("No files to upload.") sys.exit(1) if isname: @@ -294,10 +371,15 @@ def tables_upload(ctx, table, isname, operation, file): logger.debug(json.dumps(results, indent=2)) -@click.command('truncate') -@click.option('-n', '--isName', is_flag=True, default=False, - help='Flag to treat the table argument as a name.') -@click.argument('table', required=True) +@click.command("truncate") +@click.option( + "-n", + "--isName", + is_flag=True, + default=False, + help="Flag to treat the table argument as a name.", +) +@click.argument("table", required=True) @click.pass_context def tables_truncate(ctx, table, isname): """ @@ -305,7 +387,7 @@ def tables_truncate(ctx, table, isname): [TABLE] The Prism Table ID or API name of the table to truncate. """ - p = ctx.obj['p'] + p = ctx.obj["p"] if isname: result = truncate_table(p, table_name=table) diff --git a/prism/prism.py b/prism/prism.py index 3694c16..5b19794 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -27,7 +27,9 @@ # writing to stdout only... handler = logging.StreamHandler(sys.stdout) handler.setLevel(logging.WARNING) -log_format = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S") +log_format = logging.Formatter( + "%(asctime)s %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S" +) handler.setFormatter(log_format) logger.addHandler(handler) @@ -68,7 +70,9 @@ def set_logging(log_file=None, log_level="INFO"): fh.setLevel(set_level) # create formatter and add it to the handlers - formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) fh.setFormatter(formatter) logger.addHandler(fh) @@ -105,35 +109,46 @@ def schema_compact(schema): # Add a sequential order (ordinal) on the fields to (en)force # required sequencing of fields. Note: for summary tables # there will not be a fields attribute. 
- if 'fields' in compact_schema: + if "fields" in compact_schema: # Remove Prism managed fields "WPA_*" - compact_schema['fields'] = [fld for fld in compact_schema['fields'] - if not fld['name'].startswith('WPA_')] + compact_schema["fields"] = [ + fld + for fld in compact_schema["fields"] + if not fld["name"].startswith("WPA_") + ] for ordinal in range(len(compact_schema["fields"])): fld = schema["fields"][ordinal] fld["ordinal"] = ordinal + 1 - if 'fieldId' in fld: - del fld['fieldId'] + if "fieldId" in fld: + del fld["fieldId"] - if 'id' in fld: - del fld['id'] + if "id" in fld: + del fld["id"] - if 'type' in fld: - if 'descriptor' in fld['type']: + if "type" in fld: + if "descriptor" in fld["type"]: # Convert the descriptor to the shortened Prism type syntax. - fld['type']['id'] = f"Schema_Field_Type={fld['type']['descriptor']}" - del fld['type']['descriptor'] + fld["type"]["id"] = f"Schema_Field_Type={fld['type']['descriptor']}" + del fld["type"]["descriptor"] # Remove all attributes from the schema that cannot be specified on # a post or put operation. keys = list(compact_schema.keys()) for k in keys: - if k not in ['name', 'id', 'fields', 'tags', 'categories', - 'displayName', 'description', 'documentation', - 'enableForAnalysis']: + if k not in [ + "name", + "id", + "fields", + "tags", + "categories", + "displayName", + "description", + "documentation", + "enableForAnalysis", + ]: del compact_schema[k] return compact_schema @@ -183,14 +198,14 @@ def table_to_bucket_schema(table): # Now trim our field attributes to keep just what we need for fld in fields: - for attr in ['id', 'displayName', 'fieldId', 'required', 'externalId']: + for attr in ["id", "displayName", "fieldId", "required", "externalId"]: if attr in fld: del fld[attr] - if 'parseOptions' in table: - bucket_schema['parseOptions'] = table['parseOptions'] + if "parseOptions" in table: + bucket_schema["parseOptions"] = table["parseOptions"] else: - bucket_schema['parseOptions'] = { + bucket_schema["parseOptions"] = { "fieldsDelimitedBy": ",", "fieldsEnclosedBy": '"', "headerLinesToIgnore": 1, @@ -199,7 +214,7 @@ def table_to_bucket_schema(table): } # Build the final bucket definition. - bucket_schema['fields'] = fields + bucket_schema["fields"] = fields return bucket_schema @@ -217,7 +232,15 @@ class Prism: version (str): Version of the Prism API to use """ - def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_token, version="v3"): + def __init__( + self, + base_url, + tenant_name, + client_id, + client_secret, + refresh_token, + version="v3", + ): """Init the Prism class with required attributes.""" # Capture the arguments into the class variables. @@ -232,7 +255,9 @@ def __init__(self, base_url, tenant_name, client_id, client_secret, refresh_toke self.token_endpoint = f"{base_url}/ccx/oauth2/{tenant_name}/token" self.rest_endpoint = f"{base_url}/ccx/api/{version}/{tenant_name}" self.prism_endpoint = f"{base_url}/api/prismAnalytics/{version}/{tenant_name}" - self.upload_endpoint = f"{base_url}/wday/opa/tenant/{tenant_name}/service/wBuckets" + self.upload_endpoint = ( + f"{base_url}/wday/opa/tenant/{tenant_name}/service/wBuckets" + ) # At creation, there cannot yet be a bearer_token obtained from Workday. self.bearer_token = None @@ -260,9 +285,7 @@ def http_get(self, url, headers=None, params=None): # Create a fake response object for standard error handling. 
msg = "get: missing URL" - response = {"status_code": 600, - "text": msg, - "errors": [{"error": msg}]} + response = {"status_code": 600, "text": msg, "errors": [{"error": msg}]} else: logger.debug(f"get: {url}") @@ -291,9 +314,7 @@ def http_post(self, url, headers=None, data=None, files=None): # Create a fake response object for standard error handling. msg = "POST: missing URL" - response = {"status_code": 600, - "text": msg, - "errors": [{"error": msg}]} + response = {"status_code": 600, "text": msg, "errors": [{"error": msg}]} else: logger.debug(f"post: {url}") @@ -320,9 +341,7 @@ def http_patch(self, url, headers=None, data=None): # Create a fake response object for standard error handling. msg = "PATCH: missing URL" - response = {"status_code": 600, - "text": msg, - "errors": [{"error": msg}]} + response = {"status_code": 600, "text": msg, "errors": [{"error": msg}]} else: logger.debug(f"patch: {url}") @@ -349,9 +368,7 @@ def http_put(self, url, headers=None, data=None): # Create a fake response object for standard error handling. msg = "PUT: missing URL" - response = {"status_code": 600, - "text": msg, - "errors": [{"error": msg}]} + response = {"status_code": 600, "text": msg, "errors": [{"error": msg}]} else: logger.debug(f"put: {url}") @@ -363,7 +380,7 @@ def http_put(self, url, headers=None, data=None): headers["Authorization"] = "Bearer " + self.get_bearer_token() if "Content-Type" not in headers: - headers["Content-Type"] = 'application/json' + headers["Content-Type"] = "application/json" response = requests.put(url, headers=headers, data=json.dumps(data)) log_elapsed(f"put: {caller}", response.elapsed) @@ -393,7 +410,9 @@ def create_bearer_token(self): "client_secret": self.client_secret, } - r = self.http_post(url=self.token_endpoint, headers=self.CONTENT_FORM, data=data) + r = self.http_post( + url=self.token_endpoint, headers=self.CONTENT_FORM, data=data + ) if r.status_code == 200: logger.debug("successfully obtained bearer token") @@ -414,7 +433,10 @@ def get_bearer_token(self): Returns: Workday bearer token. """ - if self.bearer_token is None or (time.time() - self.bearer_token_timestamp) > 900: + if ( + self.bearer_token is None + or (time.time() - self.bearer_token_timestamp) > 900 + ): self.create_bearer_token() if self.bearer_token is None: @@ -431,11 +453,14 @@ def reset_bearer_token(self): self.bearer_token_timestamp = None def tables_get( - self, - table_name=None, table_id=None, - limit=None, offset=None, - type_="summary", - search=False): + self, + table_name=None, + table_id=None, + limit=None, + offset=None, + type_="summary", + search=False, + ): """Obtain details for all tables or a given table(s). Notes @@ -469,7 +494,9 @@ def tables_get( operation = "/tables" if type_ is None or type_.lower() not in ["full", "summary", "permissions"]: - logger.warning("Invalid output type for tables list operation - defaulting to summary.") + logger.warning( + "Invalid output type for tables list operation - defaulting to summary." + ) output_type = "summary" else: output_type = type_.lower() @@ -497,9 +524,9 @@ def tables_get( # Start setting up the API call parameters. params = { - 'limit': limit if limit is not None else 100, - 'offset': offset if offset is not None else 0, - 'type': output_type + "limit": limit if limit is not None else 100, + "offset": offset if offset is not None else 0, + "type": output_type, } # See if we want to add an explicit table name as a search parameter. 
@@ -508,8 +535,8 @@ def tables_get( params["name"] = table_name.replace(" ", "_") # Minor clean-up # Should only be 0 (not found) or 1 (found) tables found. - params['limit'] = 1 - params['offset'] = 0 + params["limit"] = 1 + params["offset"] = 0 # If we didn't get a limit, turn on searching to retrieve all tables. if limit is None: @@ -540,9 +567,12 @@ def tables_get( # return results, i.e., search the this batch for matches. if table_name is not None: # Substring search for matching table names, display names - match_tables = [tab for tab in tables["data"] - if table_name.lower() in tab["name"].lower() or - table_name.lower() in tab["displayName"].lower()] + match_tables = [ + tab + for tab in tables["data"] + if table_name.lower() in tab["name"].lower() + or table_name.lower() in tab["displayName"].lower() + ] else: # Grab all the tables in the result match_tables = tables["data"] @@ -551,18 +581,20 @@ def tables_get( # If we get back anything but a full page, we are done # paging the results. - if len(tables["data"]) < params['limit']: + if len(tables["data"]) < params["limit"]: break if search: # Move on to the next page. - params['offset'] += params['limit'] + params["offset"] += params["limit"] else: # The caller asked for a specific limit and offset, exit the loop. break # We always return a dict with the total tables found. - return_tables['total'] = len(return_tables['data']) # Separate step for debugging. + return_tables["total"] = len( + return_tables["data"] + ) # Separate step for debugging. return return_tables def tables_post(self, schema): @@ -589,7 +621,9 @@ def tables_post(self, schema): logger.error("Invalid schema for create operation.") return None - response = self.http_post(url=url, headers=self.CONTENT_APP_JSON, data=json.dumps(compact_schema)) + response = self.http_post( + url=url, headers=self.CONTENT_APP_JSON, data=json.dumps(compact_schema) + ) if response.status_code == 201: return response.json() @@ -621,7 +655,7 @@ def tables_put(self, schema): logger.error("Invalid schema for update operation.") return None - table_id = compact_schema['id'] + table_id = compact_schema["id"] operation = f"/tables/{table_id}" logger.debug(f"PUT: {operation}") @@ -656,8 +690,8 @@ def tables_patch(self, table_id, patch): If the request is successful, a dictionary containing information about the new table is returned, otherwise None. """ - operation = f'/tables/{table_id}' - logger.debug(f'PATCH: {operation}') + operation = f"/tables/{table_id}" + logger.debug(f"PATCH: {operation}") url = self.prism_endpoint + operation response = self.http_patch(url=url, headers=self.CONTENT_APP_JSON, data=patch) @@ -667,10 +701,17 @@ def tables_patch(self, table_id, patch): return None - def buckets_get(self, - bucket_id=None, bucket_name=None, search=False, - limit=None, offset=None, type_="summary", - table_id=None, table_name=None): + def buckets_get( + self, + bucket_id=None, + bucket_name=None, + search=False, + limit=None, + offset=None, + type_="summary", + table_id=None, + table_name=None, + ): """Get a one or more bucket definitions. Parameters @@ -701,7 +742,9 @@ def buckets_get(self, """ operation = "/buckets" - output_type = type_.lower() if type_.lower() in ['full', 'summary'] else 'summary' + output_type = ( + type_.lower() if type_.lower() in ["full", "summary"] else "summary" + ) # If we got an ID, then do a direct query by ID - no paging or # searching required. 
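
Together, buckets_create, buckets_files, and buckets_complete implement the load lifecycle the CLI commands rely on, while buckets_get resolves buckets by ID, name, or target table. A minimal sketch, assuming p is an authenticated Prism client and the table and file names are placeholders:

    # Create a bucket against an existing table, push one CSV file, and
    # complete the bucket to start the load into the table.
    bucket = p.buckets_create(target_name="My_Table", operation="TruncateAndInsert")
    p.buckets_files(bucket["id"], ["my_table.csv"])
    p.buckets_complete(bucket["id"])

    # List every bucket that targets the table, with full detail.
    buckets = p.buckets_get(table_name="My_Table", type_="full")
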
@@ -725,25 +768,25 @@ def buckets_get(self, return_buckets = {"total": 0, "data": []} params = { - 'limit': limit if limit is not None else 100, - 'offset': offset if offset is not None else 0, - 'type': output_type + "limit": limit if limit is not None else 100, + "offset": offset if offset is not None else 0, + "type": output_type, } if not search and bucket_name is not None: # List a specific bucket name overrides any other # combination of search/table/bucket name/wid. - params['name'] = urlparse.quote(bucket_name) + params["name"] = urlparse.quote(bucket_name) - params['limit'] = 1 # Can ONLY be one matching bucket. - params['offset'] = 0 + params["limit"] = 1 # Can ONLY be one matching bucket. + params["offset"] = 0 else: # Any other combination of parameters requires a search # through all the buckets in the data catalog. search = True - params['limit'] = 100 # Max pagesize to retrieve in the fewest REST calls. - params['offset'] = 0 + params["limit"] = 100 # Max pagesize to retrieve in the fewest REST calls. + params["offset"] = 0 while True: r = self.http_get(url, params=params) @@ -762,19 +805,28 @@ def buckets_get(self, if bucket_name is not None: # We are searching at this point. # Substring search for matching table names - match_buckets = [bck for bck in buckets["data"] if - bucket_name in bck["name"] or bucket_name in bck["displayName"]] + match_buckets = [ + bck + for bck in buckets["data"] + if bucket_name in bck["name"] or bucket_name in bck["displayName"] + ] elif table_id is not None: match_buckets = [ - bck for bck in buckets["data"] + bck + for bck in buckets["data"] if table_id == bck["targetDataset"]["id"] ] elif table_name is not None: # Caller is looking for any/all buckets by target table(s) match_buckets = [ - bck for bck in buckets["data"] - if table_name == bck["targetDataset"]["descriptor"] or - (search and table_name.lower() in bck["targetDataset"]["descriptor"].lower()) + bck + for bck in buckets["data"] + if table_name == bck["targetDataset"]["descriptor"] + or ( + search + and table_name.lower() + in bck["targetDataset"]["descriptor"].lower() + ) ] else: # No search in progress, grab all the buckets in this page. @@ -801,12 +853,13 @@ def buckets_get(self, return return_buckets def buckets_create( - self, - bucket_name=None, - target_name=None, - target_id=None, - schema=None, - operation="TruncateAndInsert"): + self, + bucket_name=None, + target_name=None, + target_id=None, + schema=None, + operation="TruncateAndInsert", + ): """Create a Prism bucket to upload files. Notes @@ -860,7 +913,7 @@ def buckets_create( logger.error(e) return None else: - logger.error('invalid schema expecting dict or file name.') + logger.error("invalid schema expecting dict or file name.") return None # Resolve the target table; if specified. @@ -868,18 +921,22 @@ def buckets_create( # The caller expects the schema to come from the # passed schema - do a quick sanity check. if table_schema is None: - logger.error("schema, target id or target name is required to create a bucket.") + logger.error( + "schema, target id or target name is required to create a bucket." + ) return None - if 'id' not in table_schema or 'fields' not in table_schema: + if "id" not in table_schema or "fields" not in table_schema: logger.error('schema missing "id" or "fields" attribute.') return None else: if target_id is not None: # Always use ID if provided - has precedence. 
- table = self.tables_get(table_id=target_id, type_="full") # Full=include fields object + table = self.tables_get( + table_id=target_id, type_="full" + ) # Full=include fields object if table is None: - logger.error(f'table ID {target_id} not found.') + logger.error(f"table ID {target_id} not found.") return None else: tables = self.tables_get(table_name=target_name, type_="full") @@ -888,13 +945,13 @@ def buckets_create( logger.error(f"table not found for bucket operation.") return None - table = tables['data'][0] + table = tables["data"][0] if table_schema is None: table_schema = table else: # Override the definition of the table in the schema. - table_schema['id'] = table['id'] + table_schema["id"] = table["id"] # We have the table and the user didn't include a schema. Make a copy # of the target table's schema. @@ -902,7 +959,7 @@ def buckets_create( compact_schema = schema_compact(table_schema) if compact_schema is None: - logger.error('Invalid schema for bucket operation.') + logger.error("Invalid schema for bucket operation.") return None bucket_schema = table_to_bucket_schema(compact_schema) @@ -917,7 +974,9 @@ def buckets_create( "schema": bucket_schema, } - response = self.http_post(url, headers=self.CONTENT_APP_JSON, data=json.dumps(data)) + response = self.http_post( + url, headers=self.CONTENT_APP_JSON, data=json.dumps(data) + ) if response.status_code == 201: response_json = response.json() @@ -941,17 +1000,17 @@ def buckets_complete(self, bucket_id): dict Information about the completed bucket, or None if there was a problem. """ - operation = f'/buckets/{bucket_id}/complete' - logger.debug(f'post: {operation}') + operation = f"/buckets/{bucket_id}/complete" + logger.debug(f"post: {operation}") url = self.prism_endpoint + operation r = self.http_post(url) if r.status_code == 201: - logger.debug(f'successfully completed wBucket {bucket_id}.') + logger.debug(f"successfully completed wBucket {bucket_id}.") return r.json() elif r.status_code == 400: - logger.debug(f'error completing bucket') + logger.debug(f"error completing bucket") return r.json() return None @@ -986,7 +1045,10 @@ def buckets_files(self, bucket_id, file=None): logger.debug(f"post: {operation}") url = self.prism_endpoint + operation - results = {'total': 0, 'data': []} # Always return a valid list - regardless of files + results = { + "total": 0, + "data": [], + } # Always return a valid list - regardless of files if file is None: # It is legal to upload an empty file - see the table truncate command. @@ -1006,16 +1068,20 @@ def buckets_files(self, bucket_id, file=None): # Buckets can only load gzip files - do it. 
with open(target_file, "rb") as in_file: - new_file = {"file": (upload_filename, gzip.compress(in_file.read()))} + new_file = { + "file": (upload_filename, gzip.compress(in_file.read())) + } response = self.http_post(url, files=new_file) if response.status_code == 201: logger.debug(f"successfully uploaded {target_file} to the bucket") - results['data'].append(response.json()) # Add this file's info to the return list + results["data"].append( + response.json() + ) # Add this file's info to the return list - results['total'] = len(results['data']) + results["total"] = len(results["data"]) return results def buckets_errorFile(self, bucket_id): @@ -1032,7 +1098,7 @@ def buckets_errorFile(self, bucket_id): """ if bucket_id is None: - logger.error('bucket id is required.') + logger.error("bucket id is required.") return None operation = f"/buckets/{bucket_id}/errorFile" @@ -1046,25 +1112,34 @@ def buckets_errorFile(self, bucket_id): return None - def dataChanges_get(self, - datachange_name=None, datachange_id=None, - limit=None, offset=None, - type_='summary', search=False, - refresh=False): - """ - """ + def dataChanges_get( + self, + datachange_name=None, + datachange_id=None, + limit=None, + offset=None, + type_="summary", + search=False, + ): + """ """ # We are doing a dataChanges GET operation. operation = "/dataChanges" # Make sure output type is valid. - output_type = type_.lower() if type_.lower() in ['summary', 'full'] else 'summary' + output_type = ( + type_.lower() if type_.lower() in ["summary", "full"] else "summary" + ) # Searching by ID is a special case that eliminates all other types # of search. Ask for the datachange by id and return just this # result - even blank. - if datachange_id is not None and isinstance(datachange_id, str) and len(datachange_id) > 0: + if ( + datachange_id is not None + and isinstance(datachange_id, str) + and len(datachange_id) > 0 + ): operation = f"{operation}/{datachange_id}?type={output_type}" - logger.debug(f'dataChanges_get: {operation}') + logger.debug(f"dataChanges_get: {operation}") url = self.prism_endpoint + operation response = self.http_get(url) @@ -1074,7 +1149,7 @@ def dataChanges_get(self, return None - logger.debug(f'dataChanges_get: {operation}') + logger.debug(f"dataChanges_get: {operation}") url = self.prism_endpoint + operation # Get a list of tasks by page, with or without searching. @@ -1091,7 +1166,11 @@ def dataChanges_get(self, searching = False name_param = "" - if datachange_name is not None and isinstance(datachange_name, str) and len(datachange_name) > 0: + if ( + datachange_name is not None + and isinstance(datachange_name, str) + and len(datachange_name) > 0 + ): if search is not None and isinstance(search, bool) and search: # Force a return of ALL data change tasks, so we can search the names. name_param = "" # Added to the query params @@ -1101,7 +1180,7 @@ def dataChanges_get(self, search_offset = 0 else: # With an explicit name, we should return at most 1 result. 
- name_param = f'&name={urlparse.quote(datachange_name)}' + name_param = f"&name={urlparse.quote(datachange_name)}" searching = False search_limit = 1 @@ -1114,7 +1193,7 @@ def dataChanges_get(self, data_changes = {"total": 0, "data": []} while True: - search_url = f'{url}?type={output_type}&limit={search_limit}&offset={search_offset}{name_param}' + search_url = f"{url}?type={output_type}&limit={search_limit}&offset={search_offset}{name_param}" response = self.http_get(url=search_url) @@ -1125,10 +1204,11 @@ def dataChanges_get(self, if searching: # Only add matching rows - check name and displayName - data_changes["data"] += \ - filter(lambda dtc: dtc["name"].lower().find(datachange_name.lower()) != -1 or - dtc["displayName"].lower().find(datachange_name.lower()) != -1, - return_json["data"]) + data_changes["data"] += filter( + lambda dtc: dtc["name"].lower().find(datachange_name.lower()) != -1 + or dtc["displayName"].lower().find(datachange_name.lower()) != -1, + return_json["data"], + ) else: # Without searching, simply paste the current page to the list. data_changes["data"] += return_json["data"] @@ -1187,7 +1267,7 @@ def dataChanges_activities_post(self, datachange_id, filecontainer_id=None): logger.debug("no file container ID") data = None else: - logger.debug('with file container ID: {filecontainer_id}') + logger.debug("with file container ID: {filecontainer_id}") # NOTE: the name is NOT correct based on the API definition data = json.dumps({"fileContainerWid": filecontainer_id}) @@ -1201,7 +1281,7 @@ def dataChanges_activities_post(self, datachange_id, filecontainer_id=None): logger.debug(f"successfully started data load task - id: {activity_id}") return return_json elif r.status_code == 400: - logger.error(f'error running data change task.') + logger.error(f"error running data change task.") return r.json() # This is still valid JSON with the error. return None @@ -1258,7 +1338,7 @@ def dataChanges_validate(self, datachange_id): return None def dataExport_get(self, limit=None, offset=None, type_=None): - operation = '/dataExport' + operation = "/dataExport" logger.debug(f"dataExport_get: get {operation}") url = self.prism_endpoint + operation @@ -1315,12 +1395,14 @@ def fileContainers_get(self, filecontainer_id): if response.status_code == 200: return_json = response.json() - return {'total': len(return_json), 'data': return_json} + return {"total": len(return_json), "data": return_json} if response.status_code == 404: - logger.warning('verify: Self-Service: Prism File Container domain in the Prism Analytics functional area.') + logger.warning( + "verify: Self-Service: Prism File Container domain in the Prism Analytics functional area." + ) - return {"total": 0, 'data': []} # Always return a list. + return {"total": 0, "data": []} # Always return a list. def fileContainers_load(self, filecontainer_id, file): """ @@ -1344,11 +1426,7 @@ def fileContainers_load(self, filecontainer_id, file): target_files = resolve_file_list(file) - results = { - 'id': None, - 'total': 0, - 'data': [] - } + results = {"id": None, "total": 0, "data": []} for target_file in target_files: # It is legal to upload an empty file - see the table truncate method. @@ -1361,7 +1439,9 @@ def fileContainers_load(self, filecontainer_id, file): upload_filename += ".gz" with open(target_file, "rb") as in_file: - new_file = {"file": (upload_filename, gzip.compress(in_file.read()))} + new_file = { + "file": (upload_filename, gzip.compress(in_file.read())) + } # Create the file container and get the ID. 
We use the # file container ID to load the file and then return the @@ -1377,7 +1457,7 @@ def fileContainers_load(self, filecontainer_id, file): resolved_fid = file_container_response["id"] - results['id'] = resolved_fid + results["id"] = resolved_fid logger.debug(f"resolved fID: {resolved_fid}") @@ -1391,9 +1471,9 @@ def fileContainers_load(self, filecontainer_id, file): if response.status_code == 201: logger.debug(f"successfully loaded file: {file}") - results['data'].append(response.json()) + results["data"].append(response.json()) - results['total'] = len(results['data']) + results["total"] = len(results["data"]) return results @@ -1471,20 +1551,22 @@ def upload_file(p, file, table_id=None, table_name=None, operation="TruncateAndI the table is returned. """ - bucket = p.buckets_create(target_id=table_id, target_name=table_name, operation=operation) + bucket = p.buckets_create( + target_id=table_id, target_name=table_name, operation=operation + ) if bucket is None: return None file_results = p.buckets_files(bucket["id"], file) - if file_results['total'] > 0: + if file_results["total"] > 0: results = p.buckets_complete(bucket["id"]) # Add the file upload results to the bucket # info returned to the caller. - results['files'] = file_results - results['bucket'] = bucket # Ensure bucket info is present. + results["files"] = file_results + results["bucket"] = bucket # Ensure bucket info is present. return results else: @@ -1494,15 +1576,15 @@ def upload_file(p, file, table_id=None, table_name=None, operation="TruncateAndI def truncate_table(p, table_id=None, table_name=None): # To do a truncate, we still need a bucket with a truncate operation. if table_id is not None: - bucket = p.buckets_create(target_id=table_id, operation='TruncateAndInsert') + bucket = p.buckets_create(target_id=table_id, operation="TruncateAndInsert") else: - bucket = p.buckets_create(target_name=table_name, operation='TruncateAndInsert') + bucket = p.buckets_create(target_name=table_name, operation="TruncateAndInsert") if bucket is None: - logger.error(f'Unable to truncate table - see log for details.') + logger.error(f"Unable to truncate table - see log for details.") return None - bucket_id = bucket['id'] + bucket_id = bucket["id"] # Don't specify a file to put a zero sized file into the bucket. p.buckets_files(bucket_id) @@ -1524,7 +1606,7 @@ def load_schema(p=None, file=None, source_name=None, source_id=None): if file is not None: if not os.path.isfile(file): - logger.error('File not found.') + logger.error("File not found.") return None # We can expect either a JSON file or a CSV file. @@ -1534,11 +1616,13 @@ def load_schema(p=None, file=None, source_name=None, source_id=None): if isinstance(schema, list): # Convert a list of fields into a basic schema. - schema['fields'] = schema + schema["fields"] = schema else: # This should be a full schema, perhaps from a table list command. - if 'name' not in schema and 'fields' not in schema: - logger.error('Invalid schema - name and fields attribute not found.') + if "name" not in schema and "fields" not in schema: + logger.error( + "Invalid schema - name and fields attribute not found." + ) return None except Exception as e: logger.error(e) @@ -1546,22 +1630,28 @@ def load_schema(p=None, file=None, source_name=None, source_id=None): else: # No file was specified, check for a Prism source table. 
if source_name is None and source_id is None: - logger.error('No schema file provided and a table (--sourceName or --sourceId) not specified.') + logger.error( + "No schema file provided and a table (--sourceName or --sourceId) not specified." + ) return None if source_id is not None: - schema = p.tables_list(id=source_id, type_='full') # Exact match on WID - and get the fields (full) + schema = p.tables_list( + id=source_id, type_="full" + ) # Exact match on WID - and get the fields (full) if schema is None: - logger.error(f'Invalid --sourceId {source_id} : table not found.') + logger.error(f"Invalid --sourceId {source_id} : table not found.") return None else: - tables = p.tables_list(name=source_name, type_='full') # Exact match on API Name + tables = p.tables_list( + name=source_name, type_="full" + ) # Exact match on API Name - if tables['total'] == 0: - logger.error(f'Invalid --sourceName {source_name} : table not found.') + if tables["total"] == 0: + logger.error(f"Invalid --sourceName {source_name} : table not found.") return None - schema = tables['data'][0] + schema = tables["data"][0] return schema From 43f8d8fe523a05c24bd6f278272f4f40d3fafbf5 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Wed, 25 Oct 2023 15:45:20 -0400 Subject: [PATCH 077/103] Reformat to PEP8 using Black. --- docs/source/conf.py | 26 +++-- prism/cli.py | 28 ++---- prism/commands/buckets_commands.py | 36 ++----- prism/commands/dataChanges_commands.py | 20 +--- prism/commands/tables_commands.py | 20 +--- prism/prism.py | 126 ++++++------------------- 6 files changed, 66 insertions(+), 190 deletions(-) diff --git a/docs/source/conf.py b/docs/source/conf.py index 8893e9a..445d0a3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,6 +1,7 @@ import os import sys -sys.path.insert(0, os.path.abspath('..')) + +sys.path.insert(0, os.path.abspath("..")) # Configuration file for the Sphinx documentation builder. # @@ -10,27 +11,22 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = 'Prism-Python' -copyright = '2023, Mark Greynolds' -author = 'Mark Greynolds' -release = '0.1' +project = "Prism-Python" +copyright = "2023, Mark Greynolds" +author = "Mark Greynolds" +release = "0.1" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = ['sphinx_click'] - -templates_path = ['_templates'] -exclude_patterns = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx_click' - ] +extensions = ["sphinx_click"] +templates_path = ["_templates"] +exclude_patterns = ["sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx_click"] # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = 'alabaster' -html_static_path = ['_static'] +html_theme = "alabaster" +html_static_path = ["_static"] diff --git a/prism/cli.py b/prism/cli.py index 091ae75..c7e6b4a 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -134,29 +134,15 @@ def cli( # Any value not passed and not in the environment arrives here with # the value "None" - override these with the configuration values. 
- base_url = param_fixup( - base_url, config, config_name, "workday_base_url" - ) - tenant_name = param_fixup( - tenant_name, config, config_name, "workday_tenant_name" - ) - client_id = param_fixup( - client_id, config, config_name, "prism_client_id" - ) - client_secret = param_fixup( - client_secret, config, config_name, "prism_client_secret" - ) - refresh_token = param_fixup( - refresh_token, config, config_name, "prism_refresh_token" - ) - log_level = param_fixup( - log_level, config, config_name, "prism_log_level" - ) + base_url = param_fixup(base_url, config, config_name, "workday_base_url") + tenant_name = param_fixup(tenant_name, config, config_name, "workday_tenant_name") + client_id = param_fixup(client_id, config, config_name, "prism_client_id") + client_secret = param_fixup(client_secret, config, config_name, "prism_client_secret") + refresh_token = param_fixup(refresh_token, config, config_name, "prism_refresh_token") + log_level = param_fixup(log_level, config, config_name, "prism_log_level") log_file = param_fixup(log_file, config, config_name, "prism_log_file") else: - click.echo( - f"The specified configuration [{config_name}] does not exist in the configuration file." - ) + click.echo(f"The specified configuration [{config_name}] does not exist in the configuration file.") sys.exit(1) except configparser.Error: click.echo(f"Error accessing configuration file {filename}.") diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index e711018..1ce3c72 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -79,24 +79,16 @@ def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): else: # Search by table ID or name. if isname: - buckets = p.buckets_get( - table_name=table, search=search, limit=limit, offset=offset, type_=type_ - ) + buckets = p.buckets_get(table_name=table, search=search, limit=limit, offset=offset, type_=type_) else: - buckets = p.buckets_get( - table_id=table, limit=limit, offset=offset, type_=type_ - ) + buckets = p.buckets_get(table_id=table, limit=limit, offset=offset, type_=type_) logger.info(json.dumps(buckets, indent=2)) @click.command("create") -@click.option( - "-n", "--target_name", default=None, help="Table name to associate with the bucket." -) -@click.option( - "-i", "--target_id", default=None, help="Table ID to associate with the table." -) +@click.option("-n", "--target_name", default=None, help="Table name to associate with the bucket.") +@click.option("-i", "--target_id", default=None, help="Table ID to associate with the table.") @click.option( "-f", "--file", @@ -149,12 +141,8 @@ def buckets_create(ctx, target_name, target_id, file, operation, bucket): default=None, help="Name of the table to associate with the bucket.", ) -@click.option( - "-i", "--target_id", default=None, help="Table ID to associate with the table." -) -@click.option( - "-f", "--file", default=None, help="Schema JSON file for the target table." 
-) +@click.option("-i", "--target_id", default=None, help="Table ID to associate with the table.") +@click.option("-f", "--file", default=None, help="Schema JSON file for the target table.") @click.option( "-o", "--operation", @@ -172,9 +160,7 @@ def buckets_create(ctx, target_name, target_id, file, operation, bucket): ) @click.argument("files", nargs=-1, required=True, type=click.Path(exists=True)) @click.pass_context -def buckets_files( - ctx, target_name, target_id, file, operation, bucket, complete, files -): +def buckets_files(ctx, target_name, target_id, file, operation, bucket, complete, files): """ Upload one or more CSV or gzip files to the specified bucket @@ -236,9 +222,7 @@ def buckets_complete(ctx, isname, bucket): bucket_state = bucket["state"]["descriptor"] if bucket_state != "New": - logger.error( - f'Bucket state is "{bucket_state}" - only "New" buckets can be completed.' - ) + logger.error(f'Bucket state is "{bucket_state}" - only "New" buckets can be completed.') sys.exit(1) logger.info(p.buckets_complete(bucket["id"])) @@ -280,9 +264,7 @@ def buckets_errorFile(ctx, isname, bucket): @click.command("status") -@click.option( - "-n", "--isName", is_flag=True, default=False, help="Bucket name to status" -) +@click.option("-n", "--isName", is_flag=True, default=False, help="Bucket name to status") @click.argument("bucket", required=True) @click.pass_context def buckets_status(ctx, isname, bucket): diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index b6ca71d..63b8892 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -62,13 +62,9 @@ def dataChanges_get(ctx, isname, dct, limit, offset, type_, search): sys.exit(1) # For display purposes, sort by display name (case-insensitive) - data_change_task["data"] = sorted( - data_change_task["data"], key=lambda dct_srt: dct_srt["displayName"].lower() - ) + data_change_task["data"] = sorted(data_change_task["data"], key=lambda dct_srt: dct_srt["displayName"].lower()) else: - data_change_task = p.dataChanges_get( - datachange_id=dct, limit=limit, offset=offset, type_=type_ - ) + data_change_task = p.dataChanges_get(datachange_id=dct, limit=limit, offset=offset, type_=type_) if data_change_task is None: logger.error(f"Data change task {dct} not found.") @@ -85,9 +81,7 @@ def dataChanges_get(ctx, isname, dct, limit, offset, type_, search): is_flag=True, help="Flag to treat the dct argument as a name.", ) -@click.option( - "-s", "--search", is_flag=True, help="Use contains search substring for --name." -) +@click.option("-s", "--search", is_flag=True, help="Use contains search substring for --name.") @click.argument("dct", required=True) @click.pass_context def dataChanges_validate(ctx, isname, dct, search): @@ -277,9 +271,7 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): logger.debug(f"new file container ID: {filecontainer_id}") # Execute the DCT. - activity = p.dataChanges_activities_post( - datachange_id=dct_id, fileContainer_id=filecontainer_id - ) + activity = p.dataChanges_activities_post(datachange_id=dct_id, fileContainer_id=filecontainer_id) if "errors" in activity: # Add the ID of the DCT for easy identification. 
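Both hunks in this command reformat the same upload-and-run flow: stage a file in a file container, start the data change task, then poll the resulting activity. Reduced to client-library calls, the flow looks roughly like the sketch below; the polling interval, the retry cap, the non-terminal state names, and the `id` key on the activity response are assumptions, and the real command adds logging and error reporting.

```python
import time


def run_data_change(p, dct_id, csv_file):
    # Stage the file in a new file container; the returned dict carries the container ID.
    container = p.fileContainers_load(None, csv_file)
    if container["total"] == 0:
        return None

    # Start the data change task against the uploaded container.
    activity = p.dataChanges_activities_post(dct_id, container["id"])
    if activity is None or "errors" in activity:
        return activity

    activity_id = activity["id"]  # assumed key name for the activity ID

    # Poll until the activity leaves a running state (state names are illustrative),
    # giving up after a fixed number of checks.
    for _ in range(30):
        time.sleep(10)
        activity = p.dataChanges_activities_get(datachange_id=dct_id, activityID=activity_id)

        if activity["state"]["descriptor"] not in ("New", "Queued", "Processing", "Loading"):
            break

    return activity
```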
@@ -297,9 +289,7 @@ def dataChanges_upload(ctx, isname, dct, file, wait, verbose): while True: time.sleep(10) - activity = p.dataChanges_activities_get( - datachange_id=dct_id, activityID=activity_id - ) + activity = p.dataChanges_activities_get(datachange_id=dct_id, activityID=activity_id) status = activity["state"]["descriptor"] diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index aa4282a..4c76202 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -80,9 +80,7 @@ def tables_get(ctx, isname, table, limit, offset, type_, compact, search): # When querying by name, the get operation returns a # dict with a count of found tables and a list of # tables. - tables = p.tables_get( - table_name=table, limit=limit, offset=offset, type_=type_, search=search - ) + tables = p.tables_get(table_name=table, limit=limit, offset=offset, type_=type_, search=search) if tables["total"] == 0: logger.error(f"Table ID {table} not found.") @@ -110,9 +108,7 @@ def tables_get(ctx, isname, table, limit, offset, type_, compact, search): @click.option("-w", "--sourceWID", help="The WID of an existing table to copy.") @click.argument("file", required=False, type=click.Path(exists=True)) @click.pass_context -def tables_create( - ctx, table_name, displayname, enableforanalysis, sourcename, sourcewid, file -): +def tables_create(ctx, table_name, displayname, enableforanalysis, sourcename, sourcewid, file): """ Create a new table with the specified name. @@ -169,9 +165,7 @@ def tables_create( default=False, help="Truncate the table before updating.", ) -@click.argument( - "file", required=True, type=click.Path(exists=True, dir_okay=False, readable=True) -) +@click.argument("file", required=True, type=click.Path(exists=True, dir_okay=False, readable=True)) @click.pass_context def tables_edit(ctx, file, truncate): """Edit the schema for an existing table. @@ -223,13 +217,9 @@ def tables_edit(ctx, file, truncate): type=click.Choice(["true", "false"], case_sensitive=False), ) @click.argument("table", required=True, type=str) -@click.argument( - "file", required=False, type=click.Path(exists=True, dir_okay=False, readable=True) -) +@click.argument("file", required=False, type=click.Path(exists=True, dir_okay=False, readable=True)) @click.pass_context -def tables_patch( - ctx, isname, table, file, displayname, description, documentation, enableforanalysis -): +def tables_patch(ctx, isname, table, file, displayname, description, documentation, enableforanalysis): """Edit the specified attributes of an existing table with the specified id (or name). If an attribute is not provided in the request, it will not be changed. To set an diff --git a/prism/prism.py b/prism/prism.py index 5b19794..98d18c1 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -27,9 +27,7 @@ # writing to stdout only... 
handler = logging.StreamHandler(sys.stdout) handler.setLevel(logging.WARNING) -log_format = logging.Formatter( - "%(asctime)s %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S" -) +log_format = logging.Formatter("%(asctime)s %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S") handler.setFormatter(log_format) logger.addHandler(handler) @@ -70,9 +68,7 @@ def set_logging(log_file=None, log_level="INFO"): fh.setLevel(set_level) # create formatter and add it to the handlers - formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - ) + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") fh.setFormatter(formatter) logger.addHandler(fh) @@ -111,11 +107,7 @@ def schema_compact(schema): # there will not be a fields attribute. if "fields" in compact_schema: # Remove Prism managed fields "WPA_*" - compact_schema["fields"] = [ - fld - for fld in compact_schema["fields"] - if not fld["name"].startswith("WPA_") - ] + compact_schema["fields"] = [fld for fld in compact_schema["fields"] if not fld["name"].startswith("WPA_")] for ordinal in range(len(compact_schema["fields"])): fld = schema["fields"][ordinal] @@ -255,9 +247,7 @@ def __init__( self.token_endpoint = f"{base_url}/ccx/oauth2/{tenant_name}/token" self.rest_endpoint = f"{base_url}/ccx/api/{version}/{tenant_name}" self.prism_endpoint = f"{base_url}/api/prismAnalytics/{version}/{tenant_name}" - self.upload_endpoint = ( - f"{base_url}/wday/opa/tenant/{tenant_name}/service/wBuckets" - ) + self.upload_endpoint = f"{base_url}/wday/opa/tenant/{tenant_name}/service/wBuckets" # At creation, there cannot yet be a bearer_token obtained from Workday. self.bearer_token = None @@ -410,9 +400,7 @@ def create_bearer_token(self): "client_secret": self.client_secret, } - r = self.http_post( - url=self.token_endpoint, headers=self.CONTENT_FORM, data=data - ) + r = self.http_post(url=self.token_endpoint, headers=self.CONTENT_FORM, data=data) if r.status_code == 200: logger.debug("successfully obtained bearer token") @@ -433,10 +421,7 @@ def get_bearer_token(self): Returns: Workday bearer token. """ - if ( - self.bearer_token is None - or (time.time() - self.bearer_token_timestamp) > 900 - ): + if self.bearer_token is None or (time.time() - self.bearer_token_timestamp) > 900: self.create_bearer_token() if self.bearer_token is None: @@ -494,9 +479,7 @@ def tables_get( operation = "/tables" if type_ is None or type_.lower() not in ["full", "summary", "permissions"]: - logger.warning( - "Invalid output type for tables list operation - defaulting to summary." - ) + logger.warning("Invalid output type for tables list operation - defaulting to summary.") output_type = "summary" else: output_type = type_.lower() @@ -570,8 +553,7 @@ def tables_get( match_tables = [ tab for tab in tables["data"] - if table_name.lower() in tab["name"].lower() - or table_name.lower() in tab["displayName"].lower() + if table_name.lower() in tab["name"].lower() or table_name.lower() in tab["displayName"].lower() ] else: # Grab all the tables in the result @@ -592,9 +574,7 @@ def tables_get( break # We always return a dict with the total tables found. - return_tables["total"] = len( - return_tables["data"] - ) # Separate step for debugging. + return_tables["total"] = len(return_tables["data"]) # Separate step for debugging. 
return return_tables def tables_post(self, schema): @@ -621,9 +601,7 @@ def tables_post(self, schema): logger.error("Invalid schema for create operation.") return None - response = self.http_post( - url=url, headers=self.CONTENT_APP_JSON, data=json.dumps(compact_schema) - ) + response = self.http_post(url=url, headers=self.CONTENT_APP_JSON, data=json.dumps(compact_schema)) if response.status_code == 201: return response.json() @@ -742,9 +720,7 @@ def buckets_get( """ operation = "/buckets" - output_type = ( - type_.lower() if type_.lower() in ["full", "summary"] else "summary" - ) + output_type = type_.lower() if type_.lower() in ["full", "summary"] else "summary" # If we got an ID, then do a direct query by ID - no paging or # searching required. @@ -806,27 +782,17 @@ def buckets_get( if bucket_name is not None: # We are searching at this point. # Substring search for matching table names match_buckets = [ - bck - for bck in buckets["data"] - if bucket_name in bck["name"] or bucket_name in bck["displayName"] + bck for bck in buckets["data"] if bucket_name in bck["name"] or bucket_name in bck["displayName"] ] elif table_id is not None: - match_buckets = [ - bck - for bck in buckets["data"] - if table_id == bck["targetDataset"]["id"] - ] + match_buckets = [bck for bck in buckets["data"] if table_id == bck["targetDataset"]["id"]] elif table_name is not None: # Caller is looking for any/all buckets by target table(s) match_buckets = [ bck for bck in buckets["data"] if table_name == bck["targetDataset"]["descriptor"] - or ( - search - and table_name.lower() - in bck["targetDataset"]["descriptor"].lower() - ) + or (search and table_name.lower() in bck["targetDataset"]["descriptor"].lower()) ] else: # No search in progress, grab all the buckets in this page. @@ -921,9 +887,7 @@ def buckets_create( # The caller expects the schema to come from the # passed schema - do a quick sanity check. if table_schema is None: - logger.error( - "schema, target id or target name is required to create a bucket." - ) + logger.error("schema, target id or target name is required to create a bucket.") return None if "id" not in table_schema or "fields" not in table_schema: @@ -931,9 +895,7 @@ def buckets_create( return None else: if target_id is not None: # Always use ID if provided - has precedence. - table = self.tables_get( - table_id=target_id, type_="full" - ) # Full=include fields object + table = self.tables_get(table_id=target_id, type_="full") # Full=include fields object if table is None: logger.error(f"table ID {target_id} not found.") @@ -974,9 +936,7 @@ def buckets_create( "schema": bucket_schema, } - response = self.http_post( - url, headers=self.CONTENT_APP_JSON, data=json.dumps(data) - ) + response = self.http_post(url, headers=self.CONTENT_APP_JSON, data=json.dumps(data)) if response.status_code == 201: response_json = response.json() @@ -1068,18 +1028,14 @@ def buckets_files(self, bucket_id, file=None): # Buckets can only load gzip files - do it. 
with open(target_file, "rb") as in_file: - new_file = { - "file": (upload_filename, gzip.compress(in_file.read())) - } + new_file = {"file": (upload_filename, gzip.compress(in_file.read()))} response = self.http_post(url, files=new_file) if response.status_code == 201: logger.debug(f"successfully uploaded {target_file} to the bucket") - results["data"].append( - response.json() - ) # Add this file's info to the return list + results["data"].append(response.json()) # Add this file's info to the return list results["total"] = len(results["data"]) return results @@ -1126,18 +1082,12 @@ def dataChanges_get( operation = "/dataChanges" # Make sure output type is valid. - output_type = ( - type_.lower() if type_.lower() in ["summary", "full"] else "summary" - ) + output_type = type_.lower() if type_.lower() in ["summary", "full"] else "summary" # Searching by ID is a special case that eliminates all other types # of search. Ask for the datachange by id and return just this # result - even blank. - if ( - datachange_id is not None - and isinstance(datachange_id, str) - and len(datachange_id) > 0 - ): + if datachange_id is not None and isinstance(datachange_id, str) and len(datachange_id) > 0: operation = f"{operation}/{datachange_id}?type={output_type}" logger.debug(f"dataChanges_get: {operation}") url = self.prism_endpoint + operation @@ -1166,11 +1116,7 @@ def dataChanges_get( searching = False name_param = "" - if ( - datachange_name is not None - and isinstance(datachange_name, str) - and len(datachange_name) > 0 - ): + if datachange_name is not None and isinstance(datachange_name, str) and len(datachange_name) > 0: if search is not None and isinstance(search, bool) and search: # Force a return of ALL data change tasks, so we can search the names. name_param = "" # Added to the query params @@ -1398,9 +1344,7 @@ def fileContainers_get(self, filecontainer_id): return {"total": len(return_json), "data": return_json} if response.status_code == 404: - logger.warning( - "verify: Self-Service: Prism File Container domain in the Prism Analytics functional area." - ) + logger.warning("verify: Self-Service: Prism File Container domain in the Prism Analytics functional area.") return {"total": 0, "data": []} # Always return a list. @@ -1439,9 +1383,7 @@ def fileContainers_load(self, filecontainer_id, file): upload_filename += ".gz" with open(target_file, "rb") as in_file: - new_file = { - "file": (upload_filename, gzip.compress(in_file.read())) - } + new_file = {"file": (upload_filename, gzip.compress(in_file.read()))} # Create the file container and get the ID. We use the # file container ID to load the file and then return the @@ -1551,9 +1493,7 @@ def upload_file(p, file, table_id=None, table_name=None, operation="TruncateAndI the table is returned. """ - bucket = p.buckets_create( - target_id=table_id, target_name=table_name, operation=operation - ) + bucket = p.buckets_create(target_id=table_id, target_name=table_name, operation=operation) if bucket is None: return None @@ -1620,9 +1560,7 @@ def load_schema(p=None, file=None, source_name=None, source_id=None): else: # This should be a full schema, perhaps from a table list command. if "name" not in schema and "fields" not in schema: - logger.error( - "Invalid schema - name and fields attribute not found." 
- ) + logger.error("Invalid schema - name and fields attribute not found.") return None except Exception as e: logger.error(e) @@ -1630,23 +1568,17 @@ def load_schema(p=None, file=None, source_name=None, source_id=None): else: # No file was specified, check for a Prism source table. if source_name is None and source_id is None: - logger.error( - "No schema file provided and a table (--sourceName or --sourceId) not specified." - ) + logger.error("No schema file provided and a table (--sourceName or --sourceId) not specified.") return None if source_id is not None: - schema = p.tables_list( - id=source_id, type_="full" - ) # Exact match on WID - and get the fields (full) + schema = p.tables_list(id=source_id, type_="full") # Exact match on WID - and get the fields (full) if schema is None: logger.error(f"Invalid --sourceId {source_id} : table not found.") return None else: - tables = p.tables_list( - name=source_name, type_="full" - ) # Exact match on API Name + tables = p.tables_list(name=source_name, type_="full") # Exact match on API Name if tables["total"] == 0: logger.error(f"Invalid --sourceName {source_name} : table not found.") From e6bb737e9514e9695a9a7b689951bdae7e7743ff Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Wed, 25 Oct 2023 15:50:55 -0400 Subject: [PATCH 078/103] Set default bucket name prefix to "prism_python_". --- prism/prism.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/prism.py b/prism/prism.py index 98d18c1..0635a53 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -83,7 +83,7 @@ def log_elapsed(msg, timedelta): def buckets_gen_name(): - bucket_name = "cli_" + uuid.uuid4().hex + bucket_name = "prism_python_" + uuid.uuid4().hex logger.debug(f"buckets_gen_name: created bucket name: {bucket_name}") return bucket_name From c9bf9787aadb54894ee98f12e830ba422de4f212 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Wed, 25 Oct 2023 15:54:05 -0400 Subject: [PATCH 079/103] Test setup.py for package_data. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 54a5c01..17c898d 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ author_email="CurtLHampton@gmail.com", url="https://github.com/Workday/prism-python", packages=["prism"], - package_data={"prism": ["data/*", "commands/*"]}, + package_data={"prism": ["data/*"]}, entry_points={"console_scripts": ["prism=prism.cli:cli"]}, install_requires=requirements, extras_require={"dev": ["pytest"]}, From 02ee979a8fff8e46b08046b7cef6d28e91c85075 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Wed, 25 Oct 2023 16:01:19 -0400 Subject: [PATCH 080/103] Replace package_data commands/* package. 
--- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 17c898d..54a5c01 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ author_email="CurtLHampton@gmail.com", url="https://github.com/Workday/prism-python", packages=["prism"], - package_data={"prism": ["data/*"]}, + package_data={"prism": ["data/*", "commands/*"]}, entry_points={"console_scripts": ["prism=prism.cli:cli"]}, install_requires=requirements, extras_require={"dev": ["pytest"]}, From 51974d9b73a5460fc8076452762ee23fe65d379b Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Thu, 26 Oct 2023 06:12:26 -0400 Subject: [PATCH 081/103] Revise table searching limits/offset handling --- prism/prism.py | 52 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/prism/prism.py b/prism/prism.py index 0635a53..83748cc 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -484,7 +484,8 @@ def tables_get( else: output_type = type_.lower() - # If we got a WID, then do a direct query by ID - no paging or searching required. + # If we got an ID, then do a direct query by ID - no validation, paging + # or searching required. if table_id is not None: operation = f"{operation}/{table_id}?format={output_type}" logger.debug(f"get: {operation}") @@ -493,22 +494,26 @@ def tables_get( response = self.http_get(url) if response.status_code == 200: + # Return the dict object to the caller - note: no + # 'total' or 'data' attributes for this single + # response to match the return from the API call. return response.json() else: return None # We are doing a query by attributes other than ID. - logger.debug(f"get: {operation}") + logger.debug(f"tables_get: {operation}") url = self.prism_endpoint + operation # Always return a valid JSON object of results regardless of # errors or API responses. THIS METHOD NEVER FAILS. return_tables = {"total": 0, "data": []} - # Start setting up the API call parameters. + # Start setting up the API call parameters - this is the minimal + # parameters to perform a search. params = { - "limit": limit if limit is not None else 100, - "offset": offset if offset is not None else 0, + "limit": limit if isinstance(limit, int) and limit <= 100 else 20, + "offset": offset if isinstance(limit, int) and offset >= 0 else 0, "type": output_type, } @@ -520,20 +525,26 @@ def tables_get( # Should only be 0 (not found) or 1 (found) tables found. params["limit"] = 1 params["offset"] = 0 - - # If we didn't get a limit, turn on searching to retrieve all tables. - if limit is None: - search = True # Force a search so we get all tables - + elif search and table_name is not None: + # If the caller asked for a search, open up the limits on + # the GETs for maximum retrieval since we need to look at + # every table to check for matches - a user specified limit + # (if specified) applies as tables are found. + params["limit"] = 100 # Max pagesize to retrieve in the fewest REST calls. + params["offset"] = 0 + elif not search and limit is None: + # The caller asked for all the tables, i.e., no ID, table substring search, + # or limit - open up the limits for maximum retrieval. + search = True params["limit"] = 100 # Max pagesize to retrieve in the fewest REST calls. params["offset"] = 0 - # Always assume we will retrieve more than one page. + # Assume we are paging the results. 
while True: r = self.http_get(url, params=params) if r.status_code != 200: - # Whatever we have captured (perhaps zero tables) so far + # Whatever we've captured (perhaps zero tables) so far # will be returned due to unexpected status code. Break # and do final clean-up on exit. break @@ -541,19 +552,22 @@ def tables_get( # Convert the response to a list of tables. tables = r.json() + # We are not searching, and we have a specific table - return + # whatever we got - maybe zero if table was not found. if not search and table_name is not None: # Explicit table name - # We are not searching, and we have a specific table - return - # whatever we got (maybe nothing). return tables # Figure out what tables of this batch of tables should be part of the # return results, i.e., search the this batch for matches. if table_name is not None: - # Substring search for matching table names, display names + table_lower = table_name.lower() + + # We are searching, do a substring search for matching strings + # anywhere in table names and display names match_tables = [ tab for tab in tables["data"] - if table_name.lower() in tab["name"].lower() or table_name.lower() in tab["displayName"].lower() + if table_lower in tab["name"].lower() or table_lower in tab["displayName"].lower() ] else: # Grab all the tables in the result @@ -851,7 +865,7 @@ def buckets_create( schema : dict A dictionary containing the schema fields describing the file. operation : str - Required, defaults to "TruncateAndInsert" operation + Required, defaults to 'TruncateAndInsert' operation Returns ------- @@ -879,7 +893,7 @@ def buckets_create( logger.error(e) return None else: - logger.error("invalid schema expecting dict or file name.") + logger.error("invalid schema - expecting dict or file name.") return None # Resolve the target table; if specified. @@ -1328,7 +1342,7 @@ def fileContainers_get(self, filecontainer_id): Returns ------- - Dictionary of found files having a "total" attribute with the count + Dictionary of found files having a 'total' attribute with the count of files uploaded and a data attribute with an array of file metadata for each file in the container. """ From b94b9d938e31befc004d6b37f1e43f8eb2bd0c78 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Thu, 26 Oct 2023 08:12:10 -0400 Subject: [PATCH 082/103] Updates for flake8 and pytest workflow steps. 
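For reference, the three retrieval modes the revised `tables_get` distinguishes (direct lookup by ID, exact match by API name, and substring search) are driven from the client library as sketched below; `p` is an authenticated `prism.Prism` client, and the ID and names are placeholders only.

```python
# Direct lookup by ID: no paging or searching, returns the table dict itself (or None).
table = p.tables_get(table_id="0123456789abcdef0123456789abcdef")

# Exact match by API name: limit/offset are forced to 1/0 internally.
exact = p.tables_get(table_name="my_new_table")

# Substring search across name and displayName: pages through the whole catalog.
matches = p.tables_get(table_name="new", search=True)
print(matches["total"], "matching tables")
```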
--- prism/commands/tables_commands.py | 4 ++-- prism/data/invalid-schema.json | 1 - prism/data/schema.csv | 6 ------ prism/prism.py | 14 ++++++++------ tests/test_prism.py | 2 +- 5 files changed, 11 insertions(+), 16 deletions(-) delete mode 100644 prism/data/invalid-schema.json delete mode 100644 prism/data/schema.csv diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 4c76202..3c691c2 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -3,7 +3,7 @@ import sys import click -from prism import * +from prism import (schema_compact, load_schema, upload_file, truncate_table) logger = logging.getLogger("prismCLI") @@ -182,7 +182,7 @@ def tables_edit(ctx, file, truncate): table = p.tables_put(schema, truncate=truncate) if table is None: - logger.error(f"Error updating table.") + logger.error("Error updating table.") else: logger.info(json.dumps(table, indent=2)) diff --git a/prism/data/invalid-schema.json b/prism/data/invalid-schema.json deleted file mode 100644 index f95d6d2..0000000 --- a/prism/data/invalid-schema.json +++ /dev/null @@ -1 +0,0 @@ -{ "d" , "mem" } \ No newline at end of file diff --git a/prism/data/schema.csv b/prism/data/schema.csv deleted file mode 100644 index 3411907..0000000 --- a/prism/data/schema.csv +++ /dev/null @@ -1,6 +0,0 @@ -name,displayName,ordinal,type,businessObject,precision,scale,parseFormat,required,externalId -text_field, text field,,text -date_field, date field,,date,,,,MM/DD/yy -numeric_field, numeric field,,numeric,,19,3 -required_field,required field,,,,,,,true -external_field, external id,,,,,,,true,true \ No newline at end of file diff --git a/prism/prism.py b/prism/prism.py index 83748cc..558d09e 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -407,7 +407,7 @@ def create_bearer_token(self): self.bearer_token = r.json()["access_token"] self.bearer_token_timestamp = time.time() else: - logger.error(f"create bearer token failed: HTTP status code.") + # Error handling occurred in http_post, fail silently here. self.bearer_token = None self.bearer_token_timestamp = None @@ -918,7 +918,7 @@ def buckets_create( tables = self.tables_get(table_name=target_name, type_="full") if tables["total"] == 0: - logger.error(f"table not found for bucket operation.") + logger.error(f"table {target_name} not found for bucket operation.") return None table = tables["data"][0] @@ -940,7 +940,7 @@ def buckets_create( bucket_schema = table_to_bucket_schema(compact_schema) - logger.debug(f"post: /buckets") + logger.debug("post: /buckets") url = self.prism_endpoint + "/buckets" data = { @@ -984,7 +984,9 @@ def buckets_complete(self, bucket_id): logger.debug(f"successfully completed wBucket {bucket_id}.") return r.json() elif r.status_code == 400: - logger.debug(f"error completing bucket") + # This is an error coming back from the API call and + # is actually valid JSON with an "error" attribute. + logger.debug("non-fatal error completing bucket") return r.json() return None @@ -1241,7 +1243,7 @@ def dataChanges_activities_post(self, datachange_id, filecontainer_id=None): logger.debug(f"successfully started data load task - id: {activity_id}") return return_json elif r.status_code == 400: - logger.error(f"error running data change task.") + logger.error("error running data change task.") return r.json() # This is still valid JSON with the error. 
return None @@ -1535,7 +1537,7 @@ def truncate_table(p, table_id=None, table_name=None): bucket = p.buckets_create(target_name=table_name, operation="TruncateAndInsert") if bucket is None: - logger.error(f"Unable to truncate table - see log for details.") + logger.error("Unable to truncate table - see log for details.") return None bucket_id = bucket["id"] diff --git a/tests/test_prism.py b/tests/test_prism.py index d9793f9..80390af 100644 --- a/tests/test_prism.py +++ b/tests/test_prism.py @@ -2,5 +2,5 @@ def test_load_schema(schema_file): - schema = prism.load_schema(schema_file) + schema = prism.load_schema(file=schema_file) assert type(schema) is dict From 02a7daa47e82a42a0bd2c1fb0685b0a7d2ee36db Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Thu, 26 Oct 2023 18:28:02 -0400 Subject: [PATCH 083/103] Black re-format. --- prism/commands/tables_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 3c691c2..4fe9ac4 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -3,7 +3,7 @@ import sys import click -from prism import (schema_compact, load_schema, upload_file, truncate_table) +from prism import schema_compact, load_schema, upload_file, truncate_table logger = logging.getLogger("prismCLI") From 89cc57d28a0bae1878c0e8de933f1da60145ca07 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Fri, 27 Oct 2023 10:46:09 -0400 Subject: [PATCH 084/103] Added prism.commands to packages - removed from package_data. --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 54a5c01..abae36a 100644 --- a/setup.py +++ b/setup.py @@ -11,8 +11,8 @@ author="Curtis Hampton", author_email="CurtLHampton@gmail.com", url="https://github.com/Workday/prism-python", - packages=["prism"], - package_data={"prism": ["data/*", "commands/*"]}, + packages=["prism", "prism.commands"], + package_data={"prism": ["data/*"]}, entry_points={"console_scripts": ["prism=prism.cli:cli"]}, install_requires=requirements, extras_require={"dev": ["pytest"]}, From a597c9a7b6a2ee3524a6de8435d0e27d0d8b169b Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Fri, 27 Oct 2023 10:46:41 -0400 Subject: [PATCH 085/103] Added flake8 ignore rules to align with black formatting. --- setup.cfg | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.cfg b/setup.cfg index 897f8d4..81dd7b5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -4,6 +4,7 @@ # resulting files. [flake8] +ignore = E203,W503 max-line-length = 120 exclude = versioneer.py From 1117c050b497701480fab5f87da4b66592b6014c Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Fri, 27 Oct 2023 10:47:36 -0400 Subject: [PATCH 086/103] Removed username/password parameters. --- prism/cli.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/prism/cli.py b/prism/cli.py index c7e6b4a..3a1fd1f 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -97,8 +97,6 @@ def cli( ctx, base_url, tenant_name, - username, - password, client_id, client_secret, refresh_token, @@ -149,6 +147,13 @@ def cli( # If the configuration is not available or is invalid, exit sys.exit(1) + # Do a quick sanity check - if we don't have connection information + # there is nothing we can do. 
+ + if base_url is None or tenant_name is None or client_id is None or client_secret is None or refresh_token is None: + click.echo('No Prism connectivity information found - use "prism --help" for more information.') + sys.exit(1) + if log_level is None: set_level = logging.INFO else: From 66557610e068f266cfd999518922c15c28287e0a Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 29 Oct 2023 09:43:49 -0400 Subject: [PATCH 087/103] Fixup for README content and examples. --- README.md | 184 +++++++++++++++++++++++++++++++++++------ prism/data/schema.json | 71 +++++++++------- prism/prism.py | 46 ++++++----- 3 files changed, 226 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index 742e6e0..d79ed0e 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,101 @@ # Prism-Python Python client library and command line interface (CLI) for interacting with -Workday’s Prism API V2. +Workday’s Prism API V3. + +Workday provides the Prism Analytics REST API web service that works with +Prism Analytics tables, data change tasks, and datasets. You can develop +a software program that uses the different resource endpoints to +programmatically create Prism Analytics tables and datasets and modify +data in them. + +The Python client library and CLI work together to provide no-code/low-code +access to the Workday Prism Analytics REST API. + +The Python client library is a REST API wrapper managing the HTTP methods, +URL endpoints and the data required by specific Workday Prism Analytics API +REST operations. Using this library in Python projects simplifies interactions +with the API while providing the rich functionality of the Workday Prism Analytics +REST API. + +The CLI is a powerful tool for interacting with a Workday Prism Analytics REST API +client library, allowing you to quickly and easily perform Workday Prism Analytics +tasks from the command line. + +## Workday Prism Analytics REST API Requirements + +The Prism client library requires an api + +1. [Register a Workday Prism Analytics API Client.](https://doc.workday.com/admin-guide/en-us/workday-studio/integration-design/common-components/the-prismanalytics-subassembly/tzr1533120600898.html) + +In Workday, register an integrations API client with Prism Analytics as its +scope. Obtain the Client ID, Client Secret, and Refresh Token values that the +Prism client library requires as parameters. + +2. [Obtain the Workday REST API Endpoint.](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) + +In Workday, obtain the Workday REST API endpoint that the Prism class requires +as a parameter. + +Before configuring the CLI or using the Prism client library, ensure you have the following values: + +- Base URL +- Tenant Name +- Client ID +- Client Secret +- Refresh Token + +## Python Prerequisites ## + +First determine whether you have up-to-date versions of Python, pip, and Git. +If you need to install Python and Git, please refer to the official +installation guides. + +### Python ### +Python comes preinstalled on most Linux distributions, and is available as a package on all others. +You can check which version of Python (if any) is installed, +by entering the following command in a terminal or command window: + +```bash +[user@host ~]$ python --version +Python 3.11.1 +``` + +or + +```bash +[user@host ~]$ python3 --version +Python 3.9.16 +``` + +### Pip ## + +**pip** is the preferred installer program. Starting with Python 3.4, it is included by default with the Python binary installers. 
+You can check if pip is already installed and up-to-date by entering the following command: + +```bash +[user@host ~]$ pip --version +pip 23.3.1 from //python3.11/site-packages/pip (python 3.11) +``` + +or + +```bash +[user@host ~]$ pip3 --version +pip 23.3.1 from //python3.9/site-packages/pip (python 3.9) +``` + +### Git Installation ### + +Before installing Git, you should first determine whether you have it installed by running the following git command: + +```bash +[user@host ~]$ git --version +git version 2.40.1 +``` + +## Prism-Python Install ## -## Install You may install the latest version directly from GitHub with: ```bash @@ -18,20 +110,27 @@ It is also possible to install a specific tagged release with: pip install git+https://github.com/Workday/prism-python.git@0.2.0 ``` -## Requirements +## Configuration ## -1. [Register a Workday Prism Analytics API Client.](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/qAugF2pRAGtECVLHKdMO_A) +The CLI allows you to set provide user to change its behaviour via 3 mechanisms: -In Workday, register an integrations API client with Prism Analytics as its -scope. Obtain the Client ID, Client Secret, and Refresh Token values that the -Prism class requires as parameters. +1. command line options +2. environment variables +3. configuration files -2. [Obtain the Workday REST API Endpoint.](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) +### Command line options ### +always used regardless of other configurations -In Workday, obtain the Workday REST API endpoint that the Prism class requires -as a parameter. + ``` +prism --base_url= \ + --tenant_name ... +``` + +### Environment variables ### -3. For ease of use, set the following environment variables using the values obtained above: +Used if present and not also on the command line + +2. For ease of use, set the following environment variables using the values obtained above: ```bash export workday_base_url= @@ -39,37 +138,66 @@ export workday_tenant_name= export prism_client_id= export prism_client_secret= export prism_refresh_token= +export prism_log_file= +export prism_log_level=INFO|DEBUG|WARN|ERROR ``` -## Python Example - -### Create a new table with Prism API Version 2 +### Configuration file ### + +automatically looks for prism.ini - can be overridden with --config_file option. +NOTE the client secret and refresh tokens are the same as passwords and should be protected + + [default] + workday_base_url = https:// + workday_tenant_name = + prism_client_id = MTFmZWZjZTItZTk0NS00MWQ5LTkwMzItNTc5NWU4MGI3ZWYx + prism_client_secret = c9fta3s2b5j5zfppthi19zdflncjljzgml4rk430mk9y1n5fm0lp9kstzzvmo0th0389mbve6gr5rg5kax9jmsn9l5om3vsanmq + prism_refresh_token = we9o31mrs15z7g9qpcd6jedaf74mhv4weadki7uwldhbz99mn0s2u3skjy9zshst2r2wgda502q44g4m8pka2g26xvyzgboakc + prism_log_level = DEBUG + + [integration] + workday_base_url = https:// + workday_tenant_name = + prism_client_id = NTFmMDYxZTktM2FjNi00MDJiLWI0YjctMGYwYTkyMmZlYmUy + prism_client_secret = qym8c79g9inthk6ytodjmwhzhcss4qd8x06cepnvhd8g69hhp8ihle701sna8fv2myfyktj8br3fogz7yhzo5oo1oien3f4kkmi + prism_refresh_token = jt8bkmo3q7ejqn0tcs3e171a1ytgzl18q942w44wbkfy0zflgyhkx82ldjllwlxnl91ngbp6x74ilfxca20smmom9mvzqfcm9s5 + prism_log_level = INFO + +## Python client library example + +### Create a new Prism table +The following Python script uses the Prism-Python client library to create +a new Workday Prism Analytics table and load the contents of a CSV file. 
```python import os import prism -# initialize the prism class with your credentials +# STEP 1 - Initialize the Prism-Python client library +# using environment variables. p = prism.Prism( os.getenv("workday_base_url"), os.getenv("workday_tenant_name"), os.getenv("prism_client_id"), os.getenv("prism_client_secret"), - os.getenv("prism_refresh_token"), - version="v2" + os.getenv("prism_refresh_token") ) -# read in your table schema -schema = prism.load_schema("/path/to/schema.json") - -# create an empty API table with your schema -table = prism.create_table(p, "my_new_table", schema=schema["fields"]) +# STEP 2 - Create a new table using the definition of fields +# provided by the schema.jsob file. +table = prism.tables_create(table_name="my_new_table", file="/path/to/schema.json") -# print details about new table +# Print JSON result about the new table print(table) + +# STEP 3 - Use the convenience function prism.upload_file +# to upload a local file to the table. Notice the operation +# is Insert on the first load. +prism.upload_file(p, "/path/to/file.csv.gz", table["id"], operation="Insert") ``` -### Manage data in an existing table with Prism API Version 2 +### Manage data in an existing table +A Table Operations Available: `TruncateandInsert`, `Insert`, `Update`, `Upsert`, `Delete`. @@ -77,8 +205,7 @@ To use the `Update`, `Upsert`, or `Delete` operations, you must specify an external id field within your table schema. ```python -# upload GZIP CSV file to your table -prism.upload_file(p, "/path/to/file.csv.gz", table["id"], operation="TruncateandInsert") + ``` ## CLI Example @@ -101,6 +228,11 @@ prism create my_new_table /path/to/schema.json prism upload /path/to/file.csv.gz bbab30e3018b01a723524ce18010811b ``` +## Notes on schema files +1. Can be a full table definition including name, displayName and fields attributes +2. Can be a list of only field definitions +3. Field definitions are either full or compact(should i say this) + ## Bugs Please report any bugs that you find [here](https://github.com/Workday/prism-python/issues). 
Or, even better, fork the repository on [GitHub](https://github.com/Workday/prism-python) diff --git a/prism/data/schema.json b/prism/data/schema.json index 896c055..00bed87 100644 --- a/prism/data/schema.json +++ b/prism/data/schema.json @@ -1,42 +1,57 @@ { "fields": [ { - "defaultValue": "textField", - "description": "this is a Text Field", - "name": "State2", - "parseFormat": "", - "precision": 0, + "name": "id", + "displayName" : "Record ID", + "description": "This is an example of text primary key.", "ordinal": 1, - "scale": 0, "type": { "id": "Schema_Field_Type=Text" - } + }, + "required" : true, + "externalId" : true }, { - "defaultValue": "0", - "description": "this is an Integer Field", - "name": "Population2", - "parseFormat": "", - "precision": 9, + "name": "name", + "displayName" : "Full Name", + "description": "Full name of employee.", "ordinal": 2, - "scale": 0, "type": { - "id": "Schema_Field_Type=Numeric" + "id": "Schema_Field_Type=Text" + }, + "required" : true + }, + { + "name": "employ_yrs", + "displayName" : "Employee Tenure Years", + "description": "Integer number of years in role.", + "ordinal": 3, + "defaultValue": "0", + "type": { + "id": "Schema_Field_Type=Integer" } - } - ], - "parseOptions": { - "charset": { - "id": "Encoding=UTF-8" }, - "fieldsDelimitedBy": ",", - "fieldsEnclosedBy": "\"", - "headerLinesToIgnore": 1, - "type": { - "id": "Schema_File_Type=Delimited" + { + "name": "average_rating", + "displayName" : "Average Rating", + "description": "Average performance rating.", + "ordinal": 4, + "precision" : 9, + "scale" : 2, + "defaultValue": "0.00", + "type": { + "id": "Schema_Field_Type=Integer" + } + }, + { + "name": "hired", + "displayName" : "Hire Date", + "description": "Date of hire.", + "ordinal": 5, + "parseFormat": "yyyy-MM-DD", + "type": { + "id": "Schema_Field_Type=Date" + } } - }, - "schemaVersion": { - "id": "Schema_Version=1.0" - } + ] } \ No newline at end of file diff --git a/prism/prism.py b/prism/prism.py index 558d09e..63e2092 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -188,12 +188,15 @@ def table_to_bucket_schema(table): else: fld.update(operation_key_false) - # Now trim our field attributes to keep just what we need + # Now trim our field attributes to keep just what we need - these may + # or may not be in the schema - just make sure. for fld in fields: for attr in ["id", "displayName", "fieldId", "required", "externalId"]: if attr in fld: del fld[attr] + # Use the parse options from the schema file if provided, otherwise + # automatically add defaults suitable for most CSV files. if "parseOptions" in table: bucket_schema["parseOptions"] = table["parseOptions"] else: @@ -857,13 +860,13 @@ def buckets_create( Parameters ---------- bucket_name : str - Name of the bucket to create, default to a new generated name. + Name of the bucket to create, default to a generated name. target_id : str The ID of the table for this bucket. target_name : str The name of the table for bucket. - schema : dict - A dictionary containing the schema fields describing the file. + schema : dict|file + A dictionary or JSON file containing the schema fields describing the file. operation : str Required, defaults to 'TruncateAndInsert' operation @@ -873,42 +876,41 @@ def buckets_create( Information about the new bucket, or None if there was a problem. """ - # If the caller didn't give us a name to use for the bucket, - # create a default name. 
- if bucket_name is None: - bucket_name = buckets_gen_name() - else: - bucket_name = bucket_name - + # If the caller didn't give us a name for the new bucket, create a default name. + new_bucket_name = bucket_name if bucket_name is not None else buckets_gen_name() table_schema = None if schema is not None: if isinstance(schema, dict): - table_schema = schema + table_schema = schema # Use as provided. elif isinstance(schema, str): try: with open(schema) as schema_file: table_schema = json.load(schema_file) except Exception as e: + # We don't care what the problem is (missing file, bad JSON). logger.error(e) return None else: logger.error("invalid schema - expecting dict or file name.") return None - # Resolve the target table; if specified. if target_id is None and target_name is None: - # The caller expects the schema to come from the - # passed schema - do a quick sanity check. + # The caller expects the target table to be identified in the passed dict/file - do a quick sanity check. if table_schema is None: logger.error("schema, target id or target name is required to create a bucket.") return None + # To create a bucket based on ONLY the schema dict/file, the caller + # must have provide the ID of the target table and the fields + # expected in the CSV file. if "id" not in table_schema or "fields" not in table_schema: logger.error('schema missing "id" or "fields" attribute.') return None else: - if target_id is not None: # Always use ID if provided - has precedence. + # The caller gave us in ID or name of the target table, make sure the table exists. + if target_id is not None: + # Always use ID if provided - has precedence over name. table = self.tables_get(table_id=target_id, type_="full") # Full=include fields object if table is None: @@ -923,15 +925,17 @@ def buckets_create( table = tables["data"][0] + # If the caller DIDN'T provide a schema dict/file, use the table + # we just found to supply the ID and fields for the bucket. if table_schema is None: table_schema = table else: - # Override the definition of the table in the schema. + # Use everything from the schema dict/file, but set/overwrite the ID + # to the target table we just looked up. table_schema["id"] = table["id"] - # We have the table and the user didn't include a schema. Make a copy - # of the target table's schema. - + # Regardless of how we got the table definition, reduce the definition + # to remove extrainious attributes for a bucket operation. compact_schema = schema_compact(table_schema) if compact_schema is None: @@ -944,7 +948,7 @@ def buckets_create( url = self.prism_endpoint + "/buckets" data = { - "name": bucket_name, + "name": new_bucket_name, "operation": {"id": "Operation_Type=" + operation}, "targetDataset": {"id": table_schema["id"]}, "schema": bucket_schema, From 8cf8165b4d7d2f6db63bfbca906961018286fa50 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 29 Oct 2023 09:50:54 -0400 Subject: [PATCH 088/103] Update --table_name required message. --- prism/commands/tables_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 4fe9ac4..2fcdef2 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -129,7 +129,7 @@ def tables_create(ctx, table_name, displayname, enableforanalysis, sourcename, s elif "name" not in schema: # The schema doesn't have a name and none was given - exit. # Note: this could be true if we have a schema of only fields. 
- logger.error("Table --name must be specified.") + logger.error("Table --table_name must be specified.") sys.exit(1) if displayname is not None: From 721c6078e3dde484d48cea4d8baa4f6f1ee81078 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 29 Oct 2023 09:54:50 -0400 Subject: [PATCH 089/103] Fix data types for numeric fields. --- prism/data/schema.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prism/data/schema.json b/prism/data/schema.json index 00bed87..58466f0 100644 --- a/prism/data/schema.json +++ b/prism/data/schema.json @@ -26,7 +26,7 @@ "displayName" : "Employee Tenure Years", "description": "Integer number of years in role.", "ordinal": 3, - "defaultValue": "0", + "defaultValue": 0, "type": { "id": "Schema_Field_Type=Integer" } @@ -38,9 +38,9 @@ "ordinal": 4, "precision" : 9, "scale" : 2, - "defaultValue": "0.00", + "defaultValue": 1.00, "type": { - "id": "Schema_Field_Type=Integer" + "id": "Schema_Field_Type=Numeric" } }, { From 1cd582d069e2b13f6e57161f574d78cc154156f3 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 29 Oct 2023 10:04:01 -0400 Subject: [PATCH 090/103] Fix default displayName to avoid duplicates. --- prism/commands/tables_commands.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index 2fcdef2..f66f497 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -125,6 +125,11 @@ def tables_create(ctx, table_name, displayname, enableforanalysis, sourcename, s if table_name is not None: # If we got a name, set it in the table schema schema["name"] = table_name.replace(" ", "_") # Minor clean-up + + # Force the display name - there cannot be duplicate displayNames + # in the data catalog. + schema["displayName"] = table_name + logger.debug(f'setting table name to {schema["name"]}') elif "name" not in schema: # The schema doesn't have a name and none was given - exit. From d1596e68a923adfe2d9aa78caadaf30397319bf8 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Sun, 29 Oct 2023 10:26:07 -0400 Subject: [PATCH 091/103] Fix tables_patch command options. --- prism/commands/tables_commands.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index f66f497..d9559f8 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -193,7 +193,13 @@ def tables_edit(ctx, file, truncate): @click.command("patch") -@click.option("-n", "--isName", help="Flag to treat the table argument as a name.") +@click.option( + "-n", + "--isName", + is_flag=True, + default=False, + help="Flag to treat the table argument as a name.", +) @click.option( "--displayName", is_flag=False, @@ -222,7 +228,7 @@ def tables_edit(ctx, file, truncate): type=click.Choice(["true", "false"], case_sensitive=False), ) @click.argument("table", required=True, type=str) -@click.argument("file", required=False, type=click.Path(exists=True, dir_okay=False, readable=True)) +@click.argument("file", required=False, type=click.Path(dir_okay=False)) @click.pass_context def tables_patch(ctx, isname, table, file, displayname, description, documentation, enableforanalysis): """Edit the specified attributes of an existing table with the specified id (or name). @@ -230,7 +236,8 @@ def tables_patch(ctx, isname, table, file, displayname, description, documentati If an attribute is not provided in the request, it will not be changed. 
To set an attribute to blank (empty), include the attribute without specifying a value. - [TABLE] The ID or API name (use -n option) of the table to patch + TABLE The ID or API name (use -n option) of the table to patch. + [FILE] Optional file containing patch values for the table. """ @@ -304,7 +311,7 @@ def set_patch_value(attr, value): sys.exit(1) # Identify the existing table we are about to patch. - if not isname: + if isname: # Before doing anything, table name must exist. tables = p.tables_get(table_name=table) # Exact match From f1d9c88c0151378acc6c59707348dce03a3bb886 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Mon, 30 Oct 2023 05:50:21 -0400 Subject: [PATCH 092/103] Improve tables_patch attribute message. --- prism/commands/tables_commands.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index d9559f8..a9f2523 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -269,7 +269,7 @@ def tables_patch(ctx, isname, table, file, displayname, description, documentati for patch_attr in patch_data.keys(): if patch_attr not in valid_attributes: - logger.error(f"unexpected attribute {patch_attr} in patch file") + logger.error(f'unexpected attribute "{patch_attr}" in patch file') sys.exit(1) def set_patch_value(attr, value): From a79d04487207e79c91c4a7bc5339ff72d96af885 Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Tue, 31 Oct 2023 08:03:00 -0400 Subject: [PATCH 093/103] Updated README. --- README.md | 369 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 266 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index d79ed0e..acc801b 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Python client library and command line interface (CLI) for interacting with Workday’s Prism API V3. -Workday provides the Prism Analytics REST API web service that works with +Workday provides the Prism Analytics REST API web service to work with Prism Analytics tables, data change tasks, and datasets. You can develop a software program that uses the different resource endpoints to programmatically create Prism Analytics tables and datasets and modify @@ -14,30 +14,69 @@ data in them. The Python client library and CLI work together to provide no-code/low-code access to the Workday Prism Analytics REST API. -The Python client library is a REST API wrapper managing the HTTP methods, +The Python **client library** is a REST API wrapper managing the HTTP methods, URL endpoints and the data required by specific Workday Prism Analytics API -REST operations. Using this library in Python projects simplifies interactions -with the API while providing the rich functionality of the Workday Prism Analytics -REST API. +REST operations. Using this client library in Python projects simplifies interactions +with the Workday Prism Analytics REST API while providing the rich functionality +of Workday Prism Analytics to your Python project. -The CLI is a powerful tool for interacting with a Workday Prism Analytics REST API -client library, allowing you to quickly and easily perform Workday Prism Analytics -tasks from the command line. +The **CLI** is a powerful tool for interacting with a Workday Prism Analytics +REST API client library, allowing you to quickly and easily perform Workday +Prism Analytics tasks from the command line. 
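Click builds help text for the top-level command and for each group of sub-commands, so exploring the CLI can start with the commands below; the `tables` group name is an assumption based on the command modules in this patch series.

```bash
prism --help           # global options and the available command groups
prism tables --help    # assumed group name; lists table commands such as get and create
```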
## Workday Prism Analytics REST API Requirements -The Prism client library requires an api +Workday Prism Analytics REST APIs use OAuth and the Workday configurable +security model to secure Workday Prism Analytics operations in end-user +applications. The Workday Prism REST APIs act on behalf of the individual +using the client. The user's security profile affects the REST API access +to Workday resources. -1. [Register a Workday Prism Analytics API Client.](https://doc.workday.com/admin-guide/en-us/workday-studio/integration-design/common-components/the-prismanalytics-subassembly/tzr1533120600898.html) +The Prism client library requires API Client credentials defined in the +target Workday tenant. The API Client credentials authorize programmatic +access to the Workday tenant and provides the identity of the Workday user +to enforce security for all operations. -In Workday, register an integrations API client with Prism Analytics as its -scope. Obtain the Client ID, Client Secret, and Refresh Token values that the -Prism client library requires as parameters. +#### [Register a Workday Prism Analytics API Client.](https://doc.workday.com/admin-guide/en-us/workday-studio/integration-design/common-components/the-prismanalytics-subassembly/tzr1533120600898.html) #### -2. [Obtain the Workday REST API Endpoint.](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) +In the target Workday Prism enabled tenant, register an integrations API client +with Prism Analytics as its scope (task _Register API Client for Integrations_) to +create the Client ID and Client Secret values allowing programmatic access to the tenant. -In Workday, obtain the Workday REST API endpoint that the Prism class requires -as a parameter. +![Register API](https://workday-prism-python.s3.amazonaws.com/Prism-Python-RegisterAPI.png) + +After clicking the Done button, the confirmation screen shows the +two important REST API credentials: **Client ID** and **Client Secret**. + +![Client ID](https://workday-prism-python.s3.amazonaws.com/Prism-Python-Secret.png) + +**Record the secret value** for use with the Prism-Python client library. Workday +never shows the secret value again after clicking the Done button. + +> **Note**: As a Workday best practice, try to minimize the number +> of unique API Clients since, for auditing reasons, they cannot be removed. + +> **Note**: If the client secret is ever lost or compromised, a new secret +> can be generated. However, a new secret invalidates any application +> using the old secret. + +> **Note**: Protect the Client ID and Client Secret values the same way as +> any password. + +#### [Create Refresh Token](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) #### + +Creating a Refresh Token assigns a Workday user identity to an API Client to control +access to Workday Prism Analytics tables and data change tasks. From the API Clients +for Integration, take the related action to Manage Refresh Tokens for Integrations. + +![View API for Client Integrations](https://workday-prism-python.s3.amazonaws.com/ViewApiClients.png) + +#### [Obtain the Workday REST API Endpoint.](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) #### + +In Workday, obtain the Workday REST API base URL endpoint that the Prism class requires +as a parameter. From the View API Client report, locate the base_url and tenant_name values. 
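Once these values have been located, a short check like the one below can confirm that everything needed is in place before any client library or CLI call is attempted. This is only a convenience sketch; it assumes the credentials are stored under the environment-variable names used elsewhere in this document.

```python
import os

# The values every Prism client library or CLI session needs.
REQUIRED = [
    "workday_base_url",
    "workday_tenant_name",
    "prism_client_id",
    "prism_client_secret",
    "prism_refresh_token",
]

missing = [name for name in REQUIRED if not os.getenv(name)]

if missing:
    raise SystemExit("Missing Prism configuration values: " + ", ".join(missing))

print("All Prism configuration values are present.")
```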
+ +![Base URL and Tenant Name](https://workday-prism-python.s3.amazonaws.com/URL-tenant.png) Before configuring the CLI or using the Prism client library, ensure you have the following values: @@ -46,17 +85,19 @@ Before configuring the CLI or using the Prism client library, ensure you have th - Client ID - Client Secret - Refresh Token +- Log File Name (optional) +- Log Level (optional) ## Python Prerequisites ## -First determine whether you have up-to-date versions of Python, pip, and Git. -If you need to install Python and Git, please refer to the official -installation guides. +First determine whether you have an up-to-date versions of Python, +pip, and Git. If you need to install Python and Git, please refer +to the official installation guides. ### Python ### -Python comes preinstalled on most Linux distributions, and is available as a package on all others. -You can check which version of Python (if any) is installed, -by entering the following command in a terminal or command window: +Python comes preinstalled on most Linux distributions, and is available +as a package on all others. You can check which version of Python (if any) +is installed, by entering the following command in a terminal or command window: ```bash [user@host ~]$ python --version @@ -72,8 +113,9 @@ Python 3.9.16 ### Pip ## -**pip** is the preferred installer program. Starting with Python 3.4, it is included by default with the Python binary installers. -You can check if pip is already installed and up-to-date by entering the following command: +**pip** is the preferred installer program. Starting with Python 3.4, it is included +by default with the Python binary installers. You can check if pip is already installed +and up-to-date by entering the following command: ```bash [user@host ~]$ pip --version @@ -89,7 +131,12 @@ pip 23.3.1 from //python3.9/site-packages/pip (python 3.9) ### Git Installation ### -Before installing Git, you should first determine whether you have it installed by running the following git command: +The installation instructions below use the Git client to retrieve +and install the Prism-Python package automatically. You can also retrieve this +package using your Git preferred method. + +Before installing Git, you should first determine whether you already have it +installed by running the following git command: ```bash [user@host ~]$ git --version @@ -98,7 +145,8 @@ git version 2.40.1 ## Prism-Python Install ## -You may install the latest version directly from GitHub with: +To automatically retrieve and install the latest version of this +package directly from GitHub, use the following command: ```bash pip install git+https://github.com/Workday/prism-python.git @@ -110,64 +158,129 @@ It is also possible to install a specific tagged release with: pip install git+https://github.com/Workday/prism-python.git@0.2.0 ``` +When either of these installations completes, the **prism** command +is available in your shell and provides access to the CLI. For +example, after installation the following command returns +help on the available commands: + +```bash +prism --list +``` + ## Configuration ## -The CLI allows you to set provide user to change its behaviour via 3 mechanisms: +The Python client library and CLI require the security credentials created and other +operational options. For the Python client library, these options must be +supplied when the client library object is created. 
+ +```python +import prism + +pClientLib = prism.Prism( + base_url=, + tenant_name=, + client_id=, + client_secret=, + refrest_token= +) +``` -1. command line options -2. environment variables -3. configuration files +For the command line, the options can be set in three ways: -### Command line options ### -always used regardless of other configurations +| Configuration | Description | +| ------------- |----------------------------------------------------------------------| +| Command Line| Specified for each CLI operation. | +| Environment Variables | Set in the operating system environment and used for CLI operations. | +| Configuration File | One or more configurations stored in a file. | - ``` +When multiple configurations are available, i.e., specified on the command line +AND as environment variables, AND in a configuration file the first instance of +an option is used, i.e., command line preferred over environment and environment +is preferred over configuration file. + +The following configuration options should be available: + +| Configuration | Description | +|---------------|-----------------------------------------------------------------------------------------------------------------| +| base_url | The service endpoint for the Workday tenant. | +| tenant_name | The tenant_name available at the service endpoint. | +| client_id | The API Client for Integration ID created using the _Register API Client for Integration_ task. | +| client_secret | The API Client for Integration Secret created using the _Register API Client for Integration_ task. | +| refresh_token | The Refresh Token for a Workday user created with the _Maintain Refresh Tokens for Integration_ related action. | +| config_file | The name of a file containing configuration options. The default name is prism.ini. | +| config_name | The name of a configuration section in the config_file. The [default] section is used if not specified. | +| log_file | The name of a log file to capture information about the operation of the client library and CLI. | +| log_level | The output logging level, the default is INFO. To see more information, set the value to DEBUG. | + +### Using Command line options ### + +Command line are always used regardless of other configurations and should appear **before** the CLI command. + + ```bash prism --base_url= \ - --tenant_name ... + --tenant_name \ + --client_id \ + --client_secret \ + --refresh_token \ + tables get ``` -### Environment variables ### - -Used if present and not also on the command line +### Using Environment variables ### -2. For ease of use, set the following environment variables using the values obtained above: +These options are set using operating specific commands. For example, the following commands +set the environment variables in a Bash shell: ```bash -export workday_base_url= -export workday_tenant_name= -export prism_client_id= -export prism_client_secret= -export prism_refresh_token= -export prism_log_file= -export prism_log_level=INFO|DEBUG|WARN|ERROR -``` - -### Configuration file ### - -automatically looks for prism.ini - can be overridden with --config_file option. 
-NOTE the client secret and refresh tokens are the same as passwords and should be protected - - [default] - workday_base_url = https:// - workday_tenant_name = - prism_client_id = MTFmZWZjZTItZTk0NS00MWQ5LTkwMzItNTc5NWU4MGI3ZWYx - prism_client_secret = c9fta3s2b5j5zfppthi19zdflncjljzgml4rk430mk9y1n5fm0lp9kstzzvmo0th0389mbve6gr5rg5kax9jmsn9l5om3vsanmq - prism_refresh_token = we9o31mrs15z7g9qpcd6jedaf74mhv4weadki7uwldhbz99mn0s2u3skjy9zshst2r2wgda502q44g4m8pka2g26xvyzgboakc - prism_log_level = DEBUG - - [integration] - workday_base_url = https:// - workday_tenant_name = - prism_client_id = NTFmMDYxZTktM2FjNi00MDJiLWI0YjctMGYwYTkyMmZlYmUy - prism_client_secret = qym8c79g9inthk6ytodjmwhzhcss4qd8x06cepnvhd8g69hhp8ihle701sna8fv2myfyktj8br3fogz7yhzo5oo1oien3f4kkmi - prism_refresh_token = jt8bkmo3q7ejqn0tcs3e171a1ytgzl18q942w44wbkfy0zflgyhkx82ldjllwlxnl91ngbp6x74ilfxca20smmom9mvzqfcm9s5 - prism_log_level = INFO +export workday_base_url= +export workday_tenant_name= +export prism_client_id= +export prism_client_secret= +export prism_refresh_token= +export prism_log_file= +export prism_log_level=INFO + +prism tables get +``` + +### Using a Configuration file ### + +The CLI automatically looks for ``prism.ini`` in the current directory, and if found +reads configuration options from one section. Use the ``--config_name`` option to +select a configuration other than [default]. +a config_name option is not passed on the command line. + +> **NOTE**: The client secrets and refresh tokens are the same as passwords and should be protected. + +```ini +[default] +workday_base_url = https:// +workday_tenant_name = +prism_client_id = MTFxx...MGI3ZWYx +prism_client_secret = cxxxx...vsanmq +prism_refresh_token = weyyyyy...boakc +prism_log_level = INFO + +[integration] +workday_base_url = https:// +workday_tenant_name = +prism_client_id = NTFmx...MmZlYmUy +prism_client_secret = qnnnn...3f4kkmi +prism_refresh_token = jtqqqq...qfcm9s5 +prism_log_level = INFO +``` + +```python +prism --config_file=myconfig.ini \ + --config_name=integration \ + tables get +``` ## Python client library example ### Create a new Prism table The following Python script uses the Prism-Python client library to create -a new Workday Prism Analytics table and load the contents of a CSV file. +a new Workday Prism Analytics Table and loads the contents of a delimited +and compressed CSV file (.csv.gz). ```python import os @@ -175,7 +288,7 @@ import prism # STEP 1 - Initialize the Prism-Python client library # using environment variables. -p = prism.Prism( +pClientLib = prism.Prism( os.getenv("workday_base_url"), os.getenv("workday_tenant_name"), os.getenv("prism_client_id"), @@ -183,56 +296,106 @@ p = prism.Prism( os.getenv("prism_refresh_token") ) -# STEP 2 - Create a new table using the definition of fields -# provided by the schema.jsob file. -table = prism.tables_create(table_name="my_new_table", file="/path/to/schema.json") +# STEP 2 - Create a new table using the definition +# of fields provided by the schema.json file. +table = prism.tables_create( + table_name="my_new_table", + file="/path/to/schema.json" +) -# Print JSON result about the new table +# Print JSON response body describing +# the new table. print(table) -# STEP 3 - Use the convenience function prism.upload_file -# to upload a local file to the table. Notice the operation -# is Insert on the first load. 
-prism.upload_file(p, "/path/to/file.csv.gz", table["id"], operation="Insert") +# STEP 3 - Use the convenience function +# prism.upload_file() to upload a local file +# to the table. Notice the "operation" +# is Insert for the first load. +prism.upload_file( + pClientLib, + "/path/to/data.csv.gz", + table["id"], + operation="Insert" +) ``` ### Manage data in an existing table -A -Table Operations Available: `TruncateandInsert`, `Insert`, `Update`, `Upsert`, -`Delete`. -To use the `Update`, `Upsert`, or `Delete` operations, you must specify an -external id field within your table schema. +The Workday Prism REST API provides multiple operations for adding, +updating or removing data from a Workday Prism Analytics table. One of +the following table operations must be specified for a loading operation.\: + +- **Insert**: Workday keeps any existing data in the target table +and adds new data from the source. +- **TruncateAndInsert**: Workday deletes all existing data +in the target table and replaces it with data from the source. +- **Delete**: Workday deletes data from the target table +based external ID data from the source. +- **Update**: Workday updates only existing data in the +target table based on data from the source. All matching rows, +based on the external ID value, are updated. +- **Upsert**: Workday inserts data from the source if it +doesn't exist in the target table, and updates the data if it does +based on the external ID value from the source. + +When using a `Delete`, `Update`, or `Upsert` operation, the source data +must contain an ``externalId`` attribute matching the ``externalId`` +defined in the target table, i.e., a primary key value. ```python - +# STEP 4 - Use the prism.upload convenience function +# prism.upload() to truncate the existing data and +# load new data from two CSV files. +prism.upload_file( + pClientLib, + ["/path/to/newdata-1.csv", "/path/to/newdata-2.csv"], + table["id"], + operation="TruncateAndInsert" +) ``` -## CLI Example +Note: the Workday Prism Analytics REST API only accepts delimited +and gzip compressed (.csv.gz) files. The ``upload`` convenience +function automatically performs the gzip operation. -The command line interface (CLI) provides another way to interact with the Prism API. -The CLI expects your credentials to be stored as environment variables, but they can -also be passed into the CLI manually through the use of optional arguments. - -```bash -# get help with the CLI -prism --help +## CLI Example -# list the Prism API tables that you have access to -prism list +The command line interface (CLI) provides another way to interact with +the Workday Prism Analytics REST API. The CLI expects tenant and credential +options to be passed on the command line, stored as environment variables, +or stored in a configuration file (see Configuration section above). -# create a new Prism API table -prism create my_new_table /path/to/schema.json +For the following examples, a ``prism.ini`` exists in the current +working directory with a ``[default]`` section. -# upload data to a Prism API table -prism upload /path/to/file.csv.gz bbab30e3018b01a723524ce18010811b +```bash +# Get help with the CLI. +[ user@host]$ prism --help + +# Get help for the tables command. +[ user@host]$ prism tables --help + +# Use the Workday Prism Analytics REST API +# GET:/tables endpoint to list Prism tables +# you have access to. 
+[ user@host]$ prism tables get + +# Create a new Prism Table using the Workday +# Prism Analytics REST API POST:/tables endpoint +[ user@host]$ prism tables create my_new_table /path/to/schema.json + +# Upload data to the new table using the ID value - the default +# table operation is "TruncateAndInsert" +[ user@host]$ prism tables upload 83dd72bd7b911000ca2d790e719a0000 /path/to/file1.csv.gz + +# Upload mulitple CSV files to a Prism API table. Notice the -isName (-n) +# option tells the CLI to retrieve the table id. +[ user@host]$ prism tables upload \ + -operation Insert \ + -isName my_new_table \ + /path/to/*.csv ``` -## Notes on schema files -1. Can be a full table definition including name, displayName and fields attributes -2. Can be a list of only field definitions -3. Field definitions are either full or compact(should i say this) - ## Bugs Please report any bugs that you find [here](https://github.com/Workday/prism-python/issues). Or, even better, fork the repository on [GitHub](https://github.com/Workday/prism-python) @@ -240,4 +403,4 @@ and create a pull request (PR). We welcome all changes, big or small, and we will help you make the PR if you are new to `git`. ## License -Released under the Apache-2.0 license (see [LICENSE](https://github.com/Workday/prism-python/blob/master/LICENSE)) +Released under the Apache-2.0 license (see [LICENSE](LICENSE)) From 2fb16884373a738557191a4ee3cd6f5b1fe2af5c Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Wed, 1 Nov 2023 06:46:51 -0400 Subject: [PATCH 094/103] Updated README. --- README.md | 157 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 88 insertions(+), 69 deletions(-) diff --git a/README.md b/README.md index acc801b..20e395c 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,8 @@ Python client library and command line interface (CLI) for interacting with Workday’s Prism API V3. Workday provides the Prism Analytics REST API web service to work with -Prism Analytics tables, data change tasks, and datasets. You can develop -a software program that uses the different resource endpoints to +Workday Prism Analytics tables, data change tasks, and datasets. You can develop +a software program that uses the different REST endpoints to programmatically create Prism Analytics tables and datasets and modify data in them. @@ -17,41 +17,42 @@ access to the Workday Prism Analytics REST API. The Python **client library** is a REST API wrapper managing the HTTP methods, URL endpoints and the data required by specific Workday Prism Analytics API REST operations. Using this client library in Python projects simplifies interactions -with the Workday Prism Analytics REST API while providing the rich functionality +with the Workday Prism Analytics REST API providing the rich functionality of Workday Prism Analytics to your Python project. The **CLI** is a powerful tool for interacting with a Workday Prism Analytics REST API client library, allowing you to quickly and easily perform Workday -Prism Analytics tasks from the command line. +Prism Analytics tasks from any command line. ## Workday Prism Analytics REST API Requirements -Workday Prism Analytics REST APIs use OAuth and the Workday configurable -security model to secure Workday Prism Analytics operations in end-user -applications. The Workday Prism REST APIs act on behalf of the individual -using the client. The user's security profile affects the REST API access -to Workday resources. 
+Workday Prism Analytics REST APIs use OAuth authentication and the Workday +configurable security model to authorize Workday Prism Analytics operations +in end-user applications. The Workday Prism REST APIs act on behalf of the +a Workday user using the client. The user's security profile affects the +REST API access to Workday resources. -The Prism client library requires API Client credentials defined in the -target Workday tenant. The API Client credentials authorize programmatic -access to the Workday tenant and provides the identity of the Workday user -to enforce security for all operations. +The Prism client library, and by extension the CLI, require API Client +credentials setup in the target Workday tenant. The API Client credentials +authorize programmatic access to the Workday tenant and provides the identity +of the Workday user to enforce security for all operations. -#### [Register a Workday Prism Analytics API Client.](https://doc.workday.com/admin-guide/en-us/workday-studio/integration-design/common-components/the-prismanalytics-subassembly/tzr1533120600898.html) #### +### [Register a Workday Prism Analytics API Client.](https://doc.workday.com/admin-guide/en-us/workday-studio/integration-design/common-components/the-prismanalytics-subassembly/tzr1533120600898.html) ### In the target Workday Prism enabled tenant, register an integrations API client with Prism Analytics as its scope (task _Register API Client for Integrations_) to -create the Client ID and Client Secret values allowing programmatic access to the tenant. +create the Client ID and Client Secret values allowing REST API access to the tenant. ![Register API](https://workday-prism-python.s3.amazonaws.com/Prism-Python-RegisterAPI.png) -After clicking the Done button, the confirmation screen shows the +After clicking the OK button, the confirmation screen shows the two important REST API credentials: **Client ID** and **Client Secret**. ![Client ID](https://workday-prism-python.s3.amazonaws.com/Prism-Python-Secret.png) -**Record the secret value** for use with the Prism-Python client library. Workday -never shows the secret value again after clicking the Done button. +**Record the secret value** for use with the Prism-Python client library. + +> **Note**: Workday **never** shows the secret value again after clicking the Done button. > **Note**: As a Workday best practice, try to minimize the number > of unique API Clients since, for auditing reasons, they cannot be removed. @@ -63,21 +64,40 @@ never shows the secret value again after clicking the Done button. > **Note**: Protect the Client ID and Client Secret values the same way as > any password. -#### [Create Refresh Token](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) #### +### [Create Refresh Token](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) ### + +Creating a Refresh Token assigns a Workday user identity to an API Client to authorize +access to Workday Prism Analytics tables and data change tasks. There can be many +refresh tokens for diffent Workday user associated with a single API Client. -Creating a Refresh Token assigns a Workday user identity to an API Client to control -access to Workday Prism Analytics tables and data change tasks. From the API Clients -for Integration, take the related action to Manage Refresh Tokens for Integrations. +From the _View API Clients_ task, on the API Clients for Integration tab, take the related +action to Manage Refresh Tokens for Integrations. 
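Behind the scenes, these three credentials are exchanged for a short-lived bearer token using the standard OAuth 2.0 refresh-token grant, and that token is sent on every Prism REST call. The client library performs this exchange for you; the sketch below only illustrates the shape of the request, and the token URL shown is an assumption about the tenant's endpoint layout rather than a documented value.

```python
import os
import requests

base_url = os.getenv("workday_base_url")
tenant_name = os.getenv("workday_tenant_name")

# Assumed token endpoint layout; the client library resolves the real one.
token_url = f"{base_url}/ccx/oauth2/{tenant_name}/token"

response = requests.post(
    token_url,
    data={
        "grant_type": "refresh_token",
        "refresh_token": os.getenv("prism_refresh_token"),
        "client_id": os.getenv("prism_client_id"),
        "client_secret": os.getenv("prism_client_secret"),
    },
)
response.raise_for_status()

# The bearer token identifies both the API Client and the Workday user.
access_token = response.json()["access_token"]
```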
![View API for Client Integrations](https://workday-prism-python.s3.amazonaws.com/ViewApiClients.png) -#### [Obtain the Workday REST API Endpoint.](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) #### +Refresh tokens always identify a Workday user. In this example, +the Refresh Token is for Logan Mcneil (lmcneil) and her security +groups and policies are applied to every REST API operation. + +![Manage Refresh Token](https://workday-prism-python.s3.amazonaws.com/ManageRefreshToken.png) + +After clicking the OK button, copy the Refresh Token. + +![Refresh Token](https://workday-prism-python.s3.amazonaws.com/RefreshToken.png) + +> **Note**: Refresh Tokens can be created, re-generated, and removed as often as +> necessary to identify the users allowed to use this API Client for Integration +> end point. + +### [Obtain the Workday REST API Endpoint.](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) ### In Workday, obtain the Workday REST API base URL endpoint that the Prism class requires as a parameter. From the View API Client report, locate the base_url and tenant_name values. ![Base URL and Tenant Name](https://workday-prism-python.s3.amazonaws.com/URL-tenant.png) +### Configuration Summary ### + Before configuring the CLI or using the Prism client library, ensure you have the following values: - Base URL @@ -146,7 +166,7 @@ git version 2.40.1 ## Prism-Python Install ## To automatically retrieve and install the latest version of this -package directly from GitHub, use the following command: +package directly GitHub, use the following command: ```bash pip install git+https://github.com/Workday/prism-python.git @@ -158,7 +178,7 @@ It is also possible to install a specific tagged release with: pip install git+https://github.com/Workday/prism-python.git@0.2.0 ``` -When either of these installations completes, the **prism** command +When either of these installations commands complete, the **prism** command is available in your shell and provides access to the CLI. For example, after installation the following command returns help on the available commands: @@ -167,11 +187,12 @@ help on the available commands: prism --list ``` -## Configuration ## +## Prism-Python Configuration ## -The Python client library and CLI require the security credentials created and other -operational options. For the Python client library, these options must be -supplied when the client library object is created. +The Python client library and CLI require the security credentials from the target +Workday Prism-enabled tenant, as well as other operational options. For the Python +client library, these options must be supplied when the Python client library object +is created. ```python import prism @@ -187,16 +208,16 @@ pClientLib = prism.Prism( For the command line, the options can be set in three ways: -| Configuration | Description | -| ------------- |----------------------------------------------------------------------| -| Command Line| Specified for each CLI operation. | +| Configuration | Description | +|-----------------------|----------------------------------------------------------------------| +| Command Line | Specified for each CLI operation. | | Environment Variables | Set in the operating system environment and used for CLI operations. | -| Configuration File | One or more configurations stored in a file. | +| Configuration File | One or more configurations stored in a file. 
| When multiple configurations are available, i.e., specified on the command line -AND as environment variables, AND in a configuration file the first instance of -an option is used, i.e., command line preferred over environment and environment -is preferred over configuration file. +and as environment variables, and in a configuration file the first instance of +an option is used, i.e., command line used before environment variables and environment +variables are used before configuration file values. The following configuration options should be available: @@ -214,7 +235,8 @@ The following configuration options should be available: ### Using Command line options ### -Command line are always used regardless of other configurations and should appear **before** the CLI command. +Command line options are always used regardless of other configurations and should appear **before** +the CLI command. ```bash prism --base_url= \ @@ -227,7 +249,7 @@ prism --base_url= \ ### Using Environment variables ### -These options are set using operating specific commands. For example, the following commands +Set these options using operating specific commands. For example, the following commands set the environment variables in a Bash shell: ```bash @@ -244,10 +266,9 @@ prism tables get ### Using a Configuration file ### -The CLI automatically looks for ``prism.ini`` in the current directory, and if found -reads configuration options from one section. Use the ``--config_name`` option to -select a configuration other than [default]. -a config_name option is not passed on the command line. +The CLI automatically looks for the file ``prism.ini`` in the current directory, and if found +reads configuration options from a section, by default the **[default]** section. Use the +``--config_name`` option to select a configuration other than **[default]**. > **NOTE**: The client secrets and refresh tokens are the same as passwords and should be protected. @@ -270,8 +291,8 @@ prism_log_level = INFO ``` ```python -prism --config_file=myconfig.ini \ - --config_name=integration \ +prism --config_file myconfig.ini \ + --config_name integration \ tables get ``` @@ -279,15 +300,15 @@ prism --config_file=myconfig.ini \ ### Create a new Prism table The following Python script uses the Prism-Python client library to create -a new Workday Prism Analytics Table and loads the contents of a delimited +a new Workday Prism Analytics Table and load the contents of a delimited and compressed CSV file (.csv.gz). ```python import os import prism -# STEP 1 - Initialize the Prism-Python client library -# using environment variables. +# STEP 1 - Initialize the Prism-Python client library using +# environment variables. pClientLib = prism.Prism( os.getenv("workday_base_url"), os.getenv("workday_tenant_name"), @@ -296,25 +317,24 @@ pClientLib = prism.Prism( os.getenv("prism_refresh_token") ) -# STEP 2 - Create a new table using the definition -# of fields provided by the schema.json file. +# STEP 2 - Create a new table using the definition of fields provided +# by the schema.json file. table = prism.tables_create( + p=pClientLib, table_name="my_new_table", file="/path/to/schema.json" ) -# Print JSON response body describing -# the new table. +# Print JSON response body describing the new table. print(table) -# STEP 3 - Use the convenience function -# prism.upload_file() to upload a local file -# to the table. Notice the "operation" -# is Insert for the first load. 
+# STEP 3 - Use the convenience function prism.upload_file() to upload +# a local file to the table. Notice the "operation" is Insert +# for the first load. prism.upload_file( - pClientLib, - "/path/to/data.csv.gz", - table["id"], + p=pClientLib, + file="/path/to/data.csv.gz", + table_id=table["id"], operation="Insert" ) ``` @@ -322,8 +342,8 @@ prism.upload_file( ### Manage data in an existing table The Workday Prism REST API provides multiple operations for adding, -updating or removing data from a Workday Prism Analytics table. One of -the following table operations must be specified for a loading operation.\: +updating and removing data from a Workday Prism Analytics table. One of +the following table operations must be specified for a loading operation. - **Insert**: Workday keeps any existing data in the target table and adds new data from the source. @@ -336,7 +356,7 @@ target table based on data from the source. All matching rows, based on the external ID value, are updated. - **Upsert**: Workday inserts data from the source if it doesn't exist in the target table, and updates the data if it does -based on the external ID value from the source. +using the external ID value from the source. When using a `Delete`, `Update`, or `Upsert` operation, the source data must contain an ``externalId`` attribute matching the ``externalId`` @@ -360,7 +380,7 @@ function automatically performs the gzip operation. ## CLI Example -The command line interface (CLI) provides another way to interact with +The command line interface (CLI) is a no-code way to interact with the Workday Prism Analytics REST API. The CLI expects tenant and credential options to be passed on the command line, stored as environment variables, or stored in a configuration file (see Configuration section above). @@ -375,21 +395,20 @@ working directory with a ``[default]`` section. # Get help for the tables command. [ user@host]$ prism tables --help -# Use the Workday Prism Analytics REST API -# GET:/tables endpoint to list Prism tables -# you have access to. +# Use the Workday Prism Analytics REST API GET:/tables endpoint +# to list Prism tables you have access to. [ user@host]$ prism tables get -# Create a new Prism Table using the Workday -# Prism Analytics REST API POST:/tables endpoint +# Create a new Prism Table using the Workday Prism Analytics +# REST API POST:/tables endpoint [ user@host]$ prism tables create my_new_table /path/to/schema.json # Upload data to the new table using the ID value - the default # table operation is "TruncateAndInsert" [ user@host]$ prism tables upload 83dd72bd7b911000ca2d790e719a0000 /path/to/file1.csv.gz -# Upload mulitple CSV files to a Prism API table. Notice the -isName (-n) -# option tells the CLI to retrieve the table id. +# Upload mulitple CSV files to a Prism API table. Notice the --isName (-n) +# option tells the CLI to lookup the table id. [ user@host]$ prism tables upload \ -operation Insert \ -isName my_new_table \ From e2c8fd79439f4d5d57de99a8602c8e13a020d0df Mon Sep 17 00:00:00 2001 From: wd-mgreynolds Date: Thu, 2 Nov 2023 06:19:29 -0400 Subject: [PATCH 095/103] Updated README. --- README.md | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 20e395c..1d02210 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ Prism Analytics tasks from any command line. 
Workday Prism Analytics REST APIs use OAuth authentication and the Workday configurable security model to authorize Workday Prism Analytics operations -in end-user applications. The Workday Prism REST APIs act on behalf of the +in end-user applications. The Workday Prism REST APIs act on behalf of a Workday user using the client. The user's security profile affects the REST API access to Workday resources. @@ -68,7 +68,7 @@ two important REST API credentials: **Client ID** and **Client Secret**. Creating a Refresh Token assigns a Workday user identity to an API Client to authorize access to Workday Prism Analytics tables and data change tasks. There can be many -refresh tokens for diffent Workday user associated with a single API Client. +refresh tokens for different Workday user associated with a single API Client. From the _View API Clients_ task, on the API Clients for Integration tab, take the related action to Manage Refresh Tokens for Integrations. @@ -194,7 +194,7 @@ Workday Prism-enabled tenant, as well as other operational options. For the Pyt client library, these options must be supplied when the Python client library object is created. -```python +``` import prism pClientLib = prism.Prism( @@ -216,7 +216,7 @@ For the command line, the options can be set in three ways: When multiple configurations are available, i.e., specified on the command line and as environment variables, and in a configuration file the first instance of -an option is used, i.e., command line used before environment variables and environment +an option is used, e.g., command line used before environment variables and environment variables are used before configuration file values. The following configuration options should be available: @@ -290,7 +290,7 @@ prism_refresh_token = jtqqqq...qfcm9s5 prism_log_level = INFO ``` -```python +```bash prism --config_file myconfig.ini \ --config_name integration \ tables get @@ -303,7 +303,7 @@ The following Python script uses the Prism-Python client library to create a new Workday Prism Analytics Table and load the contents of a delimited and compressed CSV file (.csv.gz). -```python +``` import os import prism @@ -354,18 +354,19 @@ based external ID data from the source. - **Update**: Workday updates only existing data in the target table based on data from the source. All matching rows, based on the external ID value, are updated. -- **Upsert**: Workday inserts data from the source if it -doesn't exist in the target table, and updates the data if it does -using the external ID value from the source. +- **Upsert**: Workday inserts new data from the source if it +doesn't exist in the target table, and updates existing data +using the external ID value from the source to locate matching +rows. When using a `Delete`, `Update`, or `Upsert` operation, the source data must contain an ``externalId`` attribute matching the ``externalId`` defined in the target table, i.e., a primary key value. -```python +``` # STEP 4 - Use the prism.upload convenience function -# prism.upload() to truncate the existing data and -# load new data from two CSV files. +# to truncate the existing data and load new data +# from two CSV files. prism.upload_file( pClientLib, ["/path/to/newdata-1.csv", "/path/to/newdata-2.csv"], @@ -407,7 +408,7 @@ working directory with a ``[default]`` section. # table operation is "TruncateAndInsert" [ user@host]$ prism tables upload 83dd72bd7b911000ca2d790e719a0000 /path/to/file1.csv.gz -# Upload mulitple CSV files to a Prism API table. 
Notice the --isName (-n) +# Upload multiple CSV files to a Prism API table. Notice the --isName (-n) # option tells the CLI to lookup the table id. [ user@host]$ prism tables upload \ -operation Insert \ From e3f242955e95d5bb49c23b34508b30c22950315a Mon Sep 17 00:00:00 2001 From: CurtLH Date: Fri, 3 Nov 2023 18:00:48 -0700 Subject: [PATCH 096/103] Remove docs for now --- docs/Makefile | 20 -------------------- docs/make.bat | 35 ----------------------------------- docs/source/conf.py | 32 -------------------------------- docs/source/index.rst | 23 ----------------------- docs/source/modules.rst | 7 ------- docs/source/prism.rst | 29 ----------------------------- 6 files changed, 146 deletions(-) delete mode 100644 docs/Makefile delete mode 100644 docs/make.bat delete mode 100644 docs/source/conf.py delete mode 100644 docs/source/index.rst delete mode 100644 docs/source/modules.rst delete mode 100644 docs/source/prism.rst diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index d0c3cbf..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/make.bat b/docs/make.bat deleted file mode 100644 index 747ffb7..0000000 --- a/docs/make.bat +++ /dev/null @@ -1,35 +0,0 @@ -@ECHO OFF - -pushd %~dp0 - -REM Command file for Sphinx documentation - -if "%SPHINXBUILD%" == "" ( - set SPHINXBUILD=sphinx-build -) -set SOURCEDIR=source -set BUILDDIR=build - -%SPHINXBUILD% >NUL 2>NUL -if errorlevel 9009 ( - echo. - echo.The 'sphinx-build' command was not found. Make sure you have Sphinx - echo.installed, then set the SPHINXBUILD environment variable to point - echo.to the full path of the 'sphinx-build' executable. Alternatively you - echo.may add the Sphinx directory to PATH. - echo. - echo.If you don't have Sphinx installed, grab it from - echo.https://www.sphinx-doc.org/ - exit /b 1 -) - -if "%1" == "" goto help - -%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% -goto end - -:help -%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% - -:end -popd diff --git a/docs/source/conf.py b/docs/source/conf.py deleted file mode 100644 index 445d0a3..0000000 --- a/docs/source/conf.py +++ /dev/null @@ -1,32 +0,0 @@ -import os -import sys - -sys.path.insert(0, os.path.abspath("..")) - -# Configuration file for the Sphinx documentation builder. 
-# -# For the full list of built-in configuration values, see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Project information ----------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information - -project = "Prism-Python" -copyright = "2023, Mark Greynolds" -author = "Mark Greynolds" -release = "0.1" - -# -- General configuration --------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration - -extensions = ["sphinx_click"] - -templates_path = ["_templates"] -exclude_patterns = ["sphinx.ext.autodoc", "sphinx.ext.autosummary", "sphinx_click"] - - -# -- Options for HTML output ------------------------------------------------- -# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output - -html_theme = "alabaster" -html_static_path = ["_static"] diff --git a/docs/source/index.rst b/docs/source/index.rst deleted file mode 100644 index a0bf9f9..0000000 --- a/docs/source/index.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. Prism-Python documentation master file, created by - sphinx-quickstart on Fri Oct 13 08:31:20 2023. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to Prism-Python's documentation! -======================================== - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - -.. click:: prism:cli - :prog: prism-python - :nested: full - - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/source/modules.rst b/docs/source/modules.rst deleted file mode 100644 index 032735b..0000000 --- a/docs/source/modules.rst +++ /dev/null @@ -1,7 +0,0 @@ -prism -===== - -.. toctree:: - :maxdepth: 4 - - prism diff --git a/docs/source/prism.rst b/docs/source/prism.rst deleted file mode 100644 index 6abdf80..0000000 --- a/docs/source/prism.rst +++ /dev/null @@ -1,29 +0,0 @@ -prism package -============= - -Submodules ----------- - -prism.cli module ----------------- - -.. automodule:: prism.cli - :members: - :undoc-members: - :show-inheritance: - -prism.prism module ------------------- - -.. automodule:: prism.prism - :members: - :undoc-members: - :show-inheritance: - -Module contents ---------------- - -.. automodule:: prism - :members: - :undoc-members: - :show-inheritance: From 7b3557783cf92a2fba00ab423372eb2b553796ff Mon Sep 17 00:00:00 2001 From: CurtLH Date: Fri, 3 Nov 2023 18:32:45 -0700 Subject: [PATCH 097/103] Simplify README --- README.md | 386 ++++++------------------------------------------------ 1 file changed, 42 insertions(+), 344 deletions(-) diff --git a/README.md b/README.md index 1d02210..fc71cdf 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,6 @@ a software program that uses the different REST endpoints to programmatically create Prism Analytics tables and datasets and modify data in them. -The Python client library and CLI work together to provide no-code/low-code -access to the Workday Prism Analytics REST API. - The Python **client library** is a REST API wrapper managing the HTTP methods, URL endpoints and the data required by specific Workday Prism Analytics API REST operations. 
Using this client library in Python projects simplifies interactions @@ -24,292 +21,49 @@ The **CLI** is a powerful tool for interacting with a Workday Prism Analytics REST API client library, allowing you to quickly and easily perform Workday Prism Analytics tasks from any command line. -## Workday Prism Analytics REST API Requirements - -Workday Prism Analytics REST APIs use OAuth authentication and the Workday -configurable security model to authorize Workday Prism Analytics operations -in end-user applications. The Workday Prism REST APIs act on behalf of -a Workday user using the client. The user's security profile affects the -REST API access to Workday resources. - -The Prism client library, and by extension the CLI, require API Client -credentials setup in the target Workday tenant. The API Client credentials -authorize programmatic access to the Workday tenant and provides the identity -of the Workday user to enforce security for all operations. - -### [Register a Workday Prism Analytics API Client.](https://doc.workday.com/admin-guide/en-us/workday-studio/integration-design/common-components/the-prismanalytics-subassembly/tzr1533120600898.html) ### - -In the target Workday Prism enabled tenant, register an integrations API client -with Prism Analytics as its scope (task _Register API Client for Integrations_) to -create the Client ID and Client Secret values allowing REST API access to the tenant. - -![Register API](https://workday-prism-python.s3.amazonaws.com/Prism-Python-RegisterAPI.png) - -After clicking the OK button, the confirmation screen shows the -two important REST API credentials: **Client ID** and **Client Secret**. - -![Client ID](https://workday-prism-python.s3.amazonaws.com/Prism-Python-Secret.png) - -**Record the secret value** for use with the Prism-Python client library. - -> **Note**: Workday **never** shows the secret value again after clicking the Done button. - -> **Note**: As a Workday best practice, try to minimize the number -> of unique API Clients since, for auditing reasons, they cannot be removed. - -> **Note**: If the client secret is ever lost or compromised, a new secret -> can be generated. However, a new secret invalidates any application -> using the old secret. - -> **Note**: Protect the Client ID and Client Secret values the same way as -> any password. - -### [Create Refresh Token](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) ### - -Creating a Refresh Token assigns a Workday user identity to an API Client to authorize -access to Workday Prism Analytics tables and data change tasks. There can be many -refresh tokens for different Workday user associated with a single API Client. - -From the _View API Clients_ task, on the API Clients for Integration tab, take the related -action to Manage Refresh Tokens for Integrations. - -![View API for Client Integrations](https://workday-prism-python.s3.amazonaws.com/ViewApiClients.png) - -Refresh tokens always identify a Workday user. In this example, -the Refresh Token is for Logan Mcneil (lmcneil) and her security -groups and policies are applied to every REST API operation. - -![Manage Refresh Token](https://workday-prism-python.s3.amazonaws.com/ManageRefreshToken.png) - -After clicking the OK button, copy the Refresh Token. 
- -![Refresh Token](https://workday-prism-python.s3.amazonaws.com/RefreshToken.png) - -> **Note**: Refresh Tokens can be created, re-generated, and removed as often as -> necessary to identify the users allowed to use this API Client for Integration -> end point. - -### [Obtain the Workday REST API Endpoint.](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) ### - -In Workday, obtain the Workday REST API base URL endpoint that the Prism class requires -as a parameter. From the View API Client report, locate the base_url and tenant_name values. - -![Base URL and Tenant Name](https://workday-prism-python.s3.amazonaws.com/URL-tenant.png) - -### Configuration Summary ### - -Before configuring the CLI or using the Prism client library, ensure you have the following values: - -- Base URL -- Tenant Name -- Client ID -- Client Secret -- Refresh Token -- Log File Name (optional) -- Log Level (optional) - -## Python Prerequisites ## - -First determine whether you have an up-to-date versions of Python, -pip, and Git. If you need to install Python and Git, please refer -to the official installation guides. - -### Python ### -Python comes preinstalled on most Linux distributions, and is available -as a package on all others. You can check which version of Python (if any) -is installed, by entering the following command in a terminal or command window: - -```bash -[user@host ~]$ python --version -Python 3.11.1 -``` - -or - -```bash -[user@host ~]$ python3 --version -Python 3.9.16 -``` - -### Pip ## - -**pip** is the preferred installer program. Starting with Python 3.4, it is included -by default with the Python binary installers. You can check if pip is already installed -and up-to-date by entering the following command: - -```bash -[user@host ~]$ pip --version -pip 23.3.1 from //python3.11/site-packages/pip (python 3.11) -``` - -or - -```bash -[user@host ~]$ pip3 --version -pip 23.3.1 from //python3.9/site-packages/pip (python 3.9) -``` - -### Git Installation ### - -The installation instructions below use the Git client to retrieve -and install the Prism-Python package automatically. You can also retrieve this -package using your Git preferred method. - -Before installing Git, you should first determine whether you already have it -installed by running the following git command: - -```bash -[user@host ~]$ git --version -git version 2.40.1 -``` - -## Prism-Python Install ## +## Install To automatically retrieve and install the latest version of this package directly GitHub, use the following command: ```bash -pip install git+https://github.com/Workday/prism-python.git +$ pip install git+https://github.com/Workday/prism-python.git ``` It is also possible to install a specific tagged release with: ```bash -pip install git+https://github.com/Workday/prism-python.git@0.2.0 -``` - -When either of these installations commands complete, the **prism** command -is available in your shell and provides access to the CLI. For -example, after installation the following command returns -help on the available commands: - -```bash -prism --list -``` - -## Prism-Python Configuration ## - -The Python client library and CLI require the security credentials from the target -Workday Prism-enabled tenant, as well as other operational options. For the Python -client library, these options must be supplied when the Python client library object -is created. 
- -``` -import prism - -pClientLib = prism.Prism( - base_url=, - tenant_name=, - client_id=, - client_secret=, - refrest_token= -) -``` - -For the command line, the options can be set in three ways: - -| Configuration | Description | -|-----------------------|----------------------------------------------------------------------| -| Command Line | Specified for each CLI operation. | -| Environment Variables | Set in the operating system environment and used for CLI operations. | -| Configuration File | One or more configurations stored in a file. | - -When multiple configurations are available, i.e., specified on the command line -and as environment variables, and in a configuration file the first instance of -an option is used, e.g., command line used before environment variables and environment -variables are used before configuration file values. - -The following configuration options should be available: - -| Configuration | Description | -|---------------|-----------------------------------------------------------------------------------------------------------------| -| base_url | The service endpoint for the Workday tenant. | -| tenant_name | The tenant_name available at the service endpoint. | -| client_id | The API Client for Integration ID created using the _Register API Client for Integration_ task. | -| client_secret | The API Client for Integration Secret created using the _Register API Client for Integration_ task. | -| refresh_token | The Refresh Token for a Workday user created with the _Maintain Refresh Tokens for Integration_ related action. | -| config_file | The name of a file containing configuration options. The default name is prism.ini. | -| config_name | The name of a configuration section in the config_file. The [default] section is used if not specified. | -| log_file | The name of a log file to capture information about the operation of the client library and CLI. | -| log_level | The output logging level, the default is INFO. To see more information, set the value to DEBUG. | - -### Using Command line options ### - -Command line options are always used regardless of other configurations and should appear **before** -the CLI command. - - ```bash -prism --base_url= \ - --tenant_name \ - --client_id \ - --client_secret \ - --refresh_token \ - tables get -``` - -### Using Environment variables ### - -Set these options using operating specific commands. For example, the following commands -set the environment variables in a Bash shell: - -```bash -export workday_base_url= -export workday_tenant_name= -export prism_client_id= -export prism_client_secret= -export prism_refresh_token= -export prism_log_file= -export prism_log_level=INFO - -prism tables get +$ pip install git+https://github.com/Workday/prism-python.git@0.2.0 ``` -### Using a Configuration file ### +## Requirements -The CLI automatically looks for the file ``prism.ini`` in the current directory, and if found -reads configuration options from a section, by default the **[default]** section. Use the -``--config_name`` option to select a configuration other than **[default]**. - -> **NOTE**: The client secrets and refresh tokens are the same as passwords and should be protected. +Workday Prism Analytics REST APIs use OAuth authentication and the Workday +configurable security model to authorize Workday Prism Analytics operations +in end-user applications. The Workday Prism REST APIs act on behalf of +a Workday user using the client. 
The user's security profile affects the +REST API access to Workday resources. -```ini -[default] -workday_base_url = https:// -workday_tenant_name = -prism_client_id = MTFxx...MGI3ZWYx -prism_client_secret = cxxxx...vsanmq -prism_refresh_token = weyyyyy...boakc -prism_log_level = INFO +The Prism client library, and by extension the CLI, require API Client +credentials setup in the target Workday tenant. The API Client credentials +authorize programmatic access to the Workday tenant and provides the identity +of the Workday user to enforce security for all operations. -[integration] -workday_base_url = https:// -workday_tenant_name = -prism_client_id = NTFmx...MmZlYmUy -prism_client_secret = qnnnn...3f4kkmi -prism_refresh_token = jtqqqq...qfcm9s5 -prism_log_level = INFO -``` +1. [Register a Workday Prism Analytics API Client.](https://doc.workday.com/admin-guide/en-us/workday-studio/integration-design/common-components/the-prismanalytics-subassembly/tzr1533120600898.html) +2. [Create Refresh Token](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) +3. [Obtain the Workday REST API Endpoint.](https://doc.workday.com/reader/J1YvI9CYZUWl1U7_PSHyHA/L_RKkfJI6bKu1M2~_mfesQ) -```bash -prism --config_file myconfig.ini \ - --config_name integration \ - tables get -``` -## Python client library example +## Python Example -### Create a new Prism table -The following Python script uses the Prism-Python client library to create -a new Workday Prism Analytics Table and load the contents of a delimited -and compressed CSV file (.csv.gz). +### Create a new table with Prism API Version 3 -``` +```{python} import os import prism -# STEP 1 - Initialize the Prism-Python client library using -# environment variables. -pClientLib = prism.Prism( +# initialize the prism class with your credentials +p = prism.Prism( os.getenv("workday_base_url"), os.getenv("workday_tenant_name"), os.getenv("prism_client_id"), @@ -317,103 +71,47 @@ pClientLib = prism.Prism( os.getenv("prism_refresh_token") ) -# STEP 2 - Create a new table using the definition of fields provided -# by the schema.json file. +# create a new table based on the schema.json file table = prism.tables_create( - p=pClientLib, + p, table_name="my_new_table", file="/path/to/schema.json" ) -# Print JSON response body describing the new table. +# print JSON response body describing the new table. print(table) +``` -# STEP 3 - Use the convenience function prism.upload_file() to upload -# a local file to the table. Notice the "operation" is Insert -# for the first load. +### Manage data in an existing table with Prism API Version 3 +Table Operations Available: `TruncateandInsert`, `Insert`, `Update`, `Upsert`, +`Delete`. + +```{python} prism.upload_file( - p=pClientLib, + p, file="/path/to/data.csv.gz", table_id=table["id"], operation="Insert" ) ``` -### Manage data in an existing table - -The Workday Prism REST API provides multiple operations for adding, -updating and removing data from a Workday Prism Analytics table. One of -the following table operations must be specified for a loading operation. - -- **Insert**: Workday keeps any existing data in the target table -and adds new data from the source. -- **TruncateAndInsert**: Workday deletes all existing data -in the target table and replaces it with data from the source. -- **Delete**: Workday deletes data from the target table -based external ID data from the source. -- **Update**: Workday updates only existing data in the -target table based on data from the source. 
All matching rows, -based on the external ID value, are updated. -- **Upsert**: Workday inserts new data from the source if it -doesn't exist in the target table, and updates existing data -using the external ID value from the source to locate matching -rows. - -When using a `Delete`, `Update`, or `Upsert` operation, the source data -must contain an ``externalId`` attribute matching the ``externalId`` -defined in the target table, i.e., a primary key value. - -``` -# STEP 4 - Use the prism.upload convenience function -# to truncate the existing data and load new data -# from two CSV files. -prism.upload_file( - pClientLib, - ["/path/to/newdata-1.csv", "/path/to/newdata-2.csv"], - table["id"], - operation="TruncateAndInsert" -) -``` - -Note: the Workday Prism Analytics REST API only accepts delimited -and gzip compressed (.csv.gz) files. The ``upload`` convenience -function automatically performs the gzip operation. - ## CLI Example -The command line interface (CLI) is a no-code way to interact with -the Workday Prism Analytics REST API. The CLI expects tenant and credential -options to be passed on the command line, stored as environment variables, -or stored in a configuration file (see Configuration section above). - -For the following examples, a ``prism.ini`` exists in the current -working directory with a ``[default]`` section. - ```bash -# Get help with the CLI. -[ user@host]$ prism --help - -# Get help for the tables command. -[ user@host]$ prism tables --help +# get help with the CLI +$ prism --help -# Use the Workday Prism Analytics REST API GET:/tables endpoint -# to list Prism tables you have access to. -[ user@host]$ prism tables get +# get help for the tables command +$ prism tables --help -# Create a new Prism Table using the Workday Prism Analytics -# REST API POST:/tables endpoint -[ user@host]$ prism tables create my_new_table /path/to/schema.json +# list Prism tables you have access to. +$ prism tables get -# Upload data to the new table using the ID value - the default -# table operation is "TruncateAndInsert" -[ user@host]$ prism tables upload 83dd72bd7b911000ca2d790e719a0000 /path/to/file1.csv.gz +# create a new Prism table +$ prism tables create my_new_table /path/to/schema.json -# Upload multiple CSV files to a Prism API table. Notice the --isName (-n) -# option tells the CLI to lookup the table id. -[ user@host]$ prism tables upload \ - -operation Insert \ - -isName my_new_table \ - /path/to/*.csv +# upload data to the new table +$ prism tables upload 83dd72bd7b911000ca2d790e719a0000 /path/to/file1.csv.gz ``` ## Bugs @@ -423,4 +121,4 @@ and create a pull request (PR). We welcome all changes, big or small, and we will help you make the PR if you are new to `git`. 
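
When using a `Delete`, `Update`, or `Upsert` operation, each row in the source file must carry the `externalId` value defined on the target table. A minimal sketch of an upsert using the `upload_file` helper shown above (reusing the same environment variables and table from the earlier examples; the file path is a placeholder):

```{python}
import os
import prism

# initialize the client from the same environment variables as above
p = prism.Prism(
    os.getenv("workday_base_url"),
    os.getenv("workday_tenant_name"),
    os.getenv("prism_client_id"),
    os.getenv("prism_client_secret"),
    os.getenv("prism_refresh_token"),
)

# upsert rows into an existing table, resolved by name; the .csv.gz file
# must include the externalId column defined on the target table
prism.upload_file(
    p,
    file="/path/to/data.csv.gz",
    table_name="my_new_table",
    operation="Upsert",
)
```
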
## License -Released under the Apache-2.0 license (see [LICENSE](LICENSE)) +Released under the Apache-2.0 license (see [LICENSE](https://github.com/Workday/prism-python/blob/master/LICENSE)) From b37628593db904d3652409c8d94a067eeb05c8d6 Mon Sep 17 00:00:00 2001 From: CurtLH Date: Fri, 3 Nov 2023 18:37:37 -0700 Subject: [PATCH 098/103] Add authors to setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index abae36a..1691b59 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ name="prism", version=versioneer.get_version(), description="Python API client to load data into Prism.", - author="Curtis Hampton", + author="Curtis Hampton, Mark Waldron, Jacinta Corbett, Mark Greynolds", author_email="CurtLHampton@gmail.com", url="https://github.com/Workday/prism-python", packages=["prism", "prism.commands"], From f070283cfbb1c0bd91cf860e3052342b39534736 Mon Sep 17 00:00:00 2001 From: CurtLH Date: Sun, 5 Nov 2023 18:05:50 -0800 Subject: [PATCH 099/103] Remove examples --- examples/1_extract_data.py | 25 --------- examples/2_predict_topics.py | 94 -------------------------------- examples/3_upload_predictions.py | 20 ------- examples/README.md | 23 -------- examples/environment.yml | 13 ----- examples/schema.json | 41 -------------- 6 files changed, 216 deletions(-) delete mode 100644 examples/1_extract_data.py delete mode 100644 examples/2_predict_topics.py delete mode 100644 examples/3_upload_predictions.py delete mode 100644 examples/README.md delete mode 100644 examples/environment.yml delete mode 100644 examples/schema.json diff --git a/examples/1_extract_data.py b/examples/1_extract_data.py deleted file mode 100644 index 30abfee..0000000 --- a/examples/1_extract_data.py +++ /dev/null @@ -1,25 +0,0 @@ -import csv -import logging -import os -import requests - -# configure logging -logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") - -# download Workday data from RaaS -r = requests.get( - os.getenv("workday_raas_url"), - auth=(os.getenv("workday_username"), os.getenv("workday_password")), -) - -# f the request was successful, write data to CSV file -if r.status_code == 200: - data = r.json()["Report_Entry"] - fname = "survey_responses.csv" - with open(fname, "w") as f: - writer = csv.DictWriter(f, data[0].keys()) - writer.writeheader() - writer.writerows(data) - logging.info(f"{fname} created") -else: - logging.warning(f"Request not successful ({r.status_code})") diff --git a/examples/2_predict_topics.py b/examples/2_predict_topics.py deleted file mode 100644 index 4b69415..0000000 --- a/examples/2_predict_topics.py +++ /dev/null @@ -1,94 +0,0 @@ -import csv -import en_core_web_sm -import gensim -import gzip -import logging -from nltk.corpus import stopwords - -# configure logging -logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") - - -class clean_document: - def __init__(self, input_string, stopwords, nlp): - self.input_string = input_string - self.string_lower = self.lower_string() - self.tokens = self.tokenizer() - self.tokens_no_stopwords = self.remove_stopwords(stopwords) - self.annotated = self.annotate(nlp) - - def lower_string(self): - string_lower = self.input_string.lower() - return string_lower - - def tokenizer(self): - tokens = gensim.utils.simple_preprocess(self.string_lower, deacc=False) - return tokens - - def remove_stopwords(self, stopwords): - no_stopwords = [line for line in self.tokens if line not in stopwords] - return no_stopwords - - def 
annotate(self, nlp): - doc = nlp(" ".join(self.tokens_no_stopwords)) - new = [token.lemma_ for token in doc if token.pos_ in ["NOUN", "VERB", "ADJ"]] - return new - - -# load the spaCy model -nlp = en_core_web_sm.load() -logging.info("Trained pipeline loaded") - -# load stopwords -stopwords = set(stopwords.words("english")) -logging.info("Stopwords loaded") - -# load responses into a dict -responses = {} -with open("survey_responses.csv") as f: - for line in list(csv.DictReader(f)): - responses[line["Respondent WID"]] = {"answer": line["Questionnaire Answer"]} -logging.info(f"{len(responses)} survey responses loaded") - -# clean and normalize the survey responses -for wid in responses.keys(): - x = clean_document(responses[wid]["answer"], stopwords, nlp) - responses[wid]["clean"] = x.annotated -logging.info("Survey responses cleaned and normalized") - -# load cleaned comments into a dictionary -id2word = gensim.corpora.Dictionary([responses[wid]["clean"] for wid in responses.keys()]) -logging.info("Cleaned responses converted into a Gensim dictionary") - -# convert the cleaned documents into a bag-of-words -corpus = [id2word.doc2bow(responses[wid]["clean"]) for wid in responses.keys()] -logging.info("Gensim dictionary converted into a corpus") - -# fit LDA model to corpus -model = gensim.models.ldamodel.LdaModel( - corpus=corpus, - num_topics=3, - id2word=id2word, - random_state=42, - chunksize=200, - iterations=41, - passes=16, -) -logging.info("LDA topic model fit to corpus") - -# predict topic for each comment -predictions = [] -for wid, text, vec in zip(responses.keys(), [responses[wid]["answer"] for wid in responses.keys()], corpus): - pred = model[vec] - stats = {f"Topic {line[0]+1}": line[1] for line in pred} - row = {"wid": wid, "topic": max(stats, key=stats.get), "topic_score": round(stats[max(stats, key=stats.get)], 4)} - predictions.append(row) -logging.info("Topics predicted for survey resposnes") - -# write predictions to a compressed file -fname = "predictions.csv.gz" -with gzip.open(fname, "wt") as f: - writer = csv.DictWriter(f, predictions[0].keys()) - writer.writeheader() - writer.writerows(predictions) -logging.info(f"{fname} created") diff --git a/examples/3_upload_predictions.py b/examples/3_upload_predictions.py deleted file mode 100644 index 3a8b894..0000000 --- a/examples/3_upload_predictions.py +++ /dev/null @@ -1,20 +0,0 @@ -import os -import prism - -# instantiate the Prism class -p = prism.Prism( - os.getenv("workday_base_url"), - os.getenv("workday_tenant_name"), - os.getenv("prism_client_id"), - os.getenv("prism_client_secret"), - os.getenv("prism_refresh_token"), -) - -# load schema for new table -schema = prism.load_schema("schema.json") - -# create the table in Prism -table = prism.create_table(p, "Topic_Model_Predictions_BDS", schema["fields"]) - -# upload the file to the table -prism.upload_file(p, "predictions.csv.gz", table["id"], operation="TruncateandInsert") diff --git a/examples/README.md b/examples/README.md deleted file mode 100644 index 5c19e7f..0000000 --- a/examples/README.md +++ /dev/null @@ -1,23 +0,0 @@ -# Workday Data Science Workflow - -The goal of this project is to demonstrate use of the Workday Data Science Workflow. This workflow includes 3 steps: - -1. Extract data from Workday using Report as a Service (RaaS) -2. Enrich your data using your desired Data Science tools -3. 
Push enriched data back into Workday via Prism API - -## Example Use Case - -This example demonstrates how to obtain survey responses via Workday RaaS, apply an Latent Dirichlet Allocation (LDA) topic model to the open-ended responses, and upload the predicted topics to Prism. This is meant to be a generic example of the workflow and should serve as inspiration of one way to integrate machine learning with Workday. - -## Prism Python Package - -To upload your dataset to Prism, we recommend using the Python package `prism`. This package makes it easy to programatically interact with the Prism API. To learn more about the Prism Python package, refer to the [package repository on GitHub](https://github.com/Workday/prism-python). - -To install the latest version of the Prism package: - -``` -pip install git+git://github.com/Workday/prism-python.git -``` - -> Note: when you install an additional package in Google Colab using this method, you will need to reinstall the package each time you launch a new session. diff --git a/examples/environment.yml b/examples/environment.yml deleted file mode 100644 index f87d5a6..0000000 --- a/examples/environment.yml +++ /dev/null @@ -1,13 +0,0 @@ -name: wday-dev -channels: - - defaults - - conda-forge -dependencies: - - python=3.8 - - nltk=3.4.4 - - spacy=3.0.3 - - spacy-model-en_core_web_sm=3.0.0 - - gensim - - pip - - pip: - - git+git://github.com/Workday/prism-python.git@0.2.2 diff --git a/examples/schema.json b/examples/schema.json deleted file mode 100644 index bfab781..0000000 --- a/examples/schema.json +++ /dev/null @@ -1,41 +0,0 @@ -{ - "fields": [ - { - "ordinal": 1, - "name": "worker_wid", - "type": { - "id": "Schema_Field_Type=Text" - } - }, - { - "ordinal": 2, - "name": "topic", - "type": { - "id": "Schema_Field_Type=Text" - } - }, - { - "ordinal": 3, - "name": "topic_score", - "precision": 38, - "scale": 4, - "type": { - "id": "Schema_Field_Type=Numeric" - } - } - ], - "parseOptions": { - "charset": { - "id": "Encoding=UTF-8" - }, - "fieldsDelimitedBy": ",", - "fieldsEnclosedBy": "\"", - "headerLinesToIgnore": 1, - "type": { - "id": "Schema_File_Type=Delimited" - } - }, - "schemaVersion": { - "id": "Schema_Version=1.0" - } -} From 684ac8f13b08b9651e2afd5386aba34ecb371a5b Mon Sep 17 00:00:00 2001 From: CurtLH Date: Sun, 5 Nov 2023 18:25:10 -0800 Subject: [PATCH 100/103] Fix typo for docstring of upload_file() --- prism/prism.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prism/prism.py b/prism/prism.py index 63e2092..18faab3 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -1487,7 +1487,7 @@ def resolve_file_list(files): def upload_file(p, file, table_id=None, table_name=None, operation="TruncateAndInsert"): - """Create a new Prism table. 
+ """Upload a file to an existing Prism table Parameters ---------- From 919492a9e85354e18dded9528c11c08e9f17605b Mon Sep 17 00:00:00 2001 From: CurtLH Date: Sun, 5 Nov 2023 19:31:57 -0800 Subject: [PATCH 101/103] Add tables_create() --- prism/__init__.py | 2 ++ prism/prism.py | 74 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/prism/__init__.py b/prism/__init__.py index a0770d5..9682cd5 100644 --- a/prism/__init__.py +++ b/prism/__init__.py @@ -2,6 +2,7 @@ Prism, set_logging, schema_compact, + tables_create, upload_file, load_schema, truncate_table, @@ -16,6 +17,7 @@ "Prism", "set_logging", "schema_compact", + "tables_create", "upload_file", "load_schema", "truncate_table", diff --git a/prism/prism.py b/prism/prism.py index 18faab3..a88e1a6 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -1486,6 +1486,80 @@ def resolve_file_list(files): return target_files +def tables_create(p, table_name=None, display_name=None, enable_for_analysis=True, source_name=None, source_wid=None, file=None): + """Create Prism table + + Parameters + ---------- + p : Prism + Instantiated Prism class from prism.Prism() + + table_name : str + Table name - overrides name from schema + + display_name : str + Specify a display name - defaults to name + + enableForAnalysis : boolean + Enable this table for analytic + + sourceName : str + The API name of an existing table to copy + + sourceWID : str + The WID of an existing table to copy + + file : str + File containing the schema to be used to create the table + + Returns + ------- + If the request is successful, a dictionary containing information about + the table is returned. + """ + + # We can assume a schema was found/built - get_schema sys.exits if there is a problem. + schema = load_schema(p, file, source_name, source_wid) + + # Initialize a new schema with the particulars for this table operation. + if table_name is not None: + + # If we got a name, set it in the table schema + schema["name"] = table_name.replace(" ", "_") # Minor clean-up + + # Force the display name - there cannot be duplicate displayNames + # in the data catalog. + schema["displayName"] = table_name + logger.debug(f'setting table name to {schema["name"]}') + + elif "name" not in schema: + # The schema doesn't have a name and none was given - exit. + # Note: this could be true if we have a schema of only fields. + logger.error("Table --table_name must be specified.") + sys.exit(1) + + if display_name is not None: + # If we got a display name, set it in the schema + schema["displayName"] = display_name + + elif "displayName" not in schema: + # Default the display name to the name if not in the schema. + schema["displayName"] = table_name + logger.debug(f'defaulting displayName to {schema["displayName"]}') + + if enable_for_analysis is not None: + schema["enableForAnalysis"] = enable_for_analysis + + elif "enableForAnalysis" not in schema: + # Default to False - do not enable. 
+ schema["enableForAnalysis"] = False + logger.debug("defaulting enableForAnalysis to False.") + + # create the table + table = p.tables_post(schema) + + return table + def upload_file(p, file, table_id=None, table_name=None, operation="TruncateAndInsert"): """Upload a file to an existing Prism table From 2b55d3448d96ed57331eea4ce43d77b3620c5543 Mon Sep 17 00:00:00 2001 From: CurtLH Date: Sun, 5 Nov 2023 19:32:43 -0800 Subject: [PATCH 102/103] Update README example to install specific version --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fc71cdf..24ea0b7 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ $ pip install git+https://github.com/Workday/prism-python.git It is also possible to install a specific tagged release with: ```bash -$ pip install git+https://github.com/Workday/prism-python.git@0.2.0 +$ pip install git+https://github.com/Workday/prism-python.git@0.3.0 ``` ## Requirements From 5ad16f3cf5d9fe2f0fe7d829e4ebe45b48c799b4 Mon Sep 17 00:00:00 2001 From: CurtLH Date: Sun, 5 Nov 2023 19:45:22 -0800 Subject: [PATCH 103/103] Format using Black --- prism/cli.py | 23 ++------- prism/commands/buckets_commands.py | 48 ++++--------------- prism/commands/dataChanges_commands.py | 58 ++++------------------- prism/commands/fileContainers_commands.py | 5 +- prism/commands/tables_commands.py | 53 ++++----------------- prism/prism.py | 36 ++++---------- 6 files changed, 39 insertions(+), 184 deletions(-) diff --git a/prism/cli.py b/prism/cli.py index 3a1fd1f..e13f102 100644 --- a/prism/cli.py +++ b/prism/cli.py @@ -28,18 +28,10 @@ def param_fixup(value, config, config_name, option): @click.group(help="CLI for interacting with Workday’s Prism API") # Tenant specific parameters @click.option( - "--base_url", - envvar="workday_base_url", - type=str, - required=False, - help="The base URL for the API client", + "--base_url", envvar="workday_base_url", type=str, required=False, help="The base URL for the API client", ) @click.option( - "--tenant_name", - envvar="workday_tenant_name", - type=str, - required=False, - help="The name of your Workday tenant", + "--tenant_name", envvar="workday_tenant_name", type=str, required=False, help="The name of your Workday tenant", ) @click.option( "--client_id", @@ -94,16 +86,7 @@ def param_fixup(value, config, config_name, option): ) @click.pass_context def cli( - ctx, - base_url, - tenant_name, - client_id, - client_secret, - refresh_token, - log_level, - log_file, - config_file, - config_name, + ctx, base_url, tenant_name, client_id, client_secret, refresh_token, log_level, log_file, config_file, config_name, ): # Attempt to locate a configuration file - this is not required and config # parameters are only used if the configuration values are not passed on diff --git a/prism/commands/buckets_commands.py b/prism/commands/buckets_commands.py index 1ce3c72..b5721a4 100644 --- a/prism/commands/buckets_commands.py +++ b/prism/commands/buckets_commands.py @@ -8,11 +8,7 @@ @click.command("get") @click.option( - "-n", - "--isName", - is_flag=True, - default=False, - help="Flag to treat the bucket or table argument as a name.", + "-n", "--isName", is_flag=True, default=False, help="Flag to treat the bucket or table argument as a name.", ) @click.option( "-l", @@ -37,12 +33,7 @@ help="How much information to be returned in response JSON.", ) @click.option( - "-s", - "--search", - is_flag=True, - show_default=True, - default=False, - help="Use substring search bucket or table.", + "-s", "--search", 
is_flag=True, show_default=True, default=False, help="Use substring search bucket or table.", ) @click.option("--table", help="The id or name of a Prism table to list all buckets.") @click.argument("bucket", required=False) @@ -99,11 +90,7 @@ def buckets_get(ctx, bucket, table, isname, limit, offset, type_, search): help="Schema JSON file for the target table.", ) @click.option( - "-o", - "--operation", - default="TruncateAndInsert", - show_default=True, - help="Operation to perform on the table.", + "-o", "--operation", default="TruncateAndInsert", show_default=True, help="Operation to perform on the table.", ) @click.argument("bucket", required=False) @click.pass_context @@ -120,11 +107,7 @@ def buckets_create(ctx, target_name, target_id, file, operation, bucket): sys.exit(1) bucket = p.buckets_create( - bucket_name=bucket, - target_id=target_id, - target_name=target_name, - schema=file, - operation=operation, + bucket_name=bucket, target_id=target_id, target_name=target_name, schema=file, operation=operation, ) if bucket is not None: @@ -136,19 +119,12 @@ def buckets_create(ctx, target_name, target_id, file, operation, bucket): @click.command("files") @click.option( - "-n", - "--target_name", - default=None, - help="Name of the table to associate with the bucket.", + "-n", "--target_name", default=None, help="Name of the table to associate with the bucket.", ) @click.option("-i", "--target_id", default=None, help="Table ID to associate with the table.") @click.option("-f", "--file", default=None, help="Schema JSON file for the target table.") @click.option( - "-o", - "--operation", - default="TruncateAndInsert", - show_default=True, - help="Operation to perform on the table.", + "-o", "--operation", default="TruncateAndInsert", show_default=True, help="Operation to perform on the table.", ) @click.option("-b", "--bucket", help="Bucket name to load files.", default=None) @click.option( @@ -189,11 +165,7 @@ def buckets_files(ctx, target_name, target_id, file, operation, bucket, complete @click.command("complete") @click.option( - "-n", - "--isName", - is_flag=True, - default=False, - help="Flag to treat the bucket argument as a name.", + "-n", "--isName", is_flag=True, default=False, help="Flag to treat the bucket argument as a name.", ) @click.argument("bucket", required=True) @click.pass_context @@ -230,11 +202,7 @@ def buckets_complete(ctx, isname, bucket): @click.command("errorFile") @click.option( - "-n", - "--isName", - is_flag=True, - default=False, - help="Flag to treat the bucket argument as a name.", + "-n", "--isName", is_flag=True, default=False, help="Flag to treat the bucket argument as a name.", ) @click.argument("bucket", required=True) @click.pass_context diff --git a/prism/commands/dataChanges_commands.py b/prism/commands/dataChanges_commands.py index 63b8892..7bb0b5a 100644 --- a/prism/commands/dataChanges_commands.py +++ b/prism/commands/dataChanges_commands.py @@ -9,23 +9,13 @@ @click.command("get") @click.option( - "-n", - "--isName", - default=False, - is_flag=True, - help="Flag to treat the dct argument as a name.", + "-n", "--isName", default=False, is_flag=True, help="Flag to treat the dct argument as a name.", ) @click.option( - "-l", - "--limit", - default=-1, - help="The maximum number of object data entries included in the response.", + "-l", "--limit", default=-1, help="The maximum number of object data entries included in the response.", ) @click.option( - "-o", - "--offset", - default=0, - help="The offset to the first object in a collection to include in 
the response.", + "-o", "--offset", default=0, help="The offset to the first object in a collection to include in the response.", ) @click.option( "-t", @@ -75,11 +65,7 @@ def dataChanges_get(ctx, isname, dct, limit, offset, type_, search): @click.command("validate") @click.option( - "-n", - "--isName", - default=False, - is_flag=True, - help="Flag to treat the dct argument as a name.", + "-n", "--isName", default=False, is_flag=True, help="Flag to treat the dct argument as a name.", ) @click.option("-s", "--search", is_flag=True, help="Use contains search substring for --name.") @click.argument("dct", required=True) @@ -120,11 +106,7 @@ def dataChanges_validate(ctx, isname, dct, search): @click.command("run") @click.option( - "-n", - "--isName", - default=False, - is_flag=True, - help="Flag to treat the dct argument as a name.", + "-n", "--isName", default=False, is_flag=True, help="Flag to treat the dct argument as a name.", ) @click.argument("dct", required=True) @click.argument("fid", required=False) @@ -164,18 +146,10 @@ def dataChanges_run(ctx, dct, fid, isname): @click.command("activities") @click.option( - "-n", - "--isName", - default=False, - is_flag=True, - help="Flag to treat the dct argument as a name.", + "-n", "--isName", default=False, is_flag=True, help="Flag to treat the dct argument as a name.", ) @click.option( - "-s", - "--status", - is_flag=True, - default=False, - help="Return only the status of the activity.", + "-s", "--status", is_flag=True, default=False, help="Return only the status of the activity.", ) @click.argument("dct", required=True) @click.argument("activityID", required=True) @@ -216,25 +190,13 @@ def dataChanges_activities(ctx, dct, activityid, status, isname): @click.command("upload") @click.option( - "-n", - "--isName", - default=False, - is_flag=True, - help="Flag to treat the dct argument as a name.", + "-n", "--isName", default=False, is_flag=True, help="Flag to treat the dct argument as a name.", ) @click.option( - "-w", - "--wait", - default=False, - is_flag=True, - help="Wait for the data change task to complete.", + "-w", "--wait", default=False, is_flag=True, help="Wait for the data change task to complete.", ) @click.option( - "-v", - "--verbose", - default=False, - is_flag=True, - help="Display additional information.", + "-v", "--verbose", default=False, is_flag=True, help="Display additional information.", ) @click.argument("dct", required=True) @click.argument("file", required=True, nargs=-1, type=click.Path(exists=True)) diff --git a/prism/commands/fileContainers_commands.py b/prism/commands/fileContainers_commands.py index 775ac03..e217e6b 100644 --- a/prism/commands/fileContainers_commands.py +++ b/prism/commands/fileContainers_commands.py @@ -40,10 +40,7 @@ def fileContainers_get(ctx, id): @click.command("load") @click.option( - "-i", - "--id", - default=None, - help="Target File container ID - defaults to a new container.", + "-i", "--id", default=None, help="Target File container ID - defaults to a new container.", ) @click.argument("file", nargs=-1, type=click.Path(exists=True)) @click.pass_context diff --git a/prism/commands/tables_commands.py b/prism/commands/tables_commands.py index a9f2523..66b08e2 100644 --- a/prism/commands/tables_commands.py +++ b/prism/commands/tables_commands.py @@ -10,11 +10,7 @@ @click.command("get") @click.option( - "-n", - "--isName", - is_flag=True, - default=False, - help="Flag to treat the table argument as a name.", + "-n", "--isName", is_flag=True, default=False, help="Flag to treat the table 
argument as a name.", ) @click.option( "-l", @@ -39,17 +35,10 @@ help="How much information returned for each table.", ) @click.option( - "-c", - "--compact", - is_flag=True, - default=False, - help="Compact the table schema for use in edit (put) operations.", + "-c", "--compact", is_flag=True, default=False, help="Compact the table schema for use in edit (put) operations.", ) @click.option( - "-s", - "--search", - is_flag=True, - help="Enable substring search of NAME in api name or display name.", + "-s", "--search", is_flag=True, help="Enable substring search of NAME in api name or display name.", ) @click.argument("table", required=False) @click.pass_context @@ -97,12 +86,7 @@ def tables_get(ctx, isname, table, limit, offset, type_, compact, search): @click.option("-n", "--table_name", help="Table name - overrides name from schema.") @click.option("-d", "--displayName", help="Specify a display name - defaults to name.") @click.option( - "-e", - "--enableForAnalysis", - type=bool, - is_flag=True, - default=None, - help="Enable this table for analytics.", + "-e", "--enableForAnalysis", type=bool, is_flag=True, default=None, help="Enable this table for analytics.", ) @click.option("-s", "--sourceName", help="The API name of an existing table to copy.") @click.option("-w", "--sourceWID", help="The WID of an existing table to copy.") @@ -164,11 +148,7 @@ def tables_create(ctx, table_name, displayname, enableforanalysis, sourcename, s @click.command("edit") @click.option( - "-t", - "--truncate", - is_flag=True, - default=False, - help="Truncate the table before updating.", + "-t", "--truncate", is_flag=True, default=False, help="Truncate the table before updating.", ) @click.argument("file", required=True, type=click.Path(exists=True, dir_okay=False, readable=True)) @click.pass_context @@ -194,11 +174,7 @@ def tables_edit(ctx, file, truncate): @click.command("patch") @click.option( - "-n", - "--isName", - is_flag=True, - default=False, - help="Flag to treat the table argument as a name.", + "-n", "--isName", is_flag=True, default=False, help="Flag to treat the table argument as a name.", ) @click.option( "--displayName", @@ -222,10 +198,7 @@ def tables_edit(ctx, file, truncate): help="Set the documentation for an existing table.", ) @click.option( - "--enableForAnalysis", - is_flag=False, - default=None, - type=click.Choice(["true", "false"], case_sensitive=False), + "--enableForAnalysis", is_flag=False, default=None, type=click.Choice(["true", "false"], case_sensitive=False), ) @click.argument("table", required=True, type=str) @click.argument("file", required=False, type=click.Path(dir_okay=False)) @@ -334,11 +307,7 @@ def set_patch_value(attr, value): @click.command("upload") @click.option( - "-n", - "--isName", - is_flag=True, - default=False, - help="Flag to treat the table argument as a name.", + "-n", "--isName", is_flag=True, default=False, help="Flag to treat the table argument as a name.", ) @click.option( "-o", @@ -375,11 +344,7 @@ def tables_upload(ctx, table, isname, operation, file): @click.command("truncate") @click.option( - "-n", - "--isName", - is_flag=True, - default=False, - help="Flag to treat the table argument as a name.", + "-n", "--isName", is_flag=True, default=False, help="Flag to treat the table argument as a name.", ) @click.argument("table", required=True) @click.pass_context diff --git a/prism/prism.py b/prism/prism.py index a88e1a6..60a3fc9 100644 --- a/prism/prism.py +++ b/prism/prism.py @@ -228,13 +228,7 @@ class Prism: """ def __init__( - self, - base_url, - 
tenant_name, - client_id, - client_secret, - refresh_token, - version="v3", + self, base_url, tenant_name, client_id, client_secret, refresh_token, version="v3", ): """Init the Prism class with required attributes.""" @@ -441,13 +435,7 @@ def reset_bearer_token(self): self.bearer_token_timestamp = None def tables_get( - self, - table_name=None, - table_id=None, - limit=None, - offset=None, - type_="summary", - search=False, + self, table_name=None, table_id=None, limit=None, offset=None, type_="summary", search=False, ): """Obtain details for all tables or a given table(s). @@ -836,12 +824,7 @@ def buckets_get( return return_buckets def buckets_create( - self, - bucket_name=None, - target_name=None, - target_id=None, - schema=None, - operation="TruncateAndInsert", + self, bucket_name=None, target_name=None, target_id=None, schema=None, operation="TruncateAndInsert", ): """Create a Prism bucket to upload files. @@ -1089,13 +1072,7 @@ def buckets_errorFile(self, bucket_id): return None def dataChanges_get( - self, - datachange_name=None, - datachange_id=None, - limit=None, - offset=None, - type_="summary", - search=False, + self, datachange_name=None, datachange_id=None, limit=None, offset=None, type_="summary", search=False, ): """ """ # We are doing a dataChanges GET operation. @@ -1486,7 +1463,9 @@ def resolve_file_list(files): return target_files -def tables_create(p, table_name=None, display_name=None, enable_for_analysis=True, source_name=None, source_wid=None, file=None): +def tables_create( + p, table_name=None, display_name=None, enable_for_analysis=True, source_name=None, source_wid=None, file=None +): """Create Prism table Parameters @@ -1560,6 +1539,7 @@ def tables_create(p, table_name=None, display_name=None, enable_for_analysis=Tru return table + def upload_file(p, file, table_id=None, table_name=None, operation="TruncateAndInsert"): """Upload a file to an existing Prism table