From 4fe3c49d8f145d11616f7f12dcecc13d2eca0979 Mon Sep 17 00:00:00 2001 From: Budi Dharmawan <29maret@gmail.com> Date: Thu, 30 May 2019 13:17:04 +0800 Subject: [PATCH 01/10] fix create_dataset to create table inside feast's dataset --- .../BigQueryTraningDatasetCreator.java | 110 ++-- .../examples/quickstart/Quickstart.ipynb | 523 ++++++++++++++++-- sdk/python/feast/sdk/client.py | 4 +- sdk/python/feast/sdk/resources/feature_set.py | 10 +- sdk/python/feast/sdk/utils/bq_util.py | 12 +- .../tests/sdk/resources/test_feature_set.py | 6 +- sdk/python/tests/sdk/test_client.py | 12 +- sdk/python/tests/sdk/utils/test_bq_utils.py | 29 +- 8 files changed, 600 insertions(+), 106 deletions(-) diff --git a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java index 5b86928a78..ce240210ae 100644 --- a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java +++ b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java @@ -21,16 +21,25 @@ import com.google.cloud.bigquery.BigQueryOptions; import com.google.cloud.bigquery.JobException; import com.google.cloud.bigquery.QueryJobConfiguration; +import com.google.cloud.bigquery.Table; import com.google.cloud.bigquery.TableId; +import com.google.cloud.bigquery.TableInfo; import com.google.common.base.Strings; import com.google.protobuf.Timestamp; import feast.core.DatasetServiceProto.DatasetInfo; import feast.core.DatasetServiceProto.FeatureSet; import feast.core.exception.TrainingDatasetCreationException; +import java.math.BigInteger; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.time.Clock; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; import lombok.extern.slf4j.Slf4j; @Slf4j @@ -85,23 +94,35 @@ public DatasetInfo createDataset( String namePrefix) { try { String query = templater.createQuery(featureSet, startDate, endDate, limit); - String tableName = createBqTableName(startDate, endDate, namePrefix); - String bqDatasetName = createBqDatasetName(featureSet.getEntityName()); - - createBqDatasetIfMissing(bqDatasetName); - - TableId destinationTable = - TableId.of(projectId, createBqDatasetName(featureSet.getEntityName()), tableName); - QueryJobConfiguration queryConfig = - QueryJobConfiguration.newBuilder(query) - .setAllowLargeResults(true) - .setDestinationTable(destinationTable) - .build(); - JobOption jobOption = JobOption.fields(); - bigQuery.query(queryConfig, jobOption); + String tableName = createBqTableName(datasetPrefix, featureSet, startDate, endDate, + namePrefix); + String tableDescription = createBqTableDescription(featureSet, startDate, endDate, query); + + Map options = templater.getStorageSpec().getOptionsMap(); + + TableId destinationTableId = + TableId.of(projectId, options.get("dataset"), tableName); + + if (!bigQuery.getTable(destinationTableId).exists()) { + QueryJobConfiguration queryConfig = + QueryJobConfiguration.newBuilder(query) + .setAllowLargeResults(true) + .setDestinationTable(destinationTableId) + .build(); + JobOption jobOption = JobOption.fields(); + bigQuery.query(queryConfig, jobOption); + + } + + Table destinationTable = bigQuery.getTable(destinationTableId); + TableInfo tableInfo = destinationTable.toBuilder() + .setDescription(tableDescription) + .build(); + bigQuery.update(tableInfo); + 
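/* [Editorial annotation in this hunk; not part of the original patch.] The guard
   above is what makes createDataset idempotent: the destination table name is a
   deterministic hash of the sorted feature ids and the date range, so a repeated
   request finds the already materialized table, skips the query job, and only the
   table description is refreshed by the update below. Note that
   BigQuery.getTable(...) returns null for a missing table, so chaining .exists()
   onto it as done here can throw a NullPointerException; PATCH 02/10 further down
   tightens this check to a null comparison. */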
return DatasetInfo.newBuilder() - .setName(createTrainingDatasetName(namePrefix, featureSet.getEntityName(), tableName)) - .setTableUrl(toTableUrl(destinationTable)) + .setName(tableName) + .setTableUrl(toTableUrl(destinationTableId)) .build(); } catch (JobException e) { log.error("Failed creating training dataset", e); @@ -112,31 +133,54 @@ public DatasetInfo createDataset( } } - private void createBqDatasetIfMissing(String bqDatasetName) { - if (bigQuery.getDataset(bqDatasetName) != null) { - return; - } + private String createBqTableName(String datasetPrefix, FeatureSet featureSet, Timestamp startDate, + Timestamp endDate, String namePrefix) { - // create dataset - bigQuery.create(com.google.cloud.bigquery.DatasetInfo.of(bqDatasetName)); - } + List features = new ArrayList(featureSet.getFeatureIdsList()); + Collections.sort(features); + + String datasetId = String.format("%s_%s_%s", features, startDate, endDate); + String hashText; + + // create hash from datasetId + try { + MessageDigest md = MessageDigest.getInstance("SHA-1"); + byte[] messageDigest = md.digest(datasetId.getBytes()); + BigInteger no = new BigInteger(1, messageDigest); + hashText = no.toString(16); + while (hashText.length() < 32) { + hashText = "0" + hashText; + } + } catch (NoSuchAlgorithmException e) { + throw new RuntimeException(e); + } - private String createBqTableName(Timestamp startDate, Timestamp endDate, String namePrefix) { - String currentTime = String.valueOf(clock.millis()); if (!Strings.isNullOrEmpty(namePrefix)) { // only alphanumeric and underscore are allowed namePrefix = namePrefix.replaceAll("[^a-zA-Z0-9_]", "_"); return String.format( - "%s_%s_%s_%s", - namePrefix, currentTime, formatTimestamp(startDate), formatTimestamp(endDate)); + "%s_%s_%s_%s", datasetPrefix, featureSet.getEntityName(), namePrefix, hashText); } return String.format( - "%s_%s_%s", currentTime, formatTimestamp(startDate), formatTimestamp(endDate)); + "%s_%s_%s", datasetPrefix, featureSet.getEntityName(), hashText); } - private String createBqDatasetName(String entity) { - return String.format("%s_%s", datasetPrefix, entity); + private String createBqTableDescription(FeatureSet featureSet, Timestamp startDate, Timestamp + endDate, String query) { + String currentTime = Instant.now().toString(); + return new StringBuilder() + .append("Feast Dataset for ") + .append(featureSet.getEntityName()) + .append(" features.\nContains data from ") + .append(formatTimestamp(startDate)) + .append(" to ") + .append(formatTimestamp(endDate)) + .append(".\nLast edited at ") + .append(currentTime) + .append(".\n\n-----\n\n") + .append(query) + .toString(); } private String formatTimestamp(Timestamp timestamp) { @@ -149,10 +193,4 @@ private String toTableUrl(TableId tableId) { "%s.%s.%s", tableId.getProject(), tableId.getDataset(), tableId.getTable()); } - private String createTrainingDatasetName(String namePrefix, String entityName, String tableName) { - if (!Strings.isNullOrEmpty(namePrefix)) { - return tableName; - } - return String.format("%s_%s", entityName, tableName); - } } diff --git a/sdk/python/examples/quickstart/Quickstart.ipynb b/sdk/python/examples/quickstart/Quickstart.ipynb index 6023d78a19..e915983ac5 100644 --- a/sdk/python/examples/quickstart/Quickstart.ipynb +++ b/sdk/python/examples/quickstart/Quickstart.ipynb @@ -2,15 +2,16 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { - "collapsed": true + "pycharm": { + "is_executing": false + } }, "outputs": [], "source": [ "import 
pandas as pd\n", "import numpy as np\n", - "\n", "from feast.sdk.resources.entity import Entity\n", "from feast.sdk.resources.storage import Storage\n", "from feast.sdk.resources.feature import Feature, Datastore, ValueType\n", @@ -18,7 +19,9 @@ "import feast.specs.FeatureSpec_pb2 as feature_pb\n", "\n", "from feast.sdk.importer import Importer\n", - "from feast.sdk.client import Client" + "from feast.sdk.client import Client\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\", \"Your application has authenticated using end user credentials\")" ] }, { @@ -30,11 +33,168 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { - "collapsed": true + "pycharm": { + "is_executing": false + } }, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ridepickup_datetimelog_trip_durationdistance_haversinedistance_dummy_manhattandirectionmonthday_of_monthhourday_of_weekvi_1vi_2sf_nsf_y
0id28754212016-03-14 17:24:556.1224931.4985211.73543399.9701963141700110
1id23773942016-06-12 00:43:356.4982821.8055072.430506-117.153768612061010
2id38585292016-01-19 11:35:247.6615276.3850988.203575-159.6801651191110110
3id35046732016-04-06 19:32:316.0637851.4854981.661331-172.737700461920110
4id21810282016-03-26 13:30:556.0776421.1885881.199457179.4735853261350110
\n", + "
" + ], + "text/plain": [ + " ride pickup_datetime log_trip_duration distance_haversine \\\n", + "0 id2875421 2016-03-14 17:24:55 6.122493 1.498521 \n", + "1 id2377394 2016-06-12 00:43:35 6.498282 1.805507 \n", + "2 id3858529 2016-01-19 11:35:24 7.661527 6.385098 \n", + "3 id3504673 2016-04-06 19:32:31 6.063785 1.485498 \n", + "4 id2181028 2016-03-26 13:30:55 6.077642 1.188588 \n", + "\n", + " distance_dummy_manhattan direction month day_of_month hour \\\n", + "0 1.735433 99.970196 3 14 17 \n", + "1 2.430506 -117.153768 6 12 0 \n", + "2 8.203575 -159.680165 1 19 11 \n", + "3 1.661331 -172.737700 4 6 19 \n", + "4 1.199457 179.473585 3 26 13 \n", + "\n", + " day_of_week vi_1 vi_2 sf_n sf_y \n", + "0 0 0 1 1 0 \n", + "1 6 1 0 1 0 \n", + "2 1 0 1 1 0 \n", + "3 2 0 1 1 0 \n", + "4 5 0 1 1 0 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Feature engineering steps \n", "## Referenced from https://www.kaggle.com/karelrv/nyct-from-a-to-z-with-xgboost-tutorial/notebook\n", @@ -61,7 +221,7 @@ " x = np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(lng_delta_rad)\n", " return np.degrees(np.arctan2(y, x))\n", "\n", - "df = pd.read_csv('taxi_small.csv')\n", + "df = pd.read_csv('~/Workspace/feast/sdk/python/examples/quickstart/taxi_small.csv')\n", "df['pickup_datetime'] = pd.to_datetime(df.pickup_datetime)\n", "df['dropoff_datetime'] = pd.to_datetime(df.dropoff_datetime)\n", "df['log_trip_duration'] = np.log(df['trip_duration'].values + 1)\n", @@ -98,25 +258,206 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, + "execution_count": 3, + "metadata": {}, "outputs": [], "source": [ - "FEAST_CORE_URL = 'localhost:6565'\n", - "FEAST_SERVING_URL = 'localhost:6566'\n", - "STAGING_LOCATION = 'gs://feast-bucket/staging'" + "FEAST_CORE_URL = 'localhost:8433'\n", + "FEAST_SERVING_URL = 'feast-serving.sandbox.s.ds.golabs.io:8433'\n", + "STAGING_LOCATION = 'gs://zzz-bubub/'" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { - "collapsed": true, "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Successfully applied entity with name: ride\n", + "---\n", + "name: ride\n", + "description: nyc taxi dataset\n", + "\n", + "Successfully applied feature with id: ride.log_trip_duration\n", + "---\n", + "id: ride.log_trip_duration\n", + "name: log_trip_duration\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: DOUBLE\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: ride.distance_haversine\n", + "---\n", + "id: ride.distance_haversine\n", + "name: distance_haversine\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: DOUBLE\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: ride.distance_dummy_manhattan\n", + "---\n", + "id: ride.distance_dummy_manhattan\n", + "name: distance_dummy_manhattan\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: DOUBLE\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: ride.direction\n", + "---\n", + "id: ride.direction\n", + "name: direction\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: DOUBLE\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: 
ride.month\n", + "---\n", + "id: ride.month\n", + "name: month\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: INT64\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: ride.day_of_month\n", + "---\n", + "id: ride.day_of_month\n", + "name: day_of_month\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: INT64\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: ride.hour\n", + "---\n", + "id: ride.hour\n", + "name: hour\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: INT64\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: ride.day_of_week\n", + "---\n", + "id: ride.day_of_week\n", + "name: day_of_week\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: INT64\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: ride.vi_1\n", + "---\n", + "id: ride.vi_1\n", + "name: vi_1\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: INT32\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: ride.vi_2\n", + "---\n", + "id: ride.vi_2\n", + "name: vi_2\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: INT32\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: ride.sf_n\n", + "---\n", + "id: ride.sf_n\n", + "name: sf_n\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: INT32\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Successfully applied feature with id: ride.sf_y\n", + "---\n", + "id: ride.sf_y\n", + "name: sf_y\n", + "owner: user@website.com\n", + "description: nyc taxi dataset\n", + "valueType: INT32\n", + "entity: ride\n", + "dataStores: {}\n", + "\n", + "Staging file to remote path gs://zzz-bubub//tmp_ride_1559191176607.csv\n", + "Submitting job with spec:\n", + " type: file.csv\n", + "sourceOptions:\n", + " path: gs://zzz-bubub//tmp_ride_1559191176607.csv\n", + "entities:\n", + "- ride\n", + "schema:\n", + " entityIdColumn: ride\n", + " fields:\n", + " - name: ride\n", + " - name: pickup_datetime\n", + " - featureId: ride.log_trip_duration\n", + " name: log_trip_duration\n", + " - featureId: ride.distance_haversine\n", + " name: distance_haversine\n", + " - featureId: ride.distance_dummy_manhattan\n", + " name: distance_dummy_manhattan\n", + " - featureId: ride.direction\n", + " name: direction\n", + " - featureId: ride.month\n", + " name: month\n", + " - featureId: ride.day_of_month\n", + " name: day_of_month\n", + " - featureId: ride.hour\n", + " name: hour\n", + " - featureId: ride.day_of_week\n", + " name: day_of_week\n", + " - featureId: ride.vi_1\n", + " name: vi_1\n", + " - featureId: ride.vi_2\n", + " name: vi_2\n", + " - featureId: ride.sf_n\n", + " name: sf_n\n", + " - featureId: ride.sf_y\n", + " name: sf_y\n", + " timestampColumn: pickup_datetime\n", + "\n" + ] + }, + { + "ename": "_Rendezvous", + "evalue": "<_Rendezvous of RPC that terminated with:\n\tstatus = StatusCode.INTERNAL\n\tdetails = \"Error running ingestion job: feast.core.exception.JobExecutionException: Error running ingestion job: feast.core.exception.JobExecutionException: Error running ingestion job: java.lang.RuntimeException: Could not submit job: \nOptional[Error: Unable to access jarfile 
feast-ingestion.jar]\"\n\tdebug_error_string = \"{\"created\":\"@1559191180.478136000\",\"description\":\"Error received from peer ipv6:[::1]:8433\",\"file\":\"src/core/lib/surface/call.cc\",\"file_line\":1041,\"grpc_message\":\"Error running ingestion job: feast.core.exception.JobExecutionException: Error running ingestion job: feast.core.exception.JobExecutionException: Error running ingestion job: java.lang.RuntimeException: Could not submit job: \\nOptional[Error: Unable to access jarfile feast-ingestion.jar]\",\"grpc_status\":13}\"\n>", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31m_Rendezvous\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[0;31m# Ingest the feature data into the store\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 21\u001b[0;31m \u001b[0mfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimporter\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mapply_features\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mapply_entity\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Workspace/feast/sdk/python/feast/sdk/client.py\u001b[0m in \u001b[0;36mrun\u001b[0;34m(self, importer, name_override, apply_entity, apply_features)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Submitting job with spec:\\n {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mspec_to_yaml\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mimporter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mspec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_connect_core\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 167\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_job_service_stub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSubmitJob\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 168\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Submitted job with id: {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjobId\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjobId\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/.pyenv/versions/3.6.2/Python.framework/Versions/3.6/lib/python3.6/site-packages/grpc/_channel.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, request, timeout, metadata, credentials, wait_for_ready)\u001b[0m\n\u001b[1;32m 560\u001b[0m state, call, = self._blocking(request, timeout, metadata, credentials,\n\u001b[1;32m 561\u001b[0m wait_for_ready)\n\u001b[0;32m--> 562\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0m_end_unary_response_blocking\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcall\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 563\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 564\u001b[0m def with_call(self,\n", + "\u001b[0;32m~/.pyenv/versions/3.6.2/Python.framework/Versions/3.6/lib/python3.6/site-packages/grpc/_channel.py\u001b[0m in \u001b[0;36m_end_unary_response_blocking\u001b[0;34m(state, call, with_call, deadline)\u001b[0m\n\u001b[1;32m 464\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mresponse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 465\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 466\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0m_Rendezvous\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstate\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdeadline\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 467\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 468\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31m_Rendezvous\u001b[0m: <_Rendezvous of RPC that terminated with:\n\tstatus = StatusCode.INTERNAL\n\tdetails = \"Error running ingestion job: feast.core.exception.JobExecutionException: Error running ingestion job: feast.core.exception.JobExecutionException: Error running ingestion job: java.lang.RuntimeException: Could not submit job: \nOptional[Error: Unable to access jarfile feast-ingestion.jar]\"\n\tdebug_error_string = \"{\"created\":\"@1559191180.478136000\",\"description\":\"Error received from peer ipv6:[::1]:8433\",\"file\":\"src/core/lib/surface/call.cc\",\"file_line\":1041,\"grpc_message\":\"Error running ingestion job: feast.core.exception.JobExecutionException: Error running ingestion job: feast.core.exception.JobExecutionException: Error running ingestion job: java.lang.RuntimeException: Could not submit job: \\nOptional[Error: Unable to access jarfile feast-ingestion.jar]\",\"grpc_status\":13}\"\n>" + ] + } + ], "source": [ "# Now that we have finished creating our features, we ingest them into feast\n", "\n", @@ -152,11 +493,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { - "collapsed": true + "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "creating training dataset for features: ['ride.log_trip_duration', 'ride.distance_haversine', 'ride.distance_dummy_manhattan', 'ride.month', 'ride.direction', 'ride.day_of_month', 'ride.hour', 'ride.day_of_week', 'ride.vi_1', 'ride.vi_2', 'ride.sf_n', 'ride.sf_y']\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Core API URL not set. 
Either set the environment variable FEAST_CORE_URL or set it explicitly.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0;34m\"ride.sf_n\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \"ride.sf_y\"])\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mdataset_info\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeature_set\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"2016-06-01\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"2016-08-01\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0mdataset\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdownload_dataset_to_df\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdataset_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstaging_location\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mSTAGING_LOCATION\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Workspace/feast/sdk/python/feast/sdk/client.py\u001b[0m in \u001b[0;36mcreate_dataset\u001b[0;34m(self, feature_set, start_date, end_date, limit, name_prefix)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[0;34m\"creating training dataset for features: \"\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeature_set\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeatures\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 206\u001b[0m )\n\u001b[0;32m--> 207\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_connect_core\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 208\u001b[0m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_dataset_service_stub\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mCreateDataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Workspace/feast/sdk/python/feast/sdk/client.py\u001b[0m in \u001b[0;36m_connect_core\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 298\u001b[0m \u001b[0;34m\"\"\"Connect to core api\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__core_channel\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 300\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__core_channel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgrpc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minsecure_channel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcore_url\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 301\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_core_service_stub\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mCoreServiceStub\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__core_channel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 302\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_job_service_stub\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mJobServiceStub\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__core_channel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Workspace/feast/sdk/python/feast/sdk/client.py\u001b[0m in \u001b[0;36mcore_url\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0;34m\"Core API URL not set. Either set the \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 82\u001b[0m + \"environment variable {} or set it explicitly.\".format(\n\u001b[0;32m---> 83\u001b[0;31m \u001b[0mFEAST_CORE_URL_ENV_KEY\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 84\u001b[0m )\n\u001b[1;32m 85\u001b[0m )\n", + "\u001b[0;31mValueError\u001b[0m: Core API URL not set. Either set the environment variable FEAST_CORE_URL or set it explicitly." + ] + } + ], "source": [ "# Retrieving data: Training\n", "\n", @@ -164,8 +527,8 @@ " features=[\"ride.log_trip_duration\", \n", " \"ride.distance_haversine\",\n", " \"ride.distance_dummy_manhattan\",\n", - " \"ride.direction\",\n", " \"ride.month\",\n", + " \"ride.direction\",\n", " \"ride.day_of_month\",\n", " \"ride.hour\",\n", " \"ride.day_of_week\",\n", @@ -191,11 +554,105 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rideride.log_trip_durationride.distance_haversineride.distance_dummy_manhattanride.directionride.monthride.day_of_monthride.hourride.day_of_weekride.vi_1ride.vi_2ride.sf_nride.sf_y
0id12444818.08425417.98821823.770274114.1189841151340110
1id28754216.1224931.4985211.73543399.9701963141700110
\n", + "
" + ], + "text/plain": [ + " ride ride.log_trip_duration ride.distance_haversine \\\n", + "0 id1244481 8.084254 17.988218 \n", + "1 id2875421 6.122493 1.498521 \n", + "\n", + " ride.distance_dummy_manhattan ride.direction ride.month \\\n", + "0 23.770274 114.118984 1 \n", + "1 1.735433 99.970196 3 \n", + "\n", + " ride.day_of_month ride.hour ride.day_of_week ride.vi_1 ride.vi_2 \\\n", + "0 15 13 4 0 1 \n", + "1 14 17 0 0 1 \n", + "\n", + " ride.sf_n ride.sf_y \n", + "0 1 0 \n", + "1 1 0 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Retrieving data: Serving\n", "\n", @@ -234,9 +691,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "PyCharm (feast-python-sdk)", "language": "python", - "name": "python3" + "name": "pycharm-e7e8e038" }, "language_info": { "codemirror_mode": { @@ -248,18 +705,18 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.7" + "version": "3.6.2" }, "pycharm": { "stem_cell": { "cell_type": "raw", - "source": [], "metadata": { "collapsed": false - } + }, + "source": [] } } }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/sdk/python/feast/sdk/client.py b/sdk/python/feast/sdk/client.py index 1d835d37b5..1d1fe0a93d 100644 --- a/sdk/python/feast/sdk/client.py +++ b/sdk/python/feast/sdk/client.py @@ -266,7 +266,7 @@ def download_dataset( str: path to the downloaded file """ return self._table_downloader.download_table_as_file( - dataset_info.table_id, dest, staging_location, file_type + dataset_info.full_table_id, dest, staging_location, file_type ) def download_dataset_to_df(self, dataset_info, staging_location): @@ -282,7 +282,7 @@ def download_dataset_to_df(self, dataset_info, staging_location): """ return self._table_downloader.download_table_as_df( - dataset_info.table_id, staging_location + dataset_info.full_table_id, staging_location ) def close(self): diff --git a/sdk/python/feast/sdk/resources/feature_set.py b/sdk/python/feast/sdk/resources/feature_set.py index 1895f06c48..7eb2870078 100644 --- a/sdk/python/feast/sdk/resources/feature_set.py +++ b/sdk/python/feast/sdk/resources/feature_set.py @@ -66,16 +66,16 @@ class FileType(object): class DatasetInfo: - def __init__(self, name, table_id): + def __init__(self, name, full_table_id): """ Create instance of DatasetInfo with a BigQuery table as its backing store. 
Args: name: (str) dataset name - table_id: (str) fully qualified table id + full_table_id: (str) fully qualified table id """ self._name = name - self._table_id = table_id + self._full_table_id = full_table_id @property def name(self): @@ -87,10 +87,10 @@ def name(self): return self._name @property - def table_id(self): + def full_table_id(self): """ Returns: fully qualified table id """ - return self._table_id + return self._full_table_id diff --git a/sdk/python/feast/sdk/utils/bq_util.py b/sdk/python/feast/sdk/utils/bq_util.py index b52dee92ca..84167c09f6 100644 --- a/sdk/python/feast/sdk/utils/bq_util.py +++ b/sdk/python/feast/sdk/utils/bq_util.py @@ -197,11 +197,11 @@ def bq(self): self._bq = BQClient() return self._bq - def download_table_as_file(self, table_id, dest, staging_location, file_type): + def download_table_as_file(self, full_table_id, dest, staging_location, file_type): """ Download a bigquery table as file Args: - table_id (str): fully qualified BigQuery table id + full_table_id (str): fully qualified BigQuery table id dest (str): destination filename staging_location (str): url to staging_location (currently support a folder in GCS) @@ -218,7 +218,7 @@ def download_table_as_file(self, table_id, dest, staging_location, file_type): job_config = ExtractJobConfig() job_config.destination_format = file_type - src_table = Table.from_string(table_id) + src_table = Table.from_string(full_table_id) job = self.bq.extract_table(src_table, staging_file_path, job_config=job_config) # await completion @@ -230,11 +230,11 @@ def download_table_as_file(self, table_id, dest, staging_location, file_type): blob.download_to_filename(dest) return dest - def download_table_as_df(self, table_id, staging_location): + def download_table_as_df(self, full_table_id, staging_location): """ Download a BigQuery table as Pandas Dataframe Args: - table_id (src) : fully qualified BigQuery table id + full_table_id (src) : fully qualified BigQuery table id staging_location: url to staging_location (currently support a folder in GCS) @@ -250,7 +250,7 @@ def download_table_as_df(self, table_id, staging_location): job_config = ExtractJobConfig() job_config.destination_format = DestinationFormat.CSV job = self.bq.extract_table( - Table.from_string(table_id), staging_file_path, job_config=job_config + Table.from_string(full_table_id), staging_file_path, job_config=job_config ) # await completion diff --git a/sdk/python/tests/sdk/resources/test_feature_set.py b/sdk/python/tests/sdk/resources/test_feature_set.py index 3c6c97eba2..5972ecd0d1 100644 --- a/sdk/python/tests/sdk/resources/test_feature_set.py +++ b/sdk/python/tests/sdk/resources/test_feature_set.py @@ -38,7 +38,7 @@ def test_different_entity(self): class TestDatasetInfo(object): def test_creation(self): name = "dataset_name" - table_id = "gcp-project.dataset.table_name" - dataset = DatasetInfo(name, table_id) + full_table_id = "gcp-project.dataset.table_name" + dataset = DatasetInfo(name, full_table_id) assert dataset.name == name - assert dataset.table_id == table_id + assert dataset.full_table_id == full_table_id diff --git a/sdk/python/tests/sdk/test_client.py b/sdk/python/tests/sdk/test_client.py index f0d789afd0..83368c2b6a 100644 --- a/sdk/python/tests/sdk/test_client.py +++ b/sdk/python/tests/sdk/test_client.py @@ -198,7 +198,7 @@ def test_create_dataset(self, client, mocker): ds = client.create_dataset(fs, start_date, end_date) assert "dataset_name" == ds.name - assert "project.dataset.table" == ds.table_id + assert "project.dataset.table" == 
ds.full_table_id mock_trn_stub.CreateDataset.assert_called_once_with( DatasetServiceTypes.CreateDatasetRequest( featureSet=fs.proto, @@ -229,7 +229,7 @@ def test_create_dataset_with_limit(self, client, mocker): ds = client.create_dataset(fs, start_date, end_date, limit=limit) assert "dataset_name" == ds.name - assert "project.dataset.table" == ds.table_id + assert "project.dataset.table" == ds.full_table_id mock_trn_stub.CreateDataset.assert_called_once_with( DatasetServiceTypes.CreateDatasetRequest( featureSet=fs.proto, @@ -263,7 +263,7 @@ def test_create_dataset_with_name_prefix(self, client, mocker): fs, start_date, end_date, limit=limit, name_prefix=name_prefix) assert "dataset_name" == ds.name - assert "project.dataset.table" == ds.table_id + assert "project.dataset.table" == ds.full_table_id mock_dssvc_stub.CreateDataset.assert_called_once_with( DatasetServiceTypes.CreateDatasetRequest( featureSet=fs.proto, @@ -427,9 +427,9 @@ def test_download_dataset_as_file(self, client, mocker): table_dlder, "download_table_as_file", return_value=destination) client._table_downloader = table_dlder - table_id = "project.dataset.table" + full_table_id = "project.dataset.table" staging_location = "gs://gcs_bucket/" - dataset = DatasetInfo("mydataset", table_id) + dataset = DatasetInfo("mydataset", full_table_id) result = client.download_dataset( dataset, @@ -439,7 +439,7 @@ def test_download_dataset_as_file(self, client, mocker): assert result == destination table_dlder.download_table_as_file.assert_called_once_with( - table_id, destination, staging_location, FileType.CSV) + full_table_id, destination, staging_location, FileType.CSV) def _create_query_features_response(self, entity_name, entities): response = QueryFeaturesResponse(entityName=entity_name) diff --git a/sdk/python/tests/sdk/utils/test_bq_utils.py b/sdk/python/tests/sdk/utils/test_bq_utils.py index bc5fd72f18..39bca12e99 100644 --- a/sdk/python/tests/sdk/utils/test_bq_utils.py +++ b/sdk/python/tests/sdk/utils/test_bq_utils.py @@ -37,8 +37,8 @@ def test_get_table_name(): options={"project": project_name, "dataset": dataset_name}, ) assert ( - get_table_name(feature_id, storage_spec) - == "my_project.my_dataset.myentity_none" + get_table_name(feature_id, storage_spec) + == "my_project.my_dataset.myentity_none" ) @@ -55,7 +55,7 @@ def test_get_table_name_not_bq(): ) def test_query_to_dataframe(): with open( - os.path.join(testdata_path, "austin_bikeshare.bikeshare_stations.avro"), "rb" + os.path.join(testdata_path, "austin_bikeshare.bikeshare_stations.avro"), "rb" ) as expected_file: avro_reader = fastavro.reader(expected_file) expected = pd.DataFrame.from_records(avro_reader) @@ -80,11 +80,10 @@ def test_download_table_as_df(self, mocker): self._stop_time(mocker) mocked_gcs_to_df = mocker.patch( "feast.sdk.utils.bq_util.gcs_to_df", return_value=None - ) staging_path = "gs://temp/" staging_file_name = "temp_0" - table_id = "project_id.dataset_id.table_id" + full_table_id = "project_id.dataset_id.table_id" table_dldr = TableDownloader() exp_staging_path = os.path.join(staging_path, staging_file_name) @@ -92,11 +91,11 @@ def test_download_table_as_df(self, mocker): table_dldr._bq = _Mock_BQ_Client() mocker.patch.object(table_dldr._bq, "extract_table", return_value=_Job()) - table_dldr.download_table_as_df(table_id, staging_location=staging_path) + table_dldr.download_table_as_df(full_table_id, staging_location=staging_path) assert len(table_dldr._bq.extract_table.call_args_list) == 1 args, kwargs = 
table_dldr._bq.extract_table.call_args_list[0] - assert args[0].full_table_id == Table.from_string(table_id).full_table_id + assert args[0].full_table_id == Table.from_string(full_table_id).full_table_id assert args[1] == exp_staging_path assert kwargs["job_config"].destination_format == "CSV" mocked_gcs_to_df.assert_called_once_with(exp_staging_path) @@ -114,25 +113,25 @@ def test_download_json(self, mocker): self._test_download_file(mocker, FileType.JSON) def test_download_invalid_staging_url(self): - table_id = "project_id.dataset_id.table_id" + full_table_id = "project_id.dataset_id.table_id" table_dldr = TableDownloader() with pytest.raises( - ValueError, match="staging_uri must be a directory in " "GCS" + ValueError, match="staging_uri must be a directory in " "GCS" ): table_dldr.download_table_as_file( - table_id, "/tmp/dst", "/local/directory", FileType.CSV + full_table_id, "/tmp/dst", "/local/directory", FileType.CSV ) with pytest.raises( - ValueError, match="staging_uri must be a directory in " "GCS" + ValueError, match="staging_uri must be a directory in " "GCS" ): - table_dldr.download_table_as_df(table_id, "/local/directory") + table_dldr.download_table_as_df(full_table_id, "/local/directory") def _test_download_file(self, mocker, type): staging_path = "gs://temp/" staging_file_name = "temp_0" dst_path = "/tmp/myfile.csv" - table_id = "project_id.dataset_id.table_id" + full_table_id = "project_id.dataset_id.table_id" table_dldr = TableDownloader() mock_blob = _Blob() @@ -145,13 +144,13 @@ def _test_download_file(self, mocker, type): ) table_dldr.download_table_as_file( - table_id, dst_path, staging_location=staging_path, file_type=type + full_table_id, dst_path, staging_location=staging_path, file_type=type ) exp_staging_path = os.path.join(staging_path, staging_file_name) assert len(table_dldr._bq.extract_table.call_args_list) == 1 args, kwargs = table_dldr._bq.extract_table.call_args_list[0] - assert args[0].full_table_id == Table.from_string(table_id).full_table_id + assert args[0].full_table_id == Table.from_string(full_table_id).full_table_id assert args[1] == exp_staging_path assert kwargs["job_config"].destination_format == str(type) From 2bd1b9322a9fbc3725966359a9299e50187141b8 Mon Sep 17 00:00:00 2001 From: Budi Dharmawan <29maret@gmail.com> Date: Thu, 30 May 2019 15:43:47 +0800 Subject: [PATCH 02/10] fix tests for create dataset --- .../BigQueryTraningDatasetCreator.java | 23 ++++++++------- .../BigQueryTraningDatasetCreatorTest.java | 28 +++++++++++-------- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java index ce240210ae..e76a083b12 100644 --- a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java +++ b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java @@ -19,11 +19,13 @@ import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQuery.JobOption; import com.google.cloud.bigquery.BigQueryOptions; +import com.google.cloud.bigquery.Job; import com.google.cloud.bigquery.JobException; import com.google.cloud.bigquery.QueryJobConfiguration; import com.google.cloud.bigquery.Table; import com.google.cloud.bigquery.TableId; import com.google.cloud.bigquery.TableInfo; +import com.google.cloud.bigquery.TableResult; import com.google.common.base.Strings; import com.google.protobuf.Timestamp; import feast.core.DatasetServiceProto.DatasetInfo; @@ -99,27 
+101,28 @@ public DatasetInfo createDataset( String tableDescription = createBqTableDescription(featureSet, startDate, endDate, query); Map options = templater.getStorageSpec().getOptionsMap(); + String bq_dataset = options.get("dataset"); TableId destinationTableId = - TableId.of(projectId, options.get("dataset"), tableName); + TableId.of(projectId, bq_dataset, tableName); - if (!bigQuery.getTable(destinationTableId).exists()) { + if (bigQuery.getTable(destinationTableId) == null) { QueryJobConfiguration queryConfig = QueryJobConfiguration.newBuilder(query) .setAllowLargeResults(true) .setDestinationTable(destinationTableId) .build(); JobOption jobOption = JobOption.fields(); - bigQuery.query(queryConfig, jobOption); - + TableResult res = bigQuery.query(queryConfig, jobOption); + if(res!= null) { + Table destinationTable = bigQuery.getTable(destinationTableId); + TableInfo tableInfo = destinationTable.toBuilder() + .setDescription(tableDescription) + .build(); + bigQuery.update(tableInfo); + } } - Table destinationTable = bigQuery.getTable(destinationTableId); - TableInfo tableInfo = destinationTable.toBuilder() - .setDescription(tableDescription) - .build(); - bigQuery.update(tableInfo); - return DatasetInfo.newBuilder() .setName(tableName) .setTableUrl(toTableUrl(destinationTableId)) diff --git a/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java b/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java index b28835c39e..79f2b8650e 100644 --- a/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java +++ b/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java @@ -16,13 +16,6 @@ */ package feast.core.training; -import static org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - import com.google.cloud.bigquery.BigQuery; import com.google.protobuf.Timestamp; import com.google.protobuf.util.Timestamps; @@ -38,6 +31,13 @@ import org.mockito.Mock; import org.mockito.MockitoAnnotations; +import static org.hamcrest.Matchers.equalTo; +import static org.junit.Assert.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + public class BigQueryTraningDatasetCreatorTest { public static final String projectId = "the-project"; @@ -87,12 +87,14 @@ public void shouldCreateCorrectDatasetIfPrefixNotSpecified() { DatasetInfo dsInfo = creator.createDataset(featureSet, startDate, endDate, limit, namePrefix); - assertThat(dsInfo.getName(), equalTo("myentity_0_20180101_20190101")); + assertThat(dsInfo.getName(), + equalTo("feast_myentity_b0009f0f7df634ddc130571319e0deb9742eb1da")); assertThat( dsInfo.getTableUrl(), equalTo( String.format( - "%s.%s_%s.%s", projectId, datasetPrefix, entityName, "0_20180101_20190101"))); + "%s.dataset.%s_%s_%s", projectId, datasetPrefix, entityName, + "b0009f0f7df634ddc130571319e0deb9742eb1da"))); } @Test @@ -118,9 +120,11 @@ public void shouldCreateCorrectDatasetIfPrefixIsSpecified() { dsInfo.getTableUrl(), equalTo( String.format( - "%s.%s_%s.%s", projectId, datasetPrefix, entityName, - "mydataset_0_20180101_20190101"))); - assertThat(dsInfo.getName(), equalTo("mydataset_0_20180101_20190101")); + "%s.dataset.%s_%s_%s_%s", projectId, 
datasetPrefix, entityName, + namePrefix, + "b0009f0f7df634ddc130571319e0deb9742eb1da"))); + assertThat(dsInfo.getName(), + equalTo("feast_myentity_mydataset_b0009f0f7df634ddc130571319e0deb9742eb1da")); } @Test From 1f906c22f48d129762d42ea12d5c612ba223d1a8 Mon Sep 17 00:00:00 2001 From: David Heryanto Date: Fri, 31 May 2019 16:00:29 +0800 Subject: [PATCH 03/10] Format code with Google style --- .../BigQueryTraningDatasetCreator.java | 55 ++++++++----------- 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java index e76a083b12..109cc97e84 100644 --- a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java +++ b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java @@ -16,21 +16,15 @@ */ package feast.core.training; -import com.google.cloud.bigquery.BigQuery; +import com.google.cloud.bigquery.*; import com.google.cloud.bigquery.BigQuery.JobOption; -import com.google.cloud.bigquery.BigQueryOptions; -import com.google.cloud.bigquery.Job; -import com.google.cloud.bigquery.JobException; -import com.google.cloud.bigquery.QueryJobConfiguration; -import com.google.cloud.bigquery.Table; -import com.google.cloud.bigquery.TableId; -import com.google.cloud.bigquery.TableInfo; -import com.google.cloud.bigquery.TableResult; import com.google.common.base.Strings; import com.google.protobuf.Timestamp; import feast.core.DatasetServiceProto.DatasetInfo; import feast.core.DatasetServiceProto.FeatureSet; import feast.core.exception.TrainingDatasetCreationException; +import lombok.extern.slf4j.Slf4j; + import java.math.BigInteger; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -42,7 +36,6 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import lombok.extern.slf4j.Slf4j; @Slf4j public class BigQueryTraningDatasetCreator { @@ -54,13 +47,13 @@ public class BigQueryTraningDatasetCreator { private final String datasetPrefix; private transient BigQuery bigQuery; - public BigQueryTraningDatasetCreator( - BigQueryDatasetTemplater templater, - Clock clock, - String projectId, - String datasetPrefix) { - this(templater, clock, projectId, datasetPrefix, + BigQueryDatasetTemplater templater, Clock clock, String projectId, String datasetPrefix) { + this( + templater, + clock, + projectId, + datasetPrefix, BigQueryOptions.newBuilder().setProjectId(projectId).build().getService()); } @@ -96,15 +89,14 @@ public DatasetInfo createDataset( String namePrefix) { try { String query = templater.createQuery(featureSet, startDate, endDate, limit); - String tableName = createBqTableName(datasetPrefix, featureSet, startDate, endDate, - namePrefix); + String tableName = + createBqTableName(datasetPrefix, featureSet, startDate, endDate, namePrefix); String tableDescription = createBqTableDescription(featureSet, startDate, endDate, query); Map options = templater.getStorageSpec().getOptionsMap(); String bq_dataset = options.get("dataset"); - TableId destinationTableId = - TableId.of(projectId, bq_dataset, tableName); + TableId destinationTableId = TableId.of(projectId, bq_dataset, tableName); if (bigQuery.getTable(destinationTableId) == null) { QueryJobConfiguration queryConfig = @@ -114,11 +106,10 @@ public DatasetInfo createDataset( .build(); JobOption jobOption = JobOption.fields(); TableResult res = bigQuery.query(queryConfig, jobOption); - if(res!= null) { + if (res != 
null) { Table destinationTable = bigQuery.getTable(destinationTableId); - TableInfo tableInfo = destinationTable.toBuilder() - .setDescription(tableDescription) - .build(); + TableInfo tableInfo = + destinationTable.toBuilder().setDescription(tableDescription).build(); bigQuery.update(tableInfo); } } @@ -136,8 +127,12 @@ public DatasetInfo createDataset( } } - private String createBqTableName(String datasetPrefix, FeatureSet featureSet, Timestamp startDate, - Timestamp endDate, String namePrefix) { + private String createBqTableName( + String datasetPrefix, + FeatureSet featureSet, + Timestamp startDate, + Timestamp endDate, + String namePrefix) { List features = new ArrayList(featureSet.getFeatureIdsList()); Collections.sort(features); @@ -165,12 +160,11 @@ private String createBqTableName(String datasetPrefix, FeatureSet featureSet, Ti "%s_%s_%s_%s", datasetPrefix, featureSet.getEntityName(), namePrefix, hashText); } - return String.format( - "%s_%s_%s", datasetPrefix, featureSet.getEntityName(), hashText); + return String.format("%s_%s_%s", datasetPrefix, featureSet.getEntityName(), hashText); } - private String createBqTableDescription(FeatureSet featureSet, Timestamp startDate, Timestamp - endDate, String query) { + private String createBqTableDescription( + FeatureSet featureSet, Timestamp startDate, Timestamp endDate, String query) { String currentTime = Instant.now().toString(); return new StringBuilder() .append("Feast Dataset for ") @@ -195,5 +189,4 @@ private String toTableUrl(TableId tableId) { return String.format( "%s.%s.%s", tableId.getProject(), tableId.getDataset(), tableId.getTable()); } - } From b2a3f804cb0f51ca4a2c03a42a039c9affb7d521 Mon Sep 17 00:00:00 2001 From: David Heryanto Date: Fri, 31 May 2019 16:22:25 +0800 Subject: [PATCH 04/10] Remove unused clock variable in BigQueryTrainingDatasetCreator --- .../feast/core/config/TrainingConfig.java | 25 ++++---- .../BigQueryTraningDatasetCreator.java | 7 +-- .../BigQueryTraningDatasetCreatorTest.java | 59 +++++++++---------- 3 files changed, 41 insertions(+), 50 deletions(-) diff --git a/core/src/main/java/feast/core/config/TrainingConfig.java b/core/src/main/java/feast/core/config/TrainingConfig.java index 0e5ff3e3dd..4e71c894f3 100644 --- a/core/src/main/java/feast/core/config/TrainingConfig.java +++ b/core/src/main/java/feast/core/config/TrainingConfig.java @@ -7,21 +7,19 @@ import com.hubspot.jinjava.Jinjava; import feast.core.config.StorageConfig.StorageSpecs; import feast.core.dao.FeatureInfoRepository; -import feast.core.training.BigQueryTraningDatasetCreator; import feast.core.training.BigQueryDatasetTemplater; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.time.Clock; +import feast.core.training.BigQueryTraningDatasetCreator; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.Resource; -/** - * Configuration related to training API - */ +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; + +/** Configuration related to training API */ @Configuration public class TrainingConfig { @@ -31,18 +29,17 @@ public BigQueryDatasetTemplater getBigQueryTrainingDatasetTemplater( Resource resource = new ClassPathResource("templates/bq_training.tmpl"); InputStream resourceInputStream = resource.getInputStream(); String tmpl 
= CharStreams.toString(new InputStreamReader(resourceInputStream, Charsets.UTF_8)); - return new BigQueryDatasetTemplater(new Jinjava(), tmpl, storageSpecs.getWarehouseStorageSpec(), - featureInfoRepository); + return new BigQueryDatasetTemplater( + new Jinjava(), tmpl, storageSpecs.getWarehouseStorageSpec(), featureInfoRepository); } @Bean public BigQueryTraningDatasetCreator getBigQueryTrainingDatasetCreator( - BigQueryDatasetTemplater templater, StorageSpecs storageSpecs, + BigQueryDatasetTemplater templater, + StorageSpecs storageSpecs, @Value("${feast.core.projectId}") String projectId, @Value("${feast.core.datasetPrefix}") String datasetPrefix) { BigQuery bigquery = BigQueryOptions.newBuilder().setProjectId(projectId).build().getService(); - Clock clock = Clock.systemUTC(); - return new BigQueryTraningDatasetCreator(templater, clock, - projectId, datasetPrefix); + return new BigQueryTraningDatasetCreator(templater, projectId, datasetPrefix); } } diff --git a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java index 109cc97e84..ed1d7c620f 100644 --- a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java +++ b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java @@ -28,7 +28,6 @@ import java.math.BigInteger; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.time.Clock; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; @@ -42,16 +41,14 @@ public class BigQueryTraningDatasetCreator { private final BigQueryDatasetTemplater templater; private final DateTimeFormatter formatter; - private final Clock clock; private final String projectId; private final String datasetPrefix; private transient BigQuery bigQuery; public BigQueryTraningDatasetCreator( - BigQueryDatasetTemplater templater, Clock clock, String projectId, String datasetPrefix) { + BigQueryDatasetTemplater templater, String projectId, String datasetPrefix) { this( templater, - clock, projectId, datasetPrefix, BigQueryOptions.newBuilder().setProjectId(projectId).build().getService()); @@ -59,12 +56,10 @@ public BigQueryTraningDatasetCreator( public BigQueryTraningDatasetCreator( BigQueryDatasetTemplater templater, - Clock clock, String projectId, String datasetPrefix, BigQuery bigQuery) { this.templater = templater; - this.clock = clock; this.formatter = DateTimeFormatter.ofPattern("yyyyMMdd").withZone(ZoneId.of("UTC")); this.projectId = projectId; this.datasetPrefix = datasetPrefix; diff --git a/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java b/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java index 79f2b8650e..825f8aff7d 100644 --- a/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java +++ b/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java @@ -23,14 +23,14 @@ import feast.core.DatasetServiceProto.FeatureSet; import feast.core.storage.BigQueryStorageManager; import feast.specs.StorageSpecProto.StorageSpec; -import java.time.Clock; -import java.time.Instant; -import java.util.Arrays; import org.junit.Before; import org.junit.Test; import org.mockito.Mock; import org.mockito.MockitoAnnotations; +import java.time.Instant; +import java.util.Arrays; + import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertThat; import static org.mockito.ArgumentMatchers.any; @@ -44,32 +44,29 
@@ public class BigQueryTraningDatasetCreatorTest { public static final String datasetPrefix = "feast"; // class under test private BigQueryTraningDatasetCreator creator; - @Mock - private BigQueryDatasetTemplater templater; - @Mock - private BigQuery bq; - @Mock - private Clock clock; + @Mock private BigQueryDatasetTemplater templater; + @Mock private BigQuery bq; @Before - public void setUp() throws Exception { + public void setUp() { MockitoAnnotations.initMocks(this); - when(templater.getStorageSpec()).thenReturn(StorageSpec.newBuilder() - .setId("BIGQUERY1") - .setType(BigQueryStorageManager.TYPE) - .putOptions("project", "project") - .putOptions("dataset", "dataset") - .build()); - creator = new BigQueryTraningDatasetCreator(templater, clock, projectId, datasetPrefix, bq); + when(templater.getStorageSpec()) + .thenReturn( + StorageSpec.newBuilder() + .setId("BIGQUERY1") + .setType(BigQueryStorageManager.TYPE) + .putOptions("project", "project") + .putOptions("dataset", "dataset") + .build()); + creator = new BigQueryTraningDatasetCreator(templater, projectId, datasetPrefix, bq); when(templater.createQuery( - any(FeatureSet.class), any(Timestamp.class), any(Timestamp.class), anyLong())) + any(FeatureSet.class), any(Timestamp.class), any(Timestamp.class), anyLong())) .thenReturn("SELECT * FROM `project.dataset.table`"); } @Test public void shouldCreateCorrectDatasetIfPrefixNotSpecified() { String entityName = "myentity"; FeatureSet featureSet = @@ -85,16 +82,15 @@ public void shouldCreateCorrectDatasetIfPrefixNotSpecified() { long limit = 999; String namePrefix = ""; - DatasetInfo dsInfo = - creator.createDataset(featureSet, startDate, endDate, limit, namePrefix); - assertThat(dsInfo.getName(), - equalTo("feast_myentity_b0009f0f7df634ddc130571319e0deb9742eb1da")); + DatasetInfo dsInfo = creator.createDataset(featureSet, startDate, endDate, limit, namePrefix); + assertThat( + dsInfo.getName(), equalTo("feast_myentity_b0009f0f7df634ddc130571319e0deb9742eb1da")); assertThat( dsInfo.getTableUrl(), equalTo( String.format( - "%s.dataset.%s_%s_%s", projectId, datasetPrefix, entityName, - "b0009f0f7df634ddc130571319e0deb9742eb1da"))); + "%s.dataset.%s_%s_%s", + projectId, datasetPrefix, entityName, "b0009f0f7df634ddc130571319e0deb9742eb1da"))); } @Test @@ -114,16 +110,19 @@ public void shouldCreateCorrectDatasetIfPrefixIsSpecified() { long limit = 999; String namePrefix = "mydataset"; - DatasetInfo dsInfo = - creator.createDataset(featureSet, startDate, endDate, limit, namePrefix); + DatasetInfo dsInfo = creator.createDataset(featureSet, startDate, endDate, limit, namePrefix); assertThat( dsInfo.getTableUrl(), equalTo( String.format( - "%s.dataset.%s_%s_%s_%s", projectId, datasetPrefix, entityName, + "%s.dataset.%s_%s_%s_%s", + projectId, + datasetPrefix, + entityName, namePrefix, "b0009f0f7df634ddc130571319e0deb9742eb1da"))); - assertThat(dsInfo.getName(), + assertThat( + dsInfo.getName(), equalTo("feast_myentity_mydataset_b0009f0f7df634ddc130571319e0deb9742eb1da")); } From 0ea06634942e81777490399649b33cf5930122d3 Mon Sep 17 00:00:00 2001 From: David Heryanto Date: Fri, 31 May 2019 16:29:27 +0800 Subject: [PATCH 05/10] Update comment for createDataset in BigQueryTrainingDatasetCreator --- .../core/training/BigQueryTraningDatasetCreator.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java 
b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java
index ed1d7c620f..87a1359080 100644
--- a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java
+++ b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java
@@ -67,7 +67,8 @@ public BigQueryTraningDatasetCreator(
   }

   /**
-   * Create dataset for a feature set
+   * Create a training dataset for a feature set, covering feature values created between
+   * startDate (inclusive) and endDate (inclusive)
    *
    * @param featureSet feature set for which the training dataset should be created
    * @param startDate starting date of the training dataset (inclusive)
@@ -90,9 +91,9 @@ public DatasetInfo createDataset(
       Map<String, String> options = templater.getStorageSpec().getOptionsMap();
       String bq_dataset = options.get("dataset");
-
       TableId destinationTableId = TableId.of(projectId, bq_dataset, tableName);

+      // Create the BigQuery table that will store the training dataset, if it does not already exist
       if (bigQuery.getTable(destinationTableId) == null) {
         QueryJobConfiguration queryConfig =
             QueryJobConfiguration.newBuilder(query)
@@ -100,8 +101,8 @@
                 .setDestinationTable(destinationTableId)
                 .build();
         JobOption jobOption = JobOption.fields();
-        TableResult res = bigQuery.query(queryConfig, jobOption);
-        if (res != null) {
+        TableResult tableResult = bigQuery.query(queryConfig, jobOption);
+        if (tableResult != null) {
           Table destinationTable = bigQuery.getTable(destinationTableId);
           TableInfo tableInfo =
               destinationTable.toBuilder().setDescription(tableDescription).build();

From 71a9ede1d3a4494b1d29c80a86e46f6ae7bee253 Mon Sep 17 00:00:00 2001
From: David Heryanto
Date: Fri, 31 May 2019 19:27:32 +0800
Subject: [PATCH 06/10] Clean up print function for createBqTableDescription()

---
 .../BigQueryTraningDatasetCreator.java | 20 +++++++------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java
index 87a1359080..35a6081f77 100644
--- a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java
+++ b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java
@@ -161,19 +161,13 @@ private String createBqTableName(
   private String createBqTableDescription(
       FeatureSet featureSet, Timestamp startDate, Timestamp endDate, String query) {
-    String currentTime = Instant.now().toString();
-    return new StringBuilder()
-        .append("Feast Dataset for ")
-        .append(featureSet.getEntityName())
-        .append(" features.\nContains data from ")
-        .append(formatTimestamp(startDate))
-        .append(" to ")
-        .append(formatTimestamp(endDate))
-        .append(".\nLast edited at ")
-        .append(currentTime)
-        .append(".\n\n-----\n\n")
-        .append(query)
-        .toString();
+    return String.format(
+        "Feast Dataset for %s features.\nContains data from %s to %s.\nLast edited at %s.\n\n-----\n\n%s",
+        featureSet.getEntityName(),
+        formatTimestamp(startDate),
+        formatTimestamp(endDate),
+        Instant.now(),
+        query);
   }

   private String formatTimestamp(Timestamp timestamp) {

From 65104523c58c0ccb25b02a547e3cc1d6cba5770f Mon Sep 17 00:00:00 2001
From: Budi Dharmawan <29maret@gmail.com>
Date: Tue, 4 Jun 2019 13:35:48 +0800
Subject: [PATCH 07/10] fix tests on create_dataset

- remove Clock
- update public dataset test

---
 .../feast/core/config/TrainingConfig.java | 3 +--
 .../BigQueryTraningDatasetCreator.java | 21 +++++++-----------
 .../BigQueryTraningDatasetCreatorTest.java | 5 +----
.../austin_bikeshare.bikeshare_stations.avro | Bin 7653 -> 7059 bytes sdk/python/tests/sdk/utils/test_bq_utils.py | 2 +- 5 files changed, 11 insertions(+), 20 deletions(-) diff --git a/core/src/main/java/feast/core/config/TrainingConfig.java b/core/src/main/java/feast/core/config/TrainingConfig.java index 0e5ff3e3dd..30d55ac6e7 100644 --- a/core/src/main/java/feast/core/config/TrainingConfig.java +++ b/core/src/main/java/feast/core/config/TrainingConfig.java @@ -42,7 +42,6 @@ public BigQueryTraningDatasetCreator getBigQueryTrainingDatasetCreator( @Value("${feast.core.datasetPrefix}") String datasetPrefix) { BigQuery bigquery = BigQueryOptions.newBuilder().setProjectId(projectId).build().getService(); Clock clock = Clock.systemUTC(); - return new BigQueryTraningDatasetCreator(templater, clock, - projectId, datasetPrefix); + return new BigQueryTraningDatasetCreator(templater, projectId, datasetPrefix); } } diff --git a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java index e76a083b12..940bcc39eb 100644 --- a/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java +++ b/core/src/main/java/feast/core/training/BigQueryTraningDatasetCreator.java @@ -19,7 +19,6 @@ import com.google.cloud.bigquery.BigQuery; import com.google.cloud.bigquery.BigQuery.JobOption; import com.google.cloud.bigquery.BigQueryOptions; -import com.google.cloud.bigquery.Job; import com.google.cloud.bigquery.JobException; import com.google.cloud.bigquery.QueryJobConfiguration; import com.google.cloud.bigquery.Table; @@ -34,7 +33,6 @@ import java.math.BigInteger; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; -import java.time.Clock; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; @@ -49,7 +47,6 @@ public class BigQueryTraningDatasetCreator { private final BigQueryDatasetTemplater templater; private final DateTimeFormatter formatter; - private final Clock clock; private final String projectId; private final String datasetPrefix; private transient BigQuery bigQuery; @@ -57,21 +54,18 @@ public class BigQueryTraningDatasetCreator { public BigQueryTraningDatasetCreator( BigQueryDatasetTemplater templater, - Clock clock, String projectId, String datasetPrefix) { - this(templater, clock, projectId, datasetPrefix, + this(templater, projectId, datasetPrefix, BigQueryOptions.newBuilder().setProjectId(projectId).build().getService()); } public BigQueryTraningDatasetCreator( BigQueryDatasetTemplater templater, - Clock clock, String projectId, String datasetPrefix, BigQuery bigQuery) { this.templater = templater; - this.clock = clock; this.formatter = DateTimeFormatter.ofPattern("yyyyMMdd").withZone(ZoneId.of("UTC")); this.projectId = projectId; this.datasetPrefix = datasetPrefix; @@ -114,7 +108,7 @@ public DatasetInfo createDataset( .build(); JobOption jobOption = JobOption.fields(); TableResult res = bigQuery.query(queryConfig, jobOption); - if(res!= null) { + if (res != null) { Table destinationTable = bigQuery.getTable(destinationTableId); TableInfo tableInfo = destinationTable.toBuilder() .setDescription(tableDescription) @@ -143,16 +137,16 @@ private String createBqTableName(String datasetPrefix, FeatureSet featureSet, Ti Collections.sort(features); String datasetId = String.format("%s_%s_%s", features, startDate, endDate); - String hashText; + StringBuilder hashText; // create hash from datasetId try { MessageDigest md = 
MessageDigest.getInstance("SHA-1");
       byte[] messageDigest = md.digest(datasetId.getBytes());
       BigInteger no = new BigInteger(1, messageDigest);
-      hashText = no.toString(16);
+      hashText = new StringBuilder(no.toString(16));
       while (hashText.length() < 32) {
-        hashText = "0" + hashText;
+        hashText.insert(0, "0");
       }
     } catch (NoSuchAlgorithmException e) {
       throw new RuntimeException(e);
@@ -162,11 +156,12 @@ private String createBqTableName(String datasetPrefix, FeatureSet featureSet, Ti
       // only alphanumeric and underscore are allowed
       namePrefix = namePrefix.replaceAll("[^a-zA-Z0-9_]", "_");
       return String.format(
-          "%s_%s_%s_%s", datasetPrefix, featureSet.getEntityName(), namePrefix, hashText);
+          "%s_%s_%s_%s", datasetPrefix, featureSet.getEntityName(), namePrefix,
+          hashText.toString());
     }
     return String.format(
-        "%s_%s_%s", datasetPrefix, featureSet.getEntityName(), hashText);
+        "%s_%s_%s", datasetPrefix, featureSet.getEntityName(), hashText.toString());
   }
   private String createBqTableDescription(FeatureSet featureSet, Timestamp startDate, Timestamp
diff --git a/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java b/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java
index 79f2b8650e..ccc65b0f21 100644
--- a/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java
+++ b/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java
@@ -23,7 +23,6 @@
 import feast.core.DatasetServiceProto.FeatureSet;
 import feast.core.storage.BigQueryStorageManager;
 import feast.specs.StorageSpecProto.StorageSpec;
-import java.time.Clock;
 import java.time.Instant;
 import java.util.Arrays;
 import org.junit.Before;
@@ -48,8 +47,6 @@ public class BigQueryTraningDatasetCreatorTest {
   private BigQueryDatasetTemplater templater;
   @Mock
   private BigQuery bq;
-  @Mock
-  private Clock clock;

   @Before
   public void setUp() throws Exception {
@@ -60,7 +57,7 @@ public void setUp() throws Exception {
             .putOptions("project", "project")
             .putOptions("dataset", "dataset")
             .build());
-    creator = new BigQueryTraningDatasetCreator(templater, clock, projectId, datasetPrefix, bq);
+    creator = new BigQueryTraningDatasetCreator(templater, projectId, datasetPrefix, bq);
     when(templater.createQuery(
         any(FeatureSet.class), any(Timestamp.class), any(Timestamp.class), anyLong()))
diff --git a/sdk/python/tests/data/austin_bikeshare.bikeshare_stations.avro b/sdk/python/tests/data/austin_bikeshare.bikeshare_stations.avro
index 0a44e3a3657cc2b0efe60015a68bb90fd6403209..db7ab344179422b86bc897c78043f66bd708d790 100644
GIT binary patch
[base85-encoded binary payload omitted: Avro test fixture updated from 7653 to 7059 bytes]
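For illustration, the deterministic table naming patched above can be reproduced standalone. The following is a minimal sketch of the same scheme; the sample datasetId value is hypothetical, since the real code builds it from the sorted feature ids and the raw protobuf Timestamp strings:

import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

public class TableNameHashSketch {
  public static void main(String[] args) throws NoSuchAlgorithmException {
    // Hash the sorted feature ids plus the date range, as createBqTableName() does.
    String datasetId = "[myentity.feature1, myentity.feature2]_2018-01-02_2018-01-30";
    byte[] digest = MessageDigest.getInstance("SHA-1").digest(datasetId.getBytes());
    StringBuilder hashText = new StringBuilder(new BigInteger(1, digest).toString(16));
    while (hashText.length() < 32) {
      hashText.insert(0, "0"); // left-pad with zeros, mirroring the StringBuilder change above
    }
    // Final name: datasetPrefix_entityName_hash, e.g. feast_myentity_<sha1 hex>
    System.out.println("feast_myentity_" + hashText);
  }
}

Identical feature sets and date ranges therefore resolve to the same BigQuery table name, which is what lets createDataset() skip re-running the query when the destination table already exists.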
diff --git a/sdk/python/tests/sdk/utils/test_bq_utils.py b/sdk/python/tests/sdk/utils/test_bq_utils.py
index fe7ddc6cb3..3fde64aee5 100644
--- a/sdk/python/tests/sdk/utils/test_bq_utils.py
+++ b/sdk/python/tests/sdk/utils/test_bq_utils.py
@@ -38,7 +38,7 @@ def test_get_table_name():
     )
     assert (
         get_table_name(feature_id, storage_spec)
-        == "my_project.my_dataset.myentity_none"
+        == "my_project.my_dataset.myentity"
     )

From 08aa082340c340bc6bfb8130d21b868118572393 Mon Sep 17 00:00:00 2001
From: David Heryanto
Date: Tue, 11 Jun 2019 17:22:00 +0800
Subject: [PATCH 08/10] Fix typo

---
 .../feast/core/training/BigQueryTraningDatasetCreatorTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java b/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java
index 522917f77d..79a61fcd3e 100644
--- a/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java
+++ b/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java
@@ -65,7 +65,7 @@ public void setUp() {
   }

   @Test
-  public void shouldCreateCorrqectDatasetIfPrefixNotSpecified() {
+  public void shouldCreateCorrectDatasetIfPrefixNotSpecified() {
     String entityName = "myentity";
     FeatureSet featureSet =

From 2bb8ddc1619079972ceb105ac198e8154242ad90 Mon Sep 17 00:00:00 2001
From: David Heryanto
Date: Wed, 12 Jun 2019 08:32:41 +0800
Subject: [PATCH 09/10] Mount service account during unit test in prow so we can test with actual BigQuery

---
 .prow/config.yaml | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/.prow/config.yaml b/.prow/config.yaml
index 6bed56001a..fb0015e091 100644
--- a/.prow/config.yaml
+++ b/.prow/config.yaml
@@ -54,16 +54,38 @@ presubmits:
     decorate: true
     always_run: true
     spec:
+      volumes:
+      - name: service-account
+        secret:
+          secretName: prow-service-account
       containers:
       - image: maven:3.6-jdk-8
+        volumeMounts:
+        - name: service-account
+          mountPath: /etc/service-account
+          readOnly: true
+        env:
+        - name: GOOGLE_APPLICATION_CREDENTIALS
+          value: /etc/service-account/service-account.json
         command: [".prow/scripts/run_unit_test.sh", "--component", "core"]

   - name: unit-test-ingestion
     decorate: true
     always_run: true
     spec:
+      volumes:
+      - name: service-account
+        secret:
+          secretName: prow-service-account
       containers:
       - image: maven:3.6-jdk-8
+        volumeMounts:
+        - name: service-account
+          mountPath: /etc/service-account
+          readOnly: true
+        env:
+        - name: GOOGLE_APPLICATION_CREDENTIALS
+          value: /etc/service-account/service-account.json
         command: [".prow/scripts/run_unit_test.sh", "--component", "ingestion"]

   - name: unit-test-serving

From 46ceb5672d9fb9aa6922c2ec5ca05af188bf2a49 Mon Sep 17 00:00:00 2001
From: David Heryanto
Date: Wed, 12 Jun 2019 08:36:39 +0800
Subject: [PATCH 10/10] Add TODO in BigQueryTrainingDatasetCreatorTest

---
 .../core/training/BigQueryTraningDatasetCreatorTest.java | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java b/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java
index 79a61fcd3e..fff75eefae 100644
--- a/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java
+++ b/core/src/test/java/feast/core/training/BigQueryTraningDatasetCreatorTest.java
@@ -37,6 +37,14 @@
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;

+// TODO: Consider testing against an "actual" BigQuery instance instead of mocking it,
+// because the mocked BigQuery client is very basic and may miss important behaviour,
+// such as whether a table / dataset is actually created.
+// The test methods should probably include a condition so that tests are skipped if
+// the user running them does not have permission to manage BigQuery (although ideally they should).
+// Example of conditionally ignoring a test based on such a check:
+// https://stackoverflow.com/questions/1689242/conditionally-ignoring-tests-in-junit-4
+
public class BigQueryTraningDatasetCreatorTest {

  public static final String projectId = "the-project";
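A minimal sketch of the conditional-skip pattern the TODO above links to, using JUnit 4's org.junit.Assume. The hasBigQueryAccess() helper and its listDatasets() probe are assumptions for illustration, not part of this patch:

import static org.junit.Assume.assumeTrue;

import com.google.cloud.bigquery.BigQuery;
import com.google.cloud.bigquery.BigQueryOptions;
import org.junit.Test;

public class BigQueryConditionalTestSketch {

  // Hypothetical probe: treat any failure to list datasets as "no access".
  private boolean hasBigQueryAccess() {
    try {
      BigQuery bigQuery = BigQueryOptions.getDefaultInstance().getService();
      bigQuery.listDatasets();
      return true;
    } catch (Exception e) {
      return false;
    }
  }

  @Test
  public void shouldCreateDatasetAgainstActualBigQuery() {
    // assumeTrue() reports the test as skipped, not failed, when the check is false.
    assumeTrue("Test runner has no BigQuery access", hasBigQueryAccess());
    // ... exercise BigQueryTraningDatasetCreator against the real service here ...
  }
}

With this pattern, CI runners that mount the service account (as the prow change above does via GOOGLE_APPLICATION_CREDENTIALS) exercise the real BigQuery path, while contributors without credentials see the test skipped rather than failing.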