From 46bba2ec915e97fea2be3ce2f43586e166fbba02 Mon Sep 17 00:00:00 2001 From: Nathan Hammond Date: Thu, 6 Apr 2023 19:13:19 +0000 Subject: [PATCH] Experimental Feature: Strict Environments (#4449) To provide a way to reduce information leakage into the task execution environment, add a strict mode for environment variable handling. This mode prevents passing non-enumerated variables to the subprocess of the task being executed. Co-authored-by: Mehul Kar --- cli/integration_tests/bad_flag.t | 2 +- .../strict_env_vars/fixture-configs/all.json | 12 + .../fixture-configs/global_pt-empty.json | 9 + .../fixture-configs/global_pt.json | 9 + .../fixture-configs/task_pt-empty.json | 9 + .../fixture-configs/task_pt.json | 9 + .../strict_env_vars/get-global-hash.sh | 12 + .../strict_env_vars/global_hash_infer.t | 22 ++ .../strict_env_vars/global_hash_loose.t | 25 ++ .../strict_env_vars/global_hash_no-value.t | 22 ++ .../strict_env_vars/global_hash_strict.t | 24 ++ .../strict_env_vars/monorepo/.gitignore | 4 + .../monorepo/apps/my-app/build.sh | 22 ++ .../monorepo/apps/my-app/package.json | 6 + .../strict_env_vars/monorepo/package.json | 6 + .../strict_env_vars/monorepo/turbo.json | 8 + .../strict_env_vars/setup.sh | 8 + .../strict_env_vars/usage_infer.t | 42 +++ .../strict_env_vars/usage_loose.t | 24 ++ .../strict_env_vars/usage_strict.t | 24 ++ cli/internal/core/engine.go | 4 +- cli/internal/env/env.go | 16 +- cli/internal/fs/testdata/correct/turbo.json | 2 + cli/internal/fs/turbo_json.go | 320 +++++++++++++----- cli/internal/fs/turbo_json_test.go | 47 ++- cli/internal/run/global_hash.go | 34 +- cli/internal/run/real_run.go | 49 ++- cli/internal/run/run.go | 8 +- cli/internal/turbostate/turbostate.go | 19 +- cli/internal/util/run_opts.go | 13 + crates/turborepo-lib/src/cli.rs | 88 ++++- 31 files changed, 770 insertions(+), 129 deletions(-) create mode 100644 cli/integration_tests/strict_env_vars/fixture-configs/all.json create mode 100644 
cli/integration_tests/strict_env_vars/fixture-configs/global_pt-empty.json create mode 100644 cli/integration_tests/strict_env_vars/fixture-configs/global_pt.json create mode 100644 cli/integration_tests/strict_env_vars/fixture-configs/task_pt-empty.json create mode 100644 cli/integration_tests/strict_env_vars/fixture-configs/task_pt.json create mode 100755 cli/integration_tests/strict_env_vars/get-global-hash.sh create mode 100644 cli/integration_tests/strict_env_vars/global_hash_infer.t create mode 100644 cli/integration_tests/strict_env_vars/global_hash_loose.t create mode 100644 cli/integration_tests/strict_env_vars/global_hash_no-value.t create mode 100644 cli/integration_tests/strict_env_vars/global_hash_strict.t create mode 100644 cli/integration_tests/strict_env_vars/monorepo/.gitignore create mode 100755 cli/integration_tests/strict_env_vars/monorepo/apps/my-app/build.sh create mode 100644 cli/integration_tests/strict_env_vars/monorepo/apps/my-app/package.json create mode 100644 cli/integration_tests/strict_env_vars/monorepo/package.json create mode 100644 cli/integration_tests/strict_env_vars/monorepo/turbo.json create mode 100755 cli/integration_tests/strict_env_vars/setup.sh create mode 100644 cli/integration_tests/strict_env_vars/usage_infer.t create mode 100644 cli/integration_tests/strict_env_vars/usage_loose.t create mode 100644 cli/integration_tests/strict_env_vars/usage_strict.t diff --git a/cli/integration_tests/bad_flag.t b/cli/integration_tests/bad_flag.t index 99a89104a06d3..e98d526577baf 100644 --- a/cli/integration_tests/bad_flag.t +++ b/cli/integration_tests/bad_flag.t @@ -19,7 +19,7 @@ Bad flag with an implied run command should display run flags note: to pass '--bad-flag' as a value, use '-- --bad-flag' - Usage: turbo <--cache-dir |--cache-workers |--concurrency |--continue|--dry-run []|--single-package|--filter |--force|--global-deps |--graph []|--ignore |--include-dependencies|--no-cache|--no-daemon|--no-deps|--output-logs 
|--only|--parallel|--pkg-inference-root |--profile |--remote-only|--scope |--since |--summarize []|--log-prefix |TASKS|PASS_THROUGH_ARGS|--experimental-space-id > + Usage: turbo <--cache-dir |--cache-workers |--concurrency |--continue|--dry-run []|--single-package|--filter |--force|--global-deps |--graph []|--experimental-env-mode []|--ignore |--include-dependencies|--no-cache|--no-daemon|--no-deps|--output-logs |--only|--parallel|--pkg-inference-root |--profile |--remote-only|--scope |--since |--summarize []|--log-prefix |TASKS|PASS_THROUGH_ARGS|--experimental-space-id > For more information, try '--help'. diff --git a/cli/integration_tests/strict_env_vars/fixture-configs/all.json b/cli/integration_tests/strict_env_vars/fixture-configs/all.json new file mode 100644 index 0000000000000..46662a5aa674f --- /dev/null +++ b/cli/integration_tests/strict_env_vars/fixture-configs/all.json @@ -0,0 +1,12 @@ +{ + "$schema": "https://turbo.build/schema.json", + "pipeline": { + "build": { + "outputs": ["dist/**"], + "experimentalPassThroughEnv": ["LOCAL_VAR_PT"], + "env": ["LOCAL_VAR_DEP"] + } + }, + "experimentalGlobalPassThroughEnv": ["GLOBAL_VAR_PT"], + "globalEnv": ["GLOBAL_VAR_DEP"] +} diff --git a/cli/integration_tests/strict_env_vars/fixture-configs/global_pt-empty.json b/cli/integration_tests/strict_env_vars/fixture-configs/global_pt-empty.json new file mode 100644 index 0000000000000..bde0c0e16bb4f --- /dev/null +++ b/cli/integration_tests/strict_env_vars/fixture-configs/global_pt-empty.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://turbo.build/schema.json", + "pipeline": { + "build": { + "outputs": ["dist/**"] + } + }, + "experimentalGlobalPassThroughEnv": [] +} diff --git a/cli/integration_tests/strict_env_vars/fixture-configs/global_pt.json b/cli/integration_tests/strict_env_vars/fixture-configs/global_pt.json new file mode 100644 index 0000000000000..81903d1cefc9a --- /dev/null +++ b/cli/integration_tests/strict_env_vars/fixture-configs/global_pt.json @@ -0,0 +1,9 
@@ +{ + "$schema": "https://turbo.build/schema.json", + "pipeline": { + "build": { + "outputs": ["dist/**"] + } + }, + "experimentalGlobalPassThroughEnv": ["GLOBAL_VAR_PT"] +} diff --git a/cli/integration_tests/strict_env_vars/fixture-configs/task_pt-empty.json b/cli/integration_tests/strict_env_vars/fixture-configs/task_pt-empty.json new file mode 100644 index 0000000000000..3583802b72510 --- /dev/null +++ b/cli/integration_tests/strict_env_vars/fixture-configs/task_pt-empty.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://turbo.build/schema.json", + "pipeline": { + "build": { + "outputs": ["dist/**"], + "experimentalPassthroughEnv": [] + } + } +} diff --git a/cli/integration_tests/strict_env_vars/fixture-configs/task_pt.json b/cli/integration_tests/strict_env_vars/fixture-configs/task_pt.json new file mode 100644 index 0000000000000..30b06f0dcf34f --- /dev/null +++ b/cli/integration_tests/strict_env_vars/fixture-configs/task_pt.json @@ -0,0 +1,9 @@ +{ + "$schema": "https://turbo.build/schema.json", + "pipeline": { + "build": { + "outputs": ["dist/**"], + "experimentalPassthroughEnv": ["LOCAL_VAR_PT"] + } + } +} diff --git a/cli/integration_tests/strict_env_vars/get-global-hash.sh b/cli/integration_tests/strict_env_vars/get-global-hash.sh new file mode 100755 index 0000000000000..d86f761c043c1 --- /dev/null +++ b/cli/integration_tests/strict_env_vars/get-global-hash.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +# This script greps stdin (i.e. what's piped to it) +# splits it by "=" and prints the second value. 
+# it's intended to get the global hash from a debug log that looks like this: +# 2023-04-06T04:28:19.599Z [DEBUG] turbo: global hash: value=a027dadc4dea675e +# +# Usage: +# turbo build -vv 2>&1 | "$TESTDIR/./get-global-hash.sh" +# +# +grep "global hash:" - | awk '{split($0,a,"="); print a[2]}' diff --git a/cli/integration_tests/strict_env_vars/global_hash_infer.t b/cli/integration_tests/strict_env_vars/global_hash_infer.t new file mode 100644 index 0000000000000..c8962fce66c3f --- /dev/null +++ b/cli/integration_tests/strict_env_vars/global_hash_infer.t @@ -0,0 +1,22 @@ +Setup + $ . ${TESTDIR}/../setup.sh + $ . ${TESTDIR}/setup.sh $(pwd) monorepo + +With --experimental-env-mode=infer + +Baseline global hash + $ BASELINE=$(${TURBO} build -vv 2>&1 | "$TESTDIR/./get-global-hash.sh") + +There's no config to start, so the global hash does not change when flag is passed + $ WITH_FLAG=$(${TURBO} build -vv --experimental-env-mode=infer 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $BASELINE = $WITH_FLAG + +Add empty config for global pass through env var, global hash changes + $ cp "$TESTDIR/fixture-configs/global_pt-empty.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ WITH_EMPTY_GLOBAL=$(${TURBO} build -vv --experimental-env-mode=infer 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $BASELINE != $WITH_EMPTY_GLOBAL + +Add global pass through env var, global hash changes again, because we changed the value + $ cp "$TESTDIR/fixture-configs/global_pt.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ WITH_GLOBAL=$(${TURBO} build -vv --experimental-env-mode=infer 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $WITH_EMPTY_GLOBAL != $WITH_GLOBAL diff --git a/cli/integration_tests/strict_env_vars/global_hash_loose.t b/cli/integration_tests/strict_env_vars/global_hash_loose.t new file mode 100644 index 0000000000000..0f8b3811b9e6a --- /dev/null +++ b/cli/integration_tests/strict_env_vars/global_hash_loose.t @@ -0,0 +1,25 @@ +Setup + 
$ . ${TESTDIR}/../setup.sh + $ . ${TESTDIR}/setup.sh $(pwd) monorepo + +With --experimental-env-mode=loose + +Baseline global hash + $ BASELINE=$(${TURBO} build -vv 2>&1 | "$TESTDIR/./get-global-hash.sh") + +Hash changes, because we're using a new mode + $ WITH_FLAG=$(${TURBO} build -vv --experimental-env-mode=loose 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $BASELINE != $WITH_FLAG + +Add empty config for global pass through env var +Hash does not change, because in loose mode, we don't care what the actual config contains + $ cp "$TESTDIR/fixture-configs/global_pt-empty.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ WITH_EMPTY_GLOBAL=$(${TURBO} build -vv --experimental-env-mode=loose 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $WITH_FLAG = $WITH_EMPTY_GLOBAL + +Add global pass through env var +Hash does not change, because in loose mode, we don't care what the actual config contains + $ cp "$TESTDIR/fixture-configs/global_pt.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ WITH_GLOBAL=$(${TURBO} build -vv --experimental-env-mode=loose 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $WITH_FLAG = $WITH_GLOBAL + $ test $WITH_EMPTY_GLOBAL = $WITH_GLOBAL diff --git a/cli/integration_tests/strict_env_vars/global_hash_no-value.t b/cli/integration_tests/strict_env_vars/global_hash_no-value.t new file mode 100644 index 0000000000000..2fa65754017c0 --- /dev/null +++ b/cli/integration_tests/strict_env_vars/global_hash_no-value.t @@ -0,0 +1,22 @@ +Setup + $ . ${TESTDIR}/../setup.sh + $ . 
${TESTDIR}/setup.sh $(pwd) monorepo + +With --experimental-env-mode (should be the same as --experimental-env-mode=infer) + +Baseline global hash + $ BASELINE=$(${TURBO} build -vv 2>&1 | "$TESTDIR/./get-global-hash.sh") + +There's no config to start, so the global hash does not change when flag is passed + $ WITH_FLAG=$(${TURBO} build -vv --experimental-env-mode 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $BASELINE = $WITH_FLAG + +Add empty config for global pass through env var, global hash changes + $ cp "$TESTDIR/fixture-configs/global_pt-empty.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ WITH_EMPTY_GLOBAL=$(${TURBO} build -vv --experimental-env-mode 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $BASELINE != $WITH_EMPTY_GLOBAL + +Add global pass through env var, global hash changes again, because we changed the value + $ cp "$TESTDIR/fixture-configs/global_pt.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ WITH_GLOBAL=$(${TURBO} build -vv --experimental-env-mode 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $WITH_EMPTY_GLOBAL != $WITH_GLOBAL diff --git a/cli/integration_tests/strict_env_vars/global_hash_strict.t b/cli/integration_tests/strict_env_vars/global_hash_strict.t new file mode 100644 index 0000000000000..0096c79b46c8b --- /dev/null +++ b/cli/integration_tests/strict_env_vars/global_hash_strict.t @@ -0,0 +1,24 @@ +Setup + $ . ${TESTDIR}/../setup.sh + $ . 
${TESTDIR}/setup.sh $(pwd) monorepo + +With strict mode + +Get Baseline global hash + $ BASELINE=$(${TURBO} build -vv 2>&1 | "$TESTDIR/./get-global-hash.sh") + +Hash changes, because we're using a new mode + $ WITH_FLAG=$(${TURBO} build -vv --experimental-env-mode=strict 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $BASELINE != $WITH_FLAG + +Add empty config for global pass through env var +Hash does not change, because the mode is the same and we haven't added any new pass through vars + $ cp "$TESTDIR/fixture-configs/global_pt-empty.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ WITH_EMPTY_GLOBAL=$(${TURBO} build -vv --experimental-env-mode=strict 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $WITH_FLAG = $WITH_EMPTY_GLOBAL + +Add global pass through env var +Hash changes, because we have a new pass through value + $ cp "$TESTDIR/fixture-configs/global_pt.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ WITH_GLOBAL=$(${TURBO} build -vv --experimental-env-mode=strict 2>&1 | "$TESTDIR/./get-global-hash.sh") + $ test $WITH_EMPTY_GLOBAL != $WITH_GLOBAL diff --git a/cli/integration_tests/strict_env_vars/monorepo/.gitignore b/cli/integration_tests/strict_env_vars/monorepo/.gitignore new file mode 100644 index 0000000000000..85f063d9b546d --- /dev/null +++ b/cli/integration_tests/strict_env_vars/monorepo/.gitignore @@ -0,0 +1,4 @@ +node_modules/ +.turbo +.npmrc +out.txt diff --git a/cli/integration_tests/strict_env_vars/monorepo/apps/my-app/build.sh b/cli/integration_tests/strict_env_vars/monorepo/apps/my-app/build.sh new file mode 100755 index 0000000000000..a13fc19118a83 --- /dev/null +++ b/cli/integration_tests/strict_env_vars/monorepo/apps/my-app/build.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +pathset="no" +sysrootset="no" + +if [ ! -z "$PATH" ]; then + pathset="yes" +fi + +if [ ! 
-z "$SYSTEMROOT" ]; then + sysrootset="yes" +fi + +{ + echo -n "globalpt: '$GLOBAL_VAR_PT', " + echo -n "localpt: '$LOCAL_VAR_PT', " + echo -n "globaldep: '$GLOBAL_VAR_DEP', " + echo -n "localdep: '$LOCAL_VAR_DEP', " + echo -n "other: '$OTHER_VAR', " + echo -n "sysroot set: '$sysrootset', " + echo "path set: '$pathset'" +} > out.txt diff --git a/cli/integration_tests/strict_env_vars/monorepo/apps/my-app/package.json b/cli/integration_tests/strict_env_vars/monorepo/apps/my-app/package.json new file mode 100644 index 0000000000000..5fc8e10cc32f3 --- /dev/null +++ b/cli/integration_tests/strict_env_vars/monorepo/apps/my-app/package.json @@ -0,0 +1,6 @@ +{ + "name": "my-app", + "scripts": { + "build": "./build.sh" + } +} diff --git a/cli/integration_tests/strict_env_vars/monorepo/package.json b/cli/integration_tests/strict_env_vars/monorepo/package.json new file mode 100644 index 0000000000000..9557291c8197e --- /dev/null +++ b/cli/integration_tests/strict_env_vars/monorepo/package.json @@ -0,0 +1,6 @@ +{ + "name": "monorepo", + "workspaces": [ + "apps/**" + ] +} diff --git a/cli/integration_tests/strict_env_vars/monorepo/turbo.json b/cli/integration_tests/strict_env_vars/monorepo/turbo.json new file mode 100644 index 0000000000000..fb6a143e3f51d --- /dev/null +++ b/cli/integration_tests/strict_env_vars/monorepo/turbo.json @@ -0,0 +1,8 @@ +{ + "$schema": "https://turbo.build/schema.json", + "pipeline": { + "build": { + "outputs": ["dist/**"] + } + } +} diff --git a/cli/integration_tests/strict_env_vars/setup.sh b/cli/integration_tests/strict_env_vars/setup.sh new file mode 100755 index 0000000000000..cfdfd598878bf --- /dev/null +++ b/cli/integration_tests/strict_env_vars/setup.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +SCRIPT_DIR=$(dirname ${BASH_SOURCE[0]}) +TARGET_DIR=$1 +FIXTURE=$2 + +cp -a ${SCRIPT_DIR}/$2/. 
${TARGET_DIR}/ +${SCRIPT_DIR}/../setup_git.sh ${TARGET_DIR} diff --git a/cli/integration_tests/strict_env_vars/usage_infer.t b/cli/integration_tests/strict_env_vars/usage_infer.t new file mode 100644 index 0000000000000..2891a87dc913f --- /dev/null +++ b/cli/integration_tests/strict_env_vars/usage_infer.t @@ -0,0 +1,42 @@ +Setup + $ . ${TESTDIR}/../setup.sh + $ . ${TESTDIR}/setup.sh $(pwd) monorepo + +With --experimental-env-mode=infer + +Set the env vars + $ export GLOBAL_VAR_PT=higlobalpt + $ export GLOBAL_VAR_DEP=higlobaldep + $ export LOCAL_VAR_PT=hilocalpt + $ export LOCAL_VAR_DEP=hilocaldep + $ export OTHER_VAR=hiother + +Conditionally set these vars if they aren't already there for the purpose of the test. +The test doesn't care about the values, it just checks that the var is available to the task +so we just have to make sure the parent process has them set. In Github CI, for example SHELL +isn't already set. + $ export SYSTEMROOT="${SYSTEMROOT:=hisysroot}" + $ export PATH="${PATH:=hipath}" + +Inferred mode as loose because no pass through configs, all vars are available + $ ${TURBO} build -vv --experimental-env-mode=infer > /dev/null 2>&1 + $ cat apps/my-app/out.txt + globalpt: 'higlobalpt', localpt: 'hilocalpt', globaldep: 'higlobaldep', localdep: 'hilocaldep', other: 'hiother', sysroot set: 'yes', path set: 'yes' + +Inferred mode as strict, because global pass through config, no vars available + $ cp "$TESTDIR/fixture-configs/global_pt-empty.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ ${TURBO} build -vv --experimental-env-mode=infer > /dev/null 2>&1 + $ cat apps/my-app/out.txt + globalpt: '', localpt: '', globaldep: '', localdep: '', other: '', sysroot set: 'yes', path set: 'yes' + +Inferred mode as strict, because task pass through config, no vars available + $ cp "$TESTDIR/fixture-configs/task_pt-empty.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ ${TURBO} build -vv --experimental-env-mode=infer > 
/dev/null 2>&1 + $ cat apps/my-app/out.txt + globalpt: '', localpt: '', globaldep: '', localdep: '', other: '', sysroot set: 'yes', path set: 'yes' + +Inferred mode as strict, with declared deps and pass through. all declared available, other is not available + $ cp "$TESTDIR/fixture-configs/all.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ ${TURBO} build -vv --experimental-env-mode=infer > /dev/null 2>&1 + $ cat apps/my-app/out.txt + globalpt: 'higlobalpt', localpt: 'hilocalpt', globaldep: 'higlobaldep', localdep: 'hilocaldep', other: '', sysroot set: 'yes', path set: 'yes' diff --git a/cli/integration_tests/strict_env_vars/usage_loose.t b/cli/integration_tests/strict_env_vars/usage_loose.t new file mode 100644 index 0000000000000..75687ed441cae --- /dev/null +++ b/cli/integration_tests/strict_env_vars/usage_loose.t @@ -0,0 +1,24 @@ +Setup + $ . ${TESTDIR}/../setup.sh + $ . ${TESTDIR}/setup.sh $(pwd) monorepo + +With --experimental-env-mode=loose, all vars are available + +Set the env vars + $ export GLOBAL_VAR_PT=higlobalpt + $ export GLOBAL_VAR_DEP=higlobaldep + $ export LOCAL_VAR_PT=hilocalpt + $ export LOCAL_VAR_DEP=hilocaldep + $ export OTHER_VAR=hiother + $ export SYSTEMROOT=hisysroot + +All vars available in loose mode + $ ${TURBO} build -vv --experimental-env-mode=loose > /dev/null 2>&1 + $ cat apps/my-app/out.txt + globalpt: 'higlobalpt', localpt: 'hilocalpt', globaldep: 'higlobaldep', localdep: 'hilocaldep', other: 'hiother', sysroot set: 'yes', path set: 'yes' + +All vars available in loose mode, even when global and pass through configs defined + $ cp "$TESTDIR/fixture-configs/all.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ ${TURBO} build -vv --experimental-env-mode=loose > /dev/null 2>&1 + $ cat apps/my-app/out.txt + globalpt: 'higlobalpt', localpt: 'hilocalpt', globaldep: 'higlobaldep', localdep: 'hilocaldep', other: 'hiother', sysroot set: 'yes', path set: 'yes' diff --git 
a/cli/integration_tests/strict_env_vars/usage_strict.t b/cli/integration_tests/strict_env_vars/usage_strict.t new file mode 100644 index 0000000000000..daa7c1b6684f7 --- /dev/null +++ b/cli/integration_tests/strict_env_vars/usage_strict.t @@ -0,0 +1,24 @@ +Setup + $ . ${TESTDIR}/../setup.sh + $ . ${TESTDIR}/setup.sh $(pwd) monorepo + +With --experimental-env-mode=strict, only declared vars are available + +Set the env vars + $ export GLOBAL_VAR_PT=higlobalpt + $ export GLOBAL_VAR_DEP=higlobaldep + $ export LOCAL_VAR_PT=hilocalpt + $ export LOCAL_VAR_DEP=hilocaldep + $ export OTHER_VAR=hiother + $ export SYSTEMROOT=hisysroot + +No vars available by default + $ ${TURBO} build -vv --experimental-env-mode=strict > /dev/null 2>&1 + $ cat apps/my-app/out.txt + globalpt: '', localpt: '', globaldep: '', localdep: '', other: '', sysroot set: 'yes', path set: 'yes' + +All declared vars available, others are not available + $ cp "$TESTDIR/fixture-configs/all.json" "$(pwd)/turbo.json" && git commit -am "no comment" --quiet + $ ${TURBO} build -vv --experimental-env-mode=strict > /dev/null 2>&1 + $ cat apps/my-app/out.txt + globalpt: 'higlobalpt', localpt: 'hilocalpt', globaldep: 'higlobaldep', localdep: 'hilocaldep', other: '', sysroot set: 'yes', path set: 'yes' diff --git a/cli/internal/core/engine.go b/cli/internal/core/engine.go index efa5731609628..7f08ea8ed7a35 100644 --- a/cli/internal/core/engine.go +++ b/cli/internal/core/engine.go @@ -140,14 +140,14 @@ func (e *Engine) getTaskDefinition(pkg string, taskName string, taskID string) ( if task, ok := pipeline[taskID]; ok { return &Task{ Name: taskName, - TaskDefinition: task.TaskDefinition, + TaskDefinition: task.GetTaskDefinition(), }, nil } if task, ok := pipeline[taskName]; ok { return &Task{ Name: taskName, - TaskDefinition: task.TaskDefinition, + TaskDefinition: task.GetTaskDefinition(), }, nil } diff --git a/cli/internal/env/env.go b/cli/internal/env/env.go index 1d16cc60fe6c8..31ca69775d65d 100644 --- 
a/cli/internal/env/env.go +++ b/cli/internal/env/env.go @@ -35,6 +35,11 @@ func (evm EnvironmentVariableMap) Merge(another EnvironmentVariableMap) { } } +// Add creates one new environment variable. +func (evm EnvironmentVariableMap) Add(key string, value string) { + evm[key] = value +} + // Names returns a sorted list of env var names for the EnvironmentVariableMap func (evm EnvironmentVariableMap) Names() []string { names := []string{} @@ -85,7 +90,8 @@ func (evm EnvironmentVariableMap) ToHashable() EnvironmentVariablePairs { }) } -func getEnvMap() EnvironmentVariableMap { +// GetEnvMap returns a map of env vars and their values from os.Environ +func GetEnvMap() EnvironmentVariableMap { envMap := make(map[string]string) for _, envVar := range os.Environ() { if i := strings.Index(envVar, "="); i >= 0 { @@ -96,8 +102,8 @@ func getEnvMap() EnvironmentVariableMap { return envMap } -// fromKeys returns a map of env vars and their values from a given set of env var names -func fromKeys(all EnvironmentVariableMap, keys []string) EnvironmentVariableMap { +// FromKeys returns a map of env vars and their values from a given set of env var names +func FromKeys(all EnvironmentVariableMap, keys []string) EnvironmentVariableMap { output := EnvironmentVariableMap{} for _, key := range keys { output[key] = all[key] @@ -138,14 +144,14 @@ func fromMatching(all EnvironmentVariableMap, keyMatchers []string, shouldExclud // GetHashableEnvVars returns all sorted key=value env var pairs for both frameworks and from envKeys func GetHashableEnvVars(keys []string, matchers []string, envVarContainingExcludePrefix string) (DetailedMap, error) { - all := getEnvMap() + all := GetEnvMap() detailedMap := DetailedMap{ All: EnvironmentVariableMap{}, BySource: BySource{}, } - detailedMap.BySource.Explicit = fromKeys(all, keys) + detailedMap.BySource.Explicit = FromKeys(all, keys) detailedMap.All.Merge(detailedMap.BySource.Explicit) // Create an excluder function to pass to matcher. 
diff --git a/cli/internal/fs/testdata/correct/turbo.json b/cli/internal/fs/testdata/correct/turbo.json index ab3714a0e89b2..e22cde2a649e1 100644 --- a/cli/internal/fs/testdata/correct/turbo.json +++ b/cli/internal/fs/testdata/correct/turbo.json @@ -2,6 +2,7 @@ { "pipeline": { "build": { + "experimentalPassthroughEnv": ["GITHUB_TOKEN"], // mocked test comment "dependsOn": [ // mocked test comment @@ -40,6 +41,7 @@ }, "globalDependencies": ["some-file", "../another-dir/**", "$GLOBAL_ENV_VAR"], "globlaEnv": ["SOME_VAR", "ANOTHER_VAR"], + "experimentalGlobalPassThroughEnv": ["AWS_SECRET_KEY"], "remoteCache": { "teamId": "team_id", "signature": true diff --git a/cli/internal/fs/turbo_json.go b/cli/internal/fs/turbo_json.go index 3fd96b2242521..71ef29d391da8 100644 --- a/cli/internal/fs/turbo_json.go +++ b/cli/internal/fs/turbo_json.go @@ -27,6 +27,10 @@ type rawTurboJSON struct { GlobalDependencies []string `json:"globalDependencies,omitempty"` // Global env GlobalEnv []string `json:"globalEnv,omitempty"` + + // Global passthrough env + GlobalPassthroughEnv []string `json:"experimentalGlobalPassThroughEnv,omitempty"` + // Pipeline is a map of Turbo pipeline entries which define the task graph // and cache behavior on a per task or per package-task basis. Pipeline Pipeline `json:"pipeline"` @@ -41,19 +45,21 @@ type rawTurboJSON struct { // Notably, it includes a PristinePipeline instead of the regular Pipeline. (i.e. TaskDefinition // instead of BookkeepingTaskDefinition.) 
type pristineTurboJSON struct { - GlobalDependencies []string `json:"globalDependencies,omitempty"` - GlobalEnv []string `json:"globalEnv,omitempty"` - Pipeline PristinePipeline `json:"pipeline"` - RemoteCacheOptions RemoteCacheOptions `json:"remoteCache,omitempty"` - Extends []string `json:"extends,omitempty"` + GlobalDependencies []string `json:"globalDependencies,omitempty"` + GlobalEnv []string `json:"globalEnv,omitempty"` + GlobalPassthroughEnv []string `json:"experimentalGlobalPassThroughEnv,omitempty"` + Pipeline PristinePipeline `json:"pipeline"` + RemoteCacheOptions RemoteCacheOptions `json:"remoteCache,omitempty"` + Extends []string `json:"extends,omitempty"` } // TurboJSON represents a turbo.json configuration file type TurboJSON struct { - GlobalDeps []string - GlobalEnv []string - Pipeline Pipeline - RemoteCacheOptions RemoteCacheOptions + GlobalDeps []string + GlobalEnv []string + GlobalPassthroughEnv []string + Pipeline Pipeline + RemoteCacheOptions RemoteCacheOptions // A list of Workspace names Extends []string @@ -69,29 +75,54 @@ type RemoteCacheOptions struct { // We use this for printing ResolvedTaskConfiguration, because we _want_ to show // the user the default values for key they have not configured. type rawTaskWithDefaults struct { - Outputs []string `json:"outputs"` - Cache *bool `json:"cache"` - DependsOn []string `json:"dependsOn"` - Inputs []string `json:"inputs"` - OutputMode util.TaskOutputMode `json:"outputMode"` - Env []string `json:"env"` - Persistent bool `json:"persistent"` + Outputs []string `json:"outputs"` + Cache *bool `json:"cache"` + DependsOn []string `json:"dependsOn"` + Inputs []string `json:"inputs"` + OutputMode util.TaskOutputMode `json:"outputMode"` + PassthroughEnv []string `json:"experimentalPassThroughEnv,omitempty"` + Env []string `json:"env"` + Persistent bool `json:"persistent"` } // rawTask exists to Unmarshal from json. 
When fields are omitted, we _want_ // them to be missing, so that we can distinguish missing from empty value. type rawTask struct { - Outputs []string `json:"outputs,omitempty"` - Cache *bool `json:"cache,omitempty"` - DependsOn []string `json:"dependsOn,omitempty"` - Inputs []string `json:"inputs,omitempty"` - OutputMode *util.TaskOutputMode `json:"outputMode,omitempty"` - Env []string `json:"env,omitempty"` - Persistent *bool `json:"persistent,omitempty"` + Outputs []string `json:"outputs,omitempty"` + Cache *bool `json:"cache,omitempty"` + DependsOn []string `json:"dependsOn,omitempty"` + Inputs []string `json:"inputs,omitempty"` + OutputMode *util.TaskOutputMode `json:"outputMode,omitempty"` + Env []string `json:"env,omitempty"` + PassthroughEnv []string `json:"experimentalPassthroughEnv,omitempty"` + Persistent *bool `json:"persistent,omitempty"` } -// PristinePipeline contains original TaskDefinitions without the bookkeeping -type PristinePipeline map[string]TaskDefinition +// taskDefinitionHashable exists as a definition for PristinePipeline, which is used down +// stream for calculating the global hash. We want to exclude experimental fields here +// because we don't want experimental fields to be part of the global hash. +type taskDefinitionHashable struct { + Outputs TaskOutputs + ShouldCache bool + EnvVarDependencies []string + TopologicalDependencies []string + TaskDependencies []string + Inputs []string + OutputMode util.TaskOutputMode + Persistent bool +} + +// taskDefinitionExperiments is a list of config fields in a task definition that are considered +// experimental. We keep these separated so we can compute a global hash without these. +type taskDefinitionExperiments struct { + PassthroughEnv []string +} + +// PristinePipeline is a map of task names to TaskDefinition or taskDefinitionHashable. +// Depending on whether any experimental fields are defined, we will use either struct. 
+// The purpose is to omit experimental fields when making a pristine version, so that +// it doesn't show up in --dry/--summarize output or affect the global hash. +type PristinePipeline map[string]interface{} // Pipeline is a struct for deserializing .pipeline in configFile type Pipeline map[string]BookkeepingTaskDefinition @@ -99,8 +130,10 @@ type Pipeline map[string]BookkeepingTaskDefinition // BookkeepingTaskDefinition holds the underlying TaskDefinition and some bookkeeping data // about the TaskDefinition. This wrapper struct allows us to leave TaskDefinition untouched. type BookkeepingTaskDefinition struct { - definedFields util.Set - TaskDefinition TaskDefinition + definedFields util.Set + experimentalFields util.Set + experimental taskDefinitionExperiments + TaskDefinition taskDefinitionHashable } // TaskDefinition is a representation of the configFile pipeline for further computation. @@ -111,6 +144,9 @@ type TaskDefinition struct { // This field is custom-marshalled from rawTask.Env and rawTask.DependsOn EnvVarDependencies []string + // rawTask.PassthroughEnv + PassthroughEnv []string + // TopologicalDependencies are tasks from package dependencies. // E.g. "build" is a topological dependency in: // dependsOn: ['^build']. @@ -210,7 +246,7 @@ func LoadTurboConfig(dir turbopath.AbsoluteSystemPath, rootPackageJSON *PackageJ // rather than defaulting to the 0-value of a boolean field. 
turboJSON.Pipeline[taskName] = BookkeepingTaskDefinition{ definedFields: util.SetFromStrings([]string{"ShouldCache"}), - TaskDefinition: TaskDefinition{ + TaskDefinition: taskDefinitionHashable{ ShouldCache: false, }, } @@ -291,11 +327,11 @@ func readTurboJSON(path turbopath.AbsoluteSystemPath) (*TurboJSON, error) { // GetTaskDefinition returns a TaskDefinition from a serialized definition in configFile func (pc Pipeline) GetTaskDefinition(taskID string) (TaskDefinition, bool) { if entry, ok := pc[taskID]; ok { - return entry.TaskDefinition, true + return entry.GetTaskDefinition(), true } _, task := util.GetPackageTaskFromId(taskID) entry, ok := pc[task] - return entry.TaskDefinition, ok + return entry.GetTaskDefinition(), ok } // HasTask returns true if the given task is defined in the pipeline, either directly or @@ -315,11 +351,17 @@ func (pc Pipeline) HasTask(task string) bool { return false } -// Pristine returns a PristinePipeline +// Pristine returns a PristinePipeline, this is used for printing to console and pruning func (pc Pipeline) Pristine() PristinePipeline { pristine := PristinePipeline{} for taskName, taskDef := range pc { - pristine[taskName] = taskDef.TaskDefinition + // If there are any experimental fields, we will include them with 0-values + // if there aren't, we will omit them entirely + if taskDef.hasExperimentalFields() { + pristine[taskName] = taskDef.GetTaskDefinition() // merges experimental fields in + } else { + pristine[taskName] = taskDef.TaskDefinition // has no experimental fields + } } return pristine } @@ -328,7 +370,29 @@ func (pc Pipeline) Pristine() PristinePipeline { // see whether a field was actually in the underlying turbo.json // or whether it was initialized with its 0-value. 
func (btd BookkeepingTaskDefinition) hasField(fieldName string) bool { - return btd.definedFields.Includes(fieldName) + return btd.definedFields.Includes(fieldName) || btd.experimentalFields.Includes(fieldName) +} + +// hasExperimentalFields keeps track of whether any experimental fields were found +func (btd BookkeepingTaskDefinition) hasExperimentalFields() bool { + return len(btd.experimentalFields) > 0 +} + +// GetTaskDefinition gets a TaskDefinition by merging the experimental and non-experimental fields +// into a single representation to use downstream. +func (btd BookkeepingTaskDefinition) GetTaskDefinition() TaskDefinition { + return TaskDefinition{ + Outputs: btd.TaskDefinition.Outputs, + ShouldCache: btd.TaskDefinition.ShouldCache, + EnvVarDependencies: btd.TaskDefinition.EnvVarDependencies, + TopologicalDependencies: btd.TaskDefinition.TopologicalDependencies, + TaskDependencies: btd.TaskDefinition.TaskDependencies, + Inputs: btd.TaskDefinition.Inputs, + OutputMode: btd.TaskDefinition.OutputMode, + Persistent: btd.TaskDefinition.Persistent, + // From experimental fields + PassthroughEnv: btd.experimental.PassthroughEnv, + } } // MergeTaskDefinitions accepts an array of BookkeepingTaskDefinitions and merges them into @@ -344,7 +408,7 @@ func MergeTaskDefinitions(taskDefinitions []BookkeepingTaskDefinition) (*TaskDef // For each of the TaskDefinitions we know of, merge them in for _, bookkeepingTaskDef := range taskDefinitions { - taskDef := bookkeepingTaskDef.TaskDefinition + taskDef := bookkeepingTaskDef.GetTaskDefinition() if bookkeepingTaskDef.hasField("Outputs") { mergedTaskDefinition.Outputs = taskDef.Outputs @@ -358,6 +422,10 @@ func MergeTaskDefinitions(taskDefinitions []BookkeepingTaskDefinition) (*TaskDef mergedTaskDefinition.EnvVarDependencies = taskDef.EnvVarDependencies } + if bookkeepingTaskDef.hasField("PassthroughEnv") { + mergedTaskDefinition.PassthroughEnv = taskDef.PassthroughEnv + } + if bookkeepingTaskDef.hasField("DependsOn") { 
mergedTaskDefinition.TopologicalDependencies = taskDef.TopologicalDependencies } @@ -390,6 +458,7 @@ func (btd *BookkeepingTaskDefinition) UnmarshalJSON(data []byte) error { } btd.definedFields = util.Set{} + btd.experimentalFields = util.Set{} if task.Outputs != nil { var inclusions []string @@ -429,6 +498,7 @@ func (btd *BookkeepingTaskDefinition) UnmarshalJSON(data []byte) error { } envVarDependencies := make(util.Set) + envVarPassthroughs := make(util.Set) btd.TaskDefinition.TopologicalDependencies = []string{} // TODO @mehulkar: this should be a set btd.TaskDefinition.TaskDependencies = []string{} // TODO @mehulkar: this should be a set @@ -458,14 +528,8 @@ func (btd *BookkeepingTaskDefinition) UnmarshalJSON(data []byte) error { // Append env key into EnvVarDependencies if task.Env != nil { btd.definedFields.Add("EnvVarDependencies") - for _, value := range task.Env { - if strings.HasPrefix(value, envPipelineDelimiter) { - // Hard error to help people specify this correctly during migration. - // TODO: Remove this error after we have run summary. - return fmt.Errorf("You specified \"%s\" in the \"env\" key. 
You should not prefix your environment variables with \"$\"", value)
-		}
-
-		envVarDependencies.Add(value)
+		if err := gatherEnvVars(task.Env, "env", &envVarDependencies); err != nil {
+			return err
 		}
 	}
 
@@ -473,6 +537,16 @@ func (btd *BookkeepingTaskDefinition) UnmarshalJSON(data []byte) error {
 
 	sort.Strings(btd.TaskDefinition.EnvVarDependencies)
 
+	if task.PassthroughEnv != nil {
+		btd.experimentalFields.Add("PassthroughEnv")
+		if err := gatherEnvVars(task.PassthroughEnv, "passthroughEnv", &envVarPassthroughs); err != nil {
+			return err
+		}
+	}
+
+	btd.experimental.PassthroughEnv = envVarPassthroughs.UnsafeListOfStrings()
+	sort.Strings(btd.experimental.PassthroughEnv)
+
 	if task.Inputs != nil {
 		// Note that we don't require Inputs to be sorted, we're going to
 		// hash the resulting files and sort that instead
@@ -500,51 +574,38 @@ func (btd *BookkeepingTaskDefinition) UnmarshalJSON(data []byte) error {
 	return nil
 }
 
+// MarshalJSON serializes taskDefinitionHashable struct into json
+func (c taskDefinitionHashable) MarshalJSON() ([]byte, error) {
+	task := makeRawTask(
+		c.Persistent,
+		c.ShouldCache,
+		c.OutputMode,
+		c.Inputs,
+		c.Outputs,
+		c.EnvVarDependencies,
+		c.TaskDependencies,
+		c.TopologicalDependencies,
+	)
+	return json.Marshal(task)
+}
+
 // MarshalJSON serializes TaskDefinition struct into json
 func (c TaskDefinition) MarshalJSON() ([]byte, error) {
-	// Initialize with empty arrays, so we get empty arrays serialized into JSON
-	task := rawTaskWithDefaults{
-		Outputs:   []string{},
-		Inputs:    []string{},
-		Env:       []string{},
-		DependsOn: []string{},
-	}
-
-	task.Persistent = c.Persistent
-	task.Cache = &c.ShouldCache
-	task.OutputMode = c.OutputMode
-
-	if len(c.Inputs) > 0 {
-		task.Inputs = c.Inputs
-	}
-
-	if len(c.EnvVarDependencies) > 0 {
-		task.Env = append(task.Env, c.EnvVarDependencies...)
-	}
-
-	if len(c.Outputs.Inclusions) > 0 {
-		task.Outputs = append(task.Outputs, c.Outputs.Inclusions...)
- } - - for _, i := range c.Outputs.Exclusions { - task.Outputs = append(task.Outputs, "!"+i) - } - - if len(c.TaskDependencies) > 0 { - task.DependsOn = append(task.DependsOn, c.TaskDependencies...) - } - - for _, i := range c.TopologicalDependencies { - task.DependsOn = append(task.DependsOn, "^"+i) - } - - // These _should_ already be sorted when the TaskDefinition struct was unmarshaled, - // but we want to ensure they're sorted on the way out also, just in case something - // in the middle mutates the items. - sort.Strings(task.DependsOn) - sort.Strings(task.Outputs) - sort.Strings(task.Env) - sort.Strings(task.Inputs) + task := makeRawTask( + c.Persistent, + c.ShouldCache, + c.OutputMode, + c.Inputs, + c.Outputs, + c.EnvVarDependencies, + c.TaskDependencies, + c.TopologicalDependencies, + ) + + if len(c.PassthroughEnv) > 0 { + task.PassthroughEnv = append(task.PassthroughEnv, c.PassthroughEnv...) + } + sort.Strings(task.PassthroughEnv) return json.Marshal(task) } @@ -557,16 +618,14 @@ func (c *TurboJSON) UnmarshalJSON(data []byte) error { } envVarDependencies := make(util.Set) + envVarPassthroughs := make(util.Set) globalFileDependencies := make(util.Set) - for _, value := range raw.GlobalEnv { - if strings.HasPrefix(value, envPipelineDelimiter) { - // Hard error to help people specify this correctly during migration. - // TODO: Remove this error after we have run summary. - return fmt.Errorf("You specified \"%s\" in the \"env\" key. 
You should not prefix your environment variables with \"%s\"", value, envPipelineDelimiter) - } - - envVarDependencies.Add(value) + if err := gatherEnvVars(raw.GlobalEnv, "globalEnv", &envVarDependencies); err != nil { + return err + } + if err := gatherEnvVars(raw.GlobalPassthroughEnv, "experimentalGlobalPassThroughEnv", &envVarPassthroughs); err != nil { + return err } // TODO: In the rust port, warnings should be refactored to a post-parse validation step @@ -585,6 +644,12 @@ func (c *TurboJSON) UnmarshalJSON(data []byte) error { // turn the set into an array and assign to the TurboJSON struct fields. c.GlobalEnv = envVarDependencies.UnsafeListOfStrings() sort.Strings(c.GlobalEnv) + + if raw.GlobalPassthroughEnv != nil { + c.GlobalPassthroughEnv = envVarPassthroughs.UnsafeListOfStrings() + sort.Strings(c.GlobalPassthroughEnv) + } + c.GlobalDeps = globalFileDependencies.UnsafeListOfStrings() sort.Strings(c.GlobalDeps) @@ -597,13 +662,80 @@ func (c *TurboJSON) UnmarshalJSON(data []byte) error { } // MarshalJSON converts a TurboJSON into the equivalent json object in bytes -// note: we go via rawTurboJSON so that the output format is correct +// note: we go via rawTurboJSON so that the output format is correct. 
+// This is used by `turbo prune` to generate a pruned turbo.json +// and also by --summarize & --dry=json to serialize the known config +// into something we can print to screen func (c *TurboJSON) MarshalJSON() ([]byte, error) { raw := pristineTurboJSON{} raw.GlobalDependencies = c.GlobalDeps raw.GlobalEnv = c.GlobalEnv + raw.GlobalPassthroughEnv = c.GlobalPassthroughEnv raw.Pipeline = c.Pipeline.Pristine() raw.RemoteCacheOptions = c.RemoteCacheOptions return json.Marshal(&raw) } + +func makeRawTask(persistent bool, shouldCache bool, outputMode util.TaskOutputMode, inputs []string, outputs TaskOutputs, envVarDependencies []string, taskDependencies []string, topologicalDependencies []string) *rawTaskWithDefaults { + // Initialize with empty arrays, so we get empty arrays serialized into JSON + task := &rawTaskWithDefaults{ + Outputs: []string{}, + Inputs: []string{}, + Env: []string{}, + PassthroughEnv: []string{}, + DependsOn: []string{}, + } + + task.Persistent = persistent + task.Cache = &shouldCache + task.OutputMode = outputMode + + if len(inputs) > 0 { + task.Inputs = inputs + } + + if len(envVarDependencies) > 0 { + task.Env = append(task.Env, envVarDependencies...) + } + + if len(outputs.Inclusions) > 0 { + task.Outputs = append(task.Outputs, outputs.Inclusions...) + } + + for _, i := range outputs.Exclusions { + task.Outputs = append(task.Outputs, "!"+i) + } + + if len(taskDependencies) > 0 { + task.DependsOn = append(task.DependsOn, taskDependencies...) + } + + for _, i := range topologicalDependencies { + task.DependsOn = append(task.DependsOn, "^"+i) + } + + // These _should_ already be sorted when the TaskDefinition struct was unmarshaled, + // but we want to ensure they're sorted on the way out also, just in case something + // in the middle mutates the items. 
+ sort.Strings(task.DependsOn) + sort.Strings(task.Outputs) + sort.Strings(task.Env) + sort.Strings(task.Inputs) + return task +} + +// gatherEnvVars puts env vars into the provided set as long as they don't have an invalid value. +func gatherEnvVars(vars []string, key string, into *util.Set) error { + for _, value := range vars { + if strings.HasPrefix(value, envPipelineDelimiter) { + // Hard error to help people specify this correctly during migration. + // TODO: Remove this error after we have run summary. + return fmt.Errorf("You specified \"%s\" in the \"%s\" key. You should not prefix your environment variables with \"%s\"", value, key, envPipelineDelimiter) + } + + into.Add(value) + } + + return nil +} diff --git a/cli/internal/fs/turbo_json_test.go b/cli/internal/fs/turbo_json_test.go index ce3e4ef879ebf..1d384d5a1c82b 100644 --- a/cli/internal/fs/turbo_json_test.go +++ b/cli/internal/fs/turbo_json_test.go @@ -35,10 +35,16 @@ func Test_ReadTurboConfig(t *testing.T) { t.Fatalf("invalid parse: %#v", turboJSONReadErr) } + assert.EqualValues(t, []string{"AWS_SECRET_KEY"}, turboJSON.GlobalPassthroughEnv) + pipelineExpected := map[string]BookkeepingTaskDefinition{ "build": { - definedFields: util.SetFromStrings([]string{"Outputs", "OutputMode", "DependsOn"}), - TaskDefinition: TaskDefinition{ + definedFields: util.SetFromStrings([]string{"Outputs", "OutputMode", "DependsOn"}), + experimentalFields: util.SetFromStrings([]string{"PassthroughEnv"}), + experimental: taskDefinitionExperiments{ + PassthroughEnv: []string{"GITHUB_TOKEN"}, + }, + TaskDefinition: taskDefinitionHashable{ Outputs: TaskOutputs{Inclusions: []string{".next/**", "dist/**"}, Exclusions: []string{"dist/assets/**"}}, TopologicalDependencies: []string{"build"}, EnvVarDependencies: []string{}, @@ -48,8 +54,12 @@ func Test_ReadTurboConfig(t *testing.T) { }, }, "lint": { - definedFields: util.SetFromStrings([]string{"Outputs", "OutputMode", "ShouldCache", "DependsOn"}), - TaskDefinition: 
TaskDefinition{ + definedFields: util.SetFromStrings([]string{"Outputs", "OutputMode", "ShouldCache", "DependsOn"}), + experimentalFields: util.SetFromStrings([]string{}), + experimental: taskDefinitionExperiments{ + PassthroughEnv: []string{}, + }, + TaskDefinition: taskDefinitionHashable{ Outputs: TaskOutputs{}, TopologicalDependencies: []string{}, EnvVarDependencies: []string{"MY_VAR"}, @@ -59,8 +69,12 @@ func Test_ReadTurboConfig(t *testing.T) { }, }, "dev": { - definedFields: util.SetFromStrings([]string{"OutputMode", "ShouldCache"}), - TaskDefinition: TaskDefinition{ + definedFields: util.SetFromStrings([]string{"OutputMode", "ShouldCache"}), + experimentalFields: util.SetFromStrings([]string{}), + experimental: taskDefinitionExperiments{ + PassthroughEnv: []string{}, + }, + TaskDefinition: taskDefinitionHashable{ Outputs: TaskOutputs{}, TopologicalDependencies: []string{}, EnvVarDependencies: []string{}, @@ -70,8 +84,12 @@ func Test_ReadTurboConfig(t *testing.T) { }, }, "publish": { - definedFields: util.SetFromStrings([]string{"Inputs", "Outputs", "DependsOn", "ShouldCache"}), - TaskDefinition: TaskDefinition{ + definedFields: util.SetFromStrings([]string{"Inputs", "Outputs", "DependsOn", "ShouldCache"}), + experimentalFields: util.SetFromStrings([]string{}), + experimental: taskDefinitionExperiments{ + PassthroughEnv: []string{}, + }, + TaskDefinition: taskDefinitionHashable{ Outputs: TaskOutputs{Inclusions: []string{"dist/**"}}, TopologicalDependencies: []string{"build", "publish"}, EnvVarDependencies: []string{}, @@ -120,8 +138,12 @@ func Test_LoadTurboConfig_BothCorrectAndLegacy(t *testing.T) { pipelineExpected := map[string]BookkeepingTaskDefinition{ "build": { - definedFields: util.SetFromStrings([]string{"Outputs", "OutputMode", "DependsOn"}), - TaskDefinition: TaskDefinition{ + definedFields: util.SetFromStrings([]string{"Outputs", "OutputMode", "DependsOn"}), + experimentalFields: util.SetFromStrings([]string{}), + experimental: 
taskDefinitionExperiments{ + PassthroughEnv: []string{}, + }, + TaskDefinition: taskDefinitionHashable{ Outputs: TaskOutputs{Inclusions: []string{".next/**", "dist/**"}, Exclusions: []string{"dist/assets/**"}}, TopologicalDependencies: []string{"build"}, EnvVarDependencies: []string{}, @@ -157,7 +179,7 @@ func Test_ReadTurboConfig_InvalidEnvDeclarations2(t *testing.T) { func Test_ReadTurboConfig_InvalidGlobalEnvDeclarations(t *testing.T) { testDir := getTestDir(t, "invalid-global-env") _, turboJSONReadErr := readTurboConfig(testDir.UntypedJoin("turbo.json")) - expectedErrorMsg := "turbo.json: You specified \"$QUX\" in the \"env\" key. You should not prefix your environment variables with \"$\"" + expectedErrorMsg := "turbo.json: You specified \"$QUX\" in the \"globalEnv\" key. You should not prefix your environment variables with \"$\"" assert.EqualErrorf(t, turboJSONReadErr, expectedErrorMsg, "Error should be: %v, got: %v", expectedErrorMsg, turboJSONReadErr) } @@ -225,10 +247,11 @@ func validatePipeline(t *testing.T, actual Pipeline, expected Pipeline) { if !ok { t.Errorf("missing expected task: %v", taskName) } - actualTaskDefinition := bookkeepingTaskDef.TaskDefinition + actualTaskDefinition := bookkeepingTaskDef.GetTaskDefinition() assertIsSorted(t, actualTaskDefinition.Outputs.Inclusions, "Task output inclusions") assertIsSorted(t, actualTaskDefinition.Outputs.Exclusions, "Task output exclusions") assertIsSorted(t, actualTaskDefinition.EnvVarDependencies, "Task env vars") + assertIsSorted(t, actualTaskDefinition.PassthroughEnv, "Task env vars") assertIsSorted(t, actualTaskDefinition.TopologicalDependencies, "Topo deps") assertIsSorted(t, actualTaskDefinition.TaskDependencies, "Task deps") assert.EqualValuesf(t, expectedTaskDefinition, bookkeepingTaskDef, "task definition mismatch for %v", taskName) diff --git a/cli/internal/run/global_hash.go b/cli/internal/run/global_hash.go index 8d5887e00e395..d2092e57ee0ee 100644 --- a/cli/internal/run/global_hash.go +++ 
b/cli/internal/run/global_hash.go @@ -29,9 +29,30 @@ type GlobalHashable struct { envVars env.DetailedMap globalCacheKey string pipeline fs.PristinePipeline + envVarPassthroughs []string + envMode util.EnvMode } -// getGlobalHashable converts GlobalHashable into an anonymous struct. +// calculateGlobalHashFromHashable returns a hash string from the globalHashable +func calculateGlobalHashFromHashable(named GlobalHashable) (string, error) { + // When we aren't in infer mode, we can hash the whole object + if named.envMode != util.Infer { + return fs.HashObject(named) + } + + // In infer mode, if there is any passThru config (even if it is an empty array) + // we'll hash the whole object, so we can detect changes to that config + if named.envVarPassthroughs != nil { + return fs.HashObject(named) + } + + // If we're in infer mode, and there is no global pass through config, + // we can use the old anonymous struct. this will be true for everyone not using the strict env + // feature, and we don't want to break their cache. + return fs.HashObject(getOldGlobalHashable(named)) +} + +// getOldGlobalHashable converts GlobalHashable into an anonymous struct. // This exists because the global hash was originally implemented with an anonymous // struct, and changing to a named struct changes the global hash (because the hash // is essentially a hash of `fmt.Sprint("%#v", thing)`, and the type is part of that string. @@ -39,7 +60,7 @@ type GlobalHashable struct { // struct, it would change the global hash for everyone, invalidating EVERY TURBO CACHE ON THE PLANET! // We can remove this converter when we are going to have to update the global hash for something // else anyway. 
-func getGlobalHashable(named GlobalHashable) struct { +func getOldGlobalHashable(named GlobalHashable) struct { globalFileHashMap map[turbopath.AnchoredUnixPath]string rootExternalDepsHash string hashedSortedEnvPairs env.EnvironmentVariablePairs @@ -69,6 +90,8 @@ func calculateGlobalHash( globalFileDependencies []string, packageManager *packagemanager.PackageManager, lockFile lockfile.Lockfile, + envVarPassthroughs []string, + envMode util.EnvMode, logger hclog.Logger, ) (GlobalHashable, error) { // Calculate env var dependencies @@ -118,11 +141,18 @@ func calculateGlobalHash( return GlobalHashable{}, fmt.Errorf("error hashing files: %w", err) } + // Remove the passthroughs from hash consideration if we're explicitly loose. + if envMode == util.Loose { + envVarPassthroughs = nil + } + return GlobalHashable{ globalFileHashMap: globalFileHashMap, rootExternalDepsHash: rootPackageJSON.ExternalDepsHash, envVars: globalHashableEnvVars, globalCacheKey: _globalCacheKey, pipeline: pipeline.Pristine(), + envVarPassthroughs: envVarPassthroughs, + envMode: envMode, }, nil } diff --git a/cli/internal/run/real_run.go b/cli/internal/run/real_run.go index b86dbe4204db3..b238d754676ff 100644 --- a/cli/internal/run/real_run.go +++ b/cli/internal/run/real_run.go @@ -4,7 +4,6 @@ import ( gocontext "context" "fmt" "log" - "os" "os/exec" "strings" "sync" @@ -18,6 +17,8 @@ import ( "github.com/vercel/turbo/cli/internal/cmdutil" "github.com/vercel/turbo/cli/internal/colorcache" "github.com/vercel/turbo/cli/internal/core" + "github.com/vercel/turbo/cli/internal/env" + "github.com/vercel/turbo/cli/internal/fs" "github.com/vercel/turbo/cli/internal/graph" "github.com/vercel/turbo/cli/internal/logstreamer" "github.com/vercel/turbo/cli/internal/nodes" @@ -29,6 +30,7 @@ import ( "github.com/vercel/turbo/cli/internal/taskhash" "github.com/vercel/turbo/cli/internal/turbopath" "github.com/vercel/turbo/cli/internal/ui" + "github.com/vercel/turbo/cli/internal/util" ) // RealRun executes a set of 
tasks @@ -39,6 +41,7 @@ func RealRun( engine *core.Engine, taskHashTracker *taskhash.Tracker, turboCache cache.Cache, + turboJSON *fs.TurboJSON, packagesInScope []string, base *cmdutil.CmdBase, runSummary runsummary.Meta, @@ -75,6 +78,8 @@ func RealRun( rs: rs, ui: &cli.ConcurrentUi{Ui: base.UI}, runCache: runCache, + env: turboJSON.GlobalEnv, + passthroughEnv: turboJSON.GlobalPassthroughEnv, logger: base.Logger, packageManager: packageManager, processes: processes, @@ -184,6 +189,8 @@ type execContext struct { rs *runSpec ui cli.Ui runCache *runcache.RunCache + env []string + passthroughEnv []string logger hclog.Logger packageManager *packagemanager.PackageManager processes *process.Manager @@ -210,6 +217,20 @@ func (ec *execContext) exec(ctx gocontext.Context, packageTask *nodes.PackageTas progressLogger := ec.logger.Named("") progressLogger.Debug("start") + strictEnv := false + switch ec.rs.Opts.runOpts.EnvMode { + case util.Infer: + globalStrict := ec.passthroughEnv != nil + taskStrict := packageTask.TaskDefinition.PassthroughEnv != nil + inferredStrict := taskStrict || globalStrict + + strictEnv = inferredStrict + case util.Loose: + strictEnv = false + case util.Strict: + strictEnv = true + } + passThroughArgs := ec.rs.ArgsForTask(packageTask.Task) hash := packageTask.Hash ec.logger.Debug("task hash", "value", hash) @@ -271,8 +292,30 @@ func (ec *execContext) exec(ctx gocontext.Context, packageTask *nodes.PackageTas cmd := exec.Command(ec.packageManager.Command, argsactual...) 
cmd.Dir = packageTask.Pkg.Dir.ToSystemPath().RestoreAnchor(ec.repoRoot).ToString() - envs := fmt.Sprintf("TURBO_HASH=%v", hash) - cmd.Env = append(os.Environ(), envs) + + currentState := env.GetEnvMap() + passthroughEnv := env.EnvironmentVariableMap{} + + if strictEnv { + defaultPassthrough := []string{ + "PATH", + "SHELL", + "SYSTEMROOT", // Go will always include this on Windows, but we're being explicit here + } + + passthroughEnv.Merge(env.FromKeys(currentState, defaultPassthrough)) + passthroughEnv.Merge(env.FromKeys(currentState, ec.env)) + passthroughEnv.Merge(env.FromKeys(currentState, ec.passthroughEnv)) + passthroughEnv.Merge(env.FromKeys(currentState, packageTask.TaskDefinition.EnvVarDependencies)) + passthroughEnv.Merge(env.FromKeys(currentState, packageTask.TaskDefinition.PassthroughEnv)) + } else { + passthroughEnv.Merge(currentState) + } + + // Always last to make sure it clobbers. + passthroughEnv.Add("TURBO_HASH", hash) + + cmd.Env = passthroughEnv.ToHashable() // Setup stdout/stderr // If we are not caching anything, then we don't need to write logs to disk diff --git a/cli/internal/run/run.go b/cli/internal/run/run.go index 95d4753e91d3f..5d087345600f9 100644 --- a/cli/internal/run/run.go +++ b/cli/internal/run/run.go @@ -80,9 +80,12 @@ func optsFromArgs(args *turbostate.ParsedArgsFromRust) (*Opts, error) { opts.cacheOpts.SkipFilesystem = runPayload.RemoteOnly opts.cacheOpts.OverrideDir = runPayload.CacheDir opts.cacheOpts.Workers = runPayload.CacheWorkers + + // Run flags opts.runOpts.LogPrefix = runPayload.LogPrefix opts.runOpts.Summarize = runPayload.Summarize opts.runOpts.ExperimentalSpaceID = runPayload.ExperimentalSpaceID + opts.runOpts.EnvMode = runPayload.EnvMode // Runcache flags opts.runcacheOpts.SkipReads = runPayload.Force @@ -253,6 +256,8 @@ func (r *run) run(ctx gocontext.Context, targets []string) error { turboJSON.GlobalDeps, pkgDepGraph.PackageManager, pkgDepGraph.Lockfile, + turboJSON.GlobalPassthroughEnv, + 
r.opts.runOpts.EnvMode, r.base.Logger, ) @@ -260,7 +265,7 @@ func (r *run) run(ctx gocontext.Context, targets []string) error { return fmt.Errorf("failed to collect global hash inputs: %v", err) } - if globalHash, err := fs.HashObject(getGlobalHashable(globalHashable)); err == nil { + if globalHash, err := calculateGlobalHashFromHashable(globalHashable); err == nil { r.base.Logger.Debug("global hash", "value", globalHash) g.GlobalHash = globalHash } else { @@ -388,6 +393,7 @@ func (r *run) run(ctx gocontext.Context, targets []string) error { engine, taskHashTracker, turboCache, + turboJSON, packagesInScope, r.base, summary, diff --git a/cli/internal/turbostate/turbostate.go b/cli/internal/turbostate/turbostate.go index b3fb8853b07aa..dad5b470f9c2e 100644 --- a/cli/internal/turbostate/turbostate.go +++ b/cli/internal/turbostate/turbostate.go @@ -5,6 +5,8 @@ package turbostate import ( "fmt" + + "github.com/vercel/turbo/cli/internal/util" ) // RepoState is the state for repository. Consists of the root for the repo @@ -30,14 +32,15 @@ type PrunePayload struct { // RunPayload is the extra flags passed for the `run` subcommand type RunPayload struct { - CacheDir string `json:"cache_dir"` - CacheWorkers int `json:"cache_workers"` - Concurrency string `json:"concurrency"` - ContinueExecution bool `json:"continue_execution"` - DryRun string `json:"dry_run"` - Filter []string `json:"filter"` - Force bool `json:"force"` - GlobalDeps []string `json:"global_deps"` + CacheDir string `json:"cache_dir"` + CacheWorkers int `json:"cache_workers"` + Concurrency string `json:"concurrency"` + ContinueExecution bool `json:"continue_execution"` + DryRun string `json:"dry_run"` + Filter []string `json:"filter"` + Force bool `json:"force"` + GlobalDeps []string `json:"global_deps"` + EnvMode util.EnvMode `json:"env_mode"` // NOTE: Graph has three effective states that is modeled using a *string: // nil -> no flag passed // "" -> flag passed but no file name attached: print to stdout diff 
--git a/cli/internal/util/run_opts.go b/cli/internal/util/run_opts.go index 6a3cab6ad66f7..7f3c09c4d0079 100644 --- a/cli/internal/util/run_opts.go +++ b/cli/internal/util/run_opts.go @@ -1,5 +1,17 @@ package util +// EnvMode specifies if we will be using strict env vars +type EnvMode string + +const ( + // Infer - infer environment variable constraints from turbo.json + Infer EnvMode = "Infer" + // Loose - environment variables are unconstrained + Loose EnvMode = "Loose" + // Strict - environment variables are limited + Strict EnvMode = "Strict" +) + // RunOpts holds the options that control the execution of a turbo run type RunOpts struct { // Force execution to be serially one-at-a-time @@ -7,6 +19,7 @@ type RunOpts struct { // Whether to execute in parallel (defaults to false) Parallel bool + EnvMode EnvMode // The filename to write a perf profile. Profile string // If true, continue task executions even if a task fails. diff --git a/crates/turborepo-lib/src/cli.rs b/crates/turborepo-lib/src/cli.rs index d00aa426ba5a2..237dd4c5e41bf 100644 --- a/crates/turborepo-lib/src/cli.rs +++ b/crates/turborepo-lib/src/cli.rs @@ -52,6 +52,19 @@ pub enum DryRunMode { Json, } +#[derive(Copy, Clone, Debug, PartialEq, Serialize, ValueEnum)] +pub enum EnvMode { + Infer, + Loose, + Strict, +} + +impl Default for EnvMode { + fn default() -> EnvMode { + EnvMode::Infer + } +} + #[derive(Parser, Clone, Default, Debug, PartialEq, Serialize)] #[clap(author, about = "The build system that makes ship happen", long_about = None)] #[clap(disable_help_subcommand = true)] @@ -309,6 +322,11 @@ pub struct RunArgs { /// .html). Outputs dot graph to stdout when if no filename is provided #[clap(long, num_args = 0..=1, default_missing_value = "")] pub graph: Option, + /// Environment variable mode. + /// Loose passes the entire environment. + /// Strict uses an allowlist specified in turbo.json. 
+ #[clap(long = "experimental-env-mode", default_value = "infer", num_args = 0..=1, default_missing_value = "infer", hide = true)] + pub env_mode: EnvMode, /// Files to ignore when calculating changed files (i.e. --since). /// Supports globs. #[clap(long)] @@ -605,7 +623,7 @@ mod test { use anyhow::Result; - use crate::cli::{Args, Command, DryRunMode, OutputLogsMode, RunArgs, Verbosity}; + use crate::cli::{Args, Command, DryRunMode, EnvMode, OutputLogsMode, RunArgs, Verbosity}; #[test] fn test_parse_run() -> Result<()> { @@ -620,6 +638,74 @@ mod test { } ); + assert_eq!( + Args::try_parse_from(["turbo", "run", "build"]).unwrap(), + Args { + command: Some(Command::Run(Box::new(RunArgs { + tasks: vec!["build".to_string()], + env_mode: EnvMode::Infer, + ..get_default_run_args() + }))), + ..Args::default() + }, + "env_mode: default infer" + ); + + assert_eq!( + Args::try_parse_from(["turbo", "run", "build", "--experimental-env-mode"]).unwrap(), + Args { + command: Some(Command::Run(Box::new(RunArgs { + tasks: vec!["build".to_string()], + env_mode: EnvMode::Infer, + ..get_default_run_args() + }))), + ..Args::default() + }, + "env_mode: not fully-specified" + ); + + assert_eq!( + Args::try_parse_from(["turbo", "run", "build", "--experimental-env-mode", "infer"]) + .unwrap(), + Args { + command: Some(Command::Run(Box::new(RunArgs { + tasks: vec!["build".to_string()], + env_mode: EnvMode::Infer, + ..get_default_run_args() + }))), + ..Args::default() + }, + "env_mode: specified infer" + ); + + assert_eq!( + Args::try_parse_from(["turbo", "run", "build", "--experimental-env-mode", "loose"]) + .unwrap(), + Args { + command: Some(Command::Run(Box::new(RunArgs { + tasks: vec!["build".to_string()], + env_mode: EnvMode::Loose, + ..get_default_run_args() + }))), + ..Args::default() + }, + "env_mode: specified loose" + ); + + assert_eq!( + Args::try_parse_from(["turbo", "run", "build", "--experimental-env-mode", "strict"]) + .unwrap(), + Args { + command: 
Some(Command::Run(Box::new(RunArgs { + tasks: vec!["build".to_string()], + env_mode: EnvMode::Strict, + ..get_default_run_args() + }))), + ..Args::default() + }, + "env_mode: specified strict" + ); + assert_eq!( Args::try_parse_from(["turbo", "run", "build", "lint", "test"]).unwrap(), Args {