Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for logging to Studio when not inside a repo #646

Merged
merged 46 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
2160ae4
post to studio even without git/dvc repo
Jul 31, 2023
8653732
tests for no-git scenario
Jul 31, 2023
ca57b17
studio: make no-repo paths relative to cwd
Aug 4, 2023
98ae256
make ruff happy
Aug 4, 2023
df1a20b
don't require exp name
Aug 4, 2023
9ddcf07
don't require baseline rev
Aug 4, 2023
bf01031
Merge branch 'main' into no-git
Aug 4, 2023
3c68cb6
refactor studio path formatting
Aug 4, 2023
f6a0a29
live: Set new defaults `report=None` and `save_dvc_exp=True`.
daavoo Aug 17, 2023
aa3610a
frameworks: Drop model_file.
daavoo Aug 22, 2023
2e538ce
update examples
daavoo Aug 22, 2023
0d21bb6
Merge branch 'main' into 484-30-release
daavoo Aug 30, 2023
bb34d87
fix merge conflicts
Sep 6, 2023
00ac887
Write to root dvc.yaml (#687)
Sep 7, 2023
5dcc43b
report: Drop "auto" logic.
daavoo Sep 7, 2023
7d2528e
studio: Extract `post_to_studio` and decouple from `make_report` (#705)
daavoo Sep 7, 2023
a02d160
refactor(tests): Split `test_main` into separate files.
daavoo Sep 7, 2023
6ccc959
fix matplotlib warning
daavoo Sep 7, 2023
a539c8e
Merge branch 'main' into 484-30-release
Sep 11, 2023
7a374c4
merge 3.0 changes
Sep 11, 2023
c685bc1
Merge remote-tracking branch 'origin/484-30-release' into no-git
Sep 11, 2023
f3ebcd0
fix studio tests
Sep 11, 2023
68b4f90
fix windows studio paths
Sep 11, 2023
8d2112f
fix windows studio paths for plots
Sep 11, 2023
937bc5b
skip fabric tests if not installed
Dec 7, 2023
524e2a6
fix conflicts with main
Dec 8, 2023
0c49bea
drop dvc repo
Dec 8, 2023
83bb14a
drop dvcignore
Dec 8, 2023
71698f0
drop unrelated test_fabric.py file
Dec 8, 2023
a9b028f
fix windows paths
Dec 8, 2023
bb2a3b5
fix windows paths
Dec 8, 2023
35c34a9
merge main
Jan 22, 2024
28cde80
adapt plot paths even if no dvc repo
Jan 25, 2024
12755f2
default baseline rev to all zeros
Jan 25, 2024
b9ba6f2
Merge branch 'main' into no-git
Jan 26, 2024
939ff0b
Merge branch 'main' into no-git
Feb 6, 2024
ade1b3d
consolidate repro tests
Feb 7, 2024
56acdae
set null sha as variable
Feb 7, 2024
33288cd
add type hints to studio
Feb 13, 2024
73357b2
limit windows path handling to studio
Feb 13, 2024
b5ff21d
merge
Feb 13, 2024
53e77a6
fix typing errors in studio module
Feb 13, 2024
bea5a83
fix mypy in live module
Feb 13, 2024
bd431da
drop checking for dvc_file
Feb 13, 2024
5003104
Merge branch 'main' into no-git
Feb 13, 2024
a4cae82
Merge branch 'main' into no-git
Feb 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions src/dvclive/dvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pathlib import Path
from typing import TYPE_CHECKING, Any, List, Optional

from dvclive import env
from dvclive.plots import Image, Metric
from dvclive.serialize import dump_yaml
from dvclive.utils import StrPath, rel_path
Expand Down Expand Up @@ -131,9 +132,14 @@ def _update_entries(old, new, key):
def get_exp_name(name, scm, baseline_rev) -> str:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated get_exp_name to always be able to return an experiment name instead of only when inside a repo.

from dvc.exceptions import InvalidArgumentError
from dvc.repo.experiments.refs import ExpRefInfo
from dvc.repo.experiments.utils import check_ref_format, get_random_exp_name
from dvc.repo.experiments.utils import (
check_ref_format,
gen_random_name,
get_random_exp_name,
)

if name:
name = name or os.getenv(env.DVC_EXP_NAME)
if name and scm and baseline_rev:
ref = ExpRefInfo(baseline_sha=baseline_rev, name=name)
if scm.get_ref(str(ref)):
logger.warning(f"Experiment conflicts with existing experiment '{name}'.")
Expand All @@ -144,7 +150,11 @@ def get_exp_name(name, scm, baseline_rev) -> str:
logger.warning(e)
else:
return name
return get_random_exp_name(scm, baseline_rev)
if scm and baseline_rev:
return get_random_exp_name(scm, baseline_rev)
if name:
return name
return gen_random_name()


def find_overlapping_stage(dvc_repo: "Repo", path: StrPath) -> Optional["Stage"]:
Expand Down
46 changes: 17 additions & 29 deletions src/dvclive/live.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@

ParamLike = Union[int, float, str, bool, List["ParamLike"], Dict[str, "ParamLike"]]

NULL_SHA: str = "0" * 40


class Live:
def __init__(
Expand Down Expand Up @@ -95,8 +97,8 @@ def __init__(
self._report_notebook = None
self._init_report()

self._baseline_rev: Optional[str] = None
self._exp_name: Optional[str] = exp_name
self._baseline_rev: str = os.getenv(env.DVC_EXP_BASELINE_REV, NULL_SHA)
shcheklein marked this conversation as resolved.
Show resolved Hide resolved
self._exp_name: Optional[str] = exp_name or os.getenv(env.DVC_EXP_NAME)
self._exp_message: Optional[str] = exp_message
self._experiment_rev: Optional[str] = None
self._inside_dvc_exp: bool = False
Expand Down Expand Up @@ -148,28 +150,36 @@ def _init_cleanup(self):
os.remove(dvc_file)

@catch_and_warn(DvcException, logger)
def _init_dvc(self):
def _init_dvc(self): # noqa: C901
from dvc.scm import NoSCM

if os.getenv(env.DVC_ROOT, None):
self._inside_dvc_pipeline = True
self._init_dvc_pipeline()
self._dvc_repo = get_dvc_repo()

scm = self._dvc_repo.scm if self._dvc_repo else None
if isinstance(scm, NoSCM):
scm = None
if scm:
self._baseline_rev = scm.get_rev()
self._exp_name = get_exp_name(self._exp_name, scm, self._baseline_rev)
logger.info(f"Logging to experiment '{self._exp_name}'")
Comment on lines +202 to +208
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once we have tried to find a repo, we can generate all the exp info we need in one place instead of scattering it throughout the code.


dvc_logger = logging.getLogger("dvc")
dvc_logger.setLevel(os.getenv(env.DVCLIVE_LOGLEVEL, "WARNING").upper())

self._dvc_file = self._init_dvc_file()

if (self._dvc_repo is None) or isinstance(self._dvc_repo.scm, NoSCM):
if not scm:
if self._save_dvc_exp:
logger.warning(
"Can't save experiment without a Git Repo."
"\nCreate a Git repo (`git init`) and commit (`git commit`)."
)
self._save_dvc_exp = False
return
if self._dvc_repo.scm.no_commits:
if scm.no_commits:
if self._save_dvc_exp:
logger.warning(
"Can't save experiment to an empty Git Repo."
Expand All @@ -189,12 +199,7 @@ def _init_dvc(self):
if self._inside_dvc_pipeline:
return

self._baseline_rev = self._dvc_repo.scm.get_rev()
if self._save_dvc_exp:
self._exp_name = get_exp_name(
self._exp_name, self._dvc_repo.scm, self._baseline_rev
)
logger.info(f"Logging to experiment '{self._exp_name}'")
mark_dvclive_only_started(self._exp_name)
self._include_untracked.append(self.dir)

Expand All @@ -208,8 +213,6 @@ def _init_dvc_file(self) -> str:
def _init_dvc_pipeline(self):
if os.getenv(env.DVC_EXP_BASELINE_REV, None):
# `dvc exp` execution
self._baseline_rev = os.getenv(env.DVC_EXP_BASELINE_REV, "")
self._exp_name = os.getenv(env.DVC_EXP_NAME, "")
self._inside_dvc_exp = True
if self._save_dvc_exp:
logger.info("Ignoring `save_dvc_exp` because `dvc exp run` is running")
Expand All @@ -234,22 +237,6 @@ def _init_studio(self):
logger.debug("Skipping `studio` report `start` and `done` events.")
self._studio_events_to_skip.add("start")
self._studio_events_to_skip.add("done")
elif self._dvc_repo is None:
logger.warning(
"Can't connect to Studio without a DVC Repo."
"\nYou can create a DVC Repo by calling `dvc init`."
)
self._studio_events_to_skip.add("start")
self._studio_events_to_skip.add("data")
self._studio_events_to_skip.add("done")
elif not self._save_dvc_exp:
logger.warning(
"Can't connect to Studio without creating a DVC experiment."
"\nIf you have a DVC Pipeline, run it with `dvc exp run`."
)
self._studio_events_to_skip.add("start")
self._studio_events_to_skip.add("data")
self._studio_events_to_skip.add("done")
else:
self.post_to_studio("start")

Expand Down Expand Up @@ -571,7 +558,8 @@ def make_report(self):

@catch_and_warn(DvcException, logger)
def make_dvcyaml(self):
make_dvcyaml(self)
if self.dvc_file:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is the case for this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean when can it be None?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the only (?) unresolved question? Approved, since it doesn't look like anything major ...

good stuff, Dave. thanks!

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, missed this one. Good catch. Maybe this came from an earlier iteration. I dropped this condition in the latest commit.

make_dvcyaml(self)

@catch_and_warn(DvcException, logger)
def post_to_studio(self, event):
Expand Down
5 changes: 2 additions & 3 deletions src/dvclive/studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,8 @@ def _cast_to_numbers(datapoints):


def _adapt_path(live, name):
if live._dvc_repo is not None:
name = rel_path(name, live._dvc_repo.root_dir)
return name
dvc_root_path = live._dvc_repo.root_dir if live._dvc_repo else None
return rel_path(name, dvc_root_path)
dberenbaum marked this conversation as resolved.
Show resolved Hide resolved


def _adapt_plot_datapoints(live, plot):
Expand Down
10 changes: 7 additions & 3 deletions src/dvclive/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
import re
import shutil
from pathlib import Path
from pathlib import Path, PureWindowsPath
from platform import uname
from typing import Union, List, Dict, TYPE_CHECKING
import webbrowser
Expand Down Expand Up @@ -192,8 +192,12 @@ def wrapper(*args, **kwargs):


def rel_path(path, dvc_root_path):
absolute_path = Path(path).absolute()
return str(Path(os.path.relpath(absolute_path, dvc_root_path)).as_posix())
if dvc_root_path:
absolute_path = Path(path).absolute()
path = os.path.relpath(absolute_path, dvc_root_path)
if os.name == "nt":
return str(PureWindowsPath(path).as_posix())
dberenbaum marked this conversation as resolved.
Show resolved Hide resolved
return str(Path(path).as_posix())


def read_history(live, metric):
Expand Down
42 changes: 8 additions & 34 deletions tests/test_dvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,27 +29,24 @@ def test_get_dvc_repo_subdir(tmp_dir):
def test_exp_save_on_end(tmp_dir, save, mocked_dvc_repo):
live = Live(save_dvc_exp=save)
live.end()
assert live._baseline_rev is not None
assert live._exp_name is not None
Comment on lines +32 to +33
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are now always set.

if save:
assert live._baseline_rev is not None
assert live._exp_name is not None
mocked_dvc_repo.experiments.save.assert_called_with(
name=live._exp_name,
include_untracked=[live.dir, "dvc.yaml"],
force=True,
message=None,
)
else:
assert live._baseline_rev is not None
assert live._exp_name is None
mocked_dvc_repo.experiments.save.assert_not_called()


def test_exp_save_skip_on_env_vars(tmp_dir, monkeypatch, mocker):
def test_exp_save_skip_on_env_vars(tmp_dir, monkeypatch):
monkeypatch.setenv(DVC_EXP_BASELINE_REV, "foo")
monkeypatch.setenv(DVC_EXP_NAME, "bar")
monkeypatch.setenv(DVC_ROOT, tmp_dir)

mocker.patch("dvclive.live.get_dvc_repo", return_value=None)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed

live = Live()
live.end()

Expand All @@ -60,31 +57,6 @@ def test_exp_save_skip_on_env_vars(tmp_dir, monkeypatch, mocker):
assert live._inside_dvc_pipeline


def test_exp_save_run_on_dvc_repro(tmp_dir, mocker):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test was actually outdated and not working. It conflicts with test_dvc_repro below, so I consolidated the meaningful assertions into that test.

dvc_repo = mocker.MagicMock()
dvc_stage = mocker.MagicMock()
dvc_file = mocker.MagicMock()
dvc_repo.index.stages = [dvc_stage, dvc_file]
dvc_repo.scm.get_rev.return_value = "current_rev"
dvc_repo.scm.get_ref.return_value = None
dvc_repo.scm.no_commits = False
dvc_repo.config = {}
dvc_repo.root_dir = tmp_dir
mocker.patch("dvclive.live.get_dvc_repo", return_value=dvc_repo)
live = Live()
assert live._save_dvc_exp
assert live._baseline_rev is not None
assert live._exp_name is not None
live.end()

dvc_repo.experiments.save.assert_called_with(
name=live._exp_name,
include_untracked=[live.dir, "dvc.yaml"],
force=True,
message=None,
)


def test_exp_save_with_dvc_files(tmp_dir, mocker):
dvc_repo = mocker.MagicMock()
dvc_file = mocker.MagicMock()
Expand Down Expand Up @@ -166,7 +138,7 @@ def test_errors_on_git_add_are_catched(tmp_dir, mocked_dvc_repo, monkeypatch):
mocked_dvc_repo.scm.untracked_files.return_value = ["dvclive/metrics.json"]
mocked_dvc_repo.scm.add.side_effect = DvcException("foo")

with Live(dvcyaml=False) as live:
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed

with Live() as live:
live.summary["foo"] = 1


Expand Down Expand Up @@ -204,10 +176,12 @@ def test_no_scm_repo(tmp_dir, mocker):
assert live._save_dvc_exp is False


def test_dvc_repro(tmp_dir, monkeypatch, mocker):
def test_dvc_repro(tmp_dir, monkeypatch, mocked_dvc_repo, mocked_studio_post):
monkeypatch.setenv(DVC_ROOT, "root")
mocker.patch("dvclive.live.get_dvc_repo", return_value=None)
live = Live(save_dvc_exp=True)
assert live._baseline_rev is not None
assert live._exp_name is not None
assert not live._studio_events_to_skip
Comment on lines +182 to +184
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consolidated from the deleted test_exp_save_run_on_dvc_repro above. On repro, everything will still work except actually saving an exp ref.

assert not live._save_dvc_exp


Expand Down
78 changes: 73 additions & 5 deletions tests/test_post_to_studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,10 +235,9 @@ def test_post_to_studio_shorten_names(tmp_dir, mocked_dvc_repo, mocked_studio_po

@pytest.mark.studio()
def test_post_to_studio_inside_dvc_exp(
tmp_dir, mocker, monkeypatch, mocked_studio_post
tmp_dir, mocker, monkeypatch, mocked_studio_post, mocked_dvc_repo
):
mocked_post, _ = mocked_studio_post
mocker.patch("dvclive.live.get_dvc_repo", return_value=None)

monkeypatch.setenv(DVC_EXP_BASELINE_REV, "f" * 40)
monkeypatch.setenv(DVC_EXP_NAME, "bar")
Expand Down Expand Up @@ -310,9 +309,8 @@ def test_post_to_studio_inside_subdir_dvc_exp(
)


def test_post_to_studio_requires_exp(tmp_dir, mocked_dvc_repo, mocked_studio_post):
assert Live(save_dvc_exp=False)._studio_events_to_skip == {"start", "data", "done"}
assert not Live()._studio_events_to_skip
def test_post_to_studio_without_exp(tmp_dir, mocked_dvc_repo, mocked_studio_post):
assert not Live(save_dvc_exp=False)._studio_events_to_skip
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Live updates to Studio will happen regardless of save_dvc_exp value.



def test_get_dvc_studio_config_none(mocker):
Expand Down Expand Up @@ -396,3 +394,73 @@ def test_post_to_studio_if_done_skipped(tmp_dir, mocked_dvc_repo, mocked_studio_
mocked_post, _ = mocked_studio_post
call_types = [call.kwargs["json"]["type"] for call in mocked_post.call_args_list]
assert "data" in call_types


@pytest.mark.studio()
def test_post_to_studio_no_repo(tmp_dir, monkeypatch, mocked_studio_post):
    """Check the sequence of Studio posts when no repo fixture is set up.

    Each event ("start", one "data" per step, "done") is expected to carry
    an all-zero baseline SHA and the experiment name held by ``Live``.
    """
    studio_url = "https://0.0.0.0/api/live"
    null_sha = "0" * 40

    monkeypatch.setenv(DVC_STUDIO_TOKEN, "STUDIO_TOKEN")
    monkeypatch.setenv(DVC_STUDIO_REPO_URL, "STUDIO_REPO_URL")

    live = Live(save_dvc_exp=True)
    live.log_param("fooparam", 1)

    metric_dir = Path(live.plots_dir) / Metric.subfolder
    foo_path = (metric_dir / "foo.tsv").as_posix()

    mocked_post, _ = mocked_studio_post

    # At this point the most recent post is expected to be the "start" event.
    mocked_post.assert_called()
    mocked_post.assert_called_with(
        studio_url,
        **get_studio_call("start", baseline_sha=null_sha, exp_name=live._exp_name),
    )

    # Log one metric value per step; every step should post only its own
    # datapoint, keyed by the POSIX-style plot path.
    for step, value in enumerate((1, 2)):
        live.log_metric("foo", value)

        live.next_step()
        mocked_post.assert_called_with(
            studio_url,
            **get_studio_call(
                "data",
                baseline_sha=null_sha,
                exp_name=live._exp_name,
                step=step,
                plots={foo_path: {"data": [{"step": step, "foo": float(value)}]}},
            ),
        )

    live.end()
    mocked_post.assert_called_with(
        studio_url,
        **get_studio_call("done", baseline_sha=null_sha, exp_name=live._exp_name),
    )


@pytest.mark.studio()
def test_post_to_studio_skip_if_no_repo_url(tmp_dir, mocker, monkeypatch):
    """Nothing is posted to Studio when only the token env var is set.

    The test sets ``DVC_STUDIO_TOKEN`` but never sets a Studio repo URL,
    then runs a one-step ``Live`` session and expects zero posts.
    """
    post_spy = mocker.patch("dvclive.studio.post_live_metrics", return_value=None)

    monkeypatch.setenv(DVC_STUDIO_TOKEN, "token")

    with Live() as live:
        live.log_metric("foo", 1)
        live.next_step()

    assert post_spy.call_count == 0