Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate to pydantic >= 2.0 #613

Merged
merged 38 commits into from
Jul 13, 2024
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
24aabf8
first pass at migration to pydantic > 2.0
Vectorrent Jun 4, 2024
62bd489
fix linting and tests
Vectorrent Jun 5, 2024
8801689
migrate deprecated torch.cuda.amp imports to torch.amp
Vectorrent Jun 7, 2024
78cfaed
add if/else check to handle differences between torch versions
Vectorrent Jun 7, 2024
51542ed
fix linting
Vectorrent Jun 7, 2024
b254425
update wandb package, fixing docker build
Vectorrent Jun 7, 2024
945176a
fix validators, and pass tests locally in Docker
Vectorrent Jun 7, 2024
6ca96f4
fix deprecated pkg_resources import
Vectorrent Jun 7, 2024
5cfe829
fix 2 tests
Vectorrent Jun 7, 2024
917eda7
fix pkg_resources again
Vectorrent Jun 7, 2024
c968b58
pain
Vectorrent Jun 7, 2024
c1dc7e2
partial fix of p2pd
Vectorrent Jun 7, 2024
42aa111
revert to old failing method for now
Vectorrent Jun 7, 2024
6f43d74
oops
Vectorrent Jun 7, 2024
46efb9b
fix pydantic deprecations, p2pd path handling
Vectorrent Jun 9, 2024
3045008
update black version
Vectorrent Jun 9, 2024
d4a4d26
revert black
Vectorrent Jun 9, 2024
055d1ce
make path handling work across all Python versions
Vectorrent Jun 9, 2024
df9f4e9
revert path handling
Vectorrent Jun 9, 2024
a55e527
Merge branch 'master' into upgrade-pydantic
justheuristic Jun 9, 2024
3d3e444
Merge branch 'master' into upgrade-pydantic
justheuristic Jun 9, 2024
bc5a52f
Merge commit '3d3e444618ca1b757061c607fe7a399f750ca0e3' into upgrade-pydantic
Vectorrent Jun 10, 2024
32a1c68
revert to v1 api
Vectorrent Jun 10, 2024
cdb3de8
test 3.12 as well
Vectorrent Jun 10, 2024
3e22780
this error message has changed slightly
Vectorrent Jun 11, 2024
eba80fa
update test with unexpected error message
Vectorrent Jun 11, 2024
a8ae34b
rerun tests
Vectorrent Jun 11, 2024
84e225d
fix another edge case
Vectorrent Jun 11, 2024
5a8c31f
address comments
Vectorrent Jun 11, 2024
47d932c
revert changes
Vectorrent Jun 12, 2024
da4d798
restore broken amp
Vectorrent Jun 12, 2024
3cd9d38
revert to 2.0.0
Vectorrent Jun 12, 2024
d3593eb
un-fix other things
Vectorrent Jun 13, 2024
19004cc
nit
Vectorrent Jun 13, 2024
b2d95c8
nit
Vectorrent Jun 13, 2024
ad080ed
Restore Max's optimizer commit
Vectorrent Jun 13, 2024
6df0176
Merge branch 'master' into upgrade-pydantic
mryab Jul 13, 2024
13bfad3
restore a line break
Vectorrent Jul 13, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11' ]
python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11', '3.12' ]
Vectorrent marked this conversation as resolved.
Show resolved Hide resolved
timeout-minutes: 15
steps:
- uses: actions/checkout@v3
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/benchmark_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def run_trainer(batch_size: int, batch_time: float, client_mode: bool, verbose:
grad_scaler = hivemind.GradScaler()
else:
# check that hivemind.Optimizer supports regular PyTorch grad scaler as well
grad_scaler = torch.cuda.amp.GradScaler(enabled=args.use_amp)
grad_scaler = torch.amp.GradScaler(enabled=args.use_amp)
Vectorrent marked this conversation as resolved.
Show resolved Hide resolved

prev_time = time.perf_counter()

Expand All @@ -107,7 +107,7 @@ def run_trainer(batch_size: int, batch_time: float, client_mode: bool, verbose:

batch = torch.randint(0, len(X_train), (batch_size,))

with torch.cuda.amp.autocast() if args.use_amp else nullcontext():
with torch.amp.autocast() if args.use_amp else nullcontext():
loss = F.cross_entropy(model(X_train[batch].to(args.device)), y_train[batch].to(args.device))
grad_scaler.scale(loss).backward()

Expand Down
2 changes: 1 addition & 1 deletion examples/albert/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
transformers~=4.6
datasets~=1.5
torch_optimizer==0.1.0
wandb==0.10.26
wandb==0.17.1
Vectorrent marked this conversation as resolved.
Show resolved Hide resolved
sentencepiece
requests
nltk==3.6.7
2 changes: 1 addition & 1 deletion examples/albert/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict, List, Tuple

from pydantic import BaseModel, StrictFloat, confloat, conint
from pydantic.v1 import BaseModel, StrictFloat, confloat, conint

from hivemind.dht.crypto import RSASignatureValidator
from hivemind.dht.schema import BytesWithPublicKey, SchemaValidator
Expand Down
2 changes: 1 addition & 1 deletion hivemind/dht/schema.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
from typing import Any, Dict, Optional, Type

import pydantic
import pydantic.v1 as pydantic

from hivemind.dht.crypto import RSASignatureValidator
from hivemind.dht.protocol import DHTProtocol
Expand Down
2 changes: 1 addition & 1 deletion hivemind/optim/progress_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Dict, Optional

import numpy as np
from pydantic import BaseModel, StrictBool, StrictFloat, confloat, conint
from pydantic.v1 import BaseModel, StrictBool, StrictFloat, confloat, conint

from hivemind.dht import DHT
from hivemind.dht.schema import BytesWithPublicKey, RSASignatureValidator, SchemaValidator
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ configargparse>=1.2.3
py-multihash>=0.2.3
multiaddr @ git+https://github.com/multiformats/py-multiaddr.git@e01dbd38f2c0464c0f78b556691d655265018cce
cryptography>=3.4.6
pydantic>=1.8.1,<2.0
pydantic>=2.5.3
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This version is quite recent, can you provide any reasoning behind how it was chosen? Maybe we can bump to just 2.0? Ideally, we should even keep backwards compatibility with older versions

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I chose this version because it was the highest possible version I could use, which was still compatible with all of the tests. Remember: the whole reason we're upgrading Pydantic is because this old version has been conflicting with other dependencies, in other projects.

We can revert to 2.0.0 if you still want me to do that, though 2.5.3 seems to be working fine.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO, if the code is still compatible with pydantic v1, it might be worth it to just do pydantic>1.8.1

packaging>=20.9
20 changes: 10 additions & 10 deletions tests/test_dht_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Dict

import pytest
from pydantic import BaseModel, StrictInt, conint
from pydantic.v1 import BaseModel, StrictInt, conint

import hivemind
from hivemind.dht.node import DHTNode
Expand All @@ -12,9 +12,9 @@


class SampleSchema(BaseModel):
experiment_name: bytes
n_batches: Dict[bytes, conint(ge=0, strict=True)]
signed_data: Dict[BytesWithPublicKey, bytes]
experiment_name: bytes = None
n_batches: Dict[bytes, conint(ge=0, strict=True)] = None
signed_data: Dict[BytesWithPublicKey, bytes] = None
Vectorrent marked this conversation as resolved.
Show resolved Hide resolved


@pytest.fixture
Expand Down Expand Up @@ -95,10 +95,10 @@ async def test_expecting_public_keys(dht_nodes_with_schema):
@pytest.mark.asyncio
async def test_keys_outside_schema(dht_nodes_with_schema):
class Schema(BaseModel):
some_field: StrictInt
some_field: StrictInt = None

class MergedSchema(BaseModel):
another_field: StrictInt
another_field: StrictInt = None

for allow_extra_keys in [False, True]:
validator = SchemaValidator(Schema, allow_extra_keys=allow_extra_keys)
Expand All @@ -122,7 +122,7 @@ class MergedSchema(BaseModel):
@pytest.mark.asyncio
async def test_prefix():
class Schema(BaseModel):
field: StrictInt
field: StrictInt = None

validator = SchemaValidator(Schema, allow_extra_keys=False, prefix="prefix")

Expand Down Expand Up @@ -154,11 +154,11 @@ def validate(self, record: DHTRecord) -> bool:
assert not alice.protocol.record_validator.merge_with(second_validator)

class SecondSchema(BaseModel):
some_field: StrictInt
another_field: str
some_field: StrictInt = 0
another_field: str = None

class ThirdSchema(BaseModel):
another_field: StrictInt # Allow it to be a StrictInt as well
another_field: StrictInt = 0 # Allow it to be a StrictInt as well

for schema in [SecondSchema, ThirdSchema]:
new_validator = SchemaValidator(schema, allow_extra_keys=False)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_dht_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Dict

import pytest
from pydantic import BaseModel, StrictInt
from pydantic.v1 import BaseModel, StrictInt

import hivemind
from hivemind.dht.crypto import RSASignatureValidator
Expand Down
4 changes: 2 additions & 2 deletions tests/test_p2p_daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,12 @@ async def test_daemon_killed_on_del():

@pytest.mark.asyncio
async def test_startup_error_message():
with pytest.raises(P2PDaemonError, match=r"(?i)Failed to connect to bootstrap peers"):
with pytest.raises(P2PDaemonError, match=r"(?i)Failed to connect to bootstrap peers|Daemon failed to start"):
await P2P.create(
initial_peers=[f"/ip4/127.0.0.1/tcp/{get_free_port()}/p2p/QmdaK4LUeQaKhqSFPRu9N7MvXUEWDxWwtCvPrS444tCgd1"]
)

with pytest.raises(P2PDaemonError, match=r"Daemon failed to start in .+ seconds"):
with pytest.raises(P2PDaemonError, match=r"Daemon failed to start|error accepting connection"):
await P2P.create(startup_timeout=0.01) # Test that startup_timeout works


Expand Down
10 changes: 9 additions & 1 deletion tests/test_utils/p2p_daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,24 @@
import time
import uuid
from contextlib import asynccontextmanager, suppress

# from pathlib import Path
from typing import NamedTuple

from multiaddr import Multiaddr, protocols
from pkg_resources import resource_filename

from hivemind.p2p.p2p_daemon_bindings.p2pclient import Client

from test_utils.networking import get_free_port

TIMEOUT_DURATION = 30 # seconds

# hivemind_dir = Path(__file__).parent / "hivemind" / "hivemind_cli"

# P2PD_PATH = str(hivemind_dir / "p2pd")
Vectorrent marked this conversation as resolved.
Show resolved Hide resolved

from pkg_resources import resource_filename

P2PD_PATH = resource_filename("hivemind", "hivemind_cli/p2pd")


Expand Down
Loading