add ranks testing
wj-Mcat committed Jan 8, 2024
1 parent 97f6158 commit 1a5e8fc
Showing 2 changed files with 73 additions and 0 deletions.
39 changes: 39 additions & 0 deletions tests/llm/test_predictor.py
@@ -32,6 +32,7 @@
    get_path_from_url_with_filelock,
    url_file_exists,
)
from tests.testing_utils import GPUsTesting, require_gpu

from .testing_utils import LLMTest, argv_context_guard, load_test_config

@@ -205,3 +206,41 @@ def test_create_predictor_with_unexpected_length(self):

        with argv_context_guard(config):
            predict()


@parameterized_class(
    ["model_name_or_path", "model_class"],
    [
        ["__internal_testing__/tiny-random-llama", LlamaForCausalLM],
    ],
)
class GPUsPredictorTest(LLMTest, GPUsTesting, unittest.TestCase):
    config_path: str = "./tests/fixtures/llm/predictor.yaml"
    model_name_or_path: str = None
    model_class = None

    def setUp(self) -> None:
        super().setUp()
        self.model_class.from_pretrained(self.model_name_or_path, dtype="float16").save_pretrained(self.output_dir)
        AutoTokenizer.from_pretrained(self.model_name_or_path).save_pretrained(self.output_dir)

    @require_gpu(2)
    def test_predictor(self):
        self.init_dist_env()

        self.run_predictor({"inference_model": True})
        result_0 = self._read_result(os.path.join(self.output_dir, "predict.json"))
        self.run_predictor({"inference_model": False})
        result_1 = self._read_result(os.path.join(self.output_dir, "predict.json"))

        # Compare the generations of the inference (static-graph) and dygraph models.
        assert len(result_0) == len(result_1)

        count, full_match = 0, 0
        for inference_item, no_inference_item in zip(result_0, result_1):
            min_length = min(len(inference_item), len(no_inference_item))
            # Half-prefix agreement is the looser signal; full-overlap agreement is the stricter one.
            count += int(inference_item[: min_length // 2] == no_inference_item[: min_length // 2])
            full_match += int(inference_item[:min_length] == no_inference_item[:min_length])

        self.assertGreaterEqual(full_match / len(result_0), 0.25)
        self.assertGreaterEqual(count / len(result_0), 0.4)
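
The acceptance check above tolerates divergence late in generation: for each output pair it compares the first half of the overlapping prefix (count) and the full overlap (full_match), then requires 40% and 25% agreement respectively. A minimal, self-contained sketch of the same check on toy strings (hypothetical data, not from the test fixtures):

    # Hypothetical outputs from the two execution modes.
    result_0 = ["hello world", "foo bar"]  # inference (static-graph) model
    result_1 = ["hello world", "foo qux"]  # dygraph model

    count = full_match = 0
    for a, b in zip(result_0, result_1):
        n = min(len(a), len(b))
        count += int(a[: n // 2] == b[: n // 2])  # half-prefix agreement
        full_match += int(a[:n] == b[:n])         # full-overlap agreement

    assert full_match / len(result_0) >= 0.25  # same thresholds as the test
    assert count / len(result_0) >= 0.4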
34 changes: 34 additions & 0 deletions tests/testing_utils.py
@@ -26,6 +26,7 @@

import numpy as np
import paddle
import paddle.distributed.fleet as fleet
import yaml

from paddlenlp.trainer.argparser import strtobool
@@ -470,3 +471,36 @@ def require_paddle_up_to_2_gpus(test_case):
    import paddle

    return unittest.skipUnless(paddle.device.cuda.device_count() < 3, "test requires 0 or 1 or 2 GPUs")(test_case)


def require_gpu(min_gpus: int = 1):
    def actual_decorator(func):
        # The device count is read once, at decoration time.
        gpu_count = paddle.device.cuda.device_count()

        if gpu_count < min_gpus:
            return unittest.skip(f"test requires at least {min_gpus} GPUs, found {gpu_count}")(func)

        # Enough devices are available; run the test unchanged.
        return func

    return actual_decorator


class GPUsTesting(unittest.TestCase):
    def init_dist_env(self, config: dict = None):
        world_size = paddle.distributed.get_world_size()
        strategy = fleet.DistributedStrategy()
        # Default to pure model parallelism: a single model-parallel group spanning every rank.
        hybrid_configs = {
            "dp_degree": 1,
            "mp_degree": world_size,
            "pp_degree": 1,
            "sharding_degree": 1,
        }
        hybrid_configs.update(config or {})
        strategy.hybrid_configs = hybrid_configs

        fleet.init(is_collective=True, strategy=strategy)
        # Building the hybrid communicate group initializes the underlying process groups.
        fleet.get_hybrid_communicate_group()
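
The config argument of init_dist_env lets a test override any parallelism degree; the defaults give pure model parallelism (mp_degree equal to the world size), and overrides should keep the product of the degrees equal to the world size. A hypothetical override for a 4-GPU run (not part of this commit):

    # Hypothetical: on 4 GPUs, use 2-way data parallelism x 2-way model parallelism.
    self.init_dist_env(config={"dp_degree": 2, "mp_degree": 2})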
