Skip to content

Commit

Permalink
[INF2] give better room for more tokens (deepjavalibrary#710)
Browse files (browse the repository at this point in the history)
* give better room for more tokens

* revert the tests
  • Loading branch information
Qing Lan authored May 12, 2023
1 parent a583888 commit 14d0e67
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 5 deletions.
4 changes: 2 additions & 2 deletions tests/integration/llm/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@
"seq_length": [64, 128]
},
"bigscience/bloom-3b": {
-"max_memory_per_gpu": [0.1, 5.0, 6.0, 0.1],
+"max_memory_per_gpu": [5.0, 6.0],
"batch_size": [1, 4],
"seq_length": [16, 32],
"worker": 1,
Expand Down Expand Up @@ -139,7 +139,7 @@
"worker": 2
},
"gpt-neo-1.3b": {
-"max_memory_per_gpu": [0.1, 4.0, 0.1, 5.0],
+"max_memory_per_gpu": [4.0, 5.0],
"batch_size": [1, 4],
"seq_length": [16],
"worker": 1,
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/llm/prepare.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,15 +238,15 @@
"option.model_id": "gpt2",
"option.batch_size": 4,
"option.tensor_parallel_degree": 2,
-"option.n_positions": 256,
+"option.n_positions": 512,
"option.dtype": "fp16",
"option.model_loading_timeout": 600
},
"opt-1.3b": {
"option.model_id": "s3://djl-llm/opt-1.3b/",
"option.batch_size": 4,
"option.tensor_parallel_degree": 4,
-"option.n_positions": 256,
+"option.n_positions": 512,
"option.dtype": "fp16",
"option.model_loading_timeout": 600
},
Expand All @@ -262,7 +262,7 @@
"option.model_id": "s3://djl-llm/opt-1.3b/",
"option.batch_size": 1,
"option.tensor_parallel_degree": 4,
-"option.n_positions": 256,
+"option.n_positions": 512,
"option.dtype": "fp16",
"option.model_loading_timeout": 600,
"option.enable_streaming": True
Expand Down

0 comments on commit 14d0e67

Please sign in to comment.