Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ICL fixes #176

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
5 changes: 4 additions & 1 deletion examples/common/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ def _validate_cfg(icl_cfg):
assert 'example_delimiter' in icl_cfg
assert 'continuation_delimiter' in icl_cfg
assert 'label' in icl_cfg
assert 'destination_path' in icl_cfg

for icl_cfg in cfg.icl_tasks:
_validate_cfg(icl_cfg)
Expand All @@ -124,7 +125,9 @@ def _validate_cfg(icl_cfg):
num_fewshot=num_fewshot,
prompt_string=icl_cfg.prompt_string,
example_delimiter=icl_cfg.example_delimiter,
continuation_delimiter=icl_cfg.continuation_delimiter)
continuation_delimiter=icl_cfg.continuation_delimiter,
destination_path=icl_cfg.destination_path,
)
logger_keys.extend([f'metrics/{label}/{m}' for m in metric_names])
evaluators.append(
Evaluator(label=label,
Expand Down
15 changes: 12 additions & 3 deletions examples/llm/icl_eval/evaluate_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@

import sys
import time
from typing import List

import torch
from composer.loggers import InMemoryLogger
from composer.loggers import InMemoryLogger, LoggerDestination
from composer.trainer import Trainer
from composer.utils import reproducibility
from omegaconf import DictConfig
from omegaconf import OmegaConf as om

from examples.common.builders import build_icl_evaluators
from examples.common.builders import build_icl_evaluators, build_logger
from examples.llm.src.model_registry import COMPOSER_MODEL_REGISTRY
from examples.llm.src.tokenizer import TOKENIZER_REGISTRY

Expand All @@ -21,13 +23,20 @@
cli_cfg = om.from_cli(args_list)
cfg = DictConfig(om.merge(yaml_cfg, cli_cfg))

reproducibility.seed_all(cfg.get('seed', 1234))

composer_model = COMPOSER_MODEL_REGISTRY[cfg.model.name](cfg.model)
tokenizer = TOKENIZER_REGISTRY[cfg.tokenizer.type](**cfg.tokenizer.args)
evaluators, logger_keys = build_icl_evaluators(cfg, tokenizer)
for evaluator in evaluators:
composer_model.add_eval_metrics(evaluator)

in_memory_logger = InMemoryLogger() # track metrics in the in_memory_logger
loggers: List[LoggerDestination] = [
build_logger(name, logger_cfg)
for name, logger_cfg in (cfg.get('loggers') or {}).items()
]
loggers.append(in_memory_logger)

fsdp_config = cfg.get('fsdp_config', None)
fsdp_config = om.to_container(
Expand All @@ -37,7 +46,7 @@

trainer = Trainer(
model=composer_model,
loggers=in_memory_logger,
loggers=loggers,
fsdp_config=fsdp_config, # type: ignore
load_path=load_path,
load_weights_only=True,
Expand Down
4 changes: 3 additions & 1 deletion examples/llm/icl_eval/yamls/gpt_neo_eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ icl_tasks:
prompt_string: '' # this goes at the beginning of each input
example_delimiter: '\n' # this goes between fewshot examples
continuation_delimiter: ' ' # this separates questions from answers
destination_path: piqa_local.jsonl
-
label: lambada
dataset_uri: # ADD YOUR OWN DATASET URI
Expand All @@ -41,4 +42,5 @@ icl_tasks:
- InContextLearningLMAccuracy
prompt_string: '' # this goes at the beginning of each input
example_delimiter: '\n' # this goes between fewshot examples
continuation_delimiter: '' # this separates contexts from continuations
continuation_delimiter: ' ' # this separates contexts from continuations
destination_path: lambada_local.jsonl
4 changes: 3 additions & 1 deletion examples/llm/icl_eval/yamls/mosaic_gpt_eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ icl_tasks:
prompt_string: '' # this goes at the beginning of each input
example_delimiter: '\n' # this goes between fewshot examples
continuation_delimiter: ' ' # this separates questions from answers
destination_path: piqa_local.jsonl
-
label: lambada
dataset_uri: # ADD YOUR OWN DATASET URI
Expand All @@ -50,4 +51,5 @@ icl_tasks:
- InContextLearningLMAccuracy
prompt_string: '' # this goes at the beginning of each input
example_delimiter: '\n' # this goes between fewshot examples
continuation_delimiter: '' # this separates contexts from continuations
continuation_delimiter: ' ' # this separates contexts from continuations
destination_path: lambada_local.jsonl