-
Notifications
You must be signed in to change notification settings - Fork 3
/
params.yaml
34 lines (32 loc) · 1.22 KB
/
params.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# Parameters for the `extract_text` section.
# NOTE(review): presumably the S3 bucket that holds the original documents to
# extract text from -- confirm against the code that reads this key.
extract_text:
  orig_document_s3_bucket: cornell-mfd64
# Parameters for the `generate_dataset` section.
generate_dataset:
  # NOTE(review): presumably the random seed that makes the train/test split
  # reproducible -- confirm against the code that reads this key.
  seed: 42
  # The fraction of the dataset that will be used for the test split. The
  # remainder will be used for the training split.
  test_split_frac: 0.3
  # Whether or not to publish the resulting datasets to HuggingFace Hub. If
  # false, the dataset will only be saved locally.
  publish_datasets: true
# Parameters for the `eval` section.
eval:
  # The number of rows of ground truth to use for generating results. Lower
  # this to reduce time and cost of evaluation. Rows are consumed in order, so
  # a value of 10 implies that the first 10 rows of the ground truth are
  # consumed.
  num-eval-rows: 10
  # The terms to run evaluation against. See `thesaurus.json` for a list of
  # valid terms.
  terms:
    - min_lot_size
    - min_unit_size
    - max_height
    - max_lot_coverage
    - max_lot_coverage_pavement
    - min_parking_spaces
  # The search method used to locate pages before extraction.
  # NOTE(review): the valid values are not listed in this file -- confirm
  # against the search implementation.
  search-method: elasticsearch
  # The extraction method to use on located pages. See `extract.py` for a
  # list of valid extraction methods.
  extraction-method: map
  # The number of results to consider when evaluating extraction accuracy and
  # page search recall. e.g., k=6 implies that an extraction is correct if any
  # of the top 6 results contain the correct answer.
  k: 12