# cgelbank2-punct.prm
#
# disco-dop parameter file: comma-separated ``key=value`` pairs. Only
# comments and key=value entries may appear at top level, so the file
# name above is kept as a comment rather than a bare token.

# Parsing pipeline: a single stage.
# The per-option glosses follow the disco-dop parameter documentation;
# verify them against the installed version.
stages=[
    {
        'name': 'dop',  # identifier of this stage
        'mode': 'plcfrs',  # PLCFRS parser
        'dop': 'doubledop',  # enable DOP mode; use Double-DOP
        'm': 1000,  # number of derivations to sample/enumerate
        'estimator': 'rfe',  # choices: rfe, ewe
        'objective': 'mpp',  # choices: mpp, mpd, shortest, sl-dop[-simple]
    },
],

# Evaluation and corpus settings.
# EVALB-style parameter file used for evaluation:
evalparam='../disco-dop/proper.prm',
# Treebank format; choices: export, bracket, discbracket, alpino, tiger
corpusfmt='export',

# NOTE(review): the train and test corpus below read the same file with
# skiptrain=False and skip=0, so the test sentences overlap the training
# sentences -- confirm this is intended.
traincorpus={
    'path': 'cgelbank2-punct.export',
    'encoding': 'utf-8',
    # sentences in the train corpus may have at most this many words
    'maxwords': 100,
    # length (in sentences) of the training corpus
    'numsents': 9999,
},
testcorpus={
    'path': 'cgelbank2-punct.export',
    'encoding': 'utf-8',
    # sentences in the test corpus may have at most this many words
    'maxwords': 100,
    # (max) number of test sentences to parse
    'numsents': 9999,
    # when the train & test set are read from the same file, enable this
    # to skip the training sentences to get to the test set.
    'skiptrain': False,
    # skip (additional) sentences between train & test set
    'skip': 0,
},

# Treebank preprocessing. The option lists below follow the disco-dop
# parameter documentation; verify them against the installed version.
punct='move',  # options:
#   None: leave punctuation as is
#   'move': move punctuation to appropriate constituents using heuristics
#   'moveall': same as 'move', but moving all preterminals under root
#   'prune': prune away leading & ending quotes & periods, then move
#   'remove': eliminate punctuation
#   'removeall': eliminate all preterminals directly under root
#   'root': attach punctuation directly to root
functions=None,  # options:
#   None: leave syntactic labels as is
#   'add': concatenate grammatical function to syntactic label,
#       separated by a hyphen, e.g., NP => NP-SBJ
#   'remove': strip away hyphen-separated grammatical function
#   'replace': replace syntactic label with grammatical function
morphology=None,  # options:
#   None: use POS tags as preterminals
#   'add': concatenate morphological information to POS tags
#   'replace': use morphological information as preterminal label
#   'between': add node with morphological information between
#       POS tag and word

# Root label; presumably every tree is ensured to have a root node with
# this label -- confirm in the disco-dop documentation.
ensureroot='ROOT',

# POS tagging configuration; set postagging=None to use the tags from
# the treebank instead.
postagging={
    # choices: unknownword (assign during parsing),
    #    treetagger, stanford (external taggers)
    'method': 'unknownword',
    # for method 'unknownword', choices are: 4, 6, base;
    # for treetagger / stanford: [filename of external tagger model]
    'model': '4',
    # options for unknown word models:
    # use probs of rare words for unknown words
    'unknownthreshold': 1,
    # add unseen tags for known words. 0 to disable.
    'openclassthreshold': 50,
},

# Binarization settings.
binarization={
    # choices: default, optimal, optimalhead
    'method': 'default',
    # right factored binarization
    # (applicable for non-optimal binarizations)
    'factor': 'right',
    # file with rules for head assignment (currently disabled):
    # 'headrules': 'alpino.headrules',
    # horizontal Markovization: number of siblings of context
    'h': 1,
    # vertical Markovization; v=1 means no additional parent annotation.
    'v': 1,
    # horizontal Markovization: number of siblings of preceding context
    'revh': 0,
    # when v > 1, add parent annotation to POS tags?
    'pospa': False,
    # prepend label of head node to siblings
    'markhead': True,
    # start binarization with unary node
    'leftmostunary': False,
    # end binarization with unary node
    'rightmostunary': False,
    # symbol to add to last node in a binarization, to mark head node
    'tailmarker': '',
    # reverse order for horizontal Markovization
    'revmarkov': False,
    # whether to add fanout markers before binarization, to distinguish
    # them for markovization,
    # e.g., VP|<NP_2-VVFIN> instead of VP|<NP-VVFIN>
    'fanout_marks_before_bin': False,
},

# misc
# verbosity levels:
#   0=silent; 1=summary report; 2=per sentence results;
#   3=dump derivations/parse trees.
verbosity=2,
numproc=1,  # increase to use multiple CPUs. Set to None to use all CPUs.