conf

#####################	edu.cmu.lti.util.run.Tunner 
num_processes=1
	# number of working processes for tuning parameters in parallel (default=4)
tunner_mode=sweep
	# the type of parameter tuning (default=sweep)
num_avarage=1
	# we do experiments N times and average the scores (default=1)
split_summary=false
starting_id=0
print_std_dev=false
	# also print the standard deviations of the evaluation measures
#cmd=java -Xms16G -Xmx16G -cp ~/code_java/ni/class edu.cmu.pra.LearnerPRA
cmd=perl ../../pra.20G.pl
	# the command used to run an experiment during parameter tuning

##################### edu.cmu.lti.util.run.Learner
num_threads=1
task=train
	# decide the type of task to perform (default=sCV)
	train	// train using trainFile
	, test	//test using testFile
	, STT		//train test by splitting trainFile
	, TT		//train test using trainFile, testFile
	, CV		//cross validation using trainFile
	, sCV		//self cross validation using trainFile
	, sLOO	//self leave-one-out cross validation using trainFile
	, LOO		//leave one out cross validation using trainFile
	, predict	// without knowing the ground truth
id=a
	# add a postfix to the result folder
num_CV_folds=3
	# number of folds for cross validation (default=5)
train_rate=0.7
	# the ratio when splitting data into train and test sets (default=0.8)
time_field=0
	# the column id of query time in the data files

output_folder=./
#train_samples=../tmp/training_queries/<target_relation>
train_samples=./queriesR_train/<target_relation>
	# data file used for training
test_samples=./queriesR_test/<target_relation>
	# data file used for test

createQueries_max_relation_count=100000
	# max number of edges for a relation during query creation
	
createQueries_min_relation_count=10
	# min number of edges for a relation during query creation


##################### edu.cmu.pra.LearnerPRA
target_relation=worksfor
	# (optional) what relation we are training for 
	# _competeswith athleteplaysforteam citylocatedinstate  
	# _specializationof agentcompeteswithagent
	
graph_folder=./graphs/NELL165/
	# the folder where the graph is stored (e.g. NELL446svo, NELL165)
	
model=./models/<target_relation>
	# a weight file can be loaded for prediction
	
model_folder=./models/

prediction_folder=./predictions/

top_prediction_results=5
	
#subgraph_store=
	# subgraphs that can be loaded/unloaded
		
rank_mode=Path
	# the ranking mode used during training: what the learned weights are attached to (default=P)
	Path	//PathRank: each relation path has a weight
	Rel	//RelationRank: each relation has a weight
loss_mode=log
	#	the loss function (default=log)
	none //no training
	,log // log loss
	,exp	//exp loss
	,hinge	//hinge loss
L1=0.001
	#L1 regularization 
L2=0.001
	#L2 regularization 
pairwise_loss=false
	# optimizing partial ordering or relevance (default=false)
	false	//optimizing relevance
	true	//optimizing partial ordering
negative_mode=Sqr
	# how to select negative entities
	all		//use all negative samples
	,topK	//take top K*rNega samples ranked by plain weighting
	,expX	// take a^x:
	, Sqr		// take x^2: 0,1,4,9,16,25,...th negative samples
	,Tri	// take x^3: 0,1,8,27,64,125,...th negative samples
	,poly	// take x^a:
	,none // add none
negative_weight=20
	# the weight shared by all negative samples

cache_data=true

inspect_data=true

output_training_data=true
output_training_data_folder=./training_data/

#max_num_exploration_queries=400
max_num_exploration_particles=1000000

################## edu.cmu.pra.model.PRAModel	########################################
max_steps=3
	# max length of relation paths	-0
bias=true
	# add a bias term in the scoring function. can improve log-likelihood (default=true)
bias_value=1
	# value of the bias feature. can control its regularization (default=0.1)

#blocked_paths=1:a(_Author)p(Cites)p
	# some paths are not allowed in the path tree: idSeed,path;idSeed,path;...
	# a(_Author)p(Cites)p   _Author>Cites
	
#and_paths=1~_generalizations
	# the targets have to be in the result of this path

#not_paths=1:a(_Author)p
	# the targets cannot be in the result of this path
	#a(_Author)p		_Author

#rerank_and_paths=2:_generalizations
	# the and-filter used at rerank stage

#no_cost_relations=athleteplaysinleague;_athleteplaysinleague;teammate
	# a ; separated list of promoted relations which have no cost
	# (regular relations each have cost 1)
	# to avoid creating infinitely long paths, the path creation procedure
	# will prevent going forward-backward with zero-cost relations
	
min_feature_hit=3
min_feature_accuracy=0.01
cache_RW=false

################## edu.cmu.pra.model.EntityRank
entity_rank=false
	# whether to use entity rank or not
min_ER_steps=3
	# max length of entity rank paths (default=3); NOTE(review): the key is named min_ER_steps -- confirm whether this is a min or a max
ER_time_gap=2
	# only generate one entity rank for every N years, in order to save memory (default=1)

################## edu.cmu.pra.graph.Graph
timed_graph=false
	# whether the graph contains time information
time_type=year
	# the entity type which represents time (assuming its values can be parsed as integers)
time_relation=Year
	# the relation type that connects time entities to other entities
	# OBSOLETED: moved to the task file

################## edu.cmu.pra.graph.Walker
#inferred_relation_file=./infered_relations

RW_mode=LVParticle
	# how to keep the random walk distributions sparse (default=Q)
	Truncate, 	//Truncation
	Beem, 		//Beam
	LVWalker, // Sampling (low variance)
	Walker, 	// Sampling
	Particle, 	//particle filtering
	LVParticle, //particle filtering (low variance)
	Exact, 		//none
	
truncate=0.003
	# absolute truncation
beam_width=10000
	#beam truncation
num_walkers=10000
	#sampling
min_particle=0.0001
	#particle filtering

blocked_field=0
	# the field in query file which leads to the target node
	
RW_renormalize=false
damping_factor=1.0
	
##################### Optimization  #####################
max_train_iteration=100

epsCvg=0.00001
	# 0.002
maxLinfStep=10000
maxL1Step=20000
	20	100
stepAdapt=0.1
num_stable=2

#################### Visualization#####################

feature_comments=../../mFeatureComments.read
	# add explanations to selected relation paths
	
LBFGS_history=false


#################### edu.cmu.lti.util.run.Reporter #####################
#publish_folder=./www/test/
publish_folder=/afs/cs.cmu.edu/user/nlao/www/demo/wk/test/
	# the folder the report is copied to
		
reporter_keys=agentcompeteswithagent
#reporter_keys=athleteplaysforteam,athleteplaysinleague,athleteplayssport,stadiumlocatedincity,teamhomestadium,teamplaysincity,teamplaysinleague,teamplayssport,competeswith,hasofficeincity,journalistwritesforpublication,professionistypeofprofession,teamwontrophy,worksfor
	# the key words to search and group results

#result_filter=max_steps=3_0
	# only consider the subset of runs whose parameter string contains this filter string
	
col1=target_relation
	
col2=nSampling

valueCol=mrr,MAP,tRW,tCV
	# the metrics to be shown in the report