Skip to content
rzanoli edited this page Feb 16, 2014 · 20 revisions

This page reports the results obtained by EOP users, together with the configuration files they used to produce those results. Using the reported configuration files allows other users to replicate the experiments under the same conditions and to avoid repeating experiments that have already been done.

Users who would like to share their results with the other EOP members can either add their results to this page directly (a GitHub account is needed) or contact the EOP members through the provided mailing lists.

Experiments:

| EDA | Data Set | Lexical Resources | Result: F1 (accuracy) | Experiments |
|---|---|---|---|---|
| EDITS | English RTE-3 | none | 0.615 (0.615) | EXP_1 |
| TIE | | | | EXP_2 |

EDITS

**EXPERIMENT:** EXP_1

Author name: Roberto Zanoli
Affiliation: Fondazione Bruno Kessler (FBK)
Contact: zanoli@fbk.eu

Data set: English RTE-3
Lexical Resources: none

Results:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Evaluation report. The figures below are mutually consistent:
     Accuracy 0.615 = (284 + 208) correctly classified pairs / 800 total pairs. -->
<Result EDA_Configuration="EditDistanceEDA_EN.xml_results.txt">
  <Total_Pairs>800</Total_Pairs>
  <Accuracy>0.615</Accuracy>
  <!-- The 410 positive pairs: 284 were classified as positive and 126 as negative.
       Recall = 284 / 410; Precision = 284 / (284 + 182), where 182 is the number of
       negative pairs that were classified as positive. -->
  <Positive_Pairs Number="410">
    <Precision>0.60944206</Precision>
    <Recall>0.6926829</Recall>
    <F_Measure>0.6484018</F_Measure>
    <Classified_As_Positive>284</Classified_As_Positive>
    <Classified_As_Negative>126</Classified_As_Negative>
  </Positive_Pairs>
  <!-- The 390 negative pairs: 208 were classified as negative and 182 as positive.
       Recall = 208 / 390; Precision = 208 / (208 + 126), where 126 is the number of
       positive pairs that were classified as negative. -->
  <Negative_Pairs Number="390">
    <Precision>0.6227545</Precision>
    <Recall>0.53333336</Recall>
    <F_Measure>0.5745857</F_Measure>
    <Classified_As_Positive>182</Classified_As_Positive>
    <Classified_As_Negative>208</Classified_As_Negative>
  </Negative_Pairs>
</Result>

Configuration file content:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Configuration for the edit distance EDA (English, token-based distance, no external
     lexical resources). It contains five sections: the platform settings, the two distance
     components (token and lemma based), the EDA settings and the learnt model. -->
<configuration>
	<!-- Platform configuration section; the information in this section is used by the EOPRunner class
	to perform both the processing of the data set and the running of the EDA -->
	<section name="PlatformConfiguration">
		<!-- The EDA to be used: EditDistanceEDA -->
		<property name="activatedEDA">eu.excitementproject.eop.core.EditDistanceEDA</property>
		<!-- The language: [EN] -->
		<property name="language">EN</property>
		<!-- The linguistic annotation pipeline to preprocess the data to be annotated: [OpenNLPTaggerEN|TreeTaggerEN] -->
		<!-- Unlike OpenNLPTagger, TreeTagger can produce the lemma and it is the pipeline to be
		used when FixedWeightLemmaEditDistance is selected. Be sure to have TreeTagger installed before using it -->
		<!-- <property name="activatedLAP">eu.excitementproject.eop.lap.dkpro.TreeTaggerEN</property> -->
		<property name="activatedLAP">eu.excitementproject.eop.lap.dkpro.OpenNLPTaggerEN</property>
	</section>


	<!-- FixedWeightTokenEditDistance uses the token to calculate the distance between each pair T-H -->
	<section name="eu.excitementproject.eop.core.component.distance.FixedWeightTokenEditDistance">

		<!-- Do not consider the stop words: [true|false] -->
		<property name="stopWordRemoval">true</property>

		<!-- The configuration to be used by the component: [basic|wordnet|wikipedia|wordnet,wikipedia] -->
		<property name="instances">basic</property>

		<!-- This configuration does not use any external resources -->
		<subsection name="basic"/>

		<!-- This configuration uses WordNet as an external resource -->
		<subsection name="wordnet">
			<!-- path of the WordNet files -->
			<property name="path">eop-resources-1.1.1/ontologies/EnglishWordNet-dict/</property>
		</subsection>

		<!-- This configuration uses Wikipedia as an external resource -->
		<subsection name="wikipedia">
			<!-- connection to the Wikipedia data base -->
			<!-- NOTE(review): the credentials below are stored in plain text. Replace them with the
			account of your own data base and avoid publishing real passwords. -->
			<property name="dbconnection">jdbc:mysql://nathrezim:3306/wikikb</property>
			<property name="dbuser">root</property>
			<property name="dbpasswd">nat_2k12</property>
		</subsection>

	</section>

	<!-- FixedWeightLemmaEditDistance uses the lemma to calculate the distance between each pair T-H -->
	<section name="eu.excitementproject.eop.core.component.distance.FixedWeightLemmaEditDistance">

		<!-- Do not consider the stop words: [true|false] -->
		<property name="stopWordRemoval">true</property>

		<!-- The configuration to be used by the component: [basic|wordnet|wikipedia|wordnet,wikipedia] -->
		<property name="instances">basic</property>

		<!-- This configuration does not use any external resources -->
		<subsection name="basic"/>

		<!-- This configuration uses WordNet as an external resource -->
		<subsection name="wordnet">
			<!-- path of the WordNet files -->
			<property name="path">eop-resources-1.1.1/ontologies/EnglishWordNet-dict/</property>
		</subsection>

		<!-- This configuration uses Wikipedia as an external resource -->
		<subsection name="wikipedia">
			<!-- connection to the Wikipedia data base -->
			<!-- NOTE(review): the credentials below are stored in plain text. Replace them with the
			account of your own data base and avoid publishing real passwords. -->
			<property name="dbconnection">jdbc:mysql://nathrezim:3306/wikikb</property>
			<property name="dbuser">root</property>
			<property name="dbpasswd">nat_2k12</property>
		</subsection>

	</section>


	<!-- EditDistanceEDA uses the weights in the configuration file to calculate the entailment -->
	<section name="eu.excitementproject.eop.core.EditDistanceEDA">

		<!-- weights of the edit distance operations -->
		<property name="match">0.0</property>
		<property name="delete">0.0</property>
		<property name="insert">1.0</property>
		<property name="substitute">1.0</property>

		<!-- <property name="trainDir">/tmp/</property> -->
		<property name="trainDir">/tmp/EN/dev/</property>
		<!-- <property name="testDir">/tmp/</property> -->
		<property name="testDir">/tmp/EN/test/</property>
		<!-- measure to be optimized: [accuracy|f1] -->
		<property name="measure">accuracy</property>
		<!-- component to be used by EDA: [FixedWeightTokenEditDistance|FixedWeightLemmaEditDistance]
		FixedWeightLemmaEditDistance can be used only when the preprocessing pipeline provides lemmas -->
		<property name="components">eu.excitementproject.eop.core.component.distance.FixedWeightTokenEditDistance</property>

	</section>


	<!-- The information in this section is calculated automatically during the training phase by the EDA and
	it represents the learnt model. -->
	<section name="model">

		<!-- threshold -->
		<property name="threshold">0.5741758241758221</property>
		<!-- the accuracy obtained on the training data set -->
		<property name="trainingAccuracy">0.6575</property>

	</section>

</configuration>

TIE

**EXPERIMENT:** EXP_2

Author name:
Affiliation:
Contact:

Data set:
Lexical Resources:

Clone this wiki locally