From 48144923696b78c3ea818cea822d42af1e164a99 Mon Sep 17 00:00:00 2001 From: Joachim Bingel Date: Tue, 9 Apr 2019 21:07:45 +0200 Subject: [PATCH] towards 0.3, new structure for simplification pipeline, move away from Pickle --- .gitignore | 3 +- README.md | 5 + lexi/config.py | 10 +- lexi/core/endpoints.py | 22 +- lexi/core/featurize/feat_util.py | 49 +- lexi/core/featurize/featurizers.py | 12 + lexi/core/featurize/functions.py | 1 + lexi/core/simplification/__init__.py | 44 +- lexi/core/simplification/lexical.py | 435 +-- lexi/core/simplification/structured.py | 16 +- lexi/core/simplification/util.py | 61 + lexi/lib/__init__.py | 0 lexi/lib/lexenstein/__init__.py | 0 lexi/lib/lexenstein/evaluators.py | 572 ---- lexi/lib/lexenstein/features.py | 3547 ------------------------ lexi/lib/lexenstein/generators.py | 2129 -------------- lexi/lib/lexenstein/identifiers.py | 395 --- lexi/lib/lexenstein/morphadorner.py | 175 -- lexi/lib/lexenstein/rankers.py | 1450 ---------- lexi/lib/lexenstein/selectors.py | 1569 ----------- lexi/lib/lexenstein/spelling.py | 74 - lexi/lib/lexenstein/util.py | 383 --- lexi/lib/lib.py | 795 ------ lexi/server/run_lexi_server.py | 90 +- lexi/server/util/database.py | 8 +- lexi/server/util/html.py | 8 +- requirements.txt | 3 +- scripts/train_default_classifier.py | 41 +- 28 files changed, 487 insertions(+), 11410 deletions(-) create mode 100644 lexi/core/featurize/functions.py create mode 100644 lexi/core/simplification/util.py delete mode 100644 lexi/lib/__init__.py delete mode 100755 lexi/lib/lexenstein/__init__.py delete mode 100755 lexi/lib/lexenstein/evaluators.py delete mode 100755 lexi/lib/lexenstein/features.py delete mode 100755 lexi/lib/lexenstein/generators.py delete mode 100755 lexi/lib/lexenstein/identifiers.py delete mode 100755 lexi/lib/lexenstein/morphadorner.py delete mode 100755 lexi/lib/lexenstein/rankers.py delete mode 100755 lexi/lib/lexenstein/selectors.py delete mode 100755 lexi/lib/lexenstein/spelling.py delete mode 100755 lexi/lib/lexenstein/util.py delete mode 100644 lexi/lib/lib.py diff --git a/.gitignore b/.gitignore index 96af683..c019046 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ models/* *.pyc trash/ .idea -lexi.cfg \ No newline at end of file +lexi.cfg +lexi/res/* diff --git a/README.md b/README.md index 604134d..f51ceef 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,11 @@ ## Changelog + +### Version 0.3 ++ no more pickling! 
++ POS-based synonym selection + ### Version 0.2.5 + more general database error handling diff --git a/lexi/config.py b/lexi/config.py index 7fbb12b..e2d1446 100644 --- a/lexi/config.py +++ b/lexi/config.py @@ -5,13 +5,17 @@ LOG_DIR = os.path.join(LEXI_BASE, "logs") MODELS_DIR = os.path.join(LEXI_BASE, "models") RANKER_MODELS_DIR = os.path.join(MODELS_DIR, "rankers") +CWI_MODELS_DIR = os.path.join(MODELS_DIR, "cwi") RESOURCES_DIR = os.path.join(LEXI_BASE, "res") +STANFORDNLP = os.path.join(RESOURCES_DIR, "stanfordnlp_resources") RANKER_MODEL_PATH_TEMPLATE = os.path.join(RANKER_MODELS_DIR, "{}.pickle") +CWI_MODEL_PATH_TEMPLATE = os.path.join(CWI_MODELS_DIR, "{}.pickle") + LEXICAL_MODEL_PATH_TEMPLATE = os.path.join(MODELS_DIR, "{}-lexical.pickle") MODEL_PATH_TEMPLATE = os.path.join(MODELS_DIR, "{}.pickle") -RESOURCES = { +RESOURCES_FULL = { "da": { "embeddings": #[RESOURCES_DIR+"/da/embeddings/danish_word_vectors_1300_cbow_ @@ -25,10 +29,10 @@ "ranking_training_dataset": RESOURCES_DIR+"/da/simplification/clean_danish_ls_dataset.txt", "synonyms": - RESOURCES_DIR + "/da/synonyms/da_synonyms_combined.csv"} + [RESOURCES_DIR + "/da/synonyms/da_synonyms_combined.csv"]} } -RESOURCES_TEST = { +RESOURCES = { "da": { "embeddings": [RESOURCES_DIR+"/da/embeddings/danish_word_vectors_1300_cbow_" diff --git a/lexi/core/endpoints.py b/lexi/core/endpoints.py index 66be11c..f933974 100644 --- a/lexi/core/endpoints.py +++ b/lexi/core/endpoints.py @@ -35,7 +35,7 @@ def process_html_structured(classifier, html, ranker, parId): spanId = 0 if not html.strip(): return html - output_sents = classifier.predict_text(html, ranker) + output_sents = classifier.simplify_text(html, ranker) for original, simple in zip(*output_sents): simple_parsed = parser.parse_sent(simple) logger.debug([simple_parsed, simple.replace('\n', ''), parser]) @@ -64,17 +64,18 @@ def process_html_structured(classifier, html, ranker, parId): return " ".join(html_out), simplifications -def process_html_lexical(classifier, html, startOffset, endOffset, ranker, +def process_html_lexical(pipeline, html, startOffset, endOffset, cwi, ranker, requestId=0, min_similarity=0.7, blacklist=None): """ Transforms HMTL source, enriching simplified words with core markup by separating markup from text and sending pure text to simplification class. 
- :param classifier: Simplification classifier instance + :param pipeline: Simplification pipeline instance :param html: Input HTML source :param startOffset: offset after which simplifications are solicited :param endOffset: offset until which simplifications are solicited + :param cwi: personalized CWI module :param ranker: personalized ranker :param requestId: Request identifier to disambiguate core simplification targets across multiple calls to this method @@ -115,8 +116,8 @@ def get_local_hyperlink_balance(tags): # output is a sequence of tokens including whitespaces, id2simplification # is a dict mapping token IDs to simplifications, if applicable offset2html, pure_text = util.filter_html(html) - offset2simplification = classifier.predict_text( - pure_text, startOffset, endOffset, ranker, + offset2simplification = pipeline.simplify_text( + pure_text, startOffset, endOffset, cwi=cwi, ranker=ranker, min_similarity=min_similarity, blacklist=blacklist) logger.debug("Simplifying text between character offsets {} " "and {}: {}".format(startOffset, endOffset, pure_text)) @@ -130,11 +131,12 @@ def get_local_hyperlink_balance(tags): html_out += "".join(offset2html[i]) if i in offset2simplification and not open_hyperlinks_count > 0: # checking for hyperlinks because we don't want to simplify those - original, simple, sentence, word_index = offset2simplification[i] + original, replacements, \ + sentence, word_index = offset2simplification[i] # in future, possibly get more alternatives, and possibly return # in some other order - choices = [original, simple] - simple = util.escape(simple) + replacements = [util.escape(r) for r in replacements] + choices = [original] + replacements spanId += 1 elemId = "lexi_{}_{}".format(requestId, spanId) displaying_original = "true" if choices[0] == original else "false" @@ -151,7 +153,7 @@ def get_local_hyperlink_balance(tags): {elemId: { "request_id": requestId, "original": original, - "simple": simple, # legacy for frontend version <= 0.2 + "simple": replacements, # legacy for frontend version <= 0.2 "choices": choices, "bad_feedback": False, "selection": 0, @@ -168,6 +170,7 @@ def get_local_hyperlink_balance(tags): return html_out, simplifications +# TODO adapt to new structure def update_classifier(classifier, feedback): """ Featurizes simplification feedback from user and updates classifier @@ -191,6 +194,7 @@ def update_classifier(classifier, feedback): classifier.featurize_train(xs, ys) +# TODO adapt to new structure def update_ranker(ranker, user_id, feedback, overall_rating=0): """ Collects feedback and updates ranker diff --git a/lexi/core/featurize/feat_util.py b/lexi/core/featurize/feat_util.py index f762344..4fe6d97 100644 --- a/lexi/core/featurize/feat_util.py +++ b/lexi/core/featurize/feat_util.py @@ -2,14 +2,17 @@ import networkx as nx import numpy as np -import spacy +# import stanfordnlp from networkx.algorithms.traversal.depth_first_search import dfs_edges - +from lexi.config import STANFORDNLP from lexi.core.featurize.util import resources COMMA = "," VERB = "V" -nlp = spacy.load('en') +# nlp = stanfordnlp.Pipeline(nlp = stanfordnlp.Pipeline( +# processors='tokenize,mwt,pos', +# lang='da', models_dir=STANFORDNLP, +# tokenize_pretokenized=True)) class EtymWN: @@ -121,26 +124,26 @@ def has_ancestor_in_lang(lang, word_etym): return True return False - -def read_sentences_plain(raw_data): - doc = nlp(raw_data) - words_seen = 0 - for s in doc.sents: - sent = defaultdict(list) - for i, w in enumerate(s): - sent["idx"].append(i+1) - 
sent["form"].append(w.text) - sent["lemma"].append(w.lemma_) - sent["pos"].append(w.pos_) - ne = w.ent_type_ if w.ent_type_ else "O" - sent["ne"].append(ne) - # target = w.head.i - words_seen if w.dep_.lower() != "root" else -1 - target = w.head.i - words_seen - sent["head"].append(target+1) - sent["deprel"].append(w.dep_) - sent["label"].append("?") - words_seen += len(s) - yield sent +# +# def read_sentences_plain(raw_data): +# doc = nlp(raw_data) +# words_seen = 0 +# for s in doc.sentences: +# sent = defaultdict(list) +# for i, w in enumerate(s): +# sent["idx"].append(i+1) +# sent["form"].append(w.text) +# sent["lemma"].append(w.lemma_) +# sent["pos"].append(w.pos_) +# ne = w.ent_type_ if w.ent_type_ else "O" +# sent["ne"].append(ne) +# # target = w.head.i - words_seen if w.dep_.lower() != "root" else -1 +# target = w.head.i - words_seen +# sent["head"].append(target+1) +# sent["deprel"].append(w.dep_) +# sent["label"].append("?") +# words_seen += len(s) +# yield sent def read_sentences(data): diff --git a/lexi/core/featurize/featurizers.py b/lexi/core/featurize/featurizers.py index ed04eea..16a0052 100644 --- a/lexi/core/featurize/featurizers.py +++ b/lexi/core/featurize/featurizers.py @@ -3,6 +3,7 @@ from lexi.core.featurize import extract_lexical_feats, feat_util from lexi.core.featurize.extract_sentence_feats import TreeNode +from abc import ABCMeta, abstractmethod class LabelMapper: @@ -30,6 +31,17 @@ def map_inv(self, ids): return out +class LexiFeaturizer(metaclass=ABCMeta): + + @abstractmethod + def save(self, path): + raise NotImplementedError + + @abstractmethod + def load(self, path): + raise NotImplementedError + + class Featurizer: def __init__(self, features=None): diff --git a/lexi/core/featurize/functions.py b/lexi/core/featurize/functions.py new file mode 100644 index 0000000..8a4e560 --- /dev/null +++ b/lexi/core/featurize/functions.py @@ -0,0 +1 @@ +# # # Feature Functions diff --git a/lexi/core/simplification/__init__.py b/lexi/core/simplification/__init__.py index 303f994..19f7d9f 100644 --- a/lexi/core/simplification/__init__.py +++ b/lexi/core/simplification/__init__.py @@ -1,47 +1,9 @@ from abc import ABCMeta, abstractmethod -from sacremoses import MosesDetokenizer -detokenizer = MosesDetokenizer() - -class Classifier(metaclass=ABCMeta): - # @abstractmethod - # def fresh_train(self, x, y): - # pass - - @abstractmethod - def predict(self, x, ranker=None): - raise NotImplementedError +class SimplificationPipeline(metaclass=ABCMeta): @abstractmethod - def predict_text(self, txt, startOffset=0, endOffset=None, ranker=None): + def simplify_text(self, txt, startOffset=0, endOffset=None, + cwi=None, ranker=None): raise NotImplementedError - - @abstractmethod - def update(self, x, y): - raise NotImplementedError - - @abstractmethod - def save(self): - raise NotImplementedError - - @abstractmethod - def load(self, model_id): - raise NotImplementedError - - @abstractmethod - def load_default_init(self): - raise NotImplementedError - - @abstractmethod - def check_featurizer_set(self): - raise NotImplementedError - - -# Classifier.register(DummyLexicalClassifier) -# Classifier.register(PystructClassifier) -# Classifier.register(LexensteinSimplifier) -# PystructClassifier.register(ChainCRFClassifier) -# PystructClassifier.register(EdgeCRFClassifier) -# Classifier.register(AveragedPerceptron) -# Classifier.register(OnlineStructuredPerceptron) diff --git a/lexi/core/simplification/lexical.py b/lexi/core/simplification/lexical.py index 4e9586c..af99b1a 100644 --- 
a/lexi/core/simplification/lexical.py +++ b/lexi/core/simplification/lexical.py @@ -1,115 +1,69 @@ import logging import pickle +import os -from lexi.config import RESOURCES, LEXICAL_MODEL_PATH_TEMPLATE -from lexi.core.simplification import Classifier -from lexi.core.featurize.featurizers import LexicalFeaturizer +from lexi.config import LEXICAL_MODEL_PATH_TEMPLATE, RANKER_MODEL_PATH_TEMPLATE +from lexi.core.simplification import SimplificationPipeline +from lexi.core.simplification.util import make_synonyms_dict, \ + parse_embeddings +from lexi.core.featurize.featurizers import LexicalFeaturizer, LexiFeaturizer from lexi.core.util import util -from lexi.lib.lexenstein.features import FeatureEstimator -from lexi.lib.lib import LexensteinGenerator, BoundaryRanker, BoundarySelector,\ - OnlineRegressionRanker, SynonymDBGenerator - +from abc import ABCMeta, abstractmethod +import keras +from keras.layers import Input, Dense +from sklearn.feature_extraction import DictVectorizer logger = logging.getLogger('lexi') -class LexensteinSimplifier(Classifier): +class LexicalSimplificationPipeline(SimplificationPipeline): def __init__(self, userId, language="da"): self.language = language self.userId = userId + self.cwi = None self.generator = None self.selector = None self.ranker = None - # self.fresh_train(resources) - - def generateCandidates(self, sent, target, index, min_similarity=0.6): - # Produce candidates: - subs = self.generator.getSubstitutionsSingle( - sent, target, index, min_similarity=min_similarity) - # Create input data instance: - fulldata = [sent, target, index] - for sub in subs[target]: - fulldata.append('0:'+sub) - fulldata = [fulldata] - - # Return requested structures: - return fulldata - - def selectCandidates(self, data): - # # If there are not enough candidates to be selected, select none: - # if len(data[0]) < 5: - # selected = [[]] - # else: - selected = self.selector.selectCandidates( - data, 0.65, proportion_type='percentage') - - # Produce resulting data: - fulldata = [data[0][0], data[0][1], data[0][2]] - for sub in selected[0]: - fulldata.append('0:'+sub) - fulldata = [fulldata] - - # Return desired objects: - return fulldata - - def rankCandidates(self, data, ranker=None): - # Rank selected candidates: - if ranker: - ranks = ranker.getRankings(data) - elif self.ranker: - ranks = self.ranker.getRankings(data) - else: - raise AttributeError("No ranker provided to lexical simplifier.") - # TODO just return unranked/randomly ranked data? 
- return ranks - - def get_replacement(self, sent, word, index, ranker=None, - min_similarity=0.6): - candidates = self.generateCandidates(sent, word, index, - min_similarity=min_similarity) - logger.debug("Candidates {}".format(candidates)) - candidates = self.selectCandidates(candidates) - logger.debug("Candidates (selected) {}".format(candidates)) - candidates = self.rankCandidates(candidates, ranker) - logger.debug("Candidates (ranked) {}".format(candidates)) - replacement = "" - if candidates and len(candidates[0]) > 0: - try: - replacement = candidates[0][0].decode('utf8') - except (UnicodeDecodeError, AttributeError): - replacement = candidates[0][0] - # heuristics: if target and candidate are too similar, exclude (probably - # just morphological variation) - if replacement and util.relative_levenshtein(word, replacement) < 0.2: - return "" - return replacement - - def predict_text(self, text, startOffset=0, endOffset=None, - ranker=None, min_similarity=0.6, blacklist=None): - """ - Receives pure text, without HTML markup, as input and returns - simplifications for character offsets. - :param text: the input string - :param startOffset: offset after which simplifications are solicited - :param endOffset: offset until which simplifications are solicited. If - None, this will be set to the entire text length - :param ranker: a personalized ranker - :param min_similarity: minimum similarity for generator, if available - :param blacklist: list of words not to be simplified - :return: a dictionary mapping character offset anchors to - simplifications, which are 4-tuples (original_word, simplified_word, - sentence, original_word_index) - """ - if not blacklist: - blacklist = [] - def to_be_simplified(_word): - return len(_word) > 4 and _word not in blacklist + def generateCandidates(self, sent, startOffset, endOffset, + min_similarity=0.6): + if self.generator is not None: + return self.generator.getSubstitutions( + sent[startOffset:endOffset], min_similarity=min_similarity) + return [] + + def selectCandidates(self, sent, startOffset, endOffset, candidates): + if self.selector is not None: + return self.selector.select(sent, startOffset, endOffset, + candidates) + return candidates # fallback if selector not set + + def setCwi(self, cwi): + self.cwi = cwi - if not endOffset: - endOffset = len(text) + def setRanker(self, ranker): + self.ranker = ranker + def setGenerator(self, generator): + self.generator = generator + + def setSelector(self, selector): + self.selector = selector + + def simplify_text(self, text, startOffset=0, endOffset=None, cwi=None, + ranker=None, min_similarity=0.6, blacklist=None): + """ + Full lexical simplification pipeline. 
+ :param text: + :param startOffset: + :param endOffset: + :param cwi: + :param ranker: + :param min_similarity: + :param blacklist: + :return: + """ startOffset = max(0, startOffset) endOffset = min(len(text), endOffset) @@ -122,101 +76,228 @@ def to_be_simplified(_word): # after the selection if se < startOffset or sb > endOffset: continue + sent = text[sb:se] - word_offsets = util.span_tokenize_words(sent) - for i, (wb, we) in enumerate(word_offsets): - # make sure we're within start/end offset + token_offsets = util.span_tokenize_words(sent) + + for i, (wb, we) in enumerate(token_offsets): global_word_offset_start = sb + wb global_word_offset_end = sb + we - if global_word_offset_start >= startOffset and \ - global_word_offset_end <= endOffset: - word = sent[wb:we] - logger.debug("Trying to simplify: {}".format(word)) - if to_be_simplified(word): - try: - replacement = self.get_replacement(sent, word, - str(i), ranker, - min_similarity) - except (IndexError, ValueError): - replacement = "" - if replacement: - - # This is where the output is generated - offset2simplification[global_word_offset_start] = \ - (word, replacement, sent, i) - else: - logger.debug("Found no simplification " - "for: {}".format(word)) - else: - logger.debug("Some rule prevents simplification " - "for: {}".format(word)) + if global_word_offset_start < startOffset or \ + global_word_offset_end > endOffset: + continue + + # STEP 1: TARGET IDENTIFICATION + complex_word = True # default case, e.g. for when no CWI module + # provided for single-word requests + if cwi: + complex_word = cwi.is_complex(sent, wb, we) + elif self.cwi: + complex_word = self.cwi.is_complex(sent, wb, we) + if not complex_word: + continue + + logger.debug("Identified targets: {}".format(sent[wb:we])) + + # STEP 2: CANDIDATE GENERATION + candidates = self.generateCandidates( + sent, wb, we, min_similarity=min_similarity) + if not candidates: + logger.debug("No candidate replacements found " + "for '{}'.".format(sent[wb:we])) + continue + logger.debug("Candidate replacements: {}.".format(candidates)) + + # STEP 3: CANDIDATE SELECTION + candidates = self.selectCandidates(sent, wb, we, candidates) + if not candidates: + logger.debug("No valid replacements in context.") + continue + logger.debug("Filtered replacements: {}.".format(candidates)) + + # STEP 4: RANKING + if ranker: + ranking = ranker.rank(candidates) + elif self.ranker: + ranking = self.ranker.rank(candidates) + else: + ranking = candidates + offset2simplification[global_word_offset_start] = \ + (sent[wb:we], ranking, sent, i) return offset2simplification - def load_default_init(self): - self.load("default") - def predict(self, x, ranker=None): +class LexiGenerator: + + def __init__(self, language="da", synonyms_files=(), embedding_files=()): + self.language = language + self.thesaura = [make_synonyms_dict(sf) for sf in synonyms_files] + self.w2v_model = parse_embeddings(embedding_files) + + def getSubstitutions(self, word, sources=("thesaurus", "embeddings"), + min_similarity=0.0, eager_return=True): + """ + Get substitutions from different types of sources (e.g. thesaura, + embeddings). Using `eager_return`, this method can return substitutions + as soon as one of the sources provides substitutions, such that e.g. + low-quality substitutions from embeddings do not dilute gold synonyms + from a thesaurus. + :param word: the target word to replace + :param sources: which types of sources to use for mining substitutions. + Valid options are `thesaurus` and `embeddings`. 
+ :param min_similarity: For embedding substitions, defines the cosine + similarity theshold for a candidate to be considered a synonym + :param eager_return: if True, return found substitutions as soon as + one of the sources provides candidates + :return: + """ + subs = set() + for src in sources: + if src == "thesaurus": + subs = self.getSubstitutionsThesaurus(word) + elif src == "embeddings": + subs = self.getSubstitutionsEmbeddings(word, min_similarity) + if subs and eager_return: + return subs + return subs + + def getSubstitutionsEmbeddings(self, word, min_similarity=0.6): + return set([w for w, score in + self.w2v_model.most_similar(word, min_similarity)]) + + def getSubstitutionsThesaurus(self, word): + substitutions = set() + for t in self.thesaura: + substitutions.update(t.get(word, [])) + return substitutions + + +class LexiSelector: + + def __init__(self, language="da"): + self.language = language + + def select(self, sentence, startOffset, endOffset, candidates): + return candidates # TODO implement properly + + +class LexiPersonalizedPipelineStep(metaclass=ABCMeta): + + def __init__(self, userId=None): + self.userId = userId + self.model = None + self.featurizer = None + + @abstractmethod + def fresh_train(self, data): raise NotImplementedError - def update(self, x, y): + @abstractmethod + def update(self, data): raise NotImplementedError - def save(self): - with open(LEXICAL_MODEL_PATH_TEMPLATE.format(self.userId), 'wb') as pf: - pickle.dump((self.language, self.userId, self.generator, - self.selector, self.ranker), pf, - pickle.HIGHEST_PROTOCOL) + def save(self, models_path): + path_prefix = os.path.join(models_path, self.userId) + self.model.save(path_prefix+".model.h5") + self.featurizer.save(path_prefix+".featurizer") - def load(self, userId=None): - if not userId: - userId = self.userId - with open(LEXICAL_MODEL_PATH_TEMPLATE.format(userId), 'rb') as pf: - unpickled = pickle.load(pf) - logger.debug(unpickled) - (self.language, self.userId, self.generator, - self.selector, self.ranker) = unpickled - return self + def load(self, path): + self.model = keras.models.load_model(path) - def fresh_train(self, resources=None): - if not resources: - try: - resources = RESOURCES[self.language] - except KeyError: - logger.error("Couldn't find resources for language " - "ID {}".format(self.language)) - # General purpose - w2vpm = resources['embeddings'] - # Generator - # gg = LexensteinGenerator(w2vpm) - # gg = SynonymDBGenerator(w2vpm, resources['synonyms']) - gg = LexensteinGenerator(w2vpm) - - # Selector - fe = FeatureEstimator() - fe.resources[w2vpm[0]] = gg.model - fe.addCollocationalFeature(resources['lm'], 2, 2, 'Complexity') - fe.addWordVectorSimilarityFeature(w2vpm[0], 'Simplicity') - br = BoundaryRanker(fe) - bs = BoundarySelector(br) - bs.trainSelectorWithCrossValidation(resources['ubr'], 1, 5, 0.25, - k='all') - # Ranker - fe = FeatureEstimator() - fe.addLengthFeature('Complexity') - fe.addCollocationalFeature(resources['lm'], 2, 2, 'Simplicity') - orr = OnlineRegressionRanker(fe, None, training_dataset=resources[ - 'ranking_training_dataset']) - # Return LexicalSimplifier object - self.generator = gg - self.selector = bs - self.ranker = orr - return self - def check_featurizer_set(self): - return True +class LexiCWIFeaturizer(LexiFeaturizer, DictVectorizer): + + def __init__(self): + super().__init__(self) + + def save(self, path): + pass # TODO + + def load(self, path): + pass # TODO + + def dimensions(self): + # return len(self.get_feature_names()) + return 3 + + 
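# ---------------------------------------------------------------------------
# Rough usage sketch (hypothetical, not part of the patched files): how the
# pipeline pieces introduced above are meant to be composed, using the
# setters on LexicalSimplificationPipeline together with the LexiCWI and
# LexiRanker steps defined below. The resource paths are placeholders; the
# real ones are configured via RESOURCES in lexi/config.py.
from lexi.core.simplification.lexical import (
    LexicalSimplificationPipeline, LexiGenerator, LexiSelector, LexiCWI,
    LexiRanker)

pipeline = LexicalSimplificationPipeline(userId="default", language="da")
pipeline.setGenerator(LexiGenerator(
    language="da",
    synonyms_files=["res/da/synonyms/da_synonyms_combined.csv"],   # placeholder path
    embedding_files=["res/da/embeddings/danish_vectors.bin"]))     # placeholder path
pipeline.setSelector(LexiSelector(language="da"))
pipeline.setCwi(LexiCWI("default"))
pipeline.setRanker(LexiRanker("default"))

text = "Han beundrede den ekstraordinære bygning."
# Returns {char_offset: (original_word, ranked_replacements, sentence, token_index)}
simplifications = pipeline.simplify_text(text, startOffset=0,
                                         endOffset=len(text))
# ---------------------------------------------------------------------------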
+class LexiCWI(LexiPersonalizedPipelineStep): + + def __init__(self, userId, featurizer=None): + self.featurizer = featurizer if featurizer else LexiCWIFeaturizer() + # self.model = self.build_model() + super().__init__(userId) + + def build_model(self, ): + n_input = self.featurizer.dimensions() + i = Input(shape=(n_input,)) + o = Dense([2]) + model = keras.models.Model(Input(n_input), ) + return model + + def fresh_train(self, cwi_data): + x, y = cwi_data + self.model.fit(x, y) + + def update(self, cwi_data): + x, y = cwi_data + self.model.fit(x, y) # TODO updating like this is problematic if we + # want learning rate decay or other things that rely on previous + # iterations, those are not saved in the model or optimizer... + + def identify_targets(self, sent, token_offsets): + return token_offsets # TODO implement, use is_complex + + def is_complex(self, sent, startOffset, endOffset): + return endOffset-startOffset > 7 # TODO implement properly + + +class LexiRankingFeaturizer(LexiFeaturizer, DictVectorizer): + + def __init__(self): + super().__init__(self) + + def save(self, path): + pass # TODO + + def load(self, path): + pass # TODO + + def dimensions(self): + # return len(self.get_feature_names()) + return 3 + + +class LexiRanker(LexiPersonalizedPipelineStep): + + def __init__(self, userId, featurizer=None): + self.userId = userId + self.featurizer = featurizer if featurizer else LexiRankingFeaturizer() + self.model = self.build_model() + super().__init__(userId) + + def build_model(self): + pass + + def rank(self, candidates, sentence=None, index=None): + return sorted(candidates, key=lambda x: len(x)) + + def save(self, userId): + with open(RANKER_MODEL_PATH_TEMPLATE.format(userId), 'wb') as pf: + # pickle.dump((self.fe, self.model), pf, pickle.HIGHEST_PROTOCOL) + pickle.dump(self, pf, pickle.HIGHEST_PROTOCOL) + + def load(self, path): + pass + + def fresh_train(self, x, y): + pass + + def update(self, x, y): + pass -class DummyLexicalClassifier(Classifier): +class DummyLexicalSimplificationPipeline(SimplificationPipeline): def __init__(self, userId="anonymous"): self.model = None self.featurizer = None @@ -267,9 +348,13 @@ def load_default_init(self): with open(LEXICAL_MODEL_PATH_TEMPLATE.format("default"), 'rb') as pf: self.model, self.featurizer = pickle.load(pf) - def predict_text(self, txt, ranker=None): + def simplify_text(self, txt, startOffset=0, endOffset=None, + cwi=None, ranker=None): """ :param txt: + :param startOffset: + :param endOffset: + :param cwi: :param ranker: :return: tokenized text (incl. 
word-final whitespaces) and id2simplifications dict diff --git a/lexi/core/simplification/structured.py b/lexi/core/simplification/structured.py index 3384386..be953fe 100644 --- a/lexi/core/simplification/structured.py +++ b/lexi/core/simplification/structured.py @@ -14,13 +14,13 @@ from lexi.core.featurize.featurizers import PystructChainFeaturizer, \ PystructEdgeFeaturizer from lexi.config import MODEL_PATH_TEMPLATE -from lexi.core.simplification import Classifier +from lexi.core.simplification import SimplificationPipeline from lexi.core.simplification import detokenizer logger = logging.getLogger('lexi') -class PystructClassifier(Classifier, metaclass=ABCMeta): +class PystructSimplificationPipeline(SimplificationPipeline, metaclass=ABCMeta): def __init__(self, userId="anonymous"): self.model = None self.learner = None @@ -84,7 +84,7 @@ def load_default_init(self): with open(MODEL_PATH_TEMPLATE.format("default"), 'rb') as pf: self.learner, self.model, self.featurizer = pickle.load(pf) - def predict_text(self, input_txt): + def simplify_text(self, input_txt): original = [] simplified = [] X, parses = self.featurizer.transform_plain(input_txt) @@ -123,7 +123,7 @@ def predict_text(self, input_txt): return original, simplified -class ChainCRFClassifier(PystructClassifier): +class ChainCRFClassifier(PystructSimplificationPipeline): def fresh_train(self, x, y, iterations=10): self.model = ChainCRF(inference_method="max-product") @@ -146,7 +146,7 @@ def check_featurizer_set(self): # self.fresh_train(x, y, iterations=iterations) -class EdgeCRFClassifier(PystructClassifier): +class EdgeCRFClassifier(PystructSimplificationPipeline): def fresh_train(self, x, y, iterations=10, decay_rate=1): self.model = EdgeFeatureGraphCRF(inference_method="max-product") @@ -169,7 +169,7 @@ def check_featurizer_set(self): # self.fresh_train(x, y, iterations=iterations) -class AveragedPerceptron(Classifier): +class AveragedPerceptron(SimplificationPipeline): """ An averaged perceptron, as implemented by Matthew Honnibal. 
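# ---------------------------------------------------------------------------
# The renames above follow the new abstract base in
# lexi/core/simplification/__init__.py: pipelines now implement
# simplify_text() rather than predict_text(). A minimal, hypothetical
# subclass (toy behaviour, for illustration only) might look like this:
from lexi.core.simplification import SimplificationPipeline


class UppercasingPipeline(SimplificationPipeline):
    """Toy pipeline that just upper-cases the requested span."""

    def simplify_text(self, txt, startOffset=0, endOffset=None,
                      cwi=None, ranker=None):
        endOffset = len(txt) if endOffset is None else endOffset
        span = txt[startOffset:endOffset]
        return {startOffset: (span, [span.upper()], txt, 0)}
# ---------------------------------------------------------------------------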
@@ -246,7 +246,7 @@ def load(self, path): return None -class OnlineStructuredPerceptron(Classifier): +class OnlineStructuredPerceptron(SimplificationPipeline): """Implements a first order CRF""" def __init__(self, @@ -397,7 +397,7 @@ def compute_scores(self, sequence): def predict(self, x): return self.viterbi_decode(x)[0] - def predict_text(self, txt): + def simplify_text(self, txt): # TODO pass diff --git a/lexi/core/simplification/util.py b/lexi/core/simplification/util.py new file mode 100644 index 0000000..eccc96c --- /dev/null +++ b/lexi/core/simplification/util.py @@ -0,0 +1,61 @@ +import gensim +import numpy as np + + +class W2VModelEnsemble: + + def __init__(self, models): + self.models = models + + def most_similar(self, target, min_similarity=0.5, topn=10): + + all_similar_words = set() + for model in self.models: + if target in model: + all_similar_words.update([w for w, sim in + model.most_similar(target, topn=topn) + if sim > min_similarity]) + candidate_mean_scores = [] + for w in all_similar_words: + mean_score = np.mean([model.similarity(target, w) + for model in self.models + if w in model and target in model]) + candidate_mean_scores.append((w, mean_score)) + + # sort + most_similar = sorted(candidate_mean_scores, key=lambda x: x[1], + reverse=True) + # select top n + return most_similar[:topn] + + def similarity(self, w1, w2): + return np.mean([model.similarity(w1, w2) for model in self.models]) + + +def make_synonyms_dict(synonyms_file): + """ + + :param synonyms_file: + :return: + """ + from collections import defaultdict + words2synonyms = defaultdict(set) + for line in open(synonyms_file): + tgt, syns = line.strip().split("\t", 1) + words2synonyms[tgt].update(syns.split(";")) + return words2synonyms + + +def parse_embeddings(embeddings_files): + individual_models = [] + for model_file in embeddings_files: + try: + _model = gensim.models.KeyedVectors.load_word2vec_format( + model_file, binary=True, unicode_errors='ignore') + except UnicodeDecodeError: + try: + _model = gensim.models.KeyedVectors.load(model_file) + except: + continue + individual_models.append(_model) + return W2VModelEnsemble(individual_models) diff --git a/lexi/lib/__init__.py b/lexi/lib/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/lexi/lib/lexenstein/__init__.py b/lexi/lib/lexenstein/__init__.py deleted file mode 100755 index e69de29..0000000 diff --git a/lexi/lib/lexenstein/evaluators.py b/lexi/lib/lexenstein/evaluators.py deleted file mode 100755 index e54c7dc..0000000 --- a/lexi/lib/lexenstein/evaluators.py +++ /dev/null @@ -1,572 +0,0 @@ -from scipy.stats import * - -class IdentifierEvaluator: - - def evaluateIdentifier(self, cwictor_corpus, predicted_labels): - """ - Performs an intrinsic evaluation of a Complex Word Identification approach. - - @param cwictor_corpus: Path to a training corpus in CWICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param predicted_labels: A vector containing the predicted binary labels of each instance in the CWICTOR corpus. - @return: Accuracy, Precision, Recall and the F-score between Accuracy and Recall for the substitutions provided as input with respect to the gold-standard in the VICTOR corpus. - For more information on how the metrics are calculated, please refer to the LEXenstein Manual. 
- """ - - gold = [int(line.strip().split('\t')[3]) for line in open(cwictor_corpus)] - - #Initialize variables: - accuracyc = 0.0 - accuracyt = 0.0 - precisionc = 0.0 - precisiont = 0.0 - recallc = 0.0 - recallt = 0.0 - - #Calculate measures: - for i in range(0, len(gold)): - gold_label = gold[i] - predicted_label = predicted_labels[i] - if gold_label==predicted_label: - accuracyc += 1 - if gold_label==1: - recallc += 1 - precisionc += 1 - if gold_label==1: - recallt += 1 - if predicted_label==1: - precisiont += 1 - accuracyt += 1 - - try: - accuracy = accuracyc / accuracyt - except ZeroDivisionError: - accuracy = 0 - try: - precision = precisionc / precisiont - except ZeroDivisionError: - precision = 0 - try: - recall = recallc / recallt - except ZeroDivisionError: - recall = 0 - fmean = 0 - gmean = 0 - - try: - fmean = 2 * (precision * recall) / (precision + recall) - gmean = 2 * (accuracy * recall) / (accuracy + recall) - except ZeroDivisionError: - fmean = 0 - gmean = 0 - - #Return measures: - return accuracy, precision, recall, fmean, gmean - -class GeneratorEvaluator: - - def evaluateGenerator(self, victor_corpus, substitutions): - """ - Performs an intrinsic evaluation of a Substitution Generation approach. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param substitutions: A dictionary that assigns target complex words to sets of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - @return: Values for Potential, Precision, Recall and F-measure for the substitutions provided as input with respect to the gold-standard in the VICTOR corpus. - For more information on how the metrics are calculated, please refer to the LEXenstein Manual. - """ - - #Initialize variables: - potentialc = 0 - potentialt = 0 - precisionc = 0 - precisiont = 0 - recallt = 0 - - #Calculate measures: - f = open(victor_corpus) - for line in f: - data = line.strip().split('\t') - target = data[1].strip() - items = data[3:len(data)] - candidates = set([item.strip().split(':')[1].strip() for item in items]) - if target in substitutions: - overlap = candidates.intersection(set(substitutions[target])) - precisionc += len(overlap) - if len(overlap)>0: - potentialc += 1 - precisiont += len(substitutions[target]) - potentialt += 1 - recallt += len(candidates) - f.close() - - potential = float(potentialc)/float(potentialt) - precision = float(precisionc)/float(precisiont) - recall = float(precisionc)/float(recallt) - fmean = 0.0 - if precision==0.0 and recall==0.0: - fmean = 0.0 - else: - fmean = 2*(precision*recall)/(precision+recall) - - #Return measures: - return potential, precision, recall, fmean - -class SelectorEvaluator: - - def evaluateSelector(self, victor_corpus, substitutions): - """ - Performs an intrinsic evaluation of a Substitution Selection approach. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param substitutions: A vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. - @return: Values for Potential, Recall, Precision and F-measure for the substitutions provided as input with respect to the gold-standard in the VICTOR corpus. - For more information on how the metrics are calculated, please refer to the LEXenstein Manual. 
- """ - - #Initialize variables: - potentialc = 0 - potentialt = 0 - precisionc = 0 - precisiont = 0 - recallt = 0 - - #Calculate measures: - f = open(victor_corpus) - index = -1 - for line in f: - index += 1 - - data = line.strip().split('\t') - target = data[1].strip() - items = data[3:len(data)] - candidates = set([item.strip().split(':')[1].strip() for item in items]) - - selected = substitutions[index] - if len(selected)>0: - overlap = candidates.intersection(set(selected)) - precisionc += len(overlap) - if len(overlap)>0: - potentialc += 1 - potentialt += 1 - precisiont += len(selected) - recallt += len(candidates) - f.close() - - potential = float(potentialc)/float(potentialt) - precision = float(precisionc)/float(precisiont) - recall = float(precisionc)/float(recallt) - fmean = 0.0 - if precision==0.0 and recall==0.0: - fmean = 0.0 - else: - fmean = 2*(precision*recall)/(precision+recall) - - #Return measures: - return potential, precision, recall, fmean - -class RankerEvaluator: - - def evaluateRanker(self, victor_corpus, rankings): - """ - Performs an intrinsic evaluation of a Substitution Ranking approach. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param rankings: A vector of size N, containing a set of ranked substitutions for each instance in the VICTOR corpus. - @return: Values for TRank-at-1/2/3, Recall-at-1/2/3, Spearman and Pearson correlation for the substitutions provided as input with respect to the gold-standard in the VICTOR corpus. - For more information on how the metrics are calculated, please refer to the LEXenstein Manual. - """ - - #Initialize variables: - total1 = 0 - total2 = 0 - total3 = 0 - corrects1 = 0 - corrects2 = 0 - corrects3 = 0 - recall1 = 0 - recall2 = 0 - recall3 = 0 - trecall1 = 0 - trecall2 = 0 - trecall3 = 0 - - #Read data: - index = -1 - f = open(victor_corpus) - all_gold = [] - all_ranks = [] - for data in f: - index += 1 - line = data.strip().split('\t') - gold_rankings = {} - for subst in line[3:len(line)]: - subst_data = subst.strip().split(':') - word = subst_data[1].strip() - ranking = int(subst_data[0].strip()) - gold_rankings[word] = ranking - ranked_candidates = rankings[index] - - for i in range(0, len(ranked_candidates)): - word = ranked_candidates[i] - all_gold.append(gold_rankings[word]) - all_ranks.append(i) - - first = gold_rankings[ranked_candidates[0]] - - #Get recall sets: - set1, set2, set3 = self.getRecallSets(line[3:len(line)]) - rankedset1 = set([]) - rankedset2 = set([]) - rankedset3 = set([]) - - #Calculate TRank 1: - if first==1: - rankedset1 = set([ranked_candidates[0]]) - corrects1 += 1 - recall1 += len(rankedset1.intersection(set1)) - trecall1 += len(set1) - total1 += 1 - - #Calculate TRank 2: - if len(list(gold_rankings.keys()))>2: - rankedset2 = rankedset1.union(set([ranked_candidates[1]])) - recall2 += len(rankedset2.intersection(set2)) - trecall2 += len(set2) - if first<=2: - corrects2 += 1 - total2 += 1 - - #Calculate TRank 3: - if len(list(gold_rankings.keys()))>3: - rankedset3 = rankedset2.union(set([ranked_candidates[2]])) - recall3 += len(rankedset3.intersection(set3)) - trecall3 += len(set3) - if first<=3: - corrects3 += 1 - total3 += 1 - - S, p = spearmanr(all_ranks, all_gold) - P = pearsonr(all_ranks, all_gold) - - return float(corrects1)/float(total1), float(corrects2)/float(total2), float(corrects3)/float(total3), float(recall1)/float(trecall1), float(recall2)/float(trecall2), 
float(recall3)/float(trecall3), S, P[0] - - def getRecallSets(self, substs): - result1 = set([]) - result2 = set([]) - result3 = set([]) - for subst in substs: - datasubst = subst.strip().split(':') - word = datasubst[1].strip() - index = datasubst[0].strip() - if index=="1": - result1.add(word) - result2.add(word) - result3.add(word) - elif index=="2": - result2.add(word) - result3.add(word) - elif index=="3": - result3.add(word) - return result1, result2, result3 - -class PipelineEvaluator: - - def evaluatePipeline(self, victor_corpus, rankings): - """ - Performs a round-trip evaluation of a Substitution Generation, Selection and Ranking approach combined. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param rankings: A list of ranked candidates for each instance in the VICTOR corpus, from simplest to most complex. - One should produce candidates with a Substitution Generation approach, select them for a given VICTOR corpus with a Substitution Selection approach, then rank them with a Substitution Ranking approach. - @return: Values for Precision, Accuracy and Changed Proportion for the substitutions provided as input with respect to the gold-standard in the VICTOR corpus. - For more information on how the metrics are calculated, please refer to the LEXenstein Manual. - """ - - #Initialize counting variables: - total = 0 - totalc = 0 - accurate = 0 - precise = 0 - - #Read victor corpus: - f = open(victor_corpus) - for i in range(0, len(rankings)): - #Get gold candidates: - data = f.readline().strip().split('\t') - target = data[1].strip() - data = data[3:len(data)] - gold_subs = set([item.strip().split(':')[1].strip() for item in data]) - - #Get highest ranked candidate: - first = rankings[i][0] - - #Check if it is in gold candidates: - total += 1 - if first!=target: - totalc += 1 - if first in gold_subs: - accurate += 1 - precise += 1 - else: - precise += 1 - - #Return metrics: - return float(precise)/float(total), float(accurate)/float(total), float(totalc)/float(total) - -class PLUMBErr: - - def __init__(self, dataset, complex): - """ - Creates a PLUMBErr error categorizer. - This class implements the strategy introduced in: - Paetzold, G. H.; Specia, L. PLUMBErr: An Automatic Error Identification Framework for Lexical Simplification. Proceedings of the 1st QATS. 2016. - One can download BenchLS (dataset) and NNSVocab (complex) from http://ghpaetzold.github.io/data/PLUMBErr.zip - - @param dataset: Path to a data in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param complex: Path to a file containing complex words. - Each line of the file must contain a single word. - """ - self.data = [line.strip().split('\t') for line in open(dataset)] - self.complex = set([line.strip() for line in open(complex)]) - - def cumulativeAnalysis(self, identified, selected, ranked): - """ - Performs the cumulative error identification analysis of a simplifier. - In a cumulative analysis, the errors made during Complex Word Identification are carried onto the following steps of the pipeline. - - @param identified: A vector containing one binary value (0 for simple, 1 for complex) for each word in the dataset. - To produce the vector, one can run a Complex Word Identification approach from LEXenstein over the dataset. - @param selected: A vector containing the candidates selected for each instance in the dataset. 
- To produce the vector, one can pair a Substitution Generation and a Substitution Selection approach from LEXenstein. - @param ranked: A vector containing the selected candidates ranked in order of simplicity. - To produce the vector, one can run a Substitution Ranking approach from LEXenstein over the selected candidates provided. - """ - - #Initialize report: - report = [] - - #Create CWI gold-standard: - gold = [] - for line in self.data: - if line[1] in self.complex: - gold.append(1) - else: - gold.append(0) - - #Find errors of type 2: - error2a = 0 - error2b = 0 - for i in range(0, len(gold)): - errors = set([]) - g = gold[i] - p = identified[i] - if p==0 and g==1: - error2a += 1 - errors.add('2A') - elif p==1 and g==0: - error2b += 1 - errors.add('2B') - report.append(errors) - - #Find errors of type 3: - error3a = 0 - error3b = 0 - - goldcands = [] - simplecands = [] - for line in self.data: - cs = set([cand.strip().split(':')[1].strip() for cand in line[3:]]) - goldcands.append(cs) - simplecands.append(cs.difference(self.complex)) - - cands = [] - for vec in selected: - cands.append(set(vec)) - - control = [] - for i in range(0, len(self.data)): - gold_label = gold[i] - pred_label = identified[i] - ac = goldcands[i] - sc = simplecands[i] - cs = cands[i] - if gold_label==0: - sc = set([]) - else: - if pred_label==0: - cs = set([]) - ainter = ac.intersection(cs) - sinter = sc.intersection(cs) - - if gold_label==1: - if len(ainter)==0: - error3a += 1 - report[i].add('3A') - control.append('Error') - elif len(sinter)==0: - error3b += 1 - report[i].add('3B') - control.append('Error') - else: - control.append('Ok') - else: - control.append('Ignore') - - #Find errors of type 4 and 5: - error4 = 0 - error5 = 0 - noerror = 0 - for i in range(0, len(self.data)): - gold_label = gold[i] - pred_label = identified[i] - ac = goldcands[i] - sc = simplecands[i] - cs = ranked[i] - if gold_label==0: - sc = set([]) - else: - if pred_label==0: - cs = set([]) - - sub = '' - if len(cs)>0: - sub = cs[0] - - if control[i]=='Ok': - if sub not in ac: - error4 += 1 - report[i].add('4') - elif sub not in sc: - error5 += 1 - report[i].add('5') - else: - noerror += 1 - report[i].add('1') - - #Create error count map: - counts = {} - counts['2A'] = error2a - counts['2B'] = error2b - counts['3A'] = error3a - counts['3B'] = error3b - counts['4'] = error4 - counts['5'] = error5 - counts['1'] = noerror - - return report, counts - - def nonCumulativeAnalysis(self, identified, selected, ranked): - """ - Performs the non-cumulative error identification analysis of a simplifier. - In a non-cumulative analysis, the errors made during Complex Word Identification are not carried onto the following steps of the pipeline. - - @param identified: A vector containing one binary value (0 for simple, 1 for complex) for each word in the dataset. - To produce the vector, one can run a Complex Word Identification approach from LEXenstein over the dataset. - @param selected: A vector containing the candidates selected for each instance in the dataset. - To produce the vector, one can pair a Substitution Generation and a Substitution Selection approach from LEXenstein. - @param ranked: A vector containing the selected candidates ranked in order of simplicity. - To produce the vector, one can run a Substitution Ranking approach from LEXenstein over the selected candidates provided. - @return: A report vector containing the errors made in each instance of the dataset, as well as a map containing total error counts for the entire dataset. 
- """ - - #Initialize report: - report = [] - - #Create CWI gold-standard: - gold = [] - for line in self.data: - if line[1] in self.complex: - gold.append(1) - else: - gold.append(0) - - #Find errors of type 2: - error2a = 0 - error2b = 0 - for i in range(0, len(gold)): - errors = set([]) - g = gold[i] - p = identified[i] - if p==0 and g==1: - error2a += 1 - errors.add('2A') - elif p==1 and g==0: - error2b += 1 - errors.add('2B') - report.append(errors) - - #Find errors of type 3: - error3a = 0 - error3b = 0 - - goldcands = [] - simplecands = [] - for line in self.data: - cs = set([cand.strip().split(':')[1].strip() for cand in line[3:]]) - goldcands.append(cs) - simplecands.append(cs.difference(self.complex)) - - cands = [] - for vec in selected: - cands.append(set(vec)) - - for i in range(0, len(self.data)): - gold_label = gold[i] - pred_label = identified[i] - ac = goldcands[i] - sc = simplecands[i] - cs = cands[i] - ainter = ac.intersection(cs) - sinter = sc.intersection(cs) - - if gold_label==1: - if len(ainter)==0: - error3a += 1 - report[i].add('3A') - elif len(sinter)==0: - error3b += 1 - report[i].add('3B') - - #Find errors of type 4 and 5: - error4 = 0 - error5 = 0 - noerror = 0 - for i in range(0, len(self.data)): - gold_label = gold[i] - pred_label = identified[i] - ac = goldcands[i] - sc = simplecands[i] - cs = ranked[i] - - sub = '' - if len(cs)>0: - sub = cs[0] - - if gold_label==1: - if sub not in ac: - error4 += 1 - report[i].add('4') - elif sub not in sc: - error5 += 1 - report[i].add('5') - else: - noerror += 1 - report[i].add('1') - - #Create error count map: - counts = {} - counts['2A'] = error2a - counts['2B'] = error2b - counts['3A'] = error3a - counts['3B'] = error3b - counts['4'] = error4 - counts['5'] = error5 - counts['1'] = noerror - - return report, counts diff --git a/lexi/lib/lexenstein/features.py b/lexi/lib/lexenstein/features.py deleted file mode 100755 index a7363bc..0000000 --- a/lexi/lib/lexenstein/features.py +++ /dev/null @@ -1,3547 +0,0 @@ -from .util import getGeneralisedPOS, dependencyParseSentences -from nltk.corpus import wordnet as wn -import kenlm -import math -import gensim -from nltk.tag.stanford import StanfordPOSTagger -from nltk.parse.stanford import StanfordParser -import os -import pickle -from sklearn.preprocessing import normalize -import numpy -import shelve -import urllib.request, urllib.error, urllib.parse -import json -import re -import logging - -logger = logging.getLogger('lexi') - -class FeatureEstimator: - - def __init__(self, norm=False): - """ - Creates an instance of the FeatureEstimator class. - - @param norm: Boolean variable that determines whether or not feature values should be normalized. - """ - #List of features to be calculated: - self.features = [] - #List of identifiers of features to be calculated: - self.identifiers = [] - #Normalization parameter: - self.norm = norm - #Persistent resource list: - self.resources = {} - #One-run resource list: - self.temp_resources = {} - - def calculateFeatures(self, corpus, format='victor', input='file'): - """ - Calculate the selected features over the candidates of a VICTOR or CWICTOR corpus. - - @param corpus: Path to a corpus in the VICTOR or CWICTOR format. - For more information about the input's format, refer to the LEXenstein Manual. - @param format: Input format. - Values available: victor, cwictor. - @param input: Type of input provided. - Values available: file, text. 
- @return: Returns a MxN matrix, where M is the number of substitutions of all instances in the VICTOR corpus, and N the number of selected features. - """ - data = [] - if format.strip().lower()=='victor': - if input=='file': - data = [line.strip().split('\t') for line in open(corpus)] - elif input=='text': - data = [line.strip().split('\t') for line in corpus.split('\n')] - else: - logger.debug('Unrecognized format: must be file or text.') - elif format.strip().lower()=='cwictor': - if input=='file': - f = open(corpus) - for line in f: - line_data = line.strip().split('\t') - data.append([line_data[0].strip(), line_data[1].strip(), line_data[2].strip(), '0:'+line_data[1].strip()]) - elif input=='text': - for line in corpus.split('\n'): - line_data = line.strip().split('\t') - data.append([line_data[0].strip(), line_data[1].strip(), line_data[2].strip(), '0:'+line_data[1].strip()]) - else: - logger.debug('Unrecognized format: must be file or text.') - else: - logger.debug('Unknown input format during feature estimation!') - return [] - - values = [] - for feature in self.features: - values.append(feature[0].__call__(data, feature[1])) - - result = [] - index = 0 - for line in data: - for i in range(3, len(line)): - vector = self.generateVector(values, index) - result.append(vector) - index += 1 - - #Normalize if required: - if self.norm: - result = normalize(result, axis=0) - - #Clear one-run resources: - self.temp_resources = {} - - return result - - def calculateInstanceFeatures(self, sent, target, head, candidate): - """ - Calculate the selected features over an instance of a VICTOR corpus. - - @param sent: Sentence containing a target complex word. - @param target: Target complex sentence to be simplified. - @param head: Position of target complex word in sentence. - @param candidate: Candidate substitution. - @return: Returns a vector containing the feature values of VICTOR instance. 
- """ - - data = [[sent, target, head, '0:'+candidate]] - - values = [] - for feature in self.features: - values.append(feature[0].__call__(data, feature[1])) - vector = self.generateVector(values, 0) - return vector - - def generateVector(self, feature_vector, index): - result = [] - for feature in feature_vector: - if not isinstance(feature[index], list): - result.append(feature[index]) - else: - result.extend(feature[index]) - return result - - def targetPOSTagProbability(self, data, args): - model = self.resources[args[0]] - tagger = self.resources[args[1]] - result = [] - - #Get tagged sentences: - tagged_sents = None - if 'tagged_sents' in self.temp_resources: - tagged_sents = self.temp_resources['tagged_sents'] - else: - sentences = [l[0].strip().split(' ') for l in data] - tagged_sents = tagger.tag_sents(sentences) - self.temp_resources['tagged_sents'] = tagged_sents - - for i in range(0, len(data)): - line = data[i] - target = line[1].strip().lower() - head = int(line[2].strip()) - target_pos = tagged_sents[i][head][1] - - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - probability = model[words].prob(target_pos) - result.append(probability) - return result - - def minimumWordVectorSimilarityFeature(self, data, args): - model = self.resources[args[0]] - result = [] - for line in data: - target = line[1].strip().lower().replace(' ', '_') - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - similarities = [] - cand_size = 0 - for word in words.split(' '): - cand_size += 1 - try: - similarities.append(model.similarity(target, word)) - except KeyError: - try: - similarities.append(model.similarity(target, word.lower())) - except KeyError: - pass - if len(similarities)>0: - similarity = numpy.min(similarities) - result.append(similarity) - else: - result.append(0.0) - return result - - def maximumWordVectorSimilarityFeature(self, data, args): - model = self.resources[args[0]] - result = [] - for line in data: - target = line[1].strip().lower().replace(' ', '_') - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - similarities = [] - cand_size = 0 - for word in words.split(' '): - cand_size += 1 - try: - similarities.append(model.similarity(target, word)) - except KeyError: - try: - similarities.append(model.similarity(target, word.lower())) - except KeyError: - pass - if len(similarities)>0: - similarity = numpy.max(similarities) - result.append(similarity) - else: - result.append(0.0) - return result - - def averageWordVectorSimilarityFeature(self, data, args): - model = self.resources[args[0]] - result = [] - for line in data: - target = line[1].strip().lower().replace(' ', '_') - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - similarities = [] - cand_size = 0 - for word in words.split(' '): - cand_size += 1 - try: - similarities.append(model.similarity(target, word)) - except KeyError: - try: - similarities.append(model.similarity(target, word.lower())) - except KeyError: - pass - if len(similarities)>0: - similarity = numpy.mean(similarities) - result.append(similarity) - else: - result.append(0.0) - return result - - def wordVectorSimilarityFeature(self, data, args): - model = self.resources[args[0]] - result = [] - for line in data: - target = line[1].strip().lower() - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - similarity = 0.0 - cand_size = 0 - for word in words.split(' '): - cand_size += 1 - try: - similarity += 
model.similarity(target, word) - except KeyError: - try: - similarity += model.similarity(target, word.lower()) - except KeyError: - pass - similarity /= cand_size - result.append(similarity) - return result - - def taggedWordVectorSimilarityFeature(self, data, args): - result = [] - - model = self.resources[args[0]] - tagger = self.resources[args[1]] - pos_type = args[2] - - #Get tagged sentences: - tagged_sents = None - if 'tagged_sents' in self.temp_resources: - tagged_sents = self.temp_resources['tagged_sents'] - else: - sentences = [l[0].strip().split(' ') for l in data] - tagged_sents = tagger.tag_sents(sentences) - self.temp_resources['tagged_sents'] = tagged_sents - - #Transform them to the right format: - if pos_type=='paetzold': - transformed = [] - for sent in tagged_sents: - tokens = [] - for token in sent: - tokens.append((token[0], getGeneralisedPOS(token[1]))) - transformed.append(tokens) - tagged_sents = transformed - - for i in range(0, len(data)): - line = data[i] - target = line[1].strip().lower() - head = int(line[2].strip()) - target_pos = tagged_sents[i][head][1] - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - similarity = 0.0 - cand_size = 0 - for word in words.split(' '): - cand_size += 1 - try: - similarity += model.similarity(target+'|||'+target_pos, word+'|||'+target_pos) - except KeyError: - try: - similarity += model.similarity(target+'|||'+target_pos, word.lower()+'|||'+target_pos) - except KeyError: - pass - similarity /= cand_size - result.append(similarity) - return result - - def wordVectorValuesFeature(self, data, args): - model = self.resources[args[0]] - size = args[1] - result = [] - for line in data: - target = line[1].strip().lower() - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - word_vector = numpy.zeros(size) - for word in words.split(' '): - try: - word_vector = numpy.add(word_vector, model[words]) - except KeyError: - pass - result.append(word_vector) - for i in range(0, len(result)): - result[i] = result[i].tolist() - return result - - def translationProbabilityFeature(self, data, args): - probabilities = self.resources[args[0]] - result = [] - for line in data: - target = line[1].strip() - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - prob = -9999999999 - for word in words.split(' '): - if target+'\t'+word in probabilities: - p = probabilities[target+'\t'+word] - if p>prob: - prob = p - result.append(prob) - return result - - def lexiconFeature(self, data, args): - path = args[0] - result = [] - basics = self.resources[path] - for line in data: - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - basicCount = 0 - for word in words.split(' '): - if word.strip() in basics: - basicCount += 1 - if basicCount==len(words.split(' ')): - result.append(1.0) - else: - result.append(0.0) - return result - - def lengthFeature(self, data, args): - result = [] - for line in data: - for subst in line[3:len(line)]: - word = subst.strip().split(':')[1].strip() - result.append(len(word)) - return result - - def numberOfTokens(self, data, args): - result = [] - for line in data: - for subst in line[3:len(line)]: - word = subst.strip().split(':')[1].strip().split(' ') - result.append(len(word)) - return result - - def syllableFeature(self, data, args): - mat = args[0] - #Create the input for the Java application: - input = [] - for line in data: - for subst in line[3:len(line)]: - word = subst.strip().split(':')[1].strip() - 
input.append(word) - - #Run the syllable splitter: - outr = mat.splitSyllables(input) - - #Decode output: - out = [] - for o in outr: - out.append(o.decode("latin1").replace(' ', '-')) - - #Calculate number of syllables - result = [] - for instance in out: - if len(instance.strip())>0: - result.append(len(instance.split('-'))) - return result - - def collocationalFeature(self, data, args): - lm = args[0] - spanl = args[1] - spanr = args[2] - result = [] - model = self.resources[lm] - for line in data: - sent = line[0].strip().split(' ') - target = line[1] - head = int(line[2]) - spanlv = list(range(0, spanl+1)) - spanrv = list(range(0, spanr+1)) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - values = [] - for span1 in spanlv: - for span2 in spanrv: - ngram, bosv, eosv = self.getNgram(word, sent, head, span1, span2) - # aux = model.score(ngram, bos=bosv, eos=eosv) - aux = model.score(ngram) - values.append(aux) - result.append(values) - return result - - def frequencyCollocationalFeature(self, data, args): - ngrams = args[0] - spanl = args[1] - spanr = args[2] - result = [] - counts = self.resources[ngrams] - for line in data: - sent = line[0].strip().split(' ') - target = line[1] - head = int(line[2]) - spanlv = list(range(0, spanl+1)) - spanrv = list(range(0, spanr+1)) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - values = [] - for span1 in spanlv: - for span2 in spanrv: - ngram, bosv, eosv = self.getNgram(word, sent, head, span1, span2) - if ngram in counts: - values.append(counts[ngram]) - else: - values.append(0.0) - result.append(values) - return result - - def taggedFrequencyCollocationalFeature(self, data, args): - counts = self.resources[args[0]] - spanl = args[1] - spanr = args[2] - tagger = self.resources[args[3]] - pos_type = args[4] - - #Get tagged sentences: - tagged_sents = None - if 'tagged_sents' in self.temp_resources: - tagged_sents = self.temp_resources['tagged_sents'] - else: - sentences = [l[0].strip().split(' ') for l in data] - tagged_sents = tagger.tag_sents(sentences) - self.temp_resources['tagged_sents'] = tagged_sents - - #Transform them to the right format: - if pos_type=='paetzold': - transformed = [] - for sent in tagged_sents: - tokens = [] - for token in sent: - tokens.append((token[0], getGeneralisedPOS(token[1]))) - transformed.append(tokens) - tagged_sents = transformed - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = [''] + [tokendata[1] for tokendata in tagged_sents[i]] + [''] - target = line[1] - head = int(line[2])+1 - spanlv = list(range(0, spanl+1)) - spanrv = list(range(0, spanr+1)) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - values = [] - for span1 in spanlv: - for span2 in spanrv: - ngram, bosv, eosv = self.getNgram(word, sent, head, span1, span2) - if ngram in counts: - values.append(counts[ngram]) - else: - values.append(0.0) - result.append(values) - return result - - def binaryTaggedFrequencyCollocationalFeature(self, data, args): - counts = self.resources[args[0]] - spanl = args[1] - spanr = args[2] - tagger = self.resources[args[3]] - pos_type = args[4] - - #Get tagged sentences: - tagged_sents = None - if 'tagged_sents' in self.temp_resources: - tagged_sents = self.temp_resources['tagged_sents'] - else: - sentences = [l[0].strip().split(' ') for l in data] - tagged_sents = tagger.tag_sents(sentences) - self.temp_resources['tagged_sents'] = tagged_sents - - #Transform them to the right format: - if pos_type=='paetzold': - transformed = 
[] - for sent in tagged_sents: - tokens = [] - for token in sent: - tokens.append((token[0], getGeneralisedPOS(token[1]))) - transformed.append(tokens) - tagged_sents = transformed - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = [''] + [tokendata[1] for tokendata in tagged_sents[i]] + [''] - target = line[1] - head = int(line[2])+1 - spanlv = list(range(0, spanl+1)) - spanrv = list(range(0, spanr+1)) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - values = [] - for span1 in spanlv: - for span2 in spanrv: - ngram, bosv, eosv = self.getNgram(word, sent, head, span1, span2) - if ngram in counts: - values.append(1.0) - else: - values.append(0.0) - result.append(values) - return result - - def popCollocationalFeature(self, data, args): - lm = args[0] - spanl = args[1] - spanr = args[2] - result = [] - model = self.resources[lm] - for line in data: - sent = line[0] - target = line[1] - head = int(line[2]) - spanlv = list(range(0, spanl+1)) - spanrv = list(range(0, spanr+1)) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - values = [] - for span1 in spanlv: - for span2 in spanrv: - ngrams = self.getPopNgrams(word, sent, head, span1, span2) - maxscore = -999999 - for ngram in ngrams: - # aux = model.score(ngram[0], bos=ngram[1], eos=ngram[2]) - aux = model.score(ngram[0]) - if aux>maxscore: - maxscore = aux - values.append(maxscore) - result.append(values) - return result - - def ngramProbabilityFeature(self, data, args): - lm = args[0] - spanl = args[1] - spanr = args[2] - result = [] - model = self.resources[lm] - for line in data: - sent = line[0].strip().split(' ') - target = line[1] - head = int(line[2]) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - ngram, bosv, eosv = self.getNgram(word, sent, head, spanl, spanr) - # prob = model.score(ngram, bos=bosv, eos=eosv) - prob = model.score(ngram) - result.append(prob) - return result - - def averageTokenProbabilityFeature(self, data, args): - lm = args[0] - result = [] - model = self.resources[lm] - for line in data: - sent = line[0].strip().split(' ') - target = line[1] - head = int(line[2]) - for subst in line[3:len(line)]: - candidate = subst.split(':')[1].strip().split(' ') - probabilities = [] - for token in candidate: - ngram, bosv, eosv = self.getNgram(token, sent, head, 0, 0) - prob = model.score(ngram, bos=bosv, eos=eosv) - probabilities.append(prob) - result.append(numpy.mean(probabilities)) - return result - - def maximumTokenProbabilityFeature(self, data, args): - lm = args[0] - result = [] - model = self.resources[lm] - for line in data: - sent = line[0].strip().split(' ') - target = line[1] - head = int(line[2]) - for subst in line[3:len(line)]: - candidate = subst.split(':')[1].strip().split(' ') - probabilities = [] - for token in candidate: - ngram, bosv, eosv = self.getNgram(token, sent, head, 0, 0) - # prob = model.score(ngram, bos=bosv, eos=eosv) - prob = model.score(ngram) - probabilities.append(prob) - result.append(numpy.max(probabilities)) - return result - - def minimumTokenProbabilityFeature(self, data, args): - lm = args[0] - result = [] - model = self.resources[lm] - for line in data: - sent = line[0].strip().split(' ') - target = line[1] - head = int(line[2]) - for subst in line[3:len(line)]: - candidate = subst.split(':')[1].strip().split(' ') - probabilities = [] - for token in candidate: - ngram, bosv, eosv = self.getNgram(token, sent, head, 0, 0) - # prob = model.score(ngram, bos=bosv, eos=eosv) - prob = 
model.score(ngram) - probabilities.append(prob) - result.append(numpy.min(probabilities)) - return result - - def ngramFrequencyFeature(self, data, args): - ngrams = args[0] - spanl = args[1] - spanr = args[2] - result = [] - counts = self.resources[ngrams] - for line in data: - sent = line[0].strip().split(' ') - target = line[1] - head = int(line[2]) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - ngram, bosv, eosv = self.getNgram(word, sent, head, spanl, spanr) - if ngram in counts: - result.append(counts[ngram]) - else: - result.append(0.0) - return result - - def binaryNgramFrequencyFeature(self, data, args): - ngrams = args[0] - spanl = args[1] - spanr = args[2] - result = [] - counts = self.resources[ngrams] - for line in data: - sent = line[0].strip().split(' ') - target = line[1] - head = int(line[2]) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - ngram, bosv, eosv = self.getNgram(word, sent, head, spanl, spanr) - if ngram in counts: - result.append(1.0) - else: - result.append(0.0) - return result - - def popNgramProbabilityFeature(self, data, args): - lm = args[0] - spanl = args[1] - spanr = args[2] - result = [] - model = self.resources[lm] - for line in data: - sent = line[0] - target = line[1] - head = int(line[2]) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - ngrams = self.getPopNgrams(word, sent, head, spanl, spanl) - maxscore = -999999 - for ngram in ngrams: - # aux = model.score(ngram[0], bos=ngram[1], eos=ngram[2]) - aux = model.score(ngram[0]) - if aux>maxscore: - maxscore = aux - result.append(maxscore) - return result - - def popNgramFrequencyFeature(self, data, args): - ngrams = args[0] - spanl = args[1] - spanr = args[2] - result = [] - counts = self.resources[ngrams] - for line in data: - sent = line[0].strip() - target = line[1] - head = int(line[2]) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - ngrams = self.getPopNgrams(word, sent, head, spanl, spanl) - maxscore = -999999 - for ngram in ngrams: - aux = 0.0 - if ngram[0] in counts: - aux = counts[ngram[0]] - - if aux>maxscore: - maxscore = aux - result.append(maxscore) - - return result - - def getNgram(self, cand, tokens, head, configl, configr): - if configl==0 and configr==0: - return cand, False, False - else: - result = '' - bosv = False - if max(0, head-configl)==0: - bosv = True - eosv = False - if min(len(tokens), head+configr+1)==len(tokens): - eosv = True - for i in range(max(0, head-configl), head): - result += tokens[i] + ' ' - result += cand + ' ' - for i in range(head+1, min(len(tokens), head+configr+1)): - result += tokens[i] + ' ' - return result.strip(), bosv, eosv - - def getPopNgrams(self, cand, sent, head, configl, configr): - if configl==0 and configr==0: - bos = False - eos = False - if head==0: - bos = True - if head==len(sent.split(' '))-1: - eos = True - return [(cand, bos, eos)] - else: - result = set([]) - contexts = self.getPopContexts(sent, head) - for context in contexts: - ctokens = context[0] - chead = context[1] - bosv = False - if max(0, chead-configl)==0: - bosv = True - eosv = False - ngram = '' - if min(len(ctokens), chead+configr+1)==len(ctokens): - eosv = True - for i in range(max(0, chead-configl), chead): - ngram += ctokens[i] + ' ' - ngram += cand + ' ' - for i in range(chead+1, min(len(ctokens), chead+configr+1)): - ngram += ctokens[i] + ' ' - result.add((ngram.strip(), bosv, eosv)) - return result - - def getPopContexts(self, sent, head): - tokens = 
sent.strip().split(' ') - result = [] - check = 0 - if head>0: - check += 1 - tokens1 = list(tokens) - tokens1.pop(head-1) - result.append((tokens1, head-1)) - if head0: - resultsy.append(1.0) - else: - resultsy.append(0.0) - return resultsy - - def hypernymCount(self, data, args): - resulthe = [] - for line in data: - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - hypernyms = set([]) - for word in words.split(' '): - senses = None - try: - senses = wn.synsets(word) - except UnicodeDecodeError: - senses = [] - for sense in senses: - hypernyms.update(sense.hypernyms()) - resulthe.append(len(hypernyms)) - return resulthe - - def isHypernym(self, data, args): - resultsy = [] - for line in data: - target = line[1].strip() - tgthypernyms = set([]) - try: - tgtsenses = wn.synsets(target) - for sense in tgtsenses: - tgthypernyms.update(sense.hypernyms()) - except Exception: - tgthypernyms = tgthypernyms - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - senses = set([]) - for word in words.split(' '): - try: - senses.update(wn.synsets(word)) - except UnicodeDecodeError: - senses = senses - if len(tgthypernyms)==0 or len(senses.intersection(tgthypernyms))>0: - resultsy.append(1.0) - else: - resultsy.append(0.0) - return resultsy - - def hyponymCount(self, data, args): - resultho = [] - for line in data: - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - hyponyms = set([]) - for word in words.split(' '): - senses = None - try: - senses = wn.synsets(word) - except UnicodeDecodeError: - senses = [] - for sense in senses: - hyponyms.update(sense.hyponyms()) - resultho.append(len(hyponyms)) - return resultho - - def isHyponym(self, data, args): - resultsy = [] - for line in data: - target = line[1].strip() - tgthyponyms = set([]) - try: - tgtsenses = wn.synsets(target) - for sense in tgtsenses: - tgthyponyms.update(sense.hyponyms()) - except Exception: - tgthyponyms = tgthyponyms - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - senses = set([]) - for word in words.split(' '): - try: - senses.update(wn.synsets(word)) - except UnicodeDecodeError: - senses = senses - if len(tgthyponyms)==0 or len(senses.intersection(tgthyponyms))>0: - resultsy.append(1.0) - else: - resultsy.append(0.0) - return resultsy - - def minDepth(self, data, args): - resultmi = [] - for line in data: - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - mindepth = 9999999 - for word in words.split(' '): - senses = None - try: - senses = wn.synsets(word) - except UnicodeDecodeError: - senses = [] - for sense in senses: - auxmin = sense.min_depth() - if auxminmaxdepth: - maxdepth = auxmax - resultma.append(maxdepth) - return resultma - - def averageDepth(self, data, args): - resultma = [] - for line in data: - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - avgdepth = 0 - total = 0 - for word in words.split(' '): - senses = None - try: - senses = wn.synsets(word) - except UnicodeDecodeError: - senses = [] - for sense in senses: - auxmax = sense.max_depth() - avgdepth += auxmax - total += len(senses) - try: - avgdepth /= total - except Exception: - avgdepth = 0 - resultma.append(avgdepth) - return resultma - - def subjectDependencyProbabilityFeature(self, data, args): - model = self.resources[args[0]] - parser = self.resources[args[1]] - - #Get parsed sentences: - dep_maps = None - if 'dep_maps' in self.temp_resources: - dep_maps = 
self.temp_resources['dep_maps'] - else: - sentences = [l[0].strip().split(' ') for l in data] - dep_parsed_sents = None - if 'dep_parsed_sents' in self.temp_resources: - dep_parsed_sents = self.temp_resources['dep_parsed_sents'] - else: - dep_parsed_sents = dependencyParseSentences(parser, sentences) - self.temp_resources['dep_parsed_sents'] = dep_parsed_sents - dep_maps = [] - for sent in dep_parsed_sents: - dep_map = {} - for parse in sent: - deplink = str(parse[0]) - subjectindex = int(str(parse[2]))-1 - objectindex = int(str(parse[4]))-1 - if subjectindex not in dep_map: - dep_map[subjectindex] = {objectindex: set([deplink])} - elif objectindex not in dep_map[subjectindex]: - dep_map[subjectindex][objectindex] = set([deplink]) - else: - dep_map[subjectindex][objectindex].add(deplink) - dep_maps.append(dep_map) - self.temp_resources['dep_maps'] = dep_maps - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - dep_map = dep_maps[i] - insts = set([]) - if head in dep_map: - for object in dep_map[head]: - for dep_link in dep_map[head][object]: - insts.add((dep_link, sent[object])) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - total = 0.0 - if len(insts)>0: - for inst in insts: - ngram = inst[0] + ' ' + word + ' ' + inst[1] - # prob = math.exp(model.score(ngram, bos=False, eos=False)) - prob = math.exp(model.score(ngram)) - total += prob - total /= float(len(insts)) - else: - total = 1.0 - result.append(total) - return result - - def binarySubjectDependencyFeature(self, data, args): - model = self.resources[args[0]] - parser = self.resources[args[1]] - - #Get parsed sentences: - dep_maps = None - if 'dep_maps' in self.temp_resources: - dep_maps = self.temp_resources['dep_maps'] - else: - sentences = [l[0].strip().split(' ') for l in data] - dep_parsed_sents = None - if 'dep_parsed_sents' in self.temp_resources: - dep_parsed_sents = self.temp_resources['dep_parsed_sents'] - else: - dep_parsed_sents = dependencyParseSentences(parser, sentences) - self.temp_resources['dep_parsed_sents'] = dep_parsed_sents - dep_maps = [] - for sent in dep_parsed_sents: - dep_map = {} - for parse in sent: - deplink = str(parse[0]) - subjectindex = int(str(parse[2]))-1 - objectindex = int(str(parse[4]))-1 - if subjectindex not in dep_map: - dep_map[subjectindex] = {objectindex: set([deplink])} - elif objectindex not in dep_map[subjectindex]: - dep_map[subjectindex][objectindex] = set([deplink]) - else: - dep_map[subjectindex][objectindex].add(deplink) - dep_maps.append(dep_map) - self.temp_resources['dep_maps'] = dep_maps - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - dep_map = dep_maps[i] - insts = set([]) - if head in dep_map: - for object in dep_map[head]: - for dep_link in dep_map[head][object]: - insts.add((dep_link, sent[object])) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - total = 1.0 - if len(insts)>0: - for inst in insts: - ngram = inst[0] + ' ' + word + ' ' + inst[1] - if ngram not in model: - total = 0.0 - else: - total = 1.0 - result.append(total) - return result - - def subjectDependencyFrequencyFeature(self, data, args): - model = self.resources[args[0]] - parser = self.resources[args[1]] - - #Get parsed sentences: - dep_maps = None - if 'dep_maps' in self.temp_resources: - dep_maps = 
self.temp_resources['dep_maps'] - else: - sentences = [l[0].strip().split(' ') for l in data] - dep_parsed_sents = None - if 'dep_parsed_sents' in self.temp_resources: - dep_parsed_sents = self.temp_resources['dep_parsed_sents'] - else: - dep_parsed_sents = dependencyParseSentences(parser, sentences) - self.temp_resources['dep_parsed_sents'] = dep_parsed_sents - dep_maps = [] - for sent in dep_parsed_sents: - dep_map = {} - for parse in sent: - deplink = str(parse[0]) - subjectindex = int(str(parse[2]))-1 - objectindex = int(str(parse[4]))-1 - if subjectindex not in dep_map: - dep_map[subjectindex] = {objectindex: set([deplink])} - elif objectindex not in dep_map[subjectindex]: - dep_map[subjectindex][objectindex] = set([deplink]) - else: - dep_map[subjectindex][objectindex].add(deplink) - dep_maps.append(dep_map) - self.temp_resources['dep_maps'] = dep_maps - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - dep_map = dep_maps[i] - insts = set([]) - if head in dep_map: - for object in dep_map[head]: - for dep_link in dep_map[head][object]: - insts.add((dep_link, sent[object])) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - total = 0.0 - if len(insts)>0: - for inst in insts: - ngram = inst[0] + ' ' + word + ' ' + inst[1] - if ngram in model: - total += model[ngram] - if total>0.0: - total /= float(len(insts)) - else: - total = 99999.0 - result.append(total) - return result - - def objectDependencyProbabilityFeature(self, data, args): - model = self.resources[args[0]] - parser = self.resources[args[1]] - - #Get parsed sentences: - inv_dep_maps = None - if 'inv_dep_maps' in self.temp_resources: - inv_dep_maps = self.temp_resources['inv_dep_maps'] - else: - dep_maps = None - if 'dep_maps' in self.temp_resources: - dep_maps = self.temp_resources['dep_maps'] - else: - sentences = [l[0].strip().split(' ') for l in data] - dep_parsed_sents = None - if 'dep_parsed_sents' in self.temp_resources: - dep_parsed_sents = self.temp_resources['dep_parsed_sents'] - else: - dep_parsed_sents = dependencyParseSentences(parser, sentences) - self.temp_resources['dep_parsed_sents'] = dep_parsed_sents - dep_maps = [] - for sent in dep_parsed_sents: - dep_map = {} - for parse in sent: - deplink = str(parse[0]) - subjectindex = int(str(parse[2]))-1 - objectindex = int(str(parse[4]))-1 - if subjectindex not in dep_map: - dep_map[subjectindex] = {objectindex: set([deplink])} - elif objectindex not in dep_map[subjectindex]: - dep_map[subjectindex][objectindex] = set([deplink]) - else: - dep_map[subjectindex][objectindex].add(deplink) - dep_maps.append(dep_map) - self.temp_resources['dep_maps'] = dep_maps - - inv_dep_maps = [] - for inst in dep_maps: - inv_dep_map = {} - for subjectindex in inst: - for objectindex in inst[subjectindex]: - if objectindex not in inv_dep_map: - inv_dep_map[objectindex] = {} - inv_dep_map[objectindex][subjectindex] = inst[subjectindex][objectindex] - inv_dep_maps.append(inv_dep_map) - self.temp_resources['inv_dep_maps'] = inv_dep_maps - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - inv_dep_map = inv_dep_maps[i] - insts = set([]) - if head in inv_dep_map: - for object in inv_dep_map[head]: - for dep_link in inv_dep_map[head][object]: - insts.add((dep_link, sent[object])) - for subst in line[3:len(line)]: - word = 
subst.split(':')[1].strip() - total = 0.0 - if len(insts)>0: - for inst in insts: - ngram = inst[0] + ' ' + inst[1] + ' ' + word - # prob = math.exp(model.score(ngram, bos=False, eos=False)) - prob = math.exp(model.score(ngram)) - total += prob - total /= float(len(insts)) - else: - total = 1.0 - result.append(total) - return result - - def binaryObjectDependencyFeature(self, data, args): - model = self.resources[args[0]] - parser = self.resources[args[1]] - - #Get parsed sentences: - inv_dep_maps = None - if 'inv_dep_maps' in self.temp_resources: - inv_dep_maps = self.temp_resources['inv_dep_maps'] - else: - dep_maps = None - if 'dep_maps' in self.temp_resources: - dep_maps = self.temp_resources['dep_maps'] - else: - sentences = [l[0].strip().split(' ') for l in data] - dep_parsed_sents = None - if 'dep_parsed_sents' in self.temp_resources: - dep_parsed_sents = self.temp_resources['dep_parsed_sents'] - else: - dep_parsed_sents = dependencyParseSentences(parser, sentences) - self.temp_resources['dep_parsed_sents'] = dep_parsed_sents - dep_maps = [] - for sent in dep_parsed_sents: - dep_map = {} - for parse in sent: - deplink = str(parse[0]) - subjectindex = int(str(parse[2]))-1 - objectindex = int(str(parse[4]))-1 - if subjectindex not in dep_map: - dep_map[subjectindex] = {objectindex: set([deplink])} - elif objectindex not in dep_map[subjectindex]: - dep_map[subjectindex][objectindex] = set([deplink]) - else: - dep_map[subjectindex][objectindex].add(deplink) - dep_maps.append(dep_map) - self.temp_resources['dep_maps'] = dep_maps - - inv_dep_maps = [] - for inst in dep_maps: - inv_dep_map = {} - for subjectindex in inst: - for objectindex in inst[subjectindex]: - if objectindex not in inv_dep_map: - inv_dep_map[objectindex] = {} - inv_dep_map[objectindex][subjectindex] = inst[subjectindex][objectindex] - inv_dep_maps.append(inv_dep_map) - self.temp_resources['inv_dep_maps'] = inv_dep_maps - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - inv_dep_map = inv_dep_maps[i] - insts = set([]) - if head in inv_dep_map: - for object in inv_dep_map[head]: - for dep_link in inv_dep_map[head][object]: - insts.add((dep_link, sent[object])) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - total = 1.0 - if len(insts)>0: - for inst in insts: - ngram = inst[0] + ' ' + inst[1] + ' ' + word - if ngram not in model: - total = 0.0 - else: - total = 1.0 - result.append(total) - return result - - def objectDependencyFrequencyFeature(self, data, args): - model = self.resources[args[0]] - parser = self.resources[args[1]] - - #Get parsed sentences: - inv_dep_maps = None - if 'inv_dep_maps' in self.temp_resources: - inv_dep_maps = self.temp_resources['inv_dep_maps'] - else: - dep_maps = None - if 'dep_maps' in self.temp_resources: - dep_maps = self.temp_resources['dep_maps'] - else: - sentences = [l[0].strip().split(' ') for l in data] - dep_parsed_sents = None - if 'dep_parsed_sents' in self.temp_resources: - dep_parsed_sents = self.temp_resources['dep_parsed_sents'] - else: - dep_parsed_sents = dependencyParseSentences(parser, sentences) - self.temp_resources['dep_parsed_sents'] = dep_parsed_sents - dep_maps = [] - for sent in dep_parsed_sents: - dep_map = {} - for parse in sent: - deplink = str(parse[0]) - subjectindex = int(str(parse[2]))-1 - objectindex = int(str(parse[4]))-1 - if subjectindex not in dep_map: - dep_map[subjectindex] = {objectindex: set([deplink])} - elif 
objectindex not in dep_map[subjectindex]: - dep_map[subjectindex][objectindex] = set([deplink]) - else: - dep_map[subjectindex][objectindex].add(deplink) - dep_maps.append(dep_map) - self.temp_resources['dep_maps'] = dep_maps - - inv_dep_maps = [] - for inst in dep_maps: - inv_dep_map = {} - for subjectindex in inst: - for objectindex in inst[subjectindex]: - if objectindex not in inv_dep_map: - inv_dep_map[objectindex] = {} - inv_dep_map[objectindex][subjectindex] = inst[subjectindex][objectindex] - inv_dep_maps.append(inv_dep_map) - self.temp_resources['inv_dep_maps'] = inv_dep_maps - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - inv_dep_map = inv_dep_maps[i] - insts = set([]) - if head in inv_dep_map: - for object in inv_dep_map[head]: - for dep_link in inv_dep_map[head][object]: - insts.add((dep_link, sent[object])) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - total = 0.0 - if len(insts)>0: - for inst in insts: - ngram = inst[0] + ' ' + inst[1] + ' ' + word - if ngram in model: - total += model[ngram] - if total>0.0: - total /= float(len(insts)) - else: - total = 99999.0 - result.append(total) - return result - - def allDependencyProbabilityFeature(self, data, args): - model = self.resources[args[0]] - parser = self.resources[args[1]] - - #Get parsed sentences: - if 'inv_dep_maps' in self.temp_resources: - inv_dep_maps = self.temp_resources['inv_dep_maps'] - else: - dep_maps = None - if 'dep_maps' in self.temp_resources: - dep_maps = self.temp_resources['dep_maps'] - else: - sentences = [l[0].strip().split(' ') for l in data] - dep_parsed_sents = None - if 'dep_parsed_sents' in self.temp_resources: - dep_parsed_sents = self.temp_resources['dep_parsed_sents'] - else: - dep_parsed_sents = dependencyParseSentences(parser, sentences) - self.temp_resources['dep_parsed_sents'] = dep_parsed_sents - dep_maps = [] - for sent in dep_parsed_sents: - dep_map = {} - for parse in sent: - deplink = str(parse[0]) - subjectindex = int(str(parse[2]))-1 - objectindex = int(str(parse[4]))-1 - if subjectindex not in dep_map: - dep_map[subjectindex] = {objectindex: set([deplink])} - elif objectindex not in dep_map[subjectindex]: - dep_map[subjectindex][objectindex] = set([deplink]) - else: - dep_map[subjectindex][objectindex].add(deplink) - dep_maps.append(dep_map) - self.temp_resources['dep_maps'] = dep_maps - - inv_dep_maps = [] - for inst in dep_maps: - inv_dep_map = {} - for subjectindex in inst: - for objectindex in inst[subjectindex]: - if objectindex not in inv_dep_map: - inv_dep_map[objectindex] = {} - inv_dep_map[objectindex][subjectindex] = inst[subjectindex][objectindex] - inv_dep_maps.append(inv_dep_map) - self.temp_resources['inv_dep_maps'] = inv_dep_maps - - dep_maps = self.temp_resources['dep_maps'] - inv_dep_maps = self.temp_resources['inv_dep_maps'] - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - - dep_map = dep_maps[i] - inv_dep_map = inv_dep_maps[i] - insts = set([]) - if head in dep_map: - for object in dep_map[head]: - for dep_link in dep_map[head][object]: - insts.add((dep_link, sent[object])) - insts_inv = set([]) - if head in inv_dep_map: - for object in inv_dep_map[head]: - for dep_link in inv_dep_map[head][object]: - insts_inv.add((dep_link, sent[object])) - for subst in line[3:len(line)]: - word = 
subst.split(':')[1].strip() - total = 0.0 - if len(insts)>0 or len(insts_inv)>0: - for inst in insts: - ngram = inst[0] + ' ' + word + ' ' + inst[1] - # prob = math.exp(model.score(ngram, bos=False, eos=False)) - prob = math.exp(model.score(ngram)) - total += prob - for inst in insts_inv: - ngram = inst[0] + ' ' + inst[1] + ' ' + word - # prob = math.exp(model.score(ngram, bos=False, eos=False)) - prob = math.exp(model.score(ngram)) - total += prob - total /= float(len(insts)+len(insts_inv)) - else: - total = 1.0 - result.append(total) - return result - - def binaryAllDependencyFeature(self, data, args): - model = self.resources[args[0]] - parser = self.resources[args[1]] - - #Get parsed sentences: - if 'inv_dep_maps' in self.temp_resources: - inv_dep_maps = self.temp_resources['inv_dep_maps'] - else: - dep_maps = None - if 'dep_maps' in self.temp_resources: - dep_maps = self.temp_resources['dep_maps'] - else: - sentences = [l[0].strip().split(' ') for l in data] - dep_parsed_sents = None - if 'dep_parsed_sents' in self.temp_resources: - dep_parsed_sents = self.temp_resources['dep_parsed_sents'] - else: - dep_parsed_sents = dependencyParseSentences(parser, sentences) - self.temp_resources['dep_parsed_sents'] = dep_parsed_sents - dep_maps = [] - for sent in dep_parsed_sents: - dep_map = {} - for parse in sent: - deplink = str(parse[0]) - subjectindex = int(str(parse[2]))-1 - objectindex = int(str(parse[4]))-1 - if subjectindex not in dep_map: - dep_map[subjectindex] = {objectindex: set([deplink])} - elif objectindex not in dep_map[subjectindex]: - dep_map[subjectindex][objectindex] = set([deplink]) - else: - dep_map[subjectindex][objectindex].add(deplink) - dep_maps.append(dep_map) - self.temp_resources['dep_maps'] = dep_maps - - inv_dep_maps = [] - for inst in dep_maps: - inv_dep_map = {} - for subjectindex in inst: - for objectindex in inst[subjectindex]: - if objectindex not in inv_dep_map: - inv_dep_map[objectindex] = {} - inv_dep_map[objectindex][subjectindex] = inst[subjectindex][objectindex] - inv_dep_maps.append(inv_dep_map) - self.temp_resources['inv_dep_maps'] = inv_dep_maps - - dep_maps = self.temp_resources['dep_maps'] - inv_dep_maps = self.temp_resources['inv_dep_maps'] - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - - dep_map = dep_maps[i] - inv_dep_map = inv_dep_maps[i] - insts = set([]) - if head in dep_map: - for object in dep_map[head]: - for dep_link in dep_map[head][object]: - insts.add((dep_link, sent[object])) - insts_inv = set([]) - if head in inv_dep_map: - for object in inv_dep_map[head]: - for dep_link in inv_dep_map[head][object]: - insts_inv.add((dep_link, sent[object])) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - total = 1.0 - if len(insts)>0 or len(insts_inv)>0: - for inst in insts: - ngram = inst[0] + ' ' + word + ' ' + inst[1] - if ngram not in model: - total = 0.0 - for inst in insts_inv: - ngram = inst[0] + ' ' + inst[1] + ' ' + word - if ngram not in model: - total = 0.0 - else: - total = 1.0 - result.append(total) - return result - - def allDependencyFrequencyFeature(self, data, args): - model = self.resources[args[0]] - parser = self.resources[args[1]] - - #Get parsed sentences: - if 'inv_dep_maps' in self.temp_resources: - inv_dep_maps = self.temp_resources['inv_dep_maps'] - else: - dep_maps = None - if 'dep_maps' in self.temp_resources: - dep_maps = self.temp_resources['dep_maps'] - else: - sentences = 
[l[0].strip().split(' ') for l in data] - dep_parsed_sents = None - if 'dep_parsed_sents' in self.temp_resources: - dep_parsed_sents = self.temp_resources['dep_parsed_sents'] - else: - dep_parsed_sents = dependencyParseSentences(parser, sentences) - self.temp_resources['dep_parsed_sents'] = dep_parsed_sents - dep_maps = [] - for sent in dep_parsed_sents: - dep_map = {} - for parse in sent: - deplink = str(parse[0]) - subjectindex = int(str(parse[2]))-1 - objectindex = int(str(parse[4]))-1 - if subjectindex not in dep_map: - dep_map[subjectindex] = {objectindex: set([deplink])} - elif objectindex not in dep_map[subjectindex]: - dep_map[subjectindex][objectindex] = set([deplink]) - else: - dep_map[subjectindex][objectindex].add(deplink) - dep_maps.append(dep_map) - self.temp_resources['dep_maps'] = dep_maps - - inv_dep_maps = [] - for inst in dep_maps: - inv_dep_map = {} - for subjectindex in inst: - for objectindex in inst[subjectindex]: - if objectindex not in inv_dep_map: - inv_dep_map[objectindex] = {} - inv_dep_map[objectindex][subjectindex] = inst[subjectindex][objectindex] - inv_dep_maps.append(inv_dep_map) - self.temp_resources['inv_dep_maps'] = inv_dep_maps - - dep_maps = self.temp_resources['dep_maps'] - inv_dep_maps = self.temp_resources['inv_dep_maps'] - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - - dep_map = dep_maps[i] - inv_dep_map = inv_dep_maps[i] - insts = set([]) - if head in dep_map: - for object in dep_map[head]: - for dep_link in dep_map[head][object]: - insts.add((dep_link, sent[object])) - insts_inv = set([]) - if head in inv_dep_map: - for object in inv_dep_map[head]: - for dep_link in inv_dep_map[head][object]: - insts_inv.add((dep_link, sent[object])) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - total = 0.0 - if len(insts)>0 or len(insts_inv)>0: - for inst in insts: - ngram = inst[0] + ' ' + word + ' ' + inst[1] - if ngram in model: - total += model[ngram] - for inst in insts_inv: - ngram = inst[0] + ' ' + inst[1] + ' ' + word - if ngram in model: - total += model[ngram] - if total>0.0: - total /= float(len(insts)+len(insts_inv)) - else: - total = 99999.0 - result.append(total) - return result - - def wordVectorContextSimilarityFeature(self, data, args): - model = self.resources[args[0]] - tagger = self.resources[args[1]] - result = [] - - #Get tagged sentences: - tagged_sents = None - if 'tagged_sents' in self.temp_resources: - tagged_sents = self.temp_resources['tagged_sents'] - else: - sentences = [l[0].strip().split(' ') for l in data] - tagged_sents = tagger.tag_sents(sentences) - self.temp_resources['tagged_sents'] = tagged_sents - - for i in range(0, len(data)): - line = data[i] - tokens = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - - #Get content words in sentence: - content_words = set([]) - for j in range(0, len(tokens)): - token = tokens[j] - tag = tagged_sents[i][j][1] - if self.isContentWord(token, tag): - content_words.add(token) - - #Produce divisor: - divisor = float(len(content_words)) - - for subst in line[3:len(line)]: - word = subst.strip().split(':')[1].strip() - similarity = 0.0 - for content_word in content_words: - try: - similarity += model.similarity(content_word, word) - except KeyError: - try: - similarity += model.similarity(content_word, word.lower()) - except KeyError: - pass - similarity /= divisor - result.append(similarity) - return 
result - - def taggedWordVectorContextSimilarityFeature(self, data, args): - model = self.resources[args[0]] - tagger = self.resources[args[1]] - pos_type = args[2] - result = [] - - #Get tagged sentences: - tagged_sents = None - if 'tagged_sents' in self.temp_resources: - tagged_sents = self.temp_resources['tagged_sents'] - else: - sentences = [l[0].strip().split(' ') for l in data] - tagged_sents = tagger.tag_sents(sentences) - self.temp_resources['tagged_sents'] = tagged_sents - - - #Produce embeddings vector tags: - model_tagged_sents = None - if pos_type=='paetzold': - transformed = [] - for sent in tagged_sents: - tokens = [] - for token in sent: - tokens.append((token[0], getGeneralisedPOS(token[1]))) - transformed.append(tokens) - model_tagged_sents = transformed - else: - model_tagged_sents = tagged_sents - - for i in range(0, len(data)): - line = data[i] - tokens = line[0].strip().split(' ') - target = line[1].strip().lower() - head = int(line[2].strip()) - target_pos = model_tagged_sents[i][head][1] - - #Get content words in sentence: - content_words = set([]) - for j in range(0, len(tokens)): - token = tokens[j] - tag = tagged_sents[i][j][1] - model_tag = model_tagged_sents[i][j][1] - if self.isContentWord(token, tag): - content_words.add(token+'|||'+model_tag) - - #Produce divisor: - divisor = float(len(content_words)) - - for subst in line[3:len(line)]: - word = subst.strip().split(':')[1].strip() - similarity = 0.0 - for content_word in content_words: - try: - similarity += model.similarity(content_word, word+'|||'+target_pos) - except KeyError: - try: - similarity += model.similarity(content_word, word.lower()+'|||'+target_pos) - except KeyError: - pass - similarity /= divisor - result.append(similarity) - return result - - def nullLinkNominalFeature(self, data, args): - parser = self.resources[args[0]] - - #Get parsed sentences: - if 'inv_dep_maps' in self.temp_resources: - inv_dep_maps = self.temp_resources['inv_dep_maps'] - else: - dep_maps = None - if 'dep_maps' in self.temp_resources: - dep_maps = self.temp_resources['dep_maps'] - else: - sentences = [l[0].strip().split(' ') for l in data] - dep_parsed_sents = None - if 'dep_parsed_sents' in self.temp_resources: - dep_parsed_sents = self.temp_resources['dep_parsed_sents'] - else: - dep_parsed_sents = dependencyParseSentences(parser, sentences) - self.temp_resources['dep_parsed_sents'] = dep_parsed_sents - dep_maps = [] - for sent in dep_parsed_sents: - dep_map = {} - for parse in sent: - deplink = str(parse[0]) - subjectindex = int(str(parse[2]))-1 - objectindex = int(str(parse[4]))-1 - if subjectindex not in dep_map: - dep_map[subjectindex] = {objectindex: set([deplink])} - elif objectindex not in dep_map[subjectindex]: - dep_map[subjectindex][objectindex] = set([deplink]) - else: - dep_map[subjectindex][objectindex].add(deplink) - dep_maps.append(dep_map) - self.temp_resources['dep_maps'] = dep_maps - - inv_dep_maps = [] - for inst in dep_maps: - inv_dep_map = {} - for subjectindex in inst: - for objectindex in inst[subjectindex]: - if objectindex not in inv_dep_map: - inv_dep_map[objectindex] = {} - inv_dep_map[objectindex][subjectindex] = inst[subjectindex][objectindex] - inv_dep_maps.append(inv_dep_map) - self.temp_resources['inv_dep_maps'] = inv_dep_maps - - dep_maps = self.temp_resources['dep_maps'] - inv_dep_maps = self.temp_resources['inv_dep_maps'] - - result = [] - for i in range(0, len(data)): - line = data[i] - sent = line[0].strip().split(' ') - target = line[1].strip().lower() - head = 
int(line[2].strip()) - - dep_map = dep_maps[i] - inv_dep_map = inv_dep_maps[i] - value = False - if head in dep_map or head in inv_dep_map: - value = True - - for subst in line[3:len(line)]: - result.append(value) - return result - - def backoffBehaviorNominalFeature(self, data, args): - ngrams = args[0] - result = [] - counts = self.resources[ngrams] - for line in data: - sent = line[0].strip().split(' ') - target = line[1] - head = int(line[2]) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - ngram2t, bos2t, eos2t = self.getNgram(word, sent, head, 2, 0) - ngram1t, bos1t, eos1t = self.getNgram(word, sent, head, 1, 0) - ngram0t, bos0t, eos0t = self.getNgram(word, sent, head, 0, 0) - ngram2f, bos2f, eos2f = word, True, False - ngram1f, bos1f, eos1f = word, True, False - if head>0: - ngram2f, bos2f, eos2f = self.getNgram(sent[head-1], sent, head-1, 1, 0) - ngram1f, bos1f, eos1f = self.getNgram(sent[head-1], sent, head-1, 0, 0) - - backoff = -1 - if ngram2t in counts: - backoff = 7.0 - elif ngram2f in counts and ngram1t in counts: - backoff = 6.0 - elif ngram1t in counts: - backoff = 5.0 - elif ngram2f in counts and ngram0t in counts: - backoff = 4.0 - elif ngram1f in counts and ngram0t in counts: - backoff = 3.0 - elif ngram0t in counts: - backoff = 2.0 - else: - backoff = 1.0 - result.append(backoff) - return result - - def candidateNominalFeature(self, data, args): - result = [] - for line in data: - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - result.append(words) - return result - - def ngramNominalFeature(self, data, args): - spanl = args[0] - spanr = args[1] - result = [] - for line in data: - sent = line[0].strip().split(' ') - target = line[1] - head = int(line[2]) - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - ngram, bosv, eosv = self.getNgram(word, sent, head, spanl, spanr) - tokens = ngram.split(' ') - fngram = '' - for token in tokens: - fngram += token + '|||' - result.append(fngram[0:len(fngram)-3]) - return result - - def candidatePOSNominalFeature(self, data, args): - result = [] - - tagger = self.resources[args[0]] - pos_type = args[1] - - #Get tagged sentences: - tagged_sents = None - if 'tagged_sents' in self.temp_resources: - tagged_sents = self.temp_resources['tagged_sents'] - else: - sentences = [l[0].strip().split(' ') for l in data] - tagged_sents = tagger.tag_sents(sentences) - self.temp_resources['tagged_sents'] = tagged_sents - - #Transform them to the right format: - if pos_type=='paetzold': - transformed = [] - for sent in tagged_sents: - tokens = [] - for token in sent: - tokens.append((token[0], getGeneralisedPOS(token[1]))) - transformed.append(tokens) - tagged_sents = transformed - - for i in range(0, len(data)): - line = data[i] - target = line[1].strip().lower() - head = int(line[2].strip()) - target_pos = tagged_sents[i][head][1] - for subst in line[3:len(line)]: - result.append(target_pos) - return result - - def POSNgramNominalFeature(self, data, args): - result = [] - - spanl = args[0] - spanr = args[1] - tagger = self.resources[args[2]] - pos_type = args[3] - - #Get tagged sentences: - tagged_sents = None - if 'tagged_sents' in self.temp_resources: - tagged_sents = self.temp_resources['tagged_sents'] - else: - sentences = [l[0].strip().split(' ') for l in data] - tagged_sents = tagger.tag_sents(sentences) - self.temp_resources['tagged_sents'] = tagged_sents - - #Transform them to the right format: - if pos_type=='paetzold': - transformed = [] - for sent in 
tagged_sents: - tokens = [] - for token in sent: - tokens.append((token[0], getGeneralisedPOS(token[1]))) - transformed.append(tokens) - tagged_sents = transformed - - for i in range(0, len(data)): - line = data[i] - target = line[1].strip().lower() - head = int(line[2].strip()) - target_pos = tagged_sents[i][head][1] - POStokens = [posdata[1] for posdata in tagged_sents[i]] - for subst in line[3:len(line)]: - ngram, bosv, eosv = self.getNgram(target_pos, POStokens, head, spanl, spanr) - tokens = ngram.split(' ') - fngram = '' - for token in tokens: - fngram += token + '|||' - result.append(fngram[0:len(fngram)-3]) - return result - - def POSNgramWithCandidateNominalFeature(self, data, args): - result = [] - - spanl = args[0] - spanr = args[1] - tagger = self.resources[args[2]] - pos_type = args[3] - - #Get tagged sentences: - tagged_sents = None - if 'tagged_sents' in self.temp_resources: - tagged_sents = self.temp_resources['tagged_sents'] - else: - sentences = [l[0].strip().split(' ') for l in data] - tagged_sents = tagger.tag_sents(sentences) - self.temp_resources['tagged_sents'] = tagged_sents - - #Transform them to the right format: - if pos_type=='paetzold': - transformed = [] - for sent in tagged_sents: - tokens = [] - for token in sent: - tokens.append((token[0], getGeneralisedPOS(token[1]))) - transformed.append(tokens) - tagged_sents = transformed - - for i in range(0, len(data)): - line = data[i] - target = line[1].strip().lower() - head = int(line[2].strip()) - target_pos = tagged_sents[i][head][1] - POStokens = [posdata[1] for posdata in tagged_sents[i]] - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - ngram, bosv, eosv = self.getNgram(word, POStokens, head, spanl, spanr) - tokens = ngram.split(' ') - fngram = '' - for token in tokens: - fngram += token + '|||' - result.append(fngram[0:len(fngram)-3]) - return result - - def imageSearchCountFeature(self, data, args): - result = [] - - key = args[0] - - for i in range(0, len(data)): - line = data[i] - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - imagecount = None - if word not in self.resources['image_counts']: - imagecount = self.getImageCount(word, key) - self.resources['image_counts'][word] = imagecount - else: - imagecount = self.resources['image_counts'][word] - result.append(imagecount) - return result - - def webSearchCountFeature(self, data, args): - result = [] - - for i in range(0, len(data)): - line = data[i] - for subst in line[3:len(line)]: - word = subst.split(':')[1].strip() - pagecount = None - if word not in self.resources['page_counts']: - pagecount = self.getPageCount(word) - self.resources['page_counts'][word] = pagecount - else: - pagecount = self.resources['page_counts'][word] - result.append(pagecount) - return result - - def getImageCount(self, word, key): - headers = {} - headers['Api-Key'] = key - tokens = word.strip().split(' ') - suffix = '' - for token in tokens: - suffix += token + '+' - suffix = suffix[0:len(suffix)-1] - - #Make HTTP request: - url = 'https://api.gettyimages.com/v3/search/images?fields=id&phrase='+suffix - req = urllib.request.Request(url=url, headers=headers) - - #Send request: - count = None - try: - f = urllib.request.urlopen(req) - data = json.loads(f.read()) - count = int(data['result_count']) - except Exception: - count = 0 - return count - - def getPageCount(self, word): - tokens = word.strip().split(' ') - suffix = '' - for token in tokens: - suffix += token + '+' - suffix = suffix[0:len(suffix)-1] - - #Make HTTP request: 
- exp = re.compile('class=\"sb_count\"[^>]*>([^<]+)<') - url = 'https://www.bing.com/search?q='+suffix - req = urllib.request.Request(url=url) - - #Send request: - count = None - try: - f = urllib.request.urlopen(req) - data = f.read() - result = exp.findall(data) - count = int(result[0].strip().split(' ')[0].strip().replace(',', '')) - except Exception: - count = 0 - return count - - def morphologicalFeature(self, data, args): - dictionary = args[0] - result = [] - for line in data: - for subst in line[3:len(line)]: - words = subst.strip().split(':')[1].strip() - if words in dictionary: - result.append(dictionary[words]) - else: - result.append(0.0) - return result - - def readNgramFile(self, ngram_file): - counts = shelve.open(ngram_file, protocol=pickle.HIGHEST_PROTOCOL) - return counts - - def isContentWord(self, word, tag): - content_tags = set(['JJ', 'JJS', 'JJR', 'NN', 'NNS', 'RB', 'RBR', 'RBS', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ']) - if tag in content_tags: - return True - else: - return False - - def addWordVectorValues(self, model, size, orientation): - """ - Adds all the word vector values of a model to the estimator. - - @param model: Path to a binary word vector model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param size: Number of feature values that represent a word in the model. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if model not in self.resources: - m = gensim.models.KeyedVectors.load_word2vec_format(model, binary=True) - self.resources[model] = m - self.features.append((self.wordVectorValuesFeature, [model, size])) - for i in range(0, size): - self.identifiers.append(('Word Vector Value '+str(i)+' (Model: '+model+')', orientation)) - - def addTargetPOSTagProbability(self, condprob_model, pos_model, stanford_tagger, java_path, orientation): - """ - Adds a target POS tag probability feature to the estimator. - The value will be the conditional probability between a candidate substitution and the POS tag of a given target word. - - @param condprob_model: Path to a binary conditional probability model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. 
- """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - os.environ['JAVAHOME'] = java_path - if pos_model not in self.resources: - tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.resources[pos_model] = tagger - if condprob_model not in self.resources: - m = pickle.load(open(condprob_model, 'rb')) - self.resources[condprob_model] = m - - self.features.append((self.targetPOSTagProbability, [condprob_model, pos_model])) - self.identifiers.append(('Target POS Tag Probability (Model:'+str(condprob_model)+')', orientation)) - - def addWordVectorSimilarityFeature(self, model, orientation): - """ - Adds a word vector similarity feature to the estimator. - The value will be the similarity between the word vector of a target complex word and the word vector of a candidate. - - @param model: Path to a binary word vector model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if model not in self.resources: - m = gensim.models.KeyedVectors.load_word2vec_format(model, binary=True) - self.resources[model] = m - self.features.append((self.wordVectorSimilarityFeature, [model])) - self.identifiers.append(('Word Vector Similarity (Model: '+model+')', orientation)) - - def addTaggedWordVectorSimilarityFeature(self, model, pos_model, stanford_tagger, java_path, pos_type, orientation): - """ - Adds a tagged word vector similarity feature to the estimator. - The value will be the similarity between the word vector of a target complex word and the word vector of a candidate, while accompanied by their POS tags. - Each entry in the word vector model must be in the following format: ||| - To create a corpus for such model to be trained, one must tag each word in a corpus, and then concatenate words and tags using the aforementioned convention. - - @param model: Path to a binary tagged word vector model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param pos_type: The type of POS tags to be used. - Values supported: treebank, paetzold - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. 
- """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - os.environ['JAVAHOME'] = java_path - if model not in self.resources: - m = gensim.models.KeyedVectors.load_word2vec_format(model, binary=True) - self.resources[model] = m - if pos_model not in self.resources: - tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.resources[pos_model] = tagger - self.features.append((self.taggedWordVectorSimilarityFeature, [model, pos_model, pos_type])) - self.identifiers.append(('Word Vector Similarity (Model: '+model+') (POS Model: '+pos_model+') (POS Type: '+pos_type+')', orientation)) - - def addTranslationProbabilityFeature(self, translation_probabilities, orientation): - """ - Adds a translation probability feature to the estimator. - The value will be the probability of a target complex word of being translated into a given candidate substitution. - - @param translation_probabilities: Path to a shelve file containing translation probabilities. - To produce the file, first run the following command through fast_align: - fast_align -i -v -d -o - Then, produce a shelve file with the "addTranslationProbabilitiesFileToShelve" function from LEXenstein's "util" module. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - probabilities = self.readNgramFile(translation_probabilities) - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if translation_probabilities not in self.resources: - self.resources[translation_probabilities] = probabilities - self.features.append((self.translationProbabilityFeature, [translation_probabilities])) - self.identifiers.append(('Translation Probability (File: '+translation_probabilities+')', orientation)) - - def addLexiconFeature(self, lexicon, orientation): - """ - Adds a lexicon feature to the estimator. - The value will be 1 if a given candidate is in the provided lexicon, and 0 otherwise. - - @param lexicon: Path to a file containing the words of the lexicon. - The file must have one word per line. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if lexicon not in self.resources: - words = set([w.strip() for w in open(lexicon)]) - self.resources[lexicon] = words - self.features.append((self.lexiconFeature, [lexicon])) - self.identifiers.append(('Lexicon Occurrence (Lexicon: '+lexicon+')', orientation)) - - def addLengthFeature(self, orientation): - """ - Adds a word length feature to the estimator. - The value will be the number of characters in each candidate. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.lengthFeature, [])) - self.identifiers.append(('Word Length', orientation)) - - def addSyllableFeature(self, mat, orientation): - """ - Adds a syllable count feature to the estimator. - The value will be the number of syllables of each candidate. - - @param mat: A configured MorphAdornerToolkit object. - @param orientation: Whether the feature is a simplicity of complexity measure. 
- Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.syllableFeature, [mat])) - self.identifiers.append(('Syllable Count', orientation)) - - def addCollocationalFeature(self, language_model, leftw, rightw, orientation): - """ - Adds a set of collocational features to the estimator. - The values will be the language model probabilities of all collocational features selected. - Each feature is the probability of an n-gram with 0<=l<=leftw tokens to the left and 0<=r<=rightw tokens to the right. - This method creates (leftw+1)*(rightw+1) features. - - @param language_model: Path to the language model from which to extract probabilities. - @param leftw: Maximum number of tokens to the left. - @param rightw: Maximum number of tokens to the right. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.collocationalFeature, [language_model, leftw, rightw])) - for i in range(0, leftw+1): - for j in range(0, rightw+1): - self.identifiers.append(('Collocational Feature ['+str(i)+', '+str(j)+'] (LM: '+language_model+')', orientation)) - - def addFrequencyCollocationalFeature(self, ngram_file, leftw, rightw, orientation): - """ - Adds a set of frequency collocational features to the estimator. - The values will be the n-gram frequencies of all collocational features selected. - Each feature is the frequency of an n-gram with 0<=l<=leftw tokens to the left and 0<=r<=rightw tokens to the right. - This method creates (leftw+1)*(rightw+1) features. - To produce the ngram counts file, the user must first acquire a large corpus of text. - In sequence, the user can then use SRILM to produce an ngram counts file with the "-write" option. - Finally, the user must create a shelve file using the "addNgramCountsFileToShelve" function from the "util" module. - - @param ngram_file: Path to a shelve file containing n-gram frequency counts. - @param leftw: Maximum number of tokens to the left. - @param rightw: Maximum number of tokens to the right. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if ngram_file not in self.resources: - counts = self.readNgramFile(ngram_file) - self.resources[ngram_file] = counts - self.features.append((self.frequencyCollocationalFeature, [ngram_file, leftw, rightw])) - for i in range(0, leftw+1): - for j in range(0, rightw+1): - self.identifiers.append(('Frequency Collocational Feature ['+str(i)+', '+str(j)+'] (N-Grams File: '+ngram_file+')', orientation)) - - def addTaggedFrequencyCollocationalFeature(self, ngram_file, leftw, rightw, pos_model, stanford_tagger, java_path, pos_type, orientation): - """ - Adds a set of frequency tagged n-gram frequency features to the estimator. - The values will be the n-gram frequencies of all tagged collocational features selected. 
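# A minimal sketch (not part of the removed module) of what the collocational
# features registered above compute, assuming kenlm and a whitespace-tokenised
# sentence; the helper name and the 'lm.bin' path are illustrative only. For
# every window of l tokens to the left and r tokens to the right of the target
# word (0 <= l <= leftw, 0 <= r <= rightw), the candidate is substituted into
# the target slot and the resulting n-gram is scored by the language model,
# yielding (leftw+1)*(rightw+1) values per candidate.
import kenlm

def collocational_scores(model, tokens, head, candidate, leftw, rightw):
    scores = []
    for l in range(leftw + 1):
        for r in range(rightw + 1):
            window = tokens[max(0, head - l):head] + [candidate] \
                     + tokens[head + 1:head + 1 + r]
            scores.append(model.score(' '.join(window)))
    return scores

# e.g.: collocational_scores(kenlm.LanguageModel('lm.bin'),
#       'the cat perched on the mat'.split(), 2, 'sat', leftw=2, rightw=2)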
- Each feature is the frequency of an n-gram with 0<=l<=leftw tagged tokens to the left and 0<=r<=rightw tagged tokens to the right. - This method creates (leftw+1)*(rightw+1) features. - This function requires for a special type of ngram counts file. - Each n-gram in the file must be composed of n-1 tags, and exactly 1 word. - To produce this file, one must first parse a corpus and create a corpus with n-grams in the aforementioned format. - The user can then use SRILM to produce an ngram counts file with the "-write" option. - Finally, the user must create a shelve file using the "addNgramCountsFileToShelve" function from the "util" module. - - @param ngram_file: Path to a shelve file containing n-gram frequency counts. - @param leftw: Maximum number of tokens to the left. - @param rightw: Maximum number of tokens to the right. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param pos_type: The type of POS tags to be used. - Values supported: treebank, paetzold - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if ngram_file not in self.resources: - counts = self.readNgramFile(ngram_file) - self.resources[ngram_file] = counts - os.environ['JAVAHOME'] = java_path - if pos_model not in self.resources: - tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.resources[pos_model] = tagger - self.features.append((self.taggedFrequencyCollocationalFeature, [ngram_file, leftw, rightw, pos_model, pos_type])) - for i in range(0, leftw+1): - for j in range(0, rightw+1): - self.identifiers.append(('Tagged Frequency Collocational Feature ['+str(i)+', '+str(j)+'] (N-Grams File: '+ngram_file+') (POS type: '+pos_type+')', orientation)) - - def addBinaryTaggedFrequencyCollocationalFeature(self, ngram_file, leftw, rightw, pos_model, stanford_tagger, java_path, pos_type, orientation): - """ - Adds a set of binary tagged frequency collocational features to the estimator. - The values will be the binary n-gram values of all tagged collocational features selected. - Each feature is the frequency of an n-gram with 0<=l<=leftw tagged tokens to the left and 0<=r<=rightw tagged tokens to the right. - This method creates (leftw+1)*(rightw+1) features. - This function requires for a special type of ngram counts file. - Each n-gram in the file must be composed of n-1 tags, and exactly 1 word. - To produce this file, one must first parse a corpus and create a corpus with n-grams in the aforementioned format. - The user can then use SRILM to produce an ngram counts file with the "-write" option. - Finally, the user must create a shelve file using the "addNgramCountsFileToShelve" function from the "util" module. - - @param ngram_file: Path to a shelve file containing n-gram frequency counts. - @param leftw: Maximum number of tokens to the left. - @param rightw: Maximum number of tokens to the right. 
- @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param pos_type: The type of POS tags to be used. - Values supported: treebank, paetzold - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if ngram_file not in self.resources: - counts = self.readNgramFile(ngram_file) - self.resources[ngram_file] = counts - os.environ['JAVAHOME'] = java_path - if pos_model not in self.resources: - tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.resources[pos_model] = tagger - self.features.append((self.binaryTaggedFrequencyCollocationalFeature, [ngram_file, leftw, rightw, pos_model, pos_type])) - for i in range(0, leftw+1): - for j in range(0, rightw+1): - self.identifiers.append(('Binary Tagged Frequency Collocational Feature ['+str(i)+', '+str(j)+'] (N-Grams File: '+ngram_file+') (POS type: '+pos_type+')', orientation)) - - def addPopCollocationalFeature(self, language_model, leftw, rightw, orientation): - """ - Adds a set of "pop" collocational features to the estimator. - Each feature is the probability of an n-gram with 0<=l<=leftw tokens to the left and 0<=r<=rightw tokens to the right. - The value of each feature will be the highest frequency between all "popping" n-gram combinations of one token to the left and right. - This method creates (leftw+1)*(rightw+1) features. - - @param language_model: Path to the language model from which to extract probabilities. - @param leftw: Maximum number of tokens to the left. - @param rightw: Maximum number of tokens to the right. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.popCollocationalFeature, [language_model, leftw, rightw])) - for i in range(0, leftw+1): - for j in range(0, rightw+1): - self.identifiers.append(('Pop Collocational Feature ['+str(i)+', '+str(j)+'] (LM: '+language_model+')', orientation)) - - def addNGramProbabilityFeature(self, language_model, leftw, rightw, orientation): - """ - Adds a n-gram probability feature to the estimator. - The value will be the language model probability of the n-gram composed by leftw tokens to the left and rightw tokens to the right of a given word. - - @param language_model: Path to the language model from which to extract probabilities. - @param leftw: Number of tokens to the left. - @param rightw: Number of tokens to the right. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. 
- """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.ngramProbabilityFeature, [language_model, leftw, rightw])) - self.identifiers.append(('N-Gram Probability Feature ['+str(leftw)+', '+str(rightw)+'] (LM: '+language_model+')', orientation)) - - def addNGramFrequencyFeature(self, ngram_file, leftw, rightw, orientation): - """ - Adds a n-gram frequency feature to the estimator. - The value will be the the frequency of the n-gram composed by leftw tokens to the left and rightw tokens to the right of a given word. - To produce the ngram counts file, the user must first acquire a large corpus of text. - In sequence, the user can then use SRILM to produce an ngram counts file with the "-write" option. - Finally, the user must create a shelve file using the "addNgramCountsFileToShelve" function from the "util" module. - - @param ngram_file: Path to a shelve file containing n-gram frequency counts. - @param leftw: Number of tokens to the left. - @param rightw: Number of tokens to the right. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if ngram_file not in self.resources: - counts = self.readNgramFile(ngram_file) - self.resources[ngram_file] = counts - self.features.append((self.ngramFrequencyFeature, [ngram_file, leftw, rightw])) - self.identifiers.append(('N-Gram Frequency Feature ['+str(leftw)+', '+str(rightw)+'] (N-grams File: '+ngram_file+')', orientation)) - - def addBinaryNGramFrequencyFeature(self, ngram_file, leftw, rightw, orientation): - """ - Adds a binary n-gram frequency feature to the estimator. - The value will be 1 if the n-gram composed by leftw tokens to the left and rightw tokens to the right of a given word are in the n-grams file, and 0 otherwise. - To produce the ngram counts file, the user must first acquire a large corpus of text. - In sequence, the user can then use SRILM to produce an ngram counts file with the "-write" option. - Finally, the user must create a shelve file using the "addNgramCountsFileToShelve" function from the "util" module. - - @param ngram_file: Path to a shelve file containing n-gram frequency counts. - @param leftw: Number of tokens to the left. - @param rightw: Number of tokens to the right. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if ngram_file not in self.resources: - counts = self.readNgramFile(ngram_file) - self.resources[ngram_file] = counts - self.features.append((self.binaryNgramFrequencyFeature, [ngram_file, leftw, rightw])) - self.identifiers.append(('Binary N-Gram Probability Feature ['+str(leftw)+', '+str(rightw)+'] (N-grams File: '+ngram_file+')', orientation)) - - def addPopNGramProbabilityFeature(self, language_model, leftw, rightw, orientation): - """ - Adds a pop n-gram probability feature to the estimator. 
- The value is the highest probability of the n-gram with leftw tokens to the left and rightw tokens to the right, with a popping window of one token to the left and right. - - @param language_model: Path to the language model from which to extract probabilities. - @param leftw: Number of tokens to the left. - @param rightw: Number of tokens to the right. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.popNgramProbabilityFeature, [language_model, leftw, rightw])) - self.identifiers.append(('Pop N-Gram Frequency Feature ['+str(leftw)+', '+str(rightw)+'] (LM: '+language_model+')', orientation)) - - def addPopNGramFrequencyFeature(self, ngram_file, leftw, rightw, orientation): - """ - Adds a pop n-gram frequency feature to the estimator. - The value is the highest raw frequency count of the n-gram with leftw tokens to the left and rightw tokens to the right, with a popping window of one token to the left and right. - To produce the ngram counts file, the user must first acquire a large corpus of text. - In sequence, the user can then use SRILM to produce an ngram counts file with the "-write" option. - Finally, the user must create a shelve file using the "addNgramCountsFileToShelve" function from the "util" module. - - @param ngram_file: Path to a shelve file containing n-gram frequency counts. - @param leftw: Number of tokens to the left. - @param rightw: Number of tokens to the right. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if ngram_file not in self.resources: - counts = self.readNgramFile(ngram_file) - self.resources[ngram_file] = counts - self.features.append((self.popNgramFrequencyFeature, [ngram_file, leftw, rightw])) - self.identifiers.append(('Pop N-Gram Frequency Feature ['+str(leftw)+', '+str(rightw)+'] (N-grams File: '+ngram_file+')', orientation)) - - def addSentenceProbabilityFeature(self, language_model, orientation): - """ - Adds a sentence probability feature to the estimator. - The value will be the language model probability of each sentence in the VICTOR corpus with its target complex word replaced by a candidate. - - @param language_model: Path to the language model from which to extract probabilities. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.sentenceProbabilityFeature, [language_model])) - self.identifiers.append(('Sentence Probability (LM: '+language_model+')', orientation)) - - def addReverseSentenceProbabilityFeature(self, language_model, orientation): - """ - Adds a reverse sentence probability feature to the estimator. 
- The value will be the language model probability of each inverted sentence in the VICTOR corpus with its target complex word replaced by a candidate. - - @param language_model: Path to the language model from which to extract probabilities. - This language model must be trained over a corpus composed of inverted sentences (Ex: ". sentence a is This"). - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.reverseSentenceProbabilityFeature, [language_model])) - self.identifiers.append(('Reverse Sentence Probability (LM: '+language_model+')', orientation)) - - def addPrefixProbabilityFeature(self, language_model, orientation): - """ - Adds a prefix probability feature to the estimator. - The value will be the language model probability of all words in each sentence in the VICTOR corpus until the target complex word, while replaced by a candidate. - - @param language_model: Path to the language model from which to extract probabilities. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.prefixProbabilityFeature, [language_model])) - self.identifiers.append(('Prefix Probability (LM: '+language_model+')', orientation)) - - def addReversePrefixProbabilityFeature(self, language_model, orientation): - """ - Adds a reverse prefix probability feature to the estimator. - The value will be the language model probability of all words in each inverted sentence in the VICTOR corpus until the target complex word, while replaced by a candidate. - - @param language_model: Path to the language model from which to extract probabilities. - This language model must be trained over a corpus composed of inverted sentences (Ex: ". sentence a is This"). - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.reversePrefixProbabilityFeature, [language_model])) - self.identifiers.append(('Reverse Prefix Probability (LM: '+language_model+')', orientation)) - - def addSenseCountFeature(self, orientation): - """ - Adds a sense count feature to the estimator. - Calculates the number of senses registered in WordNet of a candidate. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. 
- """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.senseCount ,[])) - self.identifiers.append(('Sense Count', orientation)) - - def addSynonymCountFeature(self, orientation): - """ - Adds a synonym count feature to the estimator. - Calculates the number of synonyms registered in WordNet of a candidate. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.synonymCount ,[])) - self.identifiers.append(('Synonym Count', orientation)) - - def addIsSynonymFeature(self, orientation): - """ - Adds a synonymy relation feature to the estimator. - If a candidate substitution is a synonym of the target word, then it returns 1, if not, it returns 0. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.isSynonym ,[])) - self.identifiers.append(('Is Synonym', orientation)) - - def addHypernymCountFeature(self, orientation): - """ - Adds a hypernym count feature to the estimator. - Calculates the number of hypernyms registered in WordNet of a candidate. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.hypernymCount ,[])) - self.identifiers.append(('Hypernym Count', orientation)) - - def addIsHypernymFeature(self, orientation): - """ - Adds a hypernymy relation feature to the estimator. - If a candidate substitution is a hypernym of the target word, then it returns 1, if not, it returns 0. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.isHypernym ,[])) - self.identifiers.append(('Is Hypernym', orientation)) - - def addHyponymCountFeature(self, orientation): - """ - Adds a hyponym count feature to the estimator. - Calculates the number of hyponyms registered in WordNet of a candidate. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.hyponymCount ,[])) - self.identifiers.append(('Hyponym Count', orientation)) - - def addIsHyponymFeature(self, orientation): - """ - Adds a hyponymy relation feature to the estimator. - If a candidate substitution is a hyponym of the target word, then it returns 1, if not, it returns 0. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. 
- """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.isHyponym ,[])) - self.identifiers.append(('Is Hyponym', orientation)) - - def addMinDepthFeature(self, orientation): - """ - Adds a minimum sense depth feature to the estimator. - Calculates the minimum distance between two senses of a given candidate. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.minDepth ,[])) - self.identifiers.append(('Minimal Sense Depth', orientation)) - - def addMaxDepthFeature(self, orientation): - """ - Adds a maximum sense depth feature to the estimator. - Calculates the maximum distance between two senses of a given candidate. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.maxDepth ,[])) - self.identifiers.append(('Maximal Sense Depth', orientation)) - - def addAverageDepthFeature(self, orientation): - """ - Adds an average sense depth feature to the estimator. - Calculates the average distance between two senses of a given candidate. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.averageDepth ,[])) - self.identifiers.append(('Average Sense Depth', orientation)) - - def addSubjectDependencyProbabilityFeature(self, language_model, stanford_parser, dependency_models, java_path, orientation): - """ - Adds a subject dependency probability feature to the estimator. - The value will be the average language model probability of all dependency links of which the target word is subject, with the target word replaced by a given candidate. - To train the language model used by this feature, one must first extract dependency links from a large corpora of sentences. - In sequence, the dependency links must be transformed into the following format: - In the format above, each token is space-separated. - Once transformed, the dependency links can then be placed in a text file, one per line. - Finally, one can then run any language modelling tool to produce a language model in ARPA format. - - @param language_model: Path to the language model from which to extract dependency link probabilities. - @param stanford_parser: Path to the "stanford-parser.jar" file. - The parser can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param dependency_models: Path to a JAR file containing parsing models. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. 
- """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - os.environ['JAVAHOME'] = java_path - if dependency_models not in self.resources: - parser = StanfordParser(path_to_jar=stanford_parser, path_to_models_jar=dependency_models) - self.resources[dependency_models] = parser - self.features.append((self.subjectDependencyProbabilityFeature, [language_model, dependency_models])) - self.identifiers.append(('Subject Dependency Probability Feature (Language Model: '+language_model+') (Models: '+dependency_models+')', orientation)) - - def addBinarySubjectDependencyFeature(self, dep_counts_file, stanford_parser, dependency_models, java_path, orientation): - """ - Adds a binary subject dependency feature to the estimator. - The value will be 1 if all dependency links of which the target word is subject exist for a given candidate, and 0 otherwise. - To produce the dependency link counts file used by this feature, one must first extract dependency links from a large corpora of sentences. - In sequence, the dependency links must be transformed into the following format: - In the format above, each token is space-separated. - Once transformed, the dependency links can then be placed in a text file, one per line. - Finally, one can then run any language modelling tool to produce a language model in ARPA format. - - @param dep_counts_file: Path to a shelve file containing dependency link counts. - @param stanford_parser: Path to the "stanford-parser.jar" file. - The parser can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param dependency_models: Path to a JAR file containing parsing models. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if dep_counts_file not in self.resources: - counts = self.readNgramFile(dep_counts_file) - self.resources[dep_counts_file] = counts - os.environ['JAVAHOME'] = java_path - if dependency_models not in self.resources: - parser = StanfordParser(path_to_jar=stanford_parser, path_to_models_jar=dependency_models) - self.resources[dependency_models] = parser - self.features.append((self.binarySubjectDependencyFeature, [dep_counts_file, dependency_models])) - self.identifiers.append(('Binary Subject Dependency Feature (Dependency Link Counts File: '+dep_counts_file+') (Models: '+dependency_models+')', orientation)) - - def addSubjectDependencyFrequencyFeature(self, dep_counts_file, stanford_parser, dependency_models, java_path, orientation): - """ - Adds a subject dependency frequency feature to the estimator. - The value will be the average raw frequency of all dependency links of which the target word is subject, with the target word replaced by a given candidate. - To produce the dependency link counts file used by this feature, one must first extract dependency links from a large corpora of sentences. 
- In sequence, the dependency links must be transformed into the following format: - In the format above, each token is space-separated. - Once transformed, the dependency links can then be placed in a text file, one per line. - Finally, one can then run any language modelling tool to produce a language model in ARPA format. - - @param dep_counts_file: Path to a shelve file containing dependency link counts. - @param stanford_parser: Path to the "stanford-parser.jar" file. - The parser can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param dependency_models: Path to a JAR file containing parsing models. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if dep_counts_file not in self.resources: - counts = self.readNgramFile(dep_counts_file) - self.resources[dep_counts_file] = counts - os.environ['JAVAHOME'] = java_path - if dependency_models not in self.resources: - parser = StanfordParser(path_to_jar=stanford_parser, path_to_models_jar=dependency_models) - self.resources[dependency_models] = parser - self.features.append((self.subjectDependencyFrequencyFeature, [dep_counts_file, dependency_models])) - self.identifiers.append(('Subject Dependency Frequency Feature (Dependency Link Counts File: '+dep_counts_file+') (Models: '+dependency_models+')', orientation)) - - def addObjectDependencyProbabilityFeature(self, language_model, stanford_parser, dependency_models, java_path, orientation): - """ - Adds an object dependency probability feature to the estimator. - The value will be the average language model probability of all dependency links of which the target word is object, with the target word replaced by a given candidate. - To train the language model used by this feature, one must first extract dependency links from a large corpora of sentences. - In sequence, the dependency links must be transformed into the following format: - In the format above, each token is space-separated. - Once transformed, the dependency links can then be placed in a text file, one per line. - Finally, one can then run any language modelling tool to produce a language model in ARPA format. - - @param language_model: Path to the language model from which to extract dependency link probabilities. - @param stanford_parser: Path to the "stanford-parser.jar" file. - The parser can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param dependency_models: Path to a JAR file containing parsing models. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. 
- """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - os.environ['JAVAHOME'] = java_path - if dependency_models not in self.resources: - parser = StanfordParser(path_to_jar=stanford_parser, path_to_models_jar=dependency_models) - self.resources[dependency_models] = parser - self.features.append((self.objectDependencyProbabilityFeature, [language_model, dependency_models])) - self.identifiers.append(('Object Dependency Probability Feature (Language Model: '+language_model+') (Models: '+dependency_models+')', orientation)) - - def addBinaryObjectDependencyFeature(self, dep_counts_file, stanford_parser, dependency_models, java_path, orientation): - """ - Adds a binary object dependency feature to the estimator. - The value will be 1 if all dependency links of which the target word is object exist for a given candidate, and 0 otherwise. - To produce the dependency link counts file used by this feature, one must first extract dependency links from a large corpora of sentences. - In sequence, the dependency links must be transformed into the following format: - In the format above, each token is space-separated. - Once transformed, the dependency links can then be placed in a text file, one per line. - Finally, one can then run any language modelling tool to produce a language model in ARPA format. - - @param dep_counts_file: Path to a shelve file containing dependency link counts. - @param stanford_parser: Path to the "stanford-parser.jar" file. - The parser can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param dependency_models: Path to a JAR file containing parsing models. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if dep_counts_file not in self.resources: - counts = self.readNgramFile(dep_counts_file) - self.resources[dep_counts_file] = counts - os.environ['JAVAHOME'] = java_path - if dependency_models not in self.resources: - parser = StanfordParser(path_to_jar=stanford_parser, path_to_models_jar=dependency_models) - self.resources[dependency_models] = parser - self.features.append((self.binaryObjectDependencyFeature, [dep_counts_file, dependency_models])) - self.identifiers.append(('Binary Object Dependency Feature (Dependency Link Counts File: '+dep_counts_file+') (Models: '+dependency_models+')', orientation)) - - def addObjectDependencyFrequencyFeature(self, dep_counts_file, stanford_parser, dependency_models, java_path, orientation): - """ - Adds an object dependency frequency feature to the estimator. - The value will be the average raw frequency of all dependency links of which the target word is object, with the target word replaced by a given candidate. - To produce the dependency link counts file used by this feature, one must first extract dependency links from a large corpora of sentences. 
- In sequence, the dependency links must be transformed into the following format: - In the format above, each token is space-separated. - Once transformed, the dependency links can then be placed in a text file, one per line. - Finally, one can then run any language modelling tool to produce a language model in ARPA format. - - @param dep_counts_file: Path to a shelve file containing dependency link counts. - @param stanford_parser: Path to the "stanford-parser.jar" file. - The parser can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param dependency_models: Path to a JAR file containing parsing models. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if dep_counts_file not in self.resources: - counts = self.readNgramFile(dep_counts_file) - self.resources[dep_counts_file] = counts - os.environ['JAVAHOME'] = java_path - if dependency_models not in self.resources: - parser = StanfordParser(path_to_jar=stanford_parser, path_to_models_jar=dependency_models) - self.resources[dependency_models] = parser - self.features.append((self.objectDependencyFrequencyFeature, [dep_counts_file, dependency_models])) - self.identifiers.append(('Object Dependency Frequency Feature (Dependency Link Counts File: '+dep_counts_file+') (Models: '+dependency_models+')', orientation)) - - def addAllDependencyProbabilityFeature(self, language_model, stanford_parser, dependency_models, java_path, orientation): - """ - Adds a dependency probability feature to the estimator. - The value will be the average language model probability of all the target word's dependency links, with the target word replaced by a given candidate. - To train the language model used by this feature, one must first extract dependency links from a large corpora of sentences. - In sequence, the dependency links must be transformed into the following format: - In the format above, each token is space-separated. - Once transformed, the dependency links can then be placed in a text file, one per line. - Finally, one can then run any language modelling tool to produce a language model in ARPA format. - - @param language_model: Path to the language model from which to extract dependency link probabilities. - @param stanford_parser: Path to the "stanford-parser.jar" file. - The parser can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param dependency_models: Path to a JAR file containing parsing models. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. 
- """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - os.environ['JAVAHOME'] = java_path - if dependency_models not in self.resources: - parser = StanfordParser(path_to_jar=stanford_parser, path_to_models_jar=dependency_models) - self.resources[dependency_models] = parser - self.features.append((self.allDependencyProbabilityFeature, [language_model, dependency_models])) - self.identifiers.append(('Dependency Probability Feature (Language Model: '+language_model+') (Models: '+dependency_models+')', orientation)) - - def addBinaryAllDependencyFeature(self, dep_counts_file, stanford_parser, dependency_models, java_path, orientation): - """ - Adds a binary dependency feature to the estimator. - The value will be 1 if all dependency links of the target word exist for a given candidate, and 0 otherwise. - To produce the dependency link counts file used by this feature, one must first extract dependency links from a large corpora of sentences. - In sequence, the dependency links must be transformed into the following format: - In the format above, each token is space-separated. - Once transformed, the dependency links can then be placed in a text file, one per line. - Finally, one can then run any language modelling tool to produce a language model in ARPA format. - - @param dep_counts_file: Path to a shelve file containing dependency link counts. - @param stanford_parser: Path to the "stanford-parser.jar" file. - The parser can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param dependency_models: Path to a JAR file containing parsing models. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if dep_counts_file not in self.resources: - counts = self.readNgramFile(dep_counts_file) - self.resources[dep_counts_file] = counts - os.environ['JAVAHOME'] = java_path - if dependency_models not in self.resources: - parser = StanfordParser(path_to_jar=stanford_parser, path_to_models_jar=dependency_models) - self.resources[dependency_models] = parser - self.features.append((self.binaryAllDependencyFeature, [dep_counts_file, dependency_models])) - self.identifiers.append(('Binary All Dependency Feature (Dependency Link Counts File: '+dep_counts_file+') (Models: '+dependency_models+')', orientation)) - - def addAllDependencyFrequencyFeature(self, dep_counts_file, stanford_parser, dependency_models, java_path, orientation): - """ - Adds a dependency frequency feature to the estimator. - The value will be the average raw frequency of all dependency links of the target word, with the target word replaced by a given candidate. - To produce the dependency link counts file used by this feature, one must first extract dependency links from a large corpora of sentences. 
- In sequence, the dependency links must be transformed into the following format: - In the format above, each token is space-separated. - Once transformed, the dependency links can then be placed in a text file, one per line. - Finally, one can then run any language modelling tool to produce a language model in ARPA format. - - @param dep_counts_file: Path to a shelve file containing dependency link counts. - @param stanford_parser: Path to the "stanford-parser.jar" file. - The parser can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param dependency_models: Path to a JAR file containing parsing models. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if dep_counts_file not in self.resources: - counts = self.readNgramFile(dep_counts_file) - self.resources[dep_counts_file] = counts - os.environ['JAVAHOME'] = java_path - if dependency_models not in self.resources: - parser = StanfordParser(path_to_jar=stanford_parser, path_to_models_jar=dependency_models) - self.resources[dependency_models] = parser - self.features.append((self.allDependencyFrequencyFeature, [dep_counts_file, dependency_models])) - self.identifiers.append(('All Dependency Frequency Feature (Dependency Link Counts File: '+dep_counts_file+') (Models: '+dependency_models+')', orientation)) - - def addWordVectorContextSimilarityFeature(self, model, pos_model, stanford_tagger, java_path, orientation): - """ - Adds a word vector context similarity feature to the estimator. - The value will be the average similarity between the word vector of a candidate and the vectors of all content word in the target word's context. - - @param model: Path to a binary word vector model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. 
- """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if model not in self.resources: - m = gensim.models.KeyedVectors.load_word2vec_format(model, binary=True) - self.resources[model] = m - os.environ['JAVAHOME'] = java_path - if pos_model not in self.resources: - tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.resources[pos_model] = tagger - self.features.append((self.wordVectorContextSimilarityFeature, [model, pos_model])) - self.identifiers.append(('Word Vector Context Similarity (Model: '+model+') (POS Model: '+pos_model+')', orientation)) - - def addTaggedWordVectorContextSimilarityFeature(self, model, pos_model, stanford_tagger, java_path, pos_type, orientation): - """ - Adds a tagged word vector context similarity feature to the estimator. - The value will be the average similarity between the word vector of a candidate and the vectors of all content word in the target word's context. - Each entry in the word vector model must be in the following format: ||| - To create a corpus for such model to be trained, one must tag each word in a corpus, and then concatenate words and tags using the aforementioned convention. - - @param model: Path to a binary tagged word vector model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param pos_type: The type of POS tags to be used. - Values supported: treebank, paetzold - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if model not in self.resources: - m = gensim.models.KeyedVectors.load_word2vec_format(model, binary=True) - self.resources[model] = m - os.environ['JAVAHOME'] = java_path - if pos_model not in self.resources: - tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.resources[pos_model] = tagger - self.features.append((self.taggedWordVectorContextSimilarityFeature, [model, pos_model, pos_type])) - self.identifiers.append(('Tagged Word Vector Context Similarity (Model: '+model+') (POS Model: '+pos_model+') (POS Type: '+pos_type+')', orientation)) - - def addNullLinkNominalFeature(self, stanford_parser, dependency_models, java_path, orientation): - """ - Adds a null link nominal feature to the estimator - The value will be 1 if there is at least one dependency link of which the candidate is part of, and 0 otherwise. - - @param stanford_parser: Path to the "stanford-parser.jar" file. - The parser can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param dependency_models: Path to a JAR file containing parsing models. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/lex-parser.shtml - @param java_path: Path to the system's "java" executable. 
- Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - os.environ['JAVAHOME'] = java_path - if dependency_models not in self.resources: - parser = StanfordParser(path_to_jar=stanford_parser, path_to_models_jar=dependency_models) - self.resources[dependency_models] = parser - - self.features.append((self.nullLinkNominalFeature, [dependency_models])) - self.identifiers.append(('Null Link Nominal Feature (Models: '+dependency_models+')', orientation)) - - def addBackoffBehaviorNominalFeature(self, ngram_file, orientation): - """ - Adds a nominal language model backoff behavior nominal feature to the estimator. - - @param ngram_file: Path to a shelve file containing n-gram frequency counts. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if ngram_file not in self.resources: - counts = self.readNgramFile(ngram_file) - self.resources[ngram_file] = counts - - self.features.append((self.backoffBehaviorNominalFeature, [ngram_file])) - self.identifiers.append(('N-Gram Nominal Feature (N-Grams File: '+ngram_file+')', orientation)) - - def addImageSearchCountFeature(self, key, orientation): - """ - Adds an image search count feature to the estimator. - The resulting value will be the number of distinct pictures retrieved by the Getty Images API. - This feature requires for a free "Connect Embed" key, which gives you access to 5 queries per second, and unlimited queries per day. - For more information on how to acquire a key, please visit their website at: https://developer.gettyimages.com - - @param key: Connect Embed key for the Getty Images API. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if key not in self.resources: - self.resources['GettyImagesKey'] = key - if 'image_counts' not in self.resources: - self.resources['image_counts'] = {} - - self.features.append((self.imageSearchCountFeature, [key])) - self.identifiers.append(('Image Search Count Feature (Key: '+key+')', orientation)) - - def addWebSearchCountFeature(self, orientation): - """ - Adds a web search count feature to the estimator. - The resulting value will be the number of websites retrieved by Bing. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if 'page_counts' not in self.resources: - self.resources['page_counts'] = {} - - self.features.append((self.webSearchCountFeature, [])) - self.identifiers.append((' Web Search Count Feature', orientation)) - - def addMorphologicalFeature(self, dictionary, description, orientation): - """ - Adds a generalized morphological feature to the estimator. - It requires for a dictionary that assigns words to their respective feature values. 
- For each word in a dataset, the value of this feature will be the one found in the dictionar provided, or 0 if it is not available. - - @param dictionary: A dictionary object assigning words to values. - Example: dictionary['chair'] = 45.33. - @param description: Description of the feature. - Example: "Age of Acquisition". - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.morphologicalFeature, [dictionary])) - self.identifiers.append((description, orientation)) - - # Nominal features: - - def addCandidateNominalFeature(self): - """ - Adds a candidate nominal feature to the estimator. - """ - self.features.append((self.candidateNominalFeature, [])) - self.identifiers.append(('Candidate Nominal Feature', 'Not Applicable')) - - def addNgramNominalFeature(self, leftw, rightw): - """ - Adds a n-gram nominal feature to the estimator. - - @param leftw: Number of tokens to the left. - @param rightw: Number of tokens to the right. - """ - self.features.append((self.ngramNominalFeature, [leftw, rightw])) - self.identifiers.append(('N-Gram Nominal Feature ['+str(leftw)+', '+str(rightw)+']', 'Not Applicable')) - - def addCandidatePOSNominalFeature(self, pos_model, stanford_tagger, java_path, pos_type): - """ - Adds a candidate POS tag nominal feature to the estimator. - - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param pos_type: The type of POS tags to be used. - Values supported: treebank, paetzold - """ - os.environ['JAVAHOME'] = java_path - if pos_model not in self.resources: - tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.resources[pos_model] = tagger - - self.features.append((self.candidatePOSNominalFeature, [pos_model, pos_type])) - self.identifiers.append(('Candidate POS Nominal Feature (POS Model: '+pos_model+') (POS Type: '+pos_type+')', 'Not Applicable')) - - def addPOSNgramNominalFeature(self, leftw, rightw, pos_model, stanford_tagger, java_path, pos_type): - """ - Adds a POS n-gram nominal feature to the estimator. - The n-gram will contain the candidate's POS tag surrounded by the POS tags of neighboring words. - - @param leftw: Number of tokens to the left. - @param rightw: Number of tokens to the right. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param pos_type: The type of POS tags to be used. 
- Values supported: treebank, paetzold - """ - os.environ['JAVAHOME'] = java_path - if pos_model not in self.resources: - tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.resources[pos_model] = tagger - - self.features.append((self.POSNgramNominalFeature, [leftw, rightw, pos_model, pos_type])) - self.identifiers.append(('POS N-gram Nominal Feature ['+str(leftw)+', '+str(rightw)+'] (POS Model: '+pos_model+') (POS Type: '+pos_type+')', 'Not Applicable')) - - def addPOSNgramWithCandidateNominalFeature(self, leftw, rightw, pos_model, stanford_tagger, java_path, pos_type): - """ - Adds a candidate centered POS n-gram nominal feature to the estimator. - The n-gram will contain the candidate surrounded by the POS tags of neighboring words. - - @param leftw: Number of tokens to the left. - @param rightw: Number of tokens to the right. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param pos_type: The type of POS tags to be used. - Values supported: treebank, paetzold - """ - os.environ['JAVAHOME'] = java_path - if pos_model not in self.resources: - tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.resources[pos_model] = tagger - - self.features.append((self.POSNgramWithCandidateNominalFeature, [leftw, rightw, pos_model, pos_type])) - self.identifiers.append(('POS N-gram with Candidate Nominal Feature ['+str(leftw)+', '+str(rightw)+'] (POS Model: '+pos_model+') (POS Type: '+pos_type+')', 'Not Applicable')) - -############################################################################################################################################################################################################################################## -#Phrase-level LS features: -############################################################################################################################################################################################################################################## - - def addNumberOfTokensFeature(self, orientation): - """ - Adds a number of tokens feature to the estimator. - The value will be the number of tokens in each candidate. - - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - self.features.append((self.numberOfTokens, [])) - self.identifiers.append(('Number of Tokens', orientation)) - - def addAverageTokenProbabilityFeature(self, language_model, orientation): - """ - Adds an average token probability feature to the estimator. - The value will be the average language model probability of all tokens that compose the candidate. - - @param language_model: Path to the language model from which to extract probabilities. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. 
- """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.averageTokenProbabilityFeature, [language_model])) - self.identifiers.append(('Average Token Probability (LM: '+language_model+')', orientation)) - - def addMaximumTokenProbabilityFeature(self, language_model, orientation): - """ - Adds an maximum token probability feature to the estimator. - The value will be the maximum language model probability of all tokens that compose the candidate. - - @param language_model: Path to the language model from which to extract probabilities. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.maximumTokenProbabilityFeature, [language_model])) - self.identifiers.append(('Maximum Token Probability (LM: '+language_model+')', orientation)) - - def addMinimumTokenProbabilityFeature(self, language_model, orientation): - """ - Adds an minimum token probability feature to the estimator. - The value will be the minimum language model probability of all tokens that compose the candidate. - - @param language_model: Path to the language model from which to extract probabilities. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if language_model not in self.resources: - model = kenlm.LanguageModel(language_model) - self.resources[language_model] = model - self.features.append((self.minimumTokenProbabilityFeature, [language_model])) - self.identifiers.append(('Minimum Token Probability (LM: '+language_model+')', orientation)) - - def addMinimumWordVectorSimilarityFeature(self, model, orientation): - """ - Adds a minimum word vector similarity feature to the estimator. - The value will be the minimum similarity between the word vectors of the words that compose the candidate and the word vector of the target complex word. - - @param model: Path to a binary word vector model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if model not in self.resources: - m = gensim.models.KeyedVectors.load_word2vec_format(model, binary=True) - self.resources[model] = m - self.features.append((self.minimumWordVectorSimilarityFeature, [model])) - self.identifiers.append(('Minimum Word Vector Similarity (Model: '+model+')', orientation)) - - def addMaximumWordVectorSimilarityFeature(self, model, orientation): - """ - Adds a maximum word vector similarity feature to the estimator. - The value will be the maximum similarity between the word vectors of the words that compose the candidate and the word vector of the target complex word. 
- - @param model: Path to a binary word vector model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if model not in self.resources: - m = gensim.models.KeyedVectors.load_word2vec_format(model, binary=True) - self.resources[model] = m - self.features.append((self.maximumWordVectorSimilarityFeature, [model])) - self.identifiers.append(('Maximum Word Vector Similarity (Model: '+model+')', orientation)) - - def addAverageWordVectorSimilarityFeature(self, model, orientation): - """ - Adds an average word vector similarity feature to the estimator. - The value will be the average similarity between the word vectors of the words that compose the candidate and the word vector of the target complex word. - - @param model: Path to a binary word vector model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param orientation: Whether the feature is a simplicity of complexity measure. - Possible values: Complexity, Simplicity. - """ - - if orientation not in ['Complexity', 'Simplicity']: - logger.debug('Orientation must be Complexity or Simplicity') - else: - if model not in self.resources: - m = gensim.models.KeyedVectors.load_word2vec_format(model, binary=True) - self.resources[model] = m - self.features.append((self.averageWordVectorSimilarityFeature, [model])) - self.identifiers.append(('Average Word Vector Similarity (Model: '+model+')', orientation)) diff --git a/lexi/lib/lexenstein/generators.py b/lexi/lib/lexenstein/generators.py deleted file mode 100755 index 86cae5f..0000000 --- a/lexi/lib/lexenstein/generators.py +++ /dev/null @@ -1,2129 +0,0 @@ -import xml.etree.ElementTree as ET -import re -import urllib2 as urllib -import subprocess -import nltk -from nltk.tag.stanford import StanfordPOSTagger -import kenlm -import codecs -import os -import gensim -from nltk.corpus import wordnet as wn -from nltk.stem.wordnet import WordNetLemmatizer -from nltk.stem.porter import PorterStemmer - - - -class PaetzoldGenerator: - - def __init__(self, posw2vmodel, nc, pos_model, stanford_tagger, java_path): - """ - Creates a PaetzoldGenerator instance. - - @param posw2vmodel: Binary parsed word vector model. - For more information on how to produce the model, please refer to the LEXenstein Manual. - @param nc: NorvigCorrector object. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. 
- """ - self.lemmatizer = WordNetLemmatizer() - self.stemmer = PorterStemmer() - self.model = gensim.models.KeyedVectors.load_word2vec_format(posw2vmodel, binary=True) - self.nc = nc - os.environ['JAVAHOME'] = java_path - self.tagger = StanfordPOSTagger(pos_model, stanford_tagger) - - def getSubstitutions(self, victor_corpus, amount): - """ - Generates substitutions for the target words of a corpus in VICTOR format. - - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A dictionary that assigns target complex words to sets of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - """ - #Get candidate->pos map: - tagged_sents = self.getParsedSentences(victor_corpus) - - #Get initial set of substitutions: - substitutions = self.getInitialSet(victor_corpus, tagged_sents, amount) - return substitutions - - def getParsedSentences(self, victor_corpus): - lexf = open(victor_corpus) - sents = [] - for line in lexf: - data = line.strip().split('\t') - sent = data[0].strip().split(' ') - sents.append(sent) - lexf.close() - - tagged_sents = self.tagger.tag_sents(sents) - return tagged_sents - - def getInitialSet(self, victor_corpus, tsents, amount): - lexf = open(victor_corpus) - data = [] - for line in lexf: - d = line.strip().split('\t') - data.append(d) - lexf.close() - - trgs = [] - trgsc = [] - trgsstems = [] - trgslemmas = [] - trgscstems = [] - trgsclemmas = [] - for i in range(0, len(data)): - d = data[i] - tags = tsents[i] - target = d[1].strip().lower() - head = int(d[2].strip()) - tag = self.getClass(tags[head][1]) - targetc = self.nc.correct(target) - trgs.append(target) - trgsc.append(targetc) - trgslemmas = self.lemmatizeWords(trgs) - trgsclemmas = self.lemmatizeWords(trgsc) - trgsstems = self.stemWords(trgs) - trgscstems = self.stemWords(trgsc) - trgmap = {} - for i in range(0, len(trgslemmas)): - target = data[i][1].strip().lower() - head = int(data[i][2].strip()) - tag = self.getClass(tsents[i][head][1]) - lemma = trgslemmas[i] - stem = trgsstems[i] - trgmap[target] = (lemma, stem) - - subs = [] - cands = set([]) - for i in range(0, len(data)): - d = data[i] - - t = trgs[i] - tstem = trgsstems[i] - tlemma = trgslemmas[i] - tc = trgsc[i] - tcstem = trgscstems[i] - tclemma = trgsclemmas[i] - - tags = tsents[i] - head = int(d[2].strip()) - tag = tags[head][1] - - word = t+'|||'+self.getClass(tag) - wordc = tc+'|||'+self.getClass(tag) - - most_sim = [] - try: - most_sim = self.model.most_similar(positive=[word], topn=50) - except KeyError: - try: - most_sim = self.model.most_similar(positive=[wordc], topn=50) - except KeyError: - most_sim = [] - - subs.append([word[0] for word in most_sim]) - - subsr = subs - subs = [] - for l in subsr: - lr = [] - for inst in l: - cand = inst.split('|||')[0].strip() - encc = None - try: - encc = cand.encode('ascii') - except Exception: - encc = None - if encc: - cands.add(cand) - lr.append(inst) - subs.append(lr) - - cands = list(cands) - candslemmas = self.lemmatizeWords(cands) - candsstems = self.stemWords(cands) - candmap = {} - for i in range(0, len(cands)): - cand = cands[i] - lemma = candslemmas[i] - stem = candsstems[i] - candmap[cand] = (lemma, stem) - - subs_filtered = self.filterSubs(data, tsents, subs, candmap, trgs, trgsc, trgsstems, trgscstems, trgslemmas, trgsclemmas) - - final_cands = {} - for i in range(0, len(data)): - target = data[i][1] - cands = subs_filtered[i][0:min(amount, subs_filtered[i])] - cands = 
[str(word.split('|||')[0].strip()) for word in cands] - if target not in final_cands: - final_cands[target] = set([]) - final_cands[target].update(set(cands)) - - return final_cands - - def lemmatizeWords(self, words): - result = [] - for word in words: - result.append(self.lemmatizer.lemmatize(word)) - return result - - def stemWords(self, words): - result = [] - for word in words: - result.append(self.stemmer.stem(word)) - return result - - def filterSubs(self, data, tsents, subs, candmap, trgs, trgsc, trgsstems, trgscstems, trgslemmas, trgsclemmas): - result = [] - for i in range(0, len(data)): - d = data[i] - - t = trgs[i] - tstem = trgsstems[i] - tlemma = trgslemmas[i] - tc = trgsc[i] - tcstem = trgscstems[i] - tclemma = trgsclemmas[i] - - tags = tsents[i] - head = int(d[2].strip()) - tag = self.getClass(tags[head][1]) - - word = t+'|||'+self.getClass(tag) - wordc = tc+'|||'+self.getClass(tag) - - most_sim = subs[i] - most_simf = [] - - for cand in most_sim: - candd = cand.split('|||') - cword = candd[0].strip() - ctag = candd[1].strip() - clemma = candmap[cword][0] - cstem = candmap[cword][1] - - if ctag==tag: - if clemma!=tlemma and clemma!=tclemma and cstem!=tstem and cstem!=tcstem: - if cword not in t and cword not in tc and t not in cword and tc not in cword: - most_simf.append(cand) - - class_filtered = [] - for cand in most_simf: - candd = cand.split('|||') - cword = candd[0].strip() - ctag = candd[1].strip() - clemma = candmap[cword][0] - cstem = candmap[cword][1] - - if tag=='V': - if (t.endswith('ing') or tc.endswith('ing')) and cword.endswith('ing'): - class_filtered.append(cand) - elif (t.endswith('d') or tc.endswith('d')) and cword.endswith('d'): - class_filtered.append(cand) - else: - class_filtered.append(cand) - - result.append(most_simf) - return result - - def getClass(self, tag): - result = None - if tag.startswith('N'): - result = 'N' - elif tag.startswith('V'): - result = 'V' - elif tag.startswith('RB'): - result = 'A' - elif tag.startswith('J'): - result = 'J' - elif tag.startswith('W'): - result = 'W' - elif tag.startswith('PRP'): - result = 'P' - else: - result = tag.strip() - return result - -class GlavasGenerator: - - def __init__(self, w2vmodel): - """ - Creates a GlavasGenerator instance. - - @param w2vmodel: Binary parsed word vector model. - For more information on how to produce the model, please refer to the LEXenstein Manual. - """ - self.lemmatizer = WordNetLemmatizer() - self.stemmer = PorterStemmer() - self.model = gensim.models.KeyedVectors.load_word2vec_format(w2vmodel, binary=True) - - def getSubstitutions(self, victor_corpus, amount): - """ - Generates substitutions for the target words of a corpus in VICTOR format. - - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A dictionary that assigns target complex words to sets of candidate substitutions. 
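Editorial note: both removed generators build their initial candidate set from nearest neighbours in an embedding space: GlavasGenerator queries the target word directly, while PaetzoldGenerator queries a POS-annotated key of the form `word|||CLASS`. A sketch of that lookup; `pos_vectors.bin` is a hypothetical model path.

```python
import gensim

# Hypothetical POS-annotated word2vec model whose keys look like 'perched|||V',
# as expected by the removed PaetzoldGenerator; pass pos_class=None for the
# plain-word lookup done by GlavasGenerator.
model = gensim.models.KeyedVectors.load_word2vec_format('pos_vectors.bin', binary=True)

def embedding_candidates(target, pos_class=None, topn=50):
    key = target if pos_class is None else target + '|||' + pos_class
    try:
        neighbours = model.most_similar(positive=[key], topn=topn)
    except KeyError:
        return []                        # out-of-vocabulary target
    # Strip the POS annotation back off, as getInitialSet above does.
    return [word.split('|||')[0] for word, _similarity in neighbours]

print(embedding_candidates('perched', pos_class='V'))
```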
- Example: substitutions['perched'] = {'sat', 'roosted'} - """ - - #Get initial set of substitutions: - substitutions = self.getInitialSet(victor_corpus, amount) - return substitutions - - def getInitialSet(self, victor_corpus, amount): - lexf = open(victor_corpus) - data = [] - for line in lexf: - d = line.strip().split('\t') - data.append(d) - lexf.close() - - trgs = [] - trgsstems = [] - trgslemmas = [] - for i in range(0, len(data)): - d = data[i] - target = d[1].strip().lower() - head = int(d[2].strip()) - trgs.append(target) - trgslemmas = self.lemmatizeWords(trgs) - trgsstems = self.stemWords(trgs) - - trgmap = {} - for i in range(0, len(trgslemmas)): - target = data[i][1].strip().lower() - head = int(data[i][2].strip()) - lemma = trgslemmas[i] - stem = trgsstems[i] - trgmap[target] = (lemma, stem) - - subs = [] - cands = set([]) - for i in range(0, len(data)): - d = data[i] - - t = trgs[i] - tstem = trgsstems[i] - tlemma = trgslemmas[i] - - word = t - - most_sim = [] - try: - most_sim = self.model.most_similar(positive=[word], topn=50) - except KeyError: - most_sim = [] - - subs.append([word[0] for word in most_sim]) - - subsr = subs - subs = [] - for l in subsr: - lr = [] - for inst in l: - cand = inst.split('|||')[0].strip() - encc = None - try: - encc = cand.encode('ascii') - except Exception: - encc = None - if encc: - cands.add(cand) - lr.append(inst) - subs.append(lr) - - cands = list(cands) - candslemmas = self.lemmatizeWords(cands) - candsstems = self.stemWords(cands) - candmap = {} - for i in range(0, len(cands)): - cand = cands[i] - lemma = candslemmas[i] - stem = candsstems[i] - candmap[cand] = (lemma, stem) - - subs_filtered = self.filterSubs(data, subs, candmap, trgs, trgsstems, trgslemmas) - - final_cands = {} - for i in range(0, len(data)): - target = data[i][1] - cands = subs_filtered[i][0:min(amount, subs_filtered[i])] - cands = [str(word.split('|||')[0].strip()) for word in cands] - if target not in final_cands: - final_cands[target] = set([]) - final_cands[target].update(set(cands)) - - return final_cands - - def lemmatizeWords(self, words): - result = [] - for word in words: - result.append(self.lemmatizer.lemmatize(word)) - return result - - def stemWords(self, words): - result = [] - for word in words: - result.append(self.stemmer.stem(word)) - return result - - def filterSubs(self, data, subs, candmap, trgs, trgsstems, trgslemmas): - result = [] - for i in range(0, len(data)): - d = data[i] - - t = trgs[i] - tstem = trgsstems[i] - tlemma = trgslemmas[i] - - word = t - - most_sim = subs[i] - most_simf = [] - - for cand in most_sim: - cword = cand - clemma = candmap[cword][0] - cstem = candmap[cword][1] - - if clemma!=tlemma and cstem!=tstem: - most_simf.append(cand) - - result.append(most_simf) - return result - -class KauchakGenerator: - - def __init__(self, mat, parallel_pos_file, alignments_file, stop_words, nc): - """ - Creates a KauchakGenerator instance. - - @param mat: MorphAdornerToolkit object. - @param parallel_pos_file: Path to the parsed parallel corpus from which to extract substitutions. - For more information about the file's format, refer to the LEXenstein Manual. - @param alignments_file: Path to the alignments for the parsed parallel corpus from which to extract substitutions. - For more information about the file's format, refer to the LEXenstein Manual. - @param stop_words: Path to the file containing stop words of the desired language. - The file must contain one stop word per line. - @param nc: NorvigCorrector object. 
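Editorial note: after generation, the filterSubs methods above drop candidates that are merely inflectional variants of the target: a candidate is rejected if it shares the target's lemma or stem, or (in the Paetzold variant) if one string contains the other. A sketch of that filter with the same NLTK lemmatizer and stemmer the removed file imports (requires the NLTK WordNet data).

```python
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.stem.porter import PorterStemmer

lemmatizer = WordNetLemmatizer()
stemmer = PorterStemmer()

def keep_candidate(candidate, target):
    # Reject inflectional variants of the target, mirroring filterSubs above.
    if lemmatizer.lemmatize(candidate) == lemmatizer.lemmatize(target):
        return False
    if stemmer.stem(candidate) == stemmer.stem(target):
        return False
    if candidate in target or target in candidate:
        return False
    return True

candidates = ['perch', 'perching', 'sat', 'roosted']
print([c for c in candidates if keep_candidate(c, 'perched')])  # ['sat', 'roosted']
```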
- """ - self.mat = mat - self.parallel_pos_file = parallel_pos_file - self.alignments_file = alignments_file - self.stop_words = set([word.strip() for word in open(stop_words)]) - self.nc = nc - - def getSubstitutions(self, victor_corpus): - """ - Generates substitutions for the target words of a corpus in VICTOR format. - - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A dictionary that assigns target complex words to sets of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - """ - #Get candidate->pos map: - print('Getting POS map...') - target_pos = self.getPOSMap(victor_corpus) - - #Get initial set of substitutions: - print('Getting initial set of substitutions...') - substitutions_initial = self.getInitialSet(victor_corpus, target_pos) - - #Get final substitutions: - print('Inflecting substitutions...') - substitutions_inflected = self.getInflectedSet(substitutions_initial) - - #Return final set: - print('Finished!') - return substitutions_inflected - - def getInitialSet(self, victor_corpus, pos_map): - substitutions_initial = {} - - targets = set([line.strip().split('\t')[1].strip() for line in open(victor_corpus)]) - - fparallel = open(self.parallel_pos_file) - falignments = open(self.alignments_file) - - for line in fparallel: - data = line.strip().split('\t') - source = data[0].strip().split(' ') - target = data[1].strip().split(' ') - - alignments = set(falignments.readline().strip().split(' ')) - - for alignment in alignments: - adata = alignment.strip().split('-') - left = int(adata[0].strip()) - right = int(adata[1].strip()) - leftraw = source[left].strip() - leftp = leftraw.split('|||')[1].strip().lower() - leftw = leftraw.split('|||')[0].strip() - rightraw = target[right].strip() - rightp = rightraw.split('|||')[1].strip().lower() - rightw = rightraw.split('|||')[0].strip() - - if len(leftw)>0 and len(rightw)>0 and leftp!='nnp' and rightp!='nnp' and rightp==leftp and leftw not in self.stop_words and rightw not in self.stop_words and leftw!=rightw: - if leftw in substitutions_initial: - if leftp in substitutions_initial[leftw]: - substitutions_initial[leftw][leftp].add(rightw) - else: - substitutions_initial[leftw][leftp] = set([rightw]) - else: - substitutions_initial[leftw] = {leftp:set([rightw])} - fparallel.close() - falignments.close() - return substitutions_initial - - def getPOSMap(self, path): - result = {} - lex = open(path) - for line in lex: - data = line.strip().split('\t') - sent = data[0].strip().lower().split(' ') - target = data[1].strip().lower() - head = int(data[2].strip()) - - posd = nltk.pos_tag(sent) - postarget = posd[head][1].lower().strip() - if target in result: - result[target].add(postarget) - else: - result[target] = set([postarget]) - lex.close() - return result - - def getInflectedSet(self, result): - final_substitutions = {} - - #Get inflections: - allkeys = sorted(list(result.keys())) - - singulars = {} - plurals = {} - verbs = {} - - singularsk = {} - pluralsk = {} - verbsk = {} - - for i in range(0, len(allkeys)): - key = allkeys[i] - leftw = key - - for leftp in result[leftw]: - if leftp.startswith('n'): - if leftp=='nns': - pluralsk[leftw] = set([]) - for subst in result[key][leftp]: - plurals[subst] = set([]) - else: - singularsk[leftw] = set([]) - for subst in result[key][leftp]: - singulars[subst] = set([]) - elif leftp.startswith('v'): - verbsk[leftw] = {} - for subst in result[key][leftp]: - 
verbs[subst] = {} - - #------------------------------------------------------------------------------------------------ - - #Generate keys input: - singkeys = sorted(list(singularsk.keys())) - plurkeys = sorted(list(pluralsk.keys())) - verbkeys = sorted(list(verbsk.keys())) - - #Get stems: - singstems, plurstems, verbstems = self.getStems(singkeys, plurkeys, verbkeys) - - #Get plurals: - singres = self.getPlurals(singstems) - - #Get singulars: - plurres = self.getSingulars(plurstems) - - #Get verb inflections: - verbres1, verbres2, verbres3, verbres4, verbres5 = self.getInflections(verbstems) - - #Add information to dictionaries: - for i in range(0, len(singkeys)): - k = singkeys[i] - singre = singres[i] - singularsk[k] = singre - for i in range(0, len(plurkeys)): - k = plurkeys[i] - plurre = plurres[i] - pluralsk[k] = plurre - for i in range(0, len(verbkeys)): - k = verbkeys[i] - verbre1 = verbres1[i] - verbre2 = verbres2[i] - verbre3 = verbres3[i] - verbre4 = verbres4[i] - verbre5 = verbres5[i] - verbsk[k] = {'PAST_PERFECT_PARTICIPLE': verbre1, 'PAST_PARTICIPLE': verbre2, 'PRESENT_PARTICIPLE': verbre3, 'PRESENT': verbre4, 'PAST': verbre5} - - #------------------------------------------------------------------------------------------------ - - #Generate substs input: - singkeys = sorted(list(singulars.keys())) - plurkeys = sorted(list(plurals.keys())) - verbkeys = sorted(list(verbs.keys())) - - #Get stems: - singstems, plurstems, verbstems = self.getStems(singkeys, plurkeys, verbkeys) - - #Get plurals: - singres = self.getPlurals(singstems) - - #Get singulars: - plurres = self.getSingulars(plurstems) - - #Get verb inflections: - verbres1, verbres2, verbres3, verbres4, verbres5 = self.getInflections(verbstems) - - #Add information to dictionaries: - for i in range(0, len(singkeys)): - k = singkeys[i] - singre = singres[i] - singulars[k] = singre - for i in range(0, len(plurkeys)): - k = plurkeys[i] - plurre = plurres[i] - plurals[k] = plurre - for i in range(0, len(verbkeys)): - k = verbkeys[i] - verbre1 = verbres1[i] - verbre2 = verbres2[i] - verbre3 = verbres3[i] - verbre4 = verbres4[i] - verbre5 = verbres5[i] - verbs[k] = {'PAST_PERFECT_PARTICIPLE': verbre1, 'PAST_PARTICIPLE': verbre2, 'PRESENT_PARTICIPLE': verbre3, 'PRESENT': verbre4, 'PAST': verbre5} - - #------------------------------------------------------------------------------------------------ - - #Generate final substitution list: - for i in range(0, len(allkeys)): - key = allkeys[i] - leftw = key - for leftp in result[leftw]: - - #Add final version to candidates: - if leftw not in final_substitutions: - final_substitutions[leftw] = result[key][leftp] - else: - final_substitutions[leftw] = final_substitutions[leftw].union(result[key][leftp]) - #If left is a noun: - if leftp.startswith('n'): - #If it is a plural: - if leftp=='nns': - plurl = pluralsk[leftw] - newcands = set([]) - for candidate in result[key][leftp]: - candplurl = plurals[candidate] - newcands.add(candplurl) - if plurl not in final_substitutions: - final_substitutions[plurl] = newcands - else: - final_substitutions[plurl] = final_substitutions[plurl].union(newcands) - #If it is singular: - else: - singl = singularsk[leftw] - newcands = set([]) - for candidate in result[key][leftp]: - candsingl = singulars[candidate] - newcands.add(candsingl) - if singl not in final_substitutions: - final_substitutions[singl] = newcands - else: - final_substitutions[singl] = final_substitutions[singl].union(newcands) - #If left is a verb: - elif leftp.startswith('v'): - for 
verb_tense in ['PAST_PERFECT_PARTICIPLE', 'PAST_PARTICIPLE', 'PRESENT_PARTICIPLE', 'PRESENT', 'PAST']: - tensedl = verbsk[leftw][verb_tense] - newcands = set([]) - for candidate in result[key][leftp]: - candtensedl = verbs[candidate][verb_tense] - newcands.add(candtensedl) - if tensedl not in final_substitutions: - final_substitutions[tensedl] = newcands - else: - final_substitutions[tensedl] = final_substitutions[tensedl].union(newcands) - return final_substitutions - - def getInflections(self, verbstems): - data1 = self.mat.conjugateVerbs(verbstems, 'PAST_PERFECT_PARTICIPLE', 'FIRST_PERSON_SINGULAR') - data2 = self.mat.conjugateVerbs(verbstems, 'PAST_PARTICIPLE', 'FIRST_PERSON_SINGULAR') - data3 = self.mat.conjugateVerbs(verbstems, 'PRESENT_PARTICIPLE', 'FIRST_PERSON_SINGULAR') - data4 = self.mat.conjugateVerbs(verbstems, 'PRESENT', 'FIRST_PERSON_SINGULAR') - data5 = self.mat.conjugateVerbs(verbstems, 'PAST', 'FIRST_PERSON_SINGULAR') - return self.correctWords(data1), self.correctWords(data2), self.correctWords(data3), self.correctWords(data4), self.correctWords(data5) - - def getSingulars(self, plurstems): - data = self.mat.inflectNouns(plurstems, 'singular') - return self.correctWords(data) - - def getPlurals(self, singstems): - data = self.mat.inflectNouns(singstems, 'plural') - return self.correctWords(data) - - def getStems(self, sings, plurs, verbs): - data = self.mat.lemmatizeWords(sings+plurs+verbs) - rsings = [] - rplurs = [] - rverbs = [] - c = -1 - for sing in sings: - c += 1 - if len(data[c])>0: - rsings.append(data[c]) - else: - rsings.append(sing) - for plur in plurs: - c += 1 - if len(data[c])>0: - rplurs.append(data[c]) - else: - rplurs.append(plur) - for verb in verbs: - c += 1 - if len(data[c])>0: - rverbs.append(data[c]) - else: - rverbs.append(verb) - return self.correctWords(rsings), self.correctWords(rplurs), self.correctWords(rverbs) - - def correctWords(self, words): - result = [] - for word in words: - result.append(self.nc.correct(word)) - return result - -class YamamotoGenerator: - - def __init__(self, mat, dictionary_key, nc): - """ - Creates a YamamotoGenerator instance. - - @param mat: MorphAdornerToolkit object. - @param dictionary_key: Key for the Merriam Dictionary. - @param nc: NorvigCorrector object. - For more information on how to get the key for free, please refer to the LEXenstein Manual - """ - self.mat = mat - self.dictionary_key = dictionary_key - self.nc = nc - - def getSubstitutions(self, victor_corpus): - """ - Generates substitutions for the target words of a corpus in VICTOR format. - - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A dictionary that assigns target complex words to sets of candidate substitutions. 
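Editorial note: the getInflectedSet machinery above (repeated in the Kauchak, Yamamoto and Merriam generators in this file) takes lemma-level substitutions and re-inflects them to agree with the target's surface form, via the MorphAdornerToolkit wrapper that this patch also removes (lexi/lib/lexenstein/morphadorner.py). A reduced sketch of the noun branch only, assuming `mat` is a MorphAdornerToolkit instance exposing the same batch calls used above.

```python
# Sketch of the noun branch of getInflectedSet: if the target was tagged 'nns',
# candidates are re-inflected to plural, otherwise to singular.
# `mat` is assumed to come from the removed lexi/lib/lexenstein/morphadorner.py.

def reinflect_nouns(mat, target_pos, candidates):
    stems = mat.lemmatizeWords(candidates)           # batch lemmatization
    form = 'plural' if target_pos == 'nns' else 'singular'
    return mat.inflectNouns(stems, form)             # batch re-inflection

# e.g. reinflect_nouns(mat, 'nns', ['chair', 'seat']) -> ['chairs', 'seats']
```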
- Example: substitutions['perched'] = {'sat', 'roosted'} - """ - #Get initial set of substitutions: - print('Getting initial set of substitutions...') - substitutions_initial = self.getInitialSet(victor_corpus) - - #Get final substitutions: - print('Inflecting substitutions...') - substitutions_inflected = self.getInflectedSet(substitutions_initial) - - #Return final set: - print('Finished!') - return substitutions_inflected - - def getInflectedSet(self, result): - final_substitutions = {} - - #Get inflections: - allkeys = sorted(list(result.keys())) - - singulars = {} - plurals = {} - verbs = {} - - singularsk = {} - pluralsk = {} - verbsk = {} - - for i in range(0, len(allkeys)): - key = allkeys[i] - leftw = key - - for leftp in result[leftw]: - if leftp.startswith('n'): - if leftp=='nns': - pluralsk[leftw] = set([]) - for subst in result[key][leftp]: - plurals[subst] = set([]) - else: - singularsk[leftw] = set([]) - for subst in result[key][leftp]: - singulars[subst] = set([]) - elif leftp.startswith('v'): - verbsk[leftw] = {} - for subst in result[key][leftp]: - verbs[subst] = {} - - #------------------------------------------------------------------------------------------------ - - #Generate keys input: - singkeys = sorted(list(singularsk.keys())) - plurkeys = sorted(list(pluralsk.keys())) - verbkeys = sorted(list(verbsk.keys())) - - #Get stems: - singstems, plurstems, verbstems = self.getStems(singkeys, plurkeys, verbkeys) - - #Get plurals: - singres = self.getPlurals(singstems) - - #Get singulars: - plurres = self.getSingulars(plurstems) - - #Get verb inflections: - verbres1, verbres2, verbres3, verbres4, verbres5 = self.getInflections(verbstems) - - #Add information to dictionaries: - for i in range(0, len(singkeys)): - k = singkeys[i] - singre = singres[i] - singularsk[k] = singre - for i in range(0, len(plurkeys)): - k = plurkeys[i] - plurre = plurres[i] - pluralsk[k] = plurre - for i in range(0, len(verbkeys)): - k = verbkeys[i] - verbre1 = verbres1[i] - verbre2 = verbres2[i] - verbre3 = verbres3[i] - verbre4 = verbres4[i] - verbre5 = verbres5[i] - verbsk[k] = {'PAST_PERFECT_PARTICIPLE': verbre1, 'PAST_PARTICIPLE': verbre2, 'PRESENT_PARTICIPLE': verbre3, 'PRESENT': verbre4, 'PAST': verbre5} - - #------------------------------------------------------------------------------------------------ - - #Generate substs input: - singkeys = sorted(list(singulars.keys())) - plurkeys = sorted(list(plurals.keys())) - verbkeys = sorted(list(verbs.keys())) - - #Get stems: - singstems, plurstems, verbstems = self.getStems(singkeys, plurkeys, verbkeys) - - #Get plurals: - singres = self.getPlurals(singstems) - - #Get singulars: - plurres = self.getSingulars(plurstems) - - #Get verb inflections: - verbres1, verbres2, verbres3, verbres4, verbres5 = self.getInflections(verbstems) - - #Add information to dictionaries: - for i in range(0, len(singkeys)): - k = singkeys[i] - singre = singres[i] - singulars[k] = singre - for i in range(0, len(plurkeys)): - k = plurkeys[i] - plurre = plurres[i] - plurals[k] = plurre - for i in range(0, len(verbkeys)): - k = verbkeys[i] - verbre1 = verbres1[i] - verbre2 = verbres2[i] - verbre3 = verbres3[i] - verbre4 = verbres4[i] - verbre5 = verbres5[i] - verbs[k] = {'PAST_PERFECT_PARTICIPLE': verbre1, 'PAST_PARTICIPLE': verbre2, 'PRESENT_PARTICIPLE': verbre3, 'PRESENT': verbre4, 'PAST': verbre5} - - #------------------------------------------------------------------------------------------------ - - #Generate final substitution list: - for i in range(0, len(allkeys)): 
- key = allkeys[i] - leftw = key - for leftp in result[leftw]: - - #Add final version to candidates: - if leftw not in final_substitutions: - final_substitutions[leftw] = result[key][leftp] - else: - final_substitutions[leftw] = final_substitutions[leftw].union(result[key][leftp]) - #If left is a noun: - if leftp.startswith('n'): - #If it is a plural: - if leftp=='nns': - plurl = pluralsk[leftw] - newcands = set([]) - for candidate in result[key][leftp]: - candplurl = plurals[candidate] - newcands.add(candplurl) - if plurl not in final_substitutions: - final_substitutions[plurl] = newcands - else: - final_substitutions[plurl] = final_substitutions[plurl].union(newcands) - #If it is singular: - else: - singl = singularsk[leftw] - newcands = set([]) - for candidate in result[key][leftp]: - candsingl = singulars[candidate] - newcands.add(candsingl) - if singl not in final_substitutions: - final_substitutions[singl] = newcands - else: - final_substitutions[singl] = final_substitutions[singl].union(newcands) - #If left is a verb: - elif leftp.startswith('v'): - for verb_tense in ['PAST_PERFECT_PARTICIPLE', 'PAST_PARTICIPLE', 'PRESENT_PARTICIPLE', 'PRESENT', 'PAST']: - tensedl = verbsk[leftw][verb_tense] - newcands = set([]) - for candidate in result[key][leftp]: - candtensedl = verbs[candidate][verb_tense] - newcands.add(candtensedl) - if tensedl not in final_substitutions: - final_substitutions[tensedl] = newcands - else: - final_substitutions[tensedl] = final_substitutions[tensedl].union(newcands) - return final_substitutions - - def getInflections(self, verbstems): - data1 = self.mat.conjugateVerbs(verbstems, 'PAST_PERFECT_PARTICIPLE', 'FIRST_PERSON_SINGULAR') - data2 = self.mat.conjugateVerbs(verbstems, 'PAST_PARTICIPLE', 'FIRST_PERSON_SINGULAR') - data3 = self.mat.conjugateVerbs(verbstems, 'PRESENT_PARTICIPLE', 'FIRST_PERSON_SINGULAR') - data4 = self.mat.conjugateVerbs(verbstems, 'PRESENT', 'FIRST_PERSON_SINGULAR') - data5 = self.mat.conjugateVerbs(verbstems, 'PAST', 'FIRST_PERSON_SINGULAR') - return self.correctWords(data1), self.correctWords(data2), self.correctWords(data3), self.correctWords(data4), self.correctWords(data5) - - def getSingulars(self, plurstems): - data = self.mat.inflectNouns(plurstems, 'singular') - return self.correctWords(data) - - def getPlurals(self, singstems): - data = self.mat.inflectNouns(singstems, 'plural') - return self.correctWords(data) - - def getStems(self, sings, plurs, verbs): - data = self.mat.lemmatizeWords(sings+plurs+verbs) - rsings = [] - rplurs = [] - rverbs = [] - c = -1 - for sing in sings: - c += 1 - if len(data[c])>0: - rsings.append(data[c]) - else: - rsings.append(sing) - for plur in plurs: - c += 1 - if len(data[c])>0: - rplurs.append(data[c]) - else: - rplurs.append(plur) - for verb in verbs: - c += 1 - if len(data[c])>0: - rverbs.append(data[c]) - else: - rverbs.append(verb) - return self.correctWords(rsings), self.correctWords(rplurs), self.correctWords(rverbs) - - def getInitialSet(self, victor_corpus): - substitutions_initial = {} - - lex = open(victor_corpus) - for line in lex: - data = line.strip().split('\t') - target = data[1].strip() - head = int(data[2].strip()) - - url = 'http://www.dictionaryapi.com/api/v1/references/collegiate/xml/' + target + '?key=' + self.dictionary_key - conn = urllib.request.urlopen(url) - root = ET.fromstring(conn.read()) - - newline = target + '\t' - cands = {} - - entries = root.iter('entry') - for entry in entries: - node_pos = entry.find('fl') - if node_pos != None: - node_pos = 
node_pos.text.strip()[0].lower() - if node_pos not in cands: - cands[node_pos] = set([]) - for definition in entry.iter('dt'): - if definition.text!=None: - text = definition.text.strip() - text = text[1:len(text)] - tokens = nltk.word_tokenize(text) - postags = nltk.pos_tag(tokens) - for p in postags: - postag = p[1].strip()[0].lower() - cand = p[0].strip() - if postag==node_pos: - cands[node_pos].add(cand) - for pos in cands: - if target in cands[pos]: - cands[pos].remove(target) - if len(list(cands.keys()))>0: - substitutions_initial[target] = cands - lex.close() - return substitutions_initial - - def correctWords(self, words): - result = [] - for word in words: - result.append(self.nc.correct(word)) - return result - -class MerriamGenerator: - - def __init__(self, mat, thesaurus_key, nc): - """ - Creates a MerriamGenerator instance. - - @param mat: MorphAdornerToolkit object. - @param thesaurus_key: Key for the Merriam Thesaurus. - For more information on how to get the key for free, please refer to the LEXenstein Manual - @param nc: NorvigCorrector object. - """ - self.mat = mat - self.thesaurus_key = thesaurus_key - self.nc = nc - - def getSubstitutions(self, victor_corpus): - """ - Generates substitutions for the target words of a corpus in VICTOR format. - - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A dictionary that assigns target complex words to sets of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - """ - #Get initial set of substitutions: - print('Getting initial set of substitutions...') - substitutions_initial = self.getInitialSet(victor_corpus) - - #Get final substitutions: - print('Inflecting substitutions...') - substitutions_inflected = self.getInflectedSet(substitutions_initial) - - #Return final set: - print('Finished!') - return substitutions_inflected - - def getInflectedSet(self, result): - final_substitutions = {} - - #Get inflections: - allkeys = sorted(list(result.keys())) - - singulars = {} - plurals = {} - verbs = {} - - singularsk = {} - pluralsk = {} - verbsk = {} - - for i in range(0, len(allkeys)): - key = allkeys[i] - leftw = key - - for leftp in result[leftw]: - if leftp.startswith('n'): - if leftp=='nns': - pluralsk[leftw] = set([]) - for subst in result[key][leftp]: - plurals[subst] = set([]) - else: - singularsk[leftw] = set([]) - for subst in result[key][leftp]: - singulars[subst] = set([]) - elif leftp.startswith('v'): - verbsk[leftw] = {} - for subst in result[key][leftp]: - verbs[subst] = {} - - #------------------------------------------------------------------------------------------------ - - #Generate keys input: - singkeys = sorted(list(singularsk.keys())) - plurkeys = sorted(list(pluralsk.keys())) - verbkeys = sorted(list(verbsk.keys())) - - #Get stems: - singstems, plurstems, verbstems = self.getStems(singkeys, plurkeys, verbkeys) - - #Get plurals: - singres = self.getPlurals(singstems) - - #Get singulars: - plurres = self.getSingulars(plurstems) - - #Get verb inflections: - verbres1, verbres2, verbres3, verbres4, verbres5 = self.getInflections(verbstems) - - #Add information to dictionaries: - for i in range(0, len(singkeys)): - k = singkeys[i] - singre = singres[i] - singularsk[k] = singre - for i in range(0, len(plurkeys)): - k = plurkeys[i] - plurre = plurres[i] - pluralsk[k] = plurre - for i in range(0, len(verbkeys)): - k = verbkeys[i] - verbre1 = verbres1[i] - verbre2 = verbres2[i] - verbre3 
= verbres3[i] - verbre4 = verbres4[i] - verbre5 = verbres5[i] - verbsk[k] = {'PAST_PERFECT_PARTICIPLE': verbre1, 'PAST_PARTICIPLE': verbre2, 'PRESENT_PARTICIPLE': verbre3, 'PRESENT': verbre4, 'PAST': verbre5} - - #------------------------------------------------------------------------------------------------ - - #Generate substs input: - singkeys = sorted(list(singulars.keys())) - plurkeys = sorted(list(plurals.keys())) - verbkeys = sorted(list(verbs.keys())) - - #Get stems: - singstems, plurstems, verbstems = self.getStems(singkeys, plurkeys, verbkeys) - - #Get plurals: - singres = self.getPlurals(singstems) - - #Get singulars: - plurres = self.getSingulars(plurstems) - - #Get verb inflections: - verbres1, verbres2, verbres3, verbres4, verbres5 = self.getInflections(verbstems) - - #Add information to dictionaries: - for i in range(0, len(singkeys)): - k = singkeys[i] - singre = singres[i] - singulars[k] = singre - for i in range(0, len(plurkeys)): - k = plurkeys[i] - plurre = plurres[i] - plurals[k] = plurre - for i in range(0, len(verbkeys)): - k = verbkeys[i] - verbre1 = verbres1[i] - verbre2 = verbres2[i] - verbre3 = verbres3[i] - verbre4 = verbres4[i] - verbre5 = verbres5[i] - verbs[k] = {'PAST_PERFECT_PARTICIPLE': verbre1, 'PAST_PARTICIPLE': verbre2, 'PRESENT_PARTICIPLE': verbre3, 'PRESENT': verbre4, 'PAST': verbre5} - - #------------------------------------------------------------------------------------------------ - - #Generate final substitution list: - for i in range(0, len(allkeys)): - key = allkeys[i] - leftw = key - for leftp in result[leftw]: - - #Add final version to candidates: - if leftw not in final_substitutions: - final_substitutions[leftw] = result[key][leftp] - else: - final_substitutions[leftw] = final_substitutions[leftw].union(result[key][leftp]) - #If left is a noun: - if leftp.startswith('n'): - #If it is a plural: - if leftp=='nns': - plurl = pluralsk[leftw] - newcands = set([]) - for candidate in result[key][leftp]: - candplurl = plurals[candidate] - newcands.add(candplurl) - if plurl not in final_substitutions: - final_substitutions[plurl] = newcands - else: - final_substitutions[plurl] = final_substitutions[plurl].union(newcands) - #If it is singular: - else: - singl = singularsk[leftw] - newcands = set([]) - for candidate in result[key][leftp]: - candsingl = singulars[candidate] - newcands.add(candsingl) - if singl not in final_substitutions: - final_substitutions[singl] = newcands - else: - final_substitutions[singl] = final_substitutions[singl].union(newcands) - #If left is a verb: - elif leftp.startswith('v'): - for verb_tense in ['PAST_PERFECT_PARTICIPLE', 'PAST_PARTICIPLE', 'PRESENT_PARTICIPLE', 'PRESENT', 'PAST']: - tensedl = verbsk[leftw][verb_tense] - newcands = set([]) - for candidate in result[key][leftp]: - candtensedl = verbs[candidate][verb_tense] - newcands.add(candtensedl) - if tensedl not in final_substitutions: - final_substitutions[tensedl] = newcands - else: - final_substitutions[tensedl] = final_substitutions[tensedl].union(newcands) - return final_substitutions - - def getInflections(self, verbstems): - data1 = self.mat.conjugateVerbs(verbstems, 'PAST_PERFECT_PARTICIPLE', 'FIRST_PERSON_SINGULAR') - data2 = self.mat.conjugateVerbs(verbstems, 'PAST_PARTICIPLE', 'FIRST_PERSON_SINGULAR') - data3 = self.mat.conjugateVerbs(verbstems, 'PRESENT_PARTICIPLE', 'FIRST_PERSON_SINGULAR') - data4 = self.mat.conjugateVerbs(verbstems, 'PRESENT', 'FIRST_PERSON_SINGULAR') - data5 = self.mat.conjugateVerbs(verbstems, 'PAST', 'FIRST_PERSON_SINGULAR') - 
return self.correctWords(data1), self.correctWords(data2), self.correctWords(data3), self.correctWords(data4), self.correctWords(data5) - - def getSingulars(self, plurstems): - data = self.mat.inflectNouns(plurstems, 'singular') - return self.correctWords(data) - - def getPlurals(self, singstems): - data = self.mat.inflectNouns(singstems, 'plural') - return self.correctWords(data) - - def getStems(self, sings, plurs, verbs): - data = self.mat.lemmatizeWords(sings+plurs+verbs) - rsings = [] - rplurs = [] - rverbs = [] - c = -1 - for sing in sings: - c += 1 - if len(data[c])>0: - rsings.append(data[c]) - else: - rsings.append(sing) - for plur in plurs: - c += 1 - if len(data[c])>0: - rplurs.append(data[c]) - else: - rplurs.append(plur) - for verb in verbs: - c += 1 - if len(data[c])>0: - rverbs.append(data[c]) - else: - rverbs.append(verb) - return self.correctWords(rsings), self.correctWords(rplurs), self.correctWords(rverbs) - - def getInitialSet(self, victor_corpus): - substitutions_initial = {} - - lex = open(victor_corpus) - for line in lex: - data = line.strip().split('\t') - target = data[1].strip() - url = 'http://www.dictionaryapi.com/api/v1/references/thesaurus/xml/' + target + '?key=' + self.thesaurus_key - conn = urllib.request.urlopen(url) - root = ET.fromstring(conn.read()) - root = root.findall('entry') - - cands = {} - if len(root)>0: - for root_node in root: - node_pos = root_node.find('fl') - if node_pos != None: - node_pos = node_pos.text.strip()[0].lower() - if node_pos not in cands: - cands[node_pos] = set([]) - for sense in root_node.iter('sens'): - syn = sense.findall('syn')[0] - res = '' - for snip in syn.itertext(): - res += snip + ' ' - finds = re.findall('\([^\)]+\)', res) - for find in finds: - res = res.replace(find, '') - - synonyms = [s.strip() for s in res.split(',')] - - for synonym in synonyms: - if len(synonym.split(' '))==1: - try: - test = codecs.ascii_encode(synonym) - cands[node_pos].add(synonym) - except UnicodeEncodeError: - cands = cands - for pos in cands: - if target in cands[pos]: - cands[pos].remove(target) - if len(list(cands.keys()))>0: - substitutions_initial[target] = cands - lex.close() - return substitutions_initial - - def correctWords(self, words): - result = [] - for word in words: - result.append(self.nc.correct(word)) - return result - - -#Class for the Wordnet Generator -class WordnetGenerator: - - def __init__(self, mat, nc, pos_model, stanford_tagger, java_path): - """ - Creates a WordnetGenerator instance. - - @param mat: MorphAdornerToolkit object. - @param nc: NorvigCorrector object. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - """ - self.mat = mat - self.nc = nc - os.environ['JAVAHOME'] = java_path - self.tagger = StanfordPOSTagger(pos_model, stanford_tagger) - - def getSubstitutions(self, victor_corpus): - """ - Generates substitutions for the target words of a corpus in VICTOR format. - - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. 
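Editorial note: MerriamGenerator.getInitialSet above queries the Merriam-Webster thesaurus XML API with a licence key and walks `entry`/`fl`/`sens`/`syn` nodes to collect one-word synonyms per part of speech. A sketch of that parsing step over an inline XML snippet shaped like the response the removed code expects; the snippet is illustrative, not real API output.

```python
import re
import xml.etree.ElementTree as ET

# Illustrative stand-in for one thesaurus API response; the removed code
# fetches this XML over HTTP and parses it the same way.
xml_payload = """
<entry_list>
  <entry><fl>verb</fl>
    <sens><syn>sat (up), roosted, rested (on)</syn></sens>
  </entry>
</entry_list>
"""

root = ET.fromstring(xml_payload)
cands = {}
for entry in root.findall('entry'):
    fl = entry.find('fl')
    if fl is None:
        continue
    pos = fl.text.strip()[0].lower()               # first letter of the POS label
    bucket = cands.setdefault(pos, set())
    for sense in entry.iter('sens'):
        syn = sense.findall('syn')[0]
        text = ' '.join(syn.itertext())
        text = re.sub(r'\([^)]+\)', '', text)      # drop parenthesised usage notes
        bucket.update(s.strip() for s in text.split(',')
                      if len(s.strip().split(' ')) == 1)

print(cands)   # {'v': {'sat', 'roosted', 'rested'}}
```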
- @return: A dictionary that assigns target complex words to sets of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - """ - - #Get initial set of substitutions: - print('Getting initial set of substitutions...') - substitutions_initial = self.getInitialSet(victor_corpus) - - #Get final substitutions: - print('Inflecting substitutions...') - substitutions_inflected = self.getInflectedSet(substitutions_initial) - - #Return final set: - print('Finished!') - return substitutions_inflected - - def getInflectedSet(self, subs): - #Create list of targets: - targets = [] - - #Create lists for inflection: - toNothing = [] - toSingular = [] - toPlural = [] - toPAPEPA = [] - toPA = [] - toPRPA = [] - toPAPA = [] - toPE = [] - toPR = [] - toComparative = [] - toSuperlative = [] - toOriginal = [] - - #Fill lists: - for target in subs: - targets.append(target) - for pos in subs[target]: - #Get cands for a target and tag combination: - cands = list(subs[target][pos]) - - #Add candidates to lists: - if pos == 'NN': - toSingular.extend(cands) - elif pos == 'NNS': - toPlural.extend(cands) - elif pos == 'VB': - toPAPEPA.extend(cands) - elif pos == 'VBD': - toPA.extend(cands) - toPAPA.extend(cands) - elif pos == 'VBG': - toPRPA.extend(cands) - elif pos == 'VBN': - toPA.extend(cands) - toPAPA.extend(cands) - elif pos == 'VBP': - toPE.extend(cands) - elif pos == 'VBZ': - toPR.extend(cands) - elif pos == 'JJR' or pos == 'RBR': - toComparative.extend(cands) - elif pos == 'JJS' or pos == 'RBS': - toSuperlative.extend(cands) - else: - toNothing.extend(cands) - - #Lemmatize targets: - targetsL = self.mat.lemmatizeWords(targets) - - #Lemmatize words: - toNothingL = self.correctWords(self.mat.lemmatizeWords(toNothing)) - toSingularL = self.correctWords(self.mat.lemmatizeWords(toSingular)) - toPluralL = self.correctWords(self.mat.lemmatizeWords(toPlural)) - toPAPEPAL = self.correctWords(self.mat.lemmatizeWords(toPAPEPA)) - toPAL = self.correctWords(self.mat.lemmatizeWords(toPA)) - toPRPAL = self.correctWords(self.mat.lemmatizeWords(toPRPA)) - toPAPAL = self.correctWords(self.mat.lemmatizeWords(toPAPA)) - toPEL = self.correctWords(self.mat.lemmatizeWords(toPE)) - toPRL = self.correctWords(self.mat.lemmatizeWords(toPR)) - toComparativeL = self.correctWords(self.mat.lemmatizeWords(toComparative)) - toSuperlativeL = self.correctWords(self.mat.lemmatizeWords(toSuperlative)) - - #Inflect nouns: - singulars = self.correctWords(self.mat.inflectNouns(toSingularL, 'singular')) - plurals = self.correctWords(self.mat.inflectNouns(toPluralL, 'plural')) - - #Inflect verbs: - papepas = self.correctWords(self.mat.conjugateVerbs(toPAPEPAL, 'PAST_PERFECT_PARTICIPLE', 'FIRST_PERSON_SINGULAR')) - pas = self.correctWords(self.mat.conjugateVerbs(toPAL, 'PAST', 'FIRST_PERSON_SINGULAR')) - prpas = self.correctWords(self.mat.conjugateVerbs(toPRPAL, 'PRESENT_PARTICIPLE', 'FIRST_PERSON_SINGULAR')) - papas = self.correctWords(self.mat.conjugateVerbs(toPAPAL, 'PAST_PARTICIPLE', 'FIRST_PERSON_SINGULAR')) - pes = self.correctWords(self.mat.conjugateVerbs(toPEL, 'PERFECT', 'FIRST_PERSON_SINGULAR')) - prs = self.correctWords(self.mat.conjugateVerbs(toPRL, 'PRESENT', 'THIRD_PERSON_SINGULAR')) - - #Inflect adjectives and adverbs: - comparatives = self.correctWords(self.mat.inflectAdjectives(toComparativeL, 'comparative')) - superlatives = self.correctWords(self.mat.inflectAdjectives(toSuperlativeL, 'superlative')) - - #Create maps: - stemM = {} - singularM = {} - pluralM = {} - papepaM = {} - paM = {} - prpaM = {} - 
papaM = {} - peM = {} - prM = {} - comparativeM = {} - superlativeM = {} - - for i in range(0, len(toNothing)): - stemM[toNothing[i]] = toNothingL[i] - for i in range(0, len(targets)): - stemM[targets[i]] = targetsL[i] - for i in range(0, len(toSingular)): - stemM[toSingular[i]] = toSingularL[i] - singularM[toSingular[i]] = singulars[i] - for i in range(0, len(toPlural)): - stemM[toPlural[i]] = toPluralL[i] - pluralM[toPlural[i]] = plurals[i] - for i in range(0, len(toPAPEPA)): - stemM[toPAPEPA[i]] = toPAPEPAL[i] - papepaM[toPAPEPA[i]] = papepas[i] - for i in range(0, len(toPA)): - stemM[toPA[i]] = toPAL[i] - paM[toPA[i]] = pas[i] - for i in range(0, len(toPRPA)): - stemM[toPRPA[i]] = toPRPAL[i] - prpaM[toPRPA[i]] = prpas[i] - for i in range(0, len(toPAPA)): - stemM[toPAPA[i]] = toPAPAL[i] - papaM[toPAPA[i]] = papas[i] - for i in range(0, len(toPE)): - stemM[toPE[i]] = toPEL[i] - peM[toPE[i]] = pes[i] - for i in range(0, len(toPR)): - stemM[toPR[i]] = toPRL[i] - prM[toPR[i]] = prs[i] - for i in range(0, len(toComparative)): - stemM[toComparative[i]] = toComparativeL[i] - comparativeM[toComparative[i]] = comparatives[i] - for i in range(0, len(toSuperlative)): - stemM[toSuperlative[i]] = toSuperlativeL[i] - superlativeM[toSuperlative[i]] = superlatives[i] - - #Create final substitutions: - final_substitutions = {} - for target in subs: - #Get lemma of target: - targetL = stemM[target] - - #Create instance in final_substitutions: - final_substitutions[target] = set([]) - - #Iterate through pos tags of target: - for pos in subs[target]: - #Create final cands: - final_cands = set([]) - - #Get cands for a target and tag combination: - cands = list(subs[target][pos]) - - #Add candidates to lists: - if pos == 'NN': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(singularM[cand]) - final_cands.add(cand) - elif pos == 'NNS': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(pluralM[cand]) - final_cands.add(cand) - elif pos == 'VB': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(papepaM[cand]) - elif pos == 'VBD': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(paM[cand]) - final_cands.add(papaM[cand]) - elif pos == 'VBG': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(prpaM[cand]) - elif pos == 'VBN': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(paM[cand]) - final_cands.add(papaM[cand]) - elif pos == 'VBP': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(peM[cand]) - elif pos == 'VBZ': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(prM[cand]) - elif pos == 'JJR' or pos == 'RBR': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(comparativeM[cand]) - elif pos == 'JJS' or pos == 'RBS': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(superlativeM[cand]) - else: - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(cand) - - #Add final cands to final substitutions: - final_substitutions[target].update(final_cands) - return final_substitutions - - def getExpandedSet(self, subs): - #Create lists for inflection: - nouns = set([]) - verbs = set([]) - adjectives = set([]) - - #Fill lists: - for target in subs: - for pos in subs[target]: - #Get cands for a target and tag combination: - cands = list(subs[target][pos]) - - #Add candidates to lists: - if pos == 'NN' or pos == 'NNS': - nouns.add(target) - elif pos.startswith('V'): - verbs.add(target) - elif pos.startswith('J') or pos.startswith('RB'): - 
adjectives.add(target) - - #Transform sets in lists: - nouns = list(nouns) - verbs = list(verbs) - adjectives = list(adjectives) - - #Lemmatize words: - nounsL = self.correctWords(self.mat.lemmatizeWords(nouns)) - verbsL = self.correctWords(self.mat.lemmatizeWords(verbs)) - adjectivesL = self.correctWords(self.mat.lemmatizeWords(adjectives)) - - #Create lemma maps: - nounM = {} - verbM = {} - adjectiveM = {} - for i in range(0, len(nouns)): - nounM[nouns[i]] = nounsL[i] - for i in range(0, len(verbs)): - verbM[verbs[i]] = verbsL[i] - for i in range(0, len(adjectives)): - adjectiveM[adjectives[i]] = adjectivesL[i] - - #Inflect words: - plurals = self.correctWords(self.mat.inflectNouns(nounsL, 'plural')) - pas = self.correctWords(self.mat.conjugateVerbs(verbsL, 'PAST')) - prpas = self.correctWords(self.mat.conjugateVerbs(verbsL, 'PRESENT_PARTICIPLE')) - papas = self.correctWords(self.mat.conjugateVerbs(verbsL, 'PAST_PARTICIPLE')) - prs = self.correctWords(self.mat.conjugateVerbs(verbsL, 'PRESENT')) - comparatives = self.correctWords(self.mat.inflectAdjectives(adjectives, 'comparative')) - superlatives = self.correctWords(self.mat.inflectAdjectives(adjectives, 'superlative')) - - #Create inflected maps: - pluralM = {} - paM = {} - prpaM = {} - papaM = {} - prM = {} - comparativeM = {} - superlativeM = {} - for i in range(0, len(nouns)): - pluralM[nouns[i]] = plurals[i] - for i in range(0, len(verbs)): - paM[verbs[i]] = pas[i] - prpaM[verbs[i]] = prpas[i] - papaM[verbs[i]] = papas[i] - prM[verbs[i]] = prs[i] - for i in range(0, len(adjectives)): - comparativeM[adjectives[i]] = comparatives[i] - superlativeM[adjectives[i]] = superlatives[i] - - #Create extended substitutions: - substitutions_extended = {} - for target in subs: - for pos in subs[target]: - #Get cands for a target and tag combination: - cands = list(subs[target][pos]) - - #Add original to substitution dictionary: - self.addToExtended(target, pos, cands, substitutions_extended) - - #Add candidates to lists: - if pos == 'NN': - pluralT = pluralM[target] - self.addToExtended(pluralT, 'NNS', cands, substitutions_extended) - elif pos == 'NNS': - singularT = nounM[target] - self.addToExtended(singularT, 'NN', cands, substitutions_extended) - elif pos == 'VB': - paT = paM[target] - prpaT = prpaM[target] - papaT = papaM[target] - prT = prM[target] - self.addToExtended(paT, 'VBD', cands, substitutions_extended) - self.addToExtended(prpaT, 'VBG', cands, substitutions_extended) - self.addToExtended(papaT, 'VBN', cands, substitutions_extended) - self.addToExtended(prT, 'VBP', cands, substitutions_extended) - self.addToExtended(prT, 'VBZ', cands, substitutions_extended) - elif pos == 'VBD': - lemmaT = verbM[target] - prpaT = prpaM[target] - papaT = papaM[target] - prT = prM[target] - self.addToExtended(lemmaT, 'VB', cands, substitutions_extended) - self.addToExtended(prpaT, 'VBG', cands, substitutions_extended) - self.addToExtended(papaT, 'VBN', cands, substitutions_extended) - self.addToExtended(prT, 'VBP', cands, substitutions_extended) - self.addToExtended(prT, 'VBZ', cands, substitutions_extended) - elif pos == 'VBG': - lemmaT = verbM[target] - paT = paM[target] - papaT = papaM[target] - prT = prM[target] - self.addToExtended(lemmaT, 'VB', cands, substitutions_extended) - self.addToExtended(paT, 'VBD', cands, substitutions_extended) - self.addToExtended(papaT, 'VBN', cands, substitutions_extended) - self.addToExtended(prT, 'VBP', cands, substitutions_extended) - self.addToExtended(prT, 'VBZ', cands, substitutions_extended) - elif pos == 
'VBN': - lemmaT = verbM[target] - paT = paM[target] - prpaT = prpaM[target] - prT = prM[target] - self.addToExtended(lemmaT, 'VB', cands, substitutions_extended) - self.addToExtended(paT, 'VBD', cands, substitutions_extended) - self.addToExtended(prpaT, 'VBG', cands, substitutions_extended) - self.addToExtended(prT, 'VBP', cands, substitutions_extended) - self.addToExtended(prT, 'VBZ', cands, substitutions_extended) - elif pos == 'VBP': - lemmaT = verbM[target] - paT = paM[target] - prpaT = prpaM[target] - papaT = prM[target] - self.addToExtended(target, 'VBZ', cands, substitutions_extended) - self.addToExtended(lemmaT, 'VB', cands, substitutions_extended) - self.addToExtended(paT, 'VBD', cands, substitutions_extended) - self.addToExtended(prpaT, 'VBG', cands, substitutions_extended) - self.addToExtended(papaT, 'VBN', cands, substitutions_extended) - elif pos == 'VBZ': - lemmaT = verbM[target] - paT = paM[target] - prpaT = prpaM[target] - papaT = prM[target] - self.addToExtended(target, 'VBP', cands, substitutions_extended) - self.addToExtended(lemmaT, 'VB', cands, substitutions_extended) - self.addToExtended(paT, 'VBD', cands, substitutions_extended) - self.addToExtended(prpaT, 'VBG', cands, substitutions_extended) - self.addToExtended(papaT, 'VBN', cands, substitutions_extended) - elif pos == 'JJ': - comparativeT = comparativeM[target] - superlativeT = superlativeM[target] - self.addToExtended(comparativeT, 'JJR', cands, substitutions_extended) - self.addToExtended(superlativeT, 'JJS', cands, substitutions_extended) - elif pos == 'JJR': - lemmaT = adjectiveM[target] - superlativeT = superlativeM[target] - self.addToExtended(lemmaT, 'JJ', cands, substitutions_extended) - self.addToExtended(superlativeT, 'JJS', cands, substitutions_extended) - elif pos == 'JJS': - lemmaT = adjectiveM[target] - comparativeT = comparativeM[target] - self.addToExtended(lemmaT, 'JJ', cands, substitutions_extended) - self.addToExtended(comparativeT, 'JJR', cands, substitutions_extended) - elif pos == 'RB': - comparativeT = comparativeM[target] - superlativeT = superlativeM[target] - self.addToExtended(comparativeT, 'RBR', cands, substitutions_extended) - self.addToExtended(superlativeT, 'RBS', cands, substitutions_extended) - elif pos == 'RBR': - lemmaT = adjectiveM[target] - superlativeT = superlativeM[target] - self.addToExtended(lemmaT, 'RB', cands, substitutions_extended) - self.addToExtended(superlativeT, 'RBS', cands, substitutions_extended) - elif pos == 'RBS': - lemmaT = adjectiveM[target] - comparativeT = comparativeM[target] - self.addToExtended(lemmaT, 'RB', cands, substitutions_extended) - self.addToExtended(comparativeT, 'RBR', cands, substitutions_extended) - return substitutions_extended - - def getInitialSet(self, victor_corpus): - substitutions_initial = {} - lexf = open(victor_corpus) - sents = [] - targets = [] - heads = [] - for line in lexf: - data = line.strip().split('\t') - sent = data[0].strip().split(' ') - target = data[1].strip() - head = int(data[2].strip()) - sents.append(sent) - targets.append(target) - heads.append(head) - lexf.close() - - tagged_sents = self.tagger.tag_sents(sents) - - for i in range(0, len(sents)): - target = targets[i] - head = heads[i] - target_pos = str(tagged_sents[i][head][1]) - target_wnpos = self.getWordnetPOS(target_pos) - - syns = wn.synsets(target) - - cands = set([]) - for syn in syns: - for lem in syn.lemmas(): - candidate = self.cleanLemma(lem.name()) - if len(candidate.split(' '))==1: - cands.add(candidate) - if len(cands)>0: - if target in 
substitutions_initial: - substitutions_initial[target][target_pos] = cands - else: - substitutions_initial[target] = {target_pos:cands} - return substitutions_initial - - def addToExtended(self, target, tag, cands, subs): - if target not in subs: - subs[target] = {tag:cands} - else: - if tag not in subs[target]: - subs[target][tag] = cands - else: - subs[target][tag].extend(cands) - - def correctWords(self, words): - result = [] - for word in words: - result.append(self.nc.correct(word)) - return result - - def cleanLemma(self, lem): - result = '' - aux = lem.strip().split('_') - for word in aux: - result += word + ' ' - return result.strip() - - def getWordnetPOS(self, pos): - if pos[0] == 'N' or pos[0] == 'V' or pos == 'RBR' or pos == 'RBS': - return pos[0].lower() - elif pos[0] == 'J': - return 'a' - else: - return None - -#Class for the Biran Generator: -class BiranGenerator: - - def __init__(self, mat, complex_vocab, simple_vocab, complex_lm, simple_lm, nc, pos_model, stanford_tagger, java_path): - """ - Creates a BiranGenerator instance. - - @param mat: MorphAdornerToolkit object. - @param complex_vocab: Path to a vocabulary of complex words. - For more information on how to create the file, refer to the LEXenstein Manual. - @param simple_vocab: Path to a vocabulary of simple words. - For more information on how to create the file, refer to the LEXenstein Manual. - @param complex_lm: Path to a language model built over complex text. - For more information on how to create the file, refer to the LEXenstein Manual. - @param simple_lm: Path to a language model built over simple text. - For more information on how to create the file, refer to the LEXenstein Manual. - @param nc: NorvigCorrector object. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - """ - - self.complex_vocab = self.getVocab(complex_vocab) - self.simple_vocab = self.getVocab(simple_vocab) - self.complex_lm = kenlm.LanguageModel(complex_lm) - self.simple_lm = kenlm.LanguageModel(simple_lm) - self.mat = mat - self.nc = nc - os.environ['JAVAHOME'] = java_path - self.tagger = StanfordPOSTagger(pos_model, stanford_tagger) - - def getSubstitutions(self, victor_corpus): - """ - Generates substitutions for the target words of a corpus in VICTOR format. - - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A dictionary that assigns target complex words to sets of candidate substitutions. 
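# The deleted getInitialSet/getWordnetPOS code above collects single-word WordNet
# lemmas as substitution candidates for a POS-tagged target word. A minimal
# standalone sketch of that idea, assuming NLTK with the WordNet corpus installed;
# the helper names below are illustrative and not part of Lexi or LEXenstein.
from nltk.corpus import wordnet as wn

def penn_to_wordnet(pos):
    # Same mapping as the deleted getWordnetPOS: nouns/verbs by first letter,
    # RBR/RBS to 'r', adjectives to 'a', anything else ignored.
    if pos[0] == 'N' or pos[0] == 'V' or pos in ('RBR', 'RBS'):
        return pos[0].lower()
    if pos[0] == 'J':
        return 'a'
    return None

def wordnet_candidates(target, penn_pos):
    cands = set()
    for syn in wn.synsets(target, pos=penn_to_wordnet(penn_pos)):
        for lem in syn.lemmas():
            candidate = lem.name().replace('_', ' ')
            if len(candidate.split(' ')) == 1 and candidate != target:
                cands.add(candidate)
    return cands

# Example: wordnet_candidates('perched', 'VBD') might include 'sat' and 'roosted'.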
- Example: substitutions['perched'] = {'sat', 'roosted'} - """ - - #Get initial set of substitutions: - print('Getting initial set of substitutions...') - substitutions_initial = self.getInitialSet(victor_corpus) - - #Get inflected substitutions: - print('Inflecting substitutions...') - substitutions_inflected = self.getInflectedSet(substitutions_initial) - - #Get final substitutions: - print('Filtering simple->complex substitutions...') - substitutions_final = self.getFinalSet(substitutions_inflected) - - #Return final set: - print('Finished!') - return substitutions_final - - def getFinalSet(self, substitutions_inflected): - #Remove simple->complex substitutions: - substitutions_final = {} - - for key in substitutions_inflected: - candidate_list = set([]) - key_score = self.getComplexity(key, self.complex_lm, self.simple_lm) - for cand in substitutions_inflected[key]: - cand_score = self.getComplexity(cand, self.complex_lm, self.simple_lm) - if key_score>=cand_score: - candidate_list.add(cand) - if len(candidate_list)>0: - substitutions_final[key] = candidate_list - return substitutions_final - - def getInflectedSet(self, subs): - #Create list of targets: - targets = [] - - #Create lists for inflection: - toNothing = [] - toSingular = [] - toPlural = [] - toPAPEPA = [] - toPA = [] - toPRPA = [] - toPAPA = [] - toPE = [] - toPR = [] - toComparative = [] - toSuperlative = [] - toOriginal = [] - - #Fill lists: - for target in subs: - targets.append(target) - for pos in subs[target]: - #Get cands for a target and tag combination: - cands = list(subs[target][pos]) - - #Add candidates to lists: - if pos == 'NN': - toSingular.extend(cands) - elif pos == 'NNS': - toPlural.extend(cands) - elif pos == 'VB': - toPAPEPA.extend(cands) - elif pos == 'VBD': - toPA.extend(cands) - toPAPA.extend(cands) - elif pos == 'VBG': - toPRPA.extend(cands) - elif pos == 'VBN': - toPA.extend(cands) - toPAPA.extend(cands) - elif pos == 'VBP': - toPE.extend(cands) - elif pos == 'VBZ': - toPR.extend(cands) - elif pos == 'JJR' or pos == 'RBR': - toComparative.extend(cands) - elif pos == 'JJS' or pos == 'RBS': - toSuperlative.extend(cands) - else: - toNothing.extend(cands) - - #Lemmatize targets: - targetsL = self.mat.lemmatizeWords(targets) - - #Lemmatize words: - toNothingL = self.correctWords(self.mat.lemmatizeWords(toNothing)) - toSingularL = self.correctWords(self.mat.lemmatizeWords(toSingular)) - toPluralL = self.correctWords(self.mat.lemmatizeWords(toPlural)) - toPAPEPAL = self.correctWords(self.mat.lemmatizeWords(toPAPEPA)) - toPAL = self.correctWords(self.mat.lemmatizeWords(toPA)) - toPRPAL = self.correctWords(self.mat.lemmatizeWords(toPRPA)) - toPAPAL = self.correctWords(self.mat.lemmatizeWords(toPAPA)) - toPEL = self.correctWords(self.mat.lemmatizeWords(toPE)) - toPRL = self.correctWords(self.mat.lemmatizeWords(toPR)) - toComparativeL = self.correctWords(self.mat.lemmatizeWords(toComparative)) - toSuperlativeL = self.correctWords(self.mat.lemmatizeWords(toSuperlative)) - - #Inflect nouns: - singulars = self.correctWords(self.mat.inflectNouns(toSingularL, 'singular')) - plurals = self.correctWords(self.mat.inflectNouns(toPluralL, 'plural')) - - #Inflect verbs: - papepas = self.correctWords(self.mat.conjugateVerbs(toPAPEPAL, 'PAST_PERFECT_PARTICIPLE', 'FIRST_PERSON_SINGULAR')) - pas = self.correctWords(self.mat.conjugateVerbs(toPAL, 'PAST', 'FIRST_PERSON_SINGULAR')) - prpas = self.correctWords(self.mat.conjugateVerbs(toPRPAL, 'PRESENT_PARTICIPLE', 'FIRST_PERSON_SINGULAR')) - papas = 
self.correctWords(self.mat.conjugateVerbs(toPAPAL, 'PAST_PARTICIPLE', 'FIRST_PERSON_SINGULAR')) - pes = self.correctWords(self.mat.conjugateVerbs(toPEL, 'PERFECT', 'FIRST_PERSON_SINGULAR')) - prs = self.correctWords(self.mat.conjugateVerbs(toPRL, 'PRESENT', 'THIRD_PERSON_SINGULAR')) - - #Inflect adjectives and adverbs: - comparatives = self.correctWords(self.mat.inflectAdjectives(toComparativeL, 'comparative')) - superlatives = self.correctWords(self.mat.inflectAdjectives(toSuperlativeL, 'superlative')) - - #Create maps: - stemM = {} - singularM = {} - pluralM = {} - papepaM = {} - paM = {} - prpaM = {} - papaM = {} - peM = {} - prM = {} - comparativeM = {} - superlativeM = {} - - for i in range(0, len(toNothing)): - stemM[toNothing[i]] = toNothingL[i] - for i in range(0, len(targets)): - stemM[targets[i]] = targetsL[i] - for i in range(0, len(toSingular)): - stemM[toSingular[i]] = toSingularL[i] - singularM[toSingular[i]] = singulars[i] - for i in range(0, len(toPlural)): - stemM[toPlural[i]] = toPluralL[i] - pluralM[toPlural[i]] = plurals[i] - for i in range(0, len(toPAPEPA)): - stemM[toPAPEPA[i]] = toPAPEPAL[i] - papepaM[toPAPEPA[i]] = papepas[i] - for i in range(0, len(toPA)): - stemM[toPA[i]] = toPAL[i] - paM[toPA[i]] = pas[i] - for i in range(0, len(toPRPA)): - stemM[toPRPA[i]] = toPRPAL[i] - prpaM[toPRPA[i]] = prpas[i] - for i in range(0, len(toPAPA)): - stemM[toPAPA[i]] = toPAPAL[i] - papaM[toPAPA[i]] = papas[i] - for i in range(0, len(toPE)): - stemM[toPE[i]] = toPEL[i] - peM[toPE[i]] = pes[i] - for i in range(0, len(toPR)): - stemM[toPR[i]] = toPRL[i] - prM[toPR[i]] = prs[i] - for i in range(0, len(toComparative)): - stemM[toComparative[i]] = toComparativeL[i] - comparativeM[toComparative[i]] = comparatives[i] - for i in range(0, len(toSuperlative)): - stemM[toSuperlative[i]] = toSuperlativeL[i] - superlativeM[toSuperlative[i]] = superlatives[i] - - #Create final substitutions: - final_substitutions = {} - for target in subs: - #Get lemma of target: - targetL = stemM[target] - - #Create instance in final_substitutions: - final_substitutions[target] = set([]) - - #Iterate through pos tags of target: - for pos in subs[target]: - #Create final cands: - final_cands = set([]) - - #Get cands for a target and tag combination: - cands = list(subs[target][pos]) - - #Add candidates to lists: - if pos == 'NN': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(singularM[cand]) - final_cands.add(cand) - elif pos == 'NNS': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(pluralM[cand]) - final_cands.add(cand) - elif pos == 'VB': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(papepaM[cand]) - elif pos == 'VBD': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(paM[cand]) - final_cands.add(papaM[cand]) - elif pos == 'VBG': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(prpaM[cand]) - elif pos == 'VBN': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(paM[cand]) - final_cands.add(papaM[cand]) - elif pos == 'VBP': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(peM[cand]) - elif pos == 'VBZ': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(prM[cand]) - elif pos == 'JJR' or pos == 'RBR': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(comparativeM[cand]) - elif pos == 'JJS' or pos == 'RBS': - for cand in cands: - if targetL!=stemM[cand]: - final_cands.add(superlativeM[cand]) - else: - for cand in cands: - if targetL!=stemM[cand]: - 
final_cands.add(cand) - - #Add final cands to final substitutions: - final_substitutions[target].update(final_cands) - return final_substitutions - - def getInitialSet(self, victor_corpus): - substitutions_initial = {} - lexf = open(victor_corpus) - sents = [] - targets = [] - heads = [] - for line in lexf: - data = line.strip().split('\t') - sent = data[0].strip().split(' ') - target = data[1].strip() - head = int(data[2].strip()) - sents.append(sent) - targets.append(target) - heads.append(head) - lexf.close() - - tagged_sents = self.tagger.tag_sents(sents) - - for i in range(0, len(sents)): - target = targets[i] - head = heads[i] - target_pos = str(tagged_sents[i][head][1]) - target_wnpos = self.getWordnetPOS(target_pos) - - if target in self.complex_vocab: - syns = wn.synsets(target) - cands = set([]) - for syn in syns: - for lem in syn.lemmas(): - candidate = self.cleanLemma(lem.name()) - if len(candidate.split(' '))==1 and candidate in self.simple_vocab: - cands.add(candidate) - for hyp in syn.hypernyms(): - for lem in hyp.lemmas(): - candidate = self.cleanLemma(lem.name()) - if len(candidate.split(' '))==1 and candidate in self.simple_vocab: - cands.add(candidate) - if target in cands: - cands.remove(target) - if len(cands)>0: - if target in substitutions_initial: - substitutions_initial[target][target_pos] = cands - else: - substitutions_initial[target] = {target_pos:cands} - return substitutions_initial - - def getComplexity(self, word, clm, slm): - C = (clm.score(word, bos=False, eos=False))/(slm.score(word, bos=False, eos=False)) - #C = (clm.score(word)/(slm.score(word)) - L = float(len(word)) - return C*L - - def getVocab(self, path): - return set([line.strip() for line in open(path)]) - - def cleanLemma(self, lem): - result = '' - aux = lem.strip().split('_') - for word in aux: - result += word + ' ' - return result.strip() - - def getWordnetPOS(self, pos): - if pos[0] == 'N' or pos[0] == 'V' or pos == 'RBR' or pos == 'RBS': - return pos[0].lower() - elif pos[0] == 'J': - return 'a' - else: - return None - - def correctWords(self, words): - result = [] - for word in words: - result.append(self.nc.correct(word)) - return result diff --git a/lexi/lib/lexenstein/identifiers.py b/lexi/lib/lexenstein/identifiers.py deleted file mode 100755 index 700fa75..0000000 --- a/lexi/lib/lexenstein/identifiers.py +++ /dev/null @@ -1,395 +0,0 @@ -import numpy as np -from sklearn import svm -from sklearn.linear_model import * -from sklearn.tree import * -from sklearn.ensemble import * -from sklearn.feature_selection import SelectKBest -from sklearn.feature_selection import f_classif -from sklearn.preprocessing import normalize - -class MachineLearningIdentifier: - - def __init__(self, fe): - """ - Creates a MachineLearningIdentifier instance. - - @param fe: FeatureEstimator object. - """ - self.fe = fe - self.classifier = None - - def calculateTrainingFeatures(self, training_corpus): - """ - Calculate features of a corpus in CWICTOR format. - - @param training_corpus: Path to a corpus in the CWICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - """ - self.Xtr = self.fe.calculateFeatures(training_corpus, format='cwictor') - self.Ytr = [] - f = open(training_corpus) - for line in f: - data = line.strip().split('\t') - y = int(data[3].strip()) - self.Ytr.append(y) - f.close() - - def calculateTestingFeatures(self, testing_corpus): - """ - Calculate testing features of a corpus in VICTOR or CWICTOR format. 
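# The generator and identifier classes in this patch all read corpora in the
# VICTOR / CWICTOR formats referenced in their docstrings. A small sketch of how
# such a tab-separated line is parsed, following the field layout used by the
# deleted readers: sentence, target word, token index of the target, then either
# ranked "rank:candidate" pairs (VICTOR) or a 0/1 complexity label (CWICTOR).
def parse_victor_line(line):
    fields = line.strip().split('\t')
    sentence = fields[0].split(' ')
    target = fields[1]
    head = int(fields[2])
    candidates = [(int(c.split(':')[0]), c.split(':')[1]) for c in fields[3:]]
    return sentence, target, head, candidates

def parse_cwictor_line(line):
    fields = line.strip().split('\t')
    return fields[0].split(' '), fields[1], int(fields[2]), int(fields[3])

# Example VICTOR line (tab-separated):
# "the cat perched on the mat\tperched\t2\t1:sat\t2:roosted"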
- - @param testing_corpus: Path to a corpus in the VICTOR or CWICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - """ - self.Xte = self.fe.calculateFeatures(testing_corpus, format='cwictor') - - def selectKBestFeatures(self, k='all'): - """ - Selects the k best features through univariate feature selection. - - @param k: Number of features to be selected. - """ - feature_selector = SelectKBest(f_classif, k=k) - feature_selector.fit(self.Xtr, self.Ytr) - self.Xtr = feature_selector.transform(self.Xtr) - self.Xte = feature_selector.transform(self.Xte) - - def trainSVM(self, C=1.0, kernel='rbf', degree=3, gamma=0.0, coef0=0.0, class_weight={0:1.0, 1:1.0}): - """ - Trains an SVM classifier. To know more about the meaning of each parameter, - please refer to http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC - """ - self.classifier = svm.SVC(C=C, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0, class_weight=class_weight) - self.classifier.fit(self.Xtr, self.Ytr) - - def trainPassiveAggressiveClassifier(self, C=1.0, loss='hinge'): - """ - Trains a Passive Agressive classifier. To know more about the meaning of each parameter, - please refer to http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.PassiveAggressiveClassifier.html - """ - self.classifier = PassiveAggressiveClassifier(C=C, loss=loss) - self.classifier.fit(self.Xtr, self.Ytr) - - def trainSGDClassifier(self, loss='hinge', penalty='l2', alpha=0.0001, l1_ratio=0.15, epsilon=0.001, class_weight={0:1.0, 1:1.0}): - """ - Trains a Stochastic Gradient Descent classifier. To know more about the meaning of each parameter, - please refer to http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html - """ - self.classifier = SGDClassifier(loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio, epsilon=epsilon, class_weight=class_weight) - self.classifier.fit(self.Xtr, self.Ytr) - - def trainDecisionTreeClassifier(self, criterion='gini', splitter='best', max_features=None, max_depth=None): - """ - Trains a Decision Tree classifier. To know more about the meaning of each parameter, - please refer to http://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html - """ - self.classifier = DecisionTreeClassifier(criterion=criterion, splitter=splitter, max_features=max_features, max_depth=max_depth) - self.classifier.fit(self.Xtr, self.Ytr) - - def trainAdaBoostClassifier(self, n_estimators=50, learning_rate=1, algorithm='SAMME.R'): - """ - Trains an Ada Boost Classifier. To know more about the meaning of each parameter, - please refer to http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html - """ - self.classifier = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate, algorithm=algorithm) - self.classifier.fit(self.Xtr, self.Ytr) - - def trainGradientBoostClassifier(self, loss='deviance', n_estimators=50, learning_rate=1, max_features=None): - """ - Trains an Gradient Boost Classifier. 
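# The MachineLearningIdentifier methods above are thin wrappers around standard
# scikit-learn classifiers trained on per-word feature vectors with 0/1 labels
# taken from a CWICTOR corpus. A condensed, self-contained sketch of that setup
# with toy data; in Lexi/LEXenstein the features come from a FeatureEstimator.
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_selection import SelectKBest, f_classif

X_train = np.array([[7, 1.2], [3, 4.5], [9, 0.8], [4, 3.9]])  # e.g. length, log-frequency
y_train = np.array([1, 0, 1, 0])                              # 1 = complex, 0 = simple

# Optional univariate feature selection, as in selectKBestFeatures:
selector = SelectKBest(f_classif, k='all').fit(X_train, y_train)
X_train = selector.transform(X_train)

clf = DecisionTreeClassifier().fit(X_train, y_train)
X_test = selector.transform(np.array([[8, 1.0]]))
print(clf.predict(X_test))  # a long, rare word comes out as complex: [1]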
To know more about the meaning of each parameter, - please refer to http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.GradientBoostingClassifier.html - """ - self.classifier = GradientBoostingClassifier(loss=loss, n_estimators=n_estimators, learning_rate=learning_rate, max_features=max_features) - self.classifier.fit(self.Xtr, self.Ytr) - - def trainExtraTreesClassifier(self, n_estimators=50, criterion='gini', max_features=None): - """ - Trains an Extra Trees Classifier. To know more about the meaning of each parameter, - please refer to http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html - """ - self.classifier = ExtraTreesClassifier(n_estimators=n_estimators, criterion=criterion, max_features=max_features) - self.classifier.fit(self.Xtr, self.Ytr) - - def trainRandomForestClassifier(self, n_estimators=50, criterion='gini', max_features=None): - """ - Trains an Random Trees Classifier. To know more about the meaning of each parameter, - please refer to http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.ExtraTreesClassifier.html - """ - self.classifier = RandomForestClassifier(n_estimators=n_estimators, criterion=criterion, max_features=max_features) - self.classifier.fit(self.Xtr, self.Ytr) - - def identifyComplexWords(self): - return self.classifier.predict(self.Xte) - -class SimplifyAllIdentifier: - - def identifyComplexWords(self, corpus): - """ - Assign label 1 (complex) to all target words in the VICTOR or CWICTOR corpus. - - @param corpus: Path to a corpus in the VICTOR or CWICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A list of binary values, one per line, with value 1. - """ - result = [] - f = open(corpus) - for line in f: - result.append(1) - f.close() - return result - -class SimplifyNoneIdentifier: - - def identifyComplexWords(self, corpus): - """ - Assign label 0 (simple) to all target words in the VICTOR or CWICTOR corpus. - - @param corpus: Path to a corpus in the VICTOR or CWICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A list of binary values, one per line, with value 0. - """ - result = [] - f = open(corpus) - for line in f: - result.append(0) - f.close() - return result - -class LexiconIdentifier: - - def __init__(self, lexicon, type): - """ - Creates a LexiconIdentifier instance. - - @param lexicon: Lexicon containing simple or complex, one word per line. - @param type: Type of lexicon. - Values: 'complex', 'simple' - """ - self.lexicon = set([line.strip() for line in open(lexicon)]) - self.type = type - self.feature_index = None - - def identifyComplexWords(self, corpus): - """ - Judge if the target words of a corpus in VICTOR or CWICTOR format are complex or not - - @param corpus: Path to a corpus in the VICTOR or CWICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A list of binary values, one per line, with value 1 if a target word is complex, and 0 otherwise. - """ - result = [] - f = open(corpus) - for line in f: - data = line.strip().split('\t') - target = data[1].strip() - if target in self.lexicon: - if self.type=='simple': - result.append(0) - else: - result.append(1) - else: - if self.type=='simple': - result.append(1) - else: - result.append(0) - f.close() - return result - -class ThresholdIdentifier: - - def __init__(self, fe): - """ - Creates a ThresholdIdentifier instance. - - @param fe: FeatureEstimator object. 
- """ - self.fe = fe - - def calculateTrainingFeatures(self, training_corpus): - """ - Calculate features of a corpus in CWICTOR format. - - @param training_corpus: Path to a corpus in the CWICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - """ - self.Xtr = self.fe.calculateFeatures(training_corpus, format='cwictor') - self.Ytr = [] - f = open(training_corpus) - for line in f: - data = line.strip().split('\t') - y = int(data[3].strip()) - self.Ytr.append(y) - f.close() - - def calculateTestingFeatures(self, testing_corpus): - """ - Calculate testing features of a corpus in VICTOR or CWICTOR format. - - @param testing_corpus: Path to a corpus in the VICTOR or CWICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - """ - self.Xte = self.fe.calculateFeatures(testing_corpus, format='cwictor') - - def trainIdentifierBruteForce(self, feature_index, step=None): - """ - Trains the threshold identifier with respect to a certain feature through brute force. - - @param feature_index: Index of the feature to be used in training. - """ - #Save feature index: - self.feature_index = feature_index - - #Estimate min and max: - self.minX, self.maxX = self.getMinMax() - - #Set initial min, max and pivot: - min = float(self.minX) - max = float(self.maxX) - - #Set step: - if step==None: - step = (max-min)/1000 - - #Find best threshold: - best = -1 - bestIndex = None - i = min+step - while ibest: - best=score - bestIndex = i - i += step - - #Set threshold and score: - self.threshold = bestIndex - - def trainIdentifierBinarySearch(self, feature_index, diff=None, order=None): - """ - Trains the threshold identifier with respect to a certain feature through binary search. - - @param feature_index: Index of the feature to be used in training. - """ - #Save feature index: - self.feature_index = feature_index - - #Estimate min and max: - self.minX, self.maxX = self.getMinMax() - - #Set initial min, max and pivot: - min = float(self.minX) - max = float(self.maxX) - - #Define difference threshold: - if diff==None: - diff = (max-min)/1000 - - #Define order: - if order==None or order<1: - order = 1 - - #Estimate best threshold: - best = -1 - bestIndex = None - divisor = float(2**order) - step = (max-min)/divisor - for i in range(1, int(divisor)): - pivot = i*step - index, score = self.findMaxBinary(min, max, pivot, diff) - if score>best: - best = score - bestIndex = index - - #Set threshold and score: - self.threshold = bestIndex - - def findMaxBinary(self, min, max, pivot, diff): - #Estimate best threshold: - best = -1 - bestIndex = None - while (max-min)>diff: - left = (min+pivot)/2.0 - right = (pivot+max)/2.0 - scoreL = self.getScore(left) - scoreR = self.getScore(right) - if scoreL>scoreR: - max = pivot - pivot = left - if scoreL>best: - best = scoreL - bestIndex = left - else: - min = pivot - pivot = right - if scoreR>best: - best = scoreR - bestIndex = right - - #Set threshold and score: - return bestIndex, best - - def identifyComplexWords(self): - """ - Judge if the target words of the testing instances are complex or not. - - @return: A list of binary values, one per line, with value 1 if a target word is complex, and 0 otherwise. 
- """ - result = [] - for i in range(0, len(self.Xte)): - x = self.Xte[i][self.feature_index] - if self.fe.identifiers[self.feature_index][1]=='Complexity': - if x>self.threshold: - result.append(1) - else: - result.append(0) - else: - if xmax: - max = value - if valuethreshold and y==1) or (xthreshold and y==0): - precisionc += 1 - if y==1: - recallc += 1 - precisiont += 1 - if y==1: - recallt += 1 - - precision = float(precisionc)/float(precisiont) - recall = float(recallc)/float(recallt) - fmean = 0.0 - if precision==0.0 and recall==0.0: - fmean = 0.0 - else: - fmean = 2*(precision*recall)/(precision+recall) - - #Return F-Measure: - return fmean diff --git a/lexi/lib/lexenstein/morphadorner.py b/lexi/lib/lexenstein/morphadorner.py deleted file mode 100755 index edd9f17..0000000 --- a/lexi/lib/lexenstein/morphadorner.py +++ /dev/null @@ -1,175 +0,0 @@ -import subprocess - -class MorphAdornerToolkit: - - def __init__(self, path): - """ - Creates an instance of the MorphAdornerToolkit class. - - @param path: Path to the root installation folder of Morph Adorner Toolkit. - """ - - self.root = path - if not self.root.endswith('/'): - self.root += '/' - self.lemmatizer = self.root + 'WordLemmatizer/WordLemmatizer.jar' - self.stemmer = self.root + 'WordStemmer/WordStemmer.jar' - self.conjugator = self.root + 'VerbConjugator/VerbConjugator.jar' - self.inflector = self.root + 'NounInflector/NounInflector.jar' - self.tenser = self.root + 'VerbTenser/VerbTenser.jar' - self.syllabler = self.root + 'SyllableSplitter/SyllableSplitter.jar' - self.adjinflector = self.root + 'AdjectiveInflector/AdjectiveInflector.jar' - - def lemmatizeWords(self, words): - """ - Lemmatizes a set of words. - - @param words: List of words to be lemmatized. - @return: List of the lemmas of the words passed as input. - """ - - input = '' - for word in words: - input += word + '\n' - input += '\n' - - args = ['java', '-jar', self.lemmatizer] - proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=False) - (out, err) = proc.communicate(input) - - result = out.strip().split('\n') - return result - - def stemWords(self, words): - """ - Porter stems a set of words. - - @param words: List of words to be Porter stemmed. - @return: List of the Porter stems of the words passed as input. - """ - - input = '' - for word in words: - input += word + '\n' - input += '\n' - - args = ['java', '-jar', self.stemmer] - proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=False) - (out, err) = proc.communicate(input) - - result = out.strip().split('\n') - return result - - def conjugateVerbs(self, lemmas, tense, person): - """ - Conjugate a set of verbs in a given tense. - - @param lemmas: Lemmas of verbs to be conjugated. - @param tense: Tense in which to conjugate the verbs. - Tenses available: PAST, PAST_PARTICIPLE, PAST_PERFECT, PAST_PERFECT_PARTICIPLE, PERFECT, PRESENT, PRESENT_PARTICIPLE. - @param person: Person in which to conjugate the verbs. - Tenses available: FIRST_PERSON_SINGULAR, FIRST_PERSON_PLURAL, SECOND_PERSON_SINGULAR, SECOND_PERSON_PLURAL, THIRD_PERSON_SINGULAR, THIRD_PERSON_PLURAL. - @return: List of the conjugated versions of the verb lemmas passed as input. 
- """ - - input = '' - for lemma in lemmas: - input += lemma + ' ' + tense + ' ' + person + '\n' - input += '\n' - - args = ['java', '-jar', self.conjugator] - proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=False) - (out, err) = proc.communicate(input) - - result = out.strip().split('\n') - return result - - - def inflectNouns(self, lemmas, number): - """ - Inflect a list of nouns to its singular or plural form. - - @param lemmas: Lemmas of nouns to be inflected. - @param number: Form in which to inflect the lemmas. - Forms available: singular, plural. - @return: List of the inflected versions of the noun lemmas passed as input. - """ - - input = '' - for lemma in lemmas: - input += lemma + ' ' + number + '\n' - input += '\n' - - args = ['java', '-jar', self.inflector] - proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=False) - (out, err) = proc.communicate(input) - - result = out.strip().split('\n') - return result - - def tenseVerbs(self, lemmas, verbs): - """ - Retrieve the tense of a given set of verbs. - - @param lemmas: Lemmas of verbs to be tensed. - @param verbs: Verbs in their original forms. - @return: List of the tenses and persons of the verb passed as input. - Tenses available: PAST, PAST_PARTICIPLE, PAST_PERFECT, PAST_PERFECT_PARTICIPLE, PERFECT, PRESENT, PRESENT_PARTICIPLE. - Persons available: FIRST_PERSON_SINGULAR, FIRST_PERSON_PLURAL, SECOND_PERSON_SINGULAR, SECOND_PERSON_PLURAL, THIRD_PERSON_SINGULAR, THIRD_PERSON_PLURAL. - """ - - input = '' - for i in range(0, len(lemmas)): - input += lemmas[i] + ' ' + verbs[i] + '\n' - input += '\n' - - args = ['java', '-jar', self.tenser] - proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=False) - (out, err) = proc.communicate(input) - - result = [line.strip().split(' ') for line in out.strip().split('\n')] - return result - - - def splitSyllables(self, words): - """ - Splits a set of words in syllables. - - @param words: List of words to be lemmatized. - @return: List of words with their syllables separated by hyphen markers. - """ - - input = '' - for word in words: - input += word + '\n' - input += '\n' - - args = ['java', '-jar', self.syllabler] - proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=False) - (out, err) = proc.communicate(input) - - out = out.replace('\xc2\xad', '-') - result = out.strip().split('\n') - return result - - def inflectAdjectives(self, lemmas, form): - """ - Inflect a list of adjectives/adverbs to its singular or plural form. - - @param lemmas: Lemmas of adjectives/adverbs to be inflected. - @param form: Form in which to inflect the lemmas. - Forms available: comparative, superlative. - @return: List of the inflected versions of the adjective/adverb lemmas passed as input. 
- """ - - input = '' - for lemma in lemmas: - input += lemma + ' ' + form + '\n' - input += '\n' - - args = ['java', '-jar', self.adjinflector] - proc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=False) - (out, err) = proc.communicate(input) - - result = out.strip().split('\n') - return result diff --git a/lexi/lib/lexenstein/rankers.py b/lexi/lib/lexenstein/rankers.py deleted file mode 100755 index e38fe04..0000000 --- a/lexi/lib/lexenstein/rankers.py +++ /dev/null @@ -1,1450 +0,0 @@ -import os -import kenlm -import math -from keras.optimizers import * -from keras.models import * -from keras.layers.core import * -from nltk.corpus import wordnet as wn -from sklearn.preprocessing import normalize -from sklearn.feature_selection import f_classif -from sklearn import linear_model -from sklearn.svm import SVC -from sklearn.cross_validation import train_test_split -from sklearn.feature_selection import SelectKBest - -class NNRegressionRanker: - - def __init__(self, fe, model=None): - """ - Creates an instance of the NNRegressionRanker class. - This ranker was introduced by "Lexical Simplification with Neural Ranking, Proceedings of the 15th EACL, 2017". - - @param fe: A configured FeatureEstimator object. - @param model: A trained neural ranking model. If provided, it must be an instance created by the ranker itself, and the features provided must be the same used for its training. - """ - self.fe = fe - self.model = model - - def createRanker(self, layers, hidden_size): - """ - Creates a new neural ranker based on the architecture specifications provided. - - @param layers: number of hidden layers of the neural ranker. - @param hidden_size: size of the hidden layers of the neural ranker. - """ - model = Sequential() - model.add(Dense(output_dim=hidden_size, input_dim=len(self.fe.identifiers)*2, init="glorot_uniform")) - model.add(Activation("tanh")) - model.add(Dropout(0.25)) - for i in range(0, layers): - model.add(Dense(output_dim=hidden_size, init="glorot_uniform")) - model.add(Activation("tanh")) - model.add(Dropout(0.10)) - model.add(Dense(output_dim=1)) - model.add(Activation("linear")) - model.compile(loss='mean_squared_error', optimizer='adam') - self.model = model - return model - - def saveRanker(self, json_path, h5_path): - """ - Saves the ranker's neural model. - - @param json_path: Path in which to save the JSON file containing the structure of the neural network. - @param h5_path: Path in which to save the H5 file containing the weights of the neural network. - """ - json_string = self.model.to_json() - open(json_path, 'w').write(json_string) - self.model.save_weights(h5_path, overwrite=True) - - def loadRanker(self, json_path, h5_path): - """ - Loads the ranker's neural model. - - @param json_path: Path of JSON file from which to load the structure of the neural network. - @param h5_path: Path of H5 file from which to load the weights of the neural network. 
- """ - model = model_from_json(open(json_path).read()) - model.load_weights(h5_path) - model.compile(loss='mean_squared_error', optimizer='adam') - self.model = model - return model - - def trainRanker(self, victor_corpus, epochs, batch_size): - features = self.fe.calculateFeatures(victor_corpus) - Xtr = [] - Ytr = [] - f = open(victor_corpus) - c = -1 - for line in f: - data = line.strip().split('\t') - cands = [cand.strip().split(':')[1] for cand in data[3:]] - indexes = [int(cand.strip().split(':')[0]) for cand in data[3:]] - featmap = {} - for cand in cands: - c += 1 - featmap[cand] = features[c] - for i in range(0, len(cands)-1): - for j in range(i+1, len(cands)): - indexi = indexes[i] - indexj = indexes[j] - indexdiffji = indexj-indexi - indexdiffij = indexi-indexj - positive = featmap[cands[i]] - negative = featmap[cands[j]] - v1 = np.concatenate((positive,negative)) - v2 = np.concatenate((negative,positive)) - Xtr.append(v1) - Xtr.append(v2) - Ytr.append(indexdiffji) - Ytr.append(indexdiffij) - f.close() - Xtr = np.array(Xtr) - Ytr = np.array(Ytr) - self.model.fit(Xtr, Ytr, nb_epoch=epochs, batch_size=batch_size, verbose=0) - - def getRankings(self, victor_corpus): - """ - Ranks candidates using a neural ranker. - Candidates are ranked according to their simplicity score, which is calculated as the sum of the simplicity difference between a given candidate and the remainder. - - @param victor_corpus: Path to a testing corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A list of ranked candidates for each instance in the VICTOR corpus, from simplest to most complex. - """ - #If feature values are not available, then estimate them: - features = self.fe.calculateFeatures(victor_corpus) - - #Read feature values for each candidate in victor corpus: - ranks = [] - c = -1 - f = open(victor_corpus) - index = 0 - for l in f: - #Get all substitutions in ranking instance: - line = l.strip().split('\t') - cands = [cand.strip().split(':')[1].strip() for cand in line[3:]] - - #Estimate feature and candidate maps: - featmap = {} - scoremap = {} - for cand in cands: - c += 1 - featmap[cand] = features[c] - scoremap[cand] = 0.0 - - #Calculate simplicity differences between candidates and update scores: - for i in range(0, len(cands)-1): - cand1 = cands[i] - for j in range(i+1, len(cands)): - cand2 = cands[j] - posneg = np.concatenate((featmap[cand1], featmap[cand2])) - probs = self.model.predict(np.array([posneg])) - score = probs[0] - scoremap[cand1] += score - negpos = np.concatenate((featmap[cand2], featmap[cand1])) - probs = self.model.predict(np.array([negpos])) - score = probs[0] - scoremap[cand1] -= score - - #Rank candidates according to score: - rank = sorted(list(scoremap.keys()), key=scoremap.__getitem__, reverse=True) - ranks.append(rank) - return ranks - -class GlavasRanker: - - def __init__(self, fe): - """ - Creates an instance of the GlavasRanker class. - This ranker was introduced by "Simplifying Lexical Simplification: Do We Need Simplified Corpora?, Proceedings of the 2015 ACL, 2015". - - @param fe: A configured FeatureEstimator object. - """ - - self.fe = fe - self.feature_values = None - - def getRankings(self, victor_corpus): - """ - Ranks candidates with respect to a set of features. - Candidates are ranked according to their average ranking position obtained with all feature values. - - @param victor_corpus: Path to a testing corpus in VICTOR format. 
- For more information about the file's format, refer to the LEXenstein Manual. - @return: A list of ranked candidates for each instance in the VICTOR corpus, from simplest to most complex. - """ - - #If feature values are not available, then estimate them: - self.feature_values = self.fe.calculateFeatures(victor_corpus) - - #Create object for results: - result = [] - - #Read feature values for each candidate in victor corpus: - f = open(victor_corpus) - index = 0 - for line in f: - #Get all substitutions in ranking instance: - data = line.strip().split('\t') - substitutions = data[3:len(data)] - - #Get instance's feature values: - instance_features = [] - for substitution in substitutions: - instance_features.append(self.feature_values[index]) - index += 1 - - rankings = {} - for i in range(0, len(self.fe.identifiers)): - #Create dictionary of substitution to feature value: - scores = {} - for j in range(0, len(substitutions)): - substitution = substitutions[j] - word = substitution.strip().split(':')[1].strip() - scores[word] = instance_features[j][i] - - #Check if feature is simplicity or complexity measure: - rev = False - if self.fe.identifiers[i][1]=='Simplicity': - rev = True - - #Sort substitutions: - words = list(scores.keys()) - sorted_substitutions = sorted(words, key=scores.__getitem__, reverse=rev) - - #Update rankings: - for j in range(0, len(sorted_substitutions)): - word = sorted_substitutions[j] - if word in rankings: - rankings[word] += j - else: - rankings[word] = j - - #Produce final rankings: - final_rankings = sorted(list(rankings.keys()), key=rankings.__getitem__) - - #Add them to result: - result.append(final_rankings) - f.close() - - #Return result: - return result - - def size(self): - """ - Returns the number of features available for a given MetricRanker. - - @return: The number of features in the MetricRanker's FeatureEstimator object. - """ - return len(self.fe.identifiers) - -class SVMBoundaryRanker: - - def __init__(self, fe): - """ - Creates an instance of the SVMBoundaryRanker class. - This simplifier was introduced by "LEXenstein: A Framework for Lexical Simplification, Proceedings of the 2015 ACL, 2015". - - @param fe: A configured FeatureEstimator object. - """ - - self.fe = fe - self.classifier = None - self.feature_selector = None - - def trainRanker(self, victor_corpus, positive_range, C, kernel, degree, gamma, coef0, k='all'): - """ - Trains a SVM Boundary Ranker according to the parameters provided. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param positive_range: Maximum rank to which label 1 is assigned in the binary classification setup. - Recommended value: 1. - @param C: Penalty parameter. - Recommended values: 0.1, 1, 10. - @param kernel: Kernel function to be used. - Supported values: 'linear', 'poly', 'rbf', 'sigmoid'. - @param degree: Degree of the polynomial kernel. - Recommended values: 2, 3. - @param gamma: Kernel coefficient. - Recommended values: 0.01, 0.1, 1. - @param coef0: Independent term value. - Recommended values: 0, 1. - @param k: Number of best features to be selected through univariate feature selection. - If k='all', then no feature selection is performed. 
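# GlavasRanker above ranks the candidates once per feature and then orders them by
# their summed rank positions, flipping the sort direction for features that
# measure simplicity rather than complexity. A standalone sketch with toy scores.
def aggregate_rankings(candidates, feature_scores, higher_is_simpler):
    totals = {c: 0 for c in candidates}
    for scores, simpler_first in zip(feature_scores, higher_is_simpler):
        ordered = sorted(candidates, key=lambda c: scores[c], reverse=simpler_first)
        for position, cand in enumerate(ordered):
            totals[cand] += position
    return sorted(candidates, key=lambda c: totals[c])

cands = ['sat', 'roosted', 'perched']
freq = {'sat': 5.2, 'roosted': 2.1, 'perched': 3.0}    # higher = simpler
length = {'sat': 3, 'roosted': 7, 'perched': 7}        # lower  = simpler
print(aggregate_rankings(cands, [freq, length], [True, False]))  # 'sat' comes first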
- """ - - #Read victor corpus: - data = [] - f = open(victor_corpus) - for line in f: - data.append(line.strip().split('\t')) - f.close() - - #Create matrixes: - X = self.fe.calculateFeatures(victor_corpus) - Y = self.generateLabels(data, positive_range) - - #Select features: - self.feature_selector = SelectKBest(f_classif, k=k) - self.feature_selector.fit(X, Y) - X = self.feature_selector.transform(X) - - #Train classifier: - self.classifier = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma, coef0=coef0) - self.classifier.fit(X, Y) - - def trainRankerWithCrossValidation(self, victor_corpus, positive_range, folds, test_size, Cs=[0.1, 1, 10], kernels=['linear', 'rbf', 'poly', 'sigmoid'], degrees=[2], gammas=[0.01, 0.1, 1], coef0s=[0, 1], k='all'): - """ - Trains a SVM Boundary Ranker while maximizing hyper-parameters through cross-validation. - It uses the TRank-at-1 as an optimization metric. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param positive_range: Maximum rank to which label 1 is assigned in the binary classification setup. - Recommended value: 1. - @param folds: Number of folds to be used in cross-validation. - @param test_size: Percentage of the dataset to be used in testing. - Recommended values: 0.2, 0.25, 0.33 - @param Cs: Penalty parameters. - Recommended values: 0.1, 1, 10. - @param kernels: Kernel functions to be used. - Supported values: 'linear', 'poly', 'rbf', 'sigmoid'. - @param degrees: Degrees of the polynomial kernel. - Recommended values: 2, 3. - @param gammas: Kernel coefficients. - Recommended values: 0.01, 0.1, 1. - @param coef0s: Independent term values. - Recommended values: 0, 1. - @param k: Number of best features to be selected through univariate feature selection. - If k='all', then no feature selection is performed. 
- """ - #Read victor corpus: - data = [] - f = open(victor_corpus) - for line in f: - data.append(line.strip().split('\t')) - f.close() - - #Create matrixes: - X = self.fe.calculateFeatures(victor_corpus) - Y = self.generateLabels(data, positive_range) - - #Select features: - self.feature_selector = SelectKBest(f_classif, k=k) - self.feature_selector.fit(X, Y) - X = self.feature_selector.transform(X) - - #Extract ranking problems: - firsts = [] - candidates = [] - Xsets = [] - Ysets = [] - index = -1 - for line in data: - fs = set([]) - cs = [] - Xs = [] - Ys = [] - for cand in line[3:len(line)]: - index += 1 - candd = cand.split(':') - rank = candd[0].strip() - word = candd[1].strip() - - cs.append(word) - Xs.append(X[index]) - Ys.append(Y[index]) - if rank=='1': - fs.add(word) - firsts.append(fs) - candidates.append(cs) - Xsets.append(Xs) - Ysets.append(Ys) - - #Create data splits: - datasets = [] - for i in range(0, folds): - Xtr, Xte, Ytr, Yte, Ftr, Fte, Ctr, Cte = train_test_split(Xsets, Ysets, firsts, candidates, test_size=test_size, random_state=i) - Xtra = [] - for matrix in Xtr: - Xtra += matrix - Xtea = [] - for matrix in Xte: - Xtea += matrix - Ytra = [] - for matrix in Ytr: - Ytra += matrix - datasets.append((Xtra, Ytra, Xte, Xtea, Fte, Cte)) - - #Get classifier with best parameters for the RBF kernel: - max_score = -1.0 - parameters = () - if 'rbf' in kernels: - for C in Cs: - for g in gammas: - sum = 0.0 - sum_total = 0 - for dataset in datasets: - Xtra = dataset[0] - Ytra = dataset[1] - Xte = dataset[2] - Xtea = dataset[3] - Fte = dataset[4] - Cte = dataset[5] - - classifier = SVC(kernel='rbf', C=C, gamma=g) - try: - classifier.fit(Xtra, Ytra) - t1 = self.getCrossValidationScore(classifier, Xtea, Xte, Fte, Cte) - sum += t1 - sum_total += 1 - except Exception: - pass - sum_total = max(1, sum_total) - if (sum/sum_total)>max_score: - max_score = sum - parameters = (C, 'rbf', 1, g, 0) - - #Get classifier with best parameters for the Polynomial kernel: - if 'poly' in kernels: - for C in Cs: - for d in degrees: - for g in gammas: - for c in coef0s: - sum = 0.0 - sum_total = 0 - for dataset in datasets: - Xtra = dataset[0] - Ytra = dataset[1] - Xte = dataset[2] - Xtea = dataset[3] - Fte = dataset[4] - Cte = dataset[5] - - classifier = SVC(kernel='poly', C=C, degree=d, gamma=g, coef0=c) - try: - classifier.fit(Xtra, Ytra) - t1 = self.getCrossValidationScore(classifier, Xtea, Xte, Fte, Cte) - sum += t1 - sum_total += 1 - except Exception: - pass - sum_total = max(1, sum_total) - if (sum/sum_total)>max_score: - max_score = sum - parameters = (C, 'poly', d, g, c) - - #Get classifier with best parameters for the Sigmoid kernel: - if 'sigmoid' in kernels: - for C in Cs: - for g in gammas: - for c in coef0s: - sum = 0.0 - sum_total = 0 - for dataset in datasets: - Xtra = dataset[0] - Ytra = dataset[1] - Xte = dataset[2] - Xtea = dataset[3] - Fte = dataset[4] - Cte = dataset[5] - - classifier = SVC(kernel='sigmoid', C=C, gamma=g, coef0=c) - try: - classifier.fit(Xtra, Ytra) - t1 = self.getCrossValidationScore(classifier, Xtea, Xte, Fte, Cte) - sum += t1 - sum_total += 1 - except Exception: - pass - sum_total = max(1, sum_total) - if (sum/sum_total)>max_score: - max_score = sum - parameters = (C, 'sigmoid', d, g, c) - - #Get classifier with best parameters for the Linear kernel: - if 'linear' in kernels: - for C in Cs: - sum = 0.0 - sum_total = 0 - for dataset in datasets: - Xtra = dataset[0] - Ytra = dataset[1] - Xte = dataset[2] - Xtea = dataset[3] - Fte = dataset[4] - Cte = dataset[5] - 
- classifier = SVC(kernel='linear', C=C, gamma=g, coef0=c) - try: - classifier.fit(Xtra, Ytra) - t1 = self.getCrossValidationScore(classifier, Xtea, Xte, Fte, Cte) - sum += t1 - sum_total += 1 - except Exception: - pass - sum_total = max(1, sum_total) - if (sum/sum_total)>max_score: - max_score = sum - parameters = (C, 'linear', d, g, c) - self.classifier = SVC(C=parameters[0], kernel=parameters[1], degree=parameters[2], gamma=parameters[3], coef0=parameters[4]) - self.classifier.fit(X, Y) - - def getCrossValidationScore(self, classifier, Xtea, Xte, firsts, candidates): - distances = classifier.decision_function(Xtea) - index = -1 - corrects = 0 - total = 0 - for i in range(0, len(Xte)): - xset = Xte[i] - maxd = -999999 - for j in range(0, len(xset)): - index += 1 - distance = distances[index] - if distance>maxd: - maxd = distance - maxc = candidates[i][j] - if maxc in firsts[i]: - corrects += 1 - total += 1 - return float(corrects)/float(total) - - def getRankings(self, victor_corpus): - """ - Ranks candidates with respect to their simplicity. - Requires for the trainRanker function to be previously called so that a model can be trained. - - @param victor_corpus: Path to a testing corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A list of ranked candidates for each instance in the VICTOR corpus, from simplest to most complex. - """ - - #Read victor corpus: - data = [] - f = open(victor_corpus) - for line in f: - data.append(line.strip().split('\t')) - f.close() - - #Create matrixes: - X = self.fe.calculateFeatures(victor_corpus) - - #Select features: - X = self.feature_selector.transform(X) - - #Get boundary distances: - distances = self.classifier.decision_function(X) - - #Get rankings: - result = [] - index = 0 - for i in range(0, len(data)): - line = data[i] - scores = {} - for subst in line[3:len(line)]: - word = subst.strip().split(':')[1].strip() - scores[word] = distances[index] - index += 1 - ranking_data = sorted(list(scores.keys()), key=scores.__getitem__, reverse=True) - result.append(ranking_data) - - #Return rankings: - return result - - def generateLabels(self, data, positive_range): - Y = [] - for line in data: - max_range = min(int(line[len(line)-1].split(':')[0].strip()), positive_range) - for i in range(3, len(line)): - rank_index = int(line[i].split(':')[0].strip()) - if rank_index<=max_range: - Y.append(1) - else: - Y.append(0) - return Y - -class BottRanker: - - def __init__(self, simple_lm): - """ - Creates an instance of the BottRanker class. - This simplifier was introduced by "Can Spanish Be Simpler? LexSiS: Lexical Simplification for Spanish, Proceedings of the 2012 COLING, 2012". - - @param simple_lm: Path to a language model built over simple text. - For more information on how to create the file, refer to the LEXenstein Manual. - """ - - self.simple_lm = kenlm.LanguageModel(simple_lm) - - def getRankings(self, victor_corpus, a1=1.0, a2=1.0): - """ - Ranks candidates with respect to their simplicity. - - @param victor_corpus: Path to a testing corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param a1: Weight of the word's length score. - @param a2: Weight of the word's frequency score. - @return: A list of ranked candidates for each instance in the VICTOR corpus, from simplest to most complex. 
- """ - #Create object for results: - result = [] - - #Read feature values for each candidate in victor corpus: - f = open(victor_corpus) - for line in f: - #Get all substitutions in ranking instance: - data = line.strip().split('\t') - substitutions = data[3:len(data)] - - #Create dictionary of substitution to feature value: - scores = {} - for substitution in substitutions: - word = substitution.strip().split(':')[1].strip() - scores[word] = self.getCandidateComplexity(word, a1, a2) - - #Sort substitutions: - sorted_substitutions = sorted(list(scores.keys()), key=scores.__getitem__, reverse=False) - - #Add them to result: - result.append(sorted_substitutions) - f.close() - - #Return result: - return result - - def getCandidateComplexity(self, word, a1, a2): - ScoreWL = 0 - if len(word)>4: - ScoreWL = math.sqrt(len(word)-4) - ScoreFreq = -1*self.simple_lm.score(word, bos=False, eos=False) - #ScoreFreq = -1*self.simple_lm.score(word) - return a1*ScoreWL + a2*ScoreFreq - -class YamamotoRanker: - - def __init__(self, simple_lm, cooc_model): - """ - Creates an instance of the YamamotoRanker class. - This simplifier was introduced by "Selecting Proper Lexical Paraphrase for Children, Proceedings of the 2013 ROCLING, 2013". - - @param simple_lm: Path to a language model built over simple text. - For more information on how to create the file, refer to the LEXenstein Manual. - @param cooc_model: Path to a word co-occurrence model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - """ - - self.simple_lm = kenlm.LanguageModel(simple_lm) - self.cooc_model = self.getModel(cooc_model) - - def getRankings(self, victor_corpus, a1=1.0, a2=1.0, a3=1.0, a4=1.0, a5=1.0): - """ - Ranks candidates with respect to their simplicity. - - @param victor_corpus: Path to a testing corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param a1: Weight of the word's frequency score. - @param a2: Weight of the word's sense score. - @param a3: Weight of the word's collocational score. - @param a4: Weight of the word's log score. - @param a5: Weight of the word's trigram score. - @return: A list of ranked candidates for each instance in the VICTOR corpus, from simplest to most complex. 
- """ - #Create object for results: - result = [] - - #Read feature values for each candidate in victor corpus: - f = open(victor_corpus) - for line in f: - #Get all substitutions in ranking instance: - data = line.strip().split('\t') - sent = data[0].strip() - target = data[1].strip() - head = int(data[2].strip()) - substitutions = data[3:len(data)] - - #Create dictionary of substitution to feature value: - scores = {} - for substitution in substitutions: - word = substitution.strip().split(':')[1].strip() - scores[word] = self.getCandidateScore(sent, target, head, word, a1, a2, a3, a4, a5) - - #Sort substitutions: - sorted_substitutions = sorted(list(scores.keys()), key=scores.__getitem__, reverse=True) - - #Add them to result: - result.append(sorted_substitutions) - f.close() - - #Return result: - return result - - def getModel(self, path): - result = {} - f = open(path) - for line in f: - data = line.strip().split('\t') - target = data[0].strip() - coocs = data[1:len(data)] - result[target] = {} - for cooc in coocs: - coocd = cooc.strip().split(':') - word = coocd[0].strip() - count = int(coocd[1].strip()) - result[target][word] = count - return result - - def getCandidateScore(self, sent, target, head, word, a1, a2, a3, a4, a5): - Fcorpus = a1*self.simple_lm.score(word, bos=False, eos=False) - #Fcorpus = a1*self.simple_lm.score(word) - Sense = a2*self.getSenseScore(word, target) - Cooc = a3*self.getCoocScore(word, sent) - Log = a4*self.getLogScore(Cooc, sent, word) - Trigram = a5*self.getTrigramScore(sent, head, word) - - score = Fcorpus+Sense+Cooc+Log+Trigram - return score - - def getTrigramScore(self, sent, head, word): - tokens = ['', ''] + sent.strip().split(' ') + ['', ''] - h = head + 2 - t1 = tokens[h-2] + ' ' + tokens[h-1] + ' ' + word - t2 = tokens[h-1] + ' ' + word + ' ' + tokens[h+1] - t3 = word + ' ' + tokens[h+1] + ' ' + tokens[h+2] - bos = False - eos = False - if tokens[h-1]=='': - bos = True - if tokens[h+1]=='': - eos = True - result = self.simple_lm.score(t1, bos=bos, eos=eos)+self.simple_lm.score(t2, bos=bos, eos=eos)+self.simple_lm.score(t3, bos=bos, eos=eos) - #result = self.simple_lm.score(t1)+self.simple_lm.score(t2)+self.simple_lm.score(t3) - return result - - def getLogScore(self, Cooc, sent, word): - dividend = Cooc - divisor = self.simple_lm.score(word, bos=False, eos=False)*self.simple_lm.score(sent, bos=True, eos=True) - #divisor = self.simple_lm.score(word)*self.simple_lm.score(sent) - if divisor==0: - return 0 - else: - result = 0 - try: - result = math.log(dividend/divisor) - except ValueError: - result = 0 - return result - - def getCoocScore(self, word, sent): - tokens = sent.strip().split(' ') - if word not in self.cooc_model: - return 0 - else: - result = 0 - for token in tokens: - if token in self.cooc_model[word]: - result += self.cooc_model[word][token] - return result - - def getSenseScore(self, word, target): - candidate_sense = None - try: - candidate_sense = wn.synsets(word)[0] - except Exception: - candidate_sense = None - target_sense = None - try: - target_sense = wn.synsets(target)[0] - except Exception: - target_sense = None - result = 999999 - if candidate_sense and target_sense: - result = candidate_sense.shortest_path_distance(target_sense) - if not result: - result = 999999 - return result - -class BiranRanker: - - def __init__(self, complex_lm, simple_lm): - """ - Creates an instance of the BiranRanker class. 
- This simplifier was introduced by "Putting it Simply: a Context-Aware Approach to Lexical Simplification, Proceedings of the 2012 ACL, 2012". - - @param complex_lm: Path to a language model built over complex text. - For more information on how to create the file, refer to the LEXenstein Manual. - @param simple_lm: Path to a language model built over simple text. - For more information on how to create the file, refer to the LEXenstein Manual. - """ - - self.complex_lm = kenlm.LanguageModel(complex_lm) - self.simple_lm = kenlm.LanguageModel(simple_lm) - - def getRankings(self, victor_corpus): - """ - Ranks candidates with respect to their simplicity. - - @param victor_corpus: Path to a testing corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A list of ranked candidates for each instance in the VICTOR corpus, from simplest to most complex. - """ - #Create object for results: - result = [] - - #Read feature values for each candidate in victor corpus: - f = open(victor_corpus) - for line in f: - #Get all substitutions in ranking instance: - data = line.strip().split('\t') - substitutions = data[3:len(data)] - - #Create dictionary of substitution to feature value: - scores = {} - for substitution in substitutions: - word = substitution.strip().split(':')[1].strip() - scores[word] = self.getCandidateComplexity(word) - - #Sort substitutions: - sorted_substitutions = sorted(list(scores.keys()), key=scores.__getitem__, reverse=False) - - #Add them to result: - result.append(sorted_substitutions) - f.close() - - #Return result: - return result - - def getCandidateComplexity(self, word): - C = (self.complex_lm.score(word, bos=False, eos=False))/(self.simple_lm.score(word, bos=False, eos=False)) - #C = (self.complex_lm.score(word))/(self.simple_lm.score(word)) - L = float(len(word)) - return C*L - -class BoundaryRanker: - - def __init__(self, fe): - """ - Creates an instance of the BoundaryRanker class. - This simplifier was introduced by "LEXenstein: A Framework for Lexical Simplification, Proceedings of the 2015 ACL, 2015". - - @param fe: A configured FeatureEstimator object. - """ - - self.fe = fe - self.classifier = None - self.feature_selector = None - - def trainRanker(self, victor_corpus, positive_range, loss, penalty, alpha, l1_ratio, epsilon, k='all'): - """ - Trains a Boundary Ranker according to the parameters provided. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param positive_range: Maximum rank to which label 1 is assigned in the binary classification setup. - Recommended value: 1. - @param loss: Loss function to be used. - Values available: hinge, log, modified_huber, squared_hinge, perceptron. - @param penalty: Regularization term to be used. - Values available: l2, l1, elasticnet. - @param alpha: Constant that multiplies the regularization term. - Recommended values: 0.0001, 0.001, 0.01, 0.1 - @param l1_ratio: Elastic net mixing parameter. - Recommended values: 0.05, 0.10, 0.15 - @param epsilon: Acceptable error margin. - Recommended values: 0.0001, 0.001 - @param k: Number of best features to be selected through univariate feature selection. - If k='all', then no feature selection is performed. 
- """ - - #Read victor corpus: - data = [] - f = open(victor_corpus) - for line in f: - data.append(line.strip().split('\t')) - f.close() - - #Create matrixes: - X = self.fe.calculateFeatures(victor_corpus) - Y = self.generateLabels(data, positive_range) - - #Select features: - self.feature_selector = SelectKBest(f_classif, k=k) - self.feature_selector.fit(X, Y) - X = self.feature_selector.transform(X) - - #Train classifier: - self.classifier = linear_model.SGDClassifier(loss=loss, penalty=penalty, alpha=alpha, l1_ratio=l1_ratio, epsilon=epsilon) - self.classifier.fit(X, Y) - - def trainRankerWithCrossValidation(self, victor_corpus, positive_range, folds, test_size, losses=['hinge', 'modified_huber'], penalties=['elasticnet'], alphas=[0.0001, 0.001, 0.01], l1_ratios=[0.0, 0.15, 0.25, 0.5, 0.75, 1.0], k='all'): - """ - Trains a Boundary Ranker while maximizing hyper-parameters through cross-validation. - It uses the TRank-at-1 as an optimization metric. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param positive_range: Maximum rank to which label 1 is assigned in the binary classification setup. - Recommended value: 1. - @param folds: Number of folds to be used in cross-validation. - @param test_size: Percentage of the dataset to be used in testing. - Recommended values: 0.2, 0.25, 0.33 - @param losses: Loss functions to be considered. - Values available: hinge, log, modified_huber, squared_hinge, perceptron. - @param penalties: Regularization terms to be considered. - Values available: l2, l1, elasticnet. - @param alphas: Constants that multiplies the regularization term. - Recommended values: 0.0001, 0.001, 0.01, 0.1 - @param l1_ratios: Elastic net mixing parameters. - Recommended values: 0.05, 0.10, 0.15 - @param k: Number of best features to be selected through univariate feature selection. - If k='all', then no feature selection is performed. 
- """ - #Read victor corpus: - data = [] - f = open(victor_corpus) - for line in f: - data.append(line.strip().split('\t')) - f.close() - - #Create matrixes: - X = self.fe.calculateFeatures(victor_corpus) - Y = self.generateLabels(data, positive_range) - - #Select features: - self.feature_selector = SelectKBest(f_classif, k=k) - self.feature_selector.fit(X, Y) - X = self.feature_selector.transform(X) - - #Extract ranking problems: - firsts = [] - candidates = [] - Xsets = [] - Ysets = [] - index = -1 - for line in data: - fs = set([]) - cs = [] - Xs = [] - Ys = [] - for cand in line[3:len(line)]: - index += 1 - candd = cand.split(':') - rank = candd[0].strip() - word = candd[1].strip() - - cs.append(word) - Xs.append(X[index]) - Ys.append(Y[index]) - if rank=='1': - fs.add(word) - firsts.append(fs) - candidates.append(cs) - Xsets.append(Xs) - Ysets.append(Ys) - - #Create data splits: - datasets = [] - for i in range(0, folds): - Xtr, Xte, Ytr, Yte, Ftr, Fte, Ctr, Cte = train_test_split(Xsets, Ysets, firsts, candidates, test_size=test_size, random_state=i) - Xtra = [] - for matrix in Xtr: - Xtra += matrix - Xtea = [] - for matrix in Xte: - Xtea += matrix - Ytra = [] - for matrix in Ytr: - Ytra += matrix - datasets.append((Xtra, Ytra, Xte, Xtea, Fte, Cte)) - - #Get classifier with best parameters: - max_score = -1.0 - parameters = () - for l in losses: - for p in penalties: - for a in alphas: - for r in l1_ratios: - sum = 0.0 - sum_total = 0 - for dataset in datasets: - Xtra = dataset[0] - Ytra = dataset[1] - Xte = dataset[2] - Xtea = dataset[3] - Fte = dataset[4] - Cte = dataset[5] - - classifier = linear_model.SGDClassifier(loss=l, penalty=p, alpha=a, l1_ratio=r, epsilon=0.0001) - try: - classifier.fit(Xtra, Ytra) - t1 = self.getCrossValidationScore(classifier, Xtea, Xte, Fte, Cte) - sum += t1 - sum_total += 1 - except Exception: - pass - sum_total = max(1, sum_total) - if (sum/sum_total)>max_score: - max_score = sum - parameters = (l, p, a, r) - self.classifier = linear_model.SGDClassifier(loss=parameters[0], penalty=parameters[1], alpha=parameters[2], l1_ratio=parameters[3], epsilon=0.0001) - self.classifier.fit(X, Y) - - def getCrossValidationScore(self, classifier, Xtea, Xte, firsts, candidates): - distances = classifier.decision_function(Xtea) - index = -1 - corrects = 0 - total = 0 - for i in range(0, len(Xte)): - xset = Xte[i] - maxd = -999999 - for j in range(0, len(xset)): - index += 1 - distance = distances[index] - if distance>maxd: - maxd = distance - maxc = candidates[i][j] - if maxc in firsts[i]: - corrects += 1 - total += 1 - return float(corrects)/float(total) - - def getRankings(self, victor_corpus): - """ - Ranks candidates with respect to their simplicity. - Requires for the trainRanker function to be previously called so that a model can be trained. - - @param victor_corpus: Path to a testing corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: A list of ranked candidates for each instance in the VICTOR corpus, from simplest to most complex. 
- """ - - #Read victor corpus: - data = [] - f = open(victor_corpus) - for line in f: - data.append(line.strip().split('\t')) - f.close() - - #Create matrixes: - X = self.fe.calculateFeatures(victor_corpus) - - #Select features: - X = self.feature_selector.transform(X) - - #Get boundary distances: - distances = self.classifier.decision_function(X) - - #Get rankings: - result = [] - index = 0 - for i in range(0, len(data)): - line = data[i] - scores = {} - for subst in line[3:len(line)]: - word = subst.strip().split(':')[1].strip() - scores[word] = distances[index] - index += 1 - ranking_data = sorted(list(scores.keys()), key=scores.__getitem__, reverse=True) - result.append(ranking_data) - - #Return rankings: - return result - - def generateLabels(self, data, positive_range): - Y = [] - for line in data: - max_range = min(int(line[len(line)-1].split(':')[0].strip()), positive_range) - for i in range(3, len(line)): - rank_index = int(line[i].split(':')[0].strip()) - if rank_index<=max_range: - Y.append(1) - else: - Y.append(0) - return Y - -class SVMRanker: - - def __init__(self, fe, svmrank_path): - """ - Creates an instance of the SVMRanker class. - This ranker was introduced in Lexical Simplification by "Learning a Lexical Simplifier Using Wikipedia, Proceedings of the 2014 ACL, 2014". - - @param fe: A configured FeatureEstimator object. - @param svmrank_path: Path to SVM-Rank's root installation folder. - """ - - self.fe = fe - self.svmrank = svmrank_path - if not self.svmrank.endswith('/'): - self.svmrank += '/' - - def trainRankerWithCrossValidation(self, victor_corpus, folds, test_size, temp_folder, temp_id, Cs=['0.01', '0.001'], epsilons=[0.0001, 0.001], kernels=['0', '2', '3']): - """ - Trains a SVM Ranker while maximizing hyper-parameters through cross-validation. - It uses the TRank-at-1 as an optimization metric. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param folds: Number of folds to be used in cross-validation. - @param test_size: Percentage of the dataset to be used in testing. - Recommended values: 0.2, 0.25, 0.33 - @param temp_folder: Folder in which to save temporary files. - @param temp_id: ID to be used in the identification of temporary files. - @param Cs: Trade-offs between training error and margin. - Recommended values: 0.001, 0.01 - @param epsilons: Acceptable error margins. - Recommended values: 0.00001, 0.0001 - @param kernels: ID for the kernels to be considered. 
- Kernels available: - 0 - Linear - 1 - Polynomial - 2 - Radial Basis Function - 3 - Sigmoid - """ - #Read victor corpus: - data = [] - f = open(victor_corpus) - for line in f: - data.append(line.strip().split('\t')) - f.close() - - #Create matrixes: - X = self.fe.calculateFeatures(victor_corpus) - X = normalize(X, axis=0) - #X = self.toSVMRankFormat(data, X) - - #Extract ranking problems: - firsts = [] - candidates = [] - Xsets = [] - index = -1 - for line in data: - fs = set([]) - cs = [] - Xs = [] - for cand in line[3:len(line)]: - index += 1 - candd = cand.split(':') - rank = candd[0].strip() - word = candd[1].strip() - - cs.append(word) - Xs.append(X[index]) - if rank=='1': - fs.add(word) - firsts.append(fs) - candidates.append(cs) - Xsets.append(Xs) - - #Create data splits: - datasets = [] - for i in range(0, folds): - Xtr, Xte, Ftr, Fte, Ctr, Cte, Dtr, Dte = train_test_split(Xsets, firsts, candidates, data, test_size=test_size, random_state=i) - Xtra = [] - for matrix in Xtr: - Xtra += matrix - Xtra_path = temp_folder + '/' + str(temp_id) + '_' + str(i) + '_training_features_file.txt' - self.fromMatrixToFile(Dtr, Xtra, Xtra_path) - - Xtea = [] - for matrix in Xte: - Xtea += matrix - Xtea_path = temp_folder + '/' + str(temp_id) + '_' + str(i) + '_testing_features_file.txt' - self.fromMatrixToFile(Dte, Xtea, Xtea_path) - datasets.append((Xtra_path, Xte, Xtea_path, Fte, Cte)) - - #Get classifier with best parameters: - max_score = -1.0 - parameters = () - for C in Cs: - for k in kernels: - for e in epsilons: - sum = 0.0 - sum_total = 0 - for dataset in datasets: - Xtra_path = dataset[0] - Xte = dataset[1] - Xtea_path = dataset[2] - Fte = dataset[3] - Cte = dataset[4] - - model_path = temp_folder + '/' + str(temp_id) + '_' + str(i) + '_model_file.txt' - scores_path = temp_folder + '/' + str(temp_id) + '_' + str(i) + '_scores_file.txt' - self.getTrainingModel(Xtra_path, C, e, k, model_path) - self.getScoresFile(Xtea_path, model_path, scores_path) - - t1 = self.getCrossValidationScore(scores_path, Xte, Fte, Cte) - sum += t1 - sum_total += 1 - sum_total = max(1, sum_total) - if (sum/sum_total)>max_score: - max_score = sum - parameters = (C, k, e) - return parameters - - def getCrossValidationScore(self, scores_path, Xte, firsts, candidates): - scores = [str(value.strip()) for value in open(scores_path)] - index = -1 - corrects = 0 - total = 0 - for i in range(0, len(Xte)): - xset = Xte[i] - mind = 999999 - minc = '' - for j in range(0, len(xset)): - index += 1 - distance = scores[index] - if distancetgtvalue: - scoremap[cand] += 1.0 - else: - print('Feature has an invalid Complexity/Simplicity identifier!') - - #Filter candidates: - final_candidates = [] - total_features = float(len(self.fe.identifiers)) - for cand in scoremap: - proportion = scoremap[cand]/total_features - if proportion>=minimum_proportion: - final_candidates.append(cand) - selected_substitutions.append(final_candidates) - - lexf.close() - return selected_substitutions - - def toVictorFormat(self, victor_corpus, substitutions, output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. 
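The toVictorFormat helpers that recur in these classes all serialise the same tab-separated layout: sentence, target word, the target's token index, then one rank:candidate field per substitution. A minimal writer for that layout (the name write_victor and the sample path are illustrative):

    def write_victor(instances, output_path):
        # instances: (sentence, target, head_index, [(rank, candidate), ...]) tuples
        with open(output_path, 'w') as out:
            for sentence, target, head, candidates in instances:
                fields = ['{}:{}'.format(rank, cand) for rank, cand in candidates]
                out.write('\t'.join([sentence, target, str(head)] + fields) + '\n')

    write_victor([('He perched on the wall', 'perched', 1,
                   [(1, 'sat'), (2, 'roosted')])], 'example.victor.txt')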
- """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -class SVMRankSelector: - - def __init__(self, svm_ranker): - """ - Creates an instance of the SVMRankSelector class. - - @param svm_ranker: An instance of the SVMRanker class. - """ - self.ranker = svm_ranker - - def trainSelector(self, tr_victor_corpus, tr_features_file, model_file, c, epsilon, kernel): - """ - Trains a SVM Ranker according to the parameters provided. - - @param tr_victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param tr_features_file: File in which to save the training features file. - @param model_file: File in which to save the trained model. - @param c: Trade-off between training error and margin. - Recommended values: 0.001, 0.01 - @param epsilon: Acceptable error margin. - Recommended values: 0.00001, 0.0001 - @param kernel: ID for the kernel to be used. - Kernels available: - 0 - Linear - 1 - Polynomial - 2 - Radial Basis Function - 3 - Sigmoid - """ - self.ranker.getFeaturesFile(tr_victor_corpus, tr_features_file) - self.ranker.getTrainingModel(tr_features_file, c, epsilon, kernel, model_file) - self.model = model_file - - def trainSelectorWithCrossValidation(self, victor_corpus, features_file, model_file, folds, test_size, temp_folder, temp_id, Cs=['0.01', '0.001'], epsilons=[0.0001, 0.001], kernels=['0', '2', '3']): - """ - Trains a SVM Selector while maximizing hyper-parameters through cross-validation. - It uses the TRank-at-1 as an optimization metric. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param features_file: File in which to save the training features file. - @param model_file: File in which to save the trained model. - @param folds: Number of folds to be used in cross-validation. - @param test_size: Percentage of the dataset to be used in testing. - Recommended values: 0.2, 0.25, 0.33 - @param temp_folder: Folder in which to save temporary files. - @param temp_id: ID to be used in the identification of temporary files. - @param Cs: Trade-offs between training error and margin. - Recommended values: 0.001, 0.01 - @param epsilons: Acceptable error margins. - Recommended values: 0.00001, 0.0001 - @param kernels: ID for the kernels to be considered. - Kernels available: - 0 - Linear - 1 - Polynomial - 2 - Radial Basis Function - 3 - Sigmoid - """ - parameters = self.ranker.trainRankerWithCrossValidation(victor_corpus, folds, test_size, temp_folder, temp_id, Cs=Cs, epsilons=epsilons, kernels=kernels) - self.ranker.getFeaturesFile(victor_corpus, features_file) - self.ranker.getTrainingModel(features_file, parameters[0], parameters[2], parameters[1], model_file) - self.model = model_file - - def selectCandidates(self, substitutions, victor_corpus, features_file, scores_file, temp_file, proportion, proportion_type='percentage'): - """ - Selects which candidates can replace the target complex words in each instance of a VICTOR corpus. - - @param substitutions: Candidate substitutions to be filtered. 
- It can be in two formats: - A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector. - Example: [['sat', 'roosted'], ['easy', 'uncomplicated']] - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param features_file: File in which to save the testing features file. - @param scores_file: File in which to save the scores file. - User must have the privilege to delete such file without administrator privileges. - @param temp_file: File in which to save a temporary victor corpus. - The file is removed after the algorithm is concluded. - @param proportion: Proportion of substitutions to keep. - If proportion_type is set to "percentage", then this parameter must be a floating point number between 0 and 1. - If proportion_type is set to "integer", then this parameter must be an integer number. - @param proportion_type: Type of proportion to be kept. - Values supported: percentage, integer. - @return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. - """ - void = VoidSelector() - selected_void = void.selectCandidates(substitutions, victor_corpus) - void.toVictorFormat(victor_corpus, selected_void, temp_file) - - self.ranker.getFeaturesFile(temp_file, features_file) - self.ranker.getScoresFile(features_file, self.model, scores_file) - rankings = self.getRankings(temp_file, features_file, scores_file) - - selected_substitutions = [] - - lexf = open(victor_corpus) - index = -1 - for line in lexf: - index += 1 - - selected_candidates = None - if proportion_type == 'percentage': - toselect = None - if proportion > 1.0: - toselect = 1.0 - else: - toselect = proportion - selected_candidates = rankings[index][0:max(1, int(toselect*float(len(rankings[index]))))] - else: - toselect = None - if proportion < 1: - toselect = 1 - elif proportion > len(rankings[index]): - toselect = len(rankings[index]) - else: - toselect = proportion - selected_candidates = rankings[index][0:toselect] - - selected_substitutions.append(selected_candidates) - lexf.close() - - #Delete temp_file: - os.system('rm ' + temp_file) - return selected_substitutions - - def getRankings(self, victor_corpus, features_file, scores_file): - #Read features file: - f = open(features_file) - data = [] - for line in f: - data.append(line.strip().split(' ')) - f.close() - - #Read scores file: - f = open(scores_file) - scores = [] - for line in f: - scores.append(float(line.strip())) - f.close() - - #Combine data: - ranking_data = {} - index = 0 - for line in data: - id = int(line[1].strip().split(':')[1].strip()) - starti = 0 - while line[starti]!='#': - starti += 1 - word = '' - for i in range(starti+1, len(line)): - word += line[i] + ' ' - word = word.strip() - score = scores[index] - index += 1 - if id in ranking_data: - ranking_data[id][word] = score - else: - ranking_data[id] = {word:score} - - #Produce rankings: - result = [] - f = open(victor_corpus) - id = 0 - for line in f: - id += 1 - candidates = [] - if id in ranking_data: - candidates = list(ranking_data[id].keys()) - candidates = sorted(candidates, key=ranking_data[id].__getitem__, reverse=False) - result.append(candidates) - - #Return rankings: - return result - - def toVictorFormat(self, victor_corpus, substitutions, 
output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. - """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -class SVMBoundarySelector: - - def __init__(self, svm_boundary_ranker): - """ - Creates an instance of the SVMBoundarySelector class. - - @param svm_boundary_ranker: An instance of the BoundaryRanker class. - """ - self.ranker = svm_boundary_ranker - - def trainSelector(self, victor_corpus, positive_range, C, kernel, degree, gamma, coef0, k='all'): - """ - Trains a Boundary Ranker according to the parameters provided. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param positive_range: Maximum rank to which label 1 is assigned in the binary classification setup. - Recommended value: 1. - @param C: Penalty parameter. - Recommended values: 0.1, 1, 10. - @param kernel: Kernel function to be used. - Supported values: 'linear', 'poly', 'rbf', 'sigmoid'. - @param degree: Degree of the polynomial kernel. - Recommended values: 2, 3. - @param gamma: Kernel coefficient. - Recommended values: 0.01, 0.1, 1. - @param coef0: Independent term value. - Recommended values: 0, 1. - @param k: Number of best features to be selected through univariate feature selection. - If k='all', then no feature selection is performed. - """ - self.ranker.trainRanker(victor_corpus, positive_range, C, kernel, degree, gamma, coef0, k=k) - - def trainSelectorWithCrossValidation(self, victor_corpus, positive_range, folds, test_size, Cs=[0.1, 1, 10], kernels=['linear', 'rbf', 'poly', 'sigmoid'], degrees=[2], gammas=[0.01, 0.1, 1], coef0s=[0, 1], k='all'): - """ - Trains a Boundary Selector while maximizing hyper-parameters through cross-validation. - It uses the TRank-at-1 as an optimization metric. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param positive_range: Maximum rank to which label 1 is assigned in the binary classification setup. - Recommended value: 1. - @param folds: Number of folds to be used in cross-validation. - @param test_size: Percentage of the dataset to be used in testing. - Recommended values: 0.2, 0.25, 0.33 - @param Cs: Penalty parameters. - Recommended values: 0.1, 1, 10. - @param kernels: Kernel functions to be used. - Supported values: 'linear', 'poly', 'rbf', 'sigmoid'. - @param degrees: Degrees of the polynomial kernel. - Recommended values: 2, 3. - @param gammas: Kernel coefficients. - Recommended values: 0.01, 0.1, 1. - @param coef0s: Independent term values. - Recommended values: 0, 1. - @param k: Number of best features to be selected through univariate feature selection. 
- If k='all', then no feature selection is performed. - """ - self.ranker.trainRankerWithCrossValidation(victor_corpus, positive_range, folds, test_size, Cs=Cs, kernels=kernels, degrees=degrees, gammas=gammas, coef0s=coef0s, k=k) - - def selectCandidates(self, substitutions, victor_corpus, temp_file, proportion, proportion_type='percentage'): - """ - Selects which candidates can replace the target complex words in each instance of a VICTOR corpus. - - @param substitutions: Candidate substitutions to be filtered. - It can be in two formats: - A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector. - Example: [['sat', 'roosted'], ['easy', 'uncomplicated']] - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - User must have the privilege to delete such file without administrator privileges. - @param temp_file: File in which to save a temporary victor corpus. - The file is removed after the algorithm is concluded. - @param proportion: Proportion of substitutions to keep. - If proportion_type is set to "percentage", then this parameter must be a floating point number between 0 and 1. - If proportion_type is set to "integer", then this parameter must be an integer number. - @param proportion_type: Type of proportion to be kept. - Values supported: percentage, integer. - @return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. - """ - void = VoidSelector() - selected_void = void.selectCandidates(substitutions, victor_corpus) - void.toVictorFormat(victor_corpus, selected_void, temp_file) - - rankings = self.ranker.getRankings(temp_file) - - selected_substitutions = [] - - lexf = open(victor_corpus) - index = -1 - for line in lexf: - index += 1 - - selected_candidates = None - if proportion_type == 'percentage': - toselect = None - if proportion > 1.0: - toselect = 1.0 - else: - toselect = proportion - selected_candidates = rankings[index][0:max(1, int(toselect*float(len(rankings[index]))))] - else: - toselect = None - if proportion < 1: - toselect = 1 - elif proportion > len(rankings[index]): - toselect = len(rankings[index]) - else: - toselect = proportion - selected_candidates = rankings[index][0:toselect] - - selected_substitutions.append(selected_candidates) - lexf.close() - - #Delete temp_file: - os.system('rm ' + temp_file) - return selected_substitutions - - def toVictorFormat(self, victor_corpus, substitutions, output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. 
- """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -class BoundarySelector: - - def __init__(self, boundary_ranker): - """ - Creates an instance of the BoundarySelector class. - - @param boundary_ranker: An instance of the BoundaryRanker class. - """ - self.ranker = boundary_ranker - - def trainSelector(self, victor_corpus, positive_range, loss, penalty, alpha, l1_ratio, epsilon, k='all'): - """ - Trains a Boundary Ranker according to the parameters provided. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param positive_range: Maximum rank to which label 1 is assigned in the binary classification setup. - Recommended value: 1. - @param loss: Loss function to be used. - Values available: hinge, log, modified_huber, squared_hinge, perceptron. - @param penalty: Regularization term to be used. - Values available: l2, l1, elasticnet. - @param alpha: Constant that multiplies the regularization term. - Recommended values: 0.0001, 0.001, 0.01, 0.1 - @param l1_ratio: Elastic net mixing parameter. - Recommended values: 0.05, 0.10, 0.15 - @param epsilon: Acceptable error margin. - Recommended values: 0.0001, 0.001 - @param k: Number of best features to be selected through univariate feature selection. - If k='all', then no feature selection is performed. - """ - self.ranker.trainRanker(victor_corpus, positive_range, loss, penalty, alpha, l1_ratio, epsilon, k=k) - - def trainSelectorWithCrossValidation(self, victor_corpus, positive_range, folds, test_size, losses=['hinge', 'modified_huber'], penalties=['elasticnet'], alphas=[0.0001, 0.001, 0.01], l1_ratios=[0.0, 0.15, 0.25, 0.5, 0.75, 1.0], k='all'): - """ - Trains a Boundary Selector while maximizing hyper-parameters through cross-validation. - It uses the TRank-at-1 as an optimization metric. - - @param victor_corpus: Path to a training corpus in VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param positive_range: Maximum rank to which label 1 is assigned in the binary classification setup. - Recommended value: 1. - @param folds: Number of folds to be used in cross-validation. - @param test_size: Percentage of the dataset to be used in testing. - Recommended values: 0.2, 0.25, 0.33 - @param losses: Loss functions to be considered. - Values available: hinge, log, modified_huber, squared_hinge, perceptron. - @param penalties: Regularization terms to be considered. - Values available: l2, l1, elasticnet. - @param alphas: Constants that multiplies the regularization term. - Recommended values: 0.0001, 0.001, 0.01, 0.1 - @param l1_ratios: Elastic net mixing parameters. - Recommended values: 0.05, 0.10, 0.15 - @param k: Number of best features to be selected through univariate feature selection. - If k='all', then no feature selection is performed. 
- """ - self.ranker.trainRankerWithCrossValidation(victor_corpus, positive_range, folds, test_size, losses=losses, penalties=penalties, alphas=alphas, l1_ratios=l1_ratios, k=k) - - def selectCandidates(self, substitutions, victor_corpus, temp_file, proportion, proportion_type='percentage'): - """ - Selects which candidates can replace the target complex words in each instance of a VICTOR corpus. - - @param substitutions: Candidate substitutions to be filtered. - It can be in two formats: - A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector. - Example: [['sat', 'roosted'], ['easy', 'uncomplicated']] - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - User must have the privilege to delete such file without administrator privileges. - @param temp_file: File in which to save a temporary victor corpus. - The file is removed after the algorithm is concluded. - @param proportion: Proportion of substitutions to keep. - If proportion_type is set to "percentage", then this parameter must be a floating point number between 0 and 1. - If proportion_type is set to "integer", then this parameter must be an integer number. - @param proportion_type: Type of proportion to be kept. - Values supported: percentage, integer. - @return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. - """ - void = VoidSelector() - selected_void = void.selectCandidates(substitutions, victor_corpus) - void.toVictorFormat(victor_corpus, selected_void, temp_file) - - rankings = self.ranker.getRankings(temp_file) - - selected_substitutions = [] - - lexf = open(victor_corpus) - index = -1 - for line in lexf: - index += 1 - - selected_candidates = None - if proportion_type == 'percentage': - toselect = None - if proportion > 1.0: - toselect = 1.0 - else: - toselect = proportion - selected_candidates = rankings[index][0:max(1, int(toselect*float(len(rankings[index]))))] - else: - toselect = None - if proportion < 1: - toselect = 1 - elif proportion > len(rankings[index]): - toselect = len(rankings[index]) - else: - toselect = proportion - selected_candidates = rankings[index][0:toselect] - - selected_substitutions.append(selected_candidates) - lexf.close() - - #Delete temp_file: - os.system('rm ' + temp_file) - return selected_substitutions - - def toVictorFormat(self, victor_corpus, substitutions, output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. 
- """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -class BelderSelector: - - def __init__(self, clusters): - """ - Creates an instance of the BelderSelector class. - - @param clusters: Path to a file containing clusters of words. - For instructions on how to create the file, please refer to the LEXenstein Manual. - """ - self.clusters_to_words, self.words_to_clusters = self.getClusterData(clusters) - - def selectCandidates(self, substitutions, victor_corpus): - """ - Selects which candidates can replace the target complex words in each instance of a VICTOR corpus. - - @param substitutions: Candidate substitutions to be filtered. - It can be in two formats: - A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector. - Example: [['sat', 'roosted'], ['easy', 'uncomplicated']] - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. - """ - selected_substitutions = [] - - substitution_candidates = [] - if isinstance(substitutions, list): - substitution_candidates = substitutions - elif isinstance(substitutions, dict): - void = VoidSelector() - substitution_candidates = void.selectCandidates(substitutions, victor_corpus) - else: - print('ERROR: Substitutions are neither a dictionary or a list!') - return selected_substitutions - - c = -1 - lexf = open(victor_corpus) - for line in lexf: - c += 1 - data = line.strip().split('\t') - sent = data[0].strip() - target = data[1].strip() - - selected_candidates = set([]) - if target in self.words_to_clusters: - cluster = self.words_to_clusters[target] - candidates = set(substitution_candidates[c]) - selected_candidates = candidates.intersection(self.clusters_to_words[cluster]) - - selected_substitutions.append(selected_candidates) - lexf.close() - return selected_substitutions - - def getClusterData(self, clusters): - cw = {} - wc = {} - f = open(clusters) - for line in f: - data = line.strip().split('\t') - cluster = data[0].strip() - word = data[1].strip() - - if cluster in cw: - cw[cluster].add(word) - else: - cw[cluster] = set([word]) - - wc[word] = cluster - f.close() - return cw, wc - - def toVictorFormat(self, victor_corpus, substitutions, output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. 
- """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -class POSProbSelector: - - def __init__(self, condprob_model, pos_model, stanford_tagger, java_path): - """ - Creates a POSProbSelector instance. - It selects only the candidate substitutions of which the most likely POS tag is that of the target word. - - @param condprob_model: Path to a binary conditional probability model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - """ - os.environ['JAVAHOME'] = java_path - self.tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.model = pickle.load(open(condprob_model, 'rb')) - - def selectCandidates(self, substitutions, victor_corpus): - """ - Selects which candidates can replace the target complex words in each instance of a VICTOR corpus. - - @param substitutions: Candidate substitutions to be filtered. - It can be in two formats: - A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector. - Example: [['sat', 'roosted'], ['easy', 'uncomplicated']] - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. 
- """ - selected_substitutions = [] - - substitution_candidates = [] - if isinstance(substitutions, list): - substitution_candidates = substitutions - elif isinstance(substitutions, dict): - void = VoidSelector() - substitution_candidates = void.selectCandidates(substitutions, victor_corpus) - else: - print('ERROR: Substitutions are neither a dictionary or a list!') - return selected_substitutions - - #Read VICTOR corpus: - lexf = open(victor_corpus) - sents = [] - targets = [] - heads = [] - c = -1 - for line in lexf: - c += 1 - data = line.strip().split('\t') - sent = data[0].strip().split(' ') - target = data[1].strip() - head = int(data[2].strip()) - sents.append(sent) - targets.append(target) - heads.append(head) - lexf.close() - - #Tag sentences: - tagged_sents = self.tagger.tag_sents(sents) - - for i in range(0, len(sents)): - target = targets[i] - head = heads[i] - target_pos = str(tagged_sents[i][head][1]) - - candidates = [] - candidates = set(substitution_candidates[i]) - candidates = self.getCandidatesWithSamePOS(candidates, target_pos) - - selected_substitutions.append(candidates) - lexf.close() - return selected_substitutions - - def getTargetPOS(self, sent, target, head): - pos_data = [] - try: - pos_data = nltk.pos_tag(sent) - return pos_data[head][1] - except UnicodeDecodeError: - try: - pos_data = nltk.pos_tag(target) - return pos_data[0][1] - except UnicodeDecodeError: - return 'None' - - def getCandidatesWithSamePOS(self, candidates, target_pos): - result = set([]) - for candidate in candidates: - cand_tag = None - try: - cand_tag = self.model[candidate].max() - except Exception: - pass - if cand_tag and cand_tag==target_pos: - result.add(candidate) - return result - - def toVictorFormat(self, victor_corpus, substitutions, output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. - """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -class AluisioSelector: - - def __init__(self, condprob_model, pos_model, stanford_tagger, java_path): - """ - Creates an AluisioSelector instance. - It selects only candidate substitutions that can assume the same POS tag of the target word. - - @param condprob_model: Path to a binary conditional probability model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. 
- Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - """ - os.environ['JAVAHOME'] = java_path - self.tagger = StanfordPOSTagger(pos_model, stanford_tagger) - self.model = pickle.load(open(condprob_model, 'rb')) - - def selectCandidates(self, substitutions, victor_corpus): - """ - Selects which candidates can replace the target complex words in each instance of a VICTOR corpus. - - @param substitutions: Candidate substitutions to be filtered. - It can be in two formats: - A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector. - Example: [['sat', 'roosted'], ['easy', 'uncomplicated']] - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. - """ - selected_substitutions = [] - - substitution_candidates = [] - if isinstance(substitutions, list): - substitution_candidates = substitutions - elif isinstance(substitutions, dict): - void = VoidSelector() - substitution_candidates = void.selectCandidates(substitutions, victor_corpus) - else: - print('ERROR: Substitutions are neither a dictionary or a list!') - return selected_substitutions - - #Read VICTOR corpus: - lexf = open(victor_corpus) - sents = [] - targets = [] - heads = [] - c = -1 - for line in lexf: - c += 1 - data = line.strip().split('\t') - sent = data[0].strip().split(' ') - target = data[1].strip() - head = int(data[2].strip()) - sents.append(sent) - targets.append(target) - heads.append(head) - lexf.close() - - #Tag sentences: - tagged_sents = self.tagger.tag_sents(sents) - - for i in range(0, len(sents)): - target = targets[i] - head = heads[i] - target_pos = str(tagged_sents[i][head][1]) - - candidates = [] - candidates = set(substitution_candidates[i]) - candidates = self.getCandidatesWithSamePOS(candidates, target_pos) - - selected_substitutions.append(candidates) - lexf.close() - return selected_substitutions - - def getTargetPOS(self, sent, target, head): - pos_data = [] - try: - pos_data = nltk.pos_tag(sent) - return pos_data[head][1] - except UnicodeDecodeError: - try: - pos_data = nltk.pos_tag(target) - return pos_data[0][1] - except UnicodeDecodeError: - return 'None' - - def getCandidatesWithSamePOS(self, candidates, target_pos): - result = set([]) - for candidate in candidates: - tag_freq = 0 - try: - tag_freq = self.model[candidate].prob(target_pos) - except Exception: - pass - if tag_freq>0: - result.add(candidate) - return result - - def toVictorFormat(self, victor_corpus, substitutions, output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. 
- """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -class VoidSelector: - - def selectCandidates(self, substitutions, victor_corpus): - """ - Selects which candidates can replace the target complex words in each instance of a VICTOR corpus. - - @param substitutions: Candidate substitutions to be filtered. - It can be in two formats: - A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector. - Example: [['sat', 'roosted'], ['easy', 'uncomplicated']] - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. - """ - selected_substitutions = [] - - if isinstance(substitutions, list): - return substitutions - - lexf = open(victor_corpus) - for line in lexf: - data = line.strip().split('\t') - sent = data[0].strip() - target = data[1].strip() - - candidates = [] - if target in substitutions: - candidates = substitutions[target] - - selected_substitutions.append(candidates) - lexf.close() - return selected_substitutions - - def toVictorFormat(self, victor_corpus, substitutions, output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. - """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -class BiranSelector: - - def __init__(self, cooc_model): - """ - Creates an instance of the BiranSelector class. - - @param cooc_model: Path to a word co-occurrence model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - """ - self.model = self.getModel(cooc_model) - - def selectCandidates(self, substitutions, victor_corpus, common_distance=0.01, candidate_distance=0.9): - """ - Selects which candidates can replace the target complex words in each instance of a VICTOR corpus. - - @param substitutions: Candidate substitutions to be filtered. - It can be in two formats: - A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector. 
- Example: [['sat', 'roosted'], ['easy', 'uncomplicated']] - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param common_distance: The cutoff minimum distance from the sentence's co-occurrence vector and the common vector between the target complex word and the candidate. - We recommend using very small values, such as 0.01, or even 0.0. - @param candidate_distance: The cutoff maximum distance from the sentence's co-occurrence vector and the candidate vector. - We recommend using values close to 1.0, such as 0.8, or 0.9. - @return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. - """ - selected_substitutions = [] - - substitution_candidates = [] - if isinstance(substitutions, list): - substitution_candidates = substitutions - elif isinstance(substitutions, dict): - void = VoidSelector() - substitution_candidates = void.selectCandidates(substitutions, victor_corpus) - else: - print('ERROR: Substitutions are neither a dictionary or a list!') - return selected_substitutions - - c = -1 - lexf = open(victor_corpus) - for line in lexf: - c += 1 - data = line.strip().split('\t') - sent = data[0].strip() - target = data[1].strip() - head = int(data[2].strip()) - - target_vec = self.getSentVec(sent, head) - - candidates = set(substitution_candidates[c]) - - final_candidates = set([]) - for candidate_raw in candidates: - candidate = str(candidate_raw) - candidate_vec = self.getVec(candidate) - candidate_dist = 1.0 - try: - candidate_dist = self.getCosine(candidate_vec, target_vec) - except ValueError: - candidate_dist = 1.0 - - common_vec = self.getCommonVec(target, candidate) - common_dist = 0.0 - try: - common_dist = self.getCosine(common_vec, target_vec) - except ValueError: - common_dist = 0.0 - if common_dist>=common_distance and candidate_dist<=candidate_distance: - final_candidates.add(candidate) - selected_substitutions.append(final_candidates) - lexf.close() - return selected_substitutions - - def getModel(self, path): - result = {} - f = open(path) - for line in f: - data = line.strip().split('\t') - target = data[0].strip() - coocs = data[1:len(data)] - result[target] = {} - for cooc in coocs: - coocd = cooc.strip().split(':') - word = coocd[0].strip() - count = int(coocd[1].strip()) - result[target][word] = count - return result - - def getCosine(self, vec1, vec2): - all_keys = sorted(list(set(vec1.keys()).union(set(vec2.keys())))) - v1 = [] - v2 = [] - for k in all_keys: - if k in vec1: - v1.append(vec1[k]) - else: - v1.append(0.0) - if k in vec2: - v2.append(vec2[k]) - else: - v2.append(0.0) - return cosine(v1, v2) - - def getCommonVec(self, target, candidate): - if target not in list(self.model.keys()) or candidate not in self.model: - return {} - else: - result = {} - common_keys = set(self.model[target].keys()).intersection(set(self.model[candidate].keys())) - for k in common_keys: - if self.model[target][k]>self.model[candidate][k]: - result[k] = self.model[candidate][k] - else: - result[k] = self.model[target][k] - return result - - def isNumeral(self, text): - try: - num = float(text.strip()) - return True - except ValueError: - return False - - def getSentVec(self, sent, head): - coocs = {} - tokens = sent.strip().split(' ') - left = max(0, head-5) - right = min(len(tokens), head+6) - for j in range(left, right): - if j!=head: - cooc = tokens[j] - if self.isNumeral(cooc): - cooc = '#NUMERAL#' - if cooc not in coocs: - 
coocs[cooc] = 1 - else: - coocs[cooc] += 1 - return coocs - - def getVec(self, word): - result = {} - try: - result = self.model[word] - except KeyError: - try: - result = self.model[word.lower()] - except KeyError: - result = {} - return result - - def getCandidateSentence(self, sentence, candidate, head): - tokens = sentence.strip().split(' ') - result = '' - for i in range(0, head): - result += tokens[i] + ' ' - result += candidate + ' ' - for i in range(head+1, len(tokens)): - result += tokens[i] + ' ' - return result.strip() - - def toVictorFormat(self, victor_corpus, substitutions, output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. - """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -class WordVectorSelector: - - def __init__(self, vector_model, pos_model, stanford_tagger, java_path, pos_type='none'): - """ - Creates an instance of the WordVectorSelector class. - - @param vector_model: Path to a binary word vector model. - For instructions on how to create the model, please refer to the LEXenstein Manual. - @param pos_model: Path to a POS tagging model for the Stanford POS Tagger. - The models can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param stanford_tagger: Path to the "stanford-postagger.jar" file. - The tagger can be downloaded from the following link: http://nlp.stanford.edu/software/tagger.shtml - @param java_path: Path to the system's "java" executable. - Can be commonly found in "/usr/bin/java" in Unix/Linux systems, or in "C:/Program Files/Java/jdk_version/java.exe" in Windows systems. - @param pos_type: The type of POS tags with which the model's words are annotated, if any. - Values supported: none, treebank, paetzold - """ - self.model = gensim.models.KeyedVectors.load_word2vec_format(vector_model, binary=True) - self.pos_type = pos_type - os.environ['JAVAHOME'] = java_path - self.tagger = StanfordPOSTagger(pos_model, stanford_tagger) - - def selectCandidates(self, substitutions, victor_corpus, proportion=1.0, proportion_type='percentage', stop_words_file=None, window=99999, onlyInformative=False, keepTarget=False, onePerWord=False): - """ - Selects which candidates can replace the target complex words in each instance of a VICTOR corpus. - - @param substitutions: Candidate substitutions to be filtered. - It can be in two formats: - A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector. - Example: [['sat', 'roosted'], ['easy', 'uncomplicated']] - @param victor_corpus: Path to a corpus in the VICTOR format. 
- For more information about the file's format, refer to the LEXenstein Manual. - @param proportion: Percentage of substitutions to keep. - If proportion_type is set to "percentage", then this parameter must be a floating point number between 0 and 1. - If proportion_type is set to "integer", then this parameter must be an integer number. - @param proportion_type: Type of proportion to be kept. - Values supported: percentage, integer. - @param stop_words_file: Path to the file containing stop words of the desired language. - The file must contain one stop word per line. - @param window: Number of tokens around the target complex sentence to consider as its context. - @param onlyInformative: If True, only content words are considered as part of the complex word's context, such as nouns, verbs, adjectives and adverbs. - @param keepTarget: If True, the complex target word is also included as part of its context. - @param onePerWord: If True, a word in the complex word's context can only contribute once to its resulting word vector. - @return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. - """ - #Initialize selected substitutions: - selected_substitutions = [] - - #Read stop words: - stop_words = set([]) - if stop_words_file != None: - stop_words = set([word.strip() for word in open(stop_words_file)]) - - #Configure input: - substitution_candidates = [] - if isinstance(substitutions, list): - substitution_candidates = substitutions - elif isinstance(substitutions, dict): - void = VoidSelector() - substitution_candidates = void.selectCandidates(substitutions, victor_corpus) - else: - print('ERROR: Substitutions are neither a dictionary or a list!') - return selected_substitutions - - #Parse sentences: - lexf = open(victor_corpus) - sents = [line.strip().split('\t')[0].strip().split(' ') for line in lexf] - lexf.close() - tagged_sents = self.tagger.tag_sents(sents) - - #Transform them to the right format: - if self.pos_type=='paetzold': - transformed = [] - for sent in tagged_sents: - tokens = [] - for token in sent: - tokens.append((token[0], getGeneralisedPOS(token[1]))) - transformed.append(tokens) - tagged_sents = transformed - - #Rank candidates: - c = -1 - lexf = open(victor_corpus) - for line in lexf: - c += 1 - data = line.strip().split('\t') - sent = data[0].strip() - target = data[1].strip() - head = int(data[2].strip()) - pos_tags = tagged_sents[c] - target_pos = pos_tags[head][1] - - target_vec = self.getSentVec(sent, head, stop_words, window, onlyInformative, keepTarget, onePerWord, pos_tags) - candidates = substitution_candidates[c] - - candidate_dists = {} - for candidate in candidates: - candidate_vec = self.getWordVec(candidate, target_pos) - try: - candidate_dists[candidate] = cosine(candidate_vec, target_vec) - except ValueError: - candidate_dists = candidate_dists - - final_candidates = self.getFinalCandidates(candidate_dists, proportion, proportion_type) - - selected_substitutions.append(final_candidates) - lexf.close() - return selected_substitutions - - def getSentVec(self, sentence, head, stop_words, window, onlyInformative, keepTarget, onePerWord, pos_tokens): - informative_tags = set([]) - if onlyInformative: - if self.pos_type=='treebank': - informative_tags = set(['NN', 'NNS', 'JJ', 'JJS', 'JJR', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'RB', 'RBR', 'RBS']) - if self.pos_type=='paetzold': - informative_tags = set(['N', 'V', 'J', 'R']) - - tokens = sentence.split(' ') - - valid_tokens = [] - if 
keepTarget: - valid = tokens[head].strip() - if self.pos_type!='none': - valid += '|||' + pos_tokens[head][1] - valid_tokens.append(valid) - - if head>0: - for i in range(max(0, head-window), head): - if len(informative_tags)==0 or pos_tokens[i][1].lower().strip() in informative_tags: - if tokens[i] not in stop_words: - valid = tokens[i] - if self.pos_type!='none': - valid += '|||' + pos_tokens[i][1] - valid_tokens.append(valid) - - if head=len(result): - return result - else: - return result[0:max(1, int(proportion))] - else: - print('Unrecognized proportion type.') - return result - - def toVictorFormat(self, victor_corpus, substitutions, output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. - """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -class WSDSelector: - - def __init__(self, method): - """ - Creates an instance of the WSDSelector class. - - @param method: Type of Word Sense Disambiguation algorithm to use. - Options available: - lesk - Original lesk algorithm. - path - Path similarity algorithm. - random - Random sense from WordNet. - first - First sense from WordNet. - """ - - if method == 'lesk': - self.WSDfunction = self.getLeskSense - elif method == 'path': - self.WSDfunction = self.getPathSense - elif method == 'random': - self.WSDfunction = self.getRandomSense - elif method == 'first': - self.WSDfunction = self.getFirstSense - else: - self.WSDfunction = self.getLeskSense - - def selectCandidates(self, substitutions, victor_corpus): - """ - Selects which candidates can replace the target complex words in each instance of a VICTOR corpus. - - @param substitutions: Candidate substitutions to be filtered. - It can be in two formats: - A dictionary produced by a Substitution Generator linking complex words to a set of candidate substitutions. - Example: substitutions['perched'] = {'sat', 'roosted'} - A list of candidate substitutions selected for the "victor_corpus" dataset by a Substitution Selector. - Example: [['sat', 'roosted'], ['easy', 'uncomplicated']] - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @return: Returns a vector of size N, containing a set of selected substitutions for each instance in the VICTOR corpus. 
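
The WordVectorSelector above ranks candidate substitutions by cosine distance to a context vector and then keeps either a percentage or a fixed number of them. A rough sketch of that final selection step follows; it is an approximation reconstructed from the surrounding code, not a verbatim copy of the removed LEXenstein implementation:

```python
def get_final_candidates(candidate_dists, proportion, proportion_type="percentage"):
    """Keep the candidates closest to the context vector.

    candidate_dists maps candidate -> cosine distance (smaller = closer);
    this approximates the selection step of the removed WordVectorSelector.
    """
    ranked = sorted(candidate_dists, key=candidate_dists.get)  # closest first
    if proportion_type == "percentage":
        return ranked[:max(1, int(proportion * len(ranked)))]
    elif proportion_type == "integer":
        return ranked[:max(1, min(int(proportion), len(ranked)))]
    else:
        print("Unrecognized proportion type.")
        return ranked
```
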
- """ - - selected_substitutions = [] - - substitution_candidates = [] - if isinstance(substitutions, list): - substitution_candidates = substitutions - elif isinstance(substitutions, dict): - void = VoidSelector() - substitution_candidates = void.selectCandidates(substitutions, victor_corpus) - else: - print('ERROR: Substitutions are neither a dictionary or a list!') - return selected_substitutions - - c = -1 - lexf = open(victor_corpus) - for line in lexf: - c += 1 - data = line.strip().split('\t') - sent = data[0].strip() - target = data[1].strip() - head = int(data[2].strip()) - - target_sense = self.WSDfunction.__call__(sent, target) - - candidates = substitution_candidates[c] - - selected_candidates = set([]) - for candidate in candidates: - candidate_sense = None - try: - unic = str(candidate) - candidate_sense = self.WSDfunction.__call__(self.getCandidateSentence(sent, candidate, head), candidate) - except UnicodeDecodeError: - candidate_sense = None - if target_sense or not candidate_sense: - if not candidate_sense or candidate_sense==target_sense: - selected_candidates.add(candidate) - selected_substitutions.append(selected_candidates) - lexf.close() - return selected_substitutions - - def getLeskSense(self, sentence, target): - try: - result = pywsd.lesk.original_lesk(sentence, target) - return result - except IndexError: - return None - - def getPathSense(self, sentence, target): - try: - result = pywsd.similarity.max_similarity(sentence, target, option="path", best=False) - return result - except IndexError: - return None - - def getRandomSense(self, sentence, target): - try: - result = pywsd.baseline.random_sense(target) - return result - except IndexError: - return None - - def getFirstSense(self, sentence, target): - try: - result = pywsd.baseline.first_sense(target) - return result - except IndexError: - return None - - def getMaxLemmaSense(self, sentence, target): - try: - result = pywsd.baseline.max_lemma_count(target) - return result - except IndexError: - return None - - def getCandidateSentence(self, sentence, candidate, head): - tokens = sentence.strip().split(' ') - result = '' - for i in range(0, head): - result += tokens[i] + ' ' - result += candidate + ' ' - for i in range(head+1, len(tokens)): - result += tokens[i] + ' ' - return result.strip() - - def toVictorFormat(self, victor_corpus, substitutions, output_path, addTargetAsCandidate=False): - """ - Saves a set of selected substitutions in a file in VICTOR format. - - @param victor_corpus: Path to the corpus in the VICTOR format to which the substitutions were selected. - @param substitutions: The vector of substitutions selected for the VICTOR corpus. - @param output_path: The path in which to save the resulting VICTOR corpus. - @param addTargetAsCandidate: If True, adds the target complex word of each instance as a candidate substitution. 
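
The sense-based filter above keeps a candidate only when its disambiguated sense is missing or matches the sense assigned to the target word in context. A small, hedged sketch of the same filtering idea, using NLTK's built-in Lesk implementation as a stand-in for the pywsd calls (and without re-inserting the candidate into the sentence):

```python
from nltk.wsd import lesk  # requires the NLTK 'wordnet' corpus

def filter_by_sense(sentence, target, candidates):
    """Keep candidates whose Lesk sense is unknown or equals the target's sense."""
    tokens = sentence.split(" ")
    target_sense = lesk(tokens, target)
    kept = set()
    for candidate in candidates:
        cand_sense = lesk(tokens, candidate)
        if cand_sense is None or cand_sense == target_sense:
            kept.add(candidate)
    return kept
```
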
- """ - o = open(output_path, 'w') - f = open(victor_corpus) - for subs in substitutions: - data = f.readline().strip().split('\t') - sentence = data[0].strip() - target = data[1].strip() - head = data[2].strip() - - newline = sentence + '\t' + target + '\t' + head + '\t' - for sub in subs: - newline += '0:'+sub + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() diff --git a/lexi/lib/lexenstein/spelling.py b/lexi/lib/lexenstein/spelling.py deleted file mode 100755 index 6aff446..0000000 --- a/lexi/lib/lexenstein/spelling.py +++ /dev/null @@ -1,74 +0,0 @@ -import re, collections, pickle - -class NorvigCorrector: - - def __init__(self, model_file, format='text'): - """ - Creates an instance of the NorvigCorrector class. - - @param model_file: Path to a file containing either raw, untokenized text, or a binary spelling correction model. - If "model_file" is the path to a text file, then the value of "format" must be "text". - If "model_file" is the path to a binary spelling correction model, then the value of "format" must be "bin". - @param format: Indicator of the type of input provided. - Possible values: "text", "bin". - """ - - #If input is text, then train a model: - if format=='text': - #Read text file: - file = open(model_file) - text = file.read() - file.close() - - #Create model: - self.model = self.getSpellingModel(re.findall('[a-z]+', text)) - #If input is binary, then load the model: - elif format=='bin': - self.model = pickle.load(open(model_file, 'rb')) - else: - self.model = None - print(('Input format \"' + format + '\" no supported, see documentation for available formats.')) - - #Create alphabet: - self.alphabet = 'abcdefghijklmnopqrstuvwxyz' - - def correct(self, word): - """ - Returns the spell-corrected version of a word. - If the model determines that the word has no spelling errors, it returns the word itself. - - @param word: Word to be spell-corrected. - """ - - candidates = self.getKnown([word]) or self.getKnown(self.getEdits(word)) or self.getKnownEdits(word) or [word] - return max(candidates, key=self.model.get) - - def saveBinaryModel(self, model_path): - """ - Saves the spelling correction model in binary format. - The saved model can then be loaded with the "bin" format during the creation of a NorvigCorrector. - - @param model_path: Path in which to save the model. - """ - - pickle.dump(self.model, open(model_path, 'wb')) - - def getSpellingModel(self, words): - model = collections.defaultdict(int) - for f in words: - model[f] += 1 - return model - - def getEdits(self, word): - splits = [(word[:i], word[i:]) for i in range(len(word) + 1)] - deletes = [a + b[1:] for a, b in splits if b] - transposes = [a + b[1] + b[0] + b[2:] for a, b in splits if len(b)>1] - replaces = [a + c + b[1:] for a, b in splits for c in self.alphabet if b] - inserts = [a + c + b for a, b in splits for c in self.alphabet] - return set(deletes + transposes + replaces + inserts) - - def getKnownEdits(self, word): - return set(e2 for e1 in self.getEdits(word) for e2 in self.getEdits(e1) if e2 in self.model) - - def getKnown(self, words): - return set(w for w in words if w in self.model) diff --git a/lexi/lib/lexenstein/util.py b/lexi/lib/lexenstein/util.py deleted file mode 100755 index 54612ad..0000000 --- a/lexi/lib/lexenstein/util.py +++ /dev/null @@ -1,383 +0,0 @@ -import nltk -import pickle -import shelve -import re - - -def dependencyParseSentences(parser, sentences): - """ - Use StanfordParser to parse multiple sentences. 
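
The NorvigCorrector above is a standard Norvig-style spell checker: it builds word frequencies from raw text and returns the most frequent known word within one or two edits of the input. A hypothetical usage sketch (the corpus and model paths are placeholders):

```python
# Hypothetical usage of the Norvig-style corrector defined above;
# "plain_text_corpus.txt" and "spelling_model.bin" are placeholder paths.
corrector = NorvigCorrector("plain_text_corpus.txt", format="text")
print(corrector.correct("exampel"))   # likely "example" if seen in the corpus
corrector.saveBinaryModel("spelling_model.bin")
reloaded = NorvigCorrector("spelling_model.bin", format="bin")
```
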
- Takes multiple sentences as a list where each sentence is a list of words. - Each sentence will be automatically tagged with this StanfordParser instance's tagger. - If whitespaces exists inside a token, then the token will be treated as separate tokens. - This method is an adaptation of the code provided by NLTK. - - @param parser: An instance of the nltk.parse.stanford.StanfordParser class. - @param sentences: Input sentences to parse. - Each sentence must be a list of tokens. - @return A list of the dependency links of each sentence. - Each dependency link is composed by the relation type, the source word, its position in the sentence, the target word, and its position in the sentence. - """ - cmd = [ - 'edu.stanford.nlp.parser.lexparser.LexicalizedParser', - '-model', parser.model_path, - '-sentences', 'newline', - '-outputFormat', 'typedDependencies', - '-tokenized', - '-escaper', 'edu.stanford.nlp.process.PTBEscapingProcessor', - ] - - output=parser._execute(cmd, '\n'.join(' '.join(sentence) for sentence in sentences), False) - - depexp = re.compile("([^\\(]+)\\(([^\\,]+)\\,\s([^\\)]+)\\)") - - res = [] - cur_lines = [] - for line in output.splitlines(False): - if line == '': - res.append(cur_lines) - cur_lines = [] - else: - depdata = re.findall(depexp, line) - if len(depdata)>0: - link = depdata[0] - subjecth = link[1].rfind('-') - objecth = link[2].rfind('-') - subjectindex = link[1][subjecth+1:len(link[1])] - if subjectindex.endswith(r"'"): - subjectindex = subjectindex[0:len(subjectindex)-1] - objectindex = link[2][objecth+1:len(link[2])] - if objectindex.endswith(r"'"): - objectindex = objectindex[0:len(objectindex)-1] - clean_link = (link[0], link[1][0:subjecth], subjectindex, link[2][0:objecth], objectindex) - try: - a = int(subjectindex) - b = int(objectindex) - cur_lines.append(clean_link) - except Exception: - pass - return res - -def getGeneralisedPOS(tag): - """ - Returns a generalised version of a POS tag in Treebank format. - - @param tag: POS tag in Treebank format. - @return A generalised POS tag. - """ - result = None - if tag.startswith('N'): - result = 'N' - elif tag.startswith('V'): - result = 'V' - elif tag.startswith('RB'): - result = 'A' - elif tag.startswith('J'): - result = 'J' - elif tag.startswith('W'): - result = 'W' - elif tag.startswith('PRP'): - result = 'P' - else: - result = tag.strip() - return result - -def createTaggedNgramsFile(ngrams_file, tagged_ngrams_file): - """ - Creates a tagged version of an annotated n-gram counts file. - - @param ngrams_file: File containing POS tag annotated n-gram counts. - The file must be in the format produced by the "-write" option of SRILM. - Each word in the corpus used must be in the following format: ||| - @param tagged_ngrams_file: File with tagged n-gram counts. 
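
getGeneralisedPOS above collapses Treebank tags into the coarse tag set used by the 'paetzold' POS mode elsewhere in LEXenstein. For reference, a few concrete mappings implied by the code:

```python
# Concrete mappings produced by getGeneralisedPOS as defined above.
for tag in ["NNS", "VBD", "RBR", "JJ", "WDT", "PRP$", ","]:
    print(tag, "->", getGeneralisedPOS(tag))
# NNS -> N, VBD -> V, RBR -> A, JJ -> J, WDT -> W, PRP$ -> P, , -> ,
```
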
- """ - o = open(tagged_ngrams_file, 'w') - - print('Opening input n-gram counts file...') - c = 0 - f = open(ngrams_file) - for line in f: - c += 1 - if c % 1000000 == 0: - print((str(c) + ' n-grams processed.')) - data = line.strip().split('\t') - tokens = [t.split('|||') for t in data[0].split(' ')] - if len(tokens)==2: - o.write(tokens[0][0] + ' ' + tokens[1][min(1, len(tokens[1])-1)] + '\t' + data[1] + '\n') - o.write(tokens[0][min(1, len(tokens[0])-1)] + ' ' + tokens[1][0] + '\t' + data[1] + '\n') - elif len(tokens)==3: - o.write(tokens[0][0] + ' ' + tokens[1][min(1, len(tokens[1])-1)] + ' ' + tokens[2][min(1, len(tokens[2])-1)] + '\t' + data[1] + '\n') - o.write(tokens[0][min(1, len(tokens[0])-1)] + ' ' + tokens[1][0] + ' ' + tokens[2][min(1, len(tokens[2])-1)] + '\t' + data[1] + '\n') - o.write(tokens[0][min(1, len(tokens[0])-1)] + ' ' + tokens[1][min(1, len(tokens[1])-1)] + ' ' + tokens[2][0] + '\t' + data[1] + '\n') - elif len(tokens)==4: - o.write(tokens[0][min(1, len(tokens[0])-1)] + ' ' + tokens[1][min(1, len(tokens[1])-1)] + ' ' + tokens[2][0] + ' ' + tokens[3][min(1, len(tokens[3])-1)] + '\t' + data[1] + '\n') - o.write(tokens[0][min(1, len(tokens[0])-1)] + ' ' + tokens[1][0] + ' ' + tokens[2][min(1, len(tokens[2])-1)] + ' ' + tokens[3][min(1, len(tokens[3])-1)] + '\t' + data[1] + '\n') - elif len(tokens)==5: - o.write(tokens[0][min(1, len(tokens[0])-1)] + ' ' + tokens[1][min(1, len(tokens[1])-1)] + ' ' + tokens[2][0] + ' ' + tokens[3][min(1, len(tokens[3])-1)] + ' ' + tokens[4][min(1, len(tokens[4])-1)] + '\t' + data[1] + '\n') - f.close() - print('N-grams file read!') - - print('Saving model...') - o.close() - print('Finished!') - -def removeUnkFromNgramsFile(ngrams_file, output): - """ - Removes n-grams with "" tokens from an SRILM n-grams file. - - @param ngrams_file: Input n-grams file. - @param output: Filtered n-grams file. - """ - f = open(ngrams_file) - o = open(output, 'w') - c = 0 - for line in f: - c += 1 - if c % 1000000==0: - print((str(c) + ' tokens filtered.')) - if '' not in line: - o.write(line) - f.close() - o.close() - -def getVocabularyFromDataset(dataset, vocab_file, leftw, rightw, format='victor'): - """ - Extracts the vocabulary from a dataset in VICTOR or CWICTOR format. - This vocabularies can be used along with SRILM in order for smaller n-gram count files to be produced. - - @param dataset: Dataset from which to extract the vocabulary. - @param vocab_file: File in which to save the vocabulary. - @param leftw: Window to consider from the left of the target word. - @param rightw: Window to consider from the right of the target word. - @param format: Format of the dataset. 
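
These utilities all assume the VICTOR format: one instance per line with the sentence, the target word, its token index, and then rank:candidate pairs, all tab-separated. A minimal parsing example (the sentence and candidates are invented for illustration):

```python
# Minimal illustration of the VICTOR format assumed by the utilities above.
line = "the cat perched on the mat\tperched\t2\t1:sat\t2:roosted"
data = line.strip().split("\t")
tokens, target, head = data[0].split(" "), data[1], int(data[2])
candidates = [sub.split(":", 1)[1] for sub in data[3:]]
print(tokens[head], target, candidates)   # perched perched ['sat', 'roosted']
```
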
- Values accepted: victor, cwictor - """ - #Obtain vocabulary: - vocab = set([]) - if format=='victor': - f = open(dataset) - for line in f: - data = line.strip().split('\t') - sent = data[0].strip().split(' ') - head = int(data[2].strip()) - for i in range(max(0, head-leftw), head): - vocab.add(sent[i]) - for i in range(head, min(len(sent), head+rightw+1)): - vocab.add(sent[i]) - target = data[1].strip() - vocab.add(target) - for sub in data[3:len(data)]: - words = sub.strip().split(':')[1].strip().split(' ') - for word in words: - vocab.add(word.strip()) - f.close() - elif format=='cwictor': - f = open(dataset) - for line in f: - data = line.strip().split('\t') - sent = data[0].strip().split(' ') - head = int(data[2].strip()) - for i in range(max(0, head-leftw), head): - vocab.add(sent[i]) - for i in range(head, min(len(sent), head+rightw+1)): - vocab.add(sent[i]) - target = data[1].strip() - vocab.update(sent) - vocab.add(target) - f.close() - - #Save vocabulary: - f = open(vocab_file, 'w') - for word in vocab: - if len(word.strip())>0: - f.write(word.strip() + '\n') - f.close() - -def addTranslationProbabilitiesFileToShelve(transprob_file, model_file): - """ - Adds a translation probabilities file to an either new, or existing shelve dictionary. - The shelve file can then be used for the calculation of features. - To produce the translation probabilities file, first run the following command through fast_align: - fast_align -i -v -d -o - - @param transprob_file: File containing translation probabilities. - @param model_file: Shelve file in which to save the translation probabilities. - """ - print('Opening shelve file...') - d = shelve.open(model_file, protocol=pickle.HIGHEST_PROTOCOL) - print('Shelve file open!') - - print('Reading translation probabilities file...') - c = 0 - f = open(transprob_file) - for line in f: - c += 1 - if c % 1000000 == 0: - print((str(c) + ' translation probabilities read.')) - data = line.strip().split('\t') - key = data[0] + '\t' + data[1] - value = float(data[2]) - if key not in d: - d[key] = value - else: - d[key] += value - f.close() - print('Translation probabilities file read!') - - print('Saving model...') - d.close() - print('Finished!') - -def addNgramCountsFileToShelve(ngrams_file, model_file): - """ - Adds a n-gram counts file to an either new, or existing shelve dictionary. - The shelve file can then be used for the calculation of several features. - The file must be in the format produced by the "-write" option of SRILM ngram-count application. - - @param ngrams_file: File containing n-gram counts. - @param model_file: Shelve file in which to save the n-gram counts file. - """ - print('Opening shelve file...') - d = shelve.open(model_file, protocol=pickle.HIGHEST_PROTOCOL) - print('Shelve file open!') - - print('Reading n-grams file...') - c = 0 - f = open(ngrams_file) - for line in f: - c += 1 - if c % 1000000 == 0: - print((str(c) + ' n-grams read.')) - data = line.strip().split('\t') - if data[0] not in d: - d[data[0]] = int(data[1]) - else: - d[data[0]] += int(data[1]) - f.close() - print('N-grams file read!') - - print('Saving model...') - d.close() - print('Finished!') - -def createConditionalProbabilityModel(folder, fileids, model, sep='/', encoding='utf8'): - """ - Creates an tagging probability model to be used along with the FeatureEstimator object. - Files of tagged data must contain one sentence per line, and each line must follow the following format: - ... - - @param folder: Folder containing files of tagged sentences. 
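
Both shelve helpers above accumulate counts or probabilities into an on-disk dictionary so that feature extraction can look values up without holding the full table in memory. A hedged lookup sketch (the shelve path is a placeholder):

```python
import shelve

# Hypothetical lookup against a counts database built with
# addNgramCountsFileToShelve above; "ngram_counts.shelve" is a placeholder.
with shelve.open("ngram_counts.shelve") as counts:
    for ngram in ["the black cat", "black cat sat"]:
        print(ngram, counts.get(ngram, 0))
```
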
- @param fileids: A list or regular expressions specifying the file names with tagged data in "folder". - @param model: File in which to save the trained model. - @param sep: Separator between words and tags in the files with tagged data. - @param encoding: Encoding of the files with tagged data. - """ - print('Reading files...') - tcr = nltk.corpus.reader.tagged.TaggedCorpusReader(folder, fileids, sep=sep, encoding=encoding) - - print('Extracting tagged data...') - data = tcr.tagged_words() - - print('Creating conditional probability maps...') - cfd_tagwords = nltk.ConditionalFreqDist(data) - cpd_tagwords = nltk.ConditionalProbDist(cfd_tagwords, nltk.MLEProbDist) - - print('Saving model...') - pickle.dump(cpd_tagwords, open(model, "wb")) - print('Finished!') - -def fitTranslationProbabilityFileToCorpus(translation_probabilities, corpus, output): - """ - Creates a translation probabilities file that has only translations pertaining to the target complex words of a given VICTOR or CWICTOR corpus. - - @param translation_probabilities: Path to a file containing the translation probabilities. - The file must produced by the following command through fast_align: - fast_align -i -v -d -o - @param corpus: Path to a corpus in the VICTOR or CWICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param output: Path in which to save the filtered translation probabilities file. - """ - targets = set([]) - f = open(corpus) - for line in f: - data = line.strip().split('\t') - target = data[1].strip() - targets.add(target) - f.close() - - o = open(output, 'w') - f = open(translation_probabilities) - for line in f: - data = line.strip().split('\t') - word = data[0].strip() - if word in targets: - o.write(line.strip() + '\n') - f.close() - o.close() - -def addTargetAsFirstToVictorCorpus(self, victor_corpus, output): - """ - Creates a modified version of an input VICTOR corpus in which the target complex word is ranked first. - Can be very useful for the training of Substitution Selection Models - - @param victor_corpus: Path to a corpus in the VICTOR format. - For more information about the file's format, refer to the LEXenstein Manual. - @param output: Path in which to save the modified VICTOR corpus. - """ - f = open(victor_corpus) - o = open(output, 'w') - for line in f: - data = line.strip().split('\t') - newline = data[0].strip() + '\t' + data[1].strip() + '\t' + data[2].strip() + '\t' + '1:'+data[1].strip() + '\t' - for subst in data[3:len(data)]: - substd = subst.strip().split(':') - rank = int(substd[0].strip()) - word = substd[1].strip() - newline += str(rank+1)+':'+word + '\t' - o.write(newline.strip() + '\n') - f.close() - o.close() - -def produceWordCooccurrenceModel(text_file, window, model_file): - """ - Creates a co-occurrence model from a text file. - These models can be used by certain classes in LEXenstein, such as the Yamamoto Ranker and the Biran Selector. - - @param text_file: Text from which to estimate the word co-occurrence model. - @param window: Number of tokens to the left and right of a word to be included as a co-occurring word. - @param model_file: Path in which to save the word co-occurrence model. 
- """ - inp = open(text_file) - - coocs = {} - - c = 0 - for line in inp: - c += 1 - print(('At line: ' + str(c))) - tokens = line.strip().lower().split(' ') - for i in range(0, len(tokens)): - target = tokens[i] - if target not in list(coocs.keys()): - coocs[target] = {} - left = max(0, i-window) - right = min(len(tokens), i+window+1) - for j in range(left, right): - if j!=i: - cooc = tokens[j] - if cooc not in list(coocs[target].keys()): - coocs[target][cooc] = 1 - else: - coocs[target][cooc] += 1 - inp.close() - - targets = sorted(coocs.keys()) - - out = open(model_file, 'w') - for target in targets: - newline = target + '\t' - words = sorted(coocs[target].keys()) - for word in words: - newline += word + ':' + str(coocs[target][word]) + '\t' - out.write(newline.strip() + '\n') - out.close() diff --git a/lexi/lib/lib.py b/lexi/lib/lib.py deleted file mode 100644 index bb73de3..0000000 --- a/lexi/lib/lib.py +++ /dev/null @@ -1,795 +0,0 @@ -import logging -import pickle - -import numpy as np -from sklearn import linear_model -from sklearn.feature_selection import SelectKBest -from sklearn.feature_selection import f_classif -from sklearn.model_selection import train_test_split -from collections import defaultdict - -from lexi.config import RANKER_MODEL_PATH_TEMPLATE - -logger = logging.getLogger('lexi') - - -def make_synonyms_dict(synonyms_file): - """ - - :param synonyms_file: - :return: - """ - words2synonyms = defaultdict(set) - for line in open(synonyms_file): - tgt, syns = line.strip().split("\t", 1) - words2synonyms[tgt].update(syns.split(";")) - return words2synonyms - - -class Generator: - def __init__(self): - raise NotImplementedError - - def getSubstitutionsSingle(self, sentence, target, index, **kwargs): - raise NotImplementedError - - -class SynonymDBGenerator(Generator): - """ - Generates candidates from a serialized WordNet-like list of synonymy - relations. - """ - - def __init__(self, synonyms_file): - self.word2synonmys = make_synonyms_dict(synonyms_file) - - def getSubstitutionsSingle(self, sentence, target, index, **kwargs): - # TODO get POS of word for filtering? 
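
make_synonyms_dict above expects one target word per line, separated by a tab from a ';'-joined list of synonyms. A small, self-contained illustration (the file name and the Danish entries are invented for the example):

```python
# Illustration of the synonym-file format consumed by make_synonyms_dict /
# SynonymDBGenerator above; file name and entries are placeholders.
with open("example_synonyms.csv", "w", encoding="utf-8") as f:
    f.write("hurtig\trask;kvik\n")
    f.write("vanskelig\tsvær;besværlig\n")

syns = make_synonyms_dict("example_synonyms.csv")
print(syns["vanskelig"])   # {'svær', 'besværlig'}
```
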
- """ - - :param sentence: - :param target: - :param index: - :return: - """ - return {target: self.word2synonmys.get(target, {})} - - -class LexensteinGenerator(Generator): - - def __init__(self, w2vmodels): - import gensim - self.model = None - self.individual_models = [] - for model_file in w2vmodels: - try: - _model = gensim.models.KeyedVectors.load_word2vec_format( - model_file, binary=True, unicode_errors='ignore') - except UnicodeDecodeError: - try: - _model = gensim.models.KeyedVectors.load(model_file) - except: - continue - self.individual_models.append(_model) - logger.debug(self.individual_models) - self.model = W2VModelEnsemble(self.individual_models) - - def getSubstitutionsSingle(self, sentence, target, index, - min_similarity=0.2): - """ - :param sentence: - :param target: - :param index: - :param min_similarity: minimum similarity score - :return: - """ - if min_similarity <= 0 or min_similarity > 1: - raise ValueError("'min_similarity' must be between 0 and 1 " - "(you provided {}).".format(min_similarity)) - substitutions = self.getInitialSet([[sentence, target, index]], - min_similarity) - return substitutions - - def getInitialSet(self, data, min_similarity): - trgs = [] - for i in range(len(data)): - d = data[i] - logger.debug(d) - target = d[1].strip().lower() - head = int(d[2].strip()) - trgs.append(target) - - logger.debug("tgts: {}".format(trgs)) - logger.debug(" getting candidates with min_similarity={}". - format(min_similarity)) - subs = [] - cands = set([]) - for i in range(len(data)): - d = data[i] - t = trgs[i] - - word = t - - most_sim = self.model.most_similar(word) - - subs.append([word for word, score in most_sim - if score >= min_similarity]) - - logger.debug("subs: {}".format(subs)) - subsr = subs - subs = [] - for l in subsr: - lr = [] - for inst in l: - cand = inst.split('|||')[0].strip() - cands.add(cand) - lr.append(inst) - subs.append(lr) - - cands = list(cands) - - subs_filtered = self.filterSubs(data, subs, trgs) - - final_cands = {} - for i in range(0, len(data)): - target = data[i][1] - logger.debug(subs_filtered) - cands = subs_filtered[i][0:len(subs_filtered[i])] - cands = [word.split('|||')[0].strip() for word in cands] - if target not in final_cands: - final_cands[target] = set([]) - final_cands[target].update(set(cands)) - - return final_cands - - def filterSubs(self, data, subs, trgs): - result = [] - for i in range(0, len(data)): - d = data[i] - - t = trgs[i] - - most_sim = subs[i] - most_simf = [] - - for cand in most_sim: - if cand!=t: - most_simf.append(cand) - - result.append(most_simf) - return result - - -class EnsembleLexensteinGenerator(LexensteinGenerator): - - def __init__(self, w2vmodels): - import gensim - self.model = None - self.individual_models = [] - for model_file in w2vmodels: - try: - _model = gensim.models.KeyedVectors.load_word2vec_format( - model_file, binary=True, unicode_errors='ignore') - except UnicodeDecodeError: - try: - _model = gensim.models.KeyedVectors.load(model_file) - except: - continue - self.individual_models.append(_model) - self.model = W2VModelEnsemble(self.individual_models) - - def getInitialSet(self, data, amount=5, min_similarity=0.5): - - trgs = [] - for i in range(len(data)): - d = data[i] - logger.debug(d) - target = d[1].strip().lower() - head = int(d[2].strip()) - trgs.append(target) - - logger.debug("tgts: {}".format(trgs)) - subs = [] - cands = set([]) - candidates = set() - for i in range(len(data)): - d = data[i] - t = trgs[i] - for model in self.models: - try: - 
candidates.update([(w, v) for w, v in - model.most_similar(t.decode('utf-8'), topn=10) - if v > min_similarity]) - except Exception: - try: - candidates.update([(w, v) for w, v in - model.most_similar(t, topn=10) - if v > min_similarity]) - except Exception: - pass - - candidate_mean_scores = [] - for candidate in candidates: - # compute mean score for every candidate across models - mean_score = np.mean([model.similarity(t, candidate) - for model in self.models - if candidate in model]) - candidate_mean_scores.append((candidate, mean_score)) - - # sort candidates by score (best first) - candidate_mean_scores = sorted(candidate_mean_scores, - key=lambda x: x[1], reversed=True) - # select top n - best_candidates = [cand for cand, sim in - candidate_mean_scores][:amount] - # subs.append([word[0] for word in most_sim]) - subs.append(best_candidates) - - logger.debug("tgts: {}".format(trgs)) - subsr = subs - subs = [] - for l in subsr: - lr = [] - for inst in l: - cand = inst.split('|||')[0].strip() - cands.add(cand) - lr.append(inst) - subs.append(lr) - - cands = list(cands) - - subs_filtered = self.filterSubs(data, subs, trgs) - - final_cands = {} - for i in range(0, len(data)): - target = data[i][1] - logger.debug(subs_filtered, amount, i) - cands = subs_filtered[i][0:min(amount, len(subs_filtered[i]))] - cands = [word.split('|||')[0].strip() for word in cands] - if target not in final_cands: - final_cands[target] = set([]) - final_cands[target].update(set(cands)) - - return final_cands - - def filterSubs(self, data, subs, trgs): - result = [] - for i in range(0, len(data)): - d = data[i] - - t = trgs[i] - - most_sim = subs[i] - most_simf = [] - - for cand in most_sim: - if cand!=t: - most_simf.append(cand) - - result.append(most_simf) - return result - - -class W2VModelEnsemble: - - def __init__(self, models): - self.models = models - - def most_similar(self, target, min_similarity=0.5, topn=10): - - all_similar_words = set() - for model in self.models: - if target in model: - all_similar_words.update([w for w, sim in - model.most_similar(target, topn=topn) - if sim > min_similarity]) - candidate_mean_scores = [] - for w in all_similar_words: - mean_score = np.mean([model.similarity(target, w) - for model in self.models - if w in model and target in model]) - candidate_mean_scores.append((w, mean_score)) - - # sort - most_similar = sorted(candidate_mean_scores, key=lambda x: x[1], - reverse=True) - # select top n - return most_similar[:topn] - - def similarity(self, w1, w2): - return np.mean([model.similarity(w1, w2) for model in self.models]) - - -class BoundaryRanker: - - def __init__(self, fe=None, userId=None): - self.fe = fe - self.classifier = None - self.feature_selector = None - self.userId = userId - - def trainRankerWithCrossValidation( - self, victor_corpus, positive_range, folds, test_size, - losses=['hinge', 'modified_huber'], penalties=['elasticnet'], - alphas=[0.0001, 0.001, 0.01], - l1_ratios=[0.0, 0.15, 0.25, 0.5, 0.75, 1.0], k='all'): - # Read victor corpus: - data = [] - f = open(victor_corpus) - for line in f: - data.append(line.strip().split('\t')) - f.close() - - # Create matrixes: - X = self.fe.calculateFeatures(victor_corpus) - Y = self.generateLabels(data, positive_range) - - # Select features: - self.feature_selector = SelectKBest(f_classif, k=k) - self.feature_selector.fit(X, Y) - X = self.feature_selector.transform(X) - - # Extract ranking problems: - firsts = [] - candidates = [] - Xsets = [] - Ysets = [] - index = -1 - for line in data: - fs = set([]) - cs = 
[] - Xs = [] - Ys = [] - for cand in line[3:len(line)]: - index += 1 - candd = cand.split(':') - rank = candd[0].strip() - word = candd[1].strip() - - cs.append(word) - Xs.append(X[index]) - Ys.append(Y[index]) - if rank=='1': - fs.add(word) - firsts.append(fs) - candidates.append(cs) - Xsets.append(Xs) - Ysets.append(Ys) - - # Create data splits: - datasets = [] - for i in range(0, folds): - Xtr, Xte, Ytr, Yte, Ftr, Fte, Ctr, Cte = train_test_split( - Xsets, Ysets, firsts, candidates, test_size=test_size, - random_state=i) - Xtra = [] - for matrix in Xtr: - Xtra += matrix - Xtea = [] - for matrix in Xte: - Xtea += matrix - Ytra = [] - for matrix in Ytr: - Ytra += matrix - datasets.append((Xtra, Ytra, Xte, Xtea, Fte, Cte)) - - # Get classifier with best parameters: - max_score = -1.0 - parameters = () - for l in losses: - for p in penalties: - for a in alphas: - for r in l1_ratios: - sum = 0.0 - sum_total = 0 - for dataset in datasets: - Xtra = dataset[0] - Ytra = dataset[1] - Xte = dataset[2] - Xtea = dataset[3] - Fte = dataset[4] - Cte = dataset[5] - - classifier = linear_model.SGDClassifier(loss=l, penalty=p, alpha=a, l1_ratio=r, epsilon=0.0001) - try: - classifier.fit(Xtra, Ytra) - t1 = self.getCrossValidationScore(classifier, Xtea, Xte, Fte, Cte) - sum += t1 - sum_total += 1 - except Exception: - pass - sum_total = max(1, sum_total) - if (sum/sum_total)>max_score: - max_score = sum - parameters = (l, p, a, r) - self.classifier = linear_model.SGDClassifier(loss=parameters[0], penalty=parameters[1], alpha=parameters[2], l1_ratio=parameters[3], epsilon=0.0001) - self.classifier.fit(X, Y) - - def getCrossValidationScore(self, classifier, Xtea, Xte, firsts, candidates): - distances = classifier.decision_function(Xtea) - index = -1 - corrects = 0 - total = 0 - for i in range(0, len(Xte)): - xset = Xte[i] - maxd = -999999 - for j in range(0, len(xset)): - index += 1 - distance = distances[index] - if distance>maxd: - maxd = distance - maxc = candidates[i][j] - if maxc in firsts[i]: - corrects += 1 - total += 1 - return float(corrects)/float(total) - - def getRankings(self, data): - #Transform data: - textdata = '' - for inst in data: - for token in inst: - textdata += token+'\t' - textdata += '\n' - textdata = textdata.strip() - - #Create matrixes: - X = self.fe.calculateFeatures(textdata, input='text') - - #Select features: - X = self.feature_selector.transform(X) - - #Get boundary distances: - distances = self.classifier.decision_function(X) - - #Get rankings: - result = [] - index = 0 - for i in range(0, len(data)): - line = data[i] - scores = {} - for subst in line[3:len(line)]: - word = subst.strip().split(':')[1].strip() - scores[word] = distances[index] - index += 1 - ranking_data = sorted(list(scores.keys()), key=scores.__getitem__, reverse=True) - result.append(ranking_data) - - #Return rankings: - return result - - def generateLabels(self, data, positive_range): - Y = [] - for line in data: - max_range = min(int(line[len(line)-1].split(':')[0].strip()), positive_range) - for i in range(3, len(line)): - rank_index = int(line[i].split(':')[0].strip()) - if rank_index<=max_range: - Y.append(1) - else: - Y.append(0) - return Y - - def save(self, userId): - with open(RANKER_MODEL_PATH_TEMPLATE.format(userId), 'wb') as pf: - pickle.dump((self.fe, self.classifier, self.feature_selector), pf, - pickle.HIGHEST_PROTOCOL) - - def load(self, userId=None): - if not userId: - userId = self.userId - with open(RANKER_MODEL_PATH_TEMPLATE.format(userId), 'rb') as pf: - (self.fe, self.classifier, 
self.feature_selector) = pickle.load(pf) - return self - - -class BoundarySelector: - - def __init__(self, boundary_ranker): - self.ranker = boundary_ranker - - def trainSelectorWithCrossValidation(self, victor_corpus, positive_range, - folds, test_size, - losses=['hinge', 'modified_huber'], - penalties=['elasticnet'], - alphas=[0.0001, 0.001, 0.01], - l1_ratios=[0.0, 0.15, 0.25, 0.5, 0.75, 1.0], - k='all'): - self.ranker.trainRankerWithCrossValidation(victor_corpus, positive_range, - folds, test_size, losses=losses, penalties=penalties, alphas=alphas, l1_ratios=l1_ratios, k=k) - - def selectCandidates(self, data, proportion, proportion_type='percentage'): - rankings = self.ranker.getRankings(data) - logger.debug((data, rankings)) - selected_substitutions = [] - - index = -1 - for line in data: - index += 1 - - if proportion_type == 'percentage': - if proportion > 1.0: - select_n = len(rankings[index]) - else: - select_n = int(float(proportion) * len(rankings[index])) - selected_candidates = rankings[index][:max(1, select_n)] - else: - if proportion < 1: - toselect = 1 - elif proportion > len(rankings[index]): - toselect = len(rankings[index]) - else: - toselect = proportion - selected_candidates = rankings[index][:toselect] - - selected_substitutions.append(selected_candidates) - - return selected_substitutions - - -class GlavasRanker: - - def __init__(self, fe): - """ - Creates an instance of the GlavasRanker class. - - @param fe: A configured FeatureEstimator object. - """ - - self.fe = fe - self.feature_values = None - - def getRankings(self, alldata): - - #Calculate features: - textdata = '' - for inst in alldata: - for token in inst: - textdata += token+'\t' - textdata += '\n' - textdata = textdata.strip() - self.feature_values = self.fe.calculateFeatures(textdata, input='text') - - #Create object for results: - result = [] - - #Read feature values for each candidate in victor corpus: - index = 0 - for data in alldata: - #Get all substitutions in ranking instance: - substitutions = data[3:len(data)] - - #Get instance's feature values: - instance_features = [] - for substitution in substitutions: - instance_features.append(self.feature_values[index]) - index += 1 - - rankings = {} - for i in range(0, len(self.fe.identifiers)): - #Create dictionary of substitution to feature value: - scores = {} - for j in range(0, len(substitutions)): - substitution = substitutions[j] - word = substitution.strip().split(':')[1].strip() - scores[word] = instance_features[j][i] - - #Check if feature is simplicity or complexity measure: - rev = False - if self.fe.identifiers[i][1]=='Simplicity': - rev = True - - #Sort substitutions: - words = list(scores.keys()) - sorted_substitutions = sorted(words, key=scores.__getitem__, reverse=rev) - - #Update rankings: - for j in range(0, len(sorted_substitutions)): - word = sorted_substitutions[j] - if word in rankings: - rankings[word] += j - else: - rankings[word] = j - - #Produce final rankings: - final_rankings = sorted(list(rankings.keys()), key=rankings.__getitem__) - - #Add them to result: - result.append(final_rankings) - - #Return result: - return result - - -class NNRegressionRanker: - - def __init__(self, fe, model): - self.fe = fe - self.model = model - - def getRankings(self, data): - #Transform data: - textdata = '' - for inst in data: - for token in inst: - textdata += token+'\t' - textdata += '\n' - textdata = textdata.strip() - - #Create matrix: - features = self.fe.calculateFeatures(textdata, input='text') - - ranks = [] - c = -1 - for line in 
data: - cands = [cand.strip().split(':')[1].strip() for cand in line[3:]] - featmap = {} - scoremap = {} - for cand in cands: - c += 1 - featmap[cand] = features[c] - scoremap[cand] = 0.0 - for i in range(0, len(cands)-1): - cand1 = cands[i] - for j in range(i+1, len(cands)): - cand2 = cands[j] - posneg = np.concatenate((featmap[cand1], featmap[cand2])) - probs = self.model.predict(np.array([posneg])) - score = probs[0] - scoremap[cand1] += score - negpos = np.concatenate((featmap[cand2], featmap[cand1])) - probs = self.model.predict(np.array([negpos])) - score = probs[0] - scoremap[cand1] -= score - rank = sorted(list(scoremap.keys()), key=scoremap.__getitem__, reverse=True) - if len(rank)>1: - if rank[0]==line[1].strip(): - rank = rank[1:] - ranks.append(rank) - return ranks - - -class OnlineRegressionRanker: - - def __init__(self, fe, model, training_dataset=None, userId=None): - self.fe = fe - self.userId = userId - if model: - self.model = model - elif training_dataset: - self.model = self.trainRegressionModel(training_dataset) - else: - self.model = None - - def trainRegressionModel(self, training_dataset): - # Create matrix: - features = self.fe.calculateFeatures(training_dataset, input='file') - Xtr = [] - Ytr = [] - f = open(training_dataset) - c = -1 - for line in f: - data = line.strip().split('\t') - cands = [cand.strip().split(':')[1] for cand in data[3:]] - indexes = [int(cand.strip().split(':')[0]) for cand in data[3:]] - featmap = {} - for cand in cands: - c += 1 - featmap[cand] = features[c] - for i in range(0, len(cands)-1): - for j in range(i+1, len(cands)): - indexi = indexes[i] - indexj = indexes[j] - indexdiffji = indexj-indexi - indexdiffij = indexi-indexj - positive = featmap[cands[i]] - negative = featmap[cands[j]] - v1 = np.concatenate((positive,negative)) - v2 = np.concatenate((negative,positive)) - Xtr.append(v1) - Xtr.append(v2) - Ytr.append(indexdiffji) - Ytr.append(indexdiffij) - f.close() - Xtr = np.array(Xtr) - Ytr = np.array(Ytr) - - model = linear_model.SGDRegressor() - model.fit(Xtr, Ytr) - return model - - def onlineTrainRegressionModel(self, training_data_text): - logger.info("Partially fitting the ranker") - # Create matrix: - features = self.fe.calculateFeatures(training_data_text, - format='victor', input='text') - Xtr = [] - Ytr = [] - c = -1 - for line in training_data_text.strip().split('\n'): - logger.debug(line) - data = line.strip().split('\t') - cands = [cand.strip().split(':')[1] for cand in data[3:]] - indexes = [int(cand.strip().split(':')[0]) for cand in data[3:]] - featmap = {} - for cand in cands: - c += 1 - featmap[cand] = features[c] - for i in range(0, len(cands) - 1): - for j in range(i + 1, len(cands)): - indexi = indexes[i] - indexj = indexes[j] - indexdiffji = indexj - indexi - indexdiffij = indexi - indexj - positive = featmap[cands[i]] - negative = featmap[cands[j]] - v1 = np.concatenate((positive, negative)) - v2 = np.concatenate((negative, positive)) - Xtr.append(v1) - Xtr.append(v2) - Ytr.append(indexdiffji) - Ytr.append(indexdiffij) - Xtr = np.array(Xtr) - Ytr = np.array(Ytr) - - self.model.partial_fit(Xtr, Ytr) - return self.model - - def getRankings(self, data): - #Transform data: - textdata = '' - for inst in data: - for token in inst: - textdata += token+'\t' - textdata += '\n' - textdata = textdata.strip() - - #Create matrix: - features = self.fe.calculateFeatures(textdata, input='text') - - ranks = [] - c = -1 - for line in data: - cands = [cand.strip().split(':')[1].strip() for cand in line[3:]] - featmap = {} - 
scoremap = {} - for cand in cands: - c += 1 - featmap[cand] = features[c] - scoremap[cand] = 0.0 - for i in range(0, len(cands)-1): - cand1 = cands[i] - for j in range(i+1, len(cands)): - cand2 = cands[j] - posneg = np.concatenate((featmap[cand1], featmap[cand2])) - probs = self.model.predict(np.array([posneg])) - score = probs[0] - scoremap[cand1] += score - negpos = np.concatenate((featmap[cand2], featmap[cand1])) - probs = self.model.predict(np.array([negpos])) - score = probs[0] - scoremap[cand1] -= score - rank = sorted(list(scoremap.keys()), key=scoremap.__getitem__, reverse=True) - if len(rank)>1: - if rank[0]==line[1].strip(): - rank = rank[1:] - ranks.append(rank) - return ranks - - def save(self, userId): - logger.info("Saving new model for user {}".format(userId)) - with open(RANKER_MODEL_PATH_TEMPLATE.format(userId), 'wb') as pf: - # pickle.dump((self.fe, self.model), pf, pickle.HIGHEST_PROTOCOL) - pickle.dump(self, pf, pickle.HIGHEST_PROTOCOL) - - # def load(self, userId=None): - # if not userId: - # userId = self.userId - # with open(RANKER_MODEL_TEMPLATE.format(userId), 'rb') as pf: - # (self.fe, self.model) = pickle.load(pf) - # return self - - @staticmethod - def staticload(userId): - with open(RANKER_MODEL_PATH_TEMPLATE.format(userId), 'rb') as pf: - return pickle.load(pf) diff --git a/lexi/server/run_lexi_server.py b/lexi/server/run_lexi_server.py index ee9f528..a79f76d 100644 --- a/lexi/server/run_lexi_server.py +++ b/lexi/server/run_lexi_server.py @@ -14,14 +14,14 @@ from werkzeug.exceptions import HTTPException from lexi.config import LEXI_BASE, LOG_DIR, RANKER_MODEL_PATH_TEMPLATE, \ - MODELS_DIR + CWI_MODEL_PATH_TEMPLATE, MODELS_DIR, RESOURCES from lexi.core.endpoints import update_ranker -from lexi.core.simplification.lexical import LexensteinSimplifier -from lexi.core.util.io import load_pickled_model +from lexi.core.simplification.lexical import LexicalSimplificationPipeline, \ + LexiCWI, LexiRanker, LexiGenerator from lexi.server.util import statuscodes from lexi.server.util.html import process_html from lexi.server.util.communication import make_response -from lexi.lib.lib import OnlineRegressionRanker +# from lexi.lib.lib import OnlineRegressionRanker SCRIPTDIR = os.path.dirname(os.path.realpath(__file__)) @@ -118,10 +118,18 @@ # LOADING DEFAULT MODEL -simplifier = LexensteinSimplifier("default").load() -default_ranker = load_pickled_model( - RANKER_MODEL_PATH_TEMPLATE.format("default")) +simplification_pipeline = LexicalSimplificationPipeline("default") +generator = LexiGenerator(synonyms_files=RESOURCES["da"]["synonyms"], + embedding_files=RESOURCES["da"]["embeddings"]) +simplification_pipeline.setGenerator(generator) +# default_ranker = load_pickled_model( +# RANKER_MODEL_PATH_TEMPLATE.format("default")) +# default_cwi = load_pickled_model( +# CWI_MODEL_PATH_TEMPLATE.format("default")) +default_ranker = LexiRanker("default") +default_cwi = LexiCWI("default") # TODO pretrain offline and load personalized_rankers = {"default": default_ranker} +personalized_cwi = {"default": default_cwi} logger.debug("Default ranker:") logger.debug(type(default_ranker)) logger.debug(default_ranker) @@ -162,32 +170,24 @@ def process(): frontend_version=frontend_version, language=language) - if user_id in personalized_rankers: - logger.info("Using personalized ranker, still in memory.") - ranker = personalized_rankers[user_id] - else: - logger.info("Gotta load ranker or use default...") - try: - # retrieve model - model_path = db_connection.get_model_path(user_id) - ranker = 
OnlineRegressionRanker.staticload(model_path) - except: - logger.warning("Could not load personalized model. " - "Loading default ranker.") - ranker = copy.copy(personalized_rankers["default"]) - logger.debug(ranker) - ranker.userId = user_id - personalized_rankers[user_id] = ranker + cwi = None + single_word_request = request.json.get("single_word_request", False) + if not single_word_request: + cwi = get_personalized_cwi(user_id) + + ranker = get_personalized_ranker(user_id) + + logger.info("Loaded CWI: "+str(cwi)) logger.info("Loaded ranker: "+str(ranker)) min_similarity = request.json.get("min_similarity", 0.65) if not type(min_similarity) == float: raise ValueError("'min_similarity' must be a float. You " "provided a {}".format(type(min_similarity))) - html_out, simplifications = process_html(simplifier, + html_out, simplifications = process_html(simplification_pipeline, request.json["html"], request.json.get("startOffset"), request.json.get("endOffset"), - ranker, mode="lexical", + cwi, ranker, mode="lexical", requestId=request_id, min_similarity=min_similarity, blacklist=GENERIC_BLACKLIST) @@ -295,6 +295,46 @@ def versioncheck(): download_url=download_url) +def get_personalized_ranker(user_id): + if user_id in personalized_rankers: + logger.info("Using personalized ranker, still in memory.") + ranker = personalized_rankers[user_id] + else: + logger.info("Gotta load ranker or use default...") + try: + # retrieve model + model_path = db_connection.get_model_path(user_id) + ranker = LexiRanker(user_id) + except: + logger.warning("Could not load personalized model. " + "Loading default ranker.") + ranker = copy.copy(personalized_rankers["default"]) + logger.debug(ranker) + ranker.userId = user_id + personalized_rankers[user_id] = ranker + return ranker + + +def get_personalized_cwi(user_id): + if user_id in personalized_cwi: + logger.info("Using personalized cwi, still in memory.") + cwi = personalized_cwi[user_id] + else: + logger.info("Gotta load cwi or use default...") + try: + # retrieve model + model_path = db_connection.get_model_path(user_id) + cwi = LexiCWI(user_id) + except: + logger.warning("Could not load personalized model. 
" + "Loading default cwi.") + cwi = copy.copy(personalized_cwi["default"]) + logger.debug(cwi) + cwi.userId = user_id + personalized_cwi[user_id] = cwi + return cwi + + if __name__ == "__main__": app.run(threaded=True) logger.debug("Rules: " + str([rule for rule in app.url_map.iter_rules()])) diff --git a/lexi/server/util/database.py b/lexi/server/util/database.py index 899a237..9c33ccb 100644 --- a/lexi/server/util/database.py +++ b/lexi/server/util/database.py @@ -1,6 +1,7 @@ import psycopg2 import logging import json +# import MySQLdb from collections import defaultdict logger = logging.getLogger('lexi') @@ -8,9 +9,12 @@ class DatabaseConnection: - def __init__(self, kwargs): + def __init__(self, kwargs, type="postgres"): try: - self.pg_connection = psycopg2.connect(**kwargs) + if type == "postgres": + self.pg_connection = psycopg2.connect(**kwargs) + # elif type == "mysql": + # self.pg_connection = MySQLdb.connect(**kwargs) self.cursor = self.pg_connection.cursor() logger.info("Connected to database '{}' at '{}'.".format( kwargs["dbname"], kwargs["host"] diff --git a/lexi/server/util/html.py b/lexi/server/util/html.py index 0205e03..6cfd17a 100644 --- a/lexi/server/util/html.py +++ b/lexi/server/util/html.py @@ -24,12 +24,13 @@ def map_text_to_html_offsets(html_src): return mapping -def process_html(classifier, html_src, startOffset, endOffset, ranker, +def process_html(pipeline, html_src, startOffset, endOffset, cwi, ranker, mode="lexical", requestId=0, min_similarity=0.7, blacklist=None): """ - :param classifier: + :param pipeline: :param html_src: The HTML source in question + :param ranker: CWI module to use with this classifier :param ranker: Ranker to use with this classifier :param mode: simplification mode (whether to perform lexical simplification, sentence simplification, ...). Only "lexical" accepted for now. @@ -43,7 +44,8 @@ def process_html(classifier, html_src, startOffset, endOffset, ranker, html_out = "" if mode == "lexical": _output, _simplifications = process_html_lexical( - classifier, html_src, startOffset, endOffset, requestId=requestId, + pipeline, html_src, startOffset, endOffset, requestId=requestId, + cwi=cwi, ranker=ranker, min_similarity=min_similarity, blacklist=blacklist) diff --git a/requirements.txt b/requirements.txt index b29ca48..2f696fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -37,7 +37,7 @@ scipy==0.19.1 singledispatch==3.4.0.3 six==1.10.0 smart-open==1.5.3 -spacy==2.0.18 +stanfordnlp==0.1.1 thinc==6.12.1 toolz==0.8.2 tqdm==4.29.0 @@ -47,3 +47,4 @@ wcwidth==0.1.7 webencodings==0.5.1 Werkzeug==0.12.2 wrapt==1.10.11 +git+https://github.com/jbingel/rippletagger.git diff --git a/scripts/train_default_classifier.py b/scripts/train_default_classifier.py index d8e4b0a..e297882 100644 --- a/scripts/train_default_classifier.py +++ b/scripts/train_default_classifier.py @@ -1,45 +1,26 @@ -from lexi.config import RESOURCES, RESOURCES_TEST +from lexi.config import RESOURCES from lexi.core.simplification.lexical import * def fresh_train(userId="default", language="da", resources=None): - c = LexensteinSimplifier(userId=userId, language=language) + c = LexicalSimplificationPipeline(userId=userId, language=language) if not resources: try: - #resources = RESOURCES[language] - resources = RESOURCES_TEST[language] - print("WARNING: CHECK FOR CORRECT RESOURCES! 
(using test)") + resources = RESOURCES[language] except KeyError: print("Couldn't find resources for language {}".format(language)) - # General purpose - w2vpm = resources['embeddings'] + # Generator - # gg = LexensteinGenerator(w2vpm) - gg = SynonymDBGenerator(resources['synonyms']) - # gg = LexensteinGenerator(w2vpm) + g = LexiGenerator(synonyms_files=resources["synonyms"], + embedding_files=resources["embeddings"]) + c.setGenerator(g) - # Selector - fe = FeatureEstimator() - # fe.resources[w2vpm[0]] = gg.model - fe.addCollocationalFeature(resources['lm'], 2, 2, 'Complexity') - fe.addWordVectorSimilarityFeature(w2vpm[0], 'Simplicity') - br = BoundaryRanker(fe) - bs = BoundarySelector(br) - bs.trainSelectorWithCrossValidation(resources['ubr'], 1, 5, 0.25, k='all') # Ranker - fe = FeatureEstimator() - fe.addLengthFeature('Complexity') - fe.addCollocationalFeature(resources['lm'], 2, 2, 'Simplicity') - orr = OnlineRegressionRanker(fe, None, training_dataset=resources[ - 'ranking_training_dataset']) - # Return LexicalSimplifier object - c.generator = gg - c.selector = bs - c.ranker = orr + c.setRanker(LexiRanker("default")) return c + c = fresh_train() -c.save() -r = c.ranker -r.save("default") +c.ranker.save("default") +c.cwi.save("default")
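
fresh_train only needs a resources mapping with "synonyms" and "embeddings" entries for the chosen language; when none is passed it falls back to RESOURCES[language]. A hedged example of overriding it (the paths are placeholders, not the shipped Danish resources):

```python
# Hypothetical override of the default resources; the paths are placeholders.
custom_resources = {
    "synonyms": ["res/da/synonyms/my_synonyms.csv"],
    "embeddings": ["res/da/embeddings/my_vectors.bin"],
}
pipeline = fresh_train(userId="default", language="da",
                       resources=custom_resources)
pipeline.ranker.save("default")
```
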