Merge pull request #326 from JohnSnowLabs/fix/remove-cohyphonym-test

Fix: remove cohyponym test
JohnSnowLabs · Apr 13, 2023 · abb0cce
2 parents f81cfd1 + 045d68b
commit abb0cce
Show file tree

Hide file tree

Showing 8 changed files with 0 additions and 159 deletions.
diff --git a/conda/meta.yaml b/conda/meta.yaml
@@ -26,7 +26,6 @@ requirements:
     - pandas
     - scikit-learn
     - transformers
-    - nltk
     - pytorch
     - sentencepiece
 

diff --git a/docs/pages/tests/robustness/swap_cohyponyms.md b/docs/pages/tests/robustness/swap_cohyponyms.md
diff --git a/docs/pages/tests/test.md b/docs/pages/tests/test.md
@@ -73,7 +73,6 @@ The following tables give an overview of the different categories and tests.
 |[Robustness](robustness)		            |[British to American](robustness#british-to-american)                                                      |ner, text-classification     
 |[Robustness](robustness)		            |[Lowercase](robustness#lowercase)                                                                          |ner, text-classification     
 |[Robustness](robustness)		            |[Strip Punctuation](robustness#strip-punctuation)                                                          |ner, text-classification     
-|[Robustness](robustness)		            |[Swap Cohyponyms](robustness#swap-cohyponyms)                                                              |ner     
 |[Robustness](robustness)		            |[Swap Entities](robustness#swap-entities)                                                                  |ner     
 |[Robustness](robustness)		            |[Titlecase](robustness#titlecase)                                                                          |ner, text-classification     
 |[Robustness](robustness)		            |[Uppercase](robustness#uppercase)                                                                          |ner, text-classification     

diff --git a/nlptest/transform/__init__.py b/nlptest/transform/__init__.py
@@ -2,7 +2,6 @@
 from typing import Dict, List
 from tqdm import tqdm
 
-import nltk
 import pandas as pd
 
 from nlptest.modelhandler import ModelFactory
@@ -164,15 +163,6 @@ def __init__(
             self.tests['british_to_american']['parameters'] = {}
             self.tests['british_to_american']['parameters']['accent_map'] = {v: k for k, v in A2B_DICT.items()}
 
-        if 'swap_cohyponyms' in self.tests:
-            nltk.download('omw-1.4', quiet=True)
-            nltk.download('wordnet', quiet=True)
-            df = pd.DataFrame({'text': [sample.original for sample in data_handler],
-                               'label': [[i.entity for i in sample.expected_results.predictions]
-                                         for sample in data_handler]})
-            self.tests['swap_cohyponyms']['parameters'] = {}
-            self.tests['swap_cohyponyms']['parameters']['labels'] = df.label.tolist()
-
         self._data_handler = data_handler
 
     def transform(self) -> List[Sample]:

diff --git a/nlptest/transform/robustness.py b/nlptest/transform/robustness.py
@@ -2,7 +2,6 @@
 import re
 import numpy as np
 from abc import ABC, abstractmethod
-from functools import reduce
 from typing import Dict, List, Optional
 
 from .utils import (CONTRACTION_MAP, TYPO_FREQUENCY)
@@ -292,120 +291,6 @@ def transform(
             sample.category = "robustness"
         return sample_list
 
-
-def get_cohyponyms_wordnet(word: str) -> str:
-    """
-    Retrieve co-hyponym of the input string using WordNet when a hit is found.
-
-    Args:
-        word: input string to retrieve co-hyponym
-    Returns:
-        Cohyponym of the input word if exists, else original word.
-    """
-
-    try:
-        from nltk.corpus import wordnet as wn
-    except ImportError:
-        raise ImportError("WordNet is not available!\n"
-                          "Please install WordNet via pip install wordnet to use swap_cohyponyms")
-
-    orig_word = word
-    word = word.lower()
-    if len(word.split()) > 0:
-        word = word.replace(" ", "_")
-    syns = wn.synsets(word)
-
-    if len(syns) == 0:
-        return orig_word
-    else:
-        hypernym = syns[0].hypernyms()
-        if len(hypernym) == 0:
-            return orig_word
-        else:
-            hypos = hypernym[0].hyponyms()
-            hypo_len = len(hypos)
-            if hypo_len == 1:
-                name = str(hypos[0].lemmas()[0])
-            else:
-                ind = random.sample(range(hypo_len), k=1)[0]
-                name = str(hypos[ind].lemmas()[0])
-                while name == word:
-                    ind = random.sample(range(hypo_len), k=1)[0]
-                    name = str(hypos[ind].lemmas()[0])
-            return name.replace("_", " ").split(".")[0][7:]
-
-
-class SwapCohyponyms(BaseRobustness):
-    alias_name = "swap_cohyponyms"
-
-    @staticmethod
-    def transform(
-            sample_list: List[Sample],
-            labels: List[List[str]] = None,
-    ) -> List[Sample]:
-        """Swaps named entities with the new one from the terminology extracted from passed data.
-
-        Args:
-            sample_list: List of sentences to process.
-            labels: Corresponding labels to make changes according to sentences.
-
-        Returns:
-            List sample indexes and corresponding augmented sentences, tags and labels if provided.
-        """
-
-        if labels is None:
-            raise ValueError('In order to generate test cases for swap_entities, terminology should be passed!')
-
-        assert len(sample_list) == len(labels), f"'labels' and 'sample_list' must have same lengths."
-
-        for sample, sample_labels in zip(sample_list, labels):
-            if all([label == "O" for label in sample_labels]):
-                sample.test_case = sample.original
-                continue
-
-            sent_tokens = sample.original.split(' ')
-
-            ent_start_pos = np.array([1 if label[0] == 'B' else 0 for label in sample_labels])
-            ent_idx, = np.where(ent_start_pos == 1)
-
-            replace_idx = np.random.choice(ent_idx)
-            ent_type = sample_labels[replace_idx][2:]
-            replace_idxs = [replace_idx]
-            if replace_idx < len(sample_labels) - 1:
-                for i, label in enumerate(sample_labels[replace_idx + 1:]):
-                    if label == f'I-{ent_type}':
-                        replace_idxs.append(i + replace_idx + 1)
-                    else:
-                        break
-
-            replace_token = sent_tokens[replace_idx: replace_idx + len(replace_idxs)]
-            token_length = len(replace_token)
-            replace_token = " ".join(replace_token)
-
-            chosen_ent = get_cohyponyms_wordnet(replace_token)
-            replace_token_pos = re.search(replace_token, sample.original)
-
-            sample.test_case = sample.original.replace(replace_token, chosen_ent)
-            sample.transformations = [
-                Transformation(
-                    original_span=Span(
-                        start=replace_token_pos.start(),
-                        end=replace_token_pos.end(),
-                        word=replace_token
-                    ),
-                    new_span=Span(
-                        start=replace_token_pos.start(),
-                        end=replace_token_pos.start() + len(chosen_ent),
-                        word=chosen_ent
-                    ),
-                    ignore=False
-                )
-            ]
-            sample.category = "robustness"
-
-        return sample_list
-
-
 class ConvertAccent(BaseRobustness):
     alias_name = ["american_to_british", "british_to_american"]
 

diff --git a/nlptest/transform/utils.py b/nlptest/transform/utils.py
@@ -17,7 +17,6 @@
     "add_context",
     "add_contractions",
     "swap_entities",
-    "swap_cohyponyms",
     "replace_to_male_pronouns",
     "replace_to_female_pronouns",
     "replace_to_neutral_pronouns"
@@ -35,7 +34,6 @@
     "add_context": 'AddContext',
     "add_contractions": 'AddContraction',
     "swap_entities": 'SwapEntities',
-    "swap_cohyponyms": 'SwapCohyponyms',
     "replace_to_male_pronouns": "GenderPronounBias",
     "replace_to_female_pronouns": "GenderPronounBias",
     "replace_to_neutral_pronouns": "GenderPronounBias"

diff --git a/requirements.txt b/requirements.txt
@@ -2,7 +2,6 @@ numpy
 pandas
 scikit-learn
 transformers
-nltk
 torch
 protobuf<=3.20.0
 sentencepiece

diff --git a/setup.py b/setup.py
@@ -20,7 +20,6 @@
     'wn',
     'scikit-learn',
     'transformers',
-    'nltk',
     'torch',
     'sentencepiece',
     'pydantic'