Skip to content

Commit

Permalink
Merge pull request #326 from JohnSnowLabs/fix/remove-cohyphonym-test
Browse files Browse the repository at this point in the history
Fix/remove cohyphonym test
  • Loading branch information
luca-martial authored Apr 13, 2023
2 parents f81cfd1 + 045d68b commit abb0cce
Show file tree
Hide file tree
Showing 8 changed files with 0 additions and 159 deletions.
1 change: 0 additions & 1 deletion conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ requirements:
- pandas
- scikit-learn
- transformers
- nltk
- pytorch
- sentencepiece

Expand Down
28 changes: 0 additions & 28 deletions docs/pages/tests/robustness/swap_cohyponyms.md

This file was deleted.

1 change: 0 additions & 1 deletion docs/pages/tests/test.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,6 @@ The following tables give an overview of the different categories and tests.
|[Robustness](robustness) |[British to American](robustness#british-to-american) |ner, text-classification
|[Robustness](robustness) |[Lowercase](robustness#lowercase) |ner, text-classification
|[Robustness](robustness) |[Strip Punctuation](robustness#strip-punctuation) |ner, text-classification
|[Robustness](robustness) |[Swap Cohyponyms](robustness#swap-cohyponyms) |ner
|[Robustness](robustness) |[Swap Entities](robustness#swap-entities) |ner
|[Robustness](robustness) |[Titlecase](robustness#titlecase) |ner, text-classification
|[Robustness](robustness) |[Uppercase](robustness#uppercase) |ner, text-classification
Expand Down
10 changes: 0 additions & 10 deletions nlptest/transform/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from typing import Dict, List
from tqdm import tqdm

import nltk
import pandas as pd

from nlptest.modelhandler import ModelFactory
Expand Down Expand Up @@ -164,15 +163,6 @@ def __init__(
self.tests['british_to_american']['parameters'] = {}
self.tests['british_to_american']['parameters']['accent_map'] = {v: k for k, v in A2B_DICT.items()}

if 'swap_cohyponyms' in self.tests:
nltk.download('omw-1.4', quiet=True)
nltk.download('wordnet', quiet=True)
df = pd.DataFrame({'text': [sample.original for sample in data_handler],
'label': [[i.entity for i in sample.expected_results.predictions]
for sample in data_handler]})
self.tests['swap_cohyponyms']['parameters'] = {}
self.tests['swap_cohyponyms']['parameters']['labels'] = df.label.tolist()

self._data_handler = data_handler

def transform(self) -> List[Sample]:
Expand Down
115 changes: 0 additions & 115 deletions nlptest/transform/robustness.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import re
import numpy as np
from abc import ABC, abstractmethod
from functools import reduce
from typing import Dict, List, Optional

from .utils import (CONTRACTION_MAP, TYPO_FREQUENCY)
Expand Down Expand Up @@ -292,120 +291,6 @@ def transform(
sample.category = "robustness"
return sample_list


def get_cohyponyms_wordnet(word: str) -> str:
    """Return a random WordNet co-hyponym of ``word``.

    The first synset of ``word`` is looked up, then its first hypernym, and
    one of that hypernym's hyponyms (i.e. a sibling of ``word``) is picked at
    random. The input word itself is never returned as its own replacement.

    Args:
        word: Input string to retrieve a co-hyponym for. Multi-word inputs
            are looked up with spaces turned into underscores, which is how
            WordNet spells multi-word lemmas.

    Returns:
        A co-hyponym (lowercase, underscores turned back into spaces) when
        WordNet has one; otherwise the original ``word``, unchanged.

    Raises:
        ImportError: If NLTK (which provides the WordNet reader) is missing.
    """
    try:
        from nltk.corpus import wordnet as wn
    except ImportError as err:
        # The WordNet reader ships with NLTK; there is no usable "wordnet"
        # package to install, so point the user at the right dependency.
        raise ImportError("WordNet is not available!\n"
                          "Please install NLTK via pip install nltk to use swap_cohyponyms") from err

    lookup = word.lower().replace(" ", "_")

    synsets = wn.synsets(lookup)
    if not synsets:
        return word

    hypernyms = synsets[0].hypernyms()
    if not hypernyms:
        return word

    # Synset names look like "<lemma>.<pos>.<nn>"; splitting from the right
    # keeps lemmas that themselves contain a dot intact.
    sibling_names = (hypo.name().rsplit(".", 2)[0] for hypo in hypernyms[0].hyponyms())

    # Drop the input word itself so the "swap" is always a real change.
    candidates = [
        name.replace("_", " ")
        for name in sibling_names
        if name.lower() != lookup
    ]
    if not candidates:
        return word

    return random.choice(candidates)


class SwapCohyponyms(BaseRobustness):
    """Robustness test that swaps one named entity per sample for a WordNet co-hyponym."""
    alias_name = "swap_cohyponyms"

    @staticmethod
    def transform(
            sample_list: List[Sample],
            labels: Optional[List[List[str]]] = None,
    ) -> List[Sample]:
        """Replace one randomly chosen entity in each sample with a co-hyponym.

        Args:
            sample_list: Samples to process; each ``original`` text is split
                on single spaces to obtain its tokens.
            labels: One list of tags per sample (same order as
                ``sample_list``), indexed like the space-split tokens. Tags
                starting with ``B`` open an entity and ``I-<type>`` tags
                continue it.

        Returns:
            The same ``sample_list``, with ``test_case`` filled in on every
            sample and ``transformations`` / ``category`` set on the samples
            in which an entity was replaced.

        Raises:
            ValueError: If ``labels`` is not provided.
        """
        if labels is None:
            raise ValueError('In order to generate test cases for swap_cohyponyms, labels should be passed!')

        assert len(sample_list) == len(labels), "'labels' and 'sample_list' must have same lengths."

        for sample, sample_labels in zip(sample_list, labels):
            # Positions at which an entity begins. Nothing to swap when there
            # are none (all "O", or continuation tags without an opening tag).
            ent_starts = [idx for idx, label in enumerate(sample_labels) if label.startswith('B')]
            if not ent_starts:
                sample.test_case = sample.original
                continue

            sent_tokens = sample.original.split(' ')

            replace_idx = int(np.random.choice(ent_starts))
            ent_type = sample_labels[replace_idx][2:]

            # Extend the entity over the directly following I-<type> tags.
            span_len = 1
            for label in sample_labels[replace_idx + 1:]:
                if label != f'I-{ent_type}':
                    break
                span_len += 1

            replace_token = " ".join(sent_tokens[replace_idx: replace_idx + span_len])
            if not replace_token:
                # Labels do not line up with the tokens; leave the text as is.
                sample.test_case = sample.original
                continue

            chosen_ent = get_cohyponyms_wordnet(replace_token)

            # Character offset of the chosen entity, derived from the token
            # position: every preceding token contributes its length plus the
            # single space that followed it. This targets exactly the chosen
            # occurrence and avoids treating the entity text as a regex.
            start = sum(len(token) + 1 for token in sent_tokens[:replace_idx])
            end = start + len(replace_token)

            sample.test_case = sample.original[:start] + chosen_ent + sample.original[end:]
            sample.transformations = [
                Transformation(
                    original_span=Span(
                        start=start,
                        end=end,
                        word=replace_token
                    ),
                    new_span=Span(
                        start=start,
                        end=start + len(chosen_ent),
                        word=chosen_ent
                    ),
                    ignore=False
                )
            ]
            sample.category = "robustness"

        return sample_list


class ConvertAccent(BaseRobustness):
alias_name = ["american_to_british", "british_to_american"]

Expand Down
2 changes: 0 additions & 2 deletions nlptest/transform/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
"add_context",
"add_contractions",
"swap_entities",
"swap_cohyponyms",
"replace_to_male_pronouns",
"replace_to_female_pronouns",
"replace_to_neutral_pronouns"
Expand All @@ -35,7 +34,6 @@
"add_context": 'AddContext',
"add_contractions": 'AddContraction',
"swap_entities": 'SwapEntities',
"swap_cohyponyms": 'SwapCohyponyms',
"replace_to_male_pronouns": "GenderPronounBias",
"replace_to_female_pronouns": "GenderPronounBias",
"replace_to_neutral_pronouns": "GenderPronounBias"
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ numpy
pandas
scikit-learn
transformers
nltk
torch
protobuf<=3.20.0
sentencepiece
Expand Down
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
'wn',
'scikit-learn',
'transformers',
'nltk',
'torch',
'sentencepiece',
'pydantic'
Expand Down

0 comments on commit abb0cce

Please sign in to comment.