Skip to content

Commit

Permalink
add support for ES7
Browse files (browse the repository at this point in the history)
  • Loading branch information
philtweir committed Feb 17, 2024
1 parent fc4f0f7 commit 3df11dc
Showing 1 changed file with 16 additions and 12 deletions.
28 changes: 16 additions & 12 deletions python/bonn/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,16 @@
import json
from sortedcontainers import SortedDict
from collections import Counter

from elasticsearch_dsl import Search, Q
from nltk import download
from tqdm import tqdm
from nltk.stem.wordnet import WordNetLemmatizer

from ._bonn import FfModel
from .category_manager import CategoryManager
from .taxonomy import get_taxonomy, taxonomy_to_categories, categories_to_classifier_bow

try:
from elasticsearch2 import Elasticsearch as Elasticsearch2
except ImportError:
Expand All @@ -16,15 +26,6 @@
if not Elasticsearch2:
raise RuntimeError("elasticsearch or elasticsearch2 must be installed") from exc

from elasticsearch_dsl import Search, Q
from nltk import download
from tqdm import tqdm
from nltk.stem.wordnet import WordNetLemmatizer

from ._bonn import FfModel
from .category_manager import CategoryManager
from .taxonomy import get_taxonomy, taxonomy_to_categories, categories_to_classifier_bow

DEFAULT_TAXONOMY_LOCATION = "/app/test_data/taxonomy.json"


Expand All @@ -40,11 +41,14 @@ def get_elasticsearch(settings):
elasticsearch = Elasticsearch2
else:
raise RuntimeError(
"To use Elasticsearch 2.x, you must have elasticsearch_dsl~=2.0 and elasticsearch2 installed"
"To use Elasticsearch 2.x, you must have elasticsearch_dsl~=2.0 "
"and elasticsearch2 installed"
)

if elasticsearch is None:
raise RuntimeError("You must have a version of elasticsearch or elasticsearch2 installed")
raise RuntimeError(
"You must have a version of elasticsearch or elasticsearch2 installed"
)

return elasticsearch

Expand All @@ -65,7 +69,7 @@ def get_datasets(cm, classifier_bow, settings):
# results_df = pd.DataFrame((d.to_dict() for d in s.scan()))
# /businesseconomy../business/activitiespeopel/123745
elasticsearch = get_elasticsearch(settings)
client = ELASTICSEARCH([host])
client = elasticsearch([host])

s = Search(using=client, index=elasticsearch_index).filter(
"bool", must=[Q("exists", field="title")]
Expand Down

0 comments on commit 3df11dc

Please sign in to comment.