From 92ada70d3c3675af8a78225aaad2b6c9509f4b8e Mon Sep 17 00:00:00 2001 From: Stephen Dodson Date: Sat, 15 Feb 2020 15:11:22 +0000 Subject: [PATCH 1/4] Updating test matrix for 7.6 + removing oss for now. --- .ci/test-matrix.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.ci/test-matrix.yml b/.ci/test-matrix.yml index 9b35c842..4dc647f8 100755 --- a/.ci/test-matrix.yml +++ b/.ci/test-matrix.yml @@ -2,10 +2,9 @@ ELASTICSEARCH_VERSION: - 8.0.0-SNAPSHOT - - 7.5-SNAPSHOT + - 7.6-SNAPSHOT TEST_SUITE: - - oss - xpack PYTHON_VERSION: From a726f409a8d1c1c3855a5553a0fd874c11482fb5 Mon Sep 17 00:00:00 2001 From: Stephen Dodson Date: Sat, 15 Feb 2020 18:36:35 +0000 Subject: [PATCH 2/4] Resolving 7.6.0 docs issues --- docs/requirements-docs.txt | 2 ++ make_docs.sh | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/requirements-docs.txt b/docs/requirements-docs.txt index 8ad6cd05..0cb59676 100644 --- a/docs/requirements-docs.txt +++ b/docs/requirements-docs.txt @@ -5,3 +5,5 @@ pytest>=5.2.1 git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master numpydoc>=0.9.0 nbsphinx +scikit-learn +xgboost diff --git a/make_docs.sh b/make_docs.sh index 0549a73a..98338e20 100644 --- a/make_docs.sh +++ b/make_docs.sh @@ -2,8 +2,8 @@ python setup.py install -#jupyter nbconvert --to notebook --inplace --execute docs/source/examples/demo_notebook.ipynb -#jupyter nbconvert --to notebook --inplace --execute docs/source/examples/online_retail_analysis.ipynb +jupyter nbconvert --to notebook --inplace --execute docs/source/examples/demo_notebook.ipynb +jupyter nbconvert --to notebook --inplace --execute docs/source/examples/online_retail_analysis.ipynb cd docs From 47928b571eac4610905422e51a8756bb09307d69 Mon Sep 17 00:00:00 2001 From: Stephen Dodson Date: Sat, 15 Feb 2020 18:51:00 +0000 Subject: [PATCH 3/4] Updating ML docs --- docs/source/reference/ml.rst | 12 ++++++++++++ eland/ml/external_ml_model.py | 8 ++++---- 2 files changed, 16 
insertions(+), 4 deletions(-) diff --git a/docs/source/reference/ml.rst b/docs/source/reference/ml.rst index ba632775..bcb44bd9 100644 --- a/docs/source/reference/ml.rst +++ b/docs/source/reference/ml.rst @@ -5,6 +5,18 @@ Machine Learning ================ .. currentmodule:: eland.ml +Machine learning is built into the Elastic Stack and enables users to gain insights into their Elasticsearch data. +There are a wide range of capabilities from identifying +anomalies in your data, to training and deploying regression or classification models based on Elasticsearch data. + +To use the Elastic Stack machine learning features, you must have the appropriate license and at least one machine +learning node in your Elasticsearch cluster. If Elastic Stack security features are enabled, you must also ensure +your users have the necessary privileges. + +The fastest way to get started with machine learning features is to start a free 14-day trial of Elasticsearch Service in the cloud. + +See https://www.elastic.co/guide/en/machine-learning/current/setup.html and other documentation for more detail. + ExternalMLModel ~~~~~~~~~~~~~~~ .. currentmodule:: eland.ml diff --git a/eland/ml/external_ml_model.py b/eland/ml/external_ml_model.py index 286dd7ce..b30a4598 100644 --- a/eland/ml/external_ml_model.py +++ b/eland/ml/external_ml_model.py @@ -30,8 +30,8 @@ class ExternalMLModel(MLModel): """ - Put a trained inference model in Elasticsearch based on an external model. - An external model that is transformed and added to Elasticsearch. + Transform and serialize a trained 3rd party model into Elasticsearch. + This model can then be used for inference in the Elastic Stack. Parameters ---------- @@ -152,9 +152,9 @@ def __init__(self, def predict(self, X): """ - Make a prediction using a trained inference model in Elasticsearch. + Make a prediction using a trained model stored in Elasticsearch. - Parameters for this method are not fully compatible with standard sklearn.predict. 
+ Parameters for this method are not yet fully compatible with standard sklearn.predict. Parameters ---------- From 3f3eaae4f1fe22149502dde041abfd434faf6ac5 Mon Sep 17 00:00:00 2001 From: Stephen Dodson Date: Fri, 28 Feb 2020 12:39:04 +0000 Subject: [PATCH 4/4] Fixing too_long_frame_exception in scan/scroll --- eland/operations.py | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/eland/operations.py b/eland/operations.py index 2ab7a5ee..a5d4d760 100644 --- a/eland/operations.py +++ b/eland/operations.py @@ -591,7 +591,17 @@ def _es_results(self, query_compiler, collector): # Only return requested field_names _source = query_compiler.get_field_names(include_scripted_fields=False) - if not _source: + if _source: + # For query_compiler._client.search we could add _source + # as a parameter, or add this value in body. + # + # If _source is a parameter it is encoded into the url. + # + # If _source is a large number of fields (1000+) then this can result in an + # extremely long url and a `too_long_frame_exception`. Therefore, add + # _source to the body rather than as a _source parameter + body['_source'] = _source + else: _source = False es_results = None @@ -602,16 +612,7 @@ def _es_results(self, query_compiler, collector): if size is not None and size <= DEFAULT_ES_MAX_RESULT_WINDOW: if size > 0: try: - # For query_compiler._client.search we could add _source - # as a parameter, or add this value in body. - # - # If _source is a parameter it is encoded into to the url. - # - # If _source is a large number of fields (1000+) then this can result in an - # extremely long url and a `too_long_frame_exception`. 
Therefore, add - # _source to the body rather than as a _source parameter - if _source: - body['_source'] = _source + es_results = query_compiler._client.search( index=query_compiler._index_pattern, @@ -624,8 +625,7 @@ def _es_results(self, query_compiler, collector): 'index': query_compiler._index_pattern, 'size': size, 'sort': sort_params, - 'body': body, - '_source': _source + 'body': body } print("Elasticsearch error:", error) raise @@ -633,8 +633,7 @@ def _es_results(self, query_compiler, collector): is_scan = True es_results = query_compiler._client.scan( index=query_compiler._index_pattern, - query=body, - _source=_source) + query=body) # create post sort if sort_params is not None: post_processing.append(SortFieldAction(sort_params))