Upgrade Elasticsearch to 8 #33135

Merged · 7 commits · Aug 8, 2023
2 changes: 1 addition & 1 deletion airflow/config_templates/config.yml
@@ -2379,7 +2379,7 @@ elasticsearch:
elasticsearch_configs:
description: ~
options:
use_ssl:
Member:
Why this change?

Contributor Author:
In elasticsearch 7, use_ssl is an accepted parameter when constructing the Elasticsearch client. See the following source code:

https://github.com/elastic/elasticsearch-py/blob/7.14/elasticsearch/client/__init__.py#L113

However, in elasticsearch 8, the client no longer accepts a use_ssl parameter. See the following source code:

https://github.com/elastic/elasticsearch-py/blob/8.9/elasticsearch/_sync/client/__init__.py#L129

Therefore, to make the test suite compile with ES 8, I use http_compress as the argument instead (which is one of the accepted arguments for constructing the ES client).
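To illustrate the difference, a minimal sketch (the URL and flag values are assumptions for the example, not values from this PR):

```python
from elasticsearch import Elasticsearch

# elasticsearch-py 7.x accepted a bare host plus use_ssl:
#   client = Elasticsearch(["localhost:9200"], use_ssl=True)

# elasticsearch-py 8.x infers TLS from the URL scheme instead;
# http_compress remains an accepted keyword argument.
client = Elasticsearch("https://localhost:9200", http_compress=True)
```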

http_compress:
description: ~
version_added: 1.10.5
type: string
19 changes: 8 additions & 11 deletions airflow/providers/elasticsearch/log/es_task_handler.py
@@ -30,7 +30,7 @@
# Using `from elasticsearch import *` would break elasticsearch mocking used in unit test.
import elasticsearch
import pendulum
from elasticsearch.exceptions import ElasticsearchException, NotFoundError
from elasticsearch.exceptions import NotFoundError

from airflow.configuration import conf
from airflow.exceptions import AirflowProviderDeprecationWarning
@@ -89,7 +89,7 @@ def __init__(
json_fields: str,
host_field: str = "host",
offset_field: str = "offset",
host: str = "localhost:9200",
host: str = "http://localhost:9200",
frontend: str = "localhost:5601",
index_patterns: str | None = conf.get("elasticsearch", "index_patterns", fallback="_all"),
es_kwargs: dict | None = conf.getsection("elasticsearch_configs"),
@@ -101,8 +101,8 @@
super().__init__(base_log_folder, filename_template)
self.closed = False

self.client = elasticsearch.Elasticsearch(host.split(";"), **es_kwargs) # type: ignore[attr-defined]

self.client = elasticsearch.Elasticsearch(host, **es_kwargs) # type: ignore[attr-defined]
# in airflow.cfg, host of elasticsearch has to be http://dockerhostXxxx:9200
Member:
May I know what error we see if the protocol is not included in the configured value?

Contributor Author:
In ES 8, when constructing a new Elasticsearch client, a full URL including http:// or https:// is expected, and you will get a ValueError when calling __init__, like below:

[screenshot: ValueError traceback]
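A minimal sketch of the failure mode (the exact error wording may vary between 8.x releases):

```python
from elasticsearch import Elasticsearch

try:
    # A bare "host:port" string is rejected in 8.x with a ValueError,
    # roughly: "URL must include a 'scheme', 'host', and 'port' component".
    Elasticsearch("localhost:9200")
except ValueError as err:
    print(err)

Elasticsearch("http://localhost:9200")  # accepted
```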

if USE_PER_RUN_LOG_ID and log_id_template is not None:
warnings.warn(
"Passing log_id_template to ElasticsearchTaskHandler is deprecated and has no effect",
@@ -292,27 +292,24 @@ def es_read(self, log_id: str, offset: int | str, metadata: dict) -> list | Elas
}

try:
max_log_line = self.client.count(index=self.index_patterns, body=query)["count"]
max_log_line = self.client.count(index=self.index_patterns, body=query)["count"] # type: ignore
Member:
Why do we have a type ignore here?

Contributor Author:
So, if we look at the official ES package, the count API of the Elasticsearch class doesn't actually declare body as a parameter. See this link: https://github.com/elastic/elasticsearch-py/blob/main/elasticsearch/_sync/client/__init__.py#L801

But the body parameter is still accepted, because there is a decorator at the beginning which modifies the function to accept body as an argument. See these few lines: https://github.com/elastic/elasticsearch-py/blob/main/elasticsearch/_sync/client/__init__.py#L798-L800

Therefore, without the type ignore, the provider's mypy pre-commit job fails, because mypy thinks body is not an accepted parameter (when it actually is).
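As a rough sketch (not the actual elasticsearch-py code) of why mypy cannot see the parameter: a signature-preserving decorator can accept body at runtime and remap it onto the real keyword arguments, while the declared signature that mypy checks never mentions it:

```python
from __future__ import annotations

from functools import wraps
from typing import Any, Callable, TypeVar

F = TypeVar("F", bound=Callable[..., Any])


def rewrite_body(func: F) -> F:
    """Accept `body` at runtime and unpack it into real keyword args."""

    @wraps(func)
    def wrapper(self, *args, body=None, **kwargs):
        if body is not None:
            kwargs.update(body)  # e.g. {"query": ...} becomes query=...
        return func(self, *args, **kwargs)

    return wrapper  # type: ignore[return-value]


class Client:
    @rewrite_body
    def count(self, *, index: str | None = None, query: dict | None = None) -> dict:
        return {"count": 0}


# Works at runtime, but mypy only sees count()'s declared signature
# (no `body`), so the call needs a type ignore to pass checks.
Client().count(index="_all", body={"query": {"match_all": {}}})  # type: ignore[call-arg]
```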

except NotFoundError as e:
self.log.exception("The target index pattern %s does not exist", self.index_patterns)
raise e
except ElasticsearchException as e:
self.log.exception("Could not get current log size with log_id: %s", log_id)
raise e

logs: list[Any] | ElasticSearchResponse = []
if max_log_line != 0:
try:
query.update({"sort": [self.offset_field]})
res = self.client.search(
res = self.client.search( # type: ignore
Member:
Same question regarding the type ignore.

index=self.index_patterns,
body=query,
size=self.MAX_LINE_PER_PAGE,
from_=self.MAX_LINE_PER_PAGE * self.PAGE,
)
logs = ElasticSearchResponse(self, res)
except elasticsearch.exceptions.ElasticsearchException:
self.log.exception("Could not read log with log_id: %s", log_id)
except Exception as err:
Member:
Can we not narrow down the exception we catch? Is the previous exception no longer present? If so, have they added any similar class that we can use instead?

Having such a broad exception catch without re-raising it might lead to silent failures.

Contributor Author:
Yes, the exception elasticsearch.exceptions.ElasticsearchException is no longer present. Instead, new exception classes are defined, such as UnsupportedProductError, NotFoundError, and so on. See this file:

https://github.com/elastic/elasticsearch-py/blob/main/elasticsearch/exceptions.py

All of those errors can occur when calling the ES API, so maybe we should re-raise the exception after logging it to the error log?
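A hedged sketch of what a narrower catch could look like against the 8.x exception hierarchy, assuming ApiError covers the API-side failures (the client, query, and logger here are placeholders, not the handler's real attributes):

```python
import logging

from elasticsearch import Elasticsearch
from elasticsearch.exceptions import ApiError, NotFoundError

log = logging.getLogger(__name__)
client = Elasticsearch("http://localhost:9200")
query = {"query": {"match_all": {}}}

try:
    res = client.search(index="_all", body=query)
except NotFoundError:
    log.exception("The target index pattern does not exist")
    raise
except ApiError:
    # Other API-level failures (bad request, auth, unsupported product, ...)
    log.exception("Could not read log")
    raise
```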

self.log.exception("Could not read log with log_id: %s. Exception: %s", log_id, err)

return logs

3 changes: 2 additions & 1 deletion airflow/providers/elasticsearch/provider.yaml
@@ -23,6 +23,7 @@ description: |

suspended: false
versions:
- 5.1.0
- 5.0.0
- 4.5.1
- 4.5.0
@@ -53,7 +54,7 @@ versions:
dependencies:
- apache-airflow>=2.4.0
- apache-airflow-providers-common-sql>=1.3.1
- elasticsearch>7,<7.15.0
- elasticsearch>8,<9

integrations:
- integration-name: Elasticsearch
2 changes: 1 addition & 1 deletion generated/provider_dependencies.json
@@ -358,7 +358,7 @@
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow>=2.4.0",
"elasticsearch>7,<7.15.0"
"elasticsearch>8,<9"
],
"cross-providers-deps": [
"common.sql"
44 changes: 41 additions & 3 deletions tests/providers/elasticsearch/log/elasticmock/__init__.py
@@ -41,17 +41,55 @@
"""Elastic mock module used for testing"""
from functools import wraps
from unittest.mock import patch

from elasticsearch.client.utils import _normalize_hosts
from urllib.parse import unquote, urlparse

from .fake_elasticsearch import FakeElasticsearch

ELASTIC_INSTANCES: dict[str, FakeElasticsearch] = {}


def _normalize_hosts(hosts):
"""
Helper function to transform hosts argument to
:class:`~elasticsearch.Elasticsearch` to a list of dicts.
"""
# if hosts are empty, just defer to defaults down the line
if hosts is None:
return [{}]

hosts = [hosts]

out = []

for host in hosts:
if "://" not in host:
host = f"//{host}"

parsed_url = urlparse(host)
h = {"host": parsed_url.hostname}

if parsed_url.port:
h["port"] = parsed_url.port

if parsed_url.scheme == "https":
h["port"] = parsed_url.port or 443
h["use_ssl"] = True

if parsed_url.username or parsed_url.password:
h["http_auth"] = f"{unquote(parsed_url.username)}:{unquote(parsed_url.password)}"

if parsed_url.path and parsed_url.path != "/":
h["url_prefix"] = parsed_url.path

out.append(h)
else:
out.append(host)
return out


def _get_elasticmock(hosts=None, *args, **kwargs):
host = _normalize_hosts(hosts)[0]
elastic_key = f"{host.get('host', 'localhost')}:{host.get('port', 9200)}"
elastic_key = f"http://{host.get('host', 'localhost')}:{host.get('port', 9200)}"

if elastic_key in ELASTIC_INSTANCES:
connection = ELASTIC_INSTANCES.get(elastic_key)
26 changes: 11 additions & 15 deletions tests/providers/elasticsearch/log/elasticmock/fake_elasticsearch.py
@@ -20,10 +20,9 @@
import json

from elasticsearch import Elasticsearch
from elasticsearch.client.utils import query_params
from elasticsearch.exceptions import NotFoundError

from .utilities import get_random_id
from .utilities import MissingIndexException, get_random_id, query_params

#
# The MIT License (MIT)
@@ -53,7 +52,7 @@ class FakeElasticsearch(Elasticsearch):
__documents_dict = None

def __init__(self):
super().__init__()
super().__init__("http://localhost:9200")
self.__documents_dict = {}

@query_params()
@@ -327,9 +326,8 @@ def get_source(self, index, doc_type, id, params=None):
"version",
)
def count(self, index=None, doc_type=None, body=None, params=None, headers=None):
searchable_indexes = self._normalize_index_to_list(index)
searchable_indexes = self._normalize_index_to_list(index, body)
searchable_doc_types = self._normalize_doc_type_to_list(doc_type)

i = 0
for searchable_index in searchable_indexes:
for document in self.__documents_dict[searchable_index]:
@@ -376,7 +374,7 @@ def count(self, index=None, doc_type=None, body=None, params=None, headers=None)
"version",
)
def search(self, index=None, doc_type=None, body=None, params=None, headers=None):
searchable_indexes = self._normalize_index_to_list(index)
searchable_indexes = self._normalize_index_to_list(index, body)

matches = self._find_match(index, doc_type, body)

@@ -446,7 +444,7 @@ def suggest(self, body, index=None):
return result_dict

def _find_match(self, index, doc_type, body):
searchable_indexes = self._normalize_index_to_list(index)
searchable_indexes = self._normalize_index_to_list(index, body)
searchable_doc_types = self._normalize_doc_type_to_list(doc_type)

must = body["query"]["bool"]["must"][0] # only support one must
@@ -477,19 +475,20 @@ def match_must_phrase(document, matches, must):
matches.append(document)

# Check index(es) exists.
def _validate_search_targets(self, targets):
def _validate_search_targets(self, targets, body):
# TODO: support allow_no_indices query parameter
matches = set()
for target in targets:
print(f"Loop over:::target = {target}")
if target == "_all" or target == "":
matches.update(self.__documents_dict)
elif "*" in target:
matches.update(fnmatch.filter(self.__documents_dict, target))
elif target not in self.__documents_dict:
raise NotFoundError(404, f"IndexMissingException[[{target}] missing]")
raise MissingIndexException(msg=f"IndexMissingException[[{target}] missing]", body=body)
return matches

def _normalize_index_to_list(self, index):
def _normalize_index_to_list(self, index, body):
# Ensure to have a list of index
if index is None:
searchable_indexes = self.__documents_dict.keys()
@@ -501,11 +500,8 @@ def _normalize_index_to_list(self, index):
# Is it the correct exception to use ?
raise ValueError("Invalid param 'index'")

return list(
self._validate_search_targets(
target for index in searchable_indexes for target in index.split(",")
)
)
generator = (target for index in searchable_indexes for target in index.split(","))
return list(self._validate_search_targets(generator, body))

@staticmethod
def _normalize_doc_type_to_list(doc_type):