Merge branch 'main' into refactoring/legacy_indices_read_only_compatible

javanna · Dec 19, 2024 · a906eff · a906eff
2 parents f943a25 + 84f233a
commit a906eff
Show file tree

Hide file tree

Showing 32 changed files with 891 additions and 317 deletions.
diff --git a/.ci/init.gradle b/.ci/init.gradle
@@ -1,95 +1,3 @@
-import com.bettercloud.vault.VaultConfig
-import com.bettercloud.vault.Vault
-
-initscript {
-  repositories {
-    mavenCentral()
-  }
-  dependencies {
-    classpath 'com.bettercloud:vault-java-driver:4.1.0'
-  }
-}
-
-boolean USE_ARTIFACTORY = false
-
-if (System.getenv('VAULT_ADDR') == null) {
-  // When trying to reproduce errors outside of CI, it can be useful to allow this to just return rather than blowing up
-  if (System.getenv('CI') == null) {
-    return
-  }
-
-  throw new GradleException("You must set the VAULT_ADDR environment variable to use this init script.")
-}
-
-if (System.getenv('VAULT_ROLE_ID') == null && System.getenv('VAULT_SECRET_ID') == null && System.getenv('VAULT_TOKEN') == null) {
-    // When trying to reproduce errors outside of CI, it can be useful to allow this to just return rather than blowing up
-  if (System.getenv('CI') == null) {
-    return
-  }
-
-  throw new GradleException("You must set either the VAULT_ROLE_ID and VAULT_SECRET_ID environment variables, " +
-    "or the VAULT_TOKEN environment variable to use this init script.")
-}
-
-final String vaultPathPrefix = System.getenv('VAULT_ADDR') ==~ /.+vault-ci.+\.dev.*/ ? "secret/ci/elastic-elasticsearch/migrated" : "secret/elasticsearch-ci"
-
-final String vaultToken = System.getenv('VAULT_TOKEN') ?: new Vault(
-  new VaultConfig()
-    .address(System.env.VAULT_ADDR)
-    .engineVersion(1)
-    .build()
-)
-  .withRetries(5, 1000)
-  .auth()
-  .loginByAppRole("approle", System.env.VAULT_ROLE_ID, System.env.VAULT_SECRET_ID)
-  .getAuthClientToken()
-
-final Vault vault = new Vault(
-  new VaultConfig()
-    .address(System.env.VAULT_ADDR)
-    .engineVersion(1)
-    .token(vaultToken)
-    .build()
-)
-  .withRetries(5, 1000)
-
-
-if (USE_ARTIFACTORY) {
-  final Map<String, String> artifactoryCredentials = vault.logical()
-    .read("${vaultPathPrefix}/artifactory.elstc.co")
-    .getData()
-  logger.info("Using elastic artifactory repos")
-  Closure configCache = {
-    return {
-      name "artifactory-gradle-release"
-      url "https://artifactory.elstc.co/artifactory/gradle-release"
-      credentials {
-        username artifactoryCredentials.get("username")
-        password artifactoryCredentials.get("token")
-      }
-    }
-  }
-  settingsEvaluated { settings ->
-    settings.pluginManagement {
-      repositories {
-        maven configCache()
-      }
-    }
-  }
-  projectsLoaded {
-    allprojects {
-      buildscript {
-        repositories {
-          maven configCache()
-        }
-      }
-      repositories {
-        maven configCache()
-      }
-    }
-  }
-}
-
 gradle.settingsEvaluated { settings ->
   settings.pluginManager.withPlugin("com.gradle.develocity") {
     settings.develocity {
@@ -98,14 +6,10 @@ gradle.settingsEvaluated { settings ->
   }
 }
 
-
 final String buildCacheUrl = System.getProperty('org.elasticsearch.build.cache.url')
 final boolean buildCachePush = Boolean.valueOf(System.getProperty('org.elasticsearch.build.cache.push', 'false'))
 
 if (buildCacheUrl) {
-  final Map<String, String> buildCacheCredentials = System.getenv("GRADLE_BUILD_CACHE_USERNAME") ? [:] : vault.logical()
-    .read("${vaultPathPrefix}/gradle-build-cache")
-    .getData()
   gradle.settingsEvaluated { settings ->
     settings.buildCache {
       local {
@@ -116,11 +20,10 @@ if (buildCacheUrl) {
         url = buildCacheUrl
         push = buildCachePush
         credentials {
-          username = System.getenv("GRADLE_BUILD_CACHE_USERNAME") ?: buildCacheCredentials.get("username")
-          password = System.getenv("GRADLE_BUILD_CACHE_PASSWORD") ?: buildCacheCredentials.get("password")
+          username = System.getenv("GRADLE_BUILD_CACHE_USERNAME")
+          password = System.getenv("GRADLE_BUILD_CACHE_PASSWORD")
         }
       }
     }
   }
 }
-
diff --git a/docs/changelog/118931.yaml b/docs/changelog/118931.yaml
@@ -0,0 +1,6 @@
+pr: 118931
+summary: Add a `LicenseAware` interface for licensed Nodes
+area: ES|QL
+type: enhancement
+issues:
+ - 117405
diff --git a/docs/changelog/118941.yaml b/docs/changelog/118941.yaml
@@ -0,0 +1,5 @@
+pr: 118941
+summary: Allow archive and searchable snapshots indices in N-2 version
+area: Recovery
+type: enhancement
+issues: []
diff --git a/docs/reference/troubleshooting/common-issues/task-queue-backlog.asciidoc b/docs/reference/troubleshooting/common-issues/task-queue-backlog.asciidoc
@@ -1,103 +1,149 @@
 [[task-queue-backlog]]
-=== Task queue backlog
+=== Backlogged task queue
 
-A backlogged task queue can prevent tasks from completing and put the cluster
-into an unhealthy state. Resource constraints, a large number of tasks being
-triggered at once, and long running tasks can all contribute to a backlogged
-task queue.
+*******************************
+*Product:* Elasticsearch +
+*Deployment type:* Elastic Cloud Enterprise, Elastic Cloud Hosted, Elastic Cloud on Kubernetes, Elastic Self-Managed  +
+*Versions:* All
+*******************************
+
+A backlogged task queue can prevent tasks from completing and lead to an 
+unhealthy cluster state. Contributing factors include resource constraints, 
+a large number of tasks triggered at once, and long-running tasks.
 
 [discrete]
 [[diagnose-task-queue-backlog]]
-==== Diagnose a task queue backlog
+==== Diagnose a backlogged task queue
+
+To identify the cause of the backlog, try these diagnostic actions.
 
-**Check the thread pool status**
+* <<diagnose-task-queue-thread-pool>>
+* <<diagnose-task-queue-hot-thread>>
+* <<diagnose-task-queue-long-running-node-tasks>>
+* <<diagnose-task-queue-long-running-cluster-tasks>>
+
+[discrete]
+[[diagnose-task-queue-thread-pool]]
+===== Check the thread pool status
 
 A <<high-cpu-usage,depleted thread pool>> can result in
 <<rejected-requests,rejected requests>>. 
 
-Thread pool depletion might be restricted to a specific <<data-tiers,data tier>>. If <<hotspotting,hot spotting>> is occuring, one node might experience depletion faster than other nodes, leading to performance issues and a growing task backlog.
-
-You can use the <<cat-thread-pool,cat thread pool API>> to see the number of
-active threads in each thread pool and how many tasks are queued, how many
-have been rejected, and how many have completed.
+Use the <<cat-thread-pool,cat thread pool API>> to monitor
+active threads, queued tasks, rejections, and completed tasks:
 
 [source,console]
 ----
 GET /_cat/thread_pool?v&s=t,n&h=type,name,node_name,active,queue,rejected,completed
 ----
 
-The `active` and `queue` statistics are instantaneous while the `rejected` and
-`completed` statistics are cumulative from node startup.
+* Look for high `active` and `queue` metrics, which indicate potential bottlenecks
+and opportunities to <<reduce-cpu-usage,reduce CPU usage>>.
+* Determine whether thread pool issues are specific to a <<data-tiers,data tier>>.
+* Check whether a specific node's thread pool is depleting faster than others. This
+might indicate <<resolve-task-queue-backlog-hotspotting,hot spotting>>.
 
-**Inspect the hot threads on each node**
+[discrete]
+[[diagnose-task-queue-hot-thread]]
+===== Inspect hot threads on each node
 
-If a particular thread pool queue is backed up, you can periodically poll the
-<<cluster-nodes-hot-threads,Nodes hot threads>> API to determine if the thread
-has sufficient resources to progress and gauge how quickly it is progressing.
+If a particular thread pool queue is backed up, periodically poll the
+<<cluster-nodes-hot-threads,nodes hot threads API>> to gauge the thread's
+progression and ensure it has sufficient resources:
 
 [source,console]
 ----
 GET /_nodes/hot_threads
 ----
 
-**Look for long running node tasks**
+Although the hot threads API response does not list the specific tasks running on a thread, 
+it provides a summary of the thread's activities. You can correlate a hot threads response 
+with a <<tasks,task management API response>> to identify any overlap with specific tasks. For 
+example, if the hot threads response indicates the thread is `performing a search query`, you can 
+<<diagnose-task-queue-long-running-node-tasks,check for long-running search tasks>> using the task management API.
+
+[discrete]
+[[diagnose-task-queue-long-running-node-tasks]]
+===== Identify long-running node tasks
 
-Long-running tasks can also cause a backlog. You can use the <<tasks,task
-management>> API to get information about the node tasks that are running.
-Check the `running_time_in_nanos` to identify tasks that are taking an
-excessive amount of time to complete.
+Long-running tasks can also cause a backlog. Use the <<tasks,task
+management API>> to check for excessive `running_time_in_nanos` values:
 
 [source,console]
 ----
 GET /_tasks?pretty=true&human=true&detailed=true
 ----
 
-If a particular `action` is suspected, you can filter the tasks further. The most common long-running tasks are <<docs-bulk,bulk index>>- or search-related.
+You can filter on a specific `action`, such as <<docs-bulk,bulk indexing>> or search-related tasks.
+These tend to be long-running.
 
-* Filter for <<docs-bulk,bulk index>> actions:
+* Filter on <<docs-bulk,bulk index>> actions:
 +
 [source,console]
 ----
 GET /_tasks?human&detailed&actions=indices:data/write/bulk
 ----
 
-* Filter for search actions:
+* Filter on search actions:
 +
 [source,console]
 ----
 GET /_tasks?human&detailed&actions=indices:data/read/search
 ----
 
-The API response may contain additional tasks columns, including `description` and `header`, which provides the task parameters, target, and requestor. You can use this information to perform further diagnosis.
+Long-running tasks might need to be <<resolve-task-queue-backlog-stuck-tasks,canceled>>.
 
-**Look for long running cluster tasks**
+[discrete]
+[[diagnose-task-queue-long-running-cluster-tasks]]
+===== Look for long-running cluster tasks
 
-A task backlog might also appear as a delay in synchronizing the cluster state. You
-can use the <<cluster-pending,cluster pending tasks API>> to get information
-about the pending cluster state sync tasks that are running. 
+Use the <<cluster-pending,cluster pending tasks API>> to identify delays
+in cluster state synchronization: 
 
 [source,console]
 ----
 GET /_cluster/pending_tasks
 ----
 
-Check the `timeInQueue` to identify tasks that are taking an excessive amount 
-of time to complete.
+Tasks with a high `timeInQueue` value are likely contributing to the backlog and might
+need to be <<resolve-task-queue-backlog-stuck-tasks,canceled>>.
 
 [discrete]
 [[resolve-task-queue-backlog]]
-==== Resolve a task queue backlog
+==== Recommendations
+
+After identifying problematic threads and tasks, resolve the issue by increasing resources, canceling stuck tasks, or addressing hot spotting.
 
-**Increase available resources** 
+[discrete]
+[[resolve-task-queue-backlog-resources]]
+===== Increase available resources
 
-If tasks are progressing slowly and the queue is backing up, 
-you might need to take steps to <<reduce-cpu-usage>>. 
+If tasks are progressing slowly, try <<reduce-cpu-usage,reducing CPU usage>>.
 
-In some cases, increasing the thread pool size might help.
-For example, the `force_merge` thread pool defaults to a single thread.
+In some cases, you might need to increase the thread pool size. For example, the `force_merge` thread pool defaults to a single thread. 
 Increasing the size to 2 might help reduce a backlog of force merge requests.
 
-**Cancel stuck tasks**
+[discrete]
+[[resolve-task-queue-backlog-stuck-tasks]]
+===== Cancel stuck tasks
+
+If an active task's <<diagnose-task-queue-hot-thread,hot thread>> shows no progress, consider <<task-cancellation,canceling the task>>.
+
+[discrete]
+[[resolve-task-queue-backlog-hotspotting]]
+===== Address hot spotting
+
+If a specific node's thread pool is depleting faster than others, try addressing
+uneven node resource utilization, also known as hot spotting.
+For details on actions you can take, such as rebalancing shards, see <<hotspotting>>.
+
+[discrete]
+==== Resources
+
+Related symptoms:
+
+* <<high-cpu-usage>>
+* <<rejected-requests>>
+* <<hotspotting>>
 
-If you find the active task's hot thread isn't progressing and there's a backlog, 
-consider canceling the task. 
+// TODO add link to standard Additional resources when that topic exists
diff --git a/muted-tests.yml b/muted-tests.yml
@@ -300,6 +300,14 @@ tests:
   issue: https://github.com/elastic/elasticsearch/issues/115727
 - class: org.elasticsearch.xpack.security.authc.kerberos.KerberosAuthenticationIT
   issue: https://github.com/elastic/elasticsearch/issues/118414
+- class: org.elasticsearch.xpack.esql.qa.multi_node.EsqlClientYamlIT
+  issue: https://github.com/elastic/elasticsearch/issues/119086
+- class: org.elasticsearch.xpack.spatial.index.query.ShapeQueryBuilderOverShapeTests
+  method: testToQuery
+  issue: https://github.com/elastic/elasticsearch/issues/119090
+- class: org.elasticsearch.xpack.spatial.index.query.GeoShapeQueryBuilderGeoShapeTests
+  method: testToQuery
+  issue: https://github.com/elastic/elasticsearch/issues/119091
 
 # Examples:
 #