Skip to content

Commit

Permalink
Merge branch 'main' into refactoring/timestamp_range_unknown_updatev9
Browse files Browse the repository at this point in the history
  • Loading branch information
javanna authored Dec 19, 2024
2 parents 6472d16 + c808137 commit d1399d7
Show file tree
Hide file tree
Showing 14 changed files with 325 additions and 179 deletions.
101 changes: 2 additions & 99 deletions .ci/init.gradle
Original file line number Diff line number Diff line change
@@ -1,95 +1,3 @@
import com.bettercloud.vault.VaultConfig
import com.bettercloud.vault.Vault

// Put the Vault Java client on the init script's own classpath; it is used
// below to fetch CI credentials from HashiCorp Vault.
initscript {
repositories {
mavenCentral()
}
dependencies {
classpath 'com.bettercloud:vault-java-driver:4.1.0'
}
}

// When true, dependency resolution is routed through Elastic's Artifactory
// mirrors (see the USE_ARTIFACTORY block further down in this script).
boolean USE_ARTIFACTORY = false

// Bail out early unless Vault connectivity is fully configured. Outside CI
// (no CI env var) we return quietly so that local reproduction of CI
// failures still works; inside CI a missing variable is a hard error.
if (System.getenv('VAULT_ADDR') == null) {
    if (System.getenv('CI') == null) {
        return
    }
    throw new GradleException("You must set the VAULT_ADDR environment variable to use this init script.")
}

// Authentication needs either a direct token or a full AppRole pair.
if (['VAULT_ROLE_ID', 'VAULT_SECRET_ID', 'VAULT_TOKEN'].every { System.getenv(it) == null }) {
    if (System.getenv('CI') == null) {
        return
    }
    throw new GradleException("You must set either the VAULT_ROLE_ID and VAULT_SECRET_ID environment variables, " +
        "or the VAULT_TOKEN environment variable to use this init script.")
}

// Secrets live under a migrated path on the newer "vault-ci" (.dev) clusters;
// the legacy CI Vault keeps the original "secret/elasticsearch-ci" prefix.
final String vaultPathPrefix = System.getenv('VAULT_ADDR') ==~ /.+vault-ci.+\.dev.*/ ? "secret/ci/elastic-elasticsearch/migrated" : "secret/elasticsearch-ci"

// Use VAULT_TOKEN verbatim when provided; otherwise perform an AppRole login
// (VAULT_ROLE_ID / VAULT_SECRET_ID) to obtain a client token. Retries up to
// 5 times, 1000 ms apart, to ride out transient Vault hiccups.
final String vaultToken = System.getenv('VAULT_TOKEN') ?: new Vault(
new VaultConfig()
.address(System.env.VAULT_ADDR)
.engineVersion(1)
.build()
)
.withRetries(5, 1000)
.auth()
.loginByAppRole("approle", System.env.VAULT_ROLE_ID, System.env.VAULT_SECRET_ID)
.getAuthClientToken()

// Authenticated client (KV engine v1) used below to read CI secrets.
final Vault vault = new Vault(
new VaultConfig()
.address(System.env.VAULT_ADDR)
.engineVersion(1)
.token(vaultToken)
.build()
)
.withRetries(5, 1000)


// NOTE: dead code path — USE_ARTIFACTORY is hard-coded to false earlier in
// this script, so this block never executes. It is kept so the Artifactory
// mirrors can be re-enabled locally by flipping that flag.
if (USE_ARTIFACTORY) {
// Credentials come from Vault rather than being stored in CI configuration.
final Map<String, String> artifactoryCredentials = vault.logical()
.read("${vaultPathPrefix}/artifactory.elstc.co")
.getData()
logger.info("Using elastic artifactory repos")
// Factory closure: each call returns a fresh repository-configuration
// closure, so the same definition can be applied to several repository blocks.
Closure configCache = {
return {
name "artifactory-gradle-release"
url "https://artifactory.elstc.co/artifactory/gradle-release"
credentials {
username artifactoryCredentials.get("username")
// The Artifactory access token is used as the password.
password artifactoryCredentials.get("token")
}
}
}
// Route plugin resolution through the Artifactory mirror.
settingsEvaluated { settings ->
settings.pluginManagement {
repositories {
maven configCache()
}
}
}
// Route buildscript and project dependency resolution through the mirror.
projectsLoaded {
allprojects {
buildscript {
repositories {
maven configCache()
}
}
repositories {
maven configCache()
}
}
}
}

gradle.settingsEvaluated { settings ->
settings.pluginManager.withPlugin("com.gradle.develocity") {
settings.develocity {
Expand All @@ -98,14 +6,10 @@ gradle.settingsEvaluated { settings ->
}
}


final String buildCacheUrl = System.getProperty('org.elasticsearch.build.cache.url')
final boolean buildCachePush = Boolean.valueOf(System.getProperty('org.elasticsearch.build.cache.push', 'false'))

if (buildCacheUrl) {
final Map<String, String> buildCacheCredentials = System.getenv("GRADLE_BUILD_CACHE_USERNAME") ? [:] : vault.logical()
.read("${vaultPathPrefix}/gradle-build-cache")
.getData()
gradle.settingsEvaluated { settings ->
settings.buildCache {
local {
Expand All @@ -116,11 +20,10 @@ if (buildCacheUrl) {
url = buildCacheUrl
push = buildCachePush
credentials {
username = System.getenv("GRADLE_BUILD_CACHE_USERNAME") ?: buildCacheCredentials.get("username")
password = System.getenv("GRADLE_BUILD_CACHE_PASSWORD") ?: buildCacheCredentials.get("password")
username = System.getenv("GRADLE_BUILD_CACHE_USERNAME")
password = System.getenv("GRADLE_BUILD_CACHE_PASSWORD")
}
}
}
}
}

6 changes: 6 additions & 0 deletions docs/changelog/118931.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Release-notes entry for https://github.com/elastic/elasticsearch/pull/118931
# (consumed by the changelog generator; closes issue 117405).
pr: 118931
summary: Add a `LicenseAware` interface for licensed Nodes
area: ES|QL
type: enhancement
issues:
- 117405
Original file line number Diff line number Diff line change
@@ -1,103 +1,149 @@
[[task-queue-backlog]]
=== Task queue backlog
=== Backlogged task queue

A backlogged task queue can prevent tasks from completing and put the cluster
into an unhealthy state. Resource constraints, a large number of tasks being
triggered at once, and long running tasks can all contribute to a backlogged
task queue.
*******************************
*Product:* Elasticsearch +
*Deployment type:* Elastic Cloud Enterprise, Elastic Cloud Hosted, Elastic Cloud on Kubernetes, Elastic Self-Managed +
*Versions:* All
*******************************

A backlogged task queue can prevent tasks from completing and lead to an
unhealthy cluster state. Contributing factors include resource constraints,
a large number of tasks triggered at once, and long-running tasks.

[discrete]
[[diagnose-task-queue-backlog]]
==== Diagnose a task queue backlog
==== Diagnose a backlogged task queue

To identify the cause of the backlog, try these diagnostic actions.

**Check the thread pool status**
* <<diagnose-task-queue-thread-pool>>
* <<diagnose-task-queue-hot-thread>>
* <<diagnose-task-queue-long-running-node-tasks>>
* <<diagnose-task-queue-long-running-cluster-tasks>>

[discrete]
[[diagnose-task-queue-thread-pool]]
===== Check the thread pool status

A <<high-cpu-usage,depleted thread pool>> can result in
<<rejected-requests,rejected requests>>.

Thread pool depletion might be restricted to a specific <<data-tiers,data tier>>. If <<hotspotting,hot spotting>> is occurring, one node might experience depletion faster than other nodes, leading to performance issues and a growing task backlog.

You can use the <<cat-thread-pool,cat thread pool API>> to see the number of
active threads in each thread pool and how many tasks are queued, how many
have been rejected, and how many have completed.
Use the <<cat-thread-pool,cat thread pool API>> to monitor
active threads, queued tasks, rejections, and completed tasks:

[source,console]
----
GET /_cat/thread_pool?v&s=t,n&h=type,name,node_name,active,queue,rejected,completed
----

The `active` and `queue` statistics are instantaneous while the `rejected` and
`completed` statistics are cumulative from node startup.
* Look for high `active` and `queue` metrics, which indicate potential bottlenecks
and opportunities to <<reduce-cpu-usage,reduce CPU usage>>.
* Determine whether thread pool issues are specific to a <<data-tiers,data tier>>.
* Check whether a specific node's thread pool is depleting faster than others. This
might indicate <<resolve-task-queue-backlog-hotspotting, hot spotting>>.

**Inspect the hot threads on each node**
[discrete]
[[diagnose-task-queue-hot-thread]]
===== Inspect hot threads on each node

If a particular thread pool queue is backed up, you can periodically poll the
<<cluster-nodes-hot-threads,Nodes hot threads>> API to determine if the thread
has sufficient resources to progress and gauge how quickly it is progressing.
If a particular thread pool queue is backed up, periodically poll the
<<cluster-nodes-hot-threads,nodes hot threads API>> to gauge the thread's
progression and ensure it has sufficient resources:

[source,console]
----
GET /_nodes/hot_threads
----

**Look for long running node tasks**
Although the hot threads API response does not list the specific tasks running on a thread,
it provides a summary of the thread's activities. You can correlate a hot threads response
with a <<tasks,task management API response>> to identify any overlap with specific tasks. For
example, if the hot threads response indicates the thread is `performing a search query`, you can
<<diagnose-task-queue-long-running-node-tasks,check for long-running search tasks>> using the task management API.

[discrete]
[[diagnose-task-queue-long-running-node-tasks]]
===== Identify long-running node tasks

Long-running tasks can also cause a backlog. You can use the <<tasks,task
management>> API to get information about the node tasks that are running.
Check the `running_time_in_nanos` to identify tasks that are taking an
excessive amount of time to complete.
Long-running tasks can also cause a backlog. Use the <<tasks,task
management API>> to check for excessive `running_time_in_nanos` values:

[source,console]
----
GET /_tasks?pretty=true&human=true&detailed=true
----

If a particular `action` is suspected, you can filter the tasks further. The most common long-running tasks are <<docs-bulk,bulk index>>- or search-related.
You can filter on a specific `action`, such as <<docs-bulk,bulk indexing>> or search-related tasks.
These tend to be long-running.

* Filter for <<docs-bulk,bulk index>> actions:
* Filter on <<docs-bulk,bulk index>> actions:
+
[source,console]
----
GET /_tasks?human&detailed&actions=indices:data/write/bulk
----

* Filter for search actions:
* Filter on search actions:
+
[source,console]
----
GET /_tasks?human&detailed&actions=indices:data/read/search
----

The API response may contain additional task columns, including `description` and `header`, which provide the task parameters, target, and requestor. You can use this information to perform further diagnosis.
Long-running tasks might need to be <<resolve-task-queue-backlog-stuck-tasks,canceled>>.

**Look for long running cluster tasks**
[discrete]
[[diagnose-task-queue-long-running-cluster-tasks]]
===== Look for long-running cluster tasks

A task backlog might also appear as a delay in synchronizing the cluster state. You
can use the <<cluster-pending,cluster pending tasks API>> to get information
about the pending cluster state sync tasks that are running.
Use the <<cluster-pending,cluster pending tasks API>> to identify delays
in cluster state synchronization:

[source,console]
----
GET /_cluster/pending_tasks
----

Check the `timeInQueue` to identify tasks that are taking an excessive amount
of time to complete.
Tasks with a high `timeInQueue` value are likely contributing to the backlog and might
need to be <<resolve-task-queue-backlog-stuck-tasks,canceled>>.

[discrete]
[[resolve-task-queue-backlog]]
==== Resolve a task queue backlog
==== Recommendations

After identifying problematic threads and tasks, resolve the issue by increasing resources or canceling tasks.

**Increase available resources**
[discrete]
[[resolve-task-queue-backlog-resources]]
===== Increase available resources

If tasks are progressing slowly and the queue is backing up,
you might need to take steps to <<reduce-cpu-usage>>.
If tasks are progressing slowly, try <<reduce-cpu-usage,reducing CPU usage>>.

In some cases, increasing the thread pool size might help.
For example, the `force_merge` thread pool defaults to a single thread.
In some cases, you might need to increase the thread pool size. For example, the `force_merge` thread pool defaults to a single thread.
Increasing the size to 2 might help reduce a backlog of force merge requests.

**Cancel stuck tasks**
[discrete]
[[resolve-task-queue-backlog-stuck-tasks]]
===== Cancel stuck tasks

If an active task's <<diagnose-task-queue-hot-thread,hot thread>> shows no progress, consider <<task-cancellation,canceling the task>>.

[discrete]
[[resolve-task-queue-backlog-hotspotting]]
===== Address hot spotting

If a specific node's thread pool is depleting faster than others, try addressing
uneven node resource utilization, also known as hot spotting.
For details on actions you can take, such as rebalancing shards, see <<hotspotting>>.

[discrete]
==== Resources

Related symptoms:

* <<high-cpu-usage>>
* <<rejected-requests>>
* <<hotspotting>>

If you find the active task's hot thread isn't progressing and there's a backlog,
consider canceling the task.
// TODO add link to standard Additional resources when that topic exists
10 changes: 10 additions & 0 deletions muted-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,16 @@ tests:
issue: https://github.com/elastic/elasticsearch/issues/116777
- class: org.elasticsearch.xpack.security.authc.ldap.ActiveDirectoryRunAsIT
issue: https://github.com/elastic/elasticsearch/issues/115727
- class: org.elasticsearch.xpack.security.authc.kerberos.KerberosAuthenticationIT
issue: https://github.com/elastic/elasticsearch/issues/118414
- class: org.elasticsearch.xpack.esql.qa.multi_node.EsqlClientYamlIT
issue: https://github.com/elastic/elasticsearch/issues/119086
- class: org.elasticsearch.xpack.spatial.index.query.ShapeQueryBuilderOverShapeTests
method: testToQuery
issue: https://github.com/elastic/elasticsearch/issues/119090
- class: org.elasticsearch.xpack.spatial.index.query.GeoShapeQueryBuilderGeoShapeTests
method: testToQuery
issue: https://github.com/elastic/elasticsearch/issues/119091

# Examples:
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
import org.apache.lucene.search.Query;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.common.logging.DeprecationCategory;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.time.DateFormatters;
Expand All @@ -34,6 +36,7 @@
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.IndexVersions;
import org.elasticsearch.index.fielddata.FieldDataContext;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData.NumericType;
Expand Down Expand Up @@ -76,6 +79,7 @@
/** A {@link FieldMapper} for dates. */
public final class DateFieldMapper extends FieldMapper {

private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(DateFieldMapper.class);
private static final Logger logger = LogManager.getLogger(DateFieldMapper.class);

public static final String CONTENT_TYPE = "date";
Expand Down Expand Up @@ -342,7 +346,20 @@ private Long parseNullValue(DateFieldType fieldType) {
try {
return fieldType.parse(nullValue.getValue());
} catch (Exception e) {
throw new MapperParsingException("Error parsing [null_value] on field [" + leafName() + "]: " + e.getMessage(), e);
if (indexCreatedVersion.onOrAfter(IndexVersions.V_8_0_0)) {
throw new MapperParsingException("Error parsing [null_value] on field [" + leafName() + "]: " + e.getMessage(), e);
} else {
DEPRECATION_LOGGER.warn(
DeprecationCategory.MAPPINGS,
"date_mapper_null_field",
"Error parsing ["
+ nullValue.getValue()
+ "] as date in [null_value] on field ["
+ leafName()
+ "]); [null_value] will be ignored"
);
return null;
}
}
}

Expand Down
Loading

0 comments on commit d1399d7

Please sign in to comment.