[ML] Change JobManager to work with Job config in index (elastic#33064)

davidkyle committed Oct 29, 2018
1 parent 59a1205 commit 7b56999
Showing 30 changed files with 1,256 additions and 593 deletions.
@@ -12,8 +12,15 @@
import org.elasticsearch.xpack.core.ml.job.config.JobState;
import org.elasticsearch.xpack.core.ml.job.config.JobTaskState;

import java.util.Collection;
import java.util.Set;
import java.util.stream.Collectors;

public final class MlTasks {

public static final String JOB_TASK_PREFIX = "job-";
public static final String DATAFEED_TASK_PREFIX = "datafeed-";

private MlTasks() {
}

@@ -22,15 +29,15 @@ private MlTasks() {
* A datafeed id can be used as a job id, because they are stored separately in cluster state.
*/
public static String jobTaskId(String jobId) {
return "job-" + jobId;
return JOB_TASK_PREFIX + jobId;
}

/**
* Namespaces the task ids for datafeeds.
* A job id can be used as a datafeed id, because they are stored separately in cluster state.
*/
public static String datafeedTaskId(String datafeedId) {
return "datafeed-" + datafeedId;
return DATAFEED_TASK_PREFIX + datafeedId;
}

@Nullable
@@ -67,4 +74,17 @@ public static DatafeedState getDatafeedState(String datafeedId, @Nullable Persis
return DatafeedState.STOPPED;
}
}

/**
* The job Ids of anomaly detector job tasks
* @param tasks Active tasks
* @return The job Ids of anomaly detector job tasks
*/
public static Set<String> openJobIds(PersistentTasksCustomMetaData tasks) {
Collection<PersistentTasksCustomMetaData.PersistentTask<?>> activeTasks = tasks.tasks();

return activeTasks.stream().filter(t -> t.getId().startsWith(JOB_TASK_PREFIX))
.map(t -> t.getId().substring(JOB_TASK_PREFIX.length()))
.collect(Collectors.toSet());
}
}
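The new openJobIds helper leans on the task-id namespacing documented above: job tasks are prefixed with "job-" and datafeed tasks with "datafeed-", so stripping JOB_TASK_PREFIX recovers the job id. Below is a minimal standalone sketch of that filter-and-strip logic, with a plain Stream standing in for PersistentTasksCustomMetaData.tasks() and hypothetical task ids:

    // Illustrative sketch only: mirrors MlTasks' prefix convention without the
    // Elasticsearch PersistentTasksCustomMetaData types.
    import java.util.Set;
    import java.util.stream.Collectors;
    import java.util.stream.Stream;

    class OpenJobIdsSketch {
        static final String JOB_TASK_PREFIX = "job-";

        public static void main(String[] args) {
            // Stand-in for tasks.tasks(): one job task, one datafeed task.
            Stream<String> persistentTaskIds = Stream.of("job-farequote", "datafeed-farequote");

            // Same filter-and-strip logic as MlTasks.openJobIds(...)
            Set<String> openJobIds = persistentTaskIds
                    .filter(id -> id.startsWith(JOB_TASK_PREFIX))
                    .map(id -> id.substring(JOB_TASK_PREFIX.length()))
                    .collect(Collectors.toSet());

            System.out.println(openJobIds); // prints [farequote]
        }
    }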
@@ -248,9 +248,6 @@ public static void addJobConfigFields(XContentBuilder builder) throws IOExceptio
.startObject(AnalysisConfig.MULTIVARIATE_BY_FIELDS.getPreferredName())
.field(TYPE, BOOLEAN)
.endObject()
.startObject(AnalysisConfig.USE_PER_PARTITION_NORMALIZATION.getPreferredName())
.field(TYPE, BOOLEAN)
.endObject()
.endObject()
.endObject()

@@ -210,7 +210,6 @@ public final class ReservedFieldNames {
AnalysisConfig.OVERLAPPING_BUCKETS.getPreferredName(),
AnalysisConfig.RESULT_FINALIZATION_WINDOW.getPreferredName(),
AnalysisConfig.MULTIVARIATE_BY_FIELDS.getPreferredName(),
AnalysisConfig.USE_PER_PARTITION_NORMALIZATION.getPreferredName(),

AnalysisLimits.MODEL_MEMORY_LIMIT.getPreferredName(),
AnalysisLimits.CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(),
@@ -366,7 +366,7 @@ public Collection<Object> createComponents(Client client, ClusterService cluster
Auditor auditor = new Auditor(client, clusterService.getNodeName());
JobResultsProvider jobResultsProvider = new JobResultsProvider(client, settings);
UpdateJobProcessNotifier notifier = new UpdateJobProcessNotifier(settings, client, clusterService, threadPool);
JobManager jobManager = new JobManager(env, settings, jobResultsProvider, clusterService, auditor, client, notifier);
JobManager jobManager = new JobManager(env, settings, jobResultsProvider, clusterService, auditor, threadPool, client, notifier);

JobDataCountsPersister jobDataCountsPersister = new JobDataCountsPersister(settings, client);
JobResultsPersister jobResultsPersister = new JobResultsPersister(settings, client);
@@ -62,8 +62,11 @@ protected void doExecute(Task task, DeleteCalendarAction.Request request, Action
listener.onFailure(new ResourceNotFoundException("No calendar with id [" + calendarId + "]"));
return;
}
jobManager.updateProcessOnCalendarChanged(calendar.getJobIds());
listener.onResponse(new AcknowledgedResponse(true));

jobManager.updateProcessOnCalendarChanged(calendar.getJobIds(), ActionListener.wrap(
r -> listener.onResponse(new AcknowledgedResponse(true)),
listener::onFailure
));
},
listener::onFailure));
},
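This change shows the commit's recurring shape: a fire-and-forget call followed by an unconditional onResponse becomes an asynchronous call whose success and failure paths both flow through ActionListener.wrap. A standalone sketch of that idiom, using simplified stand-ins rather than the real org.elasticsearch.action.ActionListener:

    // Minimal stand-in for the ActionListener.wrap(...) idiom used above.
    import java.util.function.Consumer;

    interface Listener<T> {
        void onResponse(T result);
        void onFailure(Exception e);

        // Builds a listener from two lambdas so callers can chain async steps
        // without anonymous classes.
        static <T> Listener<T> wrap(Consumer<T> onSuccess, Consumer<Exception> onError) {
            return new Listener<T>() {
                @Override
                public void onResponse(T result) {
                    onSuccess.accept(result);
                }

                @Override
                public void onFailure(Exception e) {
                    onError.accept(e);
                }
            };
        }
    }

    class WrapDemo {
        public static void main(String[] args) {
            Listener<String> listener = Listener.wrap(
                    r -> System.out.println("acknowledged: " + r),
                    e -> System.err.println("failed: " + e.getMessage()));
            listener.onResponse("true"); // success path, as after a calendar update
        }
    }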
@@ -102,8 +102,10 @@ public void onResponse(DeleteResponse response) {
if (response.status() == RestStatus.NOT_FOUND) {
listener.onFailure(new ResourceNotFoundException("No event with id [" + eventId + "]"));
} else {
jobManager.updateProcessOnCalendarChanged(calendar.getJobIds());
listener.onResponse(new AcknowledgedResponse(true));
jobManager.updateProcessOnCalendarChanged(calendar.getJobIds(), ActionListener.wrap(
r -> listener.onResponse(new AcknowledgedResponse(true)),
listener::onFailure
));
}
}

@@ -12,17 +12,15 @@
import org.elasticsearch.action.support.HandledTransportAction;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.tasks.Task;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.core.ml.action.DeleteModelSnapshotAction;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import org.elasticsearch.xpack.ml.job.persistence.JobDataDeleter;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSnapshot;
import org.elasticsearch.xpack.ml.job.JobManager;
import org.elasticsearch.xpack.ml.job.persistence.JobDataDeleter;
import org.elasticsearch.xpack.ml.job.persistence.JobResultsProvider;
import org.elasticsearch.xpack.ml.notifications.Auditor;

@@ -33,19 +31,19 @@ public class TransportDeleteModelSnapshotAction extends HandledTransportAction<D
AcknowledgedResponse> {

private final Client client;
private final JobManager jobManager;
private final JobResultsProvider jobResultsProvider;
private final ClusterService clusterService;
private final Auditor auditor;

@Inject
public TransportDeleteModelSnapshotAction(Settings settings, TransportService transportService, ActionFilters actionFilters,
JobResultsProvider jobResultsProvider, ClusterService clusterService, Client client,
JobResultsProvider jobResultsProvider, Client client, JobManager jobManager,
Auditor auditor) {
super(settings, DeleteModelSnapshotAction.NAME, transportService, actionFilters,
DeleteModelSnapshotAction.Request::new);
this.client = client;
this.jobManager = jobManager;
this.jobResultsProvider = jobResultsProvider;
this.clusterService = clusterService;
this.auditor = auditor;
}

@@ -70,32 +68,40 @@ protected void doExecute(Task task, DeleteModelSnapshotAction.Request request,
ModelSnapshot deleteCandidate = deleteCandidates.get(0);

// Verify the snapshot is not being used
Job job = JobManager.getJobOrThrowIfUnknown(request.getJobId(), clusterService.state());
String currentModelInUse = job.getModelSnapshotId();
if (currentModelInUse != null && currentModelInUse.equals(request.getSnapshotId())) {
throw new IllegalArgumentException(Messages.getMessage(Messages.REST_CANNOT_DELETE_HIGHEST_PRIORITY,
request.getSnapshotId(), request.getJobId()));
}
jobManager.getJob(request.getJobId(), ActionListener.wrap(
job -> {
String currentModelInUse = job.getModelSnapshotId();
if (currentModelInUse != null && currentModelInUse.equals(request.getSnapshotId())) {
listener.onFailure(
new IllegalArgumentException(Messages.getMessage(Messages.REST_CANNOT_DELETE_HIGHEST_PRIORITY,
request.getSnapshotId(), request.getJobId())));
return;
}

// Delete the snapshot and any associated state files
JobDataDeleter deleter = new JobDataDeleter(client, request.getJobId());
deleter.deleteModelSnapshots(Collections.singletonList(deleteCandidate),
new ActionListener<BulkResponse>() {
@Override
public void onResponse(BulkResponse bulkResponse) {
String msg = Messages.getMessage(Messages.JOB_AUDIT_SNAPSHOT_DELETED,
deleteCandidate.getSnapshotId(), deleteCandidate.getDescription());

// Delete the snapshot and any associated state files
JobDataDeleter deleter = new JobDataDeleter(client, request.getJobId());
deleter.deleteModelSnapshots(Collections.singletonList(deleteCandidate), new ActionListener<BulkResponse>() {
@Override
public void onResponse(BulkResponse bulkResponse) {
String msg = Messages.getMessage(Messages.JOB_AUDIT_SNAPSHOT_DELETED, deleteCandidate.getSnapshotId(),
deleteCandidate.getDescription());
auditor.info(request.getJobId(), msg);
logger.debug("[{}] {}", request.getJobId(), msg);
// We don't care about the bulk response, just that it succeeded
listener.onResponse(new AcknowledgedResponse(true));
}
auditor.info(request.getJobId(), msg);
logger.debug("[{}] {}", request.getJobId(), msg);
// We don't care about the bulk response, just that it succeeded
listener.onResponse(new AcknowledgedResponse(true));
}

@Override
public void onFailure(Exception e) {
listener.onFailure(e);
}
});
@Override
public void onFailure(Exception e) {
listener.onFailure(e);
}
});

},
listener::onFailure
));
}, listener::onFailure);
}
}
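The rewrite above is the heart of the commit: the synchronous JobManager.getJobOrThrowIfUnknown(jobId, clusterService.state()) lookup becomes an asynchronous jobManager.getJob(jobId, ActionListener.wrap(...)), since the job config may now have to be read from an index rather than from cluster state. A standalone sketch of that sync-to-async conversion; the map stands in for the index read and all names are illustrative:

    // Sketch: converting a throwing, synchronous config lookup into callback
    // style. A guard clause that used to `throw` now calls onFailure and returns.
    import java.util.Map;
    import java.util.function.Consumer;

    class AsyncGetJobSketch {
        // Stand-in for the job config index: job id -> current model snapshot id.
        static final Map<String, String> JOB_SNAPSHOTS = Map.of("it-ops", "snap-7");

        static void getJob(String jobId, Consumer<String> onResponse, Consumer<Exception> onFailure) {
            String snapshotId = JOB_SNAPSHOTS.get(jobId);
            if (snapshotId == null) {
                onFailure.accept(new IllegalArgumentException("No known job with id [" + jobId + "]"));
                return; // was: throw ...
            }
            onResponse.accept(snapshotId);
        }

        public static void main(String[] args) {
            getJob("it-ops",
                    snapshotId -> System.out.println("snapshot in use: " + snapshotId),
                    e -> System.err.println("failed: " + e.getMessage()));
        }
    }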
@@ -9,7 +9,6 @@
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.stream.StreamInput;
@@ -41,14 +40,17 @@ public class TransportForecastJobAction extends TransportJobTaskAction<ForecastJ
private static final ByteSizeValue FORECAST_LOCAL_STORAGE_LIMIT = new ByteSizeValue(500, ByteSizeUnit.MB);

private final JobResultsProvider jobResultsProvider;
private final JobManager jobManager;
@Inject
public TransportForecastJobAction(Settings settings, TransportService transportService,
ClusterService clusterService, ActionFilters actionFilters,
JobResultsProvider jobResultsProvider, AutodetectProcessManager processManager) {
JobResultsProvider jobResultsProvider, AutodetectProcessManager processManager,
JobManager jobManager) {
super(settings, ForecastJobAction.NAME, clusterService, transportService, actionFilters,
ForecastJobAction.Request::new, ForecastJobAction.Response::new,
ThreadPool.Names.SAME, processManager);
this.jobResultsProvider = jobResultsProvider;
this.jobManager = jobManager;
// ThreadPool.Names.SAME, because the operation is executed by the autodetect worker thread
}

@@ -62,57 +64,63 @@ protected ForecastJobAction.Response readTaskResponse(StreamInput in) throws IOE
@Override
protected void taskOperation(ForecastJobAction.Request request, TransportOpenJobAction.JobTask task,
ActionListener<ForecastJobAction.Response> listener) {
ClusterState state = clusterService.state();
Job job = JobManager.getJobOrThrowIfUnknown(task.getJobId(), state);
validate(job, request);
jobManager.getJob(task.getJobId(), ActionListener.wrap(
job -> {
validate(job, request);

ForecastParams.Builder paramsBuilder = ForecastParams.builder();
ForecastParams.Builder paramsBuilder = ForecastParams.builder();

if (request.getDuration() != null) {
paramsBuilder.duration(request.getDuration());
}
if (request.getDuration() != null) {
paramsBuilder.duration(request.getDuration());
}

if (request.getExpiresIn() != null) {
paramsBuilder.expiresIn(request.getExpiresIn());
}
if (request.getExpiresIn() != null) {
paramsBuilder.expiresIn(request.getExpiresIn());
}

// tmp storage might be null, we do not log here, because it might not be
// required
Path tmpStorage = processManager.tryGetTmpStorage(task, FORECAST_LOCAL_STORAGE_LIMIT);
if (tmpStorage != null) {
paramsBuilder.tmpStorage(tmpStorage.toString());
}
// tmp storage might be null, we do not log here, because it might not be
// required
Path tmpStorage = processManager.tryGetTmpStorage(task, FORECAST_LOCAL_STORAGE_LIMIT);
if (tmpStorage != null) {
paramsBuilder.tmpStorage(tmpStorage.toString());
}

ForecastParams params = paramsBuilder.build();
processManager.forecastJob(task, params, e -> {
if (e == null) {
Consumer<ForecastRequestStats> forecastRequestStatsHandler = forecastRequestStats -> {
if (forecastRequestStats == null) {
// paranoia case, it should not happen that we do not retrieve a result
listener.onFailure(new ElasticsearchException(
"Cannot run forecast: internal error, please check the logs"));
} else if (forecastRequestStats.getStatus() == ForecastRequestStats.ForecastRequestStatus.FAILED) {
List<String> messages = forecastRequestStats.getMessages();
if (messages.size() > 0) {
listener.onFailure(ExceptionsHelper.badRequestException("Cannot run forecast: "
+ messages.get(0)));
ForecastParams params = paramsBuilder.build();
processManager.forecastJob(task, params, e -> {
if (e == null) {
getForecastRequestStats(request.getJobId(), params.getForecastId(), listener);
} else {
// paranoia case, it should not be possible to have an empty message list
listener.onFailure(
new ElasticsearchException(
"Cannot run forecast: internal error, please check the logs"));
listener.onFailure(e);
}
} else {
listener.onResponse(new ForecastJobAction.Response(true, params.getForecastId()));
}
};
});
},
listener::onFailure
));
}

jobResultsProvider.getForecastRequestStats(request.getJobId(), params.getForecastId(),
forecastRequestStatsHandler, listener::onFailure);
private void getForecastRequestStats(String jobId, String forecastId, ActionListener<ForecastJobAction.Response> listener) {
Consumer<ForecastRequestStats> forecastRequestStatsHandler = forecastRequestStats -> {
if (forecastRequestStats == null) {
// paranoia case, it should not happen that we do not retrieve a result
listener.onFailure(new ElasticsearchException(
"Cannot run forecast: internal error, please check the logs"));
} else if (forecastRequestStats.getStatus() == ForecastRequestStats.ForecastRequestStatus.FAILED) {
List<String> messages = forecastRequestStats.getMessages();
if (messages.size() > 0) {
listener.onFailure(ExceptionsHelper.badRequestException("Cannot run forecast: "
+ messages.get(0)));
} else {
// paranoia case, it should not be possible to have an empty message list
listener.onFailure(
new ElasticsearchException(
"Cannot run forecast: internal error, please check the logs"));
}
} else {
listener.onFailure(e);
listener.onResponse(new ForecastJobAction.Response(true, forecastId));
}
});
};

jobResultsProvider.getForecastRequestStats(jobId, forecastId, forecastRequestStatsHandler, listener::onFailure);
}
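Extracting getForecastRequestStats leaves a three-way dispatch: a null result is treated as an internal error, a FAILED status surfaces the first process message as a bad request, and anything else reports success. A standalone sketch of that dispatch (Java 16+ for the record syntax; the types are stand-ins for ForecastRequestStats):

    // Sketch of the null / FAILED / success dispatch above. Stats stands in
    // for ForecastRequestStats; the message strings match the diff.
    import java.util.List;
    import java.util.function.Consumer;

    class ForecastStatsDispatchSketch {
        enum Status { OK, FAILED }
        record Stats(Status status, List<String> messages) {}

        static void handle(Stats stats, Consumer<String> onFailure, Runnable onSuccess) {
            if (stats == null) {
                // paranoia case: no result retrieved at all
                onFailure.accept("Cannot run forecast: internal error, please check the logs");
            } else if (stats.status() == Status.FAILED) {
                List<String> messages = stats.messages();
                onFailure.accept(messages.isEmpty()
                        ? "Cannot run forecast: internal error, please check the logs"
                        : "Cannot run forecast: " + messages.get(0));
            } else {
                onSuccess.run();
            }
        }

        public static void main(String[] args) {
            handle(new Stats(Status.FAILED, List.of("not enough data")),
                    msg -> System.err.println(msg),
                    () -> System.out.println("forecast created"));
            // prints: Cannot run forecast: not enough data
        }
    }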

static void validate(Job job, ForecastJobAction.Request request) {
[Diff truncated: the remaining changed files were not loaded.]