-
Notifications
You must be signed in to change notification settings - Fork 996
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Allow ingestion job grouping/consolidation to be configurable (#825)
* job grouping strategy * apidocs & tests * fix some old docs * adding apidoc in Spring Config
- Loading branch information
Oleksii Moskalenko
authored
Jun 25, 2020
1 parent
89883d4
commit 26c8070
Showing
11 changed files
with
388 additions
and
107 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
85 changes: 85 additions & 0 deletions
85
core/src/main/java/feast/core/job/ConsolidatedJobStrategy.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* Copyright 2018-2020 The Feast Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package feast.core.job; | ||
|
||
import feast.core.dao.JobRepository; | ||
import feast.core.model.Job; | ||
import feast.core.model.JobStatus; | ||
import feast.core.model.Source; | ||
import feast.core.model.Store; | ||
import java.time.Instant; | ||
import java.util.HashSet; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
import java.util.Set; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
|
||
/** | ||
* In this strategy one Ingestion Job per source is created. All stores that subscribed to | ||
* FeatureSets from this source will be included as sinks in this consolidated Job. | ||
* | ||
* <p>JobId will contain only source parameters (type + config). StoreName will remain empty in Job | ||
* table. | ||
*/ | ||
public class ConsolidatedJobStrategy implements JobGroupingStrategy { | ||
private final JobRepository jobRepository; | ||
|
||
public ConsolidatedJobStrategy(JobRepository jobRepository) { | ||
this.jobRepository = jobRepository; | ||
} | ||
|
||
@Override | ||
public Job getOrCreateJob(Source source, Set<Store> stores) { | ||
return jobRepository | ||
.findFirstBySourceTypeAndSourceConfigAndStoreNameAndStatusNotInOrderByLastUpdatedDesc( | ||
source.getType(), source.getConfig(), null, JobStatus.getTerminalStates()) | ||
.orElseGet( | ||
() -> | ||
Job.builder() | ||
.setSource(source) | ||
.setStores(stores) | ||
.setFeatureSetJobStatuses(new HashSet<>()) | ||
.build()); | ||
} | ||
|
||
@Override | ||
public String createJobId(Job job) { | ||
String dateSuffix = String.valueOf(Instant.now().toEpochMilli()); | ||
String jobId = | ||
String.format( | ||
"%s-%d-%s", | ||
job.getSource().getTypeString(), | ||
Objects.hashCode(job.getSource().getConfig()), | ||
dateSuffix); | ||
return jobId.replaceAll("_store", "-").toLowerCase(); | ||
} | ||
|
||
@Override | ||
public Iterable<Pair<Source, Set<Store>>> collectSingleJobInput( | ||
Stream<Pair<Source, Store>> stream) { | ||
Map<Source, Set<Store>> map = | ||
stream.collect( | ||
Collectors.groupingBy( | ||
Pair::getLeft, Collectors.mapping(Pair::getRight, Collectors.toSet()))); | ||
|
||
return map.entrySet().stream() | ||
.map(e -> Pair.of(e.getKey(), e.getValue())) | ||
.collect(Collectors.toList()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
38 changes: 38 additions & 0 deletions
38
core/src/main/java/feast/core/job/JobGroupingStrategy.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* Copyright 2018-2020 The Feast Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package feast.core.job; | ||
|
||
import feast.core.model.Job; | ||
import feast.core.model.Source; | ||
import feast.core.model.Store; | ||
import java.util.Set; | ||
import java.util.stream.Stream; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
|
||
/** | ||
* Strategy interface that defines how responsibility for sources and stores will be distributed | ||
* across Ingestion Jobs. | ||
*/ | ||
public interface JobGroupingStrategy { | ||
/** Get the non terminated ingestion job ingesting for given source and stores. */ | ||
public Job getOrCreateJob(Source source, Set<Store> stores); | ||
/** Create unique JobId that would be used as key in communications with JobRunner */ | ||
public String createJobId(Job job); | ||
/* Distribute given sources and stores across jobs. One yielded Pair - one created Job **/ | ||
public Iterable<Pair<Source, Set<Store>>> collectSingleJobInput( | ||
Stream<Pair<Source, Store>> stream); | ||
} |
85 changes: 85 additions & 0 deletions
85
core/src/main/java/feast/core/job/JobPerStoreStrategy.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* Copyright 2018-2020 The Feast Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* https://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package feast.core.job; | ||
|
||
import com.google.common.collect.Lists; | ||
import feast.core.dao.JobRepository; | ||
import feast.core.model.Job; | ||
import feast.core.model.JobStatus; | ||
import feast.core.model.Source; | ||
import feast.core.model.Store; | ||
import java.time.Instant; | ||
import java.util.ArrayList; | ||
import java.util.HashSet; | ||
import java.util.Objects; | ||
import java.util.Set; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
import org.apache.commons.lang3.tuple.Pair; | ||
|
||
/** | ||
* In this strategy one job per Source-Store pair is created. | ||
* | ||
* <p>JobId is generated accordingly from Source (type+config) and StoreName. | ||
*/ | ||
public class JobPerStoreStrategy implements JobGroupingStrategy { | ||
private final JobRepository jobRepository; | ||
|
||
public JobPerStoreStrategy(JobRepository jobRepository) { | ||
this.jobRepository = jobRepository; | ||
} | ||
|
||
@Override | ||
public Job getOrCreateJob(Source source, Set<Store> stores) { | ||
ArrayList<Store> storesList = Lists.newArrayList(stores); | ||
if (storesList.size() != 1) { | ||
throw new RuntimeException("Only one store is acceptable in JobPerStore Strategy"); | ||
} | ||
Store store = storesList.get(0); | ||
|
||
return jobRepository | ||
.findFirstBySourceTypeAndSourceConfigAndStoreNameAndStatusNotInOrderByLastUpdatedDesc( | ||
source.getType(), source.getConfig(), store.getName(), JobStatus.getTerminalStates()) | ||
.orElseGet( | ||
() -> | ||
Job.builder() | ||
.setSource(source) | ||
.setStoreName(store.getName()) | ||
.setStores(stores) | ||
.setFeatureSetJobStatuses(new HashSet<>()) | ||
.build()); | ||
} | ||
|
||
@Override | ||
public String createJobId(Job job) { | ||
String dateSuffix = String.valueOf(Instant.now().toEpochMilli()); | ||
String jobId = | ||
String.format( | ||
"%s-%d-to-%s-%s", | ||
job.getSource().getTypeString(), | ||
Objects.hashCode(job.getSource().getConfig()), | ||
job.getStoreName(), | ||
dateSuffix); | ||
return jobId.replaceAll("_store", "-").toLowerCase(); | ||
} | ||
|
||
@Override | ||
public Iterable<Pair<Source, Set<Store>>> collectSingleJobInput( | ||
Stream<Pair<Source, Store>> stream) { | ||
return stream.map(p -> Pair.of(p.getLeft(), Set.of(p.getRight()))).collect(Collectors.toList()); | ||
} | ||
} |
Oops, something went wrong.