Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

Commit

Permalink
[Launcher]: Add more info into SummarizedFrameworkInfo (#2435)
Browse files Browse the repository at this point in the history
1. Add more info into SummarizedFrameworkInfo
2. Skip stopping ZKStore in case it is still in use
3. Increase the maximum description length to 1024
  • Loading branch information
yqwang-ms authored Mar 28, 2019
1 parent 8907053 commit a7525dc
Show file tree
Hide file tree
Showing 11 changed files with 60 additions and 51 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -253,14 +253,6 @@ public synchronized void stop(StopStatus stopStatus) {
ae.addException(e);
}

try {
if (zkStore != null) {
zkStore.stop();
}
} catch (Exception e) {
ae.addException(e);
}

if (ae.getExceptions().size() > 0) {
LOGGER.logWarning(ae, "Failed to stop %s gracefully", serviceName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,8 @@ public PlatformSpecificParametersDescriptor getPlatParams() {
return frameworkDescriptor.getPlatformSpecificParameters();
}

public int getTotalGpuCount() {
return frameworkDescriptor.calcTotalGpuCount();
// Returns the total GPU number by delegating to frameworkDescriptor.calcTotalGpuNumber().
public int getTotalGpuNumber() {
return frameworkDescriptor.calcTotalGpuNumber();
}

public TaskRoleDescriptor getTaskRole(String taskRoleName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ public synchronized SelectionResult select(ResourceDescriptor requestResource, S
filterNodesByNodeLabel(requestNodeLabel);
filterNodesByGpuType(configuredNodes, requestNodeGpuType);
if (!conf.getAmAllowNoneGpuJobOnGpuNode()) {
int jobTotalRequestGpu = requestManager.getTotalGpuCount();
int jobTotalRequestGpu = requestManager.getTotalGpuNumber();
filterNodesForNoneGpuJob(jobTotalRequestGpu);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,5 +49,9 @@ public Map<String, MigrateTaskRequest> getMigrateTaskRequests() {
// Replaces the stored migrateTaskRequests map with the given reference (no defensive copy is made).
public void setMigrateTaskRequests(Map<String, MigrateTaskRequest> migrateTaskRequests) {
this.migrateTaskRequests = migrateTaskRequests;
}

// Returns the total task number by delegating to frameworkRequest.calcTotalTaskNumber().
// NOTE(review): no null check on frameworkRequest here — confirm it is always set before this is called.
public int calcTotalTaskNumber() {
return frameworkRequest.calcTotalTaskNumber();
}
}

Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

public class FrameworkDescriptor implements Serializable {
@Valid
@Size(max = 512)
@Size(max = 1024)
private String description;

@Valid
Expand Down Expand Up @@ -168,12 +168,20 @@ public boolean containsPortResource() {
return false;
}

public int calcTotalGpuCount() {
int totalGpuCount = 0;
for (TaskRoleDescriptor taskRoleDescriptor : taskRoles.values()) {
totalGpuCount += taskRoleDescriptor.getTaskNumber() *
taskRoleDescriptor.getTaskService().getResource().getGpuNumber();
// Sums, over every entry in taskRoles, the role's task number multiplied by the
// GPU number of that role's task service resource, and returns the total.
public int calcTotalGpuNumber() {
int totalGpuNumber = 0;
for (TaskRoleDescriptor taskRole : taskRoles.values()) {
// Presumably getGpuNumber() is the per-task GPU count, so this is the role's total — TODO confirm.
// Plain int arithmetic: no overflow check is performed.
totalGpuNumber += taskRole.getTaskNumber() *
taskRole.getTaskService().getResource().getGpuNumber();
}
return totalGpuNumber;
}

public int calcTotalTaskNumber() {
int totalTaskNumber = 0;
for (TaskRoleDescriptor taskRole : taskRoles.values()) {
totalTaskNumber += taskRole.getTaskNumber();
}
return totalGpuCount;
return totalTaskNumber;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,8 @@ public Long getLastRequestTimestamp() {
// Stores the given value as lastRequestTimestamp.
// NOTE(review): elsewhere on this page timestamps come from System.currentTimeMillis(), so this is
// presumably epoch milliseconds — confirm.
public void setLastRequestTimestamp(Long lastRequestTimestamp) {
this.lastRequestTimestamp = lastRequestTimestamp;
}

// Returns the total task number by delegating to frameworkDescriptor.calcTotalTaskNumber().
public int calcTotalTaskNumber() {
return frameworkDescriptor.calcTotalTaskNumber();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ public class SummarizedFrameworkInfo implements Serializable {
private String frameworkDescription;
private String userName;
private String queue;
private Integer totalGpuNumber;
private Integer totalTaskNumber;
private Integer totalTaskRoleNumber;
private Long firstRequestTimestamp;
private Long lastRequestTimestamp;

Expand All @@ -46,6 +49,9 @@ public static SummarizedFrameworkInfo newInstance(FrameworkRequest frameworkRequ
sFrameworkInfo.setFrameworkDescription(frameworkDescriptor.getDescription());
sFrameworkInfo.setUserName(frameworkDescriptor.getUser().getName());
sFrameworkInfo.setQueue(frameworkDescriptor.getPlatformSpecificParameters().getQueue());
sFrameworkInfo.setTotalGpuNumber(frameworkDescriptor.calcTotalGpuNumber());
sFrameworkInfo.setTotalTaskNumber(frameworkDescriptor.calcTotalTaskNumber());
sFrameworkInfo.setTotalTaskRoleNumber(frameworkDescriptor.getTaskRoles().size());
sFrameworkInfo.setFirstRequestTimestamp(frameworkRequest.getFirstRequestTimestamp());
sFrameworkInfo.setLastRequestTimestamp(frameworkRequest.getLastRequestTimestamp());

Expand Down Expand Up @@ -104,6 +110,30 @@ public void setQueue(String queue) {
this.queue = queue;
}

// Returns the stored totalGpuNumber; it is a boxed Integer, so it may be null if never set.
public Integer getTotalGpuNumber() {
return totalGpuNumber;
}

// Stores the given value as totalGpuNumber.
public void setTotalGpuNumber(Integer totalGpuNumber) {
this.totalGpuNumber = totalGpuNumber;
}

// Returns the stored totalTaskNumber; it is a boxed Integer, so it may be null if never set.
public Integer getTotalTaskNumber() {
return totalTaskNumber;
}

// Stores the given value as totalTaskNumber.
public void setTotalTaskNumber(Integer totalTaskNumber) {
this.totalTaskNumber = totalTaskNumber;
}

// Returns the stored totalTaskRoleNumber; it is a boxed Integer, so it may be null if never set.
public Integer getTotalTaskRoleNumber() {
return totalTaskRoleNumber;
}

// Stores the given value as totalTaskRoleNumber.
public void setTotalTaskRoleNumber(Integer totalTaskRoleNumber) {
this.totalTaskRoleNumber = totalTaskRoleNumber;
}

// Returns the stored firstRequestTimestamp; it is a boxed Long, so it may be null if never set.
public Long getFirstRequestTimestamp() {
return firstRequestTimestamp;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,6 @@ public synchronized void stop(StopStatus stopStatus) {
ae.addException(e);
}

try {
if (zkStore != null) {
zkStore.stop();
}
} catch (Exception e) {
ae.addException(e);
}

if (ae.getExceptions().size() > 0) {
LOGGER.logWarning(ae, "Failed to stop %s gracefully", serviceName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ private boolean deleteFrameworkRequestInternal(String frameworkName) throws Exce
if (aggFrameworkRequests.containsKey(frameworkName)) {
AggregatedFrameworkRequest aggFrameworkRequest = aggFrameworkRequests.get(frameworkName);
int oldTotalTaskNumber = totalTaskNumber;
int frameworkTaskNumber = getFrameworkTaskNumber(aggFrameworkRequest.getFrameworkRequest());
int frameworkTaskNumber = aggFrameworkRequest.calcTotalTaskNumber();
totalTaskNumber -= frameworkTaskNumber;

LOGGER.logDebug(
Expand Down Expand Up @@ -266,19 +266,10 @@ private void gcCompletedFrameworks(Map<String, FrameworkStatus> completedFramewo
}
}

// Returns the sum of getTaskNumber() over every task role in the given request's framework descriptor.
private int getFrameworkTaskNumber(FrameworkRequest frameworkRequest) {
int frameworkTaskNumber = 0;
for (TaskRoleDescriptor taskRole : frameworkRequest.getFrameworkDescriptor().getTaskRoles().values()) {
frameworkTaskNumber += taskRole.getTaskNumber();
}
return frameworkTaskNumber;
}

private int getTotalTaskNumber() {
int totalTaskNumber = 0;
for (AggregatedFrameworkRequest aggFrameworkRequest : aggFrameworkRequests.values()) {
FrameworkRequest frameworkRequest = aggFrameworkRequest.getFrameworkRequest();
totalTaskNumber += getFrameworkTaskNumber(frameworkRequest);
totalTaskNumber += aggFrameworkRequest.calcTotalTaskNumber();
}
return totalTaskNumber;
}
Expand Down Expand Up @@ -369,11 +360,11 @@ public void setFrameworkRequest(
}

Long currentTimestamp = System.currentTimeMillis();
int frameworkTaskNumber = getFrameworkTaskNumber(frameworkRequest);
int frameworkTaskNumber = frameworkRequest.calcTotalTaskNumber();
int newTotalTaskNumber = totalTaskNumber + frameworkTaskNumber;
if (aggFrameworkRequests.containsKey(frameworkName)) {
FrameworkRequest oldFrameworkRequest = aggFrameworkRequests.get(frameworkName).getFrameworkRequest();
newTotalTaskNumber -= getFrameworkTaskNumber(oldFrameworkRequest);
newTotalTaskNumber -= oldFrameworkRequest.calcTotalTaskNumber();
frameworkRequest.setFirstRequestTimestamp(oldFrameworkRequest.getFirstRequestTimestamp());
} else {
frameworkRequest.setFirstRequestTimestamp(currentTimestamp);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,14 +111,6 @@ public synchronized void stop(StopStatus stopStatus) {
AggregateException ae = new AggregateException();

// Stop WebServer's SubServices
try {
if (zkStore != null) {
zkStore.stop();
}
} catch (Exception e) {
ae.addException(e);
}

if (ae.getExceptions().size() > 0) {
LOGGER.logWarning(ae, "Failed to stop %s gracefully", serviceName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,6 @@ protected ZookeeperStore(ZooKeeperClient zkClient, ZookeeperStoreStructure zkStr
this.zkStruct = zkStruct;
}

// Stops the store by calling stop() on zkClient.
// NOTE(review): whether zkClient is shared with other users of the same client is not visible here — confirm.
public void stop() {
zkClient.stop();
}

// Setup Basic ZookeeperStoreStructure
private void setupZKStructure() throws Exception {
if (!zkClient.exists(zkStruct.getLauncherRootPath())) {
Expand Down

0 comments on commit a7525dc

Please sign in to comment.