diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/ApplicationMaster.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/ApplicationMaster.java index beaff7c146..36b02a7933 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/ApplicationMaster.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/ApplicationMaster.java @@ -253,14 +253,6 @@ public synchronized void stop(StopStatus stopStatus) { ae.addException(e); } - try { - if (zkStore != null) { - zkStore.stop(); - } - } catch (Exception e) { - ae.addException(e); - } - if (ae.getExceptions().size() > 0) { LOGGER.logWarning(ae, "Failed to stop %s gracefully", serviceName); } diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/RequestManager.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/RequestManager.java index 108e09ce08..b826244e39 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/RequestManager.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/RequestManager.java @@ -364,8 +364,8 @@ public PlatformSpecificParametersDescriptor getPlatParams() { return frameworkDescriptor.getPlatformSpecificParameters(); } - public int getTotalGpuCount() { - return frameworkDescriptor.calcTotalGpuCount(); + public int getTotalGpuNumber() { + return frameworkDescriptor.calcTotalGpuNumber(); } public TaskRoleDescriptor getTaskRole(String taskRoleName) { diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/SelectionManager.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/SelectionManager.java index 594815ffdd..cee0e9700a 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/SelectionManager.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/applicationmaster/SelectionManager.java @@ -254,7 +254,7 @@ public synchronized SelectionResult select(ResourceDescriptor requestResource, S filterNodesByNodeLabel(requestNodeLabel); filterNodesByGpuType(configuredNodes, requestNodeGpuType); if (!conf.getAmAllowNoneGpuJobOnGpuNode()) { - int jobTotalRequestGpu = requestManager.getTotalGpuCount(); + int jobTotalRequestGpu = requestManager.getTotalGpuNumber(); filterNodesForNoneGpuJob(jobTotalRequestGpu); } diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/AggregatedFrameworkRequest.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/AggregatedFrameworkRequest.java index 8839c9df56..311f62c0c9 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/AggregatedFrameworkRequest.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/AggregatedFrameworkRequest.java @@ -49,5 +49,9 @@ public Map getMigrateTaskRequests() { public void setMigrateTaskRequests(Map migrateTaskRequests) { this.migrateTaskRequests = migrateTaskRequests; } + + public int calcTotalTaskNumber() { + return frameworkRequest.calcTotalTaskNumber(); + } } diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkDescriptor.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkDescriptor.java index 11a466bc22..0edd4ae127 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkDescriptor.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkDescriptor.java @@ -30,7 +30,7 @@ public class FrameworkDescriptor implements Serializable { @Valid - @Size(max = 512) + @Size(max = 1024) private String description; @Valid @@ -168,12 +168,20 @@ public boolean containsPortResource() { return false; } - public int calcTotalGpuCount() { - int totalGpuCount = 0; - for (TaskRoleDescriptor taskRoleDescriptor : taskRoles.values()) { - totalGpuCount += taskRoleDescriptor.getTaskNumber() * - taskRoleDescriptor.getTaskService().getResource().getGpuNumber(); + public int calcTotalGpuNumber() { + int totalGpuNumber = 0; + for (TaskRoleDescriptor taskRole : taskRoles.values()) { + totalGpuNumber += taskRole.getTaskNumber() * + taskRole.getTaskService().getResource().getGpuNumber(); + } + return totalGpuNumber; + } + + public int calcTotalTaskNumber() { + int totalTaskNumber = 0; + for (TaskRoleDescriptor taskRole : taskRoles.values()) { + totalTaskNumber += taskRole.getTaskNumber(); } - return totalGpuCount; + return totalTaskNumber; } } diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkRequest.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkRequest.java index 0d45474bc5..879f492ce2 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkRequest.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/FrameworkRequest.java @@ -83,4 +83,8 @@ public Long getLastRequestTimestamp() { public void setLastRequestTimestamp(Long lastRequestTimestamp) { this.lastRequestTimestamp = lastRequestTimestamp; } + + public int calcTotalTaskNumber() { + return frameworkDescriptor.calcTotalTaskNumber(); + } } diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/SummarizedFrameworkInfo.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/SummarizedFrameworkInfo.java index 3f579c9f61..84539013eb 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/SummarizedFrameworkInfo.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/common/model/SummarizedFrameworkInfo.java @@ -27,6 +27,9 @@ public class SummarizedFrameworkInfo implements Serializable { private String frameworkDescription; private String userName; private String queue; + private Integer totalGpuNumber; + private Integer totalTaskNumber; + private Integer totalTaskRoleNumber; private Long firstRequestTimestamp; private Long lastRequestTimestamp; @@ -46,6 +49,9 @@ public static SummarizedFrameworkInfo newInstance(FrameworkRequest frameworkRequ sFrameworkInfo.setFrameworkDescription(frameworkDescriptor.getDescription()); sFrameworkInfo.setUserName(frameworkDescriptor.getUser().getName()); sFrameworkInfo.setQueue(frameworkDescriptor.getPlatformSpecificParameters().getQueue()); + sFrameworkInfo.setTotalGpuNumber(frameworkDescriptor.calcTotalGpuNumber()); + sFrameworkInfo.setTotalTaskNumber(frameworkDescriptor.calcTotalTaskNumber()); + sFrameworkInfo.setTotalTaskRoleNumber(frameworkDescriptor.getTaskRoles().size()); sFrameworkInfo.setFirstRequestTimestamp(frameworkRequest.getFirstRequestTimestamp()); sFrameworkInfo.setLastRequestTimestamp(frameworkRequest.getLastRequestTimestamp()); @@ -104,6 +110,30 @@ public void setQueue(String queue) { this.queue = queue; } + public Integer getTotalGpuNumber() { + return totalGpuNumber; + } + + public void setTotalGpuNumber(Integer totalGpuNumber) { + this.totalGpuNumber = totalGpuNumber; + } + + public Integer getTotalTaskNumber() { + return totalTaskNumber; + } + + public void setTotalTaskNumber(Integer totalTaskNumber) { + this.totalTaskNumber = totalTaskNumber; + } + + public Integer getTotalTaskRoleNumber() { + return totalTaskRoleNumber; + } + + public void setTotalTaskRoleNumber(Integer totalTaskRoleNumber) { + this.totalTaskRoleNumber = totalTaskRoleNumber; + } + public Long getFirstRequestTimestamp() { return firstRequestTimestamp; } diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/service/Service.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/service/Service.java index d48d389a7f..1d38641a90 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/service/Service.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/service/Service.java @@ -192,14 +192,6 @@ public synchronized void stop(StopStatus stopStatus) { ae.addException(e); } - try { - if (zkStore != null) { - zkStore.stop(); - } - } catch (Exception e) { - ae.addException(e); - } - if (ae.getExceptions().size() > 0) { LOGGER.logWarning(ae, "Failed to stop %s gracefully", serviceName); } diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/webserver/RequestManager.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/webserver/RequestManager.java index b5375aee02..0bed3f192c 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/webserver/RequestManager.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/webserver/RequestManager.java @@ -132,7 +132,7 @@ private boolean deleteFrameworkRequestInternal(String frameworkName) throws Exce if (aggFrameworkRequests.containsKey(frameworkName)) { AggregatedFrameworkRequest aggFrameworkRequest = aggFrameworkRequests.get(frameworkName); int oldTotalTaskNumber = totalTaskNumber; - int frameworkTaskNumber = getFrameworkTaskNumber(aggFrameworkRequest.getFrameworkRequest()); + int frameworkTaskNumber = aggFrameworkRequest.calcTotalTaskNumber(); totalTaskNumber -= frameworkTaskNumber; LOGGER.logDebug( @@ -266,19 +266,10 @@ private void gcCompletedFrameworks(Map completedFramewo } } - private int getFrameworkTaskNumber(FrameworkRequest frameworkRequest) { - int frameworkTaskNumber = 0; - for (TaskRoleDescriptor taskRole : frameworkRequest.getFrameworkDescriptor().getTaskRoles().values()) { - frameworkTaskNumber += taskRole.getTaskNumber(); - } - return frameworkTaskNumber; - } - private int getTotalTaskNumber() { int totalTaskNumber = 0; for (AggregatedFrameworkRequest aggFrameworkRequest : aggFrameworkRequests.values()) { - FrameworkRequest frameworkRequest = aggFrameworkRequest.getFrameworkRequest(); - totalTaskNumber += getFrameworkTaskNumber(frameworkRequest); + totalTaskNumber += aggFrameworkRequest.calcTotalTaskNumber(); } return totalTaskNumber; } @@ -369,11 +360,11 @@ public void setFrameworkRequest( } Long currentTimestamp = System.currentTimeMillis(); - int frameworkTaskNumber = getFrameworkTaskNumber(frameworkRequest); + int frameworkTaskNumber = frameworkRequest.calcTotalTaskNumber(); int newTotalTaskNumber = totalTaskNumber + frameworkTaskNumber; if (aggFrameworkRequests.containsKey(frameworkName)) { FrameworkRequest oldFrameworkRequest = aggFrameworkRequests.get(frameworkName).getFrameworkRequest(); - newTotalTaskNumber -= getFrameworkTaskNumber(oldFrameworkRequest); + newTotalTaskNumber -= oldFrameworkRequest.calcTotalTaskNumber(); frameworkRequest.setFirstRequestTimestamp(oldFrameworkRequest.getFirstRequestTimestamp()); } else { frameworkRequest.setFirstRequestTimestamp(currentTimestamp); diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/webserver/WebServer.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/webserver/WebServer.java index 6e0160acb4..1796e88d92 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/webserver/WebServer.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/webserver/WebServer.java @@ -111,14 +111,6 @@ public synchronized void stop(StopStatus stopStatus) { AggregateException ae = new AggregateException(); // Stop WebServer's SubServices - try { - if (zkStore != null) { - zkStore.stop(); - } - } catch (Exception e) { - ae.addException(e); - } - if (ae.getExceptions().size() > 0) { LOGGER.logWarning(ae, "Failed to stop %s gracefully", serviceName); } diff --git a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/zookeeperstore/ZookeeperStore.java b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/zookeeperstore/ZookeeperStore.java index 0dc0da5bb7..8d00415d8b 100644 --- a/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/zookeeperstore/ZookeeperStore.java +++ b/subprojects/frameworklauncher/yarn/src/main/java/com/microsoft/frameworklauncher/zookeeperstore/ZookeeperStore.java @@ -49,10 +49,6 @@ protected ZookeeperStore(ZooKeeperClient zkClient, ZookeeperStoreStructure zkStr this.zkStruct = zkStruct; } - public void stop() { - zkClient.stop(); - } - // Setup Basic ZookeeperStoreStructure private void setupZKStructure() throws Exception { if (!zkClient.exists(zkStruct.getLauncherRootPath())) {