-
Notifications
You must be signed in to change notification settings - Fork 6.8k
[MXNET-1041] Add Java benchmark #13095
Changes from all commits
20442c5
6edb645
96eea0e
08dc793
49fb918
b28b970
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

set -e

# Select the hardware flavour of the assembly jar: set USE_GPU=1 for the GPU build.
hw_type=cpu
if [ "$USE_GPU" = "1" ]
then
    hw_type=gpu
fi

# Pick the platform-specific assembly directory (Linux by default, macOS on darwin).
platform=linux-x86_64

if [[ $OSTYPE = [darwin]* ]]
then
    platform=osx-x86_64
fi

MXNET_ROOT=$(cd "$(dirname "$0")/../../../.."; pwd)
CLASS_PATH="$MXNET_ROOT/scala-package/assembly/$platform-$hw_type/target/*:$MXNET_ROOT/scala-package/examples/target/*"

# Quote the classpath so the shell does not glob-expand the '*' entries into
# multiple words (java expands classpath wildcards itself), and quote "$@" so
# benchmark arguments containing spaces survive word splitting.
java -Xmx8G -Dmxnet.traceLeakedObjects=true -cp "$CLASS_PATH" \
    org.apache.mxnetexamples.javaapi.benchmark.JavaBenchmark "$@"
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.mxnetexamples.javaapi.benchmark; | ||
|
||
import org.apache.mxnet.javaapi.Context; | ||
import org.kohsuke.args4j.Option; | ||
|
||
import java.util.List; | ||
|
||
abstract class InferBase { | ||
@Option(name = "--num-runs", usage = "Number of runs") | ||
public int numRun = 1; | ||
@Option(name = "--model-name", usage = "Name of the model") | ||
public String modelName = ""; | ||
@Option(name = "--batchsize", usage = "Size of the batch") | ||
public int batchSize = 1; | ||
|
||
public abstract void preProcessModel(List<Context> context); | ||
public abstract void runSingleInference(); | ||
public abstract void runBatchInference(); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.mxnetexamples.javaapi.benchmark; | ||
|
||
import org.apache.mxnet.javaapi.Context; | ||
import org.kohsuke.args4j.CmdLineParser; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
|
||
public class JavaBenchmark { | ||
|
||
private static boolean runBatch = false; | ||
|
||
private static void parse(Object inst, String[] args) { | ||
CmdLineParser parser = new CmdLineParser(inst); | ||
try { | ||
parser.parseArgument(args); | ||
} catch (Exception e) { | ||
System.err.println(e.getMessage() + e); | ||
parser.printUsage(System.err); | ||
System.exit(1); | ||
} | ||
} | ||
|
||
private static long percentile(int p, long[] seq) { | ||
Arrays.sort(seq); | ||
int k = (int) Math.ceil((seq.length - 1) * (p / 100.0)); | ||
return seq[k]; | ||
} | ||
|
||
private static void printStatistics(long[] inferenceTimesRaw, String metricsPrefix) { | ||
long[] inferenceTimes = inferenceTimesRaw; | ||
// remove head and tail | ||
if (inferenceTimes.length > 2) { | ||
inferenceTimes = Arrays.copyOfRange(inferenceTimesRaw, | ||
1, inferenceTimesRaw.length - 1); | ||
} | ||
double p50 = percentile(50, inferenceTimes) / 1.0e6; | ||
double p99 = percentile(99, inferenceTimes) / 1.0e6; | ||
double p90 = percentile(90, inferenceTimes) / 1.0e6; | ||
long sum = 0; | ||
for (long time: inferenceTimes) sum += time; | ||
double average = sum / (inferenceTimes.length * 1.0e6); | ||
|
||
System.out.println( | ||
String.format("\n%s_p99 %fms\n%s_p90 %fms\n%s_p50 %fms\n%s_average %1.2fms", | ||
metricsPrefix, p99, metricsPrefix, p90, | ||
metricsPrefix, p50, metricsPrefix, average) | ||
); | ||
|
||
} | ||
|
||
private static List<Context> getContext() { | ||
List<Context> context = new ArrayList<Context>(); | ||
if (System.getenv().containsKey("SCALA_TEST_ON_GPU") && | ||
Integer.valueOf(System.getenv("SCALA_TEST_ON_GPU")) == 1) { | ||
context.add(Context.gpu()); | ||
} else { | ||
context.add(Context.cpu()); | ||
} | ||
return context; | ||
} | ||
|
||
public static void main(String[] args) { | ||
if (args.length < 2) { | ||
StringBuilder sb = new StringBuilder(); | ||
sb.append("Please follow the format:"); | ||
sb.append("\n --model-name <model-name>"); | ||
sb.append("\n --num-runs <number of runs>"); | ||
sb.append("\n --batchsize <batch size>"); | ||
System.out.println(sb.toString()); | ||
return; | ||
} | ||
String modelName = args[1]; | ||
InferBase model = null; | ||
switch(modelName) { | ||
case "ObjectDetection": | ||
runBatch = true; | ||
ObjectDetectionBenchmark inst = new ObjectDetectionBenchmark(); | ||
parse(inst, args); | ||
model = inst; | ||
default: | ||
System.err.println("Model name not found! " + modelName); | ||
System.exit(1); | ||
} | ||
List<Context> context = getContext(); | ||
if (System.getenv().containsKey("SCALA_TEST_ON_GPU") && | ||
Integer.valueOf(System.getenv("SCALA_TEST_ON_GPU")) == 1) { | ||
context.add(Context.gpu()); | ||
} else { | ||
context.add(Context.cpu()); | ||
} | ||
|
||
long[] result = new long[model.numRun]; | ||
model.preProcessModel(context); | ||
if (runBatch) { | ||
for (int i =0;i < model.numRun; i++) { | ||
long currTime = System.nanoTime(); | ||
model.runBatchInference(); | ||
result[i] = System.nanoTime() - currTime; | ||
} | ||
System.out.println("Batchsize: " + model.batchSize); | ||
System.out.println("Num of runs: " + model.numRun); | ||
printStatistics(result, modelName +"batch_inference"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can remove the modelName from the second parameter as well here. |
||
} | ||
|
||
model.batchSize = 1; | ||
model.preProcessModel(context); | ||
result = new long[model.numRun]; | ||
for (int i = 0; i < model.numRun; i++) { | ||
long currTime = System.nanoTime(); | ||
model.runSingleInference(); | ||
result[i] = System.nanoTime() - currTime; | ||
} | ||
System.out.println("Num of runs: " + model.numRun); | ||
printStatistics(result, modelName + "single_inference"); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.mxnetexamples.javaapi.benchmark; | ||
|
||
import org.apache.mxnet.infer.javaapi.ObjectDetector; | ||
import org.apache.mxnet.javaapi.*; | ||
import org.kohsuke.args4j.Option; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
/**
 * Benchmark target for the SSD object-detection example.
 *
 * <p>Loads an object detector from {@code modelPathPrefix}, preprocesses one
 * image from {@code inputImagePath} into a (1, 3, 512, 512) NDArray, and runs
 * detection on it either singly or replicated into a batch.
 */
class ObjectDetectionBenchmark extends InferBase {
    @Option(name = "--model-path-prefix", usage = "input model directory and prefix of the model")
    public String modelPathPrefix = "/model/ssd_resnet50_512";
    @Option(name = "--input-image", usage = "the input image")
    public String inputImagePath = "/images/dog.jpg";

    private ObjectDetector objDet;
    // Preprocessed input image; built with shape (1, 3, 512, 512) in preProcessModel.
    private NDArray img;
    // Handle to the Scala NDArray companion object, needed for the
    // builder-style concat API used in runBatchInference.
    private NDArray$ NDArray = NDArray$.MODULE$;

    /**
     * Creates the detector for a (batchSize, 3, 512, 512) NCHW float32 input
     * and converts the input image (resized to 512x512) into an NDArray.
     */
    public void preProcessModel(List<Context> context) {
        Shape inputShape = new Shape(new int[] {this.batchSize, 3, 512, 512});
        List<DataDesc> inputDescriptors = new ArrayList<>();
        inputDescriptors.add(new DataDesc("data", inputShape, DType.Float32(), "NCHW"));
        objDet = new ObjectDetector(modelPathPrefix, inputDescriptors, context, 0);
        img = ObjectDetector.bufferedImageToPixels(
                ObjectDetector.reshapeImage(
                        ObjectDetector.loadImageFromFile(inputImagePath), 512, 512
                ),
                new Shape(new int[] {1, 3, 512, 512})
        );
    }

    /** Runs detection on the single preprocessed image (top-3 results requested). */
    public void runSingleInference() {
        List<NDArray> nd = new ArrayList<>();
        nd.add(img);
        objDet.objectDetectWithNDArray(nd, 3);
    }

    /**
     * Runs detection on a batch built by copying the preprocessed image
     * batchSize times and concatenating the copies along dim 0.
     */
    public void runBatchInference() {
        List<NDArray> nd = new ArrayList<>();
        NDArray[] temp = new NDArray[batchSize];
        for (int i = 0; i < batchSize; i++) temp[i] = img.copy();
        // Builder-style call into the Scala companion: concat(temp) with
        // num_args = batchSize along dimension 0.
        NDArray batched = NDArray.concat(temp, batchSize).setdim(0).invoke().get();
        nd.add(batched);
        objDet.objectDetectWithNDArray(nd, 3);
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,7 @@ package org.apache.mxnet.infer.javaapi | |
import java.awt.image.BufferedImage | ||
// scalastyle:on | ||
|
||
import org.apache.mxnet.javaapi.{Context, DataDesc, NDArray} | ||
import org.apache.mxnet.javaapi.{Context, DataDesc, NDArray, Shape} | ||
|
||
import scala.collection.JavaConverters | ||
import scala.collection.JavaConverters._ | ||
|
@@ -113,6 +113,14 @@ object ObjectDetector { | |
org.apache.mxnet.infer.ImageClassifier.loadImageFromFile(inputImagePath) | ||
} | ||
|
||
  /**
   * Resizes the given image to `newWidth` x `newHeight`, delegating to the
   * Scala `ImageClassifier` helper (not yet exposed through the Java API).
   */
  def reshapeImage(img : BufferedImage, newWidth: Int, newHeight: Int): BufferedImage = {
    org.apache.mxnet.infer.ImageClassifier.reshapeImage(img, newWidth, newHeight)
  }
|
||
  /**
   * Converts a resized image into an NDArray of shape `inputImageShape`,
   * delegating to the Scala `ImageClassifier` helper.
   */
  def bufferedImageToPixels(resizedImage: BufferedImage, inputImageShape: Shape): NDArray = {
    org.apache.mxnet.infer.ImageClassifier.bufferedImageToPixels(resizedImage, inputImageShape)
  }
|
||
def loadInputBatch(inputImagePaths: java.util.List[String]): java.util.List[BufferedImage] = { | ||
org.apache.mxnet.infer.ImageClassifier | ||
.loadInputBatch(inputImagePaths.asScala.toList).toList.asJava | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A good logic to drop head and tail would be to say, check for a minimum length of the array ( let's say 10 or 20 or 50) and then drop the first 10% for head and last 10% for tail.
Hardcoding it to remove 1 item for head and 1 for tail might not prove to be statistically significant for a large array size.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The reason to remove the first and last is due to the fact that: first GPU inference would introduce Pre-memory allocation and that would make the only first inference takes 10x times than the other. The last one also contains a similar problem with CPU.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with @piyushghai, i think it would make sense to drop max(x%, 1), wdyt ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't have a statistical reason for x%, x depends on what you are measuring. I don't know if 10% is the right answer.
What I have seen is that there is a warm-up time (for example, loading caches) and a cool-down period (for example, evicting caches) at the start and end of performance runs, so you tend to drop a certain percentage of the measurements.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've witnessed the warmup period during training, I don't doubt there is some effect during inference but I'd be surprised if it's very prominent. If there is one, there's no reason that comes to mind as to why it would depend upon the number of inferences you're performing. Sure it could depend on the example pool size but given the same number of available examples, the warmup for 100 inferences should be the same as 100k. Removing max(x%) in both of these cases would remove the warmup in the 100 case and remove warmup + outliers in the 100k.
I don't see a reason to add rules in an attempt to account for things that we don't really understand. I would vote that we measure every run. We've got a P90, if the assumption about a warmup/cool down is true then it'll be in the 10% that gets trimmed there.