
[MXNET-1041] Add Java benchmark #13095

Merged · 6 commits · Nov 13, 2018

Changes from all commits
40 changes: 40 additions & 0 deletions scala-package/examples/scripts/benchmark/run_java_inference_bm.sh
@@ -0,0 +1,40 @@
#!/bin/bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

set -e

hw_type=cpu
if [ "$USE_GPU" = "1" ]
then
    hw_type=gpu
fi

platform=linux-x86_64

if [[ $OSTYPE = [darwin]* ]]
then
    platform=osx-x86_64
fi

MXNET_ROOT=$(cd "$(dirname $0)/../../../.."; pwd)
CLASS_PATH=$MXNET_ROOT/scala-package/assembly/$platform-$hw_type/target/*:$MXNET_ROOT/scala-package/examples/target/*

java -Xmx8G -Dmxnet.traceLeakedObjects=true -cp $CLASS_PATH \
org.apache.mxnetexamples.javaapi.benchmark.JavaBenchmark $@
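A hypothetical invocation of the script. The option names come from the benchmark classes in this PR; the model and image paths shown are the defaults in ObjectDetectionBenchmark and are purely illustrative:

./run_java_inference_bm.sh --model-name ObjectDetection \
    --model-path-prefix /model/ssd_resnet50_512 \
    --input-image /images/dog.jpg \
    --num-runs 100 --batchsize 16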

@@ -41,7 +41,7 @@ INPUT_IMG=$2
INPUT_DIR=$3

java -Xmx8G -cp $CLASS_PATH \
-    org.apache.mxnetexamples.infer.javapi.objectdetector.SSDClassifierExample \
+    org.apache.mxnetexamples.javaapi.infer.objectdetector.SSDClassifierExample \
--model-path-prefix $MODEL_DIR \
--input-image $INPUT_IMG \
--input-dir $INPUT_DIR
InferBase.java (new file)
@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mxnetexamples.javaapi.benchmark;

import org.apache.mxnet.javaapi.Context;
import org.kohsuke.args4j.Option;

import java.util.List;

abstract class InferBase {
@Option(name = "--num-runs", usage = "Number of runs")
public int numRun = 1;
@Option(name = "--model-name", usage = "Name of the model")
public String modelName = "";
@Option(name = "--batchsize", usage = "Size of the batch")
public int batchSize = 1;

public abstract void preProcessModel(List<Context> context);
public abstract void runSingleInference();
public abstract void runBatchInference();
}
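As a sketch of the extension point: a new benchmark subclasses InferBase and gets registered in the switch inside JavaBenchmark.main. The class below is hypothetical and not part of this PR; it only illustrates the shape of an implementation:

// Hypothetical example of plugging another model into the harness.
class ImageClassifierBenchmark extends InferBase {
    @Option(name = "--model-path-prefix", usage = "input model directory and prefix of the model")
    public String modelPathPrefix = "/model/resnet-152";

    public void preProcessModel(List<Context> context) {
        // load the model and prepare a sample input here
    }

    public void runSingleInference() {
        // run one forward pass on the prepared input here
    }

    public void runBatchInference() {
        // run one batched forward pass here
    }
}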
JavaBenchmark.java (new file)
@@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mxnetexamples.javaapi.benchmark;

import org.apache.mxnet.javaapi.Context;
import org.kohsuke.args4j.CmdLineParser;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class JavaBenchmark {

private static boolean runBatch = false;

private static void parse(Object inst, String[] args) {
CmdLineParser parser = new CmdLineParser(inst);
try {
parser.parseArgument(args);
} catch (Exception e) {
System.err.println(e.getMessage() + e);
parser.printUsage(System.err);
System.exit(1);
}
}

private static long percentile(int p, long[] seq) {
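// Nearest-rank percentile over the sorted samples: for example, with
// seq.length == 100 and p == 90, k = ceil(99 * 0.9) = 90.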
Arrays.sort(seq);
int k = (int) Math.ceil((seq.length - 1) * (p / 100.0));
return seq[k];
}

private static void printStatistics(long[] inferenceTimesRaw, String metricsPrefix) {
long[] inferenceTimes = inferenceTimesRaw;
// remove head and tail
if (inferenceTimes.length > 2) {
inferenceTimes = Arrays.copyOfRange(inferenceTimesRaw,
        1, inferenceTimesRaw.length - 1);
}

Contributor (@piyushghai): A good approach to dropping the head and tail would be to check for a minimum array length (say 10, 20, or 50) and then drop the first 10% as the head and the last 10% as the tail. Hardcoding the removal of one item for the head and one for the tail might not be statistically significant for a large array.

Member (author): The reason to remove the first and last is that the first GPU inference triggers pre-memory allocation, which makes only the first inference take about 10x longer than the others. The last one has a similar problem on CPU.

Member (@nswamy): I agree with @piyushghai; I think it would make sense to drop max(x%, 1). WDYT?

Member (author): @piyushghai @nswamy Bear with me: is there any technical reason or proof of concept for doing this? Why would dropping the first 10% and the last 10% bring a better result? Why not 20%? (Under the assumption that the time cost is normally distributed, it would be better to drop the smallest 5% and the largest 5% so we fit two sigma.)

Member (@nswamy): I don't have a statistical reason for x%; x depends on what you are measuring, and I don't know if 10% is the right answer. What I have seen is that there is a warm-up time (for example, loading caches) and a cool-down period (evicting caches, for example) at the start and end of performance runs, so you tend to drop a certain percentage of them.

Contributor: I've witnessed the warm-up period during training; I don't doubt there is some effect during inference, but I'd be surprised if it's very prominent. If there is one, no reason comes to mind why it would depend on the number of inferences you're performing. Sure, it could depend on the example pool size, but given the same number of available examples, the warm-up for 100 inferences should be the same as for 100k. Removing max(x%, 1) in both of these cases would remove the warm-up in the 100 case and the warm-up plus outliers in the 100k case. I don't see a reason to add rules in an attempt to account for things that we don't really understand. I would vote that we measure every run. We've got a P90; if the assumption about warm-up/cool-down is true, it'll be in the 10% that gets trimmed there.
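A minimal sketch of the proportional trim suggested above (drop max(x%, 1) samples from each end); the 10% figure, the length guard, and the helper name are assumptions for illustration, not part of this PR:

// Hypothetical alternative to the fixed one-from-each-end trim.
private static long[] trimWarmupAndCooldown(long[] times, double fraction) {
    if (times.length <= 2) {
        return times; // too few samples to trim meaningfully
    }
    // Drop max(fraction * runs, 1) entries from each end, as suggested.
    int drop = Math.max((int) (times.length * fraction), 1);
    drop = Math.min(drop, (times.length - 1) / 2); // always keep at least one sample
    return Arrays.copyOfRange(times, drop, times.length - drop);
}

Under this sketch, printStatistics would call trimWarmupAndCooldown(inferenceTimesRaw, 0.10) in place of the fixed copyOfRange above.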
double p50 = percentile(50, inferenceTimes) / 1.0e6;
double p99 = percentile(99, inferenceTimes) / 1.0e6;
double p90 = percentile(90, inferenceTimes) / 1.0e6;
long sum = 0;
for (long time: inferenceTimes) sum += time;
double average = sum / (inferenceTimes.length * 1.0e6);

System.out.println(
String.format("\n%s_p99 %fms\n%s_p90 %fms\n%s_p50 %fms\n%s_average %1.2fms",
metricsPrefix, p99, metricsPrefix, p90,
metricsPrefix, p50, metricsPrefix, average)
);

}

private static List<Context> getContext() {
List<Context> context = new ArrayList<Context>();
if (System.getenv().containsKey("SCALA_TEST_ON_GPU") &&
Integer.valueOf(System.getenv("SCALA_TEST_ON_GPU")) == 1) {
context.add(Context.gpu());
} else {
context.add(Context.cpu());
}
return context;
}

public static void main(String[] args) {
if (args.length < 2) {
StringBuilder sb = new StringBuilder();
sb.append("Please follow the format:");
sb.append("\n --model-name <model-name>");
sb.append("\n --num-runs <number of runs>");
sb.append("\n --batchsize <batch size>");
System.out.println(sb.toString());
return;
}
String modelName = args[1];
InferBase model = null;
switch (modelName) {
    case "ObjectDetection":
        runBatch = true;
        ObjectDetectionBenchmark inst = new ObjectDetectionBenchmark();
        parse(inst, args);
        model = inst;
        break;
    default:
        System.err.println("Model name not found! " + modelName);
        System.exit(1);
}
List<Context> context = getContext();

long[] result = new long[model.numRun];
model.preProcessModel(context);
if (runBatch) {
for (int i = 0; i < model.numRun; i++) {
long currTime = System.nanoTime();
model.runBatchInference();
result[i] = System.nanoTime() - currTime;
}
System.out.println("Batchsize: " + model.batchSize);
System.out.println("Num of runs: " + model.numRun);
printStatistics(result, modelName + "batch_inference");
Contributor: You can remove the modelName from the second parameter here as well. The model name will be aptly captured when reporting the results of the benchmark run, since we'd know which script we ran it with :)
}

model.batchSize = 1;
model.preProcessModel(context);
result = new long[model.numRun];
for (int i = 0; i < model.numRun; i++) {
long currTime = System.nanoTime();
model.runSingleInference();
result[i] = System.nanoTime() - currTime;
}
System.out.println("Num of runs: " + model.numRun);
printStatistics(result, modelName + "single_inference");
}
}
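For reference, printStatistics prints one line per metric, with the prefix formed by concatenating modelName and the phase name (note there is no separator between them). An illustrative run with modelName = "ObjectDetection" would look like this; the numbers are placeholders, not measured results:

ObjectDetectionsingle_inference_p99 15.000000ms
ObjectDetectionsingle_inference_p90 12.000000ms
ObjectDetectionsingle_inference_p50 10.000000ms
ObjectDetectionsingle_inference_average 10.50ms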
ObjectDetectionBenchmark.java (new file)
@@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.mxnetexamples.javaapi.benchmark;

import org.apache.mxnet.infer.javaapi.ObjectDetector;
import org.apache.mxnet.javaapi.*;
import org.kohsuke.args4j.Option;

import java.util.ArrayList;
import java.util.List;

class ObjectDetectionBenchmark extends InferBase {
@Option(name = "--model-path-prefix", usage = "input model directory and prefix of the model")
public String modelPathPrefix = "/model/ssd_resnet50_512";
@Option(name = "--input-image", usage = "the input image")
public String inputImagePath = "/images/dog.jpg";

private ObjectDetector objDet;
private NDArray img;
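// Handle to the Scala companion object, which exposes the generated
// NDArray operator API (e.g. concat) to Java code.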
private NDArray$ NDArray = NDArray$.MODULE$;

public void preProcessModel(List<Context> context) {
Shape inputShape = new Shape(new int[] {this.batchSize, 3, 512, 512});
List<DataDesc> inputDescriptors = new ArrayList<>();
inputDescriptors.add(new DataDesc("data", inputShape, DType.Float32(), "NCHW"));
objDet = new ObjectDetector(modelPathPrefix, inputDescriptors, context, 0);
img = ObjectDetector.bufferedImageToPixels(
ObjectDetector.reshapeImage(
ObjectDetector.loadImageFromFile(inputImagePath), 512, 512
),
new Shape(new int[] {1, 3, 512, 512})
);
}

public void runSingleInference() {
List<NDArray> nd = new ArrayList<>();
nd.add(img);
objDet.objectDetectWithNDArray(nd, 3);
}

public void runBatchInference() {
List<NDArray> nd = new ArrayList<>();
NDArray[] temp = new NDArray[batchSize];
for (int i = 0; i < batchSize; i++) temp[i] = img.copy();
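// Concatenate the batchSize copies along dimension 0 to build a single
// (batchSize, 3, 512, 512) input batch.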
NDArray batched = NDArray.concat(temp, batchSize).setdim(0).invoke().get();
nd.add(batched);
objDet.objectDetectWithNDArray(nd, 3);
}
}
SSDClassifierExample.java
@@ -15,7 +15,7 @@
* limitations under the License.
*/

-package org.apache.mxnetexamples.infer.javapi.objectdetector;
+package org.apache.mxnetexamples.javaapi.infer.objectdetector;

import org.apache.mxnet.infer.javaapi.ObjectDetectorOutput;
import org.kohsuke.args4j.CmdLineParser;
ObjectDetector.scala
@@ -21,7 +21,7 @@ package org.apache.mxnet.infer.javaapi
import java.awt.image.BufferedImage
// scalastyle:on

-import org.apache.mxnet.javaapi.{Context, DataDesc, NDArray}
+import org.apache.mxnet.javaapi.{Context, DataDesc, NDArray, Shape}

import scala.collection.JavaConverters
import scala.collection.JavaConverters._
@@ -113,6 +113,14 @@ object ObjectDetector
org.apache.mxnet.infer.ImageClassifier.loadImageFromFile(inputImagePath)
}

def reshapeImage(img : BufferedImage, newWidth: Int, newHeight: Int): BufferedImage = {
  org.apache.mxnet.infer.ImageClassifier.reshapeImage(img, newWidth, newHeight)
}

Contributor: So I'm not sure whether we should add these to the public API for ObjectDetector. I made this mistake with loadImageFromFile earlier, apparently because we don't yet have ImageClassifier in the Java API. In the Scala version, the objectDetector benchmark and example call loadImageFromFile, reshapeImage, and bufferedImageToPixels directly from the ImageClassifier class. We're not able to do that from here yet, but even if we could, it seems odd to do so. Maybe we should move them to a utils class or something? What does everyone else think about how we should handle this?

Member: I think it is OK to keep this method, since this class is anyway like a utility.

def bufferedImageToPixels(resizedImage: BufferedImage, inputImageShape: Shape): NDArray = {
org.apache.mxnet.infer.ImageClassifier.bufferedImageToPixels(resizedImage, inputImageShape)
}

def loadInputBatch(inputImagePaths: java.util.List[String]): java.util.List[BufferedImage] = {
org.apache.mxnet.infer.ImageClassifier
.loadInputBatch(inputImagePaths.asScala.toList).toList.asJava