feat: add flag enforce_max_duration #798

Merged: 17 commits, Mar 4, 2024
10 changes: 4 additions & 6 deletions WORKSPACE
@@ -1,6 +1,7 @@
workspace(name = "mlperf_app")

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")

http_archive(
name = "bazel_skylib",
@@ -88,17 +89,14 @@ http_archive(
urls = ["https://github.com/MediaTek-NeuroPilot/tflite-neuron-delegate/archive/refs/heads/update_for_dujac.zip"],
)

http_archive(
new_git_repository(
name = "org_mlperf_inference",
build_file = "@//flutter/android/third_party:loadgen.BUILD",
commit = "238d035ab41d7ddd390b35471af169ea641380f6",
patch_args = ["-p1"],
patch_cmds = ["python3 loadgen/version_generator.py loadgen/version_generated.cc loadgen"],
patches = [],
sha256 = "e664f980e84fcab3573447c0cc3adddd1fcf900367c5dcbff17179ece24c484e",
strip_prefix = "inference-2da0c52666e21e4b296b09e1dbd287bf3a814e96",
urls = [
"https://github.com/mlcommons/inference/archive/2da0c52666e21e4b296b09e1dbd287bf3a814e96.tar.gz",
],
remote = "https://github.com/mlcommons/inference.git",
)

# This is required to pass SNPE SDK path from external environment to sources,
20 changes: 15 additions & 5 deletions docs/result-spec.md
@@ -76,12 +76,22 @@ If you enable Submission mode, both `performance_run` and `accuracy_run` values
Actual duration of the benchmark in seconds from start to finish.
* `measured_samples`: integer number
Actual number of samples evaluated during the benchmark.
* `loadgen_info`: map
* `loadgen`: map
Info provided by loadgen. May be null for accuracy runs.
* `validity`: bool
Indicates whether all constraints were satisfied or not.
* `duration`: floating point number
Duration of the benchmark without loadgen overhead in seconds.
* `queryCount`: integer number
Number of queries performed.
* `latencyMean`: floating point number
Mean latency in seconds.
* `latency90`: floating point number
90th percentile in seconds.
* `isMinDurationMet`: bool
Indicates whether the min duration condition is met or not.
* `isMinQueryMet`: bool
Indicates whether the min query condition is met or not.
* `isEarlyStoppingMet`: bool
Indicates whether the early stopping condition is met or not.
* `isResultValid`: bool
Indicates whether the result is valid or not.
* `start_datetime`: string
Datetime of the moment when the benchmark started.
Format is ISO 8601 in UTC timezone: `2022-04-14T03:54:54.687Z`
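For illustration, a `loadgen` map that follows the field list above might look like the sketch below, written here as a Dart map literal. The field names are taken from this documentation; all values are invented.

```dart
// Hypothetical example of the `loadgen` map documented above.
// Values are illustrative only and do not come from a real benchmark run.
const Map<String, Object> exampleLoadgen = {
  'queryCount': 1024, // number of queries performed
  'latencyMean': 0.0123, // mean latency in seconds
  'latency90': 0.0157, // 90th percentile latency in seconds
  'isMinDurationMet': true,
  'isMinQueryMet': true,
  'isEarlyStoppingMet': true,
  'isResultValid': true,
};
```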
1 change: 1 addition & 0 deletions flutter/cpp/mlperf_driver.cc
@@ -105,6 +105,7 @@ void MlperfDriver::RunMLPerfTest(const std::string& mode, int min_query_count,
// See https://github.com/mlcommons/inference/issues/1397
mlperf_settings.max_duration_ms =
static_cast<uint64_t>(std::ceil(max_duration * 1000.0));
mlperf_settings.enforce_max_duration = true;

if (scenario_ == "Offline") {
mlperf_settings.scenario = ::mlperf::TestScenario::Offline;
12 changes: 6 additions & 6 deletions flutter/integration_test/expected_throughput.dart
@@ -46,7 +46,7 @@ const Map<String, Map<String, Interval>> _imageClassification = {
_kPixel6: Interval(min: 800, max: 1100),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 1900, max: 2400),
_kS22Ultra: Interval(min: 1700, max: 2400),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 30, max: 55),
@@ -70,7 +70,7 @@ const Map<String, Map<String, Interval>> _objectDetection = {
_kPixel6: Interval(min: 250, max: 450),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 800, max: 1400),
_kS22Ultra: Interval(min: 700, max: 1400),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 120, max: 210),
@@ -94,7 +94,7 @@ const Map<String, Map<String, Interval>> _imageSegmentationV2 = {
_kPixel6: Interval(min: 100, max: 180),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 450, max: 700),
_kS22Ultra: Interval(min: 400, max: 700),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 45, max: 70),
@@ -119,7 +119,7 @@ const Map<String, Map<String, Interval>> _naturalLanguageProcessing = {
_kPixel6: Interval(min: 2, max: 75),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 120, max: 180),
_kS22Ultra: Interval(min: 100, max: 180),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 1, max: 6),
@@ -143,7 +143,7 @@ const Map<String, Map<String, Interval>> _superResolution = {
_kPixel6: Interval(min: 10, max: 14),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 35, max: 55),
_kS22Ultra: Interval(min: 25, max: 55),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 5, max: 15),
@@ -167,7 +167,7 @@ const Map<String, Map<String, Interval>> _imageClassificationOffline = {
_kPixel6: Interval(min: 1000, max: 1700),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 2600, max: 3500),
_kS22Ultra: Interval(min: 2500, max: 3500),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 75, max: 140),
44 changes: 34 additions & 10 deletions flutter/lib/backend/loadgen_info.dart
@@ -1,19 +1,28 @@
import 'dart:convert';
import 'dart:io';

import 'package:json_annotation/json_annotation.dart';

part 'loadgen_info.g.dart';

@JsonSerializable(fieldRename: FieldRename.snake)
class LoadgenInfo {
// Mean latency in seconds
final double meanLatency;
final int queryCount;
// 90th percentile in seconds
final double latency90;
final bool validity;
final double latencyMean; // Mean latency in seconds
final double latency90; // 90th percentile in seconds
final bool isMinDurationMet;
final bool isMinQueryMet;
final bool isEarlyStoppingMet;
final bool isResultValid;

LoadgenInfo({
required this.meanLatency,
required this.queryCount,
required this.latencyMean,
required this.latency90,
required this.validity,
required this.isMinDurationMet,
required this.isMinQueryMet,
required this.isEarlyStoppingMet,
required this.isResultValid,
});

static Future<LoadgenInfo?> fromFile({required String filepath}) {
@@ -57,6 +66,10 @@ class LoadgenInfo {
const latency90Key = 'result_90.00_percentile_latency_ns';
// https://github.com/mlcommons/inference/blob/318cb131c0adf3bffcbc3379a502f40891331c54/loadgen/loadgen.cc#L1028-L1029
const validityKey = 'result_validity';
// https://github.com/mlcommons/inference/blob/318cb131c0adf3bffcbc3379a502f40891331c54/loadgen/loadgen.cc#L1033C23-L1035
const minDurationMetKey = 'result_min_duration_met';
const minQueriesMetKey = 'result_min_queries_met';
const earlyStoppingMetKey = 'early_stopping_met';

final result = await extractKeys(
logLines: logLines,
@@ -65,22 +78,33 @@
queryCountKey,
latency90Key,
validityKey,
minDurationMetKey,
minQueriesMetKey,
earlyStoppingMetKey,
},
);

if (result.isEmpty) {
return null;
}

final validity = result[validityKey] as String == 'VALID';
final isResultValid = result[validityKey] as String == 'VALID';

const nanosecondsPerSecond = 1000 * Duration.microsecondsPerSecond;

return LoadgenInfo(
meanLatency: (result[latencyKey] as int) / nanosecondsPerSecond,
queryCount: result[queryCountKey] as int,
latencyMean: (result[latencyKey] as int) / nanosecondsPerSecond,
latency90: (result[latency90Key] as int) / nanosecondsPerSecond,
validity: validity,
isMinDurationMet: result[minDurationMetKey] as bool,
isMinQueryMet: result[minQueriesMetKey] as bool,
isEarlyStoppingMet: result[earlyStoppingMetKey] as bool,
isResultValid: isResultValid,
);
}

factory LoadgenInfo.fromJson(Map<String, dynamic> json) =>
_$LoadgenInfoFromJson(json);

Map<String, dynamic> toJson() => _$LoadgenInfoToJson(this);
}
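A minimal usage sketch of the updated class follows; the log file path is hypothetical, and `fromFile` resolves to null when the expected keys cannot be found in the log.

```dart
import 'package:mlperfbench/backend/loadgen_info.dart';

// Sketch: parse a loadgen detail log and print the newly exposed fields.
// The file path below is hypothetical and used only for illustration.
Future<void> printLoadgenSummary() async {
  final info = await LoadgenInfo.fromFile(
    filepath: '/data/local/tmp/mlperf_log_detail.txt',
  );
  if (info == null) {
    print('No loadgen info could be parsed from the log.');
    return;
  }
  print('query count: ${info.queryCount}');
  print('mean latency (s): ${info.latencyMean}');
  print('90th percentile latency (s): ${info.latency90}');
  print('min duration met: ${info.isMinDurationMet}');
  print('result valid: ${info.isResultValid}');
}
```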
24 changes: 13 additions & 11 deletions flutter/lib/benchmark/benchmark.dart
@@ -2,6 +2,7 @@ import 'package:collection/collection.dart';

import 'package:mlperfbench/app_constants.dart';
import 'package:mlperfbench/backend/bridge/run_settings.dart';
import 'package:mlperfbench/backend/loadgen_info.dart';
import 'package:mlperfbench/benchmark/info.dart';
import 'package:mlperfbench/benchmark/run_mode.dart';
import 'package:mlperfbench/data/results/benchmark_result.dart';
@@ -19,17 +20,18 @@ class BenchmarkResult {
final String acceleratorName;
final String delegateName;
final int batchSize;
final bool validity;

BenchmarkResult(
{required this.throughput,
required this.accuracy,
required this.accuracy2,
required this.backendName,
required this.acceleratorName,
required this.delegateName,
required this.batchSize,
required this.validity});
final LoadgenInfo? loadgenInfo;

BenchmarkResult({
required this.throughput,
required this.accuracy,
required this.accuracy2,
required this.backendName,
required this.acceleratorName,
required this.delegateName,
required this.batchSize,
required this.loadgenInfo,
});
}

class Benchmark {
@@ -44,7 +44,7 @@ Future<void> main() async {
makeNullable(definitions['Run']['properties']['throughput']);
makeNullable(definitions['Run']['properties']['accuracy']);
makeNullable(definitions['Run']['properties']['accuracy2']);
makeNullable(definitions['Run']['properties']['loadgen_info']);
makeNullable(definitions['Run']['properties']['loadgen']);
makeNullable(definitions['Value']['properties']['android']);
makeNullable(definitions['Value']['properties']['ios']);
makeNullable(definitions['Value']['properties']['windows']);
12 changes: 9 additions & 3 deletions flutter/lib/data/generation_helpers/sample_generator.dart
@@ -1,5 +1,6 @@
import 'package:uuid/uuid.dart';

import 'package:mlperfbench/backend/loadgen_info.dart';
import 'package:mlperfbench/data/build_info/build_info.dart';
import 'package:mlperfbench/data/environment/env_android.dart';
import 'package:mlperfbench/data/environment/env_ios.dart';
@@ -37,9 +38,14 @@ class SampleGenerator {
measuredDuration: 123.456,
measuredSamples: 8,
startDatetime: DateTime.now(),
loadgenInfo: BenchmarkLoadgenInfo(
duration: 10.6,
validity: true,
loadgenInfo: LoadgenInfo(
queryCount: 12345,
latencyMean: 0.123,
latency90: 0.123,
isMinDurationMet: true,
isMinQueryMet: true,
isEarlyStoppingMet: true,
isResultValid: true,
),
);

20 changes: 3 additions & 17 deletions flutter/lib/data/results/benchmark_result.dart
@@ -1,5 +1,6 @@
import 'package:json_annotation/json_annotation.dart';

import 'package:mlperfbench/backend/loadgen_info.dart';
import 'package:mlperfbench/data/results/backend_info.dart';
import 'package:mlperfbench/data/results/backend_settings.dart';
import 'package:mlperfbench/data/results/dataset_info.dart';
@@ -108,22 +109,6 @@ class Accuracy implements Comparable<Accuracy> {
String toUIString() => (normalized * 100).toStringAsFixed(1);
}

@JsonSerializable(fieldRename: FieldRename.snake)
class BenchmarkLoadgenInfo {
final bool validity;
final double duration;

BenchmarkLoadgenInfo({
required this.validity,
required this.duration,
});

factory BenchmarkLoadgenInfo.fromJson(Map<String, dynamic> json) =>
_$BenchmarkLoadgenInfoFromJson(json);

Map<String, dynamic> toJson() => _$BenchmarkLoadgenInfoToJson(this);
}

@JsonSerializable(fieldRename: FieldRename.snake)
class BenchmarkRunResult {
final Throughput? throughput;
@@ -133,7 +118,8 @@ class BenchmarkRunResult {
final double measuredDuration;
final int measuredSamples;
final DateTime startDatetime;
final BenchmarkLoadgenInfo? loadgenInfo;
@JsonKey(name: 'loadgen')
final LoadgenInfo? loadgenInfo;

BenchmarkRunResult({
required this.throughput,
13 changes: 1 addition & 12 deletions flutter/lib/resources/export_result_helper.dart
@@ -1,6 +1,5 @@
import 'package:mlperfbench/backend/bridge/run_result.dart';
import 'package:mlperfbench/backend/list.dart';
import 'package:mlperfbench/backend/loadgen_info.dart';
import 'package:mlperfbench/benchmark/benchmark.dart';
import 'package:mlperfbench/benchmark/run_info.dart';
import 'package:mlperfbench/benchmark/run_mode.dart';
@@ -71,17 +70,7 @@ class ResultHelper {
measuredDuration: result.duration,
measuredSamples: result.numSamples,
startDatetime: result.startTime,
loadgenInfo: _makeLoadgenInfo(info.loadgenInfo),
);
}

BenchmarkLoadgenInfo? _makeLoadgenInfo(LoadgenInfo? source) {
if (source == null) {
return null;
}
return BenchmarkLoadgenInfo(
validity: source.validity,
duration: source.meanLatency * source.queryCount,
loadgenInfo: info.loadgenInfo,
);
}

2 changes: 1 addition & 1 deletion flutter/lib/resources/result_manager.dart
@@ -121,7 +121,7 @@ class ResultManager {
acceleratorName: export.backendInfo.acceleratorName,
delegateName: export.backendSettings.delegate,
batchSize: export.backendSettings.batchSize,
validity: runResult.loadgenInfo?.validity ?? false,
loadgenInfo: runResult.loadgenInfo,
);
}

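Since the exported run result now embeds the full loadgen info instead of a single `validity` flag (see the `result_manager.dart` change above), code that previously read `validity` can derive it from the embedded structure. A minimal sketch, with an invented helper name:

```dart
import 'package:mlperfbench/benchmark/benchmark.dart';

// Sketch: derive run validity from the embedded LoadgenInfo.
// A null loadgenInfo (e.g. an accuracy-only run) is treated as not valid,
// mirroring the previous `?? false` fallback on the removed `validity` field.
bool isRunValid(BenchmarkResult result) {
  return result.loadgenInfo?.isResultValid ?? false;
}
```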