feat: add flag enforce_max_duration #798

Merged: 17 commits, Mar 4, 2024
10 changes: 4 additions & 6 deletions WORKSPACE
@@ -1,6 +1,7 @@
workspace(name = "mlperf_app")

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:git.bzl", "new_git_repository")

http_archive(
name = "bazel_skylib",
@@ -88,17 +89,14 @@ http_archive(
urls = ["https://github.com/MediaTek-NeuroPilot/tflite-neuron-delegate/archive/refs/heads/update_for_dujac.zip"],
)

http_archive(
new_git_repository(
name = "org_mlperf_inference",
build_file = "@//flutter/android/third_party:loadgen.BUILD",
commit = "238d035ab41d7ddd390b35471af169ea641380f6",
patch_args = ["-p1"],
patch_cmds = ["python3 loadgen/version_generator.py loadgen/version_generated.cc loadgen"],
patches = [],
sha256 = "e664f980e84fcab3573447c0cc3adddd1fcf900367c5dcbff17179ece24c484e",
strip_prefix = "inference-2da0c52666e21e4b296b09e1dbd287bf3a814e96",
urls = [
"https://github.com/mlcommons/inference/archive/2da0c52666e21e4b296b09e1dbd287bf3a814e96.tar.gz",
],
remote = "https://github.com/mlcommons/inference.git",
)

# This is required to pass SNPE SDK path from external environment to sources,
20 changes: 15 additions & 5 deletions docs/result-spec.md
@@ -76,12 +76,22 @@ If you enable Submission mode, both `performance_run` and `accuracy_run` values
Actual duration of the benchmark in seconds from start to finish.
* `measured_samples`: integer number
Actual number of samples evaluated during the benchmark.
* `loadgen_info`: map
* `loadgen`: map
Info provided by loadgen. May be null for accuracy runs.
* `validity`: bool
Indicates whether all constraints were satisfied or not.
* `duration`: floating point number
Duration of the benchmark without loadgen overhead in seconds.
* `queryCount`: integer number
Number of queries performed.
* `latencyMean`: floating point number
Mean latency in seconds.
* `latency90`: floating point number
90th percentile in seconds.
* `isMinDurationMet`: bool
Indicates whether the min duration condition is met or not.
* `isMinQueryMet`: bool
Indicates whether the min query condition is met or not.
* `isEarlyStoppingMet`: bool
Indicates whether the early stopping condition is met or not.
* `isResultValid`: bool
Indicates whether the result is valid or not.
* `start_datetime`: string
Datetime of the moment when the benchmark started.
Format is ISO 8601 in UTC timezone: `2022-04-14T03:54:54.687Z`
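For illustration, a `loadgen` map that follows the field list above might look like the sketch below, written here as a Dart map literal. The field names are taken from this documentation; all values are invented.

```dart
// Hypothetical example of the `loadgen` map documented above.
// Values are illustrative only and do not come from a real benchmark run.
const Map<String, Object> exampleLoadgen = {
  'queryCount': 1024, // number of queries performed
  'latencyMean': 0.0123, // mean latency in seconds
  'latency90': 0.0157, // 90th percentile latency in seconds
  'isMinDurationMet': true,
  'isMinQueryMet': true,
  'isEarlyStoppingMet': true,
  'isResultValid': true,
};
```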
1 change: 1 addition & 0 deletions flutter/cpp/mlperf_driver.cc
@@ -105,6 +105,7 @@ void MlperfDriver::RunMLPerfTest(const std::string& mode, int min_query_count,
// See https://github.com/mlcommons/inference/issues/1397
mlperf_settings.max_duration_ms =
static_cast<uint64_t>(std::ceil(max_duration * 1000.0));
mlperf_settings.enforce_max_duration = true;

if (scenario_ == "Offline") {
mlperf_settings.scenario = ::mlperf::TestScenario::Offline;
12 changes: 6 additions & 6 deletions flutter/integration_test/expected_throughput.dart
@@ -46,7 +46,7 @@ const Map<String, Map<String, Interval>> _imageClassification = {
_kPixel6: Interval(min: 800, max: 1100),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 1900, max: 2400),
_kS22Ultra: Interval(min: 1700, max: 2400),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 30, max: 55),
@@ -70,7 +70,7 @@ const Map<String, Map<String, Interval>> _objectDetection = {
_kPixel6: Interval(min: 250, max: 450),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 800, max: 1400),
_kS22Ultra: Interval(min: 700, max: 1400),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 120, max: 210),
@@ -94,7 +94,7 @@ const Map<String, Map<String, Interval>> _imageSegmentationV2 = {
_kPixel6: Interval(min: 100, max: 180),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 450, max: 700),
_kS22Ultra: Interval(min: 400, max: 700),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 45, max: 70),
@@ -119,7 +119,7 @@ const Map<String, Map<String, Interval>> _naturalLanguageProcessing = {
_kPixel6: Interval(min: 2, max: 75),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 120, max: 180),
_kS22Ultra: Interval(min: 100, max: 180),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 1, max: 6),
@@ -143,7 +143,7 @@ const Map<String, Map<String, Interval>> _superResolution = {
_kPixel6: Interval(min: 10, max: 14),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 35, max: 55),
_kS22Ultra: Interval(min: 25, max: 55),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 5, max: 15),
@@ -167,7 +167,7 @@ const Map<String, Map<String, Interval>> _imageClassificationOffline = {
_kPixel6: Interval(min: 1000, max: 1700),
},
_kQtiBackend: {
_kS22Ultra: Interval(min: 2600, max: 3500),
_kS22Ultra: Interval(min: 2500, max: 3500),
},
_kMediatekBackend: {
_kDN2103: Interval(min: 75, max: 140),
44 changes: 34 additions & 10 deletions flutter/lib/backend/loadgen_info.dart
@@ -1,19 +1,28 @@
import 'dart:convert';
import 'dart:io';

import 'package:json_annotation/json_annotation.dart';

part 'loadgen_info.g.dart';

@JsonSerializable(fieldRename: FieldRename.snake)
class LoadgenInfo {
// Mean latency in seconds
final double meanLatency;
final int queryCount;
// 90th percentile in seconds
final double latency90;
final bool validity;
final double latencyMean; // Mean latency in seconds
final double latency90; // 90th percentile in seconds
final bool isMinDurationMet;
final bool isMinQueryMet;
final bool isEarlyStoppingMet;
final bool isResultValid;

LoadgenInfo({
required this.meanLatency,
required this.queryCount,
required this.latencyMean,
required this.latency90,
required this.validity,
required this.isMinDurationMet,
required this.isMinQueryMet,
required this.isEarlyStoppingMet,
required this.isResultValid,
});

static Future<LoadgenInfo?> fromFile({required String filepath}) {
@@ -57,6 +66,10 @@ class LoadgenInfo {
const latency90Key = 'result_90.00_percentile_latency_ns';
// https://github.com/mlcommons/inference/blob/318cb131c0adf3bffcbc3379a502f40891331c54/loadgen/loadgen.cc#L1028-L1029
const validityKey = 'result_validity';
// https://github.com/mlcommons/inference/blob/318cb131c0adf3bffcbc3379a502f40891331c54/loadgen/loadgen.cc#L1033C23-L1035
const minDurationMetKey = 'result_min_duration_met';
const minQueriesMetKey = 'result_min_queries_met';
const earlyStoppingMetKey = 'early_stopping_met';

final result = await extractKeys(
logLines: logLines,
@@ -65,22 +78,33 @@
queryCountKey,
latency90Key,
validityKey,
minDurationMetKey,
minQueriesMetKey,
earlyStoppingMetKey,
},
);

if (result.isEmpty) {
return null;
}

final validity = result[validityKey] as String == 'VALID';
final isResultValid = result[validityKey] as String == 'VALID';

const nanosecondsPerSecond = 1000 * Duration.microsecondsPerSecond;

return LoadgenInfo(
meanLatency: (result[latencyKey] as int) / nanosecondsPerSecond,
queryCount: result[queryCountKey] as int,
latencyMean: (result[latencyKey] as int) / nanosecondsPerSecond,
latency90: (result[latency90Key] as int) / nanosecondsPerSecond,
validity: validity,
isMinDurationMet: result[minDurationMetKey] as bool,
isMinQueryMet: result[minQueriesMetKey] as bool,
isEarlyStoppingMet: result[earlyStoppingMetKey] as bool,
isResultValid: isResultValid,
);
}

factory LoadgenInfo.fromJson(Map<String, dynamic> json) =>
_$LoadgenInfoFromJson(json);

Map<String, dynamic> toJson() => _$LoadgenInfoToJson(this);
}
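A minimal usage sketch of the updated class follows; the log file path is hypothetical, and `fromFile` resolves to null when the expected keys cannot be found in the log.

```dart
import 'package:mlperfbench/backend/loadgen_info.dart';

// Sketch: parse a loadgen detail log and print the newly exposed fields.
// The file path below is hypothetical and used only for illustration.
Future<void> printLoadgenSummary() async {
  final info = await LoadgenInfo.fromFile(
    filepath: '/data/local/tmp/mlperf_log_detail.txt',
  );
  if (info == null) {
    print('No loadgen info could be parsed from the log.');
    return;
  }
  print('query count: ${info.queryCount}');
  print('mean latency (s): ${info.latencyMean}');
  print('90th percentile latency (s): ${info.latency90}');
  print('min duration met: ${info.isMinDurationMet}');
  print('result valid: ${info.isResultValid}');
}
```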
24 changes: 13 additions & 11 deletions flutter/lib/benchmark/benchmark.dart
@@ -2,6 +2,7 @@ import 'package:collection/collection.dart';

import 'package:mlperfbench/app_constants.dart';
import 'package:mlperfbench/backend/bridge/run_settings.dart';
import 'package:mlperfbench/backend/loadgen_info.dart';
import 'package:mlperfbench/benchmark/info.dart';
import 'package:mlperfbench/benchmark/run_mode.dart';
import 'package:mlperfbench/data/results/benchmark_result.dart';
@@ -19,17 +20,18 @@ class BenchmarkResult {
final String acceleratorName;
final String delegateName;
final int batchSize;
final bool validity;

BenchmarkResult(
{required this.throughput,
required this.accuracy,
required this.accuracy2,
required this.backendName,
required this.acceleratorName,
required this.delegateName,
required this.batchSize,
required this.validity});
final LoadgenInfo? loadgenInfo;

BenchmarkResult({
required this.throughput,
required this.accuracy,
required this.accuracy2,
required this.backendName,
required this.acceleratorName,
required this.delegateName,
required this.batchSize,
required this.loadgenInfo,
});
}

class Benchmark {
@@ -44,7 +44,7 @@ Future<void> main() async {
makeNullable(definitions['Run']['properties']['throughput']);
makeNullable(definitions['Run']['properties']['accuracy']);
makeNullable(definitions['Run']['properties']['accuracy2']);
makeNullable(definitions['Run']['properties']['loadgen_info']);
makeNullable(definitions['Run']['properties']['loadgen']);
makeNullable(definitions['Value']['properties']['android']);
makeNullable(definitions['Value']['properties']['ios']);
makeNullable(definitions['Value']['properties']['windows']);
12 changes: 9 additions & 3 deletions flutter/lib/data/generation_helpers/sample_generator.dart
@@ -1,5 +1,6 @@
import 'package:uuid/uuid.dart';

import 'package:mlperfbench/backend/loadgen_info.dart';
import 'package:mlperfbench/data/build_info/build_info.dart';
import 'package:mlperfbench/data/environment/env_android.dart';
import 'package:mlperfbench/data/environment/env_ios.dart';
@@ -37,9 +38,14 @@ class SampleGenerator {
measuredDuration: 123.456,
measuredSamples: 8,
startDatetime: DateTime.now(),
loadgenInfo: BenchmarkLoadgenInfo(
duration: 10.6,
validity: true,
loadgenInfo: LoadgenInfo(
queryCount: 12345,
latencyMean: 0.123,
latency90: 0.123,
isMinDurationMet: true,
isMinQueryMet: true,
isEarlyStoppingMet: true,
isResultValid: true,
),
);

20 changes: 3 additions & 17 deletions flutter/lib/data/results/benchmark_result.dart
@@ -1,5 +1,6 @@
import 'package:json_annotation/json_annotation.dart';

import 'package:mlperfbench/backend/loadgen_info.dart';
import 'package:mlperfbench/data/results/backend_info.dart';
import 'package:mlperfbench/data/results/backend_settings.dart';
import 'package:mlperfbench/data/results/dataset_info.dart';
@@ -108,22 +109,6 @@ class Accuracy implements Comparable<Accuracy> {
String toUIString() => (normalized * 100).toStringAsFixed(1);
}

@JsonSerializable(fieldRename: FieldRename.snake)
class BenchmarkLoadgenInfo {
final bool validity;
final double duration;

BenchmarkLoadgenInfo({
required this.validity,
required this.duration,
});

factory BenchmarkLoadgenInfo.fromJson(Map<String, dynamic> json) =>
_$BenchmarkLoadgenInfoFromJson(json);

Map<String, dynamic> toJson() => _$BenchmarkLoadgenInfoToJson(this);
}

@JsonSerializable(fieldRename: FieldRename.snake)
class BenchmarkRunResult {
final Throughput? throughput;
@@ -133,7 +118,8 @@ class BenchmarkRunResult {
final double measuredDuration;
final int measuredSamples;
final DateTime startDatetime;
final BenchmarkLoadgenInfo? loadgenInfo;
@JsonKey(name: 'loadgen')
final LoadgenInfo? loadgenInfo;

BenchmarkRunResult({
required this.throughput,
13 changes: 1 addition & 12 deletions flutter/lib/resources/export_result_helper.dart
@@ -1,6 +1,5 @@
import 'package:mlperfbench/backend/bridge/run_result.dart';
import 'package:mlperfbench/backend/list.dart';
import 'package:mlperfbench/backend/loadgen_info.dart';
import 'package:mlperfbench/benchmark/benchmark.dart';
import 'package:mlperfbench/benchmark/run_info.dart';
import 'package:mlperfbench/benchmark/run_mode.dart';
@@ -71,17 +70,7 @@ class ResultHelper {
measuredDuration: result.duration,
measuredSamples: result.numSamples,
startDatetime: result.startTime,
loadgenInfo: _makeLoadgenInfo(info.loadgenInfo),
);
}

BenchmarkLoadgenInfo? _makeLoadgenInfo(LoadgenInfo? source) {
if (source == null) {
return null;
}
return BenchmarkLoadgenInfo(
validity: source.validity,
duration: source.meanLatency * source.queryCount,
loadgenInfo: info.loadgenInfo,
);
}

2 changes: 1 addition & 1 deletion flutter/lib/resources/result_manager.dart
@@ -121,7 +121,7 @@ class ResultManager {
acceleratorName: export.backendInfo.acceleratorName,
delegateName: export.backendSettings.delegate,
batchSize: export.backendSettings.batchSize,
validity: runResult.loadgenInfo?.validity ?? false,
loadgenInfo: runResult.loadgenInfo,
);
}

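Since the exported run result now embeds the full loadgen info instead of a single `validity` flag (see the `result_manager.dart` change above), code that previously read `validity` can derive it from the embedded structure. A minimal sketch, with an invented helper name:

```dart
import 'package:mlperfbench/benchmark/benchmark.dart';

// Sketch: derive run validity from the embedded LoadgenInfo.
// A null loadgenInfo (e.g. an accuracy-only run) is treated as not valid,
// mirroring the previous `?? false` fallback on the removed `validity` field.
bool isRunValid(BenchmarkResult result) {
  return result.loadgenInfo?.isResultValid ?? false;
}
```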