distribution/lib/Standard/Test/0.0.0-dev/src/Bench.enso

from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State

## Configuration for a measurement phase or a warmup phase of a benchmark.
type Phase_Conf
    ## PRIVATE

       Arguments:
       - iterations: The number of iterations of the phase.
       - seconds: The minimal number of seconds per one iteration.
    Impl (iterations:Integer) (seconds:Integer)

    ## PRIVATE
       Renders the configuration as text, e.g. `2 iterations, 3 seconds each`.
    to_text self =
        self.iterations.to_text + " iterations, " + self.seconds.to_text + " seconds each"

    ## Validates the config and throws a Panic if it is invalid.

       Only negative values are rejected, with an `Illegal_Argument` panic;
       zero is accepted for both `seconds` and `iterations`.
    validate : Nothing
    validate self =
        # The checks are `< 0`, so the messages say "non-negative" rather than "positive".
        if self.seconds < 0 then Panic.throw (Illegal_Argument.Error "Seconds must be non-negative")
        if self.iterations < 0 then Panic.throw (Illegal_Argument.Error "Iterations must be non-negative")


## The options shared by all benchmarks of a `Bench.Group`. These options
   roughly correspond to the options defined in the JMH benchmarking library.
   See the JMH documentation for more details:
   https://javadoc.io/doc/org.openjdk.jmh/jmh-core/latest/org/openjdk/jmh/annotations/package-summary.html
type Bench_Options
    ## PRIVATE

       Arguments:
       - warmup: The configuration of the warmup phase.
       - measure: The configuration of the measurement phase.
    Impl (warmup:Phase_Conf) (measure:Phase_Conf)

    ## Returns new options with the warmup phase replaced and the measurement
       phase kept as it is.
    set_warmup : Phase_Conf -> Bench_Options
    set_warmup self (warm:Phase_Conf) =
        Bench_Options.Impl warm self.measure

    ## Returns new options with the measurement phase replaced and the warmup
       phase kept as it is.
    set_measure : Phase_Conf -> Bench_Options
    set_measure self (meas:Phase_Conf) =
        Bench_Options.Impl self.warmup meas

    ## PRIVATE
       Renders both phase configurations as text.
    to_text self =
        warmup_text = self.warmup.to_text
        measure_text = self.measure.to_text
        "[warmup={" + warmup_text + "}, measurement={" + measure_text + "}]"

    ## Validates both phase configurations, warmup first. A Panic is thrown if
       either of them is invalid.
    validate : Nothing
    validate self =
        self.warmup.validate
        self.measure.validate

## Builder used to assemble the groups of a benchmark run.
type Bench_Builder
    ## PRIVATE

       Arguments:
       - builder: The vector builder that collects the registered groups.
    Impl builder

    ## Add a group to the builder.

       Arguments:
       - name: The name of the group; it is checked with `validate_name`.
       - configuration: The options shared by the benchmarks of the group.
       - fn: A function that receives a `Group_Builder` and registers the
         benchmarks of the group through it.
    group : Text -> Bench_Options -> (Group_Builder -> Any) -> Any
    group self (name:Text) (configuration:Bench_Options) fn =
        validate_name name
        specs_builder = Vector.new_builder
        group_builder = Group_Builder.Impl specs_builder
        fn group_builder
        # The specs are snapshotted only after `fn` has registered all of them.
        new_group = Bench.Group name configuration specs_builder.to_vector
        self.builder.append new_group

## Builder used to collect the benchmarks of a single group.
type Group_Builder
    ## PRIVATE

       Arguments:
       - builder: The vector builder that collects the registered specs.
    Impl builder

    ## Adds a benchmark specification to the group.

       Arguments:
       - name: The name of the benchmark. Must be a valid Java identifier.
       - benchmark: The suspended benchmark body. It is not evaluated here; it
         is wrapped in a function stored in the created `Bench.Spec`.
    specify : Text -> Any -> Bench
    specify self (name:Text) ~benchmark =
        validate_name name
        spec = Bench.Spec name (_ -> benchmark)
        self.builder.append spec


## A benchmark definition. It is either the whole set of groups, a single
   group of benchmarks, or a single benchmark specification.
type Bench
    ## A set of groups of benchmarks.

       Arguments:
       - groups: The groups of benchmarks that make up the set.
    All (groups : Vector Bench)

    ## A single group of benchmarks sharing configuration.

       Arguments:
       - name: The name of the group.
       - configuration: The options applied to every benchmark of the group.
       - specs: The benchmarks that belong to the group.
    Group (name:Text) (configuration:Bench_Options) (specs : Vector Bench)

    ## A specific single benchmark.

       Arguments:
       - name: The name of the benchmark.
       - code: The function that runs the benchmark body when it is called.
    Spec (name:Text) (code : Any -> Any)

    ## Builds a `Bench` out of the groups registered by the given function.

       Arguments:
       - fn: A function that receives a `Bench_Builder` and registers the
         groups through it.

       The resulting `Bench.All` is validated before it is returned.
    build : (Bench_Builder -> Any) -> Bench
    build fn =
        groups_builder = Vector.new_builder
        fn (Bench_Builder.Impl groups_builder)
        all_groups = Bench.All groups_builder.to_vector
        all_groups.validate

    ## The default benchmark options: both the warmup and the measurement phase
       use the default configuration returned by `Bench.phase_conf`.
    options : Bench_Options
    options =
        Bench_Options.Impl Bench.phase_conf Bench.phase_conf

    ## Creates a phase configuration, using the defaults for any argument that
       is not provided.

       JMH defaults to 5 iterations of 10 seconds each. The defaults here are
       lower (2 iterations of 3 seconds each) so that benchmarks run faster.

       Arguments:
       - iterations: The number of iterations of the phase.
       - seconds: The minimal number of seconds per one iteration.
    phase_conf : Integer -> Integer -> Phase_Conf
    phase_conf iterations=2 seconds=3 = Phase_Conf.Impl iterations seconds

    ## Validates the benchmark and throws a Panic if it is invalid. Returns self
       if the benchmark is valid.

       The checks depend on the kind of the benchmark:
       - `Bench.All`: the group names must be unique and every group must be
         valid.
       - `Bench.Group`: the name and the configuration must be valid, the spec
         names must be unique and every spec must be valid.
       - `Bench.Spec`: the name must be valid and the code must be specified.
    validate : Bench
    validate self =
        # Throws if the given vector of names contains a duplicate.
        ensure_distinct_names names =
            if names.length != names.distinct.length then Panic.throw (Illegal_Argument.Error ("Benchmark names must be unique, got: " + names.to_text))
        case self of
            Bench.All groups ->
                group_names = groups.map (_.name)
                ensure_distinct_names group_names
                groups.each _.validate
            Bench.Group name conf specs ->
                validate_name name
                conf.validate
                ensure_distinct_names <| specs.map (_.name)
                # Recurse into the specs; without this the `Bench.Spec` checks
                # below would never run for specs that are part of a group.
                specs.each _.validate
            Bench.Spec name code ->
                validate_name name
                if code == Nothing then Panic.throw (Illegal_Argument.Error "Benchmark code must be specified")
        self

    ## Folds over all the specs of the benchmark, threading an accumulator
       through the specified function.

       Arguments:
       - value: The initial value of the accumulator.
       - fn: A function called as `fn accumulator group spec` for every spec.
         When the fold is started on a single `Bench.Spec`, the spec is passed
         both as the group and as the spec.
    fold : Any -> (Any -> Bench -> Bench -> Any) -> Any
    fold self value fn =
        case self of
            Bench.All groups ->
                groups.fold value acc-> group-> group.fold acc fn
            Bench.Group _ _ specs ->
                specs.fold value acc-> spec-> fn acc self spec
            Bench.Spec _ _ ->
                fn value self self

    ## Returns the number of specs in the benchmark, counted by folding over
       all of them.
    total_specs : Integer
    total_specs self =
        count_spec = count-> _-> _-> count+1
        self.fold 0 count_spec

    ## Estimates the total runtime of the benchmark from the configurations of
       its groups.

       Every spec is assumed to take the time of the warmup and measurement
       phases of its group, each computed as `seconds * iterations`. A Panic
       with `Illegal_State` is thrown for a spec that is not part of a group.
    estimated_runtime : Duration
    estimated_runtime self =
        phase_seconds phase = phase.seconds * phase.iterations
        spec_seconds group spec = case group of
            Bench.Group _ conf _ ->
                (phase_seconds conf.warmup) + (phase_seconds conf.measure)
            _ ->
                Panic.throw (Illegal_State.Error "Encountered a specification "+spec.to_text+" outside of a group - cannot estimate runtime without knowing the configuration.")
        total_seconds = self.fold 0 acc-> group-> spec-> acc + (spec_seconds group spec)

        Duration.new seconds=total_seconds

    ## Runs all the benchmarks.

       First the number of specs and the estimated runtime are printed. If
       `get_benchmark_report_path` returns a path, the CSV header of the report
       is written to it (an existing file is backed up). Then every spec is
       measured with the configuration of its group, under the name
       `<group name>.<spec name>`.
    run_main self =
        spec_count = self.total_specs
        eta_text = self.estimated_runtime.to_display_text
        IO.println <| "Found " + spec_count.to_text + " cases to execute (ETA " + eta_text + ")"

        case get_benchmark_report_path of
            Nothing -> Nothing
            report_path ->
                header = 'Label,Phase,"Invocations count","Average time (ms)","Time Stdev"'
                header.write report_path on_existing_file=Existing_File_Behavior.Backup

        self.fold Nothing _-> group-> spec->
            config = group.configuration
            full_name = group.name + "." + spec.name
            IO.println <| "Benchmarking '" + full_name + "' with configuration: " + config.to_text
            Bench.measure full_name config.warmup config.measure (spec.code 0)

    ## Measures a computation by running its warmup phase followed by its
       measurement phase, and prints the total time taken.

       If the `ENSO_BENCHMARK_TEST_DRY_RUN` environment variable is set to
       `True`, the computation is executed only once and no phases are run.

       Arguments:
       - label: A name for the benchmark.
       - warmup_conf: Warmup phase configuration.
       - measure_conf: Measurement phase configuration.
       - act: The action to perform.

       > Example
         Measure a computation called "foo" with a warmup phase of 2 iterations
         and a measurement phase of 1 iteration, with 3 seconds per iteration.

             import Standard.Examples
             from Standard.Test import Bench

             example_measure =
                 Bench.measure "foo" (Bench.phase_conf 2 3) (Bench.phase_conf 1 3) Examples.get_boolean
    measure : Text -> Phase_Conf -> Phase_Conf -> Any -> Nothing
    measure (label:Text) (warmup_conf:Phase_Conf) (measure_conf:Phase_Conf) ~act =
        nanos_per_ms = 1000000
        dry_run_flag = Environment.get "ENSO_BENCHMARK_TEST_DRY_RUN" "False"
        is_dry_run = dry_run_flag == "True"
        case is_dry_run of
            True ->
                elapsed_ms = (Bench.single_call act) / nanos_per_ms
                IO.println <| "[DRY-RUN] Benchmark '" + label + "' finished in " + (elapsed_ms.format "#.###") + " ms"
            False ->
                started_ns = System.nano_time
                Bench.run_phase label "Warmup" warmup_conf act
                Bench.run_phase label "Measurement" measure_conf act
                finished_ns = System.nano_time
                elapsed_ms = (finished_ns - started_ns) / nanos_per_ms
                IO.println <| "Benchmark '" + label + "' finished in " + (elapsed_ms.format "#.###") + " ms"


    ## Measure the amount of time in ns it takes to execute a given suspended
       computation.

       Arguments:
       - act: The suspended computation to time. It is executed once, through
         `Runtime.no_inline`.

       Returns the difference between the `System.nano_time` readings taken
       right before and right after the computation. If the computation
       returned a dataflow error, it is raised as a panic instead.
    single_call ~act =
        # Read the clock immediately before running the computation.
        start = System.nano_time
        r = Runtime.no_inline act
        # Read the clock before inspecting the result, so the check below is not part of the measured time.
        end = System.nano_time
        # If the computation returned a dataflow error, we raise it to a panic - we do not want silent failures in benchmarks.
        Panic.rethrow r
        end - start

    ## Run a single phase of the benchmark.

       The total run time of the phase is computed as `conf.seconds * conf.iterations`,
       so that it is the same as in JMH. The benchmark body is invoked
       repeatedly until the sum of the measured invocation times exceeds that
       budget; the mean and standard deviation of the invocation times (in
       milliseconds) are then passed to `Bench.summarize_phase`.

       Arguments:
       - label: A name for the benchmark.
       - phase_name: The name of the phase.
       - conf: The phase configuration.
       - act: Method that should be measured - benchmark body. It is taken as a
         suspended argument and executed through `Bench.single_call`.
    run_phase : Text -> Text -> Phase_Conf -> (Any -> Any) -> Nothing
    run_phase (label:Text) (phase_name:Text) (conf:Phase_Conf) ~act =
        # Total time budget of the phase, in nanoseconds.
        duration_ns = conf.iterations * conf.seconds * 1000000000
        phase_start = System.nano_time
        stop_ns = phase_start + duration_ns
        durations_builder = Vector.new_builder
        # `cur_ns` advances by the measured invocation times only; the clock is
        # not re-read inside the loop, so loop overhead does not use up the budget.
        go cur_ns =
            if cur_ns  > stop_ns then Nothing else
                dur = Bench.single_call act
                durations_builder.append dur
                @Tail_Call go (cur_ns + dur)
        # Starts at `phase_start`, so the body runs at least once whenever the budget is not negative.
        go phase_start
        nanos_in_ms = 1000000
        # Convert the collected invocation times from nanoseconds to milliseconds.
        durations = durations_builder.to_vector.map (x-> x / nanos_in_ms)
        stats = durations.compute_bulk [Statistic.Mean, Statistic.Standard_Deviation]
        avg = stats.first
        stddev = stats.second
        run_iters = durations.length
        # The end of the phase is read after the statistics are computed, so
        # the reported phase duration includes that computation.
        phase_end = System.nano_time
        phase_duration = Duration.new nanoseconds=(phase_end - phase_start)
        Bench.summarize_phase label phase_name run_iters avg stddev phase_duration

    ## PRIVATE
       Prints a short summary of a finished phase: its duration, the number of
       invocations and the average invocation time with its standard deviation.

       If `get_benchmark_report_path` returns a path, a CSV row with the label,
       the phase name, the invocation count and the formatted statistics is
       appended to that file.

       This is a deliberately simple implementation. It may be improved later,
       but it yields a summary that is easier to analyse than the logs.
    summarize_phase (label:Text) (phase_name:Text) (invocations:Integer) (average_time:Float) (time_stddev:Float) (phase_duration:Duration) =
        mean_text = average_time.format "#.###"
        deviation_text = time_stddev.format "#.###"
        duration_text = phase_duration.total_milliseconds.format "#.##"
        count_text = invocations.to_text
        IO.println <| phase_name + " duration:    " + duration_text + " ms"
        IO.println <| phase_name + " invocations: " + count_text
        IO.println <| phase_name + " avg time:    " + mean_text + " ms (+-" + deviation_text + ")"

        case get_benchmark_report_path of
            Nothing -> Nothing
            report_path ->
                csv_row = '\n"'+label+'","'+phase_name+'",'+count_text+','+mean_text+','+deviation_text
                csv_row.write report_path on_existing_file=Existing_File_Behavior.Append

## PRIVATE

   Checks that the given name can be used as a benchmark name, i.e. as the name
   of a group or of a spec. A valid name is a valid Java identifier.
   A Panic with `Illegal_Argument` is thrown for an invalid name; `Nothing` is
   returned otherwise.
validate_name : Text -> Nothing
validate_name name =
    # Cannot start with a digit
    identifier_pattern = Regex.compile "[A-Za-z_$][a-zA-Z0-9_$]*"
    is_valid = identifier_pattern.matches name
    if is_valid.not then Panic.throw (Illegal_Argument.Error ("Invalid benchmark name: '" + name + "'"))

## PRIVATE

   Reads the path of the benchmark report file from the
   `ENSO_BENCHMARK_REPORT_PATH` environment variable. Callers treat a result of
   `Nothing` as "no report requested".
get_benchmark_report_path : Text | Nothing
get_benchmark_report_path =
    Environment.get "ENSO_BENCHMARK_REPORT_PATH"