RUBY-3315 BSON benchmark scoring (#2773)

jamis · web-flow · commit 532033695460 · 2023-08-22T08:19:50.000-06:00
* RUBY-3314 Implement variable iterations for benchmarks * report percentiles along with the median * rename Benchmarking::Micro to Benchmarking::BSON * refactoring to appease rubocop * RUBY-3315 benchmark scoring * fix merge artifact
diff --git a/profile/benchmarking/bson.rb b/profile/benchmarking/bson.rb
@@ -14,6 +14,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+require_relative 'percentiles'
+require_relative 'summary'
+
 module Mongo
   module Benchmarking
     # These tests focus on BSON encoding and decoding; they are client-side only and
@@ -43,6 +46,20 @@ def run_all(map)
         end
       end
 
+      # As defined by the spec, the score for a given benchmark is the
+      # size of the task (in MB) divided by the median wall clock time.
+      #
+      # @param [ Symbol ] type the type of the task
+      # @param [ Mongo::Benchmarking::Percentiles ] percentiles the Percentiles
+      #   object to query for the median time.
+      # @param [ Numeric ] scale the number of times the operation is performed
+      #   per iteration, used to scale the task size.
+      #
+      # @return [ Numeric ] the score for the given task.
+      def score_for(type, percentiles, scale: 10_000)
+        task_size(type, scale) / percentiles[50]
+      end
+
       # Run a BSON benchmark test.
       #
       # @example Run a test.
@@ -51,10 +68,14 @@ def run_all(map)
       # @param [ Symbol ] type The type of test to run.
       # @param [ :encode | :decode ] action The action to perform.
       #
-      # @return [ Array<Number> ] The test results for each iteration
+      # @return [ Mongo::Benchmarking::Summary ] The test results for
+      #    the requested benchmark.
       def run(type, action)
-        file_path = File.join(Benchmarking::DATA_PATH, "#{type}_bson.json")
-        Benchmarking.without_gc { send(action, file_path) }
+        timings = Benchmarking.without_gc { send(action, file_for(type)) }
+        percentiles = Percentiles.new(timings)
+        score = score_for(type, percentiles)
+
+        Summary.new(timings, percentiles, score)
       end
 
       # Run an encoding BSON benchmark test.
@@ -95,6 +116,36 @@ def decode(file_name)
           end
         end
       end
+
+      private
+
+      # The path to the source file for the given task type.
+      #
+      # @param [ Symbol ] type the task type
+      #
+      # @return [ String ] the path to the source file.
+      def file_for(type)
+        File.join(Benchmarking::DATA_PATH, "#{type}_bson.json")
+      end
+
+      # As defined by the spec, the size of a BSON task is the size of the
+      # file, multiplied by the scale (the number of times the file is processed
+      # per iteration), divided by a million.
+      #
+      # "the dataset size for a task is the size of the single-document source
+      # file...times 10,000 operations"
+      #
+      # "Each task will have defined for it an associated size in
+      # megabytes (MB)"
+      #
+      # @param [ Symbol ] type the type of the task
+      # @param [ Numeric ] scale the number of times the operation is performed
+      #   per iteration (e.g. 10,000)
+      #
+      # @return [ Numeric ] the size of the task, reported in MB
+      def task_size(type, scale)
+        File.size(file_for(type)) * scale / 1_000_000.0
+      end
     end
   end
 end
diff --git a/profile/benchmarking/helper.rb b/profile/benchmarking/helper.rb
@@ -89,10 +89,11 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_
     def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ])
       results.each do |key, value|
         puts format('%*s%s:', indent, '', key)
-        if value.is_a?(Hash)
-          report(value, indent: indent + 2, percentiles: percentiles)
+
+        if value.respond_to?(:summary)
+          puts value.summary(indent + 2, percentiles)
         else
-          report_result(value, indent, percentiles)
+          report(value, indent: indent + 2, percentiles: percentiles)
         end
       end
     end
@@ -143,21 +144,5 @@ def without_gc
     ensure
       GC.enable
     end
-
-    private
-
-    # Formats and displays the results of a single benchmark run.
-    #
-    # @param [ Array<Numeric> ] results the results to report
-    # @param [ Integer ] indent how much the report should be indented
-    # @param [ Array<Numeric> ] percentiles the percentiles to report
-    def report_result(results, indent, percentiles)
-      ps = Percentiles.new(results)
-      puts format('%*smedian: %g', indent + 2, '', ps[50])
-      puts format('%*spercentiles:', indent + 2, '')
-      percentiles.each do |pct|
-        puts format('%*s%g: %g', indent + 4, '', pct, ps[pct])
-      end
-    end
   end
 end
diff --git a/profile/benchmarking/percentiles.rb b/profile/benchmarking/percentiles.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Mongo
+  module Benchmarking
+    # A utility class for returning the list item at a given percentile
+    # value.
+    class Percentiles
+      # @return [ Array<Number> ] the sorted list of numbers to consider
+      attr_reader :list
+
+      # Create a new Percentiles object that encapsulates the given list of
+      # numbers.
+      #
+      # @param [ Array<Number> ] list the list of numbers to consider
+      def initialize(list)
+        @list = list.sort
+      end
+
+      # Finds and returns the element in the list that represents the given
+      # percentile value.
+      #
+      # @param [ Number ] percentile a number in the range [1,100]
+      #
+      # @return [ Number ] the element of the list for the given percentile.
+      def [](percentile)
+        i = (list.size * percentile / 100.0).ceil - 1
+        list[i]
+      end
+    end
+  end
+end
diff --git a/profile/benchmarking/summary.rb b/profile/benchmarking/summary.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+module Mongo
+  module Benchmarking
+    # A utility class for encapsulating the summary information for a
+    # benchmark, including behaviors for reporting on the summary.
+    class Summary
+      # @return [ Array<Numeric> ] the timings of each iteration in the
+      #   benchmark
+      attr_reader :timings
+
+      # @return [ Percentiles ] the percentiles object for querying the
+      #   timing at a given percentile value.
+      attr_reader :percentiles
+
+      # @return [ Numeric ] the composite score for the benchmark
+      attr_reader :score
+
+      # Construct a new Summary object with the given timings, percentiles,
+      # and score.
+      #
+      # @param [ Array<Numeric> ] timings the timings of each iteration in the
+      #   benchmark
+      # @param [ Percentiles ] percentiles the percentiles object for querying
+      #   the timing at a given percentile value
+      # @param [ Numeric ] score the composite score for the benchmark
+      def initialize(timings, percentiles, score)
+        @timings = timings
+        @percentiles = percentiles
+        @score = score
+      end
+
+      # @return [ Numeric ] the median timing for the benchmark.
+      def median
+        percentiles[50]
+      end
+
+      # Formats the results of a single benchmark run.
+      #
+      # @param [ Integer ] indent how much the report should be indented
+      # @param [ Array<Numeric> ] points the percentile points to report
+      #
+      # @return [ String ] a YAML-formatted summary
+      def summary(indent, points)
+        [].tap do |lines|
+          lines << format('%*sscore: %g', indent, '', score)
+          lines << format('%*smedian: %g', indent, '', median)
+          lines << format('%*spercentiles:', indent, '')
+          points.each do |pct|
+            lines << format('%*s%g: %g', indent + 2, '', pct, percentiles[pct])
+          end
+        end.join("\n")
+      end
+    end
+  end
+end