Skip to content

Commit 5320336

Browse files
authored
RUBY-3315 BSON benchmark scoring (#2773)
* RUBY-3314 Implement variable iterations for benchmarks * report percentiles along with the median * rename Benchmarking::Micro to Benchmarking::BSON * refactoring to appease rubocop * RUBY-3315 benchmark scoring * fix merge artifact
1 parent 0f866fd commit 5320336

File tree

4 files changed

+145
-22
lines changed

4 files changed

+145
-22
lines changed

profile/benchmarking/bson.rb

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17+
require_relative 'percentiles'
18+
require_relative 'summary'
19+
1720
module Mongo
1821
module Benchmarking
1922
# These tests focus on BSON encoding and decoding; they are client-side only and
@@ -43,6 +46,20 @@ def run_all(map)
4346
end
4447
end
4548

49+
# As defined by the spec, the score for a given benchmark is the
50+
# size of the task (in MB) divided by the median wall clock time.
51+
#
52+
# @param [ Symbol ] type the type of the task
53+
# @param [ Mongo::Benchmarking::Percentiles ] percentiles the Percentiles
54+
# object to query for the median time.
55+
# @param [ Numeric ] scale the number of times the operation is performed
56+
# per iteration, used to scale the task size.
57+
#
58+
# @return [ Numeric ] the score for the given task.
59+
def score_for(type, percentiles, scale: 10_000)
  # Task size in MB (file size scaled by the per-iteration operation count).
  megabytes = task_size(type, scale)
  # Median wall clock time, i.e. the 50th percentile of the timings.
  median_time = percentiles[50]

  megabytes / median_time
end
62+
4663
# Run a BSON benchmark test.
4764
#
4865
# @example Run a test.
@@ -51,10 +68,14 @@ def run_all(map)
5168
# @param [ Symbol ] type The type of test to run.
5269
# @param [ :encode | :decode ] action The action to perform.
5370
#
54-
# @return [ Array<Number> ] The test results for each iteration
71+
# @return [ Mongo::Benchmarking::Summary ] The test results for
72+
# the requested benchmark.
5573
def run(type, action)
  # Dispatch to the requested action (:encode or :decode) exactly once,
  # inside Benchmarking.without_gc, and keep the per-iteration timings.
  timings = Benchmarking.without_gc { send(action, file_for(type)) }

  # Derive the percentile view and the composite score from those timings.
  percentiles = Percentiles.new(timings)
  score = score_for(type, percentiles)

  Summary.new(timings, percentiles, score)
end
5980

6081
# Run an encoding BSON benchmark test.
@@ -95,6 +116,36 @@ def decode(file_name)
95116
end
96117
end
97118
end
119+
120+
private
121+
122+
# The path to the source file for the given task type.
123+
#
124+
# @param [ Symbol ] type the task type
125+
#
126+
# @return [ String ] the path to the source file.
127+
def file_for(type)
  # Source files are named after the task type, e.g. "<type>_bson.json".
  basename = "#{type}_bson.json"
  File.join(Benchmarking::DATA_PATH, basename)
end
130+
131+
# As defined by the spec, the size of a BSON task is the size of the
132+
# file, multiplied by the scale (the number of times the file is processed
133+
# per iteration), divided by a million.
134+
#
135+
# "the dataset size for a task is the size of the single-document source
136+
# file...times 10,000 operations"
137+
#
138+
# "Each task will have defined for it an associated size in
139+
# megabytes (MB)"
140+
#
141+
# @param [ Symbol ] type the type of the task
142+
# @param [ Numeric ] scale the number of times the operation is performed
143+
# per iteration (e.g. 10,000)
144+
#
145+
# @return [ Numeric ] the size of the task, reported in MB
146+
def task_size(type, scale)
  # Size of the single-document source file, in bytes.
  bytes = File.size(file_for(type))

  # Scale by the number of operations per iteration, then convert bytes to
  # MB (dividing by a Float keeps the result fractional).
  bytes * scale / 1_000_000.0
end
98149
end
99150
end
100151
end

profile/benchmarking/helper.rb

Lines changed: 4 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,11 @@ def benchmark(max_iterations: Benchmarking::TEST_REPETITIONS, min_time: 60, max_
8989
def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ])
  # Walks a nested results structure and prints it as an indented outline.
  # Each key is printed as a heading; a value that responds to #summary is
  # a leaf and prints its own summary, anything else is treated as a nested
  # collection and reported recursively, two columns deeper.
  #
  # `indent` is the number of leading spaces for this level; `percentiles`
  # is the list of percentile points handed to each leaf's #summary.
  results.each do |key, value|
    puts format('%*s%s:', indent, '', key)

    if value.respond_to?(:summary)
      puts value.summary(indent + 2, percentiles)
    else
      report(value, indent: indent + 2, percentiles: percentiles)
    end
  end
end
@@ -143,21 +144,5 @@ def without_gc
143144
ensure
144145
GC.enable
145146
end
146-
147-
private
148-
149-
# Formats and displays the results of a single benchmark run.
150-
#
151-
# @param [ Array<Numeric> ] results the results to report
152-
# @param [ Integer ] indent how much the report should be indented
153-
# @param [ Array<Numeric> ] percentiles the percentiles to report
154-
def report_result(results, indent, percentiles)
  # Wrap the raw timings so they can be queried by percentile.
  stats = Percentiles.new(results)
  inner = indent + 2

  puts format('%*smedian: %g', inner, '', stats[50])
  puts format('%*spercentiles:', inner, '')

  # One line per requested percentile point, nested one level deeper.
  percentiles.each { |point| puts format('%*s%g: %g', indent + 4, '', point, stats[point]) }
end
162147
end
163148
end

profile/benchmarking/percentiles.rb

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# frozen_string_literal: true
2+
3+
module Mongo
4+
module Benchmarking
5+
# A utility class for returning the list item at a given percentile
6+
# value.
7+
class Percentiles
  # @return [ Array<Numeric> ] the sorted list of numbers to consider
  attr_reader :list

  # Create a new Percentiles object that encapsulates the given list of
  # numbers. A sorted copy is stored; the caller's list is left untouched.
  #
  # @param [ Array<Numeric> ] list the list of numbers to consider
  def initialize(list)
    @list = list.sort
  end

  # Finds and returns the element in the list that represents the given
  # percentile value, i.e. the ceil(size * percentile / 100)-th smallest
  # element.
  #
  # @param [ Numeric ] percentile a number in the range [0,100]. Values
  #   whose computed rank falls before the first element (e.g. 0) yield the
  #   smallest element; values above 100 index past the end and yield nil.
  #
  # @return [ Numeric | nil ] the element of the list for the given
  #   percentile, or nil if the list is empty.
  def [](percentile)
    rank = (list.size * percentile / 100.0).ceil - 1

    # A negative index would silently wrap around to the end of the list
    # (returning the maximum for percentile 0), so pin it to the first slot.
    list[[ rank, 0 ].max]
  end
end
30+
end
31+
end

profile/benchmarking/summary.rb

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# frozen_string_literal: true
2+
3+
module Mongo
4+
module Benchmarking
5+
# A utility class for encapsulating the summary information for a
6+
# benchmark, including behaviors for reporting on the summary.
7+
class Summary
  # @return [ Array<Numeric> ] the per-iteration timings recorded for the
  #   benchmark
  attr_reader :timings

  # @return [ Percentiles ] the object used to look up the timing at a
  #   given percentile value
  attr_reader :percentiles

  # @return [ Numeric ] the composite score for the benchmark
  attr_reader :score

  # Build a Summary from already-computed benchmark results.
  #
  # @param [ Array<Numeric> ] timings the per-iteration timings
  # @param [ Percentiles ] percentiles the object used to look up the
  #   timing at a given percentile value
  # @param [ Numeric ] score the composite score for the benchmark
  def initialize(timings, percentiles, score)
    @timings = timings
    @percentiles = percentiles
    @score = score
  end

  # @return [ Numeric ] the median (50th percentile) timing.
  def median
    percentiles[50]
  end

  # Formats the results of a single benchmark run as text. Nothing is
  # printed here; the caller decides what to do with the returned string.
  #
  # @param [ Integer ] indent how much the report should be indented
  # @param [ Array<Numeric> ] points the percentile points to report
  #
  # @return [ String ] a YAML-formatted summary
  def summary(indent, points)
    lines = [
      format('%*sscore: %g', indent, '', score),
      format('%*smedian: %g', indent, '', median),
      format('%*spercentiles:', indent, '')
    ]

    # Each percentile point is nested one level below the heading.
    points.each do |point|
      lines << format('%*s%g: %g', indent + 2, '', point, percentiles[point])
    end

    lines.join("\n")
  end
end
55+
end
56+
end

0 commit comments

Comments
 (0)