# ---- profile/benchmarking/bson.rb ------------------------------------------

# Computes the score of a benchmark. Per the benchmarking spec, the score
# is the size of the task (in MB) divided by the median wall clock time.
#
# @param [ Symbol ] type the type of the task
# @param [ Mongo::Benchmarking::Percentiles ] percentiles the object that
#   is asked (via `[50]`) for the median time.
# @param [ Numeric ] scale the number of times the operation is performed
#   per iteration; it is forwarded to `task_size` to scale the task size.
#
# @return [ Numeric ] the score for the given task.
def score_for(type, percentiles, scale: 10_000)
  megabytes = task_size(type, scale)
  megabytes / percentiles[50]
end

# Run a BSON benchmark test.
#
# @param [ Symbol ] type The type of test to run.
# @param [ :encode | :decode ] action The action to perform.
#
# @return [ Mongo::Benchmarking::Summary ] a summary wrapping the timings,
#   the percentiles computed from them, and the score of the benchmark.
def run(type, action)
  # The action is dispatched by name and handed the data file for `type`.
  samples = Benchmarking.without_gc { send(action, file_for(type)) }
  ranked = Percentiles.new(samples)

  Summary.new(samples, ranked, score_for(type, ranked))
end

private

# Builds the path to the source file for the given task type.
#
# @param [ Symbol ] type the task type
#
# @return [ String ] the path to the source file.
def file_for(type)
  file_name = "#{type}_bson.json"
  File.join(Benchmarking::DATA_PATH, file_name)
end

# As defined by the spec, the size of a BSON task is the size of the
# file, multiplied by the scale (the number of times the file is processed
# per iteration), divided by a million.
#
# "the dataset size for a task is the size of the single-document source
# file...times 10,000 operations"
#
# "Each task will have defined for it an associated size in
# megabytes (MB)"
#
# @param [ Symbol ] type the type of the task
# @param [ Numeric ] scale the number of times the operation is performed
#   per iteration (e.g. 10,000)
#
# @return [ Numeric ] the size of the task, reported in MB
def task_size(type, scale)
  bytes = File.size(file_for(type))
  bytes * scale / 1_000_000.0
end

# ---- profile/benchmarking/helper.rb ----------------------------------------

# Prints a (possibly nested) collection of benchmark results. Each key is
# printed on its own line at the current indent. A value that responds to
# `summary` is printed through that method; any other value is treated as
# a nested collection and reported recursively, two columns deeper.
#
# @param [ #each ] results the key/value pairs to report
# @param [ Integer ] indent how far the current level is indented
# @param [ Array ] percentiles the percentile points handed to `summary`
def report(results, indent: 0, percentiles: [ 10, 25, 50, 75, 90, 95, 98, 99 ])
  results.each do |label, entry|
    puts format('%*s%s:', indent, '', label)
    nested_indent = indent + 2

    if entry.respond_to?(:summary)
      puts entry.summary(nested_indent, percentiles)
    else
      report(entry, indent: nested_indent, percentiles: percentiles)
    end
  end
end
# frozen_string_literal: true

module Mongo
  module Benchmarking
    # A utility class for returning the list item at a given percentile
    # value.
    class Percentiles
      # @return [ Array ] the sorted list of numbers to consider
      attr_reader :list

      # Create a new Percentiles object that encapsulates the given list of
      # numbers. The list is copied and sorted; the argument is not modified.
      #
      # @param [ Array ] list the list of numbers to consider
      def initialize(list)
        @list = list.sort
      end

      # Finds and returns the element in the list that represents the given
      # percentile value: the smallest element such that at least
      # `percentile` percent of the list is less than or equal to it.
      #
      # Percentiles outside of the documented range are clamped to the ends
      # of the list, so that e.g. 0 yields the smallest element (rather than
      # wrapping around to the largest via a negative index) and values
      # above 100 yield the largest element (rather than nil).
      #
      # @param [ Numeric ] percentile a number in the range [1,100]
      #
      # @return [ Numeric | nil ] the element of the list for the given
      #   percentile, or nil if the list is empty.
      def [](percentile)
        return nil if list.empty?

        # 1-based rank of the wanted element, kept inside the list bounds.
        rank = (list.size * percentile / 100.0).ceil
        list[rank.clamp(1, list.size) - 1]
      end
    end
  end
end
# frozen_string_literal: true

module Mongo
  module Benchmarking
    # A utility class for encapsulating the summary information for a
    # benchmark, including behaviors for reporting on the summary.
    class Summary
      # The percentile points reported when the caller does not specify any.
      DEFAULT_POINTS = [ 10, 25, 50, 75, 90, 95, 98, 99 ].freeze

      # @return [ Array ] the timings of each iteration in the
      #   benchmark
      attr_reader :timings

      # @return [ Percentiles ] the percentiles object for querying the
      #   timing at a given percentile value.
      attr_reader :percentiles

      # @return [ Numeric ] the composite score for the benchmark
      attr_reader :score

      # Construct a new Summary object with the given timings, percentiles,
      # and score.
      #
      # @param [ Array ] timings the timings of each iteration in the
      #   benchmark
      # @param [ Percentiles ] percentiles the percentiles object for querying
      #   the timing at a given percentile value (anything that answers
      #   `[percentile]` will do)
      # @param [ Numeric ] score the composite score for the benchmark
      def initialize(timings, percentiles, score)
        @timings = timings
        @percentiles = percentiles
        @score = score
      end

      # @return [ Numeric ] the median timing for the benchmark.
      def median
        percentiles[50]
      end

      # Formats the results of a single benchmark run as indented
      # `key: value` lines: the score, the median, and then one line per
      # requested percentile point, nested two columns deeper.
      #
      # @param [ Integer ] indent how much the report should be indented
      # @param [ Array ] points the percentile points to report
      #
      # @return [ String ] the formatted summary, lines joined by newlines
      #   (no trailing newline).
      def summary(indent = 0, points = DEFAULT_POINTS)
        lines = [
          format('%*sscore: %g', indent, '', score),
          format('%*smedian: %g', indent, '', median),
          format('%*spercentiles:', indent, '')
        ]

        points.each do |pct|
          lines << format('%*s%g: %g', indent + 2, '', pct, percentiles[pct])
        end

        lines.join("\n")
      end
    end
  end
end