Source code for simple_benchmark

# Licensed under Apache License Version 2.0 - see LICENSE
"""
.. warning::
   This package is under active development. API changes are very likely.

This package provides an easy way to benchmark several functions for
different inputs and to visualize the benchmark results.

To use the full feature set (visualization and post-processing) you need to
install the optional dependencies:

- NumPy
- pandas
- matplotlib
"""

__version__ = '0.1.0'

__all__ = [
    'assert_same_results', 'assert_not_mutating_input', 'benchmark',
    'BenchmarkBuilder', 'BenchmarkResult', 'MultiArgument'
]

import collections
import copy
import datetime
import functools
import itertools
import logging
import platform
import pprint
import random
import sys
import timeit
import warnings

_DEFAULT_ARGUMENT_NAME = ''
_DEFAULT_TIME_PER_BENCHMARK = datetime.timedelta(milliseconds=100)
_DEFAULT_ESTIMATOR = min
_DEFAULT_COPY_FUNC = copy.deepcopy

_MISSING = object()
_MSG_DECORATOR_FACTORY = (
    'A decorator factory cannot be applied to a function directly. The decorator factory returns a decorator when '
    'called, so if no arguments should be applied simply call the decorator factory without arguments.'
)
_MSG_MISSING_ARGUMENTS = "The BenchmarkBuilder instance is missing arguments for the functions."

_logger = logging.getLogger(__name__)
_NaN = float('nan')
_TIMEDELTA_ZERO = datetime.timedelta(0)


class MultiArgument(tuple):
    """Class that behaves like a tuple but signals to the benchmark that it
    should pass multiple arguments to the function to benchmark.
    """
    pass
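

# --- Illustrative example (not part of the original module) -----------------
# A minimal sketch of how ``MultiArgument`` is meant to be used: each value in
# the arguments dict is unpacked into several positional arguments of the
# benchmarked function. The helper below is hypothetical and exists purely for
# documentation purposes.
def _example_multi_argument():
    def zip_lists(left, right):
        return list(zip(left, right))

    # Keys are the reported x-values, values are MultiArguments whose elements
    # become the two positional arguments of ``zip_lists``.
    arguments = {size: MultiArgument((list(range(size)), list(range(size))))
                 for size in (10, 100, 1000)}
    return benchmark([zip_lists], arguments, argument_name='list size')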


def _try_importing_matplotlib():
    """Tries to import matplotlib.

    Returns
    -------
    pyplot : module
        The pyplot module from matplotlib.

    Raises
    ------
    ImportError
        In case matplotlib is not installed.
    """
    try:
        import matplotlib.pyplot as plt
    except ImportError:
        raise ImportError('simple_benchmark requires matplotlib for the '
                          'plotting functionality.')
    return plt


def _get_python_bits():
    """Determine if the current Python installation is 64bit.

    Returns
    -------
    result : string
        The string '64bit' in case the Python installation uses 64bit or
        more, otherwise '32bit'.
    """
    return '64bit' if sys.maxsize > 2 ** 32 else '32bit'


class TimingParams(object):
    def __init__(self, repeats, number, stop, timing):
        self.repeats = repeats  # The number of repeats.
        self.number = number    # The number of timings in each repetition.
        self.stop = stop        # Estimate was too slow, the function should not be timed.
        self.timing = timing    # Just informational: the time it took the estimate to run the function.


def _estimate_number_of_repeats(func, target_time, maximum_time):
    """Estimate the number of repeats for a function so that the benchmark will take a specific time.

    In case the function is very slow or really fast some default values are returned.

    Parameters
    ----------
    func : callable
        The function to time. Must not have required arguments!
    target_time : datetime.timedelta
        The amount of time the benchmark should roughly take.
    maximum_time : datetime.timedelta or None
        If not None it represents the maximum time the first call of the
        function may take, otherwise a stop will be signalled.

    Returns
    -------
    timing_parameter : TimingParams
        The parameters used for the actual timings.
    """
    # Just for a quick reference:
    # One millisecond is 1e-3
    # One microsecond is 1e-6
    # One nanosecond  is 1e-9
    single_time = timeit.timeit(func, number=1)
    if maximum_time is not None and single_time > maximum_time.total_seconds():
        return TimingParams(0, 0, stop=True, timing=single_time)

    # Get a more accurate baseline if the function was really fast.
    if single_time < 1e-6:
        single_time = timeit.timeit(func, number=1000) / 1000
    if single_time < 1e-5:
        single_time = timeit.timeit(func, number=100) / 100
    elif single_time < 1e-4:
        single_time = timeit.timeit(func, number=10) / 10

    n_repeats = int(target_time.total_seconds() / single_time)

    # The timeit execution should take at least 10-100us so that the granularity
    # of the timer isn't a limiting factor.
    if single_time < 1e-4:
        factor = 1e-4 / single_time
        return TimingParams(repeats=max(int(n_repeats // factor), 1),
                            number=max(int(factor), 1),
                            stop=False, timing=single_time)

    # Otherwise the number of timings in each repeat should be 1.
    # However make sure there are at least 3 repeats for each function!
    return TimingParams(repeats=max(n_repeats, 3), number=1, stop=False, timing=single_time)


def _get_bound_func(func, argument):
    """Return a function where the arguments are already bound to the function."""
    if isinstance(argument, MultiArgument):
        return functools.partial(func, *argument)
    else:
        return functools.partial(func, argument)


def _get_function_name(func, aliases):
    """Returns the associated name of a function."""
    try:
        return aliases[func]
    except KeyError:
        # Has to be a different branch because not every function has a
        # __name__ attribute, so we cannot simply use the dictionary's `get`
        # with a default.
        try:
            return func.__name__
        except AttributeError:
            raise TypeError('function "func" does not have a __name__ attribute. '
                            'Please use "function_aliases" to provide a function name alias.')
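

# --- Illustrative example (not part of the original module) -----------------
# A small sketch of the repeat-estimation heuristic above: for a target of
# 100 milliseconds and a function that runs in roughly a microsecond, the
# estimator increases ``number`` so that each timed batch lasts at least about
# 100 microseconds, keeping the timer granularity from dominating the
# measurement. The helper below is hypothetical documentation code.
def _example_estimate_repeats():
    fast_func = functools.partial(sum, range(100))  # roughly a microsecond per call
    params = _estimate_number_of_repeats(
        fast_func,
        target_time=datetime.timedelta(milliseconds=100),
        maximum_time=None)
    # ``params.repeats`` batches of ``params.number`` calls should take about 0.1 s in total.
    return params.repeats, params.number, params.stop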


def assert_same_results(funcs, arguments, equality_func):
    """Asserts that all functions return the same result.

    .. versionadded:: 0.1.0

    Parameters
    ----------
    funcs : iterable of callables
        The functions to check.
    arguments : dict
        A dictionary where each key represents the reported value (for
        example an integer representing the list size) and each value is the
        argument for the functions (for example the list). In case you want
        to plot the result it should be sorted and ordered (e.g. an
        :py:class:`collections.OrderedDict` or a plain dict if you are using
        Python 3.7 or later).
    equality_func : callable
        The function that determines if the results are equal. This function
        should accept two arguments and return a boolean (True if the results
        should be considered equal, False if not).

    Raises
    ------
    AssertionError
        In case any two results are not equal.
    """
    funcs = list(funcs)
    for arg in arguments.values():
        first_result = _MISSING
        for func in funcs:
            bound_func = _get_bound_func(func, arg)
            result = bound_func()
            if first_result is _MISSING:
                first_result = result
            else:
                assert equality_func(first_result, result), (func, first_result, result)
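

# --- Illustrative example (not part of the original module) -----------------
# A minimal sketch of ``assert_same_results``: two ways of summing a list are
# checked against each other, with ``operator.eq`` as the equality function.
# The helper below is hypothetical and exists purely for documentation purposes.
def _example_assert_same_results():
    import operator

    def sum_builtin(lst):
        return sum(lst)

    def sum_loop(lst):
        total = 0
        for value in lst:
            total += value
        return total

    arguments = {size: list(range(size)) for size in (10, 100, 1000)}
    assert_same_results([sum_builtin, sum_loop], arguments, equality_func=operator.eq)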


def assert_not_mutating_input(funcs, arguments, equality_func, copy_func=_DEFAULT_COPY_FUNC):
    """Asserts that none of the functions mutate the arguments.

    .. versionadded:: 0.1.0

    Parameters
    ----------
    funcs : iterable of callables
        The functions to check.
    arguments : dict
        A dictionary where each key represents the reported value (for
        example an integer representing the list size) and each value is the
        argument for the functions (for example the list). In case you want
        to plot the result it should be sorted and ordered (e.g. an
        :py:class:`collections.OrderedDict` or a plain dict if you are using
        Python 3.7 or later).
    equality_func : callable
        The function that determines if the original and the possibly mutated
        argument are equal. This function should accept two arguments and
        return a boolean (True if they should be considered equal, False if not).
    copy_func : callable, optional
        The function that is used to copy the original argument.
        Default is :py:func:`copy.deepcopy`.

    Raises
    ------
    AssertionError
        In case any of the functions mutates one of the arguments.

    Notes
    -----
    In case the arguments are :py:class:`MultiArgument` then the copy_func
    and the equality_func get these :py:class:`MultiArgument` as single
    arguments and need to handle them appropriately.
    """
    funcs = list(funcs)
    for arg in arguments.values():
        original_arguments = copy_func(arg)
        for func in funcs:
            bound_func = _get_bound_func(func, arg)
            bound_func()
            assert equality_func(original_arguments, arg), (func, original_arguments, arg)
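

# --- Illustrative example (not part of the original module) -----------------
# A minimal sketch of ``assert_not_mutating_input``: ``sorted`` leaves its
# argument untouched, whereas ``list.sort`` sorts it in place and is therefore
# expected to trigger the ``AssertionError``. Hypothetical documentation code.
def _example_assert_not_mutating_input():
    import operator

    arguments = {size: list(range(size, 0, -1)) for size in (10, 100)}
    # Passes: ``sorted`` returns a new list and does not touch the argument.
    assert_not_mutating_input([sorted], arguments, equality_func=operator.eq)
    try:
        # Fails: ``list.sort`` reorders the argument in place.
        assert_not_mutating_input([list.sort], arguments, equality_func=operator.eq)
    except AssertionError:
        pass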


def benchmark(
        funcs,
        arguments,
        argument_name=_DEFAULT_ARGUMENT_NAME,
        warmups=None,
        time_per_benchmark=_DEFAULT_TIME_PER_BENCHMARK,
        function_aliases=None,
        estimator=_DEFAULT_ESTIMATOR,
        maximum_time=None):
    """Create a benchmark suite for different functions and for different arguments.

    Parameters
    ----------
    funcs : iterable of callables
        The functions to benchmark.
    arguments : dict
        A dictionary where each key represents the reported value (for
        example an integer representing the list size) and each value is the
        argument for the functions (for example the list). In case you want
        to plot the result it should be sorted and ordered (e.g. an
        :py:class:`collections.OrderedDict` or a plain dict if you are using
        Python 3.7 or later).
    argument_name : str, optional
        The name of the reported value. For example if the arguments
        represent list sizes this could be `"size of the list"`.
        Default is an empty string.
    warmups : None or iterable of callables, optional
        If not None it specifies the callables that need a warmup call
        before being timed, so that caches can be filled or JIT compilers
        can kick in.
        Default is None.
    time_per_benchmark : datetime.timedelta, optional
        Each benchmark should take approximately this time.
        The value is ignored for functions that are very fast or very slow.
        Default is 0.1 seconds.

        .. versionchanged:: 0.1.0
           Now requires a :py:class:`datetime.timedelta` instead of a :py:class:`float`.

    function_aliases : None or dict, optional
        If not None it should be a dictionary containing the function as key
        and the name of the function as value. The value will be used in the
        final reports and plots.
        Default is None.
    estimator : callable, optional
        Each function is called with each argument multiple times and each
        timing is recorded. The *estimator* (by default :py:func:`min`) is
        used to reduce this list of timings to one final value. The minimum
        is generally a good way to estimate how fast a function can run
        (see also the discussion in :py:meth:`timeit.Timer.repeat`).
        Default is :py:func:`min`.
    maximum_time : datetime.timedelta or None, optional
        If not None it represents the maximum time the first call of the
        function may take. If exceeded the benchmark will stop evaluating
        the function from then on.
        Default is None.

        .. versionadded:: 0.1.0

    Returns
    -------
    benchmark : BenchmarkResult
        The result of the benchmarks.

    See also
    --------
    BenchmarkBuilder
    """
    if not isinstance(time_per_benchmark, datetime.timedelta):
        warnings.warn("Using a number as 'time_per_benchmark' is deprecated since version 0.1.0. "
                      "Use 'datetime.timedelta(seconds={0})' instead".format(time_per_benchmark),
                      DeprecationWarning)
        time_per_benchmark = datetime.timedelta(seconds=time_per_benchmark)
    if time_per_benchmark <= _TIMEDELTA_ZERO:
        raise ValueError("'time_per_benchmark' ({}) must be positive.".format(time_per_benchmark))
    if maximum_time is not None and maximum_time <= _TIMEDELTA_ZERO:
        raise ValueError("'maximum_time' ({}) must be positive.".format(maximum_time))

    funcs = list(funcs)
    warm_up_calls = {func: 0 for func in funcs}
    if warmups is not None:
        for func in warmups:
            warm_up_calls[func] = 1
    function_aliases = function_aliases or {}

    stopped = set()
    timings = {func: [] for func in funcs}
    for arg_name, arg in arguments.items():
        _logger.info("Benchmark for argument: {}".format(arg_name))
        for func, timing_list in timings.items():
            function_name = _get_function_name(func, function_aliases)
            _logger.info("Benchmark function: {}".format(function_name))
            if func in stopped:
                _logger.info("SKIPPED: Not benchmarking function because a previous run exceeded the maximum time.")
                time_per_run = _NaN
            else:
                bound_func = _get_bound_func(func, arg)
                for _ in itertools.repeat(None, times=warm_up_calls[func]):
                    bound_func()
                params = _estimate_number_of_repeats(bound_func, time_per_benchmark, maximum_time)
                if params.stop:
                    _logger.info(
                        "STOPPED: benchmarking because the first run ({}) exceeded the maximum_time '{}'."
                        .format(datetime.timedelta(seconds=params.timing), maximum_time))
                    stopped.add(func)
                    time_per_run = _NaN
                else:
                    _logger.info(
                        "RUN: benchmark with {} x {} runs (repeats & number) for estimated time {}."
                        .format(params.repeats, params.number, datetime.timedelta(seconds=params.timing)))
                    # As per the timeit module documentation a very good approximation
                    # of a timing is found by repeating the benchmark and using the
                    # minimum.
                    times = timeit.repeat(bound_func, number=params.number, repeat=params.repeats)
                    time = estimator(times)
                    time_per_run = time / params.number
            timing_list.append(time_per_run)
    return BenchmarkResult(timings, function_aliases, arguments, argument_name)


class BenchmarkResult(object):
    """A class holding a benchmarking result that provides additional printing and plotting functions."""

    def __init__(self, timings, function_aliases, arguments, argument_name):
        self._timings = timings
        self.function_aliases = function_aliases
        self._arguments = arguments
        self._argument_name = argument_name

    def __str__(self):
        """Return the results formatted as a table."""
        try:
            return str(self.to_pandas_dataframe())
        except ImportError:
            return pprint.pformat({self._function_name(k): v for k, v in self._timings.items()})

    __repr__ = __str__

    def _function_name(self, func):
        """Returns the function name taking the aliases into account."""
        return _get_function_name(func, self.function_aliases)

    @staticmethod
    def _get_title():
        """Returns a string containing some information about Python and the machine."""
        return "{0} {1} {2} ({3} {4})".format(
            platform.python_implementation(),
            platform.python_version(),
            platform.python_compiler(),
            platform.system(),
            platform.release())

    def to_pandas_dataframe(self):
        """Return the timing results as pandas DataFrame. This is the preferred
        way of accessing the text form of the timings.

        Returns
        -------
        results : pandas.DataFrame
            The timings as DataFrame.

        Warns
        -----
        UserWarning
            In case multiple functions have the same name.

        Raises
        ------
        ImportError
            If pandas isn't installed.
        """
        try:
            import pandas as pd
        except ImportError:
            raise ImportError('simple_benchmark requires pandas for this method.')
        dct = {self._function_name(func): timings for func, timings in self._timings.items()}
        if len(dct) != len(self._timings):
            warnings.warn('Some timings are not included in the result. Likely '
                          'because multiple functions have the same name. You '
                          'can add an alias to the `function_aliases` mapping '
                          'to avoid this problem.', UserWarning)
        return pd.DataFrame(dct, index=list(self._arguments))

    def plot_difference_percentage(self, relative_to, ax=None):
        """Plot the benchmarks relative to one of the benchmarks with percentages on the y-axis.

        Parameters
        ----------
        relative_to : callable
            The benchmarks are plotted relative to the timings of the given function.
        ax : matplotlib.axes.Axes or None, optional
            The axes on which to plot. If None plots on the currently active axes.

        Raises
        ------
        ImportError
            If matplotlib isn't installed.
        """
        plt = _try_importing_matplotlib()
        ax = ax or plt.gca()

        self.plot(relative_to=relative_to, ax=ax)

        ax.set_yscale('linear')
        # Use percentage always including the sign for the y ticks.
        ticks = ax.get_yticks()
        ax.set_yticklabels(['{:+.1f}%'.format((x - 1) * 100) for x in ticks])

    def plot(self, relative_to=None, ax=None):
        """Plot the benchmarks, either relative or absolute.

        Parameters
        ----------
        relative_to : callable or None, optional
            If None it will plot the absolute timings, otherwise it will use
            the given *relative_to* function as reference for the timings.
        ax : matplotlib.axes.Axes or None, optional
            The axes on which to plot. If None plots on the currently active axes.

        Raises
        ------
        ImportError
            If matplotlib isn't installed.
        """
        plt = _try_importing_matplotlib()
        ax = ax or plt.gca()

        x_axis = list(self._arguments)
        for func, timing in self._timings.items():
            label = self._function_name(func)
            if relative_to is None:
                plot_time = timing
            else:
                plot_time = [time / ref for time, ref in zip(self._timings[func], self._timings[relative_to])]
            ax.plot(x_axis, plot_time, label=label)

        ax.set_title(BenchmarkResult._get_title())
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_xlabel(self._argument_name)
        if relative_to is None:
            ax.set_ylabel('time [seconds]')
        else:
            ax.set_ylabel('time relative to "{}"'.format(self._function_name(relative_to)))
        ax.grid(which='both')
        ax.legend()

    def plot_both(self, relative_to):
        """Plot both the absolute timings and the timings relative to a reference function.

        Parameters
        ----------
        relative_to : callable or None
            If None the second subplot also shows the absolute timings, otherwise
            the timings in the second subplot are relative to the timings of the
            given *relative_to* function.

        Raises
        ------
        ImportError
            If matplotlib isn't installed.
        """
        plt = _try_importing_matplotlib()
        f, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
        self.plot(ax=ax1)
        self.plot(ax=ax2, relative_to=relative_to)
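

# --- Illustrative example (not part of the original module) -----------------
# A sketch of post-processing a ``BenchmarkResult``: converting it to a pandas
# DataFrame and plotting absolute and relative timings. It assumes the optional
# dependencies pandas and matplotlib are installed; the helper and the two
# benchmarked functions are hypothetical documentation code.
def _example_postprocessing():
    import matplotlib.pyplot as plt

    def double_comprehension(lst):
        return [value * 2 for value in lst]

    def double_map(lst):
        return list(map((2).__mul__, lst))

    arguments = {size: list(range(size)) for size in (10, 100, 1000)}
    result = benchmark([double_comprehension, double_map], arguments, argument_name='list size')

    print(result.to_pandas_dataframe())                  # tabular view (needs pandas)
    result.plot()                                        # absolute timings, log-log axes
    result.plot_both(relative_to=double_comprehension)   # absolute and relative subplots
    plt.show()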


class BenchmarkBuilder(object):
    """A class useful for building benchmarks by adding decorators to the functions
    instead of collecting them later.

    Parameters
    ----------
    time_per_benchmark : datetime.timedelta, optional
        Each benchmark should take approximately this time.
        The value is ignored for functions that are very fast or very slow.
        Default is 0.1 seconds.

        .. versionchanged:: 0.1.0
           Now requires a :py:class:`datetime.timedelta` instead of a :py:class:`float`.

    estimator : callable, optional
        Each function is called with each argument multiple times and each
        timing is recorded. The *estimator* (by default :py:func:`min`) is
        used to reduce this list of timings to one final value. The minimum
        is generally a good way to estimate how fast a function can run
        (see also the discussion in :py:meth:`timeit.Timer.repeat`).
        Default is :py:func:`min`.
    maximum_time : datetime.timedelta or None, optional
        If not None it represents the maximum time the first call of the
        function may take. If exceeded the benchmark will stop evaluating
        the function from then on.
        Default is None.

        .. versionadded:: 0.1.0

    See also
    --------
    benchmark
    """

    def __init__(self, time_per_benchmark=_DEFAULT_TIME_PER_BENCHMARK, estimator=_DEFAULT_ESTIMATOR, maximum_time=None):
        self._funcs = []
        self._arguments = collections.OrderedDict()
        self._warmups = []
        self._function_aliases = {}
        self._argument_name = _DEFAULT_ARGUMENT_NAME
        self._time_per_benchmark = time_per_benchmark
        self._estimator = estimator
        self._maximum_time = maximum_time

    def add_functions(self, functions):
        """Add multiple functions to the benchmark.

        Parameters
        ----------
        functions : iterable of callables
            The functions to add to the benchmark.
        """
        self._funcs.extend(functions)

    def add_function(self, warmups=False, alias=None):
        """A decorator factory that returns a decorator that can be used to add a function to the benchmark.

        Parameters
        ----------
        warmups : bool, optional
            If True the function is called once before each benchmark run.
            Default is False.
        alias : str or None, optional
            If None then the displayed function name is the name of the function,
            otherwise the string is used when the function is referred to.
            Default is None.

        Returns
        -------
        decorator : callable
            The decorator that adds the function to the benchmark.

        Raises
        ------
        TypeError
            In case ``warmups`` is a callable, i.e. the decorator factory was
            applied to a function directly.
        """
        if callable(warmups):
            raise TypeError(_MSG_DECORATOR_FACTORY)

        def inner(func):
            self._funcs.append(func)
            if warmups:
                self._warmups.append(func)
            if alias is not None:
                self._function_aliases[func] = alias
            return func

        return inner

    def add_arguments(self, name=_DEFAULT_ARGUMENT_NAME):
        """A decorator factory that returns a decorator that can be used to add a function
        that produces the x-axis values and the associated test data for the benchmark.

        Parameters
        ----------
        name : str, optional
            The label for the x-axis.

        Returns
        -------
        decorator : callable
            The decorator that adds the function that produces the x-axis values
            and the test data to the benchmark.

        Raises
        ------
        TypeError
            In case ``name`` is a callable.
        """
        if callable(name):
            raise TypeError(_MSG_DECORATOR_FACTORY)

        def inner(func):
            self._arguments = collections.OrderedDict(func())
            self._argument_name = name
            return func

        return inner

    def assert_same_results(self, equality_func):
        """Asserts that all stored functions return the same result.

        .. versionadded:: 0.1.0

        Parameters
        ----------
        equality_func : callable
            The function that determines if the results are equal. This function
            should accept two arguments and return a boolean (True if the results
            should be considered equal, False if not).

        Warns
        -----
        UserWarning
            In case the instance has no arguments for the functions.

        Raises
        ------
        AssertionError
            In case any two results are not equal.
        """
        if not self._arguments:
            warnings.warn(_MSG_MISSING_ARGUMENTS, UserWarning)
            return
        assert_same_results(self._funcs, self._arguments, equality_func=equality_func)

    def assert_not_mutating_input(self, equality_func, copy_func=_DEFAULT_COPY_FUNC):
        """Asserts that none of the stored functions mutate the arguments.

        .. versionadded:: 0.1.0

        Parameters
        ----------
        equality_func : callable
            The function that determines if the original and the possibly mutated
            argument are equal. This function should accept two arguments and
            return a boolean (True if they should be considered equal, False if not).
        copy_func : callable, optional
            The function that is used to copy the original argument.
            Default is :py:func:`copy.deepcopy`.

        Warns
        -----
        UserWarning
            In case the instance has no arguments for the functions.

        Raises
        ------
        AssertionError
            In case any of the functions mutates one of the arguments.

        Notes
        -----
        In case the arguments are :py:class:`MultiArgument` then the copy_func
        and the equality_func get these :py:class:`MultiArgument` as single
        arguments and need to handle them appropriately.
        """
        if not self._arguments:
            warnings.warn(_MSG_MISSING_ARGUMENTS, UserWarning)
            return
        assert_not_mutating_input(self._funcs, self._arguments, equality_func=equality_func, copy_func=copy_func)

    def run(self):
        """Starts the benchmark.

        Returns
        -------
        result : BenchmarkResult
            The result of the benchmark.

        Warns
        -----
        UserWarning
            In case the instance has no arguments for the functions.

            .. versionadded:: 0.1.0
        """
        if not self._arguments:
            warnings.warn(_MSG_MISSING_ARGUMENTS, UserWarning)
        return benchmark(
            funcs=self._funcs,
            arguments=self._arguments,
            argument_name=self._argument_name,
            warmups=self._warmups,
            time_per_benchmark=self._time_per_benchmark,
            function_aliases=self._function_aliases,
            estimator=self._estimator,
            maximum_time=self._maximum_time
        )

    def use_random_arrays_as_arguments(self, sizes):
        """Alternative to :meth:`add_arguments` that provides random arrays of the
        specified sizes as arguments for the benchmark.

        Parameters
        ----------
        sizes : iterable of int
            An iterable containing the sizes for the arrays (should be sorted).

        Raises
        ------
        ImportError
            If NumPy isn't installed.
        """
        try:
            import numpy as np
        except ImportError:
            raise ImportError('simple_benchmark requires NumPy for this function.')

        def provide_random_arrays():
            for size in sizes:
                yield size, np.random.random(size)

        self.add_arguments('array size')(provide_random_arrays)

    def use_random_lists_as_arguments(self, sizes):
        """Alternative to :meth:`add_arguments` that provides random lists of the
        specified sizes as arguments for the benchmark.

        Parameters
        ----------
        sizes : iterable of int
            An iterable containing the sizes for the lists (should be sorted).
        """
        def provide_random_lists():
            random_func = random.random
            for size in sizes:
                yield size, [random_func() for _ in itertools.repeat(None, times=size)]

        self.add_arguments('list size')(provide_random_lists)
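

# --- Illustrative example (not part of the original module) -----------------
# A sketch of the decorator-based workflow: functions are registered on a
# ``BenchmarkBuilder`` via ``add_function``, random test lists are generated
# with ``use_random_lists_as_arguments``, the results are cross-checked, and
# ``run`` finally produces a ``BenchmarkResult``. The helper is hypothetical
# and exists purely for documentation purposes.
def _example_builder():
    bench = BenchmarkBuilder(time_per_benchmark=datetime.timedelta(milliseconds=50))

    @bench.add_function(alias='builtin max')
    def max_builtin(lst):
        return max(lst)

    @bench.add_function()
    def max_loop(lst):
        best = lst[0]
        for value in lst[1:]:
            if value > best:
                best = value
        return best

    bench.use_random_lists_as_arguments([2 ** exponent for exponent in range(2, 12)])
    bench.assert_same_results(equality_func=lambda a, b: a == b)
    return bench.run()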