Search code examples
javamultithreadingtimerthread-safetyconcurrenthashmap

Calculating average and percentiles from a histogram map?


I have written a timer which will measure the performance of a particular code in any multithreaded application. In the below timer, it will also populate the map with how many calls took x milliseconds. I will use this map as part of my histogram to do further analysis, like what percentage of calls took this much milliseconds and etc.

public static class StopWatch {

    public static ConcurrentHashMap<Long, Long> histogram = new ConcurrentHashMap<Long, Long>();

    /**
     * Creates an instance of the timer and starts it running.
     */
    public static StopWatch getInstance() {
        return new StopWatch();
    }

    private long m_end = -1;
    private long m_interval = -1;
    private final long m_start;

    private StopWatch() {
        m_start = m_interval = currentTime();
    }

    /**
     * Returns in milliseconds the amount of time that has elapsed since the timer was created. If the
     * <code>stop</code> method has been invoked, then this returns instead the elapsed time between the creation of
     * the timer and the moment when <code>stop</code> was invoked.
     * 
     * @return duration it took
     */
    public long getDuration() {
        long result = 0;

        final long startTime = m_start;
        final long endTime = isStopWatchRunning() ? currentTime() : m_end;

        result = convertNanoToMilliseconds(endTime - startTime);

        boolean done = false;
        while (!done) {
            Long oldValue = histogram.putIfAbsent(result, 1L);
            if (oldValue != null) {
                done = histogram.replace(result, oldValue, oldValue + 1);
            } else {
                done = true;
            }
        }

        return result;
    }

    /**
     * Returns in milliseconds the amount of time that has elapsed since the last invocation of this same method. If
     * this method has not previously been invoked, then it is the amount of time that has elapsed since the timer
     * was created. <strong>Note</strong> that once the <code>stop</code> method has been invoked this will just
     * return zero.
     * 
     * @return interval period
     */
    public long getInterval() {
        long result = 0;

        final long startTime = m_interval;
        final long endTime;

        if (isStopWatchRunning()) {
            endTime = m_interval = currentTime();
        } else {
            endTime = m_end;
        }

        result = convertNanoToMilliseconds(endTime - startTime);

        return result;
    }

    /**
     * Stops the timer from advancing. This has an impact on the values returned by both the
     * <code>getDuration</code> and the <code>getInterval</code> methods.
     */
    public void stop() {
        if (isStopWatchRunning()) {
            m_end = currentTime();
        }
    }

    /**
     * What is the current time in nanoseconds?
     * 
     * @return returns back the current time in nanoseconds
     */
    private long currentTime() {
        return System.nanoTime();
    }

    /**
     * This is used to check whether the timer is alive or not
     * 
     * @return checks whether the timer is running or not
     */
    private boolean isStopWatchRunning() {
        return (m_end <= 0);
    }

    /**
     * This is used to convert NanoSeconds to Milliseconds
     * 
     * @param nanoseconds
     * @return milliseconds value of nanoseconds
     */
    private long convertNanoToMilliseconds(final long nanoseconds) {
        return nanoseconds / 1000000L;
    }
}

For example, this is the way I will use my above timer class to measure the performance of a particular code in my multithreading application:

StopWatch timer = StopWatch.getInstance();
//... some code here to measure
timer.getDuration();

Now my question is - What is the best way to calculate average, median, 95th and 99th percentile of the request from my histogram map? I mean to say, I want to add certain methods in my StopWatch class only which will does all the calculation like finding the average, median, 95th and 99th percentile.

And then I can just directly get it by using StopWatch instance.

My histogram map will look like this:

key - means number of milliseconds

value - means number of calls that took that much milliseconds.


Solution

  • The mean is straightforward to implement. Median is the 50th percentile, so you just need a single percentile method that works, and create a utility method for the median. There are several variations of Percentile calculation, but this one should generate the same results as the Microsoft Excel PERCENTILE.INC function.

    import java.util.Map;
    import java.util.SortedSet;
    import java.util.concurrent.ConcurrentSkipListSet;
    
    public class HistogramStatistics
    {
        public static Double average(final Map<Long, Long> histogram)
        {
            return HistogramStatistics.mean(histogram);
        }
    
        public static Double mean(final Map<Long, Long> histogram)
        {
            double sum = 0L;
    
            for (Long value : histogram.keySet())
            {
                sum += (value * histogram.get(value));
            }
    
            return sum / (double) HistogramStatistics.count(histogram);
        }
    
        public static Double median(final Map<Long, Long> histogram)
        {
            return HistogramStatistics.percentile(histogram, 0.50d);
        }
    
        public static Double percentile(final Map<Long, Long> histogram, final double percent)
        {
            if ((percent < 0d) || (percent > 1d))
            {
                throw new IllegalArgumentException("Percentile must be between 0.00 and 1.00.");
            }
    
            if ((histogram == null) || histogram.isEmpty())
            {
                return null;
            }
    
            double n = (percent * (HistogramStatistics.count(histogram).doubleValue() - 1d)) + 1d;
            double d = n - Math.floor(n);
            SortedSet<Long> bins = new ConcurrentSkipListSet<Long>(histogram.keySet());
            long observationsBelowBinInclusive = 0L;
            Long lowBin = bins.first();
    
            Double valuePercentile = null;
    
            for (Long highBin : bins)
            {
                observationsBelowBinInclusive += histogram.get(highBin);
    
                if (n <= observationsBelowBinInclusive)
                {
                    if ((d == 0f) || (histogram.get(highBin) > 1L))
                    {
                        lowBin = highBin;
                    }
    
                    valuePercentile = lowBin.doubleValue() + ((highBin - lowBin) * d);
    
                    break;
                }
    
                lowBin = highBin;
            }
    
            return valuePercentile;
        }
    
        public static Long count(final Map<Long, Long> histogram)
        {
            long observations = 0L;
    
            for (Long value : histogram.keySet())
            {
                observations += histogram.get(value);
            }
    
            return observations;
        }
    }