Search code examples
python html matplotlib plotly-python mpld3

How to create html plots which dynamically adjust marker size when zooming


I've created a Python script that generates a line chart (with a marker at each data point) using Matplotlib and converts it into an interactive HTML graphic using mpld3. The chart displays average hashing times for different algorithms and string lengths. However, I'd like to enhance the user experience by dynamically adjusting the marker size as I zoom in on the graphic in the HTML document.

Here's part of the script:

import hashlib
import random
import string
import time
import matplotlib.pyplot as plt
import multiprocessing
import os
from datetime import datetime
import mpld3

# Benchmark configuration read by the functions below.
# hashlib algorithm names handed to hashlib.new(); main() plots one line per entry.
algorithms = ['sha1', 'sha256', 'sha3_256']
# Lengths (in characters) of the random input strings; also the x-axis values.
string_lengths = [10, 25, 50, 75, 100]
# Number of random strings generated (and hashed) for each string length.
num_samples = 50000
# The module-level RNG is seeded once at import time, so the sequence of
# generated strings is reproducible for a whole run; successive rounds inside
# main() keep drawing from the same stream rather than restarting it.
random_seed = 42
random.seed(random_seed)
# Line colors, matched to `algorithms` by position.
custom_colors = ['#1f77b4', '#FFD700', '#2ca02c']

def generate_random_string(length):
    """Return a random string of ``length`` ASCII letters and digits.

    Characters are drawn one at a time with ``random.choice`` from the
    module-level RNG, so the output is reproducible after ``random.seed``.
    """
    alphabet = string.ascii_letters + string.digits
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

def hash_string(input_string, algorithm):
    """Return the hexadecimal digest of ``input_string`` under ``algorithm``.

    ``algorithm`` is any name accepted by ``hashlib.new``. The text is encoded
    with ``str.encode()``'s default encoding (UTF-8) before being hashed.
    """
    payload = input_string.encode()
    return hashlib.new(algorithm, payload).hexdigest()

def hash_and_measure_avg_time(args):
    """Hash a batch of strings and return the mean time per hash in nanoseconds.

    The inputs arrive as a single tuple so the function can be passed
    directly to ``Pool.map``.

    Args:
        args: ``(algorithm, random_strings)`` - a ``hashlib`` algorithm name
            and the sequence of strings to hash with it.

    Returns:
        Average nanoseconds spent per hashed string as a float, or ``0.0``
        when ``random_strings`` is empty (there is nothing to average).
    """
    algorithm, random_strings = args
    sample_count = len(random_strings)
    if sample_count == 0:
        # Nothing to time; also avoids dividing by zero below.
        return 0.0

    # perf_counter_ns() is monotonic and high resolution. time_ns() follows the
    # wall clock, which can be adjusted while the measurement is running.
    start_time = time.perf_counter_ns()
    for text in random_strings:
        hash_string(text, algorithm)
    elapsed = time.perf_counter_ns() - start_time

    return elapsed / sample_count

def main():
    """Run ten benchmark rounds and save one interactive HTML plot per round.

    Each round generates ``num_samples`` random strings for every entry in
    ``string_lengths``, times every algorithm in ``algorithms`` across a
    process pool, and writes an mpld3 line plot to
    ``results_<timestamp>/c_result_<timestamp>.html``.
    """
    # The core counts do not change between rounds, so query them once.
    num_cpus = multiprocessing.cpu_count()
    cpu_count = os.cpu_count()

    for _round in range(10):
        print(f"\nUsing {num_cpus} CPU cores for analysis")
        print(f"Number of CPU cores available: {cpu_count}")

        random_strings = {
            length: [generate_random_string(length) for _ in range(num_samples)]
            for length in string_lengths
        }

        results = {}
        timestamp = datetime.now().strftime("%m-%d-%Y-%H-%M-%S")
        results_folder = f"results_{timestamp}"
        os.makedirs(results_folder, exist_ok=True)

        for algorithm in algorithms:
            print(f"\nTesting hashing algorithm: {algorithm}")
            with multiprocessing.Pool(processes=num_cpus) as pool:
                # One task per string length; pool.map returns results in the same order.
                args = [(algorithm, random_strings[length]) for length in string_lengths]
                results_list = pool.map(hash_and_measure_avg_time, args)

            results[algorithm] = dict(zip(string_lengths, results_list))

            # Pause between algorithms (kept from the original script;
            # presumably to let the machine settle before the next timing).
            time.sleep(1)

        fig, ax = plt.subplots(figsize=(10, 6))
        try:
            for color_index, (algorithm, timings) in enumerate(results.items()):
                ax.plot(
                    string_lengths,
                    [timings[length] for length in string_lengths],
                    marker='o',
                    markersize=4,
                    label=algorithm,
                    color=custom_colors[color_index],
                )

            ax.set_title('Average Hashing Time vs. String Length')
            ax.set_xlabel('String Length')
            ax.set_ylabel('Average Hashing Time (ns)')
            ax.legend()
            ax.grid(True)

            interactive_plot = mpld3.fig_to_html(fig)
        finally:
            # pyplot keeps every figure alive until it is closed; without this
            # each of the ten rounds would leave its figure in memory.
            plt.close(fig)

        plot_filename = f"c_result_{timestamp}.html"
        plot_path = os.path.join(results_folder, plot_filename)
        # Explicit encoding so the HTML is written the same way on every platform.
        with open(plot_path, 'w', encoding='utf-8') as html_file:
            html_file.write(interactive_plot)

# Entry-point guard: the banner and the benchmark run only when this file is
# executed as a script, not when it is imported.
# NOTE(review): main() uses multiprocessing.Pool; on platforms that start
# workers by spawning, the workers re-import this module, so keep this guard.
if __name__ == "__main__":
    print("\n----- Welcome to the Hashing Performance Benchmark -----")
    main()

As it currently stands, the chart displays dots representing the data points, but they remain the same size when zooming in or out in the HTML graphic. I want to make the dot size change dynamically based on the zoom level so that when I zoom in, the dots appear smaller, and when I zoom out, they appear larger.

It reaches a point where, if I zoom in enough, the marker stops reducing its size and stays 'gigantic.' Is there a way to change this and make it continue dynamically decreasing until the maximum zoom level?

Without Zoom

enter image description here

With Zoom

enter image description here

Max Zoom Level

enter image description here


Solution

  • Hey, Olla. As far as I know, mpld3 cannot resize markers dynamically as you zoom in or out of the HTML graphic; this is a limitation of the library. I don't know whether it would fit your project, but you could use the Plotly library instead of Matplotlib and mpld3. That way, you could have the marker size change with the zoom level and keep decreasing even at high zoom levels, and you would gain a lot of other functionality as well.

    import hashlib
    import random
    import string
    import time
    import multiprocessing
    import os
    from datetime import datetime
    import plotly.graph_objects as go
    
    # Benchmark configuration read by the functions below.
    # hashlib algorithm names handed to hashlib.new(); main() adds traces per entry.
    algorithms = ['sha1', 'sha256', 'sha3_256']
    # Lengths (in characters) of the random input strings; also the x-axis
    # values, which main() displays on a logarithmic axis.
    string_lengths = [1, 10, 25, 50, 75]
    # Number of random strings generated (and hashed) for each string length.
    num_samples = 50000
    # The module-level RNG is seeded once at import time, so the sequence of
    # generated strings is reproducible for a whole run; successive rounds inside
    # main() keep drawing from the same stream rather than restarting it.
    random_seed = 42
    random.seed(random_seed)
    # Trace colors, matched to `algorithms` by position.
    custom_colors = ['#1f77b4', '#FFD700', '#2ca02c']
    
    def generate_random_string(length):
        """Return a random string of ``length`` ASCII letters and digits.

        Characters are drawn one at a time with ``random.choice`` from the
        module-level RNG, so the output is reproducible after ``random.seed``.
        """
        alphabet = string.ascii_letters + string.digits
        picked = []
        for _ in range(length):
            picked.append(random.choice(alphabet))
        return ''.join(picked)
    
    def hash_string(input_string, algorithm):
        """Return the hexadecimal digest of ``input_string`` under ``algorithm``.

        ``algorithm`` is any name accepted by ``hashlib.new``. The text is encoded
        with ``str.encode()``'s default encoding (UTF-8) before being hashed.
        """
        payload = input_string.encode()
        return hashlib.new(algorithm, payload).hexdigest()
    
    def hash_and_measure_avg_time(args):
        """Hash a batch of strings and return the mean time per hash in nanoseconds.

        The inputs arrive as a single tuple so the function can be passed
        directly to ``Pool.map``.

        Args:
            args: ``(algorithm, random_strings)`` - a ``hashlib`` algorithm name
                and the sequence of strings to hash with it.

        Returns:
            Average nanoseconds spent per hashed string as a float, or ``0.0``
            when ``random_strings`` is empty (there is nothing to average).
        """
        algorithm, random_strings = args
        sample_count = len(random_strings)
        if sample_count == 0:
            # Nothing to time; also avoids dividing by zero below.
            return 0.0

        # perf_counter_ns() is monotonic and high resolution. time_ns() follows the
        # wall clock, which can be adjusted while the measurement is running.
        start_time = time.perf_counter_ns()
        for text in random_strings:
            hash_string(text, algorithm)
        elapsed = time.perf_counter_ns() - start_time

        return elapsed / sample_count
    
    def main():
        """Run ten benchmark rounds and save one interactive Plotly plot per round.

        Each round generates ``num_samples`` random strings for every entry in
        ``string_lengths``, times every algorithm in ``algorithms`` across a
        process pool, and writes the figure to
        ``results_<timestamp>/c_result_<timestamp>.html``.
        """
        # The core counts do not change between rounds, so query them once.
        num_cpus = multiprocessing.cpu_count()
        cpu_count = os.cpu_count()

        for _round in range(10):
            print(f"\nUsing {num_cpus} CPU cores for analysis")
            print(f"Number of CPU cores available: {cpu_count}")

            random_strings = {
                length: [generate_random_string(length) for _ in range(num_samples)]
                for length in string_lengths
            }

            results = {}
            timestamp = datetime.now().strftime("%m-%d-%Y-%H-%M-%S")
            results_folder = f"results_{timestamp}"
            os.makedirs(results_folder, exist_ok=True)

            for algorithm in algorithms:
                print(f"\nTesting hashing algorithm: {algorithm}")
                with multiprocessing.Pool(processes=num_cpus) as pool:
                    # One task per string length; pool.map returns results in the same order.
                    args = [(algorithm, random_strings[length]) for length in string_lengths]
                    results_list = pool.map(hash_and_measure_avg_time, args)

                results[algorithm] = dict(zip(string_lengths, results_list))

                # Pause between algorithms (kept from the original script;
                # presumably to let the machine settle before the next timing).
                time.sleep(1)

            # Collect (name, color, y-values) once so both trace passes share them.
            series = [
                (
                    algorithm,
                    custom_colors[color_index],
                    [results[algorithm][length] for length in string_lengths],
                )
                for color_index, algorithm in enumerate(algorithms)
            ]

            # Create a Plotly scatter plot
            fig = go.Figure()

            # Marker traces are added first so they keep their position in the legend.
            for algorithm, color, avg_times in series:
                fig.add_trace(go.Scatter(
                    x=string_lengths,
                    y=avg_times,
                    mode='markers',
                    name=algorithm,
                    marker=dict(size=4, color=color),
                ))

            # Add lines between the dots: exactly one dotted line trace per
            # algorithm, so the legend holds no duplicate entries.
            for algorithm, color, avg_times in series:
                fig.add_trace(go.Scatter(
                    x=string_lengths,
                    y=avg_times,
                    mode='lines',
                    name=f'{algorithm} Lines',
                    line=dict(color=color, dash='dot'),
                ))

            # Customize the layout
            fig.update_layout(
                title='Average Hashing Time vs. String Length',
                xaxis_title='String Length',
                yaxis_title='Average Hashing Time (ns)',
                legend_title='Algorithm',
            )
            fig.update_xaxes(type='log')

            # Save the interactive plot as an HTML file
            html_file = os.path.join(results_folder, f"c_result_{timestamp}.html")
            fig.write_html(html_file)
    
    # Entry-point guard: the banner and the benchmark run only when this file is
    # executed as a script, not when it is imported.
    # NOTE(review): main() uses multiprocessing.Pool; on platforms that start
    # workers by spawning, the workers re-import this module, so keep this guard.
    if __name__ == "__main__":
        print("\n----- Welcome to the Hashing Performance Benchmark -----")
        main()