Search code examples
juliascatter-plotmakie.jl

Makie: Non-overlapping label placement algorithm for scatter plots


I am using CairoMakie to scatter plot an XY data set but using labels as markers:

using CairoMakie


x = [0, 0.5, 0.50]
y = [0, 0.5, 0.51]
lbls = ["O", "A", "B"]

fig = Figure()
ax = Axis(fig[1,1])
scatter!(ax, x, y, marker=:circle, markersize=10, color=:red)
foreach(i -> text!(ax, position=(x[i], y[i]), lbls[i]), 1:3)

display(fig)

This produces the following figure:

XY scatter with text labels

Because points A and B are very close with each other, their respective labels overlap. Does CairoMakie have an algorithm to place the labels in such a way to avoid label overlaps?

I know Gadfly has this capability with Geom.label but I am hoping I do not have to use a separate package just to plot such charts. I also know in CairoMakie, I can use arguments such as position and offset to adjust the label positioning in such a way the labels do not overlap, but I cannot do this for every data set in my case.

Can anyone help? Or perhaps have a label placement algorithm written in Julia? Thanks.


Solution

  • It is amazing that several math papers have been written on non-overlapping label placement algorithms, but I neither have the math background nor the time to study these papers, let alone translate the algorithm into code. Python and R have codes for this kind of algorithm.

    Anyway, the following code in Julia is sufficient for my case, not necessarily holistic enough for all situations and for everyone's case.

    using DataFrames
    using CairoMakie
    using Random
    
    
    function allpairs(n::Int)
        arr = collect(Iterators.product(1:n, 1:n))
        row, col = size(arr)
        combo = []
        for r ∈ 1:row-1, c ∈ (r + 1):col
            push!(combo, arr[r, c])
        end
        combo
    end
    
    
    angle(x, y) = atan(y[2] - y[1], x[2] - x[1])
    
    
    dist(x, y) = sqrt((x[2] - x[1])^2 + (y[2] - y[1])^2)
    
    
    function label_dist(x, y, δw, δh)
        x1, x2 = x[1], x[2]
        y1, y2 = y[1], y[2]
        x1b, x2b = x1 + δw, x2 + δw
        y1b, y2b = y1 + δh, y2 + δh
    
        left = x2b < x1
        right = x1b < x2
        bottom = y2b < y1
        top = y1b < y2
    
        d = top && left ? dist((x1, x2b), (y1b, y2)) :
            left && bottom ? dist((x1, x2b), (y1, y2b)) :
            bottom && right ? dist((x1b, x2), (y1, y2b)) :
            right && top ? dist((x1b, x2), (y1b, y2)) :
            left ? x1 - x2b :
            right ? x2 - x1b :
            bottom ? y1 - y2b :
            top ? y2 - y1b :
            0.0
        d
    end
    
    
    function shift!(x, y, idx, r, angle)
        x[idx[1]] -= r * cos(angle)
        y[idx[1]] -= r * sin(angle)
        x[idx[2]] += r * cos(angle)
        y[idx[2]] += r * sin(angle)
    end
    
    
    function label_pos(x, y;               # label positions (original/untransformed scale)
                       min_dist=0.002,     # min. distance between labels (on a 0-1 scale)
                       shift_size=0.002,   # distance to shift labels (on a 0-1 scale)
                       width=0.05,         # width of label (on a 0-1 scale)
                       height=0.03,        # height of label (on a 0-1 scale)
                       attempts::Int=100)  # no. of tries to place labels
        xlims = extrema(x)
        Δx = xlims[2] - xlims[1]
        ylims = extrema(y)
        Δy = ylims[2] - ylims[1]
    
        xt = (x .- xlims[1]) ./ Δx  # map to a 0-1 scale
        yt = (y .- ylims[1]) ./ Δy
    
        combo = allpairs(length(xt))
        iter = true
        i = 1
        while iter
            pts = map(combo) do p
                idx = [p[1], p[2]]
                d = label_dist(xt[idx], yt[idx], width, height)
                θ = angle(xt[idx], yt[idx])
                (idx=idx, dist=d, angle=θ)
            end
    
            pos = findall(p -> p.dist < min_dist, pts)
            foreach(p -> shift!(xt, yt, p.idx, shift_size, p.angle), pts[pos])
            i += 1
            iter = (i <= attempts) && !isempty(pos)
        end
    
        xt = xt .* Δx .+ xlims[1]   # revert to original scale
        yt = yt .* Δy .+ ylims[1]
    
        (; x=xt, y=yt)
    end
    
    
    xlims = (0, 1)
    ylims = (-10, 10)
    
    Δxl = xlims[2] - xlims[1]
    Δyl = ylims[2] - ylims[1]
    
    n = 150      # no. of points to generate
    seed = 392348
    Random.seed!(seed)
    xs = xlims[1] .+ rand(n) .* Δxl
    ys = ylims[1] .+ rand(n) .* Δyl
    
    labels = map(i -> "$i", eachindex(xs))
    
    xt = xs[:]
    yt = ys[:]
    xt2 = xs[:]
    yt2 = ys[:]
    
    xt, yt = label_pos(xt, yt)
    
    xlims2 = (xlims[1] - Δxl * 0.05, xlims[2] + Δxl * 0.05)
    ylims2 = (ylims[1] - Δyl * 0.05, ylims[2] + Δyl * 0.05)
    
    fig = Figure(resolution=(600,1200))  # must be square and preferably >500 px
    axs = Axis(fig[1,1])
    scatter!(axs, xs, ys, marker=:circle, markersize=5, color=:red)
    axs.title = "non-overlap label placement"
    
    foreach(eachindex(labels)) do i
        text!(axs, position=(xt[i], yt[i]), labels[i], textsize=15, align=(:right, :bottom))
    end
    
    axs2 = Axis(fig[2,1])
    scatter!(axs2, xs, ys, marker=:circle, markersize=5, color=:red)
    axs2.title = "default label placement"
    
    limits!(axs, xlims2, ylims2)
    limits!(axs2, xlims2, ylims2)
    
    foreach(eachindex(labels)) do i
        text!(axs2, position=(xt2[i], yt2[i]), labels[i], textsize=15, align=(:right, :bottom))
    end
    
    display(fig)
    
    

    The code produces the following 2 charts:

    enter image description here

    It is hardly near perfect or optimized, but it teases apart the labels far enough, so that I can save the figures as SVG, then use a vector drawing app to manually adjust the labels, where needed.

    This is a start, as I am sure cleverer people can optimize this code. I hope this will be helpful to others.