python algorithm performance optimization

How to find an optimal circular area with regions missing

I have an n by n matrix of integers and I want to find the circular area, with origin at the top left corner, with maximum sum. What makes this optimization problem more difficult is that I am allowed to miss out up to a fixed number of regions. A region is defined to be a consecutive block of rows. Let us call the number of regions that can be omitted, k.

Consider the following grid with a circle imposed on it.

This is made with:

import matplotlib.pyplot as plt
from matplotlib.patches import Circle
import numpy as np
plt.yticks(np.arange(0, 10.01, 1))
plt.xticks(np.arange(0, 10.01, 1))
plt.xlim(0,9)
plt.ylim(0,9)
plt.gca().invert_yaxis()
# Set aspect ratio to be equal
plt.gca().set_aspect('equal', adjustable='box')
plt.grid()   
np.random.seed(40)
square = np.empty((10, 10), dtype=np.int_)
for x in np.arange(0, 10, 1):
    for y in np.arange(0, 10, 1):
        plt.scatter(x, y, color='blue', s=2, zorder=2, clip_on=False) 
for x in np.arange(0, 10, 1):
    for y in np.arange(0, 10, 1):
        value = np.random.randint(-3, 4)
        square[int(x), int(y)] = value
        plt.text(x-0.2, y-0.2, str(value), ha='center', va='center', fontsize=8, color='black')

r1 = 3
circle1 = Circle((0, 0), r1,  color="blue", alpha=0.5, ec='k', lw=1)
plt.gca().add_patch(circle1)

In this case the matrix is:

  [[ 3, -1,  1,  0, -1, -1, -3, -2, -2,  2],
   [ 0,  0,  3,  0,  0, -1,  2,  0, -2,  3],
   [ 2,  0,  3, -2,  3,  1,  2,  2,  1,  1],
   [-3,  0,  1,  0,  1,  2,  3,  1, -3, -1],
   [-3, -2,  1,  2,  1, -3, -2,  2, -2,  0],
   [-1, -3, -3,  1,  3, -2,  0,  2, -1,  1],
   [-2, -2, -1,  2, -2,  1, -1,  1,  3, -1],
   [ 1,  2, -1,  2,  0, -2, -1, -1,  2,  3],
   [-1, -2,  3, -1,  0,  0,  3, -3,  3, -2],
   [ 0, -3,  0, -1, -1,  0, -2, -3, -3, -1]]

When the circle has radius 2 * sqrt(2) = sqrt(8), the sum of the points in the grid within the circle is 11 which is optimal if no regions can be omitted. As the radius increases, more and more points fall into the circle and in this case the sum is never larger than 11.

This can be computed quickly using this code:

import numpy as np
def make_data(N):
    np.random.seed(40)
    g = np.random.randint(-3, 4, (N, N))
    return g

def find_max(g):
    n = g.shape[0]
    sum_dist = np.zeros(2 * N * N, dtype=np.int32)
    for i in range(n):
        for j in range(n):
            dist = i**2 + j**2
            sum_dist[dist] += g[i, j]
    cusum = np.cumsum(sum_dist)
    return np.argmax(cusum), np.max(cusum)


N = 10
g = make_data(N)]
g = g.T # Just to match the picture
print(g)  

squared_dist, score = find_max(g)
print(np.sqrt(squared_dist), score)

The problem

If we were to zero out row 0 and also rows 4, 5 and 6 (this corresponds to eliminating k = 2 regions), then the optimal radius is increased to sqrt(58) giving a score of 24.

I want to find both the optimal radius and the optimal choice of up to k regions to eliminate. I would like the code to be fast for much larger matrices but k will never be bigger than 5.

This question is related but I can't see how to use it to give a fast solution here.

Solution

Here are two different approaches. Both need the idea of a heap, which in Python you can get with heapq.

The first is based on the answer you were looking at. It gives a guaranteed O(n^3) conclusion by iterating over cirles/sums, and then doing the dp calculation.

import heapq
def circle_sums_iter (matrix):
    # Returns an iterator over (r2, sums)
    # Where r2 is the square of the radius of the omitted circle
    # And sums are the sums of the rows.
    sums = [sum(matrix[i]) for i in range(len(matrix))]
    heap = [(0, 0, 0)]
    yield (-1, sums)
    while 0 < len(heap):
        r2 = heap[0][0]
        while 0 < len(heap) and r2 == heap[0][0]:
            _, i, j = heapq.heappop(heap)
            if len(sums) == i:
                sums.append(0)
            sums[i] -= matrix[i][j]
            if j == 0 and i+1 < len(matrix):
                heapq.heappush(heap, (r2 + 2*i + 1, i+1, j))
            if j+1 < len(matrix[i]):
                heapq.heappush(heap, (r2 + 2*j + 2, i, j+1))
        yield (r2, sums)

def optimize_circle_exclude(matrix, blocks=5):
    """Finds the optimal exclusion pattern for a matrix.

    matrix is an nxm array of arrays.

    blocks is the number of include/exclude blocks. We
    start with an include block, so 5 blocks will include
    up to three stretches of rows and exclude the other 2.

    It returns (best_sum, radius^2, boundaries). So a return of
    (25, 104, [5, 7, 10, 12]) represents that we excluded
    all entries (i, j) where 5 <= i < 7, or 10 <= i < 12, or
    104 <= i^2 + j^2.

    It will run in time O(n*(n+m)^2*blocks).
    """
    sums = []
    best = None
    n = len(matrix)
    if 0 == n:
        # Handle trivial edge case.
        return (0, -1, [])
    m = len(matrix[0])
    for r2, sums in circle_sums_iter(matrix):
        # Start dp. It will be an array of,
        # (total, (boundary, (boundary, ...()...))) with the
        # current block being the position. We include even
        # blocks, exclude odd ones.
        #
        # At first we can be on block 0, sum 0, no boundaries.
        dp = [(0, ())]
        if 1 < blocks:
            # If we can have a second block, we could start excluding.
            dp.append((0, (0, ())))

        for i, s in enumerate(sums):
            # Start next_dp with basic "sum everything."
            # We do this so that we can assume next_dp[block]
            # is always there in the loop.
            next_dp = [(dp[0][0] + s, dp[0][1])]
            for block, entry in enumerate(dp):
                total, boundaries = entry
                # Are we including or excluding this block?
                if 0 == block%2:
                    # Include row?
                    if next_dp[block][0] < total + s:
                        next_dp[block] = (total + s, boundaries)
                    # Start new block?
                    if block + 1 < blocks:
                        next_dp.append((total, (i, boundaries)))
                else:
                    # Exclude row?
                    if next_dp[block][0] < total:
                        next_dp[block] = entry
                    # Start new block?
                    if block + 1 < blocks:
                        next_dp.append((total + s, (i, boundaries)))
            dp = next_dp

        # Did we improve best?
        for total, boundaries in dp:
            if best is None or best[0] < total:
                best = (total, r2, boundaries)

    # Undo the linked list for convenience.
    total, r2, boundary_link = best
    reversed_boundaries = []
    while 0 < len(boundary_link):
        reversed_boundaries.append(boundary_link[0])
        boundary_link = boundary_link[1]
    return (total, r2, list(reversed(reversed_boundaries)))

The second tries to be faster. Under some assumptions, it is likely to run in O(n^2). It is based on the idea of Find the optimal shift for max circle sum. We look at pairs of circles, and find the optimum answer that includes everything in the first circle, and maybe positive values in the second circle that are not in the first. We search by decreasing possible best value, until we find how to achieve the current best possible value. That is our answer.

The hoped for average is similar to the reasoning in the other article. Assume that numbers are random with an average of 0, some variance v and some average positive value a. A random circle leaves O(n^2) points in the matrix, whose sum's absolute values will be (thanks to the Central Limit Theorem) O(n). Excluding blocks similarly adds O(n). And so the differences between a random circle and the best one is roughly O(n). (If you take k independent samples out of a normal distribution with variance var, the max will very likely be O(var log(log(k))). So I'm actually skipping a O(log(log(n))) here.) And so the last time we look at the range containing an average circle is when the gap between guaranteed and best is O(n), which happens when there are O(n) matrix points between the inner and outer circle, which turns out to happen when the radius differs by O(1). Which, given that the radius is O(n), means that the radius squared differs by O(n).

Therefore we wind up with a heap with about O(n) things on it before we narrow in on the actual answer. And that goes pretty fast. So we should (waving hands furiously) look at about O(n) candidates that we find from the heap in O(n log(n)) time, each one of which we do O(n) work to analyze, leading to an average time of O(n^2). With some probably not particularly good constants in there. And a factor of O(log(log(n))) that I'm ignoring. I've also chosen to ignore the include/exclude pattern - but I think that that generally gets set by the values in the inner circle once the two are close.

The result is that, on a random matrix, this probably works out to O(n^2). Though in a worst case scenario, it becomes O(n^3 log(n)).

Here is the code.

import heapq
def basic_exclude_dp(sums, blocks):
    # Start dp. It will be an array of,
    # (total, (boundary, (boundary, ...()...))) with the
    # current block being the position. We include even
    # blocks, exclude odd ones.
    #
    # At first we can be on block 0, sum 0, no boundaries.
    dp = [(0, ())]
    if 1 < blocks:
        # If we can have a second block, we could start excluding.
        dp.append((0, (0, ())))

    for i, s in enumerate(sums):
        # Start next_dp with basic "sum everything."
        # We do this so that we can assume next_dp[block]
        # is always there in the loop.
        next_dp = [(dp[0][0] + s, dp[0][1])]
        for block, entry in enumerate(dp):
            total, boundaries = entry
            # Are we including or excluding this block?
            if 0 == block%2:
                # Include row?
                if next_dp[block][0] < total + s:
                    next_dp[block] = (total + s, boundaries)
                # Start new block?
                if block + 1 < blocks:
                    next_dp.append((total, (i, boundaries)))
            else:
                # Exclude row?
                if next_dp[block][0] < total:
                    next_dp[block] = entry
                # Start new block?
                if block + 1 < blocks:
                    next_dp.append((total + s, (i, boundaries)))
        dp = next_dp

    best_total, best_boundaries = dp[0]
    for total, boundaries in dp:
        if best_total < total:
            best_total = total
            best_boundaries = boundaries
    return best_total, best_boundaries


def optimize_circle_exclude(matrix, blocks=5):
    """Finds the optimal exclusion pattern for a matrix.

    matrix is an nxm array of arrays.

    blocks is the number of include/exclude blocks. We
    start with an include block, so 5 blocks will include
    up to three stretches of rows and exclude the other 2.

    It returns (best_sum, radius^2, boundaries). So a return of
    (25, 104, [5, 7, 10, 12]) represents that we excluded
    all entries (i, j) where 5 <= i < 7, or 10 <= i < 12, or
    104 <= i^2 + j^2.

    It often runs in time O((n+m)^2*blocks).
    
    Note that r2 may be larger than needed.
    """
    n = len(matrix)
    m = max(len(row) for row in matrix)
    # Trivial edge case.
    if 0 == n or 0 == m:
        return (0, -1, ())
    # We will be considering a lot of "the answer is between this and that".
    # Avoid repeatedly summing things.

    # partial_sums[i][j] will be the sum of matrix[i][0]..matrix[i][j]
    partial_sums = []
    # negative_partial_sums[i][j] will be the sum of negative values
    # from matrix[i][0]..matrix[i][j]
    negative_partial_sums = []
    for row in matrix:
        # We start each row of sums with the sum of nothing,
        partial_sum = [0]
        negative_partial_sum = [0]
        for v in row:
            partial_sum.append(partial_sum[-1] + v)
            negative_partial_sum.append(negative_partial_sum[-1] + min(v, 0))
        # We are now off by 1 in indexing. Pop off the empty sum.
        partial_sum.pop(0)
        negative_partial_sum.pop(0)
        # And add it to our data structure.
        partial_sums.append(partial_sum)
        negative_partial_sums.append(negative_partial_sum)

    # The heap will contain entries of the form:
    # (-best_sum, -guaranteed_sum, r21, r22, (boundary, (boundary, ...()...)))
    #
    # Where each represents a range of possible circles as follows.
    #
    #   - best_sum is an upper limit on how good the sum can be.
    #   - guaranteed_sum is a sum we know we can get.
    #   - r21 is the radius squared of our smallest circle.
    #   - r22 is the radius squared of our largest circle.
    #   - (boundary, (boundary, ...()...)) is our include/exclude pattern.
    #
    # The negative sums turn Python's min-heap into a max-heap.
    #
    # We start our heap with a fake entry.
    heap = [(
        -1,          # Getting actual best sum is not needed.
        0,           # We can trivially guarantee this.
        -1,          # Circle excludes nothing.
        n*n + m*m,   # Circle excludes everything.
        (),          # Actual boundaries don't matter.
    )]
    while heap[0][0] < heap[0][1]:
        # Only care about search.
        _, _, r21, r22, _ = heapq.heappop(heap)
        mid = (r21 + r22) // 2
        for _r21, _r22 in [(r21, mid), (mid+1, r22)]:
            # We need to find sums excluding _r21.
            sums1 = []
            # We need to find an upper bound on sums excluding _r22.
            sums2 = []
            j1 = j2 = m-1
            for i in range(n):
                i2 = i*i
                # Figure out where both circles are.
                while -1 < j1 and _r21 < i2 + j1*j1:
                    j1 -= 1
                while -1 < j2 and _r22 < i2 + j2*j2:
                    j2 -= 1

                # Now calculate the sum excluding the first circle.
                if j1 == -1:
                    sums1.append(partial_sums[i][-1])
                else:
                    sums1.append(partial_sums[i][-1] - partial_sums[i][j1])
                
                # sums2 is an upper bound on the sum excluding the second circle.
                # This is sums1 minus the negative values in the second only.
                if j2 == -1:
                    sums2.append(sums1[-1])
                elif j1 == -1:
                    sums2.append(sums1[-1] - negative_partial_sums[i][j2])
                else:
                    sums2.append(sums1[-1] - negative_partial_sums[i][j2] + negative_partial_sums[i][j1])
            # Now we can do the dp bit.
            # This shows how to get the best we know how to achieve.
            dp1 = basic_exclude_dp(sums1, blocks)
            # This finds an upper bound.
            dp2 = basic_exclude_dp(sums2, blocks)
            heapq.heappush(heap, (
                -dp2[0],  # Our upper bound
                -dp1[0],  # Guaranteed achievable
                _r21,     # We exclude this circle.
                _r22,
                dp1[1],
            ))

    # And now our answer, mangled, is at heap[0].
    neg_total, _, r2, _, boundary_link = heap[0]

    # Undo the linked list for convenience.
    reversed_boundaries = []
    while 0 < len(boundary_link):
        reversed_boundaries.append(boundary_link[0])
        boundary_link = boundary_link[1]
    return (- neg_total, r2, list(reversed(reversed_boundaries)))