Search code examples
pythonsumkeyvaluepair

Is it possible to optimize this Python code?


Given a list of integers and a single sum value, return the first two values (parse from the left please) in order of appearance that add up to form the sum.

Example:

sum_pairs([11, 3, 7, 5],         10)
#              ^--^      3 + 7 = 10
== [3, 7]

sum_pairs([4, 3, 2, 3, 4],         6)
#          ^-----^         4 + 2 = 6, indices: 0, 2 *
#             ^-----^      3 + 3 = 6, indices: 1, 3
#                ^-----^   2 + 4 = 6, indices: 2, 4
#  * entire pair is earlier, and therefore is the correct answer
== [4, 2]

sum_pairs([0, 0, -2, 3], 2)
#  there are no pairs of values that can be added to produce 2.
== None/nil/undefined (Based on the language)

sum_pairs([10, 5, 2, 3, 7, 5],         10)
#              ^-----------^   5 + 5 = 10, indices: 1, 5
#                    ^--^      3 + 7 = 10, indices: 3, 4 *
#  * entire pair is earlier, and therefore is the correct answer
== [3, 7]

My code:

def sum_pairs(L,I):
    past = {}
    #set the first value in L as my key for dict
    past[L[0]] = 1
    idx = 1
    while idx < len(L):
        needed = I - L[idx]
        if needed in past:
            return [needed, L[idx]]
        past[L[idx]] = 0
        idx += 1
    return None

Solution

  • This should be faster since its O(n) rather than O(n^2) algorithm of your post

    def sum_pairs(arr, s):
      " O(n) algorithm "
      # Reverse dictionary lookup for values (only keep first entry)
      # It produces a list of indices for a given value
      # Example arr = (10, 5, 5)  => d = {10: [0], 5: [1, 2]}
      d = {}
      for i, v in enumerate(arr):
        if not v in d:
          d[v] = [i]
        elif len(d[v]) == 1:
          # Only take first two values (i.e. arr = [5]*1e7
          # will only produce {5: [0, 1]}
          d[v].append(i)
    
    
      # Find all possible pairs
      # For each v in dictionary d, we can only use it in a pair if s-v is also in the dictionary
      # so we need to find elements such that both v and s-v are in d (which would be array arr)
      result = [[(x, y) for x in d.get(v, []) for y in d.get(s-v, []) if x < y] for v in d]
      flatten_result = [item for sublist in result for item in sublist]
      if flatten_result:
        # Find the earliest indexes
        min1 = min(flatten_result, key = lambda x: max(x))
    
        # Return values with earliest indexes
        return arr[min1[0]], arr[min1[1]], min1
      else:
        return None
    
    
    print(sum_pairs([11, 3, 7, 5], 10))          # (3, 7, (1, 2))
    print(sum_pairs([4, 3, 2, 3, 4], 6))         # (4, 2, (0, 2))
    print(sum_pairs([0, 0, -2, 3], 2))           # None
    print(sum_pairs([10, 5, 2, 3, 7, 5],  10))   # (3, 7, (3, 4))
    print(sum_pairs([5, 9, 13, -3], 10))         # (13, -3, (2, 3))
    print(sum_pairs([10, 5, 5], 10))   
    

    Performance Testing

    import time
    
    N = 10000000 # 10 million elements
    
    # Test 1 -- repeating elements
    arr = [5]*N
    t_start = time.time()
    ans = sum_pairs(arr, 10)
    print(f'Processing time: {time.time() - t_start:.2f} seconds')
    #>> Processing time: 11.81 seconds
    
    from random import randint
    arr = [randint(0, 20) for _ in range(N)]
    
    t_start = time.time()
    ans = sum_pairs(arr, 10)
    print(f'Processing time: {time.time() - t_start:.2f} seconds')
    #>> Processing time: 9.12 seconds
    

    Result Less than 12 seconds for 10 million samples using on line IDE from https://repl.it/languages/python3