Is it possible to optimize this Python code?

Given a list of integers and a single sum value, return the first two values (parse from the left please) in order of appearance that add up to form the sum.

Example:

sum_pairs([11, 3, 7, 5],         10)
#              ^--^      3 + 7 = 10
== [3, 7]

sum_pairs([4, 3, 2, 3, 4],         6)
#          ^-----^         4 + 2 = 6, indices: 0, 2 *
#             ^-----^      3 + 3 = 6, indices: 1, 3
#                ^-----^   2 + 4 = 6, indices: 2, 4
#  * entire pair is earlier, and therefore is the correct answer
== [4, 2]

sum_pairs([0, 0, -2, 3], 2)
#  there are no pairs of values that can be added to produce 2.
== None/nil/undefined (Based on the language)

sum_pairs([10, 5, 2, 3, 7, 5],         10)
#              ^-----------^   5 + 5 = 10, indices: 1, 5
#                    ^--^      3 + 7 = 10, indices: 3, 4 *
#  * entire pair is earlier, and therefore is the correct answer
== [3, 7]

My code:

def sum_pairs(L,I):
    past = {}
    #set the first value in L as my key for dict
    past[L[0]] = 1
    idx = 1
    while idx < len(L):
        needed = I - L[idx]
        if needed in past:
            return [needed, L[idx]]
        past[L[idx]] = 0
        idx += 1
    return None

Solution

This should be faster since its O(n) rather than O(n^2) algorithm of your post

def sum_pairs(arr, s):
  " O(n) algorithm "
  # Reverse dictionary lookup for values (only keep first entry)
  # It produces a list of indices for a given value
  # Example arr = (10, 5, 5)  => d = {10: [0], 5: [1, 2]}
  d = {}
  for i, v in enumerate(arr):
    if not v in d:
      d[v] = [i]
    elif len(d[v]) == 1:
      # Only take first two values (i.e. arr = [5]*1e7
      # will only produce {5: [0, 1]}
      d[v].append(i)


  # Find all possible pairs
  # For each v in dictionary d, we can only use it in a pair if s-v is also in the dictionary
  # so we need to find elements such that both v and s-v are in d (which would be array arr)
  result = [[(x, y) for x in d.get(v, []) for y in d.get(s-v, []) if x < y] for v in d]
  flatten_result = [item for sublist in result for item in sublist]
  if flatten_result:
    # Find the earliest indexes
    min1 = min(flatten_result, key = lambda x: max(x))

    # Return values with earliest indexes
    return arr[min1[0]], arr[min1[1]], min1
  else:
    return None


print(sum_pairs([11, 3, 7, 5], 10))          # (3, 7, (1, 2))
print(sum_pairs([4, 3, 2, 3, 4], 6))         # (4, 2, (0, 2))
print(sum_pairs([0, 0, -2, 3], 2))           # None
print(sum_pairs([10, 5, 2, 3, 7, 5],  10))   # (3, 7, (3, 4))
print(sum_pairs([5, 9, 13, -3], 10))         # (13, -3, (2, 3))
print(sum_pairs([10, 5, 5], 10))

Performance Testing

import time

N = 10000000 # 10 million elements

# Test 1 -- repeating elements
arr = [5]*N
t_start = time.time()
ans = sum_pairs(arr, 10)
print(f'Processing time: {time.time() - t_start:.2f} seconds')
#>> Processing time: 11.81 seconds

from random import randint
arr = [randint(0, 20) for _ in range(N)]

t_start = time.time()
ans = sum_pairs(arr, 10)
print(f'Processing time: {time.time() - t_start:.2f} seconds')
#>> Processing time: 9.12 seconds

Result Less than 12 seconds for 10 million samples using on line IDE from https://repl.it/languages/python3