I am working an a divide and conquer algorithm to determine if more than 1/3 elements in list are the same. For example: [1,2,3,4] No, all element are unique. [1,1,2,4,5] Yes, 2 of them are the same.
Without sorting, is there a divide and conquer strategy ? I get stucked on how to divide...
def is_valid(ids):
n = len(ids)
is_valid_recur(ids, n n-1)
def is_valid_recur(ids, l, r):
m = (l + h) // 2
return .... is_valid_recur(ids, l, m) ...is_valid_recur(ids, m+1, r):
Thanks a lot!
Here's a rough draft I experimented with for fun. It looks like the divide and conquer may reduce the number of candidate frequency checks but I'm not sure (see the last example, where only 0 is checked against the full list).
If we divide the list in three, the smallest frequency a valid candidate can have is 1/3 of each part. This narrows our list of candidates for searching in other parts. Let f(A, l, r)
represent candidates that could have a frequency of 1/3 or more in their parent group. Then:
from math import ceil
def f(A, l, r):
length = r - l + 1
if length <= 3:
candidates = A[l:r+1]
print "l, r, candidates: %s, %s, %s\n" % (l, r, candidates)
return candidates
i = 0
j = 0
third = length // 3
lg_third = int(ceil(length / float(3)))
sm_third = lg_third // 3
if length % 3 == 1:
i, j = l + third, l + 2 * third
elif length % 3 == 2:
i, j = l + third, l + 2 * third + 1
else:
i, j = l + third - 1, l + 2 * third - 1
left_candidates = f(A, l, i)
middle_candidates = f(A, i + 1, j)
right_candidates = f(A, j + 1, r)
print "length: %s, sm_third: %s, lg_third: %s" % (length, sm_third, lg_third)
print "Candidate parts: %s, %s, %s" % (left_candidates, middle_candidates, right_candidates)
left_part = A[l:i+1]
middle_part = A[i+1:j+1]
right_part = A[j+1:r+1]
candidates = []
seen = []
for e in left_candidates:
if e in seen or e in candidates:
continue
seen.append(e)
count = left_part.count(e)
if count >= lg_third:
candidates.append(e)
else:
middle_part_count = middle_part.count(e)
print "Left: counting %s in middle: %s" % (e, middle_part_count)
if middle_part_count >= sm_third:
count = count + middle_part_count
right_part_count = right_part.count(e)
print "Left: counting %s in right: %s" % (e, right_part_count)
if right_part_count >= sm_third:
count = count + right_part_count
if count >= lg_third:
candidates.append(e)
seen = []
for e in middle_candidates:
if e in seen or e in candidates:
continue
seen.append(e)
count = middle_part.count(e)
if count >= lg_third:
candidates.append(e)
else:
left_part_count = left_part.count(e)
print "Middle: counting %s in left: %s" % (e, left_part_count)
if left_part_count >= sm_third:
count = count + left_part_count
right_part_count = right_part.count(e)
print "Middle: counting %s in right: %s" % (e, right_part_count)
if right_part_count >= sm_third:
count = count + right_part_count
if count >= lg_third:
candidates.append(e)
seen = []
for e in right_candidates:
if e in seen or e in candidates:
continue
seen.append(e)
count = right_part.count(e)
if count >= lg_third:
candidates.append(e)
else:
left_part_count = left_part.count(e)
print "Right: counting %s in left: %s" % (e, left_part_count)
if left_part_count >= sm_third:
count = count + left_part_count
middle_part_count = middle_part.count(e)
print "Right: counting %s in middle: %s" % (e, middle_part_count)
if middle_part_count >= sm_third:
count = count + middle_part_count
if count >= lg_third:
candidates.append(e)
print "l, r, candidates: %s, %s, %s\n" % (l, r, candidates)
return candidates
#A = [1, 1, 2, 4, 5]
#A = [1, 2, 3, 1, 2, 3, 1, 2, 3]
#A = [1, 1, 1, 1, 1, 2, 2, 2, 2, 3]
A = [2, 2, 1, 3, 3, 1, 4, 4, 1]
#A = [x for x in range(1, 13)] + [0] * 6
print f(A, 0, len(A) - 1)