I'm working on a Python function that is supposed to return the number with the highest frequency in an array. In cases where two numbers or more have the same frequency, it should return the larger number.
However, I'm facing an issue with my current implementation.
def highest_rank(arr):
count_num = {}
for i in arr:
if i not in count_num:
count_num[i] = 0
else:
count_num[i] = arr.count(i)
return max(count_num, key=count_num.get)
In the this example [9, 48, 1, 8, 44, 45, 32]
, I expect the function to return 48
but it return 9
This is a very fast solution that finds the correct result (as tested with a few different inputs)
from collections import defaultdict
def highest_rank(arr):
count = defaultdict(int)
highest_rank = 0
for num in arr:
cnt = count[num]+1
count[num]=cnt
highest_rank_cnt = count[highest_rank]
if cnt > highest_rank_cnt or (cnt == highest_rank_cnt and num > highest_rank):
highest_rank = num
return highest_rank
print(highest_rank([9, 48, 1, 8, 44, 45, 32])) # Prints 48 as expected
A quick comparison between my method and your solution (applied the fix from accepted solution):
import numpy as np
from collections import defaultdict
def highest_rank(arr):
count = defaultdict(int)
highest_rank = 0
highest_rank_cnt = 0
for num in arr:
cnt = count[num]+1
count[num]=cnt
if cnt > highest_rank_cnt or (cnt == highest_rank_cnt and num > highest_rank):
highest_rank = num
highest_rank_cnt = cnt
return highest_rank
def highest_rank_slow(arr):
count_num = {}
for i in arr:
if i not in count_num:
count_num[i] = 0
else:
count_num[i] = arr.count(i)
return max(count_num,key=lambda x:(count_num.get(x),x))
nums = list(np.random.randint(0,1000,10_000))
%timeit highest_rank(nums) # 2.97 ms ± 393 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
%timeit highest_rank_slow(nums) # 1.56 s ± 52.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
In fact, my approach can do this for a million numbers in roughy 230ms which is still faster than your solution for 10K numbers since .count()
is REPEATEDLY looping through the list instead of counting in one go.
Edit:
If you don't want to use the built-in defaultdict
, you can simply use your if num not in count
trick:
def highest_rank_no_defaultdict(arr):
count = {}
highest_rank = 0
highest_rank_cnt = 0
for num in arr:
if num not in count:
cnt=1
else:
cnt = count[num]+1
count[num]=cnt
if cnt > highest_rank_cnt or (cnt == highest_rank_cnt and num > highest_rank):
highest_rank = num
highest_rank_cnt = cnt
return highest_rank
this is both more verbose and a bit slower than using defaultdict
so unless you absolutely need to, stick with defaultdict
.