I have a Python code that searches for a list of strings in a dictionary of lists. It works for now; but my code traverses through each item in the dictionary and then tries to find the list that matches the sub list. If the dictionary is large, my code wouldn't be efficient. Is there a better/efficient way of doing this?
My python code:
from collections import defaultdict
import numpy as np
from bisect import bisect_left
def matchingStringsByKeys(dictionary, searchString):
return [val for key, val in dictionary.items() if searchString in key]
def getsublist(firstArr, secondArr):
# Maps elements of firstArr[]
# to their respective indices
mp = {}
print('len(firstArr): %s' % str(len(firstArr)))
# Traverse the array firstArr[]
for i in range(len(firstArr)):
mp[firstArr[i]] = i + 1
# Stores the indices of common elements
# between firstArr[] and secondArr[]
tempArr = []
# Traverse the array secondArr[]
for i in range(len(secondArr)):
returned_keys = matchingStringsByKeys(mp, secondArr[i])
if (len(returned_keys) > 0):
print('Match found for: %s' % secondArr[i])
for k in returned_keys:
tempArr.append(k)
tail = []
tempArrLen = len(tempArr)
if ( tempArrLen > 0):
# Stores lIS from tempArr[]
tail.append(tempArr[0])
for i in range(1, len(tempArr)):
if (tempArr[i] > tail[-1]):
tail.append(tempArr[i])
elif (tempArr[i] < tail[0]):
tail.insert(0, tempArr[i])
else :
it = bisect_left(tail, tempArr[i])
tail.insert(it, tempArr[i])
else:
print('Did not find a match')
return tail
#keyword_list = ['men', 'boy']
keyword_list = ['men', 'boy', 'dog']
#db_data={'male':['man', 'men'], 'child':['boy','lad', 'girl'], 'animal':['cat','dog']}
db_data={'male':['1:man', '2:men'], 'child':['3:boy','4:lad', '5:girl'], 'animal':['6:cat','7:dog, puppy']}
output = defaultdict(list)
for key, value in db_data.items():
print('\nchecking in: %s\n ' % (value))
a = np.array(value)
b = a.ravel()
result = getsublist(b, keyword_list)
print('Result: %s' % (result))
for x in result:
output[key].append(b[x-1])
print(output)
I recommend to make inverse db_data
, then the search would be easy:
inv_db = {}
for k, v in db_data.items():
for vv in v:
inv_db.setdefault(vv, []).append(k)
# inv_db is now:
{
"man": ["male"],
"men": ["male"],
"boy": ["child"],
"lad": ["child"],
"girl": ["child"],
"cat": ["animal"],
"dog": ["animal"],
}
and then:
keyword_list = ["men", "boy"]
out = {}
for k in keyword_list:
for v in inv_db.get(k, []):
out.setdefault(v, []).append(k)
print(out)
Prints:
{'male': ['men'], 'child': ['boy']}