Given lists as follows:
# Allowed values for each field of a record.
make = ['ford', 'fiat', 'nissan', 'suzuki', 'dacia']
model = ['x', 'y', 'z']
version = ['A', 'B', 'C']
typ = ['sedan', 'coupe', 'van', 'kombi']
# NOTE(review): None presumably marks a record with no reported problem.
infos = ['steering wheel problems', 'gearbox problems', 'broken engine', 'throttle problems', None]

# One value list per field, in the order make, model, version, typ, infos.
# Built as a literal: appending to `total` before it exists raises NameError.
total = [make, model, version, typ, infos]
I need to create a list of lists containing every possible combination of the values in these lists, so I did:
# Cartesian product of the value lists: one tuple per combination.
combos = [*itertools.product(*total)]
# Same combinations, with each tuple converted to a list.
all_combos = list(map(list, combos))
Now I want to find the items in a JSON object whose values match an entry of all_combos,
and count the number of occurrences of each such combination. My JSON is large and looks a bit like this:
# Illustrative sample of the data; the trailing ``...`` stands for the
# remaining records.
data = [
    {
        'make': 'dacia',
        'model': 'x',
        'version': 'A',
        'typ': 'sedan',
        'infos': 'steering wheel problems'
    },
    ...
]
I want to get output like:
# Desired result: each record plus the number of records that share the
# same combination of field values; ``...`` stands for the remaining records.
output = [
    {
        'make': 'dacia',
        'model': 'x',
        'version': 'A',
        'typ': 'sedan',
        'infos': 'steering wheel problems',
        'number_of_occurences_of_such_combination_of_fields_with__such_values': 75
    },
    ...
]
How can I solve this task?
If I understand you correctly, you want to add the key number_of_occurences_of_such_combination_of_fields_with__such_values to each dictionary in your data:
from operator import itemgetter
from itertools import product

# Allowed values for each field of a record.
make = ["ford", "fiat", "nissan", "suzuki", "dacia"]
model = ["x", "y", "z"]
version = ["A", "B", "C"]
typ = ["sedan", "coupe", "van", "kombi"]
infos = [
    "steering wheel problems",
    "gearbox problems",
    "broken engine",
    "throttle problems",
    None,
]

# One value list per field, in the same order as the itemgetter below.
total = [make, model, version, typ, infos]

# Small sample standing in for the real (large) data set.
data = [
    {
        "make": "dacia",
        "model": "x",
        "version": "A",
        "typ": "sedan",
        "infos": "steering wheel problems",
    },
    {
        "make": "dacia",
        "model": "x",
        "version": "A",
        "typ": "sedan",
        "infos": "steering wheel problems",
    },
    {
        "make": "ford",
        "model": "x",
        "version": "A",
        "typ": "sedan",
        "infos": "steering wheel problems",
    },
]

# Extracts the compared fields of a record as a tuple, which has the same
# shape as the tuples yielded by product(*total).
i = itemgetter("make", "model", "version", "typ", "infos")

# Every allowed combination. `product` is imported by name above, so it is
# called directly (`itertools.product` would raise NameError here).
valid_combos = set(product(*total))

# Maps each allowed combination found in the data to the records carrying it.
# A single pass over the data with a set lookup replaces rescanning the whole
# data list once per combination.
cnt = {}
for d in data:
    c = i(d)
    if c in valid_combos:
        cnt.setdefault(c, []).append(d)

# Annotate every grouped record with the size of its group.
for v in cnt.values():
    for d in v:
        d[
            "number_of_occurences_of_such_combination_of_fields_with__such_values"
        ] = len(v)

print(data)
Prints:
[
{
"make": "dacia",
"model": "x",
"version": "A",
"typ": "sedan",
"infos": "steering wheel problems",
"number_of_occurences_of_such_combination_of_fields_with__such_values": 2,
},
{
"make": "dacia",
"model": "x",
"version": "A",
"typ": "sedan",
"infos": "steering wheel problems",
"number_of_occurences_of_such_combination_of_fields_with__such_values": 2,
},
{
"make": "ford",
"model": "x",
"version": "A",
"typ": "sedan",
"infos": "steering wheel problems",
"number_of_occurences_of_such_combination_of_fields_with__such_values": 1,
},
]
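Version 2 (without itertools.product; it counts every combination that occurs in data, including any whose values are not in the lists above):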
from collections import Counter
from operator import itemgetter

# Extracts the compared fields of a record as a tuple.
i = itemgetter("make", "model", "version", "typ", "infos")

# Number of records sharing each field combination. `data` is the list of
# record dicts defined in the previous snippet.
cnt = Counter(map(i, data))

# Annotate every record with the count for its own combination.
for d in data:
    d[
        "number_of_occurences_of_such_combination_of_fields_with__such_values"
    ] = cnt[i(d)]

print(data)