If I have the following list of lists of dicts, is there a way to extract only the int and float items?
# Sample input: one outer list holding a list of task-resource records.
# Each record mixes string fields (ids, QoS level, timestamp) with int and
# float resource figures.
res = [
    [
        {
            "tasks_id": "436f4cd0-9686-4b01-8a82-a24885999183",
            "qos_level": "default",
            "cpu_cores": 1,
            "memory_mb": 128.899,
            "storage_mb": 0,
            "cpu_flops": 1917778687,
            "gpu_flops": 0,
            "gpu_memory_mb": 0,
            "egress_network_bandwidth_gbps": 2334.232,
            "ingress_network_bandwidth_gbps": 2334.232,
            "create_time": "2025-01-08T12:08:55.828355",
        },
        {
            "tasks_id": "ec9872cb-3b6f-4222-bb31-6aacd2a46ee8",
            "qos_level": "default",
            "cpu_cores": 1,
            "memory_mb": 64.34,
            "storage_mb": 0,
            "cpu_flops": 12032,
            "gpu_flops": 0,
            "gpu_memory_mb": 0,
            "egress_network_bandwidth_gbps": 1300.3432,
            "ingress_network_bandwidth_gbps": 1300.3432,
            "create_time": "2025-01-24T13:23:12.818722",
        },
        {
            "tasks_id": "4ba1d839-1550-4c2e-bea3-3f1e04871d8e",
            "qos_level": "default",
            "cpu_cores": 1,
            "memory_mb": 128,
            "storage_mb": 0,
            "cpu_flops": 19177,
            "gpu_flops": 0,
            "gpu_memory_mb": 0,
            "egress_network_bandwidth_gbps": 2334,
            "ingress_network_bandwidth_gbps": 2334,
            "create_time": "2025-01-08T12:08:55.828355",
        },
    ],
]
so that I get something like this:
[
{
"cpu_cores": 1,
"memory_mb": 128.899,
"storage_mb": 0,
"cpu_flops": 1917778687,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 2334.232,
"ingress_network_bandwidth_gbps": 2334.232
},
{
"cpu_cores": 1,
"memory_mb": 64.34,
"storage_mb": 0,
"cpu_flops": 12032,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 1300.3432,
"ingress_network_bandwidth_gbps": 1300.3432
},
{
"cpu_cores": 1,
"memory_mb": 128,
"storage_mb": 0,
"cpu_flops": 19177,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 2334,
"ingress_network_bandwidth_gbps": 2334
}
]
I can do this below by specifying the specific key names, but I am looking for a more dynamic way to extract key/value pairs based on their data type, in case a new numeric key/value is added:
# Hand-written projection: each key to keep is named explicitly, and the
# egress and ingress bandwidth figures are added into one combined value.
# The result is a single flat list with one dict per inner record.
data = [
    {
        "cpu_cores": task["cpu_cores"],
        "cpu_flops": task["cpu_flops"],
        "memory_mb": task["memory_mb"],
        "storage_mb": task["storage_mb"],
        "network_bandwidth_gbps": (
            task["egress_network_bandwidth_gbps"]
            + task["ingress_network_bandwidth_gbps"]
        ),
    }
    for group in res
    for task in group
]
A more generic way to do this for this specific structure (a list of lists of dicts):
# Keep the list-of-lists shape of ``res`` but drop every non-numeric entry
# from each dict. ``bool`` is rejected explicitly: it is a subclass of ``int``
# and would otherwise pass the ``isinstance(v, (int, float))`` check.
data = [
    [
        {
            k: v
            for k, v in d.items()
            if isinstance(v, (int, float)) and not isinstance(v, bool)
        }
        for d in ds
    ]
    for ds in res
]
If you want something even more generic, where you can pass in any compound data structure of lists and dictionaries and retain only the dictionary values that match:
from typing import TypeVar

T = TypeVar('T')


def only_numeric(x: T) -> T:
    """Return a copy of *x* in which dicts keep only their numeric values.

    Lists are walked recursively and rebuilt with the same nesting. A dict is
    replaced by a new dict holding only the entries whose value is an ``int``
    or ``float``; dict values are not recursed into, so a nested dict or list
    stored as a dict value is dropped like any other non-numeric value. Any
    other object (including a non-dict, non-list element of a list) is
    returned unchanged.

    ``bool`` values are not treated as numeric even though ``bool`` is a
    subclass of ``int``.
    """
    if isinstance(x, list):
        return [only_numeric(item) for item in x]
    if isinstance(x, dict):
        return {
            key: value
            for key, value in x.items()
            # bool passes isinstance(..., int), so rule it out explicitly.
            if isinstance(value, (int, float)) and not isinstance(value, bool)
        }
    return x
# Filter the sample structure and print the result; the nesting of ``res``
# (a list of lists of dicts) is preserved in ``data``.
data = only_numeric(res)
print(data)
Note that you can go even further. For example, this solution doesn't handle nested dictionaries, which you could process recursively (it only processes dictionaries found in lists), and it leaves other non-list, non-dict values in a list untouched, where you may want to filter those out as well.
Edit: I'm noticing that you wanted to flatten the list, which would change the first solution to:
# Flatten ``res`` into one list of dicts, keeping only the numeric entries of
# each dict. ``bool`` is rejected explicitly: it is a subclass of ``int`` and
# would otherwise pass the ``isinstance(v, (int, float))`` check.
data = [
    {
        k: v
        for k, v in d.items()
        if isinstance(v, (int, float)) and not isinstance(v, bool)
    }
    for ds in res
    for d in ds
]
And the second to something like:
from typing import TypeVar, Generator


def only_numeric_dict(x: object) -> Generator[dict, None, None]:
    """Yield a numeric-only copy of every dict found in nested lists.

    Lists are walked recursively, so the dicts come out as one flat stream
    regardless of how deeply the lists are nested. Each dict is yielded as a
    new dict holding only the entries whose value is an ``int`` or ``float``;
    dict values themselves are not recursed into. Anything that is neither a
    list nor a dict yields nothing.

    ``bool`` values are not treated as numeric even though ``bool`` is a
    subclass of ``int``.
    """
    if isinstance(x, list):
        for item in x:
            yield from only_numeric_dict(item)
    elif isinstance(x, dict):
        yield {
            key: value
            for key, value in x.items()
            # bool passes isinstance(..., int), so rule it out explicitly.
            if isinstance(value, (int, float)) and not isinstance(value, bool)
        }
# Materialise the generator so ``data`` is a flat list of filtered dicts.
data = list(only_numeric_dict(res))