Search code examples
java csv multimap

How to convert csv to multiMap in java?


I'm dealing with data like the following: the first column is the trade id, the second column is the simulation id (duplicated a lot), the third column is some stupid date that is also heavily duplicated, and the fourth is the present value of a trade. Mostly the present value is just 0, but any other value should be quite unique.

41120634|1554|20150203|-509057.56
40998001|1554|20150203|0
40960705|1554|20150203|0
40998049|1554|20150203|0
41038826|1554|20150203|0
41081136|1554|20150203|-7198152.23
41120653|1554|20150203|-319.436349
41081091|1554|20150203|-4.28520907E+009

I've decided to use a data structure as follows:

Map<Integer,Map<Integer,List<Map<Integer,Float>>>>

Then my csv will be saved like:

 {20150203:{1554:[{41120634:-509057.56,40998001:0,...}]}}

My question is how to convert such a csv file to my intended data structure efficiently?


Solution

  • Easy to implement would be a structure like Map<K1, Map<K2, Map<K3, V>>>. This format is called NestedMap, in this case a NestedMap3, three keys and one value.

    It is very easy to implement using composition, since a NestedMap3 internally uses a Map<K1, NestedMap2<K2, K3, V>> and the NestedMap2 uses a Map<K1, Map<K2, V>>.

    Next you should think about where to use a Map and where to use a tuple-like container such as Pair<A, B>, Triple<A, B, C>, etc.
    If your values change frequently, use a container like Pair. If your values are quite often the same, use a Map. Based on this you may mix the two; for example, a Map<K, Triple<A, B, C>> might be good if the latter values change frequently.

    In your provided scenario the second and third values are often the same, so I suggest using a NestedMap3<Integer, Integer, Integer, Float> in your case.

    First, the code to set up the data structure. I'll assume your input lines are given as Strings stored in inputLines:

    // Build the three-key lookup from the raw pipe-delimited input lines.
    NestedMap3<Integer, Integer, Integer, Float> map = new NestedMap3<>();
    for (String line : inputLines) {
        // String.split takes a regular expression. An unescaped "|" is an alternation of
        // two empty patterns and would split between every character, so it is escaped
        // here. The split is applied to the current line, not to the whole collection.
        String[] values = line.split("\\|");
        map.put(toInt(values[0]), toInt(values[1]), toInt(values[2]), toFloat(values[3]));
    }
    

    Of course we also need to implement toInt and toFloat:

    /**
     * Converts the textual form of a decimal integer into an {@link Integer}.
     *
     * @param value the text to parse
     * @return the parsed number
     * @throws NumberFormatException if {@code value} is not a parsable integer
     */
    public Integer toInt(final String value) {
        final Integer parsed = Integer.valueOf(value);
        return parsed;
    }
    
    /**
     * Converts the textual form of a floating-point number into a {@link Float}.
     *
     * @param value the text to parse
     * @return the parsed number
     * @throws NumberFormatException if {@code value} is not a parsable float
     */
    public Float toFloat(final String value) {
        final Float parsed = Float.valueOf(value);
        return parsed;
    }
    

    And finally the implementation of NestedMap3 and NestedMap2:

    /**
     * A map addressed by three keys. It is stored as an outer map from the first key to a
     * {@link NestedMap2} that handles the second and third keys.
     *
     * <p>All lookups return {@code null} when no entry exists for the first key.
     *
     * @param <K1> type of the first (outermost) key
     * @param <K2> type of the second key
     * @param <K3> type of the third (innermost) key
     * @param <V> type of the stored values
     */
    public class NestedMap3<K1, K2, K3, V> {

        /** Outer level: first key -> nested map over the second and third keys. */
        private final Map<K1, NestedMap2<K2, K3, V>> mByFirstKey = new HashMap<>();

        /**
         * Stores {@code value} under the given key triple, creating the nested map for
         * {@code key1} on first use.
         *
         * @return whatever the nested map's {@code put} returns for {@code (key2, key3)}
         */
        public V put(K1 key1, K2 key2, K3 key3, V value) {
            return mByFirstKey
                    .computeIfAbsent(key1, unused -> new NestedMap2<K2, K3, V>())
                    .put(key2, key3, value);
        }

        /**
         * Looks up the value stored under the given key triple.
         *
         * @return the nested map's result for {@code (key2, key3)}, or {@code null} if
         *     nothing is stored under {@code key1}
         */
        public V get(K1 key1, K2 key2, K3 key3) {
            final NestedMap2<K2, K3, V> nested = mByFirstKey.get(key1);
            return nested == null ? null : nested.get(key2, key3);
        }

        /**
         * Looks up the innermost map stored under the first two keys.
         *
         * @return the nested map's result for {@code key2}, or {@code null} if nothing is
         *     stored under {@code key1}
         */
        public Map<K3, V> get(K1 key1, K2 key2) {
            final NestedMap2<K2, K3, V> nested = mByFirstKey.get(key1);
            return nested == null ? null : nested.get(key2);
        }

        /**
         * Returns the nested two-key map stored under {@code key1}, or {@code null} if there
         * is none. The returned object is the internal instance, not a copy.
         */
        public NestedMap2<K2, K3, V> get(K1 key1) {
            return mByFirstKey.get(key1);
        }

        /** Returns the key set of the outer map, i.e. all first keys currently present. */
        public Set<K1> keySet() {
            return mByFirstKey.keySet();
        }

        /** Removes every entry from the outer map. */
        public void clear() {
            mByFirstKey.clear();
        }
    }
    
    /**
     * A map addressed by two keys, stored as a map from the first key to a map from the
     * second key to the value.
     *
     * <p>Lookups return {@code null} when no mapping exists. Inner maps are created on
     * demand by {@link #put(Object, Object, Object)}. They are not discarded when
     * {@link #remove(Object, Object)} empties them, so an inner map may be empty.
     *
     * @param <K1> type of the first (outer) key
     * @param <K2> type of the second (inner) key
     * @param <V> type of the stored values
     */
    public class NestedMap2<K1, K2, V> {

        /** Outer level: first key -> (second key -> value). */
        private final Map<K1, Map<K2, V>> mK1ToK2ToV = new HashMap<K1, Map<K2, V>>();

        /**
         * Stores {@code value} under {@code (key1, key2)}, creating the inner map for
         * {@code key1} if it does not exist yet.
         *
         * @return the value previously stored under {@code (key1, key2)}, or {@code null}
         */
        public V put(K1 key1, K2 key2, V value) {
            Map<K2, V> k2toV = mK1ToK2ToV.get(key1);
            if (k2toV == null) {
                k2toV = new HashMap<>();
                mK1ToK2ToV.put(key1, k2toV);
            }
            return k2toV.put(key2, value);
        }

        /**
         * Returns the value stored under {@code (key1, key2)}, or {@code null} if there is
         * no inner map for {@code key1} or no entry for {@code key2} in it.
         */
        public V get(K1 key1, K2 key2) {
            final Map<K2, V> k2toV = mK1ToK2ToV.get(key1);
            if (k2toV == null) {
                return null;
            } else {
                return k2toV.get(key2);
            }
        }

        /**
         * Returns the inner map stored under {@code key1}, or {@code null} if there is none.
         * The returned map is the internal instance, not a copy.
         */
        public Map<K2, V> get(K1 key1) {
            return mK1ToK2ToV.get(key1);
        }

        /** Returns the key set of the outer map, i.e. all first keys currently present. */
        public Set<K1> keySet() {
            return mK1ToK2ToV.keySet();
        }

        /**
         * Returns an iterable over every {@code (key1, key2)} pair that has a mapping.
         *
         * <p>Each iterator walks the outer entries in turn and, within each, the keys of its
         * inner map. It reads the live maps through their own iterators.
         */
        public Iterable<Pair<K1, K2>> keys2() {
            return new Iterable<Pair<K1, K2>>() {

                @Override
                public Iterator<Pair<K1, K2>> iterator() {
                    return new Iterator<Pair<K1, K2>>() {
                        private final Iterator<Entry<K1, Map<K2, V>>> mIterator1 =
                                mK1ToK2ToV.entrySet().iterator();
                        private Entry<K1, Map<K2, V>> mIterator1Object;
                        // Null until the first outer entry has been opened.
                        private Iterator<K2> mIterator2;

                        /**
                         * Moves to the next outer entry until the inner iterator has an
                         * element to offer, skipping empty inner maps.
                         *
                         * @return {@code true} if {@code mIterator2.next()} can be called
                         */
                        private boolean advance() {
                            while (mIterator2 == null || !mIterator2.hasNext()) {
                                if (!mIterator1.hasNext()) {
                                    return false;
                                }
                                mIterator1Object = mIterator1.next();
                                mIterator2 = mIterator1Object.getValue().keySet().iterator();
                            }
                            return true;
                        }

                        @Override
                        public boolean hasNext() {
                            // Must look past an exhausted inner iterator; checking only the
                            // current inner iterator would end the iteration after the
                            // first outer key.
                            return advance();
                        }

                        @Override
                        public Pair<K1, K2> next() {
                            if (!advance()) {
                                throw new NoSuchElementException();
                            }
                            return new Pair<K1, K2>(mIterator1Object.getKey(), mIterator2.next());
                        }
                    };
                }
            };
        }

        /**
         * Returns a snapshot of all mappings as {@code (key1, key2, value)} triples.
         *
         * <p>The result is a freshly built list, so later changes to this map are not
         * reflected in it.
         */
        public Iterable<Triple<K1, K2, V>> entrySet() {
            final ArrayList<Triple<K1, K2, V>> result = new ArrayList<Triple<K1, K2, V>>();
            for (final Entry<K1, Map<K2, V>> entryOuter : mK1ToK2ToV.entrySet()) {
                for (final Entry<K2, V> entryInner : entryOuter.getValue().entrySet()) {
                    result.add(new Triple<>(entryOuter.getKey(), entryInner.getKey(), entryInner.getValue()));
                }
            }
            return result;
        }

        /**
         * Copies every mapping of {@code nestedMap} into this map, overwriting values that
         * are already stored under the same key pair.
         */
        public void addAll(NestedMap2<K1, K2, V> nestedMap) {
            for (final Triple<K1, K2, V> triple : nestedMap.entrySet()) {
                this.put(triple.getFirst(), triple.getSecond(), triple.getThird());
            }
        }

        /**
         * Removes the whole inner map stored under {@code k1}.
         *
         * @return the removed inner map, or {@code null} if there was none
         */
        public Map<K2, V> remove(K1 k1) {
            return mK1ToK2ToV.remove(k1);
        }

        /**
         * Removes the mapping for {@code (k1, k2)}. The inner map for {@code k1} is kept
         * even if this leaves it empty.
         *
         * @return the removed value, or {@code null} if there was no such mapping
         */
        public V remove(K1 k1, K2 k2) {
            final Map<K2, V> k2ToV = mK1ToK2ToV.get(k1);
            if (k2ToV == null) {
                return null;
            } else {
                return k2ToV.remove(k2);
            }
        }

        /** Returns the string form of the underlying outer map. */
        @Override
        public String toString() {
            return mK1ToK2ToV.toString();
        }

        /** Removes every entry from the outer map. */
        public void clear() {
            mK1ToK2ToV.clear();
        }
    }