Search code examples
javamultithreadingdata-structuresconcurrency

data access in java multithreading


I am trying a make my own webserver and in that i have implemented a key value store which is a nested hashmaps and it has versioning support in it.

Everything works fine but when i hit the server with 25000 requests the get is always returning the previous version of the particular value.

I am using locks and concurrenthashmap for synchronized access, but i am failing. Here are the classes for worker and datamanager:

package cis5550.kvs;
import javax.swing.text.html.HTMLDocument;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class DataManager {

    private Map<String, Map<String, Map<String, Map<Integer, byte[]>>>> data;
    private ReentrantReadWriteLock lock;

    public DataManager() {
        data = new ConcurrentHashMap<>();
        lock = new ReentrantReadWriteLock();
    }

    public synchronized String put(String table, String row, String column, byte[] value) {
        try {
            lock.writeLock().lock();
            Map<String, Map<String, Map<Integer, byte[]>>> rowMap = data.get(table);
            if (rowMap == null) {
                rowMap = new ConcurrentHashMap<>();
                data.put(table, rowMap);
            }
            Map<String, Map<Integer, byte[]>> colMap = rowMap.get(row);
            if (colMap == null) {
                colMap = new ConcurrentHashMap<>();
                rowMap.put(row, colMap);
            }
            Map<Integer, byte[]> versionMap = colMap.get(column);
            if (versionMap == null) {
                versionMap = new ConcurrentHashMap<>();
                colMap.put(column, versionMap);
            }
            int latestVersion = getLatestVersion(versionMap);
            int newVersion = latestVersion + 1;
            versionMap.put(newVersion, value);
            return String.valueOf(newVersion);
        }finally {
            lock.writeLock().unlock();
        }

    }

    private synchronized int getLatestVersion(Map<Integer, byte[]> versionMap) {
        return versionMap.keySet().stream().max(Integer::compareTo).orElse(0);
    }

    public synchronized byte[] get(String table, String row, String column, int version) {
        try {
            lock.readLock().lock();
            Map<String, Map<String, Map<Integer, byte[]>>> rowMap = data.get(table);
            if (rowMap == null) {
                return null;
            }
            Map<String, Map<Integer, byte[]>> colMap = rowMap.get(row);
            if (colMap == null) {
                return null;
            }
            Map<Integer, byte[]> versionMap = colMap.get(column);
            if (versionMap == null) {
                return null;
            }
            return versionMap.get(version);
        }finally {
            lock.readLock().unlock();
        }
    }

    public synchronized int getLatestVersion(String table, String row, String column) {
        Map<String, Map<String, Map<Integer, byte[]>>> rowMap = data.get(table);
        if (rowMap == null) {
            return 0;
        }
        Map<String, Map<Integer, byte[]>> colMap = rowMap.get(row);
        if (colMap == null) {
            return 0;
        }
        Map<Integer, byte[]> versionMap = colMap.get(column);
        if (versionMap == null || versionMap.isEmpty()) {
            return 0;
        }
        return getLatestVersion(versionMap);
    }
}
package cis5550.kvs;

import cis5550.webserver.Server;


import java.nio.charset.StandardCharsets;
import java.util.concurrent.*;

public class Worker extends cis5550.generic.Worker {

    private static final int MAX_THREADS = 1000;

    public static void main(String[] args) {
        if (args.length < 3) {
            System.out.println("Enter the required <port> <storage directory> <ip:port>");
            System.exit(1);
        }
        //passing the port as a server
        Server.port(Integer.parseInt(args[0]));
        startPingThread(args[2], args[0], args[1]); // calling start ping thread

        DataManager dataManager = new DataManager(); // data structure for storing data
        ExecutorService threadPool = Executors.newFixedThreadPool(MAX_THREADS); // thread pool for handling requests

        Server.put("/data/:T/:R/:C", (req, res) -> {
            try {
                String tableName = req.params("T");
                String rowName = req.params("R");
                String columnName = req.params("C");
                if (req.queryParams().contains("ifcolumn") && req.queryParams().contains("equals")) {
                    String ifColumnName = req.queryParams("ifcolumn");
                    String ifColumnValue = req.queryParams("equals");

                    // Check if the ifcolumn exists and has the value specified in equals
                    int latestVersion = dataManager.getLatestVersion(tableName, rowName, columnName);
                    byte[] byteData = dataManager.get(tableName, rowName, ifColumnName , latestVersion) != null ? dataManager.get(tableName, rowName, ifColumnName , latestVersion) : new byte[0];
                    String data = new String(byteData, StandardCharsets.UTF_8);
                    if (!data.equals("") && data.equals(ifColumnValue)) {
                        // If the ifcolumn exists and has the value specified in equals, execute the PUT operation
                        threadPool.execute(() -> {
                            res.header("version", dataManager.put(tableName, rowName, columnName, req.bodyAsBytes()));
                        });
                        return "OK";
                    } else {
                        // If the ifcolumn does not exist or does not have the value specified in equals, return FAIL
                        return "FAIL";
                    }
                } else {
                    // If the query parameters are not present, execute the PUT operation
                    threadPool.execute(() -> {
                        res.header("version", dataManager.put(tableName, rowName, columnName, req.bodyAsBytes()));
                    });
                    return "OK";
                }
            } catch (Exception e) {
                res.status(404, "FAIL");
                return null;
            }
        });

        Server.get("/data/:T/:R/:C", (req, res) -> {
            try {
                String tableName = req.params("T");
                String rowName = req.params("R");
                String columnName = req.params("C");
                if (req.queryParams().contains("version")) {
                    int version = Integer.parseInt(req.queryParams("version"));
                    String data = new String(dataManager.get(tableName, rowName, columnName, version), StandardCharsets.UTF_8);
                    res.header("version", req.params("version"));
                    res.body(data);
                } else {
                    int latestVersion = dataManager.getLatestVersion(tableName, rowName, columnName);
                    String data = new String(dataManager.get(tableName, rowName, columnName, latestVersion), StandardCharsets.UTF_8);
                    res.header("version", String.valueOf(latestVersion));
                    res.body(data);
                }
            } catch (Exception e) {
                res.status(404, "FAIL");
            }
            return null;
        });
    }
}

i tried locking and using concurrenthashmap Here is the test cases for this Test cases for this code


Solution

  • You need to surround all accesses to the map with an acquisition/release of the lock.

    You don't do that in the public synchronized int getLatestVersion method.

    You need to acquire the read lock at the start of that method, and release it at the end.

    You have synchronized that method (and several other methods), but that's a separate thing from the lock. You have to create a happens-before relationship between the writes and reads, and you only get that - from a ReentrantReadWriteLock - by doing writes while holding the write lock, and reads while holding the read lock.


    BTW, this logic in put:

            Map<String, Map<String, Map<Integer, byte[]>>> rowMap = data.get(table);
            if (rowMap == null) {
                rowMap = new ConcurrentHashMap<>();
                data.put(table, rowMap);
            }
            Map<String, Map<Integer, byte[]>> colMap = rowMap.get(row);
            if (colMap == null) {
                colMap = new ConcurrentHashMap<>();
                rowMap.put(row, colMap);
            }
            Map<Integer, byte[]> versionMap = colMap.get(column);
            if (versionMap == null) {
                versionMap = new ConcurrentHashMap<>();
                colMap.put(column, versionMap);
            }
    

    can be written a lot more easily as:

    Map<Integer, byte[]> versionMap =
        data.computeIfAbsent(table, k -> new ConcurrentHashMap<>())
            .computeIfAbsent(row, k -> new ConcurrentHashMap<>())
            .computeIfAbsent(column, k -> new ConcurrentHashMap<>());
    

    Similarly for the logic in get.