Search code examples
javaarrayscsvarraylistsequential

How to get specific data from a CSV file


I have a very big CSV file, I have managed to put all this into an ArrayList using Scanner

    Path filepath = Paths.get("./data.csv");

    try{
      Scanner InputStream = new Scanner(filepath);
      while (InputStream.hasNext()){

        wholefile.add(String.valueOf(InputStream.next()));
      } InputStream.close();

    System.out.println(wholefile);

    } catch (IOException e) {
      e.printStackTrace();
    }

  }

and my array looks like this :

wholefile = [id,property, address,first_name,last_name,email,Owner, contact, address,Price,Date, sold, 1,94032, Mockingbird, Alley,Brander,Verillo,bverillo0@sogou.com,,435587.57,, 2,293, Haas, Lane,Maxy,Reynalds...........]

Here is a screenshot of the csv file in excel https://plus.google.com/photos/photo/115135191238195349859/6559552907258825106?authkey=CIu-hovf5pj29gE

There are some things that I would like to do with this data but I am confused what methods I need to write:

  1. Get a property record by ID
  2. Get a list of n number of top priced properties
  3. Total sales for a month.

any help or guidance would be much appreciated, I'm not sure if I'm goign about this the right way

https://plus.google.com/photos/photo/115135191238195349859/6559637333893665186


Solution

  • I had to roll out a custom CSV parser for some proof of concept we were trying to do and I think you could re purpose it here:

    CSVReader.java

    public class CSVReader implements Iterable<CSVRow> {
    
        private List<String> _data;
        private int _itPos = 0;
        private int _skip = 0;
        private FileIterator _it;
        private boolean _hasTrailingComma = false;
    
        public CSVReader(Path path, boolean hasTrailingComma) throws IOException {
            this(Files.readAllLines(path), hasTrailingComma);
        }
    
        public CSVReader(Path path) throws IOException {
            this(path, false);
        }
    
        public CSVReader(List<String> data, boolean hasTrailingComma) {
            _data = data;
            _it = new FileIterator();
            _hasTrailingComma = hasTrailingComma;
        }
    
        public CSVReader(List<String> data) {
            this(data, false);
        }
    
        public CSVRow getHeaders() {
            return new CSVRow(_data.get(0), _hasTrailingComma);
        }
    
        public void skip(int rows) {
            _skip = rows;
        }
    
        @Override
        public Iterator<CSVRow> iterator() {
            _itPos = _skip;
            return _it;
        }
    
        private class FileIterator implements Iterator<CSVRow> {
    
            @Override
            public boolean hasNext() {
                return _itPos < _data.size();
            }
    
            @Override
            public CSVRow next() {
                if (_itPos == _data.size()) {
                    throw new NoSuchElementException();
                }
                return new CSVRow(_data.get(_itPos++), _hasTrailingComma);
            }
    
        }
    }
    

    CSVRow.java

    public class CSVRow implements Iterable<String> {
    
        private String[] _data;
        private int _itPos = 0;
        private int _skip = 0;
        private RowIterator _it = null;
        private int _actualLength = 0;
    
        public CSVRow(String row, boolean trailingComma) {
            // Minor hack
            // in case the data doesn't end in commas
            // we check for the last character and add
            // a comma. Ideally, the input file should be fixed;
            if(trailingComma && !row.endsWith(",")) {
                row += ",";
            }
            _data = row.split("\\s*,\\s*", -1);
            _actualLength = trailingComma ? _data.length - 1 : _data.length;
            _it = new RowIterator();
        }
    
        public CSVRow(String row) {
            this(row, false);
        }
    
        public void skip(int cells) {
            _skip = cells;
        }
    
        @Override
        public Iterator<String> iterator() {
            _itPos = _skip;
            return _it;
        }
    
        public String[] toArray() {
            return Arrays.copyOf(_data, _actualLength);
        }
    
        private class RowIterator implements Iterator<String> {
    
            @Override
            public boolean hasNext() {
                return _itPos < _actualLength;
            }
    
            @Override
            public String next() {
                if (_itPos == _actualLength) {
                    throw new NoSuchElementException();
                }
                return _data[_itPos++];
            }
    
        }
    }
    

    Usage

    public static void main(String[] args) {
        Path filepath = Paths.get("./data.csv");
        CSVReader reader = new CSVReader(filepath);
        for (CSVRow row : reader) {
            for (String str : row) {
                    System.out.printf("%s ", str);
            }
            System.out.println();
        }
    }
    

    Now it will be useful to model each row as an object so that you can do stuff with it in Java. You can define a class Property that models each row

    public class Property {
    
        private int id;
        private String address;
        private String firstName;
        private String lastName;
        private String email;
        private String ownerContactAddress;
        private BigDecimal price;
        private java.sql.Date dateSold;
    
        public Property() {
        } 
    
        // Setters and getters
        public long getId() {
            return this.id;
        }
        public void setId(String id) {
            this.id = Long.parseLong(id);
        }
        public String getAddress() {
            return this.address;
        }
        public void setAddress(String address) {
            this.address = address;
        }
        // TODO: setter/getters for firstName, lastName, email, ownerContactAddress
    
        public BigDecimal getPrice() {
            return this.price;
        }
        public void setPrice(String price, Locale locale) throws ParseException {
            NumberFormat format = NumberFormat.getNumberInstance(locale);
            if (format instanceof DecimalFormat) {
                ((DecimalFormat) format).setParseBigDecimal(true);
            }
            this.price = (BigDecimal) format.parse(amount.replaceAll("[^\\d.,]",""));
        }
        public java.sql.Date getDateSold() {
            return this.dateSold;
        }
        public void setDateSold(String date, String format) throws ParseException {
            SimpleDateFormat sdf = new SimpleDateFormat(format);
            this.dateSold = new Date(sdf.parse(date).getTime());
        }
    }
    

    Bringing everything together (Not tested)

    public static void main(String[] args) {
    
        // Collection to store properties
        // You could also write a class to wrap this 
        // map along with the methods you need to implement
        // Say PropertyTable {
        //        private Map<Long, Property> properties ...
        //        Property getPropertyById(long id);
        //        getHighestPriced() // sort the map by price
        // }
        Map<Long, Property> properties = new HashMap<>();
    
        Path filepath = Paths.get("./data.csv");
        CSVReader reader = new CSVReader(filepath);
        for (CSVRow row : reader) {
            Iterator<String> it = row.iterator();
            Property p = new Property();
            p.setId(it.next());
            p.setAddress(it.next());
            // ... set the remaining properties
            p.setPrice(it.next(), new Locale("en", "GB"));
            p.seDateSold(it.next(), "MM/dd/yyyy");
            properties.put(p.getId(), p);
        }
        // At this point, you should have all the properties read
    
        // let's try to get property with id 5
        Property prop = properties.get(5L);
    }
    

    I hope this helps.