Search code examples
javaioiteratorbufferedreader

How to read from a file block by block


I need to process a large text file in which groups of consecutive lines depend on each other. Because I always want to get information from several related lines at once, I would like to read the file block by block, rather than storing only specific features from some of the lines above.

Every block would be indicated by a unique symbol in the first line of it.

Is it possible with some kind of iterator, checking at each line whether my symbol appears? I really do not have any great ideas for how to handle this, so help would be very much appreciated.

Example:

a1    $    12    20    namea1
b1    x    12    15    namea1,nameb1
c1    x    13    17    namea1,namec1
d1    x    18    20    namea1,named1
a2    $    36    55    namea2
b2    x    38    40    namea2,nameb2
c2    x    46    54    namea2,namec2

As you can see all lines after the line with symbol $ refer to this line in some way, the numbers are in between the distance from line a1 and the names are always combined. I thought it might be better to read a file like this block by block rather than line by line.


Solution

  • I'm not really sure what you mean by "block-by-block", and even having said that, your text file structure seems well suited for line-by-line analysis. So based on your file structure, you could simply parse it in a basic while loop. Pseudo-code:

    While not end of file
        Read line into a String
        split this String on whitespace, "\\s+" into a String array
        Check the String held by the 2nd item in the String array, item[1] 
        Do action with line (create a certain object) based on this String
    end of file
    

    Now if one of the symbols represents a heading of some sort, and if that is what you mean by block-by-block, then all you would need to do is alter your parsing strategy, using state-dependent handling of your objects, something like SAX parsing. So, if for instance "$" indicates a new "block", then create a new block, and within the while loop, create objects to put into this block, until a new block header is encountered.

    So assuming a text file looking like:

    $    12    20    namea1
    x    12    15    namea1,nameb1
    x    13    17    namea1,namec1
    x    18    20    namea1,named1
    $    36    55    namea2
    x    38    40    namea2,nameb2
    x    46    54    namea2,namec2
    

    I'm assuming that the first symbol you show is not really in the file

    And assuming you have a class called Line looking something like:

    /**
     * Holds two integer values together with a growing list of names.
     */
    public class Line {
        private final List<String> names = new ArrayList<>();
        private final int x;
        private final int y;

        /**
         * Creates a line with the given integers and an empty name list.
         *
         * @param x first integer value
         * @param y second integer value
         */
        public Line(int x, int y) {
            this.x = x;
            this.y = y;
        }

        /**
         * Appends a name to the end of this line's name list.
         *
         * @param name the name to append
         */
        public void addName(String name) {
            this.names.add(name);
        }

        /**
         * Renders this line as {@code Line [x=..., y=..., names=[...]]}.
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("Line [x=");
            text.append(x).append(", y=").append(y);
            text.append(", names=").append(names).append("]");
            return text.toString();
        }

    }
    

    And a Block class,...

    /**
     * A named group of {@code Line} objects together with two integer values.
     */
    public class Block {
        private final List<Line> lines = new ArrayList<>();
        private final String name;
        private final int x;
        private final int y;

        /**
         * Creates a block with the given name and integers and no lines yet.
         *
         * @param name the block's name
         * @param x first integer value
         * @param y second integer value
         */
        public Block(String name, int x, int y) {
            this.name = name;
            this.x = x;
            this.y = y;
        }

        /**
         * Appends a line to the end of this block's line list.
         *
         * @param line the line to append
         */
        public void addLine(Line line) {
            this.lines.add(line);
        }

        /**
         * Renders this block as {@code Block [name=..., x=..., y=..., lines=[...]]}.
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("Block [name=");
            text.append(name);
            text.append(", x=").append(x);
            text.append(", y=").append(y);
            text.append(", lines=").append(lines).append("]");
            return text.toString();
        }

    }
    

    You could parse it like so:

    Block currentBlock = null;
    // try-with-resources closes the scanner even if a row fails to parse
    try (Scanner blockScanner = new Scanner(resource)) {
        while (blockScanner.hasNextLine()) {
            // trim first so an indented row still starts with its symbol
            String line = blockScanner.nextLine().trim();
            if (line.isEmpty()) {
                // a blank row would otherwise split into a single empty token
                continue;
            }
            String[] tokens = line.split("\\s+");

            // NEW_BLOCK == "$"
            if (tokens[0].equals(NEW_BLOCK)) {
                currentBlock = createBlockFromTokens(tokens);
                blocks.add(currentBlock);
            } else if (currentBlock != null) {
                // rows that appear before the first block header are ignored
                currentBlock.addLine(createLineFromTokens(tokens));
            }
        }
    }
    

    Where the createXxxxFromTokens(tokens) creates a new line or new block from the String array


    For example, the whole thing as a single MCVE:

    import java.io.InputStream;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.Scanner;
    
    /**
     * Reads the classpath resource {@code blocks.txt} and groups its rows into
     * {@code Block} objects. A row whose first token is {@code "$"} starts a new
     * block; every other row becomes a {@code Line} of the most recently started
     * block. Rows that appear before the first block header are ignored.
     */
    public class ReadBlocks {
        private static final String RESOURCE_PATH = "blocks.txt";
        private static final String NEW_BLOCK = "$";
        // every row needs a symbol, two integers and a name field
        private static final int MIN_TOKENS = 4;

        /**
         * Parses the resource and prints each block on its own line.
         *
         * @param args ignored
         * @throws IllegalStateException if the resource cannot be found
         * @throws IllegalArgumentException if a parsed row has fewer than four
         *         tokens or a non-integer where an integer is expected
         */
        public static void main(String[] args) {
            InputStream resource = ReadBlocks.class.getResourceAsStream(RESOURCE_PATH);
            if (resource == null) {
                // getResourceAsStream reports a missing resource by returning null
                throw new IllegalStateException("Resource not found: " + RESOURCE_PATH);
            }

            List<Block> blocks;
            // try-with-resources closes the scanner (and with it the stream)
            // even when a malformed row makes parsing throw
            try (Scanner blockScanner = new Scanner(resource)) {
                blocks = readBlocks(blockScanner);
            }

            for (Block block : blocks) {
                System.out.println(block);
            }
        }

        /** Consumes all remaining rows of the scanner and returns the blocks in file order. */
        private static List<Block> readBlocks(Scanner blockScanner) {
            List<Block> blocks = new ArrayList<>();
            Block currentBlock = null;
            while (blockScanner.hasNextLine()) {
                // trim first so an indented row still starts with its symbol
                String line = blockScanner.nextLine().trim();
                if (line.isEmpty()) {
                    // a blank row would otherwise split into a single empty token
                    continue;
                }
                String[] tokens = line.split("\\s+");
                if (tokens[0].equals(NEW_BLOCK)) {
                    currentBlock = createBlockFromTokens(tokens);
                    blocks.add(currentBlock);
                } else if (currentBlock != null) {
                    currentBlock.addLine(createLineFromTokens(tokens));
                }
            }
            return blocks;
        }

        /**
         * Builds a line from a row: tokens 1 and 2 are its integers, token 3 is
         * a comma-separated list of names.
         *
         * @throws IllegalArgumentException if the row has fewer than four tokens
         *         or token 1 or 2 is not an integer
         */
        private static Line createLineFromTokens(String[] tokens) {
            requireMinTokens(tokens);
            int x = Integer.parseInt(tokens[1]);
            int y = Integer.parseInt(tokens[2]);

            Line line = new Line(x, y);
            String[] names = tokens[3].split(",");
            for (String name : names) {
                line.addName(name);
            }
            return line;
        }

        /**
         * Builds a block from a header row: tokens 1 and 2 are its integers,
         * token 3 is its name.
         *
         * @throws IllegalArgumentException if the row has fewer than four tokens
         *         or token 1 or 2 is not an integer
         */
        private static Block createBlockFromTokens(String[] tokens) {
            requireMinTokens(tokens);
            int x = Integer.parseInt(tokens[1]);
            int y = Integer.parseInt(tokens[2]);
            String name = tokens[3];
            return new Block(name, x, y);
        }

        /** Rejects rows that are too short to index tokens 1 through 3. */
        private static void requireMinTokens(String[] tokens) {
            if (tokens.length < MIN_TOKENS) {
                throw new IllegalArgumentException(
                        "Expected at least " + MIN_TOKENS + " tokens but found " + tokens.length);
            }
        }
    }
    

    /**
     * A named group of {@code Line} objects together with two integer values.
     */
    class Block {
        private final List<Line> lines = new ArrayList<>();
        private final String name;
        private final int x;
        private final int y;

        /**
         * Creates a block with the given name and integers and no lines yet.
         *
         * @param name the block's name
         * @param x first integer value
         * @param y second integer value
         */
        public Block(String name, int x, int y) {
            this.name = name;
            this.x = x;
            this.y = y;
        }

        /**
         * Appends a line to the end of this block's line list.
         *
         * @param line the line to append
         */
        public void addLine(Line line) {
            this.lines.add(line);
        }

        /**
         * Renders this block as {@code Block [name=..., x=..., y=..., lines=[...]]}.
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("Block [name=");
            text.append(name);
            text.append(", x=").append(x);
            text.append(", y=").append(y);
            text.append(", lines=").append(lines).append("]");
            return text.toString();
        }

    }
    

    /**
     * Holds two integer values together with a growing list of names.
     */
    class Line {
        private final List<String> names = new ArrayList<>();
        private final int x;
        private final int y;

        /**
         * Creates a line with the given integers and an empty name list.
         *
         * @param x first integer value
         * @param y second integer value
         */
        public Line(int x, int y) {
            this.x = x;
            this.y = y;
        }

        /**
         * Appends a name to the end of this line's name list.
         *
         * @param name the name to append
         */
        public void addName(String name) {
            this.names.add(name);
        }

        /**
         * Renders this line as {@code Line [x=..., y=..., names=[...]]}.
         */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("Line [x=");
            text.append(x).append(", y=").append(y);
            text.append(", names=").append(names).append("]");
            return text.toString();
        }

    }