Search code examples
javaoutputstream

navigate directory structure and name each processed file uniquely


I have a directory structure of the form start/one/two/three/*files*

My goal is to construct this program such that it can navigate my directory structure autonomously, grab each file then process it, which it seems to be doing correctly.

BUT I also need the output to be written to a new file with a unique name, i.e. the file named 00001.txt should be processed and the results should be written to 00001_output.txt

I thought I had implemented that correctly, but apparently not.

Where have I gone astray?

    // Root directory of the tree to process.
    String dirStart = "/home/data/";

    Path root = Paths.get(dirStart);

    // Walk the whole tree below the root; visitFile below is invoked for each file encountered.
    Files.walkFileTree(root.toAbsolutePath().normalize(), new SimpleFileVisitor<Path>() 
    {
        /**
         * Reads the visited file, parses it with Jsoup and prints the text of the elements
         * matching "block.full_text" to System.out.
         *
         * @param file  the file currently being visited
         * @param attrs the file's basic attributes (not used here)
         * @return always FileVisitResult.CONTINUE, so the walk proceeds to the next file
         */
        @Override
        public FileVisitResult visitFile(Path file, java.nio.file.attribute.BasicFileAttributes attrs) throws IOException 
        {


            // Only the input stream and reader are managed by try-with-resources.
            // No charset is passed to InputStreamReader, so the platform default is used.
            try(InputStream inputStream = Files.newInputStream(file);
                    BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream)))
            {

                // CHANGE OUTPUT TO NEW FILE
                // Derive the output name from the input's file name (directory part dropped).
                String print_file = file.getFileName().toString();
                String fileNameWithOutExt = FilenameUtils.removeExtension(print_file);
                // This println goes to whatever System.out is at this moment, i.e. the console
                // for the first file and the previous file's output stream afterwards.
                System.out.println(fileNameWithOutExt);

                // Opens "<name>_output.txt" (a relative path) and makes it the JVM-wide System.out.
                // NOTE(review): nothing in this block closes this stream or restores the
                // previous System.out, so the redirect stays in effect for later files.
                PrintStream out = new PrintStream(new FileOutputStream( fileNameWithOutExt + "_output.txt" ) );
                System.setOut(out);


                // SOUP PART
                // Read the whole file line by line, re-joining lines with the platform separator.
                StringBuilder sb = new StringBuilder();
                String line = bufferedReader.readLine();

                while (line != null) 
                {

                    sb.append(line);
                    sb.append(System.lineSeparator());
                    line = bufferedReader.readLine();
                }
                String everything = sb.toString();


                // Parse the content and print the selected text to the redirected System.out.
                Document doc = Jsoup.parse(everything);
                String link = doc.select("block.full_text").text();
                System.out.println(link);


            } 
            catch (IOException e) 
            {
                // Report the failure for this file and fall through to continue the walk.
                e.printStackTrace();
            }

            return FileVisitResult.CONTINUE;
        }
    }); 

This is also my question; it might give some additional insight into what I'm actually trying to do.


Solution

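  • System.setOut seems like a bad idea: it swaps the JVM-wide standard output, so the redirect stays in effect for every later println, and the stream you opened is never closed.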

    Below is some untested code which might work.

    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.io.PrintStream;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.FileVisitResult;
    import java.nio.file.Files;
    import java.nio.file.Path;
    import java.nio.file.Paths;
    import java.nio.file.SimpleFileVisitor;
    import org.apache.commons.io.FilenameUtils;
    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    
    /**
     * Walks a directory tree and, for every file visited, extracts the text of the elements
     * matching {@code block.full_text} and writes it to {@code <name>_output.txt}, where
     * {@code <name>} is the input file name without its extension.
     *
     * <p>Output files are created relative to the current working directory, not next to
     * the input file, so inputs that share a file name overwrite each other's output.
     */
    public class App {

        /** Suffix appended to the extension-less input name to form the output file name. */
        private static final String OUTPUT_SUFFIX = "_output.txt";

        /**
         * Processes every file below {@code /home/data/}.
         *
         * @param args ignored
         * @throws IOException if the tree cannot be walked or an input file cannot be read
         */
        public static void main(String[] args) throws IOException {
            String dirStart = "/home/data/";

            Path root = Paths.get(dirStart);

            Files.walkFileTree(root.toAbsolutePath().normalize(), new SimpleFileVisitor<Path>() {
                /**
                 * Extracts the selected text from one input file and writes it to that
                 * file's own output file.
                 *
                 * @param file  the file currently being visited
                 * @param attrs the file's basic attributes (unused)
                 * @return always {@link FileVisitResult#CONTINUE}
                 * @throws IOException if the input file cannot be read
                 */
                @Override
                public FileVisitResult visitFile(Path file, java.nio.file.attribute.BasicFileAttributes attrs) throws IOException {

                    // Derive the per-file output name, e.g. 00001.txt -> 00001_output.txt.
                    String inputName = file.getFileName().toString();
                    String fileNameWithOutExt = FilenameUtils.removeExtension(inputName);
                    // Progress message only; System.out is never redirected.
                    System.out.println(fileNameWithOutExt);

                    // SOUP PART
                    String everything = new String(Files.readAllBytes(file), StandardCharsets.UTF_8);

                    Document doc = Jsoup.parse(everything);
                    String link = doc.select("block.full_text").text();

                    // Encode the output as UTF-8 to match how the input was decoded, rather
                    // than using the platform default charset. Files.write creates or
                    // truncates the target, closes it itself, and reports write failures
                    // as IOException instead of swallowing them.
                    Path output = Paths.get(fileNameWithOutExt + OUTPUT_SUFFIX);
                    byte[] content = (link + System.lineSeparator()).getBytes(StandardCharsets.UTF_8);
                    try {
                        Files.write(output, content);
                    } catch (IOException e) {
                        // Best effort: report which file failed and carry on with the rest.
                        System.err.println("Could not write " + output + " for input " + file);
                        e.printStackTrace();
                    }

                    return FileVisitResult.CONTINUE;
                }
            });
        }

    }