java algorithm design-patterns collections java-8

How to sort the map as per the word occurrence in a string array

I am writing a Java program that prints each word together with its number of occurrences and the line numbers on which it appears. Below is the code:

package test;
import java.util.HashMap;
 import java.util.Scanner;
 import java.util.Set;

 /**
  * Counts whitespace-separated tokens across the lines of a text and records,
  * for every token, the line number of each of its occurrences.
  */
 public class Countcharacters {

    /** How many times each token has been seen so far. */
    static HashMap<String, Integer> countcharact = new HashMap<>();

    /** Per token, the ", "-separated line numbers recorded for it (one entry per occurrence). */
    static HashMap<String, String> linenumbertrack = new HashMap<>();

    /** Line number assigned to the next line passed to {@link #countwords(String)}; starts at 1. */
    static int count = 1;

    /**
     * Tallies every token of a single line into the two static maps and then
     * advances the line counter.
     *
     * <p>The current line number is appended once per occurrence, so a token that
     * appears twice on the same line gets that line number recorded twice.
     *
     * @param line one line of text; it is split on single whitespace characters
     */
    static void countwords(String line) {
        for (String token : line.split("\\s")) {
            Integer seenSoFar = countcharact.get(token);
            if (seenSoFar == null) {
                // First sighting: start the tally and the line-number list.
                countcharact.put(token, 1);
                linenumbertrack.put(token, count + "");
            } else {
                String earlierLines = linenumbertrack.get(token);
                countcharact.put(token, seenSoFar + 1);
                linenumbertrack.put(token, earlierLines + ", " + count);
            }
        }
        count++;
    }

    /**
     * Runs the counter over a fixed two-line sample and prints one row per token
     * in the form {@code token count [lines]}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String inp = "the quick brown fox jumped over the lazy dog's bowl.\nthe dog was angry with the fox for considering him lazy.";
        for (String textLine : inp.split("\n")) {
            countwords(textLine);
        }
        for (String word : countcharact.keySet()) {
            String lines = "[" + linenumbertrack.get(word) + "]";
            System.out.println(word + " " + countcharact.get(word) + " " + lines);
        }
    }
}

The output I am getting is

over 1 [1]
quick 1 [1]
lazy. 1 [2]
lazy 1 [1]
considering 1 [2]
jumped 1 [1]
was 1 [2]
for 1 [2]
angry 1 [2]
brown 1 [1]
him 1 [2]
fox 2 [1, 2]
the 4 [1, 1, 2, 2]
with 1 [2]
bowl. 1 [1]
dog's 1 [1]
dog 1 [2]

But I am having two issues.

1st: as you can see, "the" occurs 4 times, but its list of line numbers is [1,1,2,2]; it should be just [1,2].

2nd: I want to sort them. It should be sorted first by descending order of cardinality and then alphabetical order.

Like this:

the 4 [1,2]
fox 2 [1,2]
lazy 2 [1,2]
angry 1 [1]
bowl 1 [1]
.
.

Solution

It is always best to abstract away logical units of data within classes. In your problem you have two clear units:

Word occurrences (the word string and the line number where it appears).

     // Sketch: one occurrence of a word, pairing the word with a line number.
     class WordOccurrence {
         private final String word;
         private final int lineNumber;

         ...
     }

Statistics about words (number of occurrences, set of line numbers where they occur, etc).

     // Sketch: aggregates all occurrences of one word and derives statistics from them.
     class WordStats {
         // Every occurrence of the same word (the element type is WordOccurrence, as above).
         private List<WordOccurrence> occurrences;

         public String getWord() { ... }
         public int getCount() { ... }
         public Set<Integer> getLines() { ... }
     }

With these classes you can first break down your text into a Map of List of WordOccurrence; so for each different word, the Map will contain an entry with:

Key equal to the actual String word
Value equal to a List containing WordOccurrence objects for each of its occurrences in the text

You can achieve this with something like:

    /**
     * Breaks a text into its words and groups every occurrence by word.
     *
     * <p>Periods are treated as separators, lines are split on {@code "\n"} and
     * words on runs of whitespace. Possessive suffixes are left untouched, so
     * {@code dog's} and {@code dog} are counted as different words.
     *
     * @param text the text to analyse
     * @return a map from each distinct word to the list of its occurrences,
     *         each carrying the 1-based line number it was found on
     */
    public static Map<String, List<WordOccurrence>> createOccurrencesMap(String text) {
        String withoutPeriods = text.replace('.', ' ');
        Map<String, List<WordOccurrence>> result = new HashMap<>();
        String[] lines = withoutPeriods.split("\n");
        for (int i = 0; i < lines.length; i++) {
            for (String word : lines[i].split("\\s+")) {
                // split() yields an empty first token for blank lines and for lines
                // that start with whitespace (or a replaced period); that is not a word.
                if (word.isEmpty()) {
                    continue;
                }
                result.computeIfAbsent(word, w -> new ArrayList<>())
                        .add(new WordOccurrence(word, i + 1));
            }
        }
        return result;
    }

Then you can easily transform this map into a List of WordStats (sorted by a flexible, caller-supplied criterion) with something like this:

    /**
     * Builds one WordStats per distinct word of the text and returns them ordered
     * by the given comparator.
     *
     * @param text            the text to analyse
     * @param sortingCriteria ordering applied to the resulting statistics
     * @return the sorted list of per-word statistics
     */
    List<WordStats> createStats(String text, Comparator<WordStats> sortingCriteria) {
        Map<String, List<WordOccurrence>> occurrencesByWord = createOccurrencesMap(text);
        return occurrencesByWord.values().stream()
                .map(occurrences -> new WordStats(occurrences))
                .sorted(sortingCriteria)
                .collect(Collectors.toList());
    }

And that's it! Once you break down your problem into smaller intuitively logically grouped components (classes, methods, data structure, etc), the only thing left is to wire them all up.

The following code is a complete working demo of this solution for you to play with:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.stream.Collectors;

/**
 * Demo: counts how often each word occurs in a text, remembers the lines it
 * occurs on, and prints the words ordered by a caller-supplied criterion.
 */
public class CountWords {

    /**
     * Prints the statistics of a fixed sample text, most frequent words first
     * and alphabetically within the same frequency.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String text = "the quick brown fox jumped over the lazy dog's bowl.\nthe dog was angry with the fox for considering him lazy.";
        Comparator<WordStats> sortingCriteria = Comparator
                .comparingInt(WordStats::getCount).reversed()
                .thenComparing(WordStats::getWord);

        createStats(text, sortingCriteria).forEach(System.out::println);
    }

    /**
     * Builds one {@link WordStats} per distinct word of the text and returns
     * them ordered by the given comparator.
     *
     * @param text            the text to analyse
     * @param sortingCriteria ordering applied to the resulting statistics
     * @return the sorted list of per-word statistics
     */
    public static List<WordStats> createStats(String text, Comparator<WordStats> sortingCriteria) {
        return createOccurrencesMap(text).values().stream()
                .map(WordStats::new)
                .sorted(sortingCriteria)
                .collect(Collectors.toList());
    }

    /**
     * Breaks a text into its words and groups every occurrence by word.
     *
     * <p>Periods are treated as separators, lines are split on {@code "\n"} and
     * words on runs of whitespace. Possessive suffixes are left untouched, so
     * {@code dog's} and {@code dog} are counted as different words.
     *
     * @param text the text to analyse
     * @return a map from each distinct word to the list of its occurrences,
     *         each carrying the 1-based line number it was found on
     */
    public static Map<String, List<WordOccurrence>> createOccurrencesMap(String text) {
        String withoutPeriods = text.replace('.', ' ');
        Map<String, List<WordOccurrence>> result = new HashMap<>();
        String[] lines = withoutPeriods.split("\n");
        for (int i = 0; i < lines.length; i++) {
            for (String word : lines[i].split("\\s+")) {
                // split() yields an empty first token for blank lines and for lines
                // that start with whitespace (or a replaced period); that is not a word.
                if (word.isEmpty()) {
                    continue;
                }
                result.computeIfAbsent(word, w -> new ArrayList<>())
                        .add(new WordOccurrence(word, i + 1));
            }
        }
        return result;
    }

    /** Statistics derived from all occurrences of a single word. */
    static class WordStats {
        private final List<WordOccurrence> occurrences;

        /**
         * @param words every occurrence of one word; {@link #getWord()} reads the
         *              first element, so the list is expected to be non-empty
         */
        public WordStats(List<WordOccurrence> words) {
            this.occurrences = words;
        }

        /** @return the word these statistics describe (taken from the first occurrence) */
        public String getWord() {
            return occurrences.get(0).getWord();
        }

        /** @return the total number of occurrences */
        public int getCount() {
            return occurrences.size();
        }

        /**
         * @return the distinct line numbers the word occurs on, in ascending order
         */
        public Set<Integer> getLines() {
            // A TreeSet keeps the lines sorted; a plain HashSet gives no ordering
            // guarantee (e.g. lines 1 and 16 would come out as [16, 1]).
            return occurrences.stream()
                    .map(WordOccurrence::getLineNumber)
                    .collect(Collectors.toCollection(TreeSet::new));
        }

        /** @return {@code "<word> <count> <lines>"}, e.g. {@code the 4 [1, 2]} */
        @Override
        public String toString() {
            return String.format("%s %d %s", getWord(), getCount(), getLines());
        }
    }

    /** One occurrence of a word on a given line. */
    static class WordOccurrence {
        private final String word;
        private final int lineNumber;

        /**
         * @param word       the word that was found
         * @param lineNumber the line number it was found on
         */
        public WordOccurrence(String word, int lineNumber) {
            this.word = word;
            this.lineNumber = lineNumber;
        }

        /** @return the word that was found */
        public String getWord() {
            return word;
        }

        /** @return the line number the word was found on */
        public int getLineNumber() {
            return lineNumber;
        }

        /** @return {@code "<word>@<lineNumber>"} */
        @Override
        public String toString() {
            return word + "@" + lineNumber;
        }
    }
}

Complete code on GitHub

Hope this helps.