Search code examples
java arrays ranking

Finding 'n' most frequent words from a file using Java?


I want to read a file and collect the top n words by word frequency.

I have tried the following code to count every word in a string.

/**
 * Reads {@code txtFile.txt}, echoes its text, and prints every distinct word together with the
 * number of times it occurs.
 *
 * @param args unused
 * @throws FileNotFoundException if {@code txtFile.txt} cannot be opened
 * @throws IOException if reading the file fails
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
     StringBuilder joined = new StringBuilder();
     // try-with-resources closes the reader even when readLine() throws.
     try (BufferedReader br = new BufferedReader(new FileReader("txtFile.txt"))) {
         String sz;
         while ((sz = br.readLine()) != null) {
             // readLine() drops the line terminator, so put a space back between lines;
             // otherwise the last word of one line fuses with the first word of the next.
             if (joined.length() > 0) {
                 joined.append(' ');
             }
             joined.append(sz);
         }
     }
     String text = joined.toString();
     String[] words = text.split(" ");
     System.out.println(text);
     if (words.length == 0) {
         // split() returns an empty array when the text consists only of spaces.
         return;
     }
     String[] uniqueLabels = getLabels(words);

     for (String l : uniqueLabels) {
         if (l == null) {
             // The label array is as long as words, so unused trailing slots are null.
             break;
         }
         if ("".equals(l)) {
             // Consecutive spaces produce empty tokens; skip them instead of stopping,
             // so the words that follow are still reported.
             continue;
         }
         int count = 0;
         for (String s : words) {
             if (l.equals(s)) {
                 count++;
             }
         }
         System.out.println("Word :: " + l + " Count :: " + count);
     }
 }

And I used the following code to collect the unique labels (words); I got it from a link:

/**
 * Collects the distinct strings of {@code keys} in order of first appearance.
 *
 * <p>The result has the same length as {@code keys}: the distinct values come first and every
 * remaining slot is {@code null}, so callers can stop at the first {@code null}.
 *
 * @param keys the words to de-duplicate; elements must not be {@code null}
 * @return the distinct words followed by {@code null} padding; an empty array when
 *     {@code keys} is empty
 */
private static String[] getLabels(String[] keys) {
      String[] uniqueKeys = new String[keys.length];
      int uniqueKeyIndex = 0;

      // Starting at index 0 with no labels stored yet also covers the empty-array case,
      // which previously failed on keys[0].
      for (int i = 0; i < keys.length; i++) {
          boolean keyAlreadyExists = false;
          // Only the first uniqueKeyIndex slots hold labels; the rest are still null.
          for (int j = 0; j < uniqueKeyIndex; j++) {
              if (keys[i].equals(uniqueKeys[j])) {
                  keyAlreadyExists = true;
                  break;
              }
          }

          if (!keyAlreadyExists) {
              uniqueKeys[uniqueKeyIndex] = keys[i];
              uniqueKeyIndex++;
          }
      }
      return uniqueKeys;
  }

This works fine. Now I want to collect the top 10 words, ranked by their frequency in the file.


Solution

  • I solved it as follows:

    /**
     * Prints the {@code n} most frequent words of {@code acq.txt}, where {@code n} is read from
     * standard input. Words are the space-separated tokens of the file.
     */
    public class wordFreq {
        /** Ranked words, most frequent first; {@code null} marks a slot that was never filled. */
        private static String[] w = null;
        /** Occurrence counts parallel to {@code w}; {@code 0} marks a slot that was never filled. */
        private static int[] r = null;

        /**
         * Prompts for {@code n}, counts the words of {@code acq.txt} and prints the top {@code n}.
         * Any failure (bad number, missing file, read error) is reported on standard error.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            try {
                System.out.println("Enter 'n' value :: ");
                // Not closed on purpose: closing the Scanner would close System.in as well.
                Scanner in = new Scanner(System.in);
                int n = in.nextInt();
                w = new String[n];
                r = new int[n]; // Java zero-initialises int arrays, so every slot starts empty.

                String[] words = readText("acq.txt").split(" ");
                String[] uniqueLabels = getUniqLabels(words);
                for (String l : uniqueLabels) {
                    if (l == null) {
                        // Null padding marks the end of the distinct labels.
                        break;
                    }
                    if ("".equals(l)) {
                        // Consecutive spaces produce empty tokens; skip them rather than
                        // abandoning the words that follow.
                        continue;
                    }
                    int count = 0;
                    for (String s : words) {
                        if (l.equals(s)) {
                            count++;
                        }
                    }
                    rank(l, count);
                }
                display(n);
            } catch (Exception e) {
                // Print the exception itself: getMessage() alone is null for some exception
                // types and hides which kind of failure occurred.
                System.err.println("ERR " + e);
            }
        }

        /**
         * Prints the first {@code n} ranking slots, one per line.
         * Slots that were never filled print as label {@code null} with count {@code 0}.
         *
         * @param n number of slots to print; must not exceed the size chosen in {@code main}
         */
        public static void display(int n) {
            for (int k = 0; k < n; k++) {
                System.out.println("Label :: " + w[k] + "\tCount :: " + r[k]);
            }
        }

        /**
         * Reads the whole file into one string, joining its lines with a single space.
         *
         * @param path file to read
         * @return the file content with line breaks replaced by spaces
         * @throws java.io.IOException if the file cannot be opened or read
         */
        private static String readText(String path) throws java.io.IOException {
            StringBuilder text = new StringBuilder();
            // try-with-resources closes the reader even when readLine() throws.
            try (BufferedReader br = new BufferedReader(new FileReader(path))) {
                String sz;
                while ((sz = br.readLine()) != null) {
                    // readLine() drops the line terminator; without a separator the last word
                    // of one line would fuse with the first word of the next.
                    if (text.length() > 0) {
                        text.append(' ');
                    }
                    text.append(sz);
                }
            }
            return text.toString();
        }

        /**
         * Inserts {@code word} into the ranking if its count beats an existing slot.
         * Lower-ranked entries shift down one place and the last one falls off, so an entry is
         * only ever displaced by a more frequent word. Ties keep first-seen order.
         *
         * @param word the word being ranked
         * @param count how many times {@code word} occurs
         */
        private static void rank(String word, int count) {
            for (int i = 0; i < r.length; i++) {
                if (count > r[i]) {
                    for (int k = r.length - 1; k > i; k--) {
                        r[k] = r[k - 1];
                        w[k] = w[k - 1];
                    }
                    r[i] = count;
                    w[i] = word;
                    return;
                }
            }
        }

        /**
         * Collects the distinct strings of {@code keys} in order of first appearance.
         *
         * @param keys the words to de-duplicate; elements must not be {@code null}
         * @return an array as long as {@code keys} holding the distinct words first and
         *     {@code null} in every remaining slot; empty when {@code keys} is empty
         */
        private static String[] getUniqLabels(String[] keys) {
            String[] uniqueKeys = new String[keys.length];
            int uniqueKeyIndex = 0;

            // Starting at index 0 with nothing stored also covers an empty keys array
            // (split() returns one when the text is only spaces).
            for (int i = 0; i < keys.length; i++) {
                boolean keyAlreadyExists = false;
                for (int j = 0; j < uniqueKeyIndex; j++) {
                    if (keys[i].equals(uniqueKeys[j])) {
                        keyAlreadyExists = true;
                        break;
                    }
                }

                if (!keyAlreadyExists) {
                    uniqueKeys[uniqueKeyIndex] = keys[i];
                    uniqueKeyIndex++;
                }
            }
            return uniqueKeys;
        }

    }
    

    And the sample output is,

    Enter 'n' value :: 
    5
    Label :: computer   Count :: 30
    Label :: company    Count :: 22
    Label :: express    Count :: 20
    Label :: offer  Count :: 16
    Label :: shearson   Count :: 16