David David - 4 months ago 33
Java Question

Sorting characters from a text file in descending order of frequencies

I'm trying to write code that counts the number of words and characters in a text file, then sorts every character in the file (including spaces and commas) in descending order of frequency.
I couldn't figure out how to count commas and spaces.

Here is what I came up with:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.*;

/**
 * Prints word, character and per-letter statistics for the text file at
 * {@code C:/input.txt}.
 *
 * <p>The program makes two passes over the file: the first counts words and
 * non-space characters line by line, the second counts how often each
 * lower-case letter {@code 'a'}..{@code 'z'} occurs.
 */
public class Marking {
    /** Path of the text file that is analysed. */
    private static final String INPUT_PATH = "C:/input.txt";

    /** Number of letters counted, {@code 'a'} through {@code 'z'}. */
    private static final int ALPHABET_SIZE = 26;

    /**
     * Reads the input file and prints the statistics to standard output.
     *
     * @param args ignored
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        int numberOfWords = 0;
        int numberOfChars = 0;

        // Pass 1: words and characters. Lines are processed as they are read
        // instead of being stored, so files of any length are supported.
        BufferedReader lineReader = new BufferedReader(new FileReader(INPUT_PATH));
        try {
            String line;
            while ((line = lineReader.readLine()) != null) {
                // Splitting on a single space: every piece counts as a word,
                // and the summed piece lengths are the non-space characters.
                String[] words = line.split(" ");
                numberOfWords += words.length;
                for (String word : words) {
                    numberOfChars += word.length();
                }
            }
        } finally {
            // Close the reader even when reading fails.
            lineReader.close();
        }

        System.out.println("Number Of Words: " + numberOfWords);
        System.out.println("Number Of Characters : " + numberOfChars);

        // Pass 2: frequency of each lower-case letter.
        int[] count = new int[ALPHABET_SIZE];
        BufferedReader in = new BufferedReader(new FileReader(new File(INPUT_PATH)));
        try {
            int nextChar;
            while ((nextChar = in.read()) != -1) {
                if (nextChar >= 'a' && nextChar <= 'z') {
                    count[nextChar - 'a']++;
                }
            }
        } finally {
            in.close();
        }

        // Letters are reported in upper case, in alphabetical order.
        for (int letter = 0; letter < ALPHABET_SIZE; letter++) {
            System.out.printf("%c : %d%n", letter + 'A', count[letter]);
        }
    }
}


This is the actual output:

Number Of Words: 68
Number Of Characters : 342
A : 32
B : 6
C : 8
D : 10
E : 35
F : 7
G : 9
H : 11
I : 27
J : 0
K : 1
L : 15
M : 10
N : 28
O : 24
P : 9
Q : 0
R : 16
S : 28
T : 31
U : 11
V : 4
W : 4
X : 1
Y : 6
Z : 0


I want the output to look like this:

Number Of Words: 68
Number Of Characters : 342
E : 35
A : 32
T : 31
N : 28
I : 27
O : 24
...


Thanks in advance!

Answer

You could use a Map, which you then sort with a custom comparator (I borrowed the code from this thread). That way you don't have to predefine which characters to count, as you would with an array.

This would look something like this:

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;

/**
 * Counts the words and characters of the text file {@code C:\test.txt} and
 * prints how often each character occurs, most frequent first.
 *
 * <p>Characters are lower-cased before counting. Line terminators are not
 * counted because the file is read with {@link BufferedReader#readLine()}.
 */
public class CountCharsFromFile {
    /** Reader for the input file; {@code null} while no file is open. */
    static BufferedReader b;

    /**
     * Reads the file, counts words and characters and prints the result.
     * I/O problems are reported on standard error instead of being thrown.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        try {
            FileReader fr = new FileReader("C:\\test.txt");
            b = new BufferedReader(fr);

            // Counts are kept as Double because ValueComparator works on a
            // Map<String, Double>.
            Map<String, Double> count = new HashMap<String, Double>();
            ValueComparator bvc = new ValueComparator(count);
            TreeMap<String, Double> sorted_map = new TreeMap<String, Double>(bvc);

            int totalChars = 0;
            int totalWords = 0;

            String currentLine;

            while ((currentLine = b.readLine()) != null) {
                // Char count:
                totalChars += currentLine.length();

                // Adding all chars to the Map:
                for (int i = 0; i < currentLine.length(); i++) {
                    String key = String.valueOf(Character.toLowerCase(currentLine.charAt(i)));
                    Double seen = count.get(key);
                    count.put(key, seen == null ? 1.0 : seen + 1);
                }

                // Counting words: trim first so that blank lines and leading
                // whitespace do not produce an empty token that would be
                // counted as a word.
                String trimmed = currentLine.trim();
                if (trimmed.length() > 0) {
                    totalWords += trimmed.split("\\s+").length;
                }
            }

            // The comparator reads the counts from "count", so the sorted map
            // may only be filled once counting has finished.
            sorted_map.putAll(count);

            //Output:
            System.out.println("Words: " + totalWords);
            System.out.println("Chars: " + totalChars);
            System.out.println(sorted_map.toString());

        } catch (FileNotFoundException e) {
            System.err.println("Error, file not found!");
            e.printStackTrace();
        } catch (IOException e) {
            System.err.println("Error reading file!");
            e.printStackTrace();
        } finally {
            // b is still null when the file could not be opened; closing it
            // unconditionally would throw a NullPointerException here.
            if (b != null) {
                try {
                    b.close();
                } catch (IOException e) {
                    System.err.println("Couldn't close the BufferedReader!");
                    e.printStackTrace();
                }
                b = null;
            }
        }

    }
}




//comparator class:

/**
 * Orders map keys by the value they have in a backing map, highest value
 * first. Keys with the same value are ordered by the key itself, in reverse
 * natural order.
 *
 * <p>The comparator returns 0 only when both keys are equal, so it is
 * consistent with {@code equals} and a {@code TreeMap} built with it supports
 * {@code get} and {@code containsKey} as usual.
 */
class ValueComparator implements Comparator<String> {
    /** Map the values are looked up in; must contain every compared key. */
    Map<String, Double> base;

    /**
     * @param base map that supplies the value for each key being compared
     */
    public ValueComparator(Map<String, Double> base) {
        this.base = base;
    }

    /**
     * Compares two keys by their values in the backing map.
     *
     * @param a first key
     * @param b second key
     * @return a negative number if {@code a} sorts before {@code b}, a
     *         positive number if it sorts after, 0 if the keys are equal
     * @throws NullPointerException if either key has no value in the map
     */
    @Override
    public int compare(String a, String b) {
        // Arguments are swapped on purpose: larger values come first.
        int byValue = base.get(b).compareTo(base.get(a));
        if (byValue != 0) {
            return byValue;
        }
        // Tie-break on the key so that distinct keys never compare as equal
        // (which would merge them in a TreeMap) while compare(x, x) is 0.
        return b.compareTo(a);
    }
}

Output looks like this:

Words: 9
Chars: 59
{ =16.0, h=7.0, i=5.0, r=4.0, c=4.0, �=3.0, s=3.0, o=3.0, l=3.0, f=3.0, ,=2.0, w=1.0, u=1.0, n=1.0, m=1.0, b=1.0, a=1.0}

The output of "sorted_map.toString()" is not very readable, so I wrote a quick output method:

/**
 * Prints one line per map entry in the form {@code <character> = <count>},
 * in the iteration order of the map. The space character is printed as the
 * word {@code SPACE} so that it is visible in the output.
 *
 * <p>Entries are read through {@code entrySet()}, which does not look keys up
 * through the map's comparator, and counts are printed as whole numbers.
 *
 * @param sm map from single-character strings to their counts; an empty map
 *           prints nothing
 */
static void output(TreeMap<String, Double> sm) {
    for (Map.Entry<String, Double> entry : sm.entrySet()) {
        String key = entry.getKey();
        String label = " ".equals(key) ? "SPACE" : key;
        // Counts are whole numbers stored as Double; print them without the
        // fractional part (and without scientific notation for large counts).
        System.out.println(label + " = " + entry.getValue().longValue());
    }
}

Which you call like so:

    // Print the totals, followed by a blank separator line.
    System.out.println("Words: " + totalWords);
    System.out.println("Chars: " + totalChars);
    System.out.println();
    // Replaces the earlier System.out.println(sorted_map.toString()) call:
    // output(...) prints one "<character> = <count>" line per entry instead.
    output(sorted_map);

And the Output looks like this:

Words: 9
Chars: 60

SPACE = 8
R = 6
T = 5
E = 5
A = 5
N = 3
U = 2
O = 2
M = 2
L = 2
I = 2
H = 2
. = 1
Z = 1
Y = 1
X = 1
W = 1
V = 1
S = 1
Q = 1
P = 1
K = 1
J = 1
G = 1
F = 1
D = 1
C = 1
B = 1

And there you go, it got a little bit messy (the comparator breaks the "TreeMap.get" method so I had to build a workaround using substrings) but I hope that this will somewhat help you :)

Comments