dan luo dan luo - 4 months ago 20
Java Question

Runtime error when trying to implement a keyword frequency counter in my Java parser

I want to integrate my input-reading method into my DocumentParser class so that I can use my code for parsing.

Right now I get the following error at run time. How should I fix this?

Exception in thread "main" java.lang.Error: Unresolved compilation problems:
StringTokenizer cannot be resolved to a type
StringTokenizer cannot be resolved to a type
Syntax error on token "=", { expected
StringTokenizer cannot be resolved to a type
Arrays cannot be resolved
Arrays cannot be resolved
Syntax error on token "}", delete this token

at DocumentParser.<init>(DocumentParser.java:14)
at TfIdfMain.main(TfIdfMain.java:6)


Here is my DocumentParser class:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class DocumentParser {
// Keyword-frequency demo over a fixed sample sentence.
// NOTE(review): the statements below sit directly in the class body rather than inside a method,
// and StringTokenizer / Arrays are used although the import list above does not include them.
String text = "Professor, engineering, data, mining, research";
StringTokenizer str = new StringTokenizer(text);
String word[] = new String[10];
String unique[] = new String[10];
String x;
int count = -1;= // index of the last stored token; NOTE(review): the '=' after ';' is a stray token
// store each token in word[] and echo it together with its index
while (str.hasMoreTokens()) {
count++;
x = str.nextToken();
word[count] = x;
System.out.println(count + ": " + word[count]);

}

System.out.println("---Frequency---");

// create unique words: copy word[i] into unique[i] unless unique already holds an equal entry.
// The loop always scans 7 slots, regardless of how many tokens were actually read.
for (int i = 0; i < 7; i++) {

if ((!Arrays.asList(unique).contains(word[i]))) {
unique[i] = word[i];
}
}

// measuring frequency: each slot is visited once, so measure[a] is incremented at most once
int[] measure = new int[10];

for (int a = 0; a < 7; a++) {
if (Arrays.asList(unique).contains(word[a])) {
measure[a] += 1;
System.out.println(unique[a] + " : " + measure[a]);
}
}
}
}

private List<String[]> termsDocsArray = new ArrayList<String[]>();
private List<String> allTerms = new ArrayList<String>(); //to hold all terms
private List<double[]> tfidfDocsVector = new ArrayList<double[]>();

/**
 * Reads every file whose name ends with ".txt" directly inside {@code filePath},
 * splits its text into terms and records them.
 *
 * For each file the array of terms is appended to {@code termsDocsArray}, and every
 * term not already present in {@code allTerms} is appended to that list.
 *
 * @param filePath directory that contains the documents to parse
 * @throws FileNotFoundException if {@code filePath} cannot be listed as a directory,
 *         or a file cannot be opened
 * @throws IOException if reading a file fails
 */
public void parseFiles(String filePath) throws FileNotFoundException, IOException {
    File[] allfiles = new File(filePath).listFiles();
    if (allfiles == null) {
        // listFiles() returns null (it does not throw) when the path is not a listable directory.
        throw new FileNotFoundException("Not a readable directory: " + filePath);
    }
    for (File f : allfiles) {
        if (!f.getName().endsWith(".txt")) {
            continue;
        }
        StringBuilder sb = new StringBuilder();
        BufferedReader in = new BufferedReader(new FileReader(f));
        try {
            String s;
            while ((s = in.readLine()) != null) {
                // readLine() strips the line break; put a separator back so the last word
                // of one line is not glued to the first word of the next line.
                if (sb.length() > 0) {
                    sb.append(' ');
                }
                sb.append(s);
            }
        } finally {
            in.close(); // release the file handle even when reading fails
        }
        // Remove punctuation (non-word, non-space characters), then split into individual terms.
        String[] tokenizedTerms = sb.toString().replaceAll("[\\W&&[^\\s]]", "").split("\\W+");
        for (String term : tokenizedTerms) {
            if (!allTerms.contains(term)) { // avoid duplicate entry
                allTerms.add(term);
            }
        }
        termsDocsArray.add(tokenizedTerms);
    }
}

/**
 * Builds one tf-idf vector per parsed document.
 *
 * Every vector has one slot per entry of {@code allTerms}, in list order, holding
 * term frequency times inverse document frequency for that term. Each finished
 * vector is appended to {@code tfidfDocsVector}.
 */
public void tfIdfCalculator() {
    for (String[] docTermsArray : termsDocsArray) {
        double[] weights = new double[allTerms.size()];
        for (int slot = 0; slot < weights.length; slot++) {
            String term = allTerms.get(slot);
            double tf = new TfIdf().tfCalculator(docTermsArray, term);    // term frequency
            double idf = new TfIdf().idfCalculator(termsDocsArray, term); // inverse document frequency
            weights[slot] = tf * idf;
        }
        tfidfDocsVector.add(weights); // keep the document vector
    }
}

/**
 * Prints the cosine similarity for every ordered pair of stored document vectors,
 * including each document paired with itself.
 */
public void getCosineSimilarity() {
    int i = 0;
    while (i < tfidfDocsVector.size()) {
        double[] left = tfidfDocsVector.get(i);
        int j = 0;
        while (j < tfidfDocsVector.size()) {
            double[] right = tfidfDocsVector.get(j);
            String label = "between " + i + " and " + j + " = ";
            System.out.println(label + new CosineSimilarity().cosineSimilarity(left, right));
            j++;
        }
        i++;
    }
}
}


Here is my main class:

import java.io.FileNotFoundException;
import java.io.IOException;
public class TfIdfMain {
    /** Entry point: parses the documents, builds their tf-idf vectors, then prints pairwise cosine similarities. */
    public static void main(String args[]) throws FileNotFoundException, IOException {
        DocumentParser dp = new DocumentParser();
        // An optional first argument overrides the default document directory.
        String directory = args.length > 0 ? args[0] : "C:\\Users\\dachen\\Documents";
        dp.parseFiles(directory);
        // getCosineSimilarity() only reads the vectors that tfIdfCalculator() stores,
        // so this call must come first; without it nothing is printed.
        dp.tfIdfCalculator(); //calculates tfidf
        dp.getCosineSimilarity(); //calculates cosine similarity
    }
}


Also, should I put my keyword-frequency printout method in the main class (TfIdfMain.java) rather than in DocumentParser.java?

Answer

To start with your code

// Sample sentence to analyse. StringTokenizer's default delimiters are whitespace only,
// so each token keeps its trailing comma.
String text = "Professor, engineering, data, mining, research";
        StringTokenizer str = new StringTokenizer(text);
        String word[] = new String[10];
        String unique[] = new String[10];
        String x;
        int count = -1;
        // store each token in word[] and echo it together with its index
        while (str.hasMoreTokens()) {
            count++;
            x = str.nextToken();
            word[count] = x;
           System.out.println(count + ": " + word[count]);

        }

        System.out.println("---Frequency---");

        // create unique words: copy word[i] into unique[i] unless unique already holds an equal entry.
        // The loop always scans 7 slots, regardless of how many tokens were actually read.
        for (int i = 0; i < 7; i++) {

            if ((!Arrays.asList(unique).contains(word[i]))) {
                unique[i] = word[i];
            }
        }

        // measuring frequency: each slot is visited once, so measure[a] is incremented at most once
        int[] measure = new int[10];

        for (int a = 0; a < 7; a++) {
            if (Arrays.asList(unique).contains(word[a])) {
                measure[a] += 1;
                System.out.println(unique[a] + " : " + measure[a]);
            }
        }

should be in its own method, like this:

/**
 * Prints every keyword of a sample sentence together with its position, followed by
 * the number of times each distinct keyword occurs.
 *
 * Keywords are split on commas and whitespace, so punctuation does not stay attached
 * to the words. Counts are kept in a map, so repeated keywords are really counted,
 * there is no fixed-size array to overflow, and no unused slot is printed as "null".
 */
private void doSomething(){
    String text = "Professor, engineering, data, mining, research";

    // LinkedHashMap iterates in insertion order, so the report follows the input order.
    // The types are fully qualified so the method compiles without extra import lines.
    java.util.Map<String, Integer> frequency = new java.util.LinkedHashMap<String, Integer>();

    int count = -1;
    for (String token : text.trim().split("[,\\s]+")) {
        if (token.isEmpty()) {
            continue; // split() yields a leading "" when the text starts with a delimiter
        }
        count++;
        System.out.println(count + ": " + token);

        Integer seen = frequency.get(token);
        frequency.put(token, seen == null ? 1 : seen + 1);
    }

    System.out.println("---Frequency---");

    for (java.util.Map.Entry<String, Integer> entry : frequency.entrySet()) {
        System.out.println(entry.getKey() + " : " + entry.getValue());
    }
}

Secondly, in the code you posted you have written

int count = -1;= 

which causes the error: Syntax error on token "=", { expected. It should be

int count = -1;

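And because all of that code is written directly in the class body rather than inside a method, the compiler also reports "{ expected". In addition, the errors "StringTokenizer cannot be resolved to a type" and "Arrays cannot be resolved" mean that java.util.StringTokenizer and java.util.Arrays are not imported; add those two imports to DocumentParser.java.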

Please make sure you have copied the code correctly.