Steve mccain Steve mccain - 4 months ago 19
Java Question

Fixing an "Arrays cannot be resolved" error in my Java parser, and also trying to fix the printed output

This is a parser that goes through 5 text documents containing HTML source. The parser goes through each text file, counts the frequency of 5 keywords, and then displays the cosine similarity between the documents.

1) There is an "Arrays cannot be resolved" error on my Arrays.asList(...) lines in parser.java

2) An error like the one below ("Multiple markers at this line") in my main class:

Exception in thread "main" java.lang.Error: Unresolved compilation problems:
Syntax error on token ")", delete this token
Cannot make a static reference to the non-static field files
Syntax error on token "}", delete this token

at TfIdfMain.main(TfIdfMain.java:7)


This is the output I'm trying to show:

Doc1 Doc2 Doc3 Doc4 Doc5

Doc1 1 0.78
Doc2 0.57
Doc3
Doc4
Doc5


Engineering Research Data Mining Professor

Doc1 23 644
Doc2 457
Doc3
Doc4
Doc5


This is my main class (TfIdfMain.java):

import java.io.FileNotFoundException;
import java.io.IOException;
// Entry point as posted. NOTE(review): this class does not compile as written;
// the two problems are marked below.
public class TfIdfMain {

// Names of the documents to process.
// NOTE(review): this is an instance (non-static) field, but it is read from the
// static main() below without an instance, which is not allowed.
String[] files = {"doc1.txt", "doc2.txt", "doc3.txt" , "doc4.txt", "doc5.txt"};

// NOTE(review): the ")" after IOException is unbalanced; the parameter list was
// already closed after "args[]".
public static void main(String args[]) throws FileNotFoundException, IOException) {
// One pass per file name.
for(String file : files) {
// A new parser is created on every iteration, so each instance receives
// exactly one parseFiles() call.
DocumentParser dp = new DocumentParser();
dp.parseFiles(file);
// tfIdfCalculator() is never invoked in this class before the matrix is printed.
dp.getCosineMatrix();
}
}
}


My parser class (DocumentParser.java):

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

/**
 * Parses plain-text documents into term lists, builds one tf-idf vector per
 * document and prints the pairwise cosine similarity of those vectors.
 *
 * <p>Typical use: call {@link #parseFiles(String)} once per file or directory,
 * then call {@link #getCosineMatrix()}.
 */
public class DocumentParser {

    /** Tokenized terms of every parsed document, in the order they were parsed. */
    private final List<String[]> termsDocsArray = new ArrayList<String[]>();

    /** Every distinct term seen across all parsed documents, in first-seen order. */
    private final List<String> allTerms = new ArrayList<String>();

    /** One tf-idf vector per parsed document; component order follows {@code allTerms}. */
    private final List<double[]> tfidfDocsVector = new ArrayList<double[]>();

    /** True when a document was added after the tf-idf vectors were last computed. */
    private boolean vectorsStale = false;

    /**
     * Debug helper: tokenizes a fixed keyword string, prints every token with
     * its index, then prints each distinct token with its occurrence count.
     */
    private void doSomething() {
        String text = "Professor, engineering, data, mining, research";
        // Split on commas as well as spaces so tokens do not keep a trailing ",".
        StringTokenizer str = new StringTokenizer(text, ", ");
        // Size the arrays from the real token count instead of a fixed capacity.
        String[] word = new String[str.countTokens()];
        int count = 0;
        while (str.hasMoreTokens()) {
            word[count] = str.nextToken();
            System.out.println(count + ": " + word[count]);
            count++;
        }

        System.out.println("---Frequency---");
        // Collect distinct tokens, packed at the front of the array.
        String[] unique = new String[word.length];
        int uniqueCount = 0;
        for (String w : word) {
            if (!Arrays.asList(unique).contains(w)) {
                unique[uniqueCount] = w;
                uniqueCount++;
            }
        }

        // Count how often each distinct token occurs in the token list.
        for (int u = 0; u < uniqueCount; u++) {
            int occurrences = 0;
            for (String w : word) {
                if (unique[u].equals(w)) {
                    occurrences++;
                }
            }
            System.out.println(unique[u] + " : " + occurrences);
        }
    }

    /**
     * Parses one {@code .txt} file, or every {@code .txt} file directly inside
     * a directory, and records its terms as a new document.
     *
     * @param filePath path of a text file or of a directory containing text files
     * @throws FileNotFoundException if {@code filePath} does not exist or a file cannot be opened
     * @throws IOException if the directory cannot be listed or a file cannot be read
     */
    public void parseFiles(String filePath) throws FileNotFoundException, IOException {
        File root = new File(filePath);
        if (!root.exists()) {
            throw new FileNotFoundException("No such file or directory: " + filePath);
        }

        File[] allfiles;
        if (root.isDirectory()) {
            allfiles = root.listFiles();
            // listFiles() returns null when the directory cannot be read.
            if (allfiles == null) {
                throw new IOException("Unable to list directory: " + filePath);
            }
        } else {
            // A plain file is treated as a one-element listing.
            allfiles = new File[] { root };
        }

        for (File f : allfiles) {
            if (f.getName().endsWith(".txt")) {
                addDocument(readTerms(f));
            }
        }
    }

    /** Reads a file and splits its content into terms; the reader is always closed. */
    private String[] readTerms(File f) throws IOException {
        StringBuilder sb = new StringBuilder();
        BufferedReader in = new BufferedReader(new FileReader(f));
        try {
            String s;
            while ((s = in.readLine()) != null) {
                // readLine() strips the line terminator; re-insert a separator so the
                // last word of one line is not fused with the first word of the next.
                sb.append(s).append(' ');
            }
        } finally {
            in.close();
        }
        // Drop every character that is neither a word character nor whitespace,
        // then trim so the split does not produce a leading empty term.
        String cleaned = sb.toString().replaceAll("[\\W&&[^\\s]]", "").trim();
        return cleaned.split("\\W+"); // to get individual terms
    }

    /** Registers a document's terms and marks the tf-idf vectors as out of date. */
    private void addDocument(String[] tokenizedTerms) {
        for (String term : tokenizedTerms) {
            if (!allTerms.contains(term)) {
                allTerms.add(term);
            }
        }
        termsDocsArray.add(tokenizedTerms);
        vectorsStale = true;
    }

    /**
     * Recomputes the tf-idf vector of every parsed document from scratch.
     * Previously computed vectors are discarded first, so calling this more
     * than once does not accumulate duplicate or differently-sized vectors.
     */
    public void tfIdfCalculator() {
        tfidfDocsVector.clear();
        for (String[] docTermsArray : termsDocsArray) {
            double[] tfidfvectors = new double[allTerms.size()];
            int count = 0;
            for (String terms : allTerms) {
                double tf = new TfIdf().getTf(docTermsArray, terms);
                double idf = new TfIdf().idfCalculation(termsDocsArray, terms);
                tfidfvectors[count] = tf * idf;
                count++;
            }
            tfidfDocsVector.add(tfidfvectors);
        }
        vectorsStale = false;
    }

    /**
     * Prints the cosine similarity of every ordered pair of document vectors,
     * one line per pair. If documents were parsed since the vectors were last
     * computed, the vectors are recomputed first.
     */
    public void getCosineMatrix() {
        if (vectorsStale) {
            tfIdfCalculator();
        }
        for (int i = 0; i < tfidfDocsVector.size(); i++) {
            for (int j = 0; j < tfidfDocsVector.size(); j++) {
                double cosine = new CosineSimilarity().getCosine(
                        tfidfDocsVector.get(i),
                        tfidfDocsVector.get(j));
                System.out.println("between " + i + " and " + j + " = " + cosine);
            }
        }
    }
}

Answer

This code block:

// For every entry in files: create a new DocumentParser, hand it that single
// path via parseFiles(), then ask the same instance to print its cosine matrix.
for(String file : files) {
    DocumentParser dp = new DocumentParser();
    dp.parseFiles(file);
    dp.getCosineMatrix();
}

is not within a method, but it should be.

Try moving it inside the main() method.

public class TfIdfMain {

    String[] files = {"doc1.txt", "doc2.txt", "doc3.txt" , "doc4.txt", "doc5.txt"};

    public static void main(String args[]) throws FileNotFoundException, IOException) {
        for(String file : files) {
            DocumentParser dp = new DocumentParser();
            dp.parseFiles(file);
            dp.getCosineMatrix();
        }
    }
}

Note that I cleaned up other problems with your code.

You should probably just do this at the top of your parser class:

import java.util.*;

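(or, more precisely, import java.util.Arrays;) because your parser uses Arrays.asList(...) without importing java.util.Arrays, which is what produces the "Arrays cannot be resolved" error.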

Formatting your code in your IDE would really have helped you understand what's wrong with it.

Comments