Roughosing Roughosing - 7 days ago
62 0

Creates a HashSet of correctly spelled words from file (english-words), also includes Levenshtein Distance calculation function, as well as suggestions function for correcting misspelled words.

Java

AssignTwo.java

import java.io.*;
import java.util.*;
import java.util.stream.Collectors;

public class AssignTwo {
	
	static HashSet<String> spellCheck = new HashSet<String>(); 								//stores words from text file
	
	public AssignTwo() throws IOException{
		spellCheck = initCorrectSpelling("C:\\Users\\Gregs\\InfoRetrieval\\src\\english-words");
	}
	
	public static HashSet<String> initCorrectSpelling(String filename) throws IOException { //store correct spelling of words in HashSet
		Scanner scanner = new Scanner(new FileInputStream(filename));
		try{
			while(scanner.hasNextLine()){
				String next = scanner.nextLine();
				spellCheck.add(next);
			}
		}
		finally{
			scanner.close();
		}
		return spellCheck;
	}
	
	public static boolean isSpelledCorrectly(String word){	//check if any given word is spelled correctly by seeing if it is 
		boolean output = false;								//contained within the spellCheck list
		if(spellCheck.contains(word)) output = true;
		return output;
	}
	
	public static List<String> suggestions(String word){ //take misspelled word and find possible suggestions
		List<String> sug = new ArrayList<String>();
		int max = 1;									//words of length < 6, use LevDist of 1, as too many suggestions for >=2
		if(word.length()>6){
			max=2;										//words of length > 6, use LevDist of 2, long words, more chance for error
		}
		Iterator<String> it = spellCheck.iterator();
		while(it.hasNext()){
			String itWord = it.next();
			int dist = levenshteinDistance(word, itWord);
			if(dist<=max){
				sug.add(itWord);
			}
		}
		return sug;
	}
	
	public static int levenshteinDistance(String a, String b) { //Calculate Levenshtein Distance between two words
        a = a.toLowerCase();									
        b = b.toLowerCase();
        int [] costs = new int [b.length() + 1];				
        for (int j = 0; j < costs.length; j++)
            costs[j] = j;
        for (int i = 1; i <= a.length(); i++) {
            costs[0] = i;
            int nw = i - 1;
            for (int j = 1; j <= b.length(); j++) {
                int cj = Math.min(1 + Math.min(costs[j], costs[j - 1]), a.charAt(i - 1) == b.charAt(j - 1) ? nw : nw + 1);
                nw = costs[j];
                costs[j] = cj;
            }
        }
        return costs[b.length()];
    }
	
	public static HashMap<String, Integer> topTen(HashMap<String, Integer> map){ //returns top 10 misspelled words and their value
		HashMap<String, Integer> output = new HashMap<String, Integer>();
		int i=0;
		HashMap<String, Integer> sorted = sortByValue(map);
		for(Map.Entry<String, Integer> entry : sorted.entrySet())
		{
		    output.put(entry.getKey(),entry.getValue());
		    if(i==9){
		    	break;
		    }
		    i++;
		}
		return output;
	}
	
	public static <String, Integer extends Comparable<? super Integer>> 
				HashMap<String, Integer> sortByValue(HashMap<String, Integer> map) { //sorts the HashMap by Values in descending order
	    return map.entrySet()
	              .stream()
	              .sorted(Map.Entry.comparingByValue(Collections.reverseOrder()))
	              .collect(Collectors.toMap(
	                Map.Entry::getKey, 
	                Map.Entry::getValue, 
	                (e1, e2) -> e1, 
	                LinkedHashMap::new
	              ));
	}
}
Comments