Stephen Ostermiller Stephen Ostermiller - 4 years ago 277
Java Question

How to truncate a string after n words in Java?

Is there a library that has a routine for truncating a string after n words? I'm looking for something that can turn:

truncateAfterWords(3, "hello, this\nis a long sentence");


into

"hello, this\nis"


I could write it myself, but I thought that something like this might already exist in some open source string manipulation library.




Here is a full list of test cases that I would expect any solution to pass:

import java.util.regex.*;

/**
 * Self-checking harness for {@code truncateAfterWords}.
 *
 * <p>Runs every entry of {@link #TEST_CASES} through the method and prints a
 * report for each entry whose result differs from the expectation or whose
 * call throws. Entries that pass print nothing.
 */
public class Test {

    /** Expectations as (word limit, input, expected output) triples. */
    private static final TestCase[] TEST_CASES = {
        new TestCase(5, null, null),
        new TestCase(5, "", ""),
        new TestCase(5, "single", "single"),
        new TestCase(1, "single", "single"),
        new TestCase(0, "single", ""),
        new TestCase(2, "two words", "two words"),
        new TestCase(1, "two words", "two"),
        new TestCase(0, "two words", ""),
        new TestCase(2, "line\nbreak", "line\nbreak"),
        new TestCase(1, "line\nbreak", "line"),
        new TestCase(2, "multiple spaces", "multiple spaces"),
        new TestCase(1, "multiple spaces", "multiple"),
        new TestCase(3, " starts with space", " starts with space"),
        new TestCase(2, " starts with space", " starts with"),
        new TestCase(10, "A full sentence, with puncutation.", "A full sentence, with puncutation."),
        new TestCase(4, "A full sentence, with puncutation.", "A full sentence, with"),
        new TestCase(50, "Testing a very long number of words in the testcase to see if the solution performs well in such a situation. Some solutions don't do well with lots of input.", "Testing a very long number of words in the testcase to see if the solution performs well in such a situation. Some solutions don't do well with lots of input."),
    };

    /**
     * Executes all test cases and reports failures on standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        for (TestCase testCase : TEST_CASES) {
            try {
                String actual = truncateAfterWords(testCase.n, testCase.s);
                if (testCase.equals(actual)) {
                    continue; // passed: stay silent
                }
                System.out.println(testCase.toString(actual));
            } catch (Exception failure) {
                // A throwing implementation is reported like any other failure.
                System.out.println(testCase.toString(failure));
            }
        }
    }

    /**
     * Placeholder for the routine under test; currently always returns
     * {@code null}.
     *
     * @param n number of words to keep
     * @param s string to truncate
     * @return {@code null} until an implementation is supplied
     */
    public static String truncateAfterWords(int n, String s) {
        // TODO: implementation
        return null;
    }
}


/**
 * A single expectation for {@code truncateAfterWords}: the arguments to pass
 * and the result that should come back, plus helpers that render failure
 * reports with the strings shown as escaped, quoted literals.
 */
class TestCase {
    /** Word limit handed to the method under test. */
    public int n;
    /** Input string; may be {@code null}. */
    public String s;
    /** Expected result; may be {@code null}. */
    public String e;

    public TestCase(int n, String s, String e) {
        this.n = n;
        this.s = s;
        this.e = e;
    }

    /** Renders the call being tested followed by the expected value. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("truncateAfterWords(");
        text.append(n).append(", ").append(toJavaString(s));
        text.append(")\n expected: ").append(toJavaString(e));
        return text.toString();
    }

    /** Renders this case followed by the actual (mismatching) result {@code r}. */
    public String toString(String r) {
        return toString() + "\n actual: " + toJavaString(r);
    }

    /** Renders this case followed by the message of the exception {@code x}. */
    public String toString(Exception x) {
        return toString() + "\n exception: " + x.getMessage();
    }

    /**
     * Tells whether {@code r} matches the expected value, treating two
     * {@code null}s as equal. This is an overload taking a {@code String},
     * not an override of {@link Object#equals(Object)}.
     */
    public boolean equals(String r) {
        return e == null ? r == null : e.equals(r);
    }

    /**
     * Escapes backslashes, line feeds, carriage returns and double quotes so
     * the text can be shown inside a quoted literal.
     *
     * @param s text to escape; may be {@code null}
     * @return the escaped text, or {@code null} if {@code s} is {@code null}
     */
    public static final String escape(String s) {
        if (s == null) {
            return null;
        }
        // Backslashes go first so the ones added by later steps are not doubled.
        return s.replace("\\", "\\\\")
                .replace("\n", "\\n")
                .replace("\r", "\\r")
                .replace("\"", "\\\"");
    }

    /** Formats {@code s} as a quoted, escaped literal, or the word null. */
    private static String toJavaString(String s) {
        return s == null ? "null" : " \"" + escape(s) + "\"";
    }
}





There are already solutions for this on this site in other languages.


Answer Source

I found a way to do it using the java.text.BreakIterator class:

/**
 * Truncates {@code s} right after its first {@code n} words, leaving the
 * whitespace and punctuation that precede the cut untouched.
 *
 * <p>Segments are found with {@link BreakIterator#getWordInstance()} (default
 * locale). A segment counts as a word when its first code point is a letter
 * or a digit, so numeric tokens such as {@code "42"} are counted too;
 * whitespace and punctuation segments are skipped without being counted.
 *
 * @param n maximum number of words to keep; zero or a negative value keeps none
 * @param s text to truncate; may be {@code null}
 * @return {@code null} if {@code s} is {@code null}; {@code s} itself if the
 *         scan reaches the end of the text before a cut is needed; otherwise
 *         the prefix of {@code s} ending at the boundary after the n-th word
 */
private static String truncateAfterWords(int n, String s) {
    if (s == null) {
        return null;
    }
    BreakIterator wb = BreakIterator.getWordInstance();
    wb.setText(s);
    int pos = 0;
    int words = 0;
    while (words < n && pos != BreakIterator.DONE && pos < s.length()) {
        // pos is the start of the next segment; only segments that open with
        // a letter or digit are words (isLetter alone would skip numbers).
        if (Character.isLetterOrDigit(s.codePointAt(pos))) {
            words++;
        }
        pos = wb.next();
    }
    if (pos == BreakIterator.DONE || pos >= s.length()) {
        return s;
    }
    return s.substring(0, pos);
}
Recommended from our users: Dynamic Network Monitoring from WhatsUp Gold from IPSwitch. Free Download