Imène Bsm Imène Bsm - 2 months ago 10
Java Question

SemgrexPattern lemma attribute doesn't seem to work

Here is a very simple example using `SemgrexPattern` from Stanford NLP. I do not understand why it doesn't find any matches with `{lemma:/eat/}` while it finds a match with `{word:/eats/}`. I used the `LemmaAnnotation` class to get the lemma of the verb "to eat", and it is "eat".

Thank you for your help :)

package Project;
import java.io.File;
import java.util.Scanner;

import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher;
import edu.stanford.nlp.semgraph.semgrex.SemgrexPattern;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.Tree;

/**
 * Minimal demo: builds a dependency graph from a bracketed parse tree and runs a
 * Semgrex pattern whose second node is constrained by its {@code lemma} attribute.
 */
public class SemgrexDemo {

  /**
   * Parses a fixed tree string, prints the resulting graph to stderr, then prints
   * every match of the Semgrex pattern to stderr.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    String treeString = "(ROOT (S (NP (NNP John)) (VP (VBZ eats) (NP (NN pizza))) (. .)))";
    Tree tree = Tree.valueOf(treeString);
    SemanticGraph graph = SemanticGraphFactory.generateUncollapsedDependencies(tree);
    System.err.println(graph);

    // Node B is selected by its lemma attribute rather than by its surface word.
    SemgrexPattern semgrex = SemgrexPattern.compile("{}=A <<dobj=reln {lemma:/eat/}=B");
    SemgrexMatcher matcher = semgrex.matcher(graph);
    while (matcher.find()) {
      System.err.println(matcher.getNode("A") + " <<dobj " + matcher.getNode("B"));
    }
  }
}

Answer

The lemmata are not added automatically to the tokens when you parse a tree string to a Tree object, so the lemma attribute of all the nodes in the SemanticGraph is null and therefore {lemma:/eat/} doesn't match any node.

You can add the lemmata using the lemma(String word, String pos) method of the Morphology class:

/**
 * Parses a fixed tree string into a dependency graph, fills in the lemma of every
 * node, prints the graph to stderr, then prints every match of the Semgrex pattern
 * to stderr.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
  String treeString = "(ROOT (S (NP (NNP John)) (VP (VBZ eats) (NP (NN pizza))) (. .)))";
  Tree tree = Tree.valueOf(treeString);
  SemanticGraph graph = SemanticGraphFactory.generateUncollapsedDependencies(tree);

  // Parsing a tree string does not attach lemmata, so each node's lemma is null
  // until it is derived here from the node's word and POS tag.
  Morphology morphology = new Morphology();
  for (IndexedWord node : graph.vertexSet()) {
    String lemma = morphology.lemma(node.word(), node.tag());
    node.setLemma(lemma);
  }

  System.err.println(graph);

  // With lemmata set, the lemma constraint on node B can now be evaluated.
  SemgrexPattern semgrex = SemgrexPattern.compile("{}=A <<dobj=reln {lemma:/eat/}=B");
  SemgrexMatcher matcher = semgrex.matcher(graph);
  while (matcher.find()) {
    System.err.println(matcher.getNode("A") + " <<dobj " + matcher.getNode("B"));
  }
}
Comments