1   package eu.fbk.dkm.premon.premonitor;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.regex.Matcher;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;

import org.openrdf.model.URI;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.rio.RDFHandler;

import eu.fbk.dkm.premon.premonitor.propbank.Inflection;
import eu.fbk.dkm.premon.premonitor.propbank.Role;
import eu.fbk.dkm.premon.premonitor.propbank.Roleset;
import eu.fbk.dkm.premon.vocab.NIF;
import eu.fbk.dkm.premon.vocab.PM;
import eu.fbk.dkm.premon.vocab.PMOPB;
28  /**
29   * Created by alessio on 28/10/15.
30   */
32  public class PropbankConverter extends BankConverter {
34      private static String LINK_PATTERN = "http://verbs.colorado.edu/propbank/framesets-english/%s-%s.html";
36      private static Set<String> PREPOSITIONS = ImmutableSet.of("from", "on", "to", "as", "at",
37              "by", "for", "in", "of", "with", "upon", "into", "around", "about");
39      public PropbankConverter(File path, RDFHandler sink, Properties properties, Map<String, URI> wnInfo) {
40          super(path, properties.getProperty("source"), sink, properties, properties.getProperty("language"), wnInfo);
42          this.nonVerbsToo = properties.getProperty("extractnonverbs", "0").equals("1");
43          this.isOntoNotes = properties.getProperty("ontonotes", "0").equals("1");
44          this.noDef = !properties.getProperty("extractdefinitions", "0").equals("1");
45          this.extractExamples = properties.getProperty("extractexamples", "0").equals("1");
46          this.defaultType = "v";
48      }
50      private boolean usableInflectionPart(String part) {
51          return part != null && part.length() > 0 && !part.equals("ns");
52      }
54      @Override Type getType(String code) {
55          if (code != null) {
56              if (PMOPB.mapM.containsKey(code)) {
57                  return Type.M_FUNCTION;
58              }
59              if (PMOPB.mapO.containsKey(code)) {
60                  return Type.ADDITIONAL;
61              }
62              if (PREPOSITIONS.contains(code)) {
63                  return Type.PREPOSITION;
64              }
66              Matcher matcher = ARG_NUM_PATTERN.matcher(code);
67              if (matcher.find()) {
68                  return Type.NUMERIC;
69              }
71              if (code.equals("a")) {
72                  return Type.AGENT;
73              }
75              throw new IllegalArgumentException(String.format("String %s not found", code));
76          }
77          return Type.NULL;
78      }
80      protected void addExternalLinks(ComplexLemmaWithMappings complexLemmaWithMappings, URI conceptualizationURI, String uriLemma, String type) {
82          String rolesetID = complexLemmaWithMappings.getRolesetID();
84          //added to cope with same rolesets for different lexical entries (noun and verb)
85          if (isOntoNotes)
86              if (type.equals("n"))
87                  rolesetID="n-"+rolesetID;
89          URI rolesetURI = uriForRoleset(rolesetID);
91          // FrameNet
92          List<String> fnPredicates = new ArrayList<>();
93          if (complexLemmaWithMappings.getFramenet() != null) {
94              String[] tmpFnPreds = complexLemmaWithMappings.getFramenet().trim().toLowerCase()
95                      .split("\\s+");
96              for (String tmpClass : tmpFnPreds) {
97                  tmpClass = tmpClass.trim();
98                  if (tmpClass.length() > 1) {
99                      fnPredicates.add(tmpClass);
100                 }
101             }
102         }
104         for (String fnPredicate : fnPredicates) {
105             for (String fnLink : fnLinks) {
106                 URI fnFrameURI = uriForRoleset(fnPredicate, fnLink);
107                 URI fnConceptualizationURI = uriForConceptualizationWithPrefix(uriLemma, type, fnPredicate, fnLink);
108                 addMappings(rolesetURI, fnFrameURI, conceptualizationURI, fnConceptualizationURI);
109             }
110         }
112         // VerbNet
113         List<String> vnClasses = getVnClasses(complexLemmaWithMappings.getVn());
114         for (String vnClass : vnClasses) {
115             for (String vnLink : vnLinks) {
116                 URI vnClassURI = uriForRoleset(vnClass, vnLink);
117                 URI vnConceptualizationURI = uriForConceptualizationWithPrefix(uriLemma, "v", vnClass, vnLink);
118                 addMappings(rolesetURI, vnClassURI, conceptualizationURI, vnConceptualizationURI);
119             }
120         }
122         // PropBank
123 //        ArrayList<Matcher> matchers = getPropBankPredicates(roleset);
124 //        for (Matcher matcher : matchers) {
125 //            String pbLemma = matcher.group(2);
126 //            String pbPredicate = matcher.group(1);
127 //
128 //            for (String pbLink : pbLinks) {
129 //                String lemma = getLemmaFromPredicateName(pbLemma);
130 //                URI pbRolesetURI = uriForRoleset(pbPredicate, pbLink);
131 //                URI pbConceptualizationURI = uriForConceptualizationWithPrefix(lemma, type, pbPredicate, pbLink);
132 //                addMappings(rolesetURI, pbRolesetURI, conceptualizationURI, pbConceptualizationURI);
133 //            }
134 //        }
135     }
137     @Override void addInflectionToSink(URI exampleURI, Inflection inflection) {
139         if (inflection == null) {
140             return;
141         }
143         ArrayList<String> inflectionParts = new ArrayList<>();
144         Multimap<URI, URI> inflections = HashMultimap.create();
146         if (usableInflectionPart(inflection.getAspect())) {
147             inflectionParts.add(inflection.getAspect());
148             if (inflection.getAspect().equals("both")) {
149                 inflections.put(PMOPB.ASPECT_P, PMOPB.PROGRESSIVE);
150                 inflections.put(PMOPB.ASPECT_P, PMOPB.PERFECT);
151             } else {
152                 inflections.put(PMOPB.ASPECT_P, PMOPB.mapAspect.get(inflection.getAspect()));
153             }
154         }
155         if (usableInflectionPart(inflection.getForm())) {
156             inflectionParts.add(inflection.getForm());
157             inflections.put(PMOPB.FORM_P, PMOPB.mapForm.get(inflection.getForm()));
158         }
159         if (usableInflectionPart(inflection.getPerson())) {
160             inflectionParts.add(inflection.getPerson());
161             inflections.put(PMOPB.PERSON_P, PMOPB.mapPerson.get(inflection.getPerson()));
162         }
163         if (usableInflectionPart(inflection.getTense())) {
164             inflectionParts.add(inflection.getTense());
165             inflections.put(PMOPB.TENSE_P, PMOPB.mapTense.get(inflection.getTense()));
166         }
167         if (usableInflectionPart(inflection.getVoice())) {
168             inflectionParts.add(inflection.getVoice());
169             inflections.put(PMOPB.VOICE_P, PMOPB.mapVoice.get(inflection.getVoice()));
170         }
172         if (inflectionParts.size() > 0) {
174             // Build inflection URI
175             StringBuilder builder = new StringBuilder();
176             builder.append(NAMESPACE);
177             builder.append(INFLECTION_PREFIX);
178             for (String part : inflectionParts) {
179                 builder.append(separator);
180                 builder.append(part);
181             }
182             URI inflectionURI = createURI(builder.toString());
184             for (URI key : inflections.keySet()) {
185                 for (URI uri : inflections.get(key)) {
186                     addStatementToSink(inflectionURI, key, uri, PM.TBOX);
187                 }
188             }
190             addStatementToSink(exampleURI, PMOPB.INFLECTION_P, inflectionURI, EXAMPLE_GRAPH);
191             addStatementToSink(inflectionURI, RDF.TYPE, PMOPB.INFLECTION_C, PM.TBOX);
192         }
193     }
195     @Override URI getPredicate() {
196         return PMOPB.ROLESET;
197     }
199     @Override URI getSemanticArgument() {
200         return PMOPB.SEMANTIC_ROLE;
201     }
203     @Override URI getRoleToArgumentProperty() {
204         return PMOPB.ARGUMENT_P;
205     }
207     @Override URI getCoreProperty() {
208         return PMOPB.CORE;
209     }
211     @Override HashMap<String, URI> getFunctionMap() {
212         return PMOPB.mapM;
213     }
215     @Override void addArgumentToSink(URI argumentURI, String argName, String f, Type argType,
216             String lemma, String type, String rolesetID, URI lexicalEntryURI, Role role, Roleset roleset) {
217         //todo: transform this double switch into an external class
218         List<String> vnLemmas = ImmutableList.of(lemma);
219         switch (argType) {
220         case NUMERIC:
221             addArgumentToSink(argName, PMOPB.mapF.get(argName), argumentURI, lemma, type,
222                     rolesetID, lexicalEntryURI, role, vnLemmas);
223             addStatementForSecondType(argumentURI, f);
224             break;
225         case M_FUNCTION:
226             // Should be already there...
227             addArgumentToSink(argName, PMOPB.mapM.get(argName), argumentURI, lemma, type,
228                     rolesetID, lexicalEntryURI, role, vnLemmas);
229             break;
230         case AGENT:
231             addArgumentToSink("a", PMOPB.ARGA, argumentURI, lemma, type, rolesetID,
232                     lexicalEntryURI, role, vnLemmas);
233             break;
234         default:
235             //todo: should never happen, but it happens
236         }
237     }
239     private void addStatementForSecondType(URI argumentURI, String f) {
240         Type secondType;
241         try {
242             secondType = getType(f);
243         } catch (Exception e) {
244             LOGGER.error("Error: " + e.getMessage());
245             return;
246         }
247         switch (secondType) {
248         case M_FUNCTION:
249             addStatementToSink(argumentURI, PMOPB.TAG_P, PMOPB.mapM.get(f));
250             break;
251         case ADDITIONAL:
252             addStatementToSink(argumentURI, PMOPB.TAG_P, PMOPB.mapO.get(f));
253             break;
254         case PREPOSITION:
255             URI lexicalEntry = addLexicalEntry(f, f, null, null, "prep", getLexicon());
256             addStatementToSink(argumentURI, PMOPB.TAG_P, lexicalEntry);
257             break;
258         default:
259             // FC: it happens, don't know whether it's ok or not :-)
260         }
261     }
263     @Override protected URI getExternalLink(String lemma, String type) {
264         return createURI(String.format(LINK_PATTERN, lemma, type));
265     }
267     @Override protected void addRelToSink(Type argType, String argName, URI markableURI) {
268         switch (argType) {
269         case M_FUNCTION:
270             addStatementToSink(markableURI, PMOPB.TAG_P, PMOPB.mapM.get(argName), EXAMPLE_GRAPH);
271             break;
272         default:
273             //todo: should never happen (and strangely it really never happens)
274         }
275     }
277     @Override protected URI addExampleArgToSink(Type argType, String argName, URI markableURI,
278             String f, String rolesetID, URI asURI) {
279         URI argumentURI = uriForArgument(rolesetID, argName);
281         switch (argType) {
282         case NUMERIC:
283             addStatementToSink(markableURI, NIF.ANNOTATION_P, asURI, EXAMPLE_GRAPH);
284 //            addStatementToSink(asURI, PMOPB.FUNCTION_TAG, PMOPB.mapF.get(argName), EXAMPLE_GRAPH);
285             addStatementForSecondType(markableURI, f);
286             break;
287         case M_FUNCTION:
288             addStatementToSink(markableURI, NIF.ANNOTATION_P, asURI, EXAMPLE_GRAPH);
289             addStatementToSink(asURI, PMOPB.TAG_P, PMOPB.mapM.get(argName), EXAMPLE_GRAPH);
290             break;
291         case AGENT:
292             addStatementToSink(markableURI, NIF.ANNOTATION_P, asURI, EXAMPLE_GRAPH);
293             addStatementToSink(asURI, PMOPB.TAG_P, PMOPB.ARGA, EXAMPLE_GRAPH);
294             break;
295         default:
296             //todo: should never happen, but it happens
297         }
299         return argumentURI;
300     }
301 }