1 package eu.fbk.dkm.premon.premonitor;
2
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.HashMap;
6 import java.util.List;
7 import java.util.Map;
8 import java.util.Properties;
9 import java.util.Set;
10 import java.util.regex.Matcher;
11
12 import com.google.common.collect.HashMultimap;
13 import com.google.common.collect.ImmutableList;
14 import com.google.common.collect.ImmutableSet;
15 import com.google.common.collect.Multimap;
16
17 import org.openrdf.model.URI;
18 import org.openrdf.model.vocabulary.RDF;
19 import org.openrdf.rio.RDFHandler;
20
21 import eu.fbk.dkm.premon.premonitor.propbank.Inflection;
22 import eu.fbk.dkm.premon.premonitor.propbank.Role;
23 import eu.fbk.dkm.premon.premonitor.propbank.Roleset;
24 import eu.fbk.dkm.premon.vocab.NIF;
25 import eu.fbk.dkm.premon.vocab.PM;
26 import eu.fbk.dkm.premon.vocab.PMOPB;
27
28
29
30
31
32 public class PropbankConverter extends BankConverter {
33
34 private static String LINK_PATTERN = "http://verbs.colorado.edu/propbank/framesets-english/%s-%s.html";
35
36 private static Set<String> PREPOSITIONS = ImmutableSet.of("from", "on", "to", "as", "at",
37 "by", "for", "in", "of", "with", "upon", "into", "around", "about");
38
39 public PropbankConverter(File path, RDFHandler sink, Properties properties, Map<String, URI> wnInfo) {
40 super(path, properties.getProperty("source"), sink, properties, properties.getProperty("language"), wnInfo);
41
42 this.nonVerbsToo = properties.getProperty("extractnonverbs", "0").equals("1");
43 this.isOntoNotes = properties.getProperty("ontonotes", "0").equals("1");
44 this.noDef = !properties.getProperty("extractdefinitions", "0").equals("1");
45 this.extractExamples = properties.getProperty("extractexamples", "0").equals("1");
46 this.defaultType = "v";
47
48 }
49
50 private boolean usableInflectionPart(String part) {
51 return part != null && part.length() > 0 && !part.equals("ns");
52 }
53
54 @Override Type getType(String code) {
55 if (code != null) {
56 if (PMOPB.mapM.containsKey(code)) {
57 return Type.M_FUNCTION;
58 }
59 if (PMOPB.mapO.containsKey(code)) {
60 return Type.ADDITIONAL;
61 }
62 if (PREPOSITIONS.contains(code)) {
63 return Type.PREPOSITION;
64 }
65
66 Matcher matcher = ARG_NUM_PATTERN.matcher(code);
67 if (matcher.find()) {
68 return Type.NUMERIC;
69 }
70
71 if (code.equals("a")) {
72 return Type.AGENT;
73 }
74
75 throw new IllegalArgumentException(String.format("String %s not found", code));
76 }
77 return Type.NULL;
78 }
79
80 protected void addExternalLinks(ComplexLemmaWithMappings complexLemmaWithMappings, URI conceptualizationURI, String uriLemma, String type) {
81
82 String rolesetID = complexLemmaWithMappings.getRolesetID();
83
84
85 if (isOntoNotes)
86 if (type.equals("n"))
87 rolesetID="n-"+rolesetID;
88
89 URI rolesetURI = uriForRoleset(rolesetID);
90
91
92 List<String> fnPredicates = new ArrayList<>();
93 if (complexLemmaWithMappings.getFramenet() != null) {
94 String[] tmpFnPreds = complexLemmaWithMappings.getFramenet().trim().toLowerCase()
95 .split("\\s+");
96 for (String tmpClass : tmpFnPreds) {
97 tmpClass = tmpClass.trim();
98 if (tmpClass.length() > 1) {
99 fnPredicates.add(tmpClass);
100 }
101 }
102 }
103
104 for (String fnPredicate : fnPredicates) {
105 for (String fnLink : fnLinks) {
106 URI fnFrameURI = uriForRoleset(fnPredicate, fnLink);
107 URI fnConceptualizationURI = uriForConceptualizationWithPrefix(uriLemma, type, fnPredicate, fnLink);
108 addMappings(rolesetURI, fnFrameURI, conceptualizationURI, fnConceptualizationURI);
109 }
110 }
111
112
113 List<String> vnClasses = getVnClasses(complexLemmaWithMappings.getVn());
114 for (String vnClass : vnClasses) {
115 for (String vnLink : vnLinks) {
116 URI vnClassURI = uriForRoleset(vnClass, vnLink);
117 URI vnConceptualizationURI = uriForConceptualizationWithPrefix(uriLemma, "v", vnClass, vnLink);
118 addMappings(rolesetURI, vnClassURI, conceptualizationURI, vnConceptualizationURI);
119 }
120 }
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135 }
136
137 @Override void addInflectionToSink(URI exampleURI, Inflection inflection) {
138
139 if (inflection == null) {
140 return;
141 }
142
143 ArrayList<String> inflectionParts = new ArrayList<>();
144 Multimap<URI, URI> inflections = HashMultimap.create();
145
146 if (usableInflectionPart(inflection.getAspect())) {
147 inflectionParts.add(inflection.getAspect());
148 if (inflection.getAspect().equals("both")) {
149 inflections.put(PMOPB.ASPECT_P, PMOPB.PROGRESSIVE);
150 inflections.put(PMOPB.ASPECT_P, PMOPB.PERFECT);
151 } else {
152 inflections.put(PMOPB.ASPECT_P, PMOPB.mapAspect.get(inflection.getAspect()));
153 }
154 }
155 if (usableInflectionPart(inflection.getForm())) {
156 inflectionParts.add(inflection.getForm());
157 inflections.put(PMOPB.FORM_P, PMOPB.mapForm.get(inflection.getForm()));
158 }
159 if (usableInflectionPart(inflection.getPerson())) {
160 inflectionParts.add(inflection.getPerson());
161 inflections.put(PMOPB.PERSON_P, PMOPB.mapPerson.get(inflection.getPerson()));
162 }
163 if (usableInflectionPart(inflection.getTense())) {
164 inflectionParts.add(inflection.getTense());
165 inflections.put(PMOPB.TENSE_P, PMOPB.mapTense.get(inflection.getTense()));
166 }
167 if (usableInflectionPart(inflection.getVoice())) {
168 inflectionParts.add(inflection.getVoice());
169 inflections.put(PMOPB.VOICE_P, PMOPB.mapVoice.get(inflection.getVoice()));
170 }
171
172 if (inflectionParts.size() > 0) {
173
174
175 StringBuilder builder = new StringBuilder();
176 builder.append(NAMESPACE);
177 builder.append(INFLECTION_PREFIX);
178 for (String part : inflectionParts) {
179 builder.append(separator);
180 builder.append(part);
181 }
182 URI inflectionURI = createURI(builder.toString());
183
184 for (URI key : inflections.keySet()) {
185 for (URI uri : inflections.get(key)) {
186 addStatementToSink(inflectionURI, key, uri, PM.TBOX);
187 }
188 }
189
190 addStatementToSink(exampleURI, PMOPB.INFLECTION_P, inflectionURI, EXAMPLE_GRAPH);
191 addStatementToSink(inflectionURI, RDF.TYPE, PMOPB.INFLECTION_C, PM.TBOX);
192 }
193 }
194
195 @Override URI getPredicate() {
196 return PMOPB.ROLESET;
197 }
198
199 @Override URI getSemanticArgument() {
200 return PMOPB.SEMANTIC_ROLE;
201 }
202
203 @Override URI getRoleToArgumentProperty() {
204 return PMOPB.ARGUMENT_P;
205 }
206
207 @Override URI getCoreProperty() {
208 return PMOPB.CORE;
209 }
210
211 @Override HashMap<String, URI> getFunctionMap() {
212 return PMOPB.mapM;
213 }
214
215 @Override void addArgumentToSink(URI argumentURI, String argName, String f, Type argType,
216 String lemma, String type, String rolesetID, URI lexicalEntryURI, Role role, Roleset roleset) {
217
218 List<String> vnLemmas = ImmutableList.of(lemma);
219 switch (argType) {
220 case NUMERIC:
221 addArgumentToSink(argName, PMOPB.mapF.get(argName), argumentURI, lemma, type,
222 rolesetID, lexicalEntryURI, role, vnLemmas);
223 addStatementForSecondType(argumentURI, f);
224 break;
225 case M_FUNCTION:
226
227 addArgumentToSink(argName, PMOPB.mapM.get(argName), argumentURI, lemma, type,
228 rolesetID, lexicalEntryURI, role, vnLemmas);
229 break;
230 case AGENT:
231 addArgumentToSink("a", PMOPB.ARGA, argumentURI, lemma, type, rolesetID,
232 lexicalEntryURI, role, vnLemmas);
233 break;
234 default:
235
236 }
237 }
238
239 private void addStatementForSecondType(URI argumentURI, String f) {
240 Type secondType;
241 try {
242 secondType = getType(f);
243 } catch (Exception e) {
244 LOGGER.error("Error: " + e.getMessage());
245 return;
246 }
247 switch (secondType) {
248 case M_FUNCTION:
249 addStatementToSink(argumentURI, PMOPB.TAG_P, PMOPB.mapM.get(f));
250 break;
251 case ADDITIONAL:
252 addStatementToSink(argumentURI, PMOPB.TAG_P, PMOPB.mapO.get(f));
253 break;
254 case PREPOSITION:
255 URI lexicalEntry = addLexicalEntry(f, f, null, null, "prep", getLexicon());
256 addStatementToSink(argumentURI, PMOPB.TAG_P, lexicalEntry);
257 break;
258 default:
259
260 }
261 }
262
263 @Override protected URI getExternalLink(String lemma, String type) {
264 return createURI(String.format(LINK_PATTERN, lemma, type));
265 }
266
267 @Override protected void addRelToSink(Type argType, String argName, URI markableURI) {
268 switch (argType) {
269 case M_FUNCTION:
270 addStatementToSink(markableURI, PMOPB.TAG_P, PMOPB.mapM.get(argName), EXAMPLE_GRAPH);
271 break;
272 default:
273
274 }
275 }
276
277 @Override protected URI addExampleArgToSink(Type argType, String argName, URI markableURI,
278 String f, String rolesetID, URI asURI) {
279 URI argumentURI = uriForArgument(rolesetID, argName);
280
281 switch (argType) {
282 case NUMERIC:
283 addStatementToSink(markableURI, NIF.ANNOTATION_P, asURI, EXAMPLE_GRAPH);
284
285 addStatementForSecondType(markableURI, f);
286 break;
287 case M_FUNCTION:
288 addStatementToSink(markableURI, NIF.ANNOTATION_P, asURI, EXAMPLE_GRAPH);
289 addStatementToSink(asURI, PMOPB.TAG_P, PMOPB.mapM.get(argName), EXAMPLE_GRAPH);
290 break;
291 case AGENT:
292 addStatementToSink(markableURI, NIF.ANNOTATION_P, asURI, EXAMPLE_GRAPH);
293 addStatementToSink(asURI, PMOPB.TAG_P, PMOPB.ARGA, EXAMPLE_GRAPH);
294 break;
295 default:
296
297 }
298
299 return argumentURI;
300 }
301 }