1 package eu.fbk.dkm.premon.premonitor;
2
3 import java.io.File;
4 import java.io.IOException;
5 import java.util.*;
6
7 import javax.annotation.Nullable;
8
9 import com.google.common.base.Preconditions;
10 import com.google.common.collect.ImmutableList;
11 import com.google.common.collect.ImmutableMap;
12 import com.google.common.collect.Maps;
13
14 import org.openrdf.model.Resource;
15 import org.openrdf.model.Statement;
16 import org.openrdf.model.URI;
17 import org.openrdf.model.Value;
18 import org.openrdf.model.impl.ValueFactoryImpl;
19 import org.openrdf.model.vocabulary.DCTERMS;
20 import org.openrdf.model.vocabulary.OWL;
21 import org.openrdf.model.vocabulary.RDF;
22 import org.openrdf.model.vocabulary.RDFS;
23 import org.openrdf.query.algebra.evaluation.function.hash.MD5;
24 import org.openrdf.rio.RDFHandler;
25 import org.openrdf.rio.RDFHandlerException;
26 import org.slf4j.Logger;
27 import org.slf4j.LoggerFactory;
28
29 import eu.fbk.dkm.premon.util.Replacer;
30 import eu.fbk.dkm.premon.util.URITreeSet;
31 import eu.fbk.dkm.premon.vocab.DECOMP;
32 import eu.fbk.dkm.premon.vocab.LEXINFO;
33 import eu.fbk.dkm.premon.vocab.LIME;
34 import eu.fbk.dkm.premon.vocab.ONTOLEX;
35 import eu.fbk.dkm.premon.vocab.PM;
36 import eu.fbk.dkm.premon.vocab.PMO;
37 import eu.fbk.rdfpro.util.Hash;
38
39 public abstract class Converter {
40
/** Shared replacer built from the {@code classpath:} replacements location given below. */
protected static final Replacer REPLACER;

static {
    try {
        REPLACER = new Replacer("classpath:/eu/fbk/dkm/premon/premonitor/replacements");
    } catch (final IOException loadFailure) {
        // The replacer could not be created: abort class initialization.
        throw new Error(loadFailure);
    }
}
50
51 protected int added = 0, notadded = 0, total = 0;
52 protected int ncon = 0, nclass = 0, nrole = 0;
53
54 public static final Logger LOGGER = LoggerFactory.getLogger(Converter.class);
55
56 static final Map<String, URI> LANGUAGE_CODES_TO_URIS;
57 static final ValueFactoryImpl factory = ValueFactoryImpl.getInstance();
58 public static final String NAMESPACE = "http://premon.fbk.eu/resource/";
59
60 static final URI LE_GRAPH = PM.ENTRIES;
61
62 protected URI website = null;
63 protected String baseResource = null;
64
65 protected URI DEFAULT_GRAPH;
66 protected URI EXAMPLE_GRAPH;
67 protected URI BASE_RESOURCE;
68 protected URI RESOURCE;
69
70 public String prefix;
71 boolean extractExamples = false;
72 public String separator = "-";
73 public String argumentSeparator = "@";
74 public static final String FORM_PREFIX = "form";
75 public static final String CONCEPTUALIZATION_PREFIX = "co";
76 protected Map<String, URI> wnInfo;
77 protected static final String DEFAULT_SENSE_SUFFIX = "sense";
78 protected static final String DEFAULT_PRED_SUFFIX = "pred";
79 protected static final String DEFAULT_ARG_SUFFIX = "arg";
80 protected static final String DEFAULT_CON_SUFFIX = "con";
81 protected static final String DEFAULT_ANNSET_SUFFIX = "annotationSet";
82
83 static final Map<URI, String> wnMap = Maps.newHashMap();
84
static {
    // Build the language-code -> lexvo URI table for every language code known to the JDK.
    final ValueFactoryImpl vf = ValueFactoryImpl.getInstance();
    final Map<String, URI> byCode = new HashMap<>();
    for (final String code : Locale.getISOLanguages()) {
        final String iso3 = new Locale(code).getISO3Language();
        byCode.put(code, vf.createURI("http://lexvo.org/id/iso639-3/", iso3));
    }
    LANGUAGE_CODES_TO_URIS = ImmutableMap.copyOf(byCode);

    // One-letter part-of-speech suffixes used for WordNet URIs.
    wnMap.put(LEXINFO.NOUN, "n");
    wnMap.put(LEXINFO.VERB, "v");
    wnMap.put(LEXINFO.ADJECTIVE, "a");
    wnMap.put(LEXINFO.ADVERB, "r");
}
100
101
102 public static String WN_NAMESPACE = "http://wordnet-rdf.princeton.edu/wn31/";
103
104 protected final File path;
105 protected final RDFHandler defaultSink;
106 protected RDFHandler sink;
107 protected final Properties properties;
108 protected final String language;
109
110 protected String resource;
111
112 protected static HashSet<String> fileToDiscard = new HashSet<>();
113
114 protected String onlyOne = null;
115
116 public String getOnlyOne() {
117 return onlyOne;
118 }
119
120 public void setOnlyOne(String onlyOne) {
121 this.onlyOne = onlyOne;
122 }
123
124 public void setResource(String resource) {
125 this.resource = resource;
126 }
127
128 protected Converter(final File path, final String resource, final RDFHandler sink,
129 final Properties properties, final String language, Map<String, URI> wnInfo) {
130
131 this.path = Objects.requireNonNull(path);
132 this.resource = Objects.requireNonNull(resource);
133 this.defaultSink = Objects.requireNonNull(sink);
134 this.sink = defaultSink;
135 this.properties = Objects.requireNonNull(properties);
136 this.language = language;
137 this.wnInfo = wnInfo;
138 this.website = createURI(properties.getProperty("web"));
139 this.baseResource = properties.getProperty("resource");
140
141 this.onlyOne = properties.getProperty("only-one");
142 this.prefix = resource;
143
144 this.RESOURCE = createURI(NAMESPACE, resource);
145 this.DEFAULT_GRAPH = this.RESOURCE;
146 this.EXAMPLE_GRAPH = createURI(NAMESPACE, resource + "-ex");
147 this.BASE_RESOURCE = createURI(NAMESPACE, baseResource);
148
149 this.extractExamples = properties.getProperty("extractexamples", "0").equals("1");
150 }
151
152 public void setDefaultSinkAsSink() {
153 this.sink = defaultSink;
154 }
155
156 public void setSink(RDFHandler newSink) {
157 this.sink = newSink;
158 }
159
160 public abstract void convert() throws IOException, RDFHandlerException;
161
162 protected void addLinks(ArrayList<String> linkList, String linkString) {
163 if (linkString != null) {
164 for (String link : linkString.split(",")) {
165 link = link.trim();
166 if (link.length() > 0) {
167 linkList.add(link.toLowerCase());
168 }
169 }
170 }
171 }
172
173 protected static List<String> parseLinks(String linkString) {
174 ImmutableList.Builder<String> builder = ImmutableList.builder();
175 if (linkString != null) {
176 for (String link : linkString.split(",")) {
177 link = link.trim();
178 if (link.length() > 0) {
179 builder.add(link.toLowerCase());
180 }
181 }
182 }
183 return builder.build();
184 }
185
186
187
188 protected void addStatementToSink(Resource subject, URI predicate, Value object) {
189 addStatementToSink(subject, predicate, object, DEFAULT_GRAPH);
190 }
191
192 protected void addStatementToSink(Statement statement) {
193 try {
194 sink.handleStatement(statement);
195 } catch (RDFHandlerException e) {
196 e.printStackTrace();
197 }
198 }
199
200 protected void addStatementToSink(Resource subject, URI predicate, Value object, URI graph) {
201
202
203
204 if (object == null) {
205 return;
206 }
207
208 Statement statement = factory.createStatement(subject, predicate, object, graph);
209 try {
210 sink.handleStatement(statement);
211 } catch (RDFHandlerException e) {
212 throw new IllegalArgumentException(e);
213 }
214 }
215
216 protected void addStatementToSink(Resource subject, URI predicate, String objectValue) {
217 addStatementToSink(subject, predicate, objectValue, true);
218 }
219
220 protected void addStatementToSink(Resource subject, URI predicate, String objectValue,
221 URI graph) {
222 addStatementToSink(subject, predicate, objectValue, true, graph);
223 }
224
225 protected void addStatementToSink(Resource subject, URI predicate, String objectValue,
226 boolean useLanguage) {
227 addStatementToSink(subject, predicate, objectValue, useLanguage, DEFAULT_GRAPH);
228 }
229
230 protected void addStatementToSink(Resource subject, URI predicate, String objectValue,
231 boolean useLanguage, URI graph) {
232
233
234 if (objectValue == null || objectValue.length() == 0) {
235 return;
236 }
237
238 Value object;
239 if (useLanguage) {
240 object = factory.createLiteral(objectValue, language);
241 } else {
242 object = factory.createLiteral(objectValue);
243 }
244
245 addStatementToSink(subject, predicate, object, graph);
246 }
247
248 protected void addStatementToSink(Resource subject, URI predicate, boolean objectValue) {
249 Value object = factory.createLiteral(objectValue);
250 addStatementToSink(subject, predicate, object);
251 }
252
253 protected void addStatementToSink(Resource subject, URI predicate, Date objectValue) {
254 Value object = factory.createLiteral(objectValue);
255 addStatementToSink(subject, predicate, object);
256 }
257
258 protected void addStatementToSink(Resource subject, URI predicate, int objectValue) {
259 Value object = factory.createLiteral(objectValue);
260 addStatementToSink(subject, predicate, object);
261 }
262
263 protected void addStatementToSink(Resource subject, URI predicate, int objectValue, URI graph) {
264 Value object = factory.createLiteral(objectValue);
265 addStatementToSink(subject, predicate, object, graph);
266 }
267
268 protected URI uriForRoleset(String rolesetID) {
269 return uriForRoleset(rolesetID, null);
270 }
271
272 protected URI uriForRoleset(String rolesetID, @Nullable String prefix) {
273 StringBuilder builder = new StringBuilder();
274 builder.append(NAMESPACE);
275 builder.append(rolesetPart(rolesetID, prefix));
276 return createURI(builder.toString());
277 }
278
279 protected String rolesetPart(String rolesetID) {
280 return rolesetPart(rolesetID, null);
281 }
282
283 protected String rolesetPart(String rolesetID, @Nullable String prefix) {
284 if (prefix == null) {
285 prefix = this.prefix;
286 }
287 StringBuilder builder = new StringBuilder();
288 if (prefix.length() > 0) {
289 builder.append(prefix);
290 builder.append(separator);
291 }
292 builder.append(rolesetID);
293 return builder.toString();
294 }
295
296 protected URI addLexicalEntry(String goodLemma, String uriLemma, @Nullable List<String> tokens,
297 @Nullable List<String> pos, String mainPos, Resource lexiconURI) {
298
299 URI leURI = addSingleEntry(goodLemma, uriLemma, mainPos, lexiconURI);
300 if (tokens != null && tokens.size() > 1) {
301 for (int i = 0; i < tokens.size(); i++) {
302 String token = tokens.get(i);
303 String thisPOS = null;
304 if (pos != null) {
305 thisPOS = pos.get(i);
306 }
307
308 if (thisPOS != null) {
309 URI thisURI = addSingleEntry(token, token, thisPOS, lexiconURI);
310 addStatementToSink(leURI, DECOMP.SUBTERM, thisURI, LE_GRAPH);
311 }
312 }
313 }
314
315 return leURI;
316 }
317
318 protected URI addSingleEntry(String goodLemma, String uriLemma, String pos, Resource lexiconURI) {
319 URI posURI = getPosURI(pos);
320 if (posURI == null) {
321 System.out.println(pos);
322 }
323 URI leURI = uriForLexicalEntry(uriLemma, posURI);
324 URI formURI = uriForForm(uriLemma, posURI);
325
326 if (posURI == null) {
327 LOGGER.error("POS URI is null: {}", pos);
328 }
329
330 addStatementToSink(leURI, RDF.TYPE, ONTOLEX.LEXICAL_ENTRY, LE_GRAPH);
331 addStatementToSink(leURI, LEXINFO.PART_OF_SPEECH_P, posURI, LE_GRAPH);
332 addStatementToSink(lexiconURI, LIME.ENTRY, leURI, LE_GRAPH);
333 addStatementToSink(formURI, RDF.TYPE, ONTOLEX.FORM, LE_GRAPH);
334 addStatementToSink(leURI, ONTOLEX.CANONICAL_FORM, formURI, LE_GRAPH);
335 addStatementToSink(formURI, ONTOLEX.WRITTEN_REP, goodLemma, LE_GRAPH);
336 addStatementToSink(leURI, RDFS.LABEL, goodLemma, LE_GRAPH);
337 addStatementToSink(leURI, LIME.LANGUAGE, language, false, LE_GRAPH);
338
339 if (wnInfo.size() > 0 && posURI != null) {
340 String wnPos = wnMap.get(posURI);
341 if (wnPos != null) {
342 String wnLemma = uriLemma + "-" + wnPos;
343 URI wnURI = factory.createURI(WN_NAMESPACE, wnLemma);
344 if (wnInfo.containsKey(wnURI.toString())) {
345 addStatementToSink(leURI, OWL.SAMEAS, wnURI, LE_GRAPH);
346 } else {
347 LOGGER.debug("Word not found: {}", wnLemma);
348 }
349 }
350 }
351
352 return leURI;
353 }
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385 private URI uriForForm(String lemma, URI type) {
386 StringBuilder builder = new StringBuilder();
387 builder.append(NAMESPACE);
388 builder.append(FORM_PREFIX);
389 builder.append(separator);
390 builder.append(lemmaPart(lemma, type));
391 return createURI(builder.toString());
392 }
393
394 protected URI uriForLexicalEntry(String lemma, URI type) {
395 StringBuilder builder = new StringBuilder();
396 builder.append(NAMESPACE);
397 builder.append(lemmaPart(lemma, type));
398 return createURI(builder.toString());
399 }
400
401 protected String lemmaPart(String lemma, URI type) {
402 Preconditions.checkNotNull(type);
403 StringBuilder builder = new StringBuilder();
404 builder.append(LEXINFO.map.get(type));
405 builder.append(separator);
406 builder.append(lemma.equals("%") ? "perc-sign" : lemma);
407 return builder.toString();
408 }
409
410
411
412
413
414 protected URI uriForConceptualization(String lemma, String type, String rolesetID) {
415 return uriForConceptualizationGen(lemma, type, rolesetPart(rolesetID));
416 }
417
418 protected URI uriForConceptualizationWithPrefix(String lemma, String type, String rolesetID, String prefix) {
419 return uriForConceptualizationGen(lemma, type, rolesetPart(rolesetID, prefix));
420 }
421
422
423
424
425
426
427 private URI uriForConceptualizationGen(String lemma, String type, String rolesetID) {
428
429 URI posURI = getPosURI(type);
430
431 StringBuilder builder = new StringBuilder();
432 builder.append(NAMESPACE);
433 builder.append(CONCEPTUALIZATION_PREFIX);
434 builder.append(separator);
435 builder.append(lemmaPart(lemma, posURI));
436 builder.append(separator);
437 builder.append(rolesetID);
438 return createURI(builder.toString());
439 }
440
441 protected URI uriForArgument(String rolesetID, String argName, @Nullable String prefix) {
442 StringBuilder builder = new StringBuilder();
443 builder.append(NAMESPACE);
444 builder.append(argPart(rolesetID, argName, prefix));
445 return createURI(builder.toString());
446 }
447
448 protected URI uriForArgument(String rolesetID, String argName) {
449 return uriForArgument(rolesetID, argName, null);
450 }
451
452 public String getArgLabel() {
453 return DEFAULT_ARG_SUFFIX;
454 }
455
456 protected String argPart(String rolesetID, String argName) {
457 StringBuilder builder = new StringBuilder();
458 builder.append(rolesetPart(rolesetID));
459 builder.append(argumentSeparator);
460 builder.append(getArgLabel());
461 builder.append(formatArg(argName));
462 return builder.toString();
463 }
464
465 protected String argPart(String rolesetID, String argName, String prefix) {
466 StringBuilder builder = new StringBuilder();
467 builder.append(rolesetPart(rolesetID, prefix));
468 builder.append(argumentSeparator);
469 builder.append(getArgLabel());
470 builder.append(formatArg(argName));
471 return builder.toString();
472 }
473
474 protected void addMappings(URI class1, URI class2, @Nullable URI conceptualization1,
475 @Nullable URI conceptualization2) {
476
477 Preconditions.checkNotNull(class1);
478 Preconditions.checkNotNull(class2);
479
480 List<URI> classes = new ArrayList<URI>();
481 classes.add(class1);
482 classes.add(class2);
483 List<URI> conceptualizations = new ArrayList<URI>();
484 if(conceptualization1 != null && conceptualization2 != null){
485 conceptualizations.add(conceptualization1);
486 conceptualizations.add(conceptualization2);
487 }
488
489 addMappings(classes, conceptualizations, null);
490
491
492
493
494
495
496
497 }
498
499 protected void addMappings(URI class1, URI class2, @Nullable URI conceptualization1,
500 @Nullable URI conceptualization2, URI argument1, URI argument2) {
501
502 Preconditions.checkNotNull(class1);
503 Preconditions.checkNotNull(class2);
504 Preconditions.checkNotNull(argument1);
505 Preconditions.checkNotNull(argument2);
506
507 List<URI> classes = new ArrayList<URI>();
508 classes.add(class1);
509 classes.add(class2);
510 List<URI> conceptualizations = new ArrayList<URI>();
511 if(conceptualization1 != null && conceptualization2 != null){
512 conceptualizations.add(conceptualization1);
513 conceptualizations.add(conceptualization2);
514 }
515 List<URI> arguments = new ArrayList<URI>();
516 arguments.add(argument1);
517 arguments.add(argument2);
518
519 addMappings(classes, conceptualizations, arguments);
520
521
522
523
524
525
526
527
528
529
530 }
531
532 protected void addMappings(@Nullable List<URI> classes, @Nullable List<URI> conceptualizations, @Nullable List<URI> arguments){
533 int nClasses = classes == null? 0:classes.size();
534 int nConceptualization = conceptualizations == null? 0:conceptualizations.size();
535 int nArguments = arguments == null? 0:arguments.size();
536 URI classMapping, conceptualizationMapping;
537
538 if(nClasses >= 2){
539 classMapping = addMappingFromList(null, prefix, DEFAULT_PRED_SUFFIX, classes);
540 nclass ++;
541 if(nArguments >= 2){
542 addMappingFromList(classMapping, prefix, DEFAULT_ARG_SUFFIX, arguments);
543 nrole ++;
544 }
545 if(nConceptualization >= 2) {
546 conceptualizationMapping = addMappingFromList(null, prefix, DEFAULT_CON_SUFFIX, conceptualizations);
547 ncon++;
548 if (nArguments >= 2) {
549 addMappingFromList(conceptualizationMapping, prefix, DEFAULT_ARG_SUFFIX, arguments);
550 }
551 }
552 }else if(nConceptualization >= 2){
553 addMappingFromList(null, prefix, DEFAULT_CON_SUFFIX, conceptualizations);
554 ncon++;
555 }
556 }
557
558 protected List<URI> removeNullElements(List<URI> uris){
559 for(int i = 0; i < uris.size(); i++){
560 if(uris.get(i) == null){
561 uris.remove(i);
562 }
563 }
564 return uris;
565 }
566
567 protected URI addMappingFromList(@Nullable URI parentMapping, String prefix, String suffix, List<URI> uris){
568 TreeSet<URI> cluster = new URITreeSet();
569 for (URI uri : uris) {
570 cluster.add(uri);
571 }
572
573 return addMappingToSink(parentMapping, cluster, suffix, prefix);
574 }
575
576 protected URI addSingleMapping(@Nullable URI parentMapping, String prefix, String suffix, URI... uris) {
577 TreeSet<URI> cluster = new URITreeSet();
578 for (URI uri : uris) {
579 cluster.add(uri);
580 }
581
582 return addMappingToSink(parentMapping, cluster, suffix, prefix);
583 }
584
585 protected URI addMappingToSink(@Nullable URI parentMapping, TreeSet<URI> mapping, String suffix, String prefix) {
586
587 if (mapping.size() <= 1) {
588 LOGGER.warn("Mapping involves only 1 concept! - " + mapping);
589 return null;
590 }
591
592 URI mappingURI = uriForMapping(mapping, suffix, prefix);
593
594 if (suffix.equals(DEFAULT_ARG_SUFFIX)) {
595 Preconditions.checkArgument(parentMapping != null);
596 addStatementToSink(mappingURI, RDF.TYPE, PMO.SEMANTIC_ROLE_MAPPING);
597 addStatementToSink(parentMapping, PMO.SEM_ROLE_MAPPING, mappingURI);
598 } else if (suffix.equals(DEFAULT_PRED_SUFFIX)) {
599 Preconditions.checkArgument(parentMapping == null);
600 addStatementToSink(mappingURI, RDF.TYPE, PMO.SEMANTIC_CLASS_MAPPING);
601 } else if (suffix.equals(DEFAULT_CON_SUFFIX)) {
602 Preconditions.checkArgument(parentMapping == null);
603 addStatementToSink(mappingURI, RDF.TYPE, PMO.CONCEPTUALIZATION_MAPPING);
604 } else {
605 Preconditions.checkArgument(parentMapping == null);
606 addStatementToSink(mappingURI, RDF.TYPE, PMO.MAPPING);
607
608 }
609
610 for (URI uri : mapping) {
611 addStatementToSink(mappingURI, PMO.ITEM, uri);
612 }
613
614 return mappingURI;
615 }
616
617 protected URI uriForMapping(TreeSet<URI> mapping, String suffix, String prefix) {
618 TreeSet<String> strings = new TreeSet<>();
619 for (URI uri : mapping) {
620 strings.add(uri.toString());
621 }
622 strings.add(prefix);
623 String hash = Hash.murmur3(String.join("|", strings)).toString();
624
625 StringBuilder builder = new StringBuilder();
626 builder.append(NAMESPACE);
627 builder.append(suffix);
628 builder.append(separator);
629 builder.append(hash);
630 return createURI(builder.toString());
631 }
632
633 protected String formatArg(String arg) {
634 return arg;
635 }
636
637 protected URI getLexicon() {
638 return createURI(NAMESPACE, "lexicon");
639 }
640
641 public static URI createURI(String text) {
642 text = text.replaceAll("\\s+", "_");
643 return factory.createURI(text);
644 }
645
646 public static URI createURI(String namespace, String text) {
647 text = text.replaceAll("\\s+", "_");
648 namespace = namespace.replaceAll("\\s+", "_");
649 return factory.createURI(namespace, text);
650 }
651
652 public static URI uriForMarkable(URI base, int start, int end) {
653 URI markableURI = createURI(String.format("%s/char=%d,%d", base.toString(), start, end));
654 return markableURI;
655 }
656
657 protected abstract URI getPosURI(String textualPOS);
658
659 protected URI uriForAnnotationSet(URI exampleURI, @Nullable String addendum) {
660 StringBuilder builder = new StringBuilder();
661 builder.append(exampleURI.toString());
662 builder.append(separator).append(DEFAULT_ANNSET_SUFFIX);
663 if (addendum != null) {
664 builder.append(separator).append(addendum);
665 }
666 return createURI(builder.toString());
667 }
668
669 protected void addMetaToSink() {
670 addStatementToSink(getLexicon(), RDF.TYPE, LIME.LEXICON, LE_GRAPH);
671 addStatementToSink(getLexicon(), LIME.LANGUAGE, language, false, LE_GRAPH);
672 addStatementToSink(getLexicon(), DCTERMS.LANGUAGE, LANGUAGE_CODES_TO_URIS.get(language), LE_GRAPH);
673
674 addStatementToSink(DEFAULT_GRAPH, DCTERMS.SOURCE, RESOURCE, PM.META);
675 addStatementToSink(LE_GRAPH, DCTERMS.SOURCE, RESOURCE, PM.META);
676
677 if (website != null) {
678 addStatementToSink(BASE_RESOURCE, DCTERMS.SOURCE, website);
679 }
680 addStatementToSink(RESOURCE, DCTERMS.IS_VERSION_OF, BASE_RESOURCE);
681 addStatementToSink(EXAMPLE_GRAPH, DCTERMS.REQUIRES, RESOURCE);
682 addStatementToSink(RESOURCE, RDF.TYPE, PM.RESOURCE);
683 addStatementToSink(EXAMPLE_GRAPH, RDF.TYPE, PM.EXAMPLE);
684 }
685
686 }