1   package eu.fbk.dkm.premon.premonitor;
2   
3   import com.google.common.io.Files;
4   import eu.fbk.dkm.premon.premonitor.propbank.*;
5   import eu.fbk.dkm.premon.util.NF;
6   import eu.fbk.dkm.premon.util.PropBankResource;
7   import eu.fbk.dkm.premon.vocab.LEXINFO;
8   import eu.fbk.dkm.premon.vocab.NIF;
9   import eu.fbk.dkm.premon.vocab.ONTOLEX;
10  import eu.fbk.dkm.premon.vocab.PMO;
11  import eu.fbk.rdfpro.util.Hash;
12  import org.joox.JOOX;
13  import org.joox.Match;
14  import org.openrdf.model.URI;
15  import org.openrdf.model.vocabulary.DCTERMS;
16  import org.openrdf.model.vocabulary.RDF;
17  import org.openrdf.model.vocabulary.RDFS;
18  import org.openrdf.model.vocabulary.SKOS;
19  import org.openrdf.rio.RDFHandler;
20  import org.openrdf.rio.RDFHandlerException;
21  import org.slf4j.Logger;
22  import org.slf4j.LoggerFactory;
23  import org.w3c.dom.Document;
24  import org.w3c.dom.Element;
25  
26  import javax.annotation.Nullable;
27  import javax.xml.bind.JAXBContext;
28  import javax.xml.bind.Unmarshaller;
29  import javax.xml.parsers.DocumentBuilderFactory;
30  import java.io.File;
31  import java.io.IOException;
32  import java.util.*;
33  import java.util.regex.Matcher;
34  import java.util.regex.Pattern;
35  
36  /**
37   * Created by alessio on 28/10/15.
38   */
39  
40  public abstract class BankConverter extends Converter {
41  
    /** Logger used by this class. */
    public static final Logger LOGGER = LoggerFactory.getLogger(BankConverter.class);

    // Not referenced in the part of the class shown here.
    // NOTE(review): presumably URI path segments for examples and inflections - confirm.
    public static final String EXAMPLE_PREFIX = "example";
    public static final String INFLECTION_PREFIX = "inflection";

    /**
     * When false, convert() asks discardFile to keep verb files only
     * (that filter is applied only if isOntoNotes is also true).
     */
    boolean nonVerbsToo = false;
    /**
     * Passed to discardFile and to the PropBankResource constructor; when true,
     * convert() also prefixes the roleset ID with "n-" for resources of type "n".
     */
    boolean isOntoNotes = false;
    /** When true, convert() emits no skos:definition for rolesets and arguments. */
    boolean noDef = false;
    /** Passed as last argument to the PropBankResource constructor in convert(). */
    String defaultType;

    /** Filled by the constructor through addLinks from the "linkfn" property. */
    protected ArrayList<String> fnLinks = new ArrayList<>();
    /** Filled by the constructor through addLinks from the "linkvn" property. */
    protected ArrayList<String> vnLinks = new ArrayList<>();
    /** Filled by the constructor through addLinks from the "linkpb" property. */
    protected ArrayList<String> pbLinks = new ArrayList<>();
    /**
     * VerbNet index built by the constructor when the "vnpath" property is set:
     * key is group 2 of VN_PATTERN on a class ID, value is "group1-group2".
     */
    protected Map<String, String> vnMap = new HashMap<>();
    /**
     * Splits a VerbNet class ID: group 1 is the text before the first hyphen,
     * group 2 the following run of digits, dots and hyphens.
     */
    protected static final Pattern VN_PATTERN = Pattern.compile("([^-]*)-([0-9\\.-]*)");

    /** Matches a string made of exactly one digit between 0 and 6. */
    static final Pattern ARG_NUM_PATTERN = Pattern.compile("^[0123456]$");
    /**
     * Matches "verb-" followed by a name and a numeric suffix: group 1 is "name.digits",
     * group 2 is the name alone. Instance field (not static), unlike the patterns above.
     */
    Pattern PB_PATTERN = Pattern.compile("^verb-((.*)\\.[0-9]+)$");
60  
61      // Bugs!
62      private static HashMap<String, String> bugMap = new HashMap<String, String>();
63      private static HashMap<String, String> rolesetBugMap = new HashMap<String, String>();
64      private static HashMap<String, String> lemmaToTransform = new HashMap();
65  
    /**
     * Category of an argument label. convert() obtains it from getType(String) and
     * hands it to addArgumentToSink, addRelToSink and addExampleArgToSink.
     */
    public enum Type {
        M_FUNCTION, ADDITIONAL, PREPOSITION, NUMERIC, AGENT, NULL
    }
69  
    // Not referenced in the part of the class shown here.
    // NOTE(review): presumably configured by subclasses and used when mapping argument labels - confirm.
    String mapArgLabel = null;
71  
    // Fills the correction tables once, at class-load time. The trailing comment on
    // each entry names the source file (or resource) where the error was observed.
    static {
        // Argument labels: wrong value -> value used instead (looked up in convert()).
        bugMap.put("@", "2"); // overburden-v.xml
        bugMap.put("av", "adv"); // turn-v.xml (turn.15)
        bugMap.put("ds", "dis"); // assume-v.xml
        bugMap.put("pred", "prd"); // flatten-v.xml
        bugMap.put("o", "0"); // be.xml (be.04)
        bugMap.put("emitter of hoot", "0"); // hoot.xml

        bugMap.put("8", "tmp"); // NomBank: date, meeting
        bugMap.put("9", "loc"); // NomBank: date, meeting, option

        // Roleset IDs: wrong value -> value used instead (looked up in convert()).
        rolesetBugMap.put("transfuse.101", "transfuse.01");

        lemmaToTransform.put("cry+down(e)", "cry+down");

        // fileToDiscard is declared outside this block; convert() skips every file
        // whose resource file name is contained in it.
        fileToDiscard.add("except-v.xml");
    }
89  
90      //    public BankConverter(File path, String resource, RDFHandler sink, Properties properties, String language, Set<URI> wnURIs) {
91      public BankConverter(File path, String resource, RDFHandler sink, Properties properties, String language,
92              Map<String, URI> wnInfo) {
93          super(path, resource, sink, properties, language, wnInfo);
94  
95          // todo: use default input path
96  
97          String vnPath = properties.getProperty("vnpath");
98          if (vnPath != null) {
99              LOGGER.info("Loading VerbNet");
100             File vnFile = new File(vnPath);
101             if (vnFile.exists() && vnFile.isDirectory()) {
102                 final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
103 
104                 for (final File file : Files.fileTreeTraverser().preOrderTraversal(vnFile)) {
105                     if (!file.isDirectory() && file.getName().endsWith(".xml")) {
106                         LOGGER.debug("Processing {} ...", file);
107 
108                         try {
109                             final Document document = dbf.newDocumentBuilder().parse(file);
110                             final Match vnClass = JOOX.$(document.getElementsByTagName("VNCLASS"))
111                                     .add(JOOX.$(document.getElementsByTagName("VNSUBCLASS")));
112 
113                             for (Element thisClass : vnClass) {
114                                 String id = thisClass.getAttribute("ID");
115                                 Matcher mID = VN_PATTERN.matcher(id);
116                                 if (mID.find()) {
117                                     vnMap.put(mID.group(2), mID.group(1) + "-" + mID.group(2));
118                                 } else {
119                                     LOGGER.error("Unable to parse {}", id);
120                                 }
121                             }
122 
123                         } catch (final Exception ex) {
124                             ex.printStackTrace();
125                         }
126                     }
127                 }
128             }
129         }
130 
131         addLinks(fnLinks, properties.getProperty("linkfn"));
132         LOGGER.info("Links to: {}", fnLinks.toString());
133         addLinks(vnLinks, properties.getProperty("linkvn"));
134         LOGGER.info("Links to: {}", vnLinks.toString());
135         addLinks(pbLinks, properties.getProperty("linkpb"));
136         LOGGER.info("Links to: {}", pbLinks.toString());
137     }
138 
139     private static boolean discardFile(File file, boolean onlyVerbs, boolean isOntoNotes) {
140         if (file.isDirectory()) {
141             LOGGER.trace("File {} is a directory", file.getName());
142             return true;
143         }
144 
145         if (!file.getAbsolutePath().endsWith(".xml")) {
146             LOGGER.trace("File {} is not XML", file.getName());
147             return true;
148         }
149 
150         if (onlyVerbs && isOntoNotes) {
151             if (!file.getAbsolutePath().endsWith("-v.xml")) {
152                 LOGGER.trace("File {} is not a verb", file.getName());
153                 return true;
154             }
155         }
156 
157         return false;
158     }
159 
160     @Override
161     public void convert() throws IOException, RDFHandlerException {
162 
163         addMetaToSink();
164 
165         //todo: the first tour is not necessary any more
166 
167         int noArgCount = 0;
168         int noStringFound = 0;
169 
170         try {
171             JAXBContext jaxbContext = JAXBContext.newInstance(Frameset.class);
172             Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
173 
174             for (File file : Files.fileTreeTraverser().preOrderTraversal(path)) {
175 
176                 if (discardFile(file, !nonVerbsToo, isOntoNotes)) {
177                     continue;
178                 }
179 
180                 PropBankResource resource;
181                 try {
182                     resource = new PropBankResource(file.getName(), isOntoNotes, defaultType);
183                 } catch (Exception e) {
184                     throw new IOException(e);
185                 }
186                 if (fileToDiscard.contains(resource.getFileName())) {
187                     continue;
188                 }
189 
190                 if (onlyOne != null && !onlyOne.equals(resource.getLemma())) {
191                     continue;
192                 }
193 
194                 Frameset frameset;
195 
196                 try {
197                     frameset = (Frameset) jaxbUnmarshaller.unmarshal(file);
198                     resource.setMain(frameset);
199                 } catch (Throwable e) {
200                     LOGGER.error("Skipping {}", file.getAbsolutePath());
201                     continue;
202                 }
203 
204                 LOGGER.debug("Processing {}", file.getAbsolutePath());
205 
206                 String mainType = resource.getType();
207                 String origLemma = resource.getLemma();
208                 String uriOrigLemma = getLemmaFromPredicateName(origLemma);
209 
210                 List<Object> noteOrPredicate = frameset.getNoteOrPredicate();
211 
212                 for (Object predicate : noteOrPredicate) {
213                     if (predicate instanceof Predicate) {
214 
215 //                        List<ComplexLemma> lemmas = new ArrayList<>();
216 
217                         ComplexLemma complexLemma;
218 //                        if (true) {
219                             String replacedLemma = REPLACER.apply(((Predicate) predicate).getLemma(), this.baseResource, "lemma", file.getName());
220                             String uLemma = getLemmaFromPredicateName(replacedLemma);
221                             String goodLemma = uLemma.replaceAll("\\+", " ");
222 
223                             List<String> tokens = new ArrayList<>();
224                             List<String> pos = new ArrayList<>();
225                             tokens.add(origLemma);
226                             pos.add(mainType);
227 
228                             URI leURI = addLexicalEntry(goodLemma, uLemma, tokens, pos, mainType, getLexicon());
229 
230                             complexLemma = new ComplexLemma(goodLemma, uLemma, tokens, pos, mainType, getLexicon(), leURI);
231   //                      }
232 //                        lemmas.add(complexLemma);
233 
234 //                        System.out.println("Lemma: " + ((Predicate) predicate).getLemma());
235 
236                         List<Object> noteOrRoleset = ((Predicate) predicate).getNoteOrRoleset();
237 //                        for (Object roleset : noteOrRoleset) {
238 //                            if (roleset instanceof Roleset) {
239 //                                for (Object aliases : ((Roleset) roleset).getNoteOrRolesOrExampleOrAliases()) {
240 //                                    if (aliases instanceof Aliases) {
241 //                                        lemmas = new ArrayList<>();
242 //                                        for (Object alias : ((Aliases) aliases).getNoteOrAlias()) {
243 //                                            if (alias instanceof Alias) {
244 //                                                System.out.println("Alias: " + ((Alias) alias).getvalue());
245 //                                            }
246 //                                        }
247 //                                    }
248 //                                }
249 //                            }
250 //                        }
251 
252                         for (Object roleset : noteOrRoleset) {
253                             if (roleset instanceof Roleset) {
254                                 String rolesetID = REPLACER.apply(((Roleset) roleset).getId(), this.baseResource, "predicate", file.getName());
255 
256                                 // Let's collect lemmas
257                                 List<ComplexLemmaWithMappings> lemmas = new ArrayList<>();
258                                 for (Object aliases : ((Roleset) roleset).getNoteOrRolesOrExampleOrAliases()) {
259                                     if (aliases instanceof Aliases) {
260                                         lemmas = new ArrayList<>();
261                                         for (Object alias : ((Aliases) aliases).getNoteOrAlias()) {
262                                             if (alias instanceof Alias) {
263                                                 String aliasLemma = ((Alias) alias).getvalue();
264                                                 String aliasULemma = getLemmaFromPredicateName(aliasLemma);
265                                                 String aliasSinglePos = ((Alias) alias).getPos();
266                                                 List<String> aliasTokens = new ArrayList<>();
267                                                 List<String> aliasPos = new ArrayList<>();
268                                                 aliasTokens.add(aliasLemma);
269                                                 aliasPos.add(aliasSinglePos);
270                                                 URI aliasLexicalEntry = addLexicalEntry(aliasLemma, aliasLemma, aliasTokens, aliasPos, aliasSinglePos,
271                                                         getLexicon());
272                                                 ComplexLemma aliasComplexLemma = new ComplexLemma(aliasLemma, aliasULemma, aliasTokens, aliasPos,
273                                                         aliasPos.get(0), getLexicon(), aliasLexicalEntry);
274                                                 ComplexLemmaWithMappings complexLemmaWithMappings = new ComplexLemmaWithMappings(aliasComplexLemma);
275                                                 complexLemmaWithMappings.setFramenet(((Alias) alias).getFramenet());
276                                                 complexLemmaWithMappings.setVn(((Alias) alias).getVerbnet());
277                                                 complexLemmaWithMappings.setRolesetID(rolesetID);
278                                                 complexLemmaWithMappings.setPbSource(((Roleset) roleset).getSource());
279                                                 lemmas.add(complexLemmaWithMappings);
280                                             }
281                                         }
282                                     }
283                                 }
284                                 if (lemmas.size() == 0) {
285                                     ComplexLemmaWithMappings complexLemmaWithMappings = new ComplexLemmaWithMappings(complexLemma);
286                                     complexLemmaWithMappings.setFramenet(((Roleset) roleset).getFramnet());
287                                     complexLemmaWithMappings.setVn(((Roleset) roleset).getVncls());
288                                     complexLemmaWithMappings.setRolesetID(rolesetID);
289                                     complexLemmaWithMappings.setPbSource(((Roleset) roleset).getSource());
290                                     lemmas.add(complexLemmaWithMappings);
291                                 }
292 
293                                 if (rolesetBugMap.containsKey(rolesetID)) {
294                                     rolesetID = rolesetBugMap.get(rolesetID);
295                                 }
296 
297                                 //added to cope with same rolesets for different lexical entries (noun and verb)
298                                 if (isOntoNotes)
299                                     if (mainType.equals("n"))
300                                         rolesetID="n-"+rolesetID;
301 
302                                 URI rolesetURI = uriForRoleset(rolesetID);
303 
304                                 addStatementToSink(rolesetURI, RDF.TYPE, getPredicate());
305                                 if (!noDef) {
306                                     addStatementToSink(rolesetURI, SKOS.DEFINITION, ((Roleset) roleset).getName());
307                                 }
308                                 addStatementToSink(rolesetURI, RDFS.LABEL, rolesetID, false);
309 
310                                 // Stuff needing lemma information
311                                 for (ComplexLemmaWithMappings lemma : lemmas) {
312 
313                                     URI lexicalEntryURI = lemma.getLemma().getLexicalEntryURI();
314                                     String clOLemma = lemma.getLemma().getGoodLemma();
315                                     String uriLemma = lemmas.size() == 1 ? uriOrigLemma : lemma.getLemma().getUriLemma();
316                                     String mainPos = lemma.getLemma().getMainPos();
317 
318                                     addStatementToSink(rolesetURI, RDFS.SEEALSO, getExternalLink(clOLemma, mainPos));
319                                     addStatementToSink(lexicalEntryURI, ONTOLEX.EVOKES, rolesetURI);
320 
321                                     URI conceptualizationURI = uriForConceptualization(uriLemma, mainPos, rolesetID);
322                                     addStatementToSink(conceptualizationURI, RDF.TYPE, PMO.CONCEPTUALIZATION);
323                                     addStatementToSink(conceptualizationURI, PMO.EVOKING_ENTRY, lexicalEntryURI);
324                                     addStatementToSink(conceptualizationURI, PMO.EVOKED_CONCEPT, rolesetURI);
325 
326                                     addExternalLinks(lemma, conceptualizationURI, uriLemma, mainPos);
327 
328                                     HashMap<String, URI> functionMap = getFunctionMap();
329                                     for (String key : functionMap.keySet()) {
330                                         URI argumentURI = uriForArgument(rolesetID, key);
331                                         addArgumentToSink(key, functionMap.get(key), argumentURI, uriLemma, mainPos, rolesetID, lexicalEntryURI, null,
332                                                 null);
333                                     }
334 
335                                 }
336 
337                                 List<Example> examples = new ArrayList<Example>();
338 
339                                 List<Object> rolesOrExample = ((Roleset) roleset).getNoteOrRolesOrExampleOrAliases();
340                                 for (Object rOrE : rolesOrExample) {
341                                     if (rOrE instanceof Roles) {
342                                         List<Object> noteOrRole = ((Roles) rOrE).getNoteOrRole();
343                                         for (Object role : noteOrRole) {
344                                             if (role instanceof Role) {
345                                                 String n = ((Role) role).getN();
346                                                 String f = ((Role) role).getF();
347                                                 String descr = ((Role) role).getDescr();
348 
349                                                 NF nf = new NF(n, f);
350                                                 String argName = nf.getArgName();
351 
352                                                 if (argName == null) {
353                                                     //todo: this should never happen; however it happens
354                                                     noArgCount++;
355                                                     continue;
356                                                 }
357 
358                                                 // Bugs!
359                                                 if (bugMap.containsKey(argName)) {
360                                                     argName = bugMap.get(argName);
361                                                 }
362 
363                                                 Type argType;
364                                                 try {
365                                                     argType = getType(argName);
366                                                 } catch (Exception e) {
367                                                     LOGGER.error(e.getMessage());
368                                                     continue;
369                                                 }
370 
371                                                 URI argumentURI = uriForArgument(rolesetID, argName);
372                                                 addStatementToSink(argumentURI, RDF.TYPE, getSemanticArgument());
373                                                 addStatementToSink(argumentURI, getCoreProperty(), true);
374                                                 if (!noDef) {
375                                                     addStatementToSink(argumentURI, SKOS.DEFINITION, descr);
376                                                 }
377                                                 addStatementToSink(rolesetURI, PMO.SEM_ROLE, argumentURI);
378 
379                                                 for (ComplexLemmaWithMappings lemma : lemmas) {
380                                                     // todo: check this, add lemma
381 //                                                    addArgumentToSink(argumentURI, argName, nf.getF(), argType, uriLemma,
382 //                                                            type, rolesetID, lexicalEntryURI, (Role) role,
383 //                                                            (Roleset) roleset);
384                                                     addArgumentToSink(argumentURI, argName, nf.getF(), argType, lemma.getLemma().getUriLemma(),
385                                                             lemma.getLemma().getMainPos(), rolesetID, lemma.getLemma().lexicalEntryURI, (Role) role,
386                                                             (Roleset) roleset);
387                                                 }
388                                             }
389                                         }
390                                     }
391                                 }
392 
393                                 rolesOrExample
394                                         .stream()
395                                         .filter(rOrE -> rOrE instanceof Example && extractExamples)
396                                         .forEach(rOrE -> {
397                                             examples.add((Example) rOrE);
398                                         });
399 
400                                 //todo: shall we start from 0?
401                                 //int exampleCount = 0;
402 
403                                 exampleLoop:
404                                 for (Example example : examples) {
405                                     String text = null;
406                                     Inflection inflection = null;
407 
408                                     String exName = example.getName();
409                                     String exSrc = example.getSrc();
410 
411                                     List<Rel> myRels = new ArrayList<Rel>();
412                                     List<Arg> myArgs = new ArrayList<Arg>();
413 
414                                     List<Object> exThings = example
415                                             .getInflectionOrNoteOrTextOrArgOrRel();
416                                     for (Object thing : exThings) {
417                                         if (thing instanceof Text) {
418                                             text = ((Text) thing).getvalue()
419                                                     .replaceAll("\\s+", " ").trim();
420                                         }
421                                         if (thing instanceof Inflection) {
422                                             inflection = (Inflection) thing;
423                                         }
424 
425                                         if (thing instanceof Arg) {
426                                             myArgs.add((Arg) thing);
427                                         }
428 
429                                         // Should be one, but it's not defined into the DTD
430                                         if (thing instanceof Rel) {
431                                             myRels.add((Rel) thing);
432                                         }
433                                     }
434 
435                                     if (text != null && text.length() > 0) {
436 
437                                         // URI exampleURI = uriForExample(rolesetID, exampleCount++);
438                                         URI exampleURI = uriForExample(rolesetID, text);
439                                         URI annotationSetURI = uriForAnnotationSet(exampleURI, null);
440 
441                                         addStatementToSink(exampleURI, RDF.TYPE, PMO.EXAMPLE, EXAMPLE_GRAPH);
442                                         addStatementToSink(annotationSetURI, RDF.TYPE, PMO.ANNOTATION_SET,
443                                                 EXAMPLE_GRAPH);
444 
445                                         addStatementToSink(exampleURI, RDFS.COMMENT, exName, EXAMPLE_GRAPH);
446                                         if (exSrc != null && !exSrc.equals(exName)) {
447                                             addStatementToSink(exampleURI, DCTERMS.SOURCE, exSrc, EXAMPLE_GRAPH);
448                                         }
449                                         addStatementToSink(exampleURI, NIF.IS_STRING, text, EXAMPLE_GRAPH);
450 
451                                         // Bugfix
452                                         text = text.toLowerCase();
453 
454                                         addInflectionToSink(exampleURI, inflection);
455 
456                                         for (int i = 0; i < myRels.size(); i++) {
457                                             Rel rel = myRels.get(i);
458 
459                                             String origValue = rel.getvalue().toLowerCase()
460                                                     .replaceAll("\\s+", " ").trim();
461 //                                            String value = origValue.toLowerCase();
462 
463                                             int start = text.indexOf(origValue);
464                                             if (start == -1) {
465                                                 //todo: fix these
466                                                 // LOGGER.error("Rel string not found in {}: {}", rolesetID, value);
467                                                 noStringFound++;
468                                                 continue exampleLoop;
469                                             }
470                                             int end = start + origValue.length();
471 
472                                             URI markableURI = uriForMarkable(exampleURI, start, end);
473                                             URI annotationURI = createURI(annotationSetURI.toString() + "-rel-" + i);
474 
475                                             addStatementToSink(exampleURI, NIF.ANNOTATION_P, annotationURI, EXAMPLE_GRAPH);
476                                             addStatementToSink(annotationURI, RDF.TYPE, NIF.ANNOTATION_C, EXAMPLE_GRAPH);
477                                             addStatementToSink(annotationURI, PMO.VALUE_OBJ, rolesetURI, EXAMPLE_GRAPH);
478                                             addStatementToSink(annotationSetURI, PMO.ITEM, annotationURI, EXAMPLE_GRAPH);
479 
480                                             // Impossible to connect the example to the lemma due to missing information
481 //                                            addStatementToSink(annotationURI, PMO.VALUE_OBJ, conceptualizationURI, EXAMPLE_GRAPH);
482                                             if (lemmas.size() == 1) {
483                                                 URI conceptualizationURI = uriForConceptualization(lemmas.get(0).getLemma().getUriLemma(),
484                                                         lemmas.get(0).getLemma().getMainPos(), rolesetID);
485                                                 addStatementToSink(annotationURI, PMO.VALUE_OBJ, conceptualizationURI, EXAMPLE_GRAPH);
486                                             }
487 
488                                             addStatementToSink(markableURI, RDF.TYPE, PMO.MARKABLE, EXAMPLE_GRAPH);
489                                             addStatementToSink(markableURI, NIF.BEGIN_INDEX, start, EXAMPLE_GRAPH);
490                                             addStatementToSink(markableURI, NIF.END_INDEX, end, EXAMPLE_GRAPH);
491                                             addStatementToSink(markableURI, NIF.ANCHOR_OF, origValue, EXAMPLE_GRAPH);
492                                             addStatementToSink(markableURI, NIF.REFERENCE_CONTEXT, exampleURI, EXAMPLE_GRAPH);
493                                             addStatementToSink(markableURI, NIF.ANNOTATION_P, rolesetURI, EXAMPLE_GRAPH);
494 
495                                             NF nf = new NF(null, rel.getF());
496                                             String argName = nf.getArgName();
497                                             Type argType = getType(argName);
498 
499                                             addRelToSink(argType, argName, markableURI);
500                                         }
501 
502                                         for (int i = 0; i < myArgs.size(); i++) {
503                                             Arg arg = myArgs.get(i);
504                                             String value = arg.getvalue().toLowerCase()
505                                                     .replaceAll("\\s+", " ").trim();
506 
507                                             int start = text.indexOf(value);
508                                             if (start == -1) {
509                                                 //todo: fix these
510                                                 // LOGGER.error("Arg string not found in {}: {}", rolesetID, value);
511                                                 continue;
512                                             }
513                                             int end = start + value.length();
514 
515                                             URI markableURI = uriForMarkable(exampleURI, start, end);
516                                             URI annotationURI = createURI(annotationSetURI.toString() + "-arg-" + i);
517 
518                                             addStatementToSink(exampleURI, NIF.ANNOTATION_P, annotationURI,
519                                                     EXAMPLE_GRAPH);
520                                             addStatementToSink(annotationURI, RDF.TYPE, NIF.ANNOTATION_C,
521                                                     EXAMPLE_GRAPH);
522                                             addStatementToSink(annotationSetURI, PMO.ITEM, annotationURI,
523                                                     EXAMPLE_GRAPH);
524 
525                                             addStatementToSink(markableURI, RDF.TYPE, PMO.MARKABLE, EXAMPLE_GRAPH);
526                                             addStatementToSink(markableURI, NIF.BEGIN_INDEX, start, EXAMPLE_GRAPH);
527                                             addStatementToSink(markableURI, NIF.END_INDEX, end, EXAMPLE_GRAPH);
528                                             addStatementToSink(markableURI, NIF.ANCHOR_OF, value, EXAMPLE_GRAPH);
529                                             addStatementToSink(markableURI, NIF.REFERENCE_CONTEXT, exampleURI,
530                                                     EXAMPLE_GRAPH);
531 
532                                             NF nf = new NF(arg.getN(), arg.getF());
533                                             String argName = nf.getArgName();
534 
535                                             if (argName == null) {
536                                                 //todo: this should never happen; however it happens
537                                                 continue;
538                                             }
539 
540                                             // Bugs!
541                                             if (bugMap.containsKey(argName)) {
542                                                 argName = bugMap.get(argName);
543                                             }
544 
545                                             Type argType;
546                                             try {
547                                                 argType = getType(argName);
548                                             } catch (Exception e) {
549                                                 LOGGER.error("Error in lemma {}: " + e.getMessage(), uriOrigLemma);
550                                                 continue;
551                                             }
552 
553                                             URI argumentURI = addExampleArgToSink(argType, argName, markableURI,
554                                                     nf.getF(), rolesetID, annotationURI);
555                                             addStatementToSink(annotationURI, PMO.VALUE_OBJ, argumentURI,
556                                                     EXAMPLE_GRAPH);
557                                         }
558                                     }
559                                 }
560                             }
561                         }
562                     }
563                 }
564             }
565 
566             LOGGER.info("No arg found: {}", noArgCount);
567             LOGGER.info("No string found: {}", noStringFound);
568         } catch (Exception e) {
569             e.printStackTrace();
570         }
571     }
572 
    /**
     * Subclass hook for attaching links to external resources at the lemma/conceptualization
     * level.
     *
     * NOTE(review): the call site is not visible in this part of the file; the parameter
     * descriptions below are inferred from names and types only and should be confirmed
     * against the implementing subclasses.
     *
     * @param complexLemmaWithMappings presumably the lemma together with its external mappings
     * @param conceptualizationURI     presumably the conceptualization the links start from
     * @param uriLemma                 presumably the lemma in the form used inside URIs
     * @param type                     presumably the part-of-speech/type code of the lemma
     */
    protected abstract void addExternalLinks(ComplexLemmaWithMappings complexLemmaWithMappings, URI conceptualizationURI, String uriLemma,
            String type);
575 
576     protected ArrayList<Matcher> getPropBankPredicates(String source) {
577 
578         ArrayList<Matcher> ret = new ArrayList<>();
579 
580         if (source != null && source.length() > 0) {
581 
582             String[] parts = source.split("\\s+");
583             for (String part : parts) {
584                 if (part.trim().length() == 0) {
585                     continue;
586                 }
587 
588                 Matcher matcher = PB_PATTERN.matcher(source);
589                 if (!matcher.find()) {
590                     continue;
591                 }
592 
593                 ret.add(matcher);
594             }
595         }
596 
597         return ret;
598     }
599 
600     protected List<String> getVnClasses(String vnList) {
601 
602         List<String> vnClasses = new ArrayList<>();
603 
604         if (vnList != null) {
605             vnList = vnList.replaceAll(",", " ");
606             vnList = vnList.trim();
607 
608             String[] tmpClasses = vnList.split("\\s+");
609             for (String tmpClass : tmpClasses) {
610                 tmpClass = tmpClass.trim();
611                 if (tmpClass.length() == 0) {
612                     continue;
613                 }
614                 if (tmpClass.equals("-")) {
615                     continue;
616                 }
617                 if (tmpClass.endsWith(".")) {
618                     tmpClass = tmpClass.substring(0, tmpClass.length() - 1);
619                 }
620 
621                 String realVnClass = vnMap.get(tmpClass);
622                 if (realVnClass == null && vnMap.size() > 0) {
623                     Matcher matcher = VN_PATTERN.matcher(tmpClass);
624                     if (matcher.find()) {
625                         realVnClass = tmpClass;
626                     } else {
627                         LOGGER.warn("VerbNet class not found: {}", tmpClass);
628                         continue;
629                     }
630                 }
631 
632                 vnClasses.add(realVnClass);
633             }
634         }
635 
636         return vnClasses;
637     }
638 
639     protected void addExternalLinks(Role role, URI argumentURI, String uriLemma, String type, String rolesetID, Iterable<String> vnLemmas) {
640 
641         URI rolesetURI = uriForRoleset(rolesetID);
642         URI conceptualizationURI = uriForConceptualization(uriLemma, type, rolesetID);
643 
644         List<Vnrole> vnroleList = role.getVnrole();
645         for (Vnrole vnrole : vnroleList) {
646             List<String> vnClasses = getVnClasses(vnrole.getVncls());
647 
648             // todo: thetha is unique (information got by grepping the dataset)
649             String theta = vnrole.getVntheta();
650             theta = theta.replaceAll("[0-9]", "");
651             theta = theta.trim();
652             theta = theta.toLowerCase();
653 
654             for (String vnClass : vnClasses) {
655                 for (String vnLink : vnLinks) {
656                     for (String vnLemma : vnLemmas) {
657 
658                         // todo: bad!
659                         mapArgLabel = "";
660                         URI vnClassURI = uriForRoleset(vnClass, vnLink);
661                         URI vnConceptualizationURI = uriForConceptualizationWithPrefix(vnLemma,
662                                 "v", vnClass, vnLink);
663                         URI vnArgumentURI = uriForArgument(vnClass, theta, vnLink);
664                         mapArgLabel = null;
665 
666                         addMappings(rolesetURI, vnClassURI, conceptualizationURI,
667                                 vnConceptualizationURI, argumentURI, vnArgumentURI);
668 
669                     }
670                 }
671             }
672 
673         }
674     }
675 
    /**
     * Subclass hook returning a URI for the given lemma and type.
     *
     * NOTE(review): presumably the URI of the corresponding entry in an external resource;
     * neither the call site nor an implementation is visible here — confirm against subclasses.
     */
    protected abstract URI getExternalLink(String lemma, String type);
677 
678     public static String getLemmaFromPredicateName(String lemmaFromPredicate) {
679         String lemma = lemmaFromPredicate.replace('_', '+')
680                 .replace(' ', '+');
681         if (lemmaToTransform.keySet().contains(lemma)) {
682             lemma = lemmaToTransform.get(lemma);
683         }
684         return lemma;
685     }
686 
687     protected void addArgumentToSink(String key, URI keyURI, URI argumentURI, String lemma,
688             String type, String rolesetID, URI lexicalEntryURI, @Nullable Role role, @Nullable Iterable<String> vnLemmas) {
689         addStatementToSink(argumentURI, getRoleToArgumentProperty(), keyURI);
690         addStatementToSink(uriForRoleset(rolesetID), PMO.SEM_ROLE, argumentURI);
691 
692         //    URI argConceptualizationURI = uriForConceptualization(lemma, type, rolesetID, key);
693         //    addStatementToSink(argConceptualizationURI, RDF.TYPE, PMO.CONCEPTUALIZATION);
694         //    addStatementToSink(argConceptualizationURI, PMO.EVOKING_ENTRY, lexicalEntryURI);
695         //    addStatementToSink(argConceptualizationURI, PMO.EVOKED_CONCEPT, argumentURI);
696 
697         if (role != null) {
698             addExternalLinks(role, argumentURI, lemma, type, rolesetID, vnLemmas);
699         }
700     }
701 
702     // URIs
703 
704     private URI uriForExample(String rolesetID, String exampleText) {
705         return createURI(NAMESPACE
706                 + rolesetPart(rolesetID)
707                 + separator
708                 + EXAMPLE_PREFIX
709                 + "_"
710                 + Hash.murmur3(exampleText).toString().replace("_", "").replace("-", "")
711                 .substring(0, 8));
712     }
713 
714 //    private URI uriForExample(String rolesetID, int exampleCount) {
715 //        StringBuilder builder = new StringBuilder();
716 //        builder.append(NAMESPACE);
717 //        builder.append(examplePart(rolesetID, exampleCount));
718 //        return createURI(builder.toString());
719 //    }
720 
721     // Parts
722 
723 //    private String examplePart(String rolesetID, Integer exampleCount) {
724 //        StringBuilder builder = new StringBuilder();
725 //        builder.append(rolesetPart(rolesetID));
726 //        builder.append(separator);
727 //        builder.append(EXAMPLE_PREFIX);
728 //        builder.append(exampleCount);
729 //        return builder.toString();
730 //    }
731 
732     // Abstract methods
733 
    /**
     * NOTE(review): presumably the class URI used to type predicates in the concrete
     * resource; not used in this part of the file — confirm against subclasses.
     */
    abstract URI getPredicate();

    /**
     * NOTE(review): presumably the class URI used to type semantic arguments in the concrete
     * resource; not used in this part of the file — confirm against subclasses.
     */
    abstract URI getSemanticArgument();

    /**
     * Property used by {@code addArgumentToSink} as the predicate of the statement that links
     * an argument URI to its key URI.
     */
    abstract URI getRoleToArgumentProperty();

    /**
     * NOTE(review): presumably the property marking core arguments; not used in this part of
     * the file — confirm against subclasses.
     */
    abstract URI getCoreProperty();

    /**
     * NOTE(review): presumably maps function tags to the URIs representing them; not used in
     * this part of the file — confirm against subclasses.
     */
    abstract HashMap<String, URI> getFunctionMap();

    /**
     * Subclass hook for emitting the statements describing an example's inflection.
     * NOTE(review): contract inferred from the signature only — confirm against subclasses.
     */
    abstract void addInflectionToSink(URI exampleURI, Inflection inflection);

    /**
     * Subclass hook for emitting the resource-specific statements of an argument.
     * NOTE(review): contract inferred from the signature only — confirm against subclasses.
     */
    abstract void addArgumentToSink(URI argumentURI, String argName, String f, Type argType,
            String lemma, String type, String rolesetID, URI lexicalEntryURI, Role role, Roleset roleset);

    /**
     * Resolves the {@code Type} of an argument from its name. The example-processing code
     * calls this inside a try/catch, logs any exception and skips the argument, so
     * implementations may throw on unrecognized codes.
     */
    abstract Type getType(String code);

    /**
     * Subclass hook emitting the statements for an argument occurring in an example. The
     * example-processing code attaches the returned URI to the annotation (passed here as
     * {@code asURI}) through {@code PMO.VALUE_OBJ}.
     */
    protected abstract URI addExampleArgToSink(Type argType, String argName, URI markableURI,
            String f, String rolesetID, URI asURI);

    /**
     * Subclass hook for emitting the statements of an example's relation markable.
     * NOTE(review): the call site is not visible here; presumably handles the "rel" element
     * of an example — confirm against subclasses.
     */
    protected abstract void addRelToSink(Type argType, String argName, URI markableURI);
755 
756     @Override protected URI getPosURI(String textualPOS) {
757         if (textualPOS == null) {
758             return null;
759         }
760 
761         switch (textualPOS) {
762         case "v":
763         case "l":
764             return LEXINFO.VERB;
765         case "n":
766             return LEXINFO.NOUN;
767         case "j":
768             return LEXINFO.ADJECTIVE;
769         case "prep":
770             return LEXINFO.PREPOSITION;
771         }
772 
773         LOGGER.error("POS not found: {}", textualPOS);
774         return null;
775     }
776 
777     @Override public String getArgLabel() {
778         if (mapArgLabel != null) {
779             return mapArgLabel;
780         }
781         return super.getArgLabel();
782     }
783 }