1   package eu.fbk.dkm.premon.premonitor;
2   
3   import java.io.File;
4   import java.io.FileInputStream;
5   import java.lang.reflect.Constructor;
6   import java.net.URL;
7   import java.nio.file.Files;
8   import java.util.Collection;
9   import java.util.HashMap;
10  import java.util.List;
11  import java.util.Map;
12  import java.util.Properties;
13  import java.util.Set;
14  import java.util.concurrent.atomic.AtomicInteger;
15  import java.util.regex.Matcher;
16  import java.util.regex.Pattern;
17  import java.util.stream.Collectors;
18  
19  import javax.annotation.Nullable;
20  
21  import com.google.common.base.Charsets;
22  import com.google.common.base.Joiner;
23  import com.google.common.base.MoreObjects;
24  import com.google.common.base.Preconditions;
25  import com.google.common.collect.HashBasedTable;
26  import com.google.common.collect.HashMultimap;
27  import com.google.common.collect.ImmutableList;
28  import com.google.common.collect.ImmutableMap;
29  import com.google.common.collect.ImmutableSet;
30  import com.google.common.collect.Iterables;
31  import com.google.common.collect.Lists;
32  import com.google.common.collect.Maps;
33  import com.google.common.collect.Multimap;
34  import com.google.common.collect.Multimaps;
35  import com.google.common.collect.Ordering;
36  import com.google.common.collect.Sets;
37  import com.google.common.collect.Table;
38  import com.google.common.collect.Table.Cell;
39  import com.google.common.io.Resources;
40  
41  import org.openrdf.model.BNode;
42  import org.openrdf.model.Namespace;
43  import org.openrdf.model.Resource;
44  import org.openrdf.model.Statement;
45  import org.openrdf.model.URI;
46  import org.openrdf.model.Value;
47  import org.openrdf.model.impl.ContextStatementImpl;
48  import org.openrdf.model.impl.URIImpl;
49  import org.openrdf.model.vocabulary.DCTERMS;
50  import org.openrdf.model.vocabulary.OWL;
51  import org.openrdf.model.vocabulary.RDF;
52  import org.openrdf.model.vocabulary.RDFS;
53  import org.openrdf.rio.RDFHandler;
54  import org.openrdf.rio.RDFHandlerException;
55  import org.slf4j.Logger;
56  import org.slf4j.LoggerFactory;
57  
58  import eu.fbk.dkm.premon.util.ProcessorUndoRDFS;
59  import eu.fbk.dkm.premon.vocab.DECOMP;
60  import eu.fbk.dkm.premon.vocab.FB;
61  import eu.fbk.dkm.premon.vocab.LEXINFO;
62  import eu.fbk.dkm.premon.vocab.NIF;
63  import eu.fbk.dkm.premon.vocab.ONTOLEX;
64  import eu.fbk.dkm.premon.vocab.PM;
65  import eu.fbk.dkm.premon.vocab.PMO;
66  import eu.fbk.dkm.premon.vocab.PMONB;
67  import eu.fbk.dkm.premon.vocab.PMOPB;
68  import eu.fbk.dkm.utils.CommandLine;
69  import eu.fbk.rdfpro.AbstractRDFHandler;
70  import eu.fbk.rdfpro.RDFHandlers;
71  import eu.fbk.rdfpro.RDFProcessor;
72  import eu.fbk.rdfpro.RDFProcessors;
73  import eu.fbk.rdfpro.RDFSource;
74  import eu.fbk.rdfpro.RDFSources;
75  import eu.fbk.rdfpro.RuleEngine;
76  import eu.fbk.rdfpro.Ruleset;
77  import eu.fbk.rdfpro.SetOperator;
78  import eu.fbk.rdfpro.util.Hash;
79  import eu.fbk.rdfpro.util.IO;
80  import eu.fbk.rdfpro.util.QuadModel;
81  import eu.fbk.rdfpro.util.Statements;
82  import eu.fbk.rdfpro.util.Tracker;
83  
84  /**
85   * Premonitor command line tool for converting predicate resources to the PreMOn model
86   */
87  public class Premonitor {
88  
89      private static final String DEFAULT_PATH = ".";
90      private static final String DEFAULT_PROPERTIES_FILE = "premonitor.properties";
91      private static final String DEFAULT_OUTPUT_BASE = "output/premon";
92      private static final String DEFAULT_OUTPUT_FORMATS = "trig.gz,tql.gz,ttl.gz";
93      private static final String DEFAULT_WORDNET_FILE = "wordnet-3.1/wn31.nt.gz";
94  
95      private static final Pattern PROPERTIES_RESOURCES_PATTERN = Pattern
96              .compile("^resource([0-9]+)\\.(.*)$");
97  
98      private static final String WN_PREFIX = "http://wordnet-rdf.princeton.edu/wn31/";
99  
100     private static final URI LEMON_LEXICAL_ENTRY = Statements.VALUE_FACTORY
101             .createURI("http://lemon-model.net/lemon#LexicalEntry");
102     private static final URI LEMON_REFERENCE = Statements.VALUE_FACTORY
103             .createURI("http://lemon-model.net/lemon#reference");
104     private static final URI WN_OLD_SENSE = Statements.VALUE_FACTORY
105             .createURI("http://wordnet-rdf.princeton.edu/ontology#old_sense_key");
106 
107     private static final Logger LOGGER = LoggerFactory.getLogger(Premonitor.class);
108 
109     public static void main(final String[] args) {
110 
111         try {
112             final CommandLine cmd = CommandLine.parser().withName("./premonitor")
113                     .withHeader("Transform linguistic resources into RDF")
114                     .withOption("i", "input",
115                             String.format("input folder (default %s)", DEFAULT_PATH), "FOLDER",
116                             CommandLine.Type.DIRECTORY_EXISTING, true, false, false)
117                     .withOption("b", "output-base", "Output base path/name (default 'premon')",
118                             "PATH", CommandLine.Type.FILE, true, false, false)
119                     .withOption("f", "output-formats",
120                             "Comma-separated list of output formats (default 'tql.gz')", "FMTS",
121                             CommandLine.Type.STRING, true, false, false)
122                     .withOption("p", "properties",
123                             String.format("Property file (default %s)", DEFAULT_PROPERTIES_FILE),
124                             "FILE", CommandLine.Type.FILE, true, false, false)
125                     .withOption("s", "single", "Extract single lemma (apply to all resources)",
126                             "LEMMA", CommandLine.Type.STRING, true, false, false)
127                     .withOption(null, "wordnet",
128                             String.format("WordNet RDF triple file (default: %s)",
129                                     DEFAULT_WORDNET_FILE),
130                             "FILE", CommandLine.Type.FILE_EXISTING, true, false, false)
131                     .withOption(null, "wordnet-sensekeys", "WordNet senseKey mapping", "FILE",
132                             CommandLine.Type.FILE_EXISTING, true, false, false)
133                     .withOption("r", "omit-owl2rl", "Omit OWL2RL reasoning (faster)")
134                     .withOption("x", "omit-stats", "Omit generation of statistics (faster)")
135                     .withOption("m", "omit-filter-mappings",
136                             "Omit filtering illegal mappings " //
137                                     + "referring to non-existing conceptualizations (faster)")
138                     .withLogger(LoggerFactory.getLogger("eu.fbk")).parse(args);
139 
140             // Input/output
141             File inputFolder = new File(DEFAULT_PATH);
142             if (cmd.hasOption("input")) {
143                 inputFolder = cmd.getOptionValue("input", File.class);
144             }
145             File propertiesFile = new File(DEFAULT_PROPERTIES_FILE);
146             if (cmd.hasOption("properties")) {
147                 propertiesFile = cmd.getOptionValue("properties", File.class);
148             }
149 
150             System.setProperty("javax.xml.accessExternalDTD", "file");
151 
152             // WordNet
153             final HashMap<String, URI> wnInfo = new HashMap<>();
154 
155             final URL resource = ClassLoader.getSystemClassLoader()
156                     .getResource("eu/fbk/dkm/premon/premonitor/wn30-senseKeys.tsv");
157             List<String> allLines = null;
158             if (resource != null) {
159                 allLines = Resources.readLines(resource, Charsets.UTF_8);
160             }
161 
162             if (cmd.hasOption("wordnet-sensekeys")) {
163                 allLines = Files.readAllLines(
164                         cmd.getOptionValue("wordnet-sensekeys", File.class).toPath());
165             }
166             if (allLines != null) {
167                 for (String line : allLines) {
168                     line = line.trim();
169                     final String[] parts = line.split("\\s+");
170                     if (parts.length >= 2) {
171                         String senseKey = parts[0];
172                         final String synsetID = parts[1];
173                         senseKey = senseKey.replaceAll(":[^:]*:[^:]*$", "");
174                         wnInfo.put(senseKey, Converter.createURI(WN_PREFIX, synsetID));
175                     }
176                 }
177             }
178 
179             if (cmd.hasOption("wordnet")) {
180                 final File wnRDF = cmd.getOptionValue("wordnet", File.class);
181                 if (wnRDF != null && wnRDF.exists()) {
182                     LOGGER.info("Loading WordNet");
183                     final RDFSource source = RDFSources.read(true, true, null, null,
184                             wnRDF.getAbsolutePath());
185                     source.emit(new AbstractRDFHandler() {
186 
187                         @Override
188                         public void handleStatement(final Statement statement)
189                                 throws RDFHandlerException {
190 
191                             // Really really bad!
192                             if (statement.getPredicate().equals(RDF.TYPE)
193                                     && statement.getObject().equals(LEMON_LEXICAL_ENTRY)) {
194                                 if (statement.getSubject() instanceof URI) {
195                                     synchronized (wnInfo) {
196                                         // required to establish owl:sameAs links
197                                         wnInfo.put(statement.getSubject().stringValue(),
198                                                 (URI) statement.getSubject());
199                                     }
200                                 }
201                             }
202 
203                             // Really really bad!
204                             if (statement.getPredicate().equals(LEMON_REFERENCE)) {
205                                 final Resource s = statement.getSubject();
206                                 final Value o = statement.getObject();
207                                 if (s instanceof URI && o instanceof URI) {
208                                     synchronized (wnInfo) {
209                                         // required to establish VN32 links
210                                         final String name = s.stringValue();
211                                         final int start = name.lastIndexOf('/') + 1;
212                                         final int end = name.lastIndexOf('-',
213                                                 name.indexOf('#', start));
214                                         final String lemma = name.substring(start, end)
215                                                 .replace('+', '_');
216                                         final String key = o.stringValue() + "|" + lemma;
217                                         final URI oldURI = wnInfo.put(key, (URI) s);
218                                         Preconditions
219                                                 .checkState(oldURI == null || oldURI.equals(s));
220                                     }
221                                 }
222                             }
223                         }
224                     }, 1);
225 
226                     LOGGER.info("Loaded {} URIs", wnInfo.size());
227                 }
228             }
229 
230             // Load properties
231             final HashMap<Integer, Properties> multiProperties = new HashMap<>();
232 
233             LOGGER.info("Loading properties file: {}", propertiesFile.getAbsolutePath());
234             if (propertiesFile.exists()) {
235                 final Properties tmpProp = new Properties();
236                 tmpProp.load(new FileInputStream(propertiesFile));
237 
238                 for (final Object key : tmpProp.keySet()) {
239                     final Matcher m = PROPERTIES_RESOURCES_PATTERN.matcher((String) key);
240                     if (m.find()) {
241                         final Integer id = Integer.parseInt(m.group(1));
242                         final String subProperty = m.group(2);
243 
244                         if (multiProperties.get(id) == null) {
245                             multiProperties.put(id, new Properties());
246                         }
247 
248                         multiProperties.get(id).setProperty(subProperty,
249                                 tmpProp.getProperty((String) key));
250                     }
251                 }
252             }
253 
254             final Map<String, Map<URI, QuadModel>> models = new HashMap<>();
255             for (final Integer id : multiProperties.keySet()) {
256                 final Properties properties = multiProperties.get(id);
257 
258                 final boolean active = properties.getProperty("active", "0").equals("1");
259                 if (!active) {
260                     LOGGER.info("Resource {} is not active", id);
261                     continue;
262                 }
263 
264                 final String source = properties.getProperty("source");
265                 if (source == null || source.length() == 0) {
266                     LOGGER.error("Resource {} has no source", id);
267                     continue;
268                 }
269 
270                 LOGGER.info("Processing {}", properties.getProperty("label"));
271 
272                 // Check class
273                 final String className = properties.getProperty("class");
274                 if (className == null) {
275                     LOGGER.error("Resource {} has no class", id);
276                     continue;
277                 }
278 
279                 // Check folder
280                 String folderName = properties.getProperty("folder");
281                 if (folderName == null) {
282                     LOGGER.error("Resource {} has no folder", id);
283                     continue;
284                 }
285                 if (!folderName.startsWith(File.separator)) {
286                     folderName = inputFolder + File.separator + folderName;
287                 }
288                 final File folder = new File(folderName);
289                 if (!folder.exists()) {
290                     LOGGER.error("Folder {} does not exist", folderName);
291                     continue;
292                 }
293                 if (!folder.isDirectory()) {
294                     LOGGER.error("Folder {} is not a folder", folderName);
295                     continue;
296                 }
297 
298                 try {
299                     // Build an RDFHandler that populates a NS map and a QuadModel for each graph
300                     final AtomicInteger numQuads = new AtomicInteger();
301                     final Map<String, String> namespaces = Maps.newHashMap();
302                     final Map<URI, QuadModel> graphModels = new HashMap<>();
303                     models.put(source, graphModels);
304                     final RDFHandler handler = new AbstractRDFHandler() {
305 
306                         @Override
307                         public void handleNamespace(final String prefix, final String uri) {
308                             namespaces.put(prefix, uri);
309                         }
310 
311                         @Override
312                         public synchronized void handleStatement(final Statement stmt) {
313                             numQuads.incrementAndGet();
314                             URI graph;
315                             try {
316                                 graph = (URI) stmt.getContext();
317                             } catch (final ClassCastException ex) {
318                                 LOGGER.warn("Unexpected non-URI graph: " + stmt.getContext());
319                                 return;
320                             }
321                             QuadModel graphModel = graphModels.get(graph);
322                             if (graphModel == null) {
323                                 graphModel = QuadModel.create();
324                                 graphModels.put(graph, graphModel);
325                             }
326                             graphModel.add(stmt.getSubject(), stmt.getPredicate(),
327                                     stmt.getObject());
328                         }
329 
330                     };
331 
332                     // Create and invoke Converter using reflection
333                     final Class<?> cls = Class.forName(className);
334                     final Constructor<?> constructor = cls.getConstructor(File.class,
335                             RDFHandler.class, Properties.class, Map.class);
336                     final Object converter = constructor.newInstance(folder, handler, properties,
337                             wnInfo);
338                     if (converter instanceof Converter) {
339                         ((Converter) converter).convert();
340                     }
341 
342                     // Apply default + Converter namespaces to all the graphs collected
343                     int numUniqueQuads = 0;
344                     for (final QuadModel model : graphModels.values()) {
345                         numUniqueQuads += model.size();
346                         for (final Map.Entry<String, String> entry : namespaces.entrySet()) {
347                             model.setNamespace(entry.getKey(), entry.getValue());
348                         }
349                         model.setNamespace(PM.PREFIX, PM.NAMESPACE);
350                         model.setNamespace(PMO.PREFIX, PMO.NAMESPACE);
351                         model.setNamespace(PMOPB.PREFIX, PMOPB.NAMESPACE);
352                         model.setNamespace(PMONB.PREFIX, PMONB.NAMESPACE);
353                         model.setNamespace(ONTOLEX.PREFIX, ONTOLEX.NAMESPACE);
354                         model.setNamespace(DECOMP.PREFIX, DECOMP.NAMESPACE);
355                         model.setNamespace(LEXINFO.PREFIX, LEXINFO.NAMESPACE);
356                         model.setNamespace(FB.PREFIX, FB.NAMESPACE);
357                     }
358 
359                     // Log the number of triples extracted
360                     LOGGER.info("Extracted {} quads ({} before deduplication)", numUniqueQuads,
361                             numQuads.get());
362 
363                 } catch (final ClassNotFoundException e) {
364                     // Log and ignore
365                     LOGGER.error("Class {} not found", className);
366                 }
367             }
368 
369             try {
370                 // Extract output base name and formats, removing leading '.' character from them
371                 final String base = cmd.getOptionValue("b", String.class, DEFAULT_OUTPUT_BASE);
372                 final String[] formats = cmd
373                         .getOptionValue("f", String.class, DEFAULT_OUTPUT_FORMATS).split(",");
374                 for (int i = 0; i < formats.length; ++i) {
375                     if (formats[i].charAt(0) == '.') {
376                         formats[i] = formats[i].substring(1);
377                     }
378                 }
379 
380                 // Extract flags controlling output generation
381                 final boolean owl2rl = !cmd.hasOption("r");
382                 final boolean statistics = !cmd.hasOption("x");
383                 final boolean filterMappings = !cmd.hasOption("m");
384 
385                 // Emit the output based on previous settings
386                 emit(base, formats, models, owl2rl, statistics, filterMappings);
387 
388             } catch (final Exception ex) {
389                 // Wrap and propagate
390                 throw new RDFHandlerException(
391                         "IO error, some files might not have been properly saved ("
392                                 + ex.getMessage() + ")",
393                         ex);
394             }
395 
396         } catch (final Throwable ex) {
397             CommandLine.fail(ex);
398         }
399     }
400 
401     private static void emit(final String base, final String[] formats,
402             final Map<String, Map<URI, QuadModel>> models, final boolean owl2rl,
403             final boolean statistics, final boolean filterMappings) throws RDFHandlerException {
404 
405         // Load TBox and get rid of unwanted classes
406         final QuadModel tbox = QuadModel.create();
407         RDFSources
408                 .read(false, true, null, null, "classpath:/eu/fbk/dkm/premon/premonitor/tbox.ttl")
409                 .emit(RDFHandlers.wrap(tbox), 1);
410         final String semNS = "http://www.ontologydesignpatterns.org/cp/owl/semiotics.owl#";
411         final Set<URI> unwantedConcepts = ImmutableSet.of(RDFS.RESOURCE, NIF.URISCHEME,
412                 NIF.RFC5147_STRING, NIF.CSTRING, new URIImpl(semNS + "InformationEntity"),
413                 new URIImpl(semNS + "Expression"), new URIImpl(semNS + "Meaning"));
414         for (final Statement stmt : ImmutableList.copyOf(tbox)) {
415             final Resource s = stmt.getSubject();
416             final URI p = stmt.getPredicate();
417             final Value o = stmt.getObject();
418             if (unwantedConcepts.contains(s) || unwantedConcepts.contains(o)
419                     || (s.equals(PMO.SEMANTIC_CLASS_MAPPING) || s.equals(PMO.SEMANTIC_ROLE_MAPPING)
420                             || s.equals(PMO.CONCEPTUALIZATION_MAPPING))
421                             && p.equals(RDFS.SUBCLASSOF) && o instanceof BNode) {
422                 tbox.remove(stmt);
423             }
424         }
425         LOGGER.info("TBox loaded - {} quads", tbox.size());
426 
427         // Close TBox
428         final Ruleset tboxRuleset = Ruleset
429                 .fromRDF("classpath:/eu/fbk/dkm/premon/premonitor/ruleset.ttl");
430         RuleEngine.create(tboxRuleset).eval(tbox);
431         LOGGER.info("TBox closed - {} quads", tbox.size());
432 
433         if (owl2rl) {
434             // Initialize ABox rule engine
435             final Ruleset aboxRuleset = tboxRuleset.getABoxRuleset(tbox);
436             final RuleEngine aboxEngine = RuleEngine.create(aboxRuleset);
437             LOGGER.info("ABox rule engine initialized - {}", aboxEngine);
438 
439             // Perform ABox inference
440             for (final Map.Entry<String, Map<URI, QuadModel>> entry1 : models.entrySet()) {
441                 for (final Map.Entry<URI, QuadModel> entry2 : entry1.getValue().entrySet()) {
442                     final int sizeBefore = entry2.getValue().size();
443                     aboxEngine.eval(entry2.getValue());
444                     for (final Statement stmt : tbox) {
445                         entry2.getValue().remove(stmt.getSubject(), stmt.getPredicate(),
446                                 stmt.getObject());
447                     }
448                     final int sizeAfter = entry2.getValue().size();
449                     LOGGER.info("ABox closed for {}, graph {}: from {} to {} quads",
450                             entry1.getKey(), entry2.getKey(), sizeBefore, sizeAfter);
451                 }
452             }
453 
454             // Remove redundant quads (i.e., type quads of pm:entries from other graphs, and type
455             // quads of pm:entries and resource graphs from pm:examples)
456             for (final Map.Entry<String, Map<URI, QuadModel>> entry1 : models.entrySet()) {
457                 final String source = entry1.getKey();
458                 final Map<URI, QuadModel> sourceModels = entry1.getValue();
459                 final QuadModel entriesModel = sourceModels.get(PM.ENTRIES);
460                 for (final Map.Entry<URI, QuadModel> entry2 : sourceModels.entrySet()) {
461                     final URI graph = entry2.getKey();
462                     final boolean isEntries = graph.equals(PM.ENTRIES);
463                     final boolean isExamples = isExampleGraph(graph);
464                     final QuadModel filteredModel = QuadModel.create();
465                     outer: for (final Statement stmt : entry2.getValue()) {
466                         if (stmt.getPredicate().getNamespace().equals("sys:")) {
467                             continue;
468                         } else if (stmt.getPredicate().equals(RDF.TYPE)) {
469                             if (stmt.getObject() instanceof BNode) {
470                                 continue;
471                             } else if (stmt.getObject() instanceof URI
472                                     && ((URI) stmt.getObject()).getNamespace().equals("sys:")) {
473                                 continue;
474                             } else if (isExamples) {
475                                 for (final QuadModel model : sourceModels.values()) {
476                                     if (model != entry2.getValue() && model.contains(stmt)) {
477                                         continue outer;
478                                     }
479                                 }
480                             } else if (!isEntries) {
481                                 if (entriesModel != null && entriesModel.contains(stmt)) {
482                                     continue;
483                                 }
484                             }
485                         }
486                         filteredModel.add(stmt);
487                     }
488                     final int sizeBefore = entry2.getValue().size();
489                     entry2.setValue(filteredModel);
490                     final int sizeAfter = entry2.getValue().size();
491                     LOGGER.info("ABox filtered for {}, graph {}: from {} to {} quads", source,
492                             entry2.getKey(), sizeBefore, sizeAfter);
493                 }
494             }
495         }
496 
497         // Filter TBox
498         for (final Statement stmt : ImmutableList.copyOf(tbox)) {
499             if (stmt.getPredicate().getNamespace().equals("sys:")
500                     || stmt.getObject() instanceof URI
501                             && ((URI) stmt.getObject()).getNamespace().equals("sys:")) {
502                 tbox.remove(stmt);
503             }
504         }
505 
506         // Compute mapping statistics before filtering mappings
507         final List<String> sourceKeys = ImmutableList.copyOf(Iterables.concat(models.keySet(),
508                 ImmutableList.of("on5", "wn30", "wn31", "ili", "all")));
509         final List<QuadModel> quadModels = models.values().stream()
510                 .flatMap(m -> m.values().stream()).collect(Collectors.toList());
511         Map<String, MappingStatistics> msBefore = null;
512         Map<String, MappingStatistics> msAfter = null;
513         if (statistics) {
514             msBefore = Maps.newHashMap();
515             for (final Map.Entry<String, Map<URI, QuadModel>> entry : models.entrySet()) {
516                 msBefore.put(entry.getKey(), new MappingStatistics(entry.getValue().values(),
517                         sourceKeys, entry.getKey()));
518             }
519             msBefore.put("all", new MappingStatistics(quadModels, ImmutableList.of(), "all"));
520             msAfter = msBefore;
521         }
522 
523         // Remove illegal mappings
524         if (filterMappings) {
525             filterMappings(models);
526         }
527 
528         // Compute and emit statistics
529         if (statistics) {
530             if (filterMappings) {
531                 msAfter = Maps.newHashMap();
532                 for (final Map.Entry<String, Map<URI, QuadModel>> entry : models.entrySet()) {
533                     msAfter.put(entry.getKey(), new MappingStatistics(entry.getValue().values(),
534                             sourceKeys, entry.getKey()));
535                 }
536                 msAfter.put("all", new MappingStatistics(quadModels, ImmutableList.of(), "all"));
537             }
538             LOGGER.info("Resource statistics");
539             LOGGER.info(String.format("  %-10s %-9s %-9s %-9s %-9s %-9s %-9s %-9s %-9s %-9s",
540                     "source", "#classes", "#roles", "#conc", "#entries", "#examples", "#annsets",
541                     "#classrel", "#rolerel", "#corestmt"));
542             for (final Map.Entry<String, Map<URI, QuadModel>> entry : models.entrySet()) {
543                 final String source = entry.getKey();
544                 final InstanceStatistics s = new InstanceStatistics(entry.getValue().values(),
545                         tbox);
546                 LOGGER.info(String.format("  %-10s %-9d %-9d %-9d %-9d %-9d %-9d %-9d %-9d %-9d",
547                         source, s.numSemanticClasses, s.numSemanticRoles, s.numConceptualizations,
548                         s.numLexicalEntries, s.numExamples, s.numAnnotationSets, s.numClassRels,
549                         s.numRoleRels, s.numCoreTriples));
550             }
551             final InstanceStatistics s = new InstanceStatistics(quadModels, tbox);
552             LOGGER.info(String.format("  %-10s %-9d %-9d %-9d %-9d %-9d %-9d %-9d %-9d %-9d",
553                     "all", s.numSemanticClasses, s.numSemanticRoles, s.numConceptualizations,
554                     s.numLexicalEntries, s.numExamples, s.numAnnotationSets, s.numClassRels,
555                     s.numRoleRels, s.numCoreTriples));
556             LOGGER.info("Mapping statistics");
557             LOGGER.info(String.format("  %-32s %-39s %-39s", "sources", "# good mappings",
558                     "# invalid mappings"));
559             LOGGER.info(String.format(
560                     "  %-10s %-10s %-10s %-9s %-9s %-9s %-9s %-9s %-9s %-9s %-9s", "from", "to",
561                     "resource", "con", "class", "role", "other", "con", "class", "role", "other"));
562             for (final String from : sourceKeys) {
563                 final Integer z = new Integer(0);
564                 for (final String to : sourceKeys) {
565                     for (final String resource : Iterables.concat(models.keySet(),
566                             ImmutableList.of("all"))) {
567                         final MappingStatistics ms = msAfter.get(resource);
568                         final MappingStatistics msb = msBefore.get(resource);
569                         final int nx, nc, nr, no, nxb, ncb, nrb, nob;
570                         nx = MoreObjects.firstNonNull(ms.conMappings.get(from, to), z);
571                         nc = MoreObjects.firstNonNull(ms.classMappings.get(from, to), z);
572                         nr = MoreObjects.firstNonNull(ms.roleMappings.get(from, to), z);
573                         no = MoreObjects.firstNonNull(ms.otherMappings.get(from, to), z);
574                         nxb = MoreObjects.firstNonNull(msb.conMappings.get(from, to), z);
575                         ncb = MoreObjects.firstNonNull(msb.classMappings.get(from, to), z);
576                         nrb = MoreObjects.firstNonNull(msb.roleMappings.get(from, to), z);
577                         nob = MoreObjects.firstNonNull(msb.otherMappings.get(from, to), z);
578                         if (nxb + ncb + nrb + nob > 0) {
579                             LOGGER.info(String.format(
580                                     "  %-10s %-10s %-10s %-9d %-9d %-9d %-9d %-9d %-9d %-9d %-9d",
581                                     from, to, resource, nx, nc, nr, no, nxb - nx, ncb - nc,
582                                     nrb - nr, nob - no));
583                         }
584                     }
585                 }
586             }
587         }
588 
589         // Start emitting data
590         LOGGER.info("Emitting datasets ...");
591 
592         // Emit TBox
593         emit(base, "tbox", formats, ImmutableMap.of(PM.TBOX, tbox), null, owl2rl, false);
594 
595         // Emit data of each resource, separating examples from other graphs
596         final Multimap<URI, QuadModel> modelsByURI = HashMultimap.create();
597         for (final Map.Entry<String, Map<URI, QuadModel>> entry : models.entrySet()) {
598             final String source = entry.getKey();
599             final Map<URI, QuadModel> graphModels = entry.getValue();
600             emit(base, source, formats, Maps.filterKeys(graphModels, g -> !isExampleGraph(g)),
601                     tbox, owl2rl, statistics);
602             emit(base, source + "-examples", formats,
603                     Maps.filterKeys(graphModels, g -> isExampleGraph(g)), tbox, owl2rl,
604                     statistics);
605             modelsByURI.putAll(Multimaps.forMap(graphModels));
606         }
607 
608         // Emit aggregated data
609         final Map<URI, QuadModel> mergedGraphModels = Maps.newHashMap();
610         mergedGraphModels.put(PM.TBOX, tbox);
611         for (final Map.Entry<URI, Collection<QuadModel>> entry : modelsByURI.asMap().entrySet()) {
612             if (entry.getValue().size() == 1) {
613                 mergedGraphModels.put(entry.getKey(), entry.getValue().iterator().next());
614             } else if (entry.getValue().size() > 1) {
615                 final QuadModel mergedModel = QuadModel.create();
616                 for (final QuadModel model : entry.getValue()) {
617                     for (final Namespace ns : model.getNamespaces()) {
618                         mergedModel.setNamespace(ns);
619                     }
620                     mergedModel.addAll(model);
621                 }
622                 mergedGraphModels.put(entry.getKey(), mergedModel);
623             }
624         }
625         emit(base, "models", formats, Maps.filterKeys(mergedGraphModels, g -> !isExampleGraph(g)),
626                 tbox, owl2rl, statistics);
627         emit(base, "all", formats, mergedGraphModels, tbox, owl2rl, statistics);
628     }
629 
630     private static void emit(final String base, final String classifier, final String[] formats,
631             final Map<URI, QuadModel> models, @Nullable final QuadModel tbox, final boolean owl2rl,
632             final boolean statistics) throws RDFHandlerException {
633 
634         // Assemble RDFpro pipeline - start emitting closed data in all configured formats
635         final List<RDFProcessor> processors = Lists.newArrayList();
636         for (final String format : formats) {
637             final String location = base + "-" + classifier + (owl2rl ? "-inf." : ".") + format;
638             processors.add(RDFProcessors.write(null, 1000, location));
639         }
640         processors.add(RDFProcessors.track(new Tracker(LOGGER, null,
641                 classifier + (owl2rl ? "-inf" : "") + " - %d quads", null)));
642 
643         // Compute and emit statistics if enabled
644         if (statistics) {
645             final List<RDFProcessor> statsProcessors = Lists.newArrayList();
646             statsProcessors.add(RDFProcessors.stats(null, null, null, null, false));
647             for (final String format : formats) {
648                 final String location = base + "-" + classifier + "-stats." + format;
649                 statsProcessors.add(RDFProcessors.write(null, 1000, location));
650             }
651             statsProcessors.add(RDFProcessors
652                     .track(new Tracker(LOGGER, null, classifier + "-stats - %d quads", null)));
653             statsProcessors.add(RDFProcessors.NIL);
654             processors
655                     .add(RDFProcessors.parallel(SetOperator.UNION_MULTISET, RDFProcessors.IDENTITY,
656                             RDFProcessors.sequence(statsProcessors.toArray(new RDFProcessor[0]))));
657         }
658 
659         // Remove inferrable triples, write, compute statistics, write
660         if (owl2rl && tbox != null) {
661             processors.add(new ProcessorUndoRDFS(RDFSources.wrap(tbox)));
662             for (final String format : formats) {
663                 final String location = base + "-" + classifier + "-noinf." + format;
664                 processors.add(RDFProcessors.write(null, 1000, location));
665             }
666             processors.add(RDFProcessors
667                     .track(new Tracker(LOGGER, null, classifier + "-noinf - %d quads", null)));
668         }
669 
670         // Build the resulting sequence processor
671         final RDFProcessor processor = RDFProcessors
672                 .sequence(processors.toArray(new RDFProcessor[processors.size()]));
673 
674         // Apply the processor
675         final RDFHandler handler = RDFHandlers.decouple(processor.wrap(RDFHandlers.NIL));
676         try {
677             // Start
678             handler.startRDF();
679 
680             // Emit namespaces first
681             final Set<Namespace> namespaces = Sets.newHashSet();
682             for (final QuadModel model : models.values()) {
683                 namespaces.addAll(model.getNamespaces());
684             }
685             for (final Namespace namespace : Ordering.natural().sortedCopy(namespaces)) {
686                 handler.handleNamespace(namespace.getPrefix(), namespace.getName());
687             }
688 
689             // Emit data, one graph at a time and starting with pm:meta and pm:entries
690             final List<URI> sortedGraphs = Lists.newArrayList();
691             if (models.containsKey(PM.META)) {
692                 sortedGraphs.add(PM.META);
693             }
694             if (models.containsKey(PM.ENTRIES)) {
695                 sortedGraphs.add(PM.ENTRIES);
696             }
697             for (final URI graph : Ordering.from(Statements.valueComparator())
698                     .sortedCopy(models.keySet())) {
699                 if (!graph.equals(PM.META) && !graph.equals(PM.ENTRIES)) {
700                     sortedGraphs.add(graph);
701                 }
702             }
703             for (final URI graph : sortedGraphs) {
704                 for (final Statement stmt : models.get(graph)) {
705                     handler.handleStatement(new ContextStatementImpl(stmt.getSubject(),
706                             stmt.getPredicate(), stmt.getObject(), graph));
707                 }
708             }
709         } catch (final Throwable ex) {
710             LOGGER.error("File generation failed", ex);
711 
712         } finally {
713             // End and release allocated resources
714             handler.endRDF();
715             IO.closeQuietly(handler);
716         }
717     }
718 
719     private static void filterMappings(final Map<String, Map<URI, QuadModel>> models) {
720 
721         LOGGER.info("Removing illegal mappings...");
722 
723         final Set<URI> validItems = Sets.newHashSet();
724         for (final Map<URI, QuadModel> map : models.values()) {
725             for (final QuadModel model : map.values()) {
726                 for (final Statement stmt : model.filter(null, PMO.EVOKED_CONCEPT, null)) {
727                     validItems.add((URI) stmt.getSubject()); // conceptualizations
728 
729                 }
730                 for (final Statement stmt : model.filter(null, PMO.SEM_ROLE, null)) {
731                     validItems.add((URI) stmt.getSubject()); // semantic class
732                     validItems.add((URI) stmt.getObject()); // semantic roles
733                 }
734             }
735         }
736 
737         for (final Map<URI, QuadModel> map : models.values()) {
738             for (final Map.Entry<URI, QuadModel> entry : map.entrySet()) {
739                 final QuadModel model = entry.getValue();
740                 for (final URI type : new URI[] { PMO.CONCEPTUALIZATION_MAPPING,
741                         PMO.SEMANTIC_CLASS_MAPPING, PMO.SEMANTIC_ROLE_MAPPING }) {
742 
743                     int numMappingsToDelete = 0;
744                     int numMappings = 0;
745                     int mappingsDeletedCompletely = 0;
746                     int referencesRemoved = 0;
747                     final Map<String, Integer> numMappingsPerSource = Maps.newHashMap();
748                     final List<Statement> stmtsToDelete = Lists.newArrayList();
749                     for (final Resource m : model.filter(null, RDF.TYPE, type).subjects()) {
750                         ++numMappings;
751                         final List<Statement> stmts = ImmutableList
752                                 .copyOf(model.filter(m, null, null));
753                         boolean valid = true;
754                         final List<Statement> stmtsInvalid = Lists.newArrayList();
755                         for (final Statement stmt : stmts) {
756                             if (stmt.getPredicate().equals(PMO.ITEM)
757                                     && !validItems.contains(stmt.getObject())) {
758 
759                                 ++numMappingsToDelete;
760                                 final String str = stmt.getObject().stringValue();
761                                 for (final String source : models.keySet()) {
762                                     if (str.contains("-" + source + "-")
763                                             || str.contains("/" + source + "-")) {
764                                         numMappingsPerSource.put(source,
765                                                 1 + numMappingsPerSource.getOrDefault(source, 0));
766                                     }
767                                 }
768 
769                                 if (numMappingsToDelete <= 10) {
770                                     LOGGER.warn("Removing illegal mapping {} - missing {}", m,
771                                             stmt.getObject());
772                                 } else if (LOGGER.isDebugEnabled()) {
773                                     LOGGER.debug("Removing illegal mapping {} - missing {}", m,
774                                             stmt.getObject());
775                                 } else if (numMappingsToDelete == 11) {
776                                     LOGGER.warn("Omitting further illegal mappings ....");
777                                 }
778                                 stmtsInvalid.add(stmt);
779                                 valid = false;
780                                 break;
781                             }
782                         }
783                         if (!valid) {
784                             int items = 0, itemsInv = 0;
785                             for (final Statement stmt : stmts) {
786                                 if (stmt.getPredicate().equals(PMO.ITEM)) {
787                                     items++;
788                                 }
789                             }
790                             for (final Statement stmt : stmtsInvalid) {
791                                 if (stmt.getPredicate().equals(PMO.ITEM)) {
792                                     itemsInv++;// useless (all Statement in stmtsInvalid are
793                                                // items)
794                                 }
795                             }
796                             if (items - itemsInv < 2) {
797                                 stmtsToDelete.addAll(stmts);
798                                 mappingsDeletedCompletely++;
799                                 if (numMappingsToDelete <= 10 || LOGGER.isDebugEnabled()) {
800                                     LOGGER.info("Removing the complete mapping");
801                                 }
802                             } else {
803                                 stmtsToDelete.addAll(stmtsInvalid);
804                                 referencesRemoved++;
805                                 if (numMappingsToDelete <= 10 || LOGGER.isDebugEnabled()) {
806                                     LOGGER.info("Removing only missing reference");
807                                 }
808                             }
809                         }
810                     }
811                     if (numMappingsToDelete > 0) {
812                         for (final Statement stmt : stmtsToDelete) {
813                             model.remove(stmt);
814                         }
815                         LOGGER.warn(
816                                 "{}/{} illegal {} mappings and {} references {} removed from {}\n############################################################################################################",
817                                 mappingsDeletedCompletely, numMappings,
818                                 type.equals(PMO.SEMANTIC_CLASS_MAPPING) ? "semantic class"
819                                         : type.equals(PMO.CONCEPTUALIZATION_MAPPING)
820                                                 ? "conceptualization" : "semantic role",
821                                 referencesRemoved, numMappingsPerSource, entry.getKey());
822                     }
823                 }
824             }
825         }
826 
827         // Cleaning Ontological Mappings
828         for (final Map<URI, QuadModel> map : models.values()) {
829             for (final Map.Entry<URI, QuadModel> entry : map.entrySet()) {
830                 final QuadModel model = entry.getValue();
831 
832                 final List<Statement> stmts = ImmutableList
833                         .copyOf(model.filter(null, PMO.ONTO_MATCH, null));
834                 int numMappingsToDelete = 0;
835                 int numTriplesToDelete = 0;
836 
837                 for (Statement stmt : stmts) {
838                     if (!validItems.contains(stmt.getSubject())) {
839                         ++numMappingsToDelete;
840 
841                         // delete ontology matching triple
842                         ++numTriplesToDelete;
843                         model.remove(stmt);
844                         if (numMappingsToDelete <= 10) {
845                             LOGGER.warn("Removing illegal ontoMatch {} - missing {}",
846                                     stmt.getSubject(), stmt.getObject());
847                         } else if (LOGGER.isDebugEnabled()) {
848                             LOGGER.debug("Removing illegal ontoMatch {} - missing {}",
849                                     stmt.getSubject(), stmt.getObject());
850                         } else if (numMappingsToDelete == 11) {
851                             LOGGER.warn("Omitting further illegal ontoMatch assertions ....");
852                         }
853 
854                         // check if there are other things mapping to the same ontological
855                         // concept, otherwise remove all its triple
856                         if (ImmutableList
857                                 .copyOf(model.filter(null, PMO.ONTO_MATCH, stmt.getObject()))
858                                 .isEmpty()) {
859 
860                             final List<Statement> onto_stmts_all = ImmutableList
861                                     .copyOf(model.filter((URI) stmt.getObject(), null, null));
862                             for (final Statement s : onto_stmts_all) {
863 
864                                 ++numTriplesToDelete;
865                                 model.remove(s);
866                                 LOGGER.debug("Removing onto triple {} - {} - {}", s.getSubject(),
867                                         s.getPredicate(), s.getObject());
868                             }
869                         }
870 
871                         // for all
872                         // Check if only remaining triple on subject is "rdf:type skos:Concept".
873                         // if so remove
874 
875                         if (!model.contains(stmt.getSubject(), PMO.ONTO_MATCH, null)) {
876 
877                             for (Statement rel_stmt : ImmutableList
878                                     .copyOf(model.filter(stmt.getSubject(), null, null))) {
879 
880                                 ++numTriplesToDelete;
881                                 LOGGER.debug("Removing type triple {} - {} - {}",
882                                         rel_stmt.getSubject(), rel_stmt.getPredicate(),
883                                         rel_stmt.getObject());
884                                 model.remove(stmt);
885 
886                             }
887                         }
888 
889                     }
890 
891                 }
892 
893                 LOGGER.warn(
894                         "{} illegal ontoMatch assertions and {} related triples removed from {}\n############################################################################################################",
895                         numMappingsToDelete, numTriplesToDelete, entry.getKey());
896 
897             }
898         }
899 
900     }
901 
902     private static boolean isExampleGraph(final URI uri) {
903         return uri.getLocalName().endsWith("-ex");
904     }
905 
906     private static final class InstanceStatistics {
907 
908         final int numSemanticClasses;
909 
910         final int numSemanticRoles;
911 
912         final int numConceptualizations;
913 
914         final int numLexicalEntries;
915 
916         final int numExamples;
917 
918         final int numAnnotationSets;
919 
920         final int numClassRels;
921 
922         final int numRoleRels;
923 
924         final int numCoreTriples;
925 
926         public InstanceStatistics(final Iterable<? extends QuadModel> models,
927                 final QuadModel tbox) {
928 
929             final Set<URI> roleRelProperties = Sets.newHashSet();
930             for (final Resource rel : tbox.filter(null, RDFS.SUBPROPERTYOF, PMO.ROLE_REL)
931                     .subjects()) {
932                 if (rel instanceof URI && !rel.equals(PMO.ROLE_REL)) {
933                     roleRelProperties.add((URI) rel);
934                 }
935             }
936 
937             final Set<Value> classes = Sets.newHashSet();
938             final Set<Value> roles = Sets.newHashSet();
939             final Set<Value> examples = Sets.newHashSet();
940             final Set<Value> annotationSets = Sets.newHashSet();
941             final Set<Statement> classRels = Sets.newHashSet();
942             final Set<Statement> roleRels = Sets.newHashSet();
943             for (final QuadModel model : models) {
944                 for (final Resource c : model.filter(null, RDF.TYPE, PMO.SEMANTIC_CLASS)
945                         .subjects()) {
946                     if (model.contains(null, PMO.EVOKED_CONCEPT, c)
947                             || model.contains(c, PMO.CLASS_REL, null)
948                             || model.contains(null, PMO.CLASS_REL, c)) {
949                         classes.add(c);
950                     }
951                 }
952                 roles.addAll(model.filter(null, PMO.SEM_ROLE, null).objects());
953                 examples.addAll(model.filter(null, RDF.TYPE, PMO.EXAMPLE).subjects());
954                 annotationSets.addAll(model.filter(null, RDF.TYPE, PMO.ANNOTATION_SET).subjects());
955                 classRels.addAll(model.filter(null, PMO.CLASS_REL, null));
956                 for (final URI roleRelProperty : roleRelProperties) {
957                     roleRels.addAll(model.filter(null, roleRelProperty, null));
958                 }
959             }
960             this.numSemanticClasses = classes.size();
961             this.numSemanticRoles = roles.size();
962             this.numExamples = examples.size();
963             this.numAnnotationSets = annotationSets.size();
964             this.numClassRels = classRels.size();
965             this.numRoleRels = roleRels.size();
966 
967             final Set<Statement> conceptualizations = Sets.newHashSet();
968             final Set<Value> lexicalEntries = Sets.newHashSet();
969             for (final QuadModel model : models) {
970                 for (final Statement stmt : model.filter(null, ONTOLEX.EVOKES, null)) {
971                     if (classes.contains(stmt.getObject()) || roles.contains(stmt.getObject())) {
972                         conceptualizations.add(stmt);
973                         lexicalEntries.add(stmt.getSubject());
974                     }
975                 }
976             }
977             this.numConceptualizations = conceptualizations.size();
978             this.numLexicalEntries = lexicalEntries.size();
979 
980             final Set<Value> coreInstances = Sets.newHashSet();
981             for (final QuadModel model : models) {
982                 for (final Statement stmt : model.filter(null, RDF.TYPE, null)) {
983                     final Value type = stmt.getObject();
984                     if (type.equals(PMO.SEMANTIC_CLASS) || type.equals(PMO.SEMANTIC_ROLE)
985                             || type.equals(PMO.CONCEPTUALIZATION) || type.equals(PMO.MAPPING)
986                             || type.equals(ONTOLEX.LEXICAL_ENTRY) || type.equals(ONTOLEX.FORM)) {
987                         coreInstances.add(stmt.getSubject());
988                     }
989                 }
990             }
991 
992             final Set<Statement> coreStmts = Sets.newHashSet();
993             for (final QuadModel model : models) {
994                 for (final Statement stmt : model) {
995                     if (coreInstances.contains(stmt.getSubject())
996                             || coreInstances.contains(stmt.getObject())) {
997                         if (stmt.getPredicate().equals(ONTOLEX.CANONICAL_FORM)
998                                 || stmt.getPredicate().equals(ONTOLEX.WRITTEN_REP)
999                                 || stmt.getPredicate().equals(PMO.FIRST)) {
1000                             continue; // avoid counting inferences
1001                         }
1002                         final String ns = stmt.getPredicate().getNamespace();
1003                         if (ns.equals(PMO.NAMESPACE) || ns.equals(ONTOLEX.NAMESPACE)
1004                                 || ns.equals(DECOMP.NAMESPACE) || ns.equals(LEXINFO.NAMESPACE)
1005                                 || ns.equals(RDFS.NAMESPACE) || ns.equals(OWL.NAMESPACE)
1006                                 || ns.equals(DCTERMS.NAMESPACE)) {
1007                             coreStmts.add(stmt);
1008                         }
1009                     }
1010                 }
1011             }
1012             this.numCoreTriples = coreStmts.size();
1013         }
1014 
1015     }
1016 
1017     private static final class MappingStatistics {
1018 
1019         final Table<String, String, Integer> conMappings;
1020 
1021         final Table<String, String, Integer> classMappings;
1022 
1023         final Table<String, String, Integer> roleMappings;
1024 
1025         final Table<String, String, Integer> otherMappings;
1026 
1027         public MappingStatistics(final Iterable<? extends QuadModel> models,
1028                 final Iterable<String> sources, final String resource) {
1029 
1030             final Table<String, String, Set<Hash>> conHashes = HashBasedTable.create();
1031             final Table<String, String, Set<Hash>> classHashes = HashBasedTable.create();
1032             final Table<String, String, Set<Hash>> roleHashes = HashBasedTable.create();
1033             final Table<String, String, Set<Hash>> otherHashes = HashBasedTable.create();
1034 
1035             final List<String> sourceKeys = ImmutableList.copyOf(sources);
1036             final List<Pattern> sourcePatterns = ImmutableList.copyOf(sourceKeys.stream()
1037                     .map(s -> Pattern.compile("[-/]" + Pattern.quote(s) + "-")).iterator());
1038 
1039             for (final QuadModel model : models) {
1040                 for (final Resource mapping : model.filter(null, RDF.TYPE, PMO.MAPPING)
1041                         .subjects()) {
1042 
1043                     final Table<String, String, Set<Hash>> hashes;
1044                     if (model.contains(mapping, RDF.TYPE, PMO.CONCEPTUALIZATION_MAPPING)) {
1045                         hashes = conHashes;
1046                     } else if (model.contains(mapping, RDF.TYPE, PMO.SEMANTIC_CLASS_MAPPING)) {
1047                         hashes = classHashes;
1048                     } else if (model.contains(mapping, RDF.TYPE, PMO.SEMANTIC_ROLE_MAPPING)) {
1049                         hashes = roleHashes;
1050                     } else {
1051                         hashes = otherHashes;
1052                     }
1053 
1054                     final Map<String, String> items = Maps.newHashMap();
1055                     for (final Value item : model.filter(mapping, PMO.ITEM, null).objects()) {
1056                         final String str = item.stringValue();
1057                         for (int i = 0; i < sourceKeys.size(); ++i) {
1058                             if (sourcePatterns.get(i).matcher(str).find()) {
1059                                 items.put(sourceKeys.get(i), item.stringValue());
1060                             }
1061                         }
1062                     }
1063 
1064                     for (final String fromSource : items.keySet()) {
1065                         for (final String toSource : items.keySet()) {
1066                             if (fromSource.compareTo(toSource) < 0) {
1067                                 addHash(hashes, fromSource, toSource, items.get(fromSource), "|",
1068                                         items.get(toSource));
1069                             }
1070                         }
1071                     }
1072 
1073                     addHash(hashes, "all", "all",
1074                             Joiner.on('|').join(Ordering.natural().sortedCopy(items.values())));
1075                 }
1076 
1077                 int mappingsCount = model.filter(null, PMO.ONTO_MATCH, null).size();
1078                 if (mappingsCount != 0)
1079                     LOGGER.debug(
1080                             "Processing " + mappingsCount + " for mapping resource " + resource);
1081 
1082                 for (final Statement mapping : model.filter(null, PMO.ONTO_MATCH, null)) {
1083 
1084                     final Resource subject = mapping.getSubject();
1085                     final Value object = mapping.getObject();
1086 
1087                     final Table<String, String, Set<Hash>> hashes;
1088 
1089                     if (model.contains(subject, RDF.TYPE, PMO.CONCEPTUALIZATION)) {
1090                         hashes = conHashes;
1091                     } else if (model.contains(subject, RDF.TYPE, PMO.SEMANTIC_CLASS)) {
1092                         hashes = classHashes;
1093                     } else if (model.contains(subject, RDF.TYPE, PMO.SEMANTIC_ROLE)) {
1094                         hashes = roleHashes;
1095                     } else
1096                         hashes = otherHashes;
1097 
1098                     final String subjStr = subject.stringValue();
1099                     String subjRes = "";
1100                     for (int i = 0; i < sourceKeys.size(); ++i) {
1101                         if (sourcePatterns.get(i).matcher(subjStr).find()) {
1102                             subjRes = sourceKeys.get(i);
1103                             break;
1104                         }
1105                     }
1106                     final String objStr = object.stringValue();
1107 
1108                     addHash(hashes, subjRes, resource, subjStr, "|", objStr);
1109                     addHash(hashes, "all", "all", subjStr, "|", objStr);
1110                 }
1111 
1112             }
1113 
1114             this.conMappings = countHashes(conHashes);
1115             this.classMappings = countHashes(classHashes);
1116             this.roleMappings = countHashes(roleHashes);
1117             this.otherMappings = countHashes(otherHashes);
1118         }
1119 
1120         private static void addHash(final Table<String, String, Set<Hash>> hashes,
1121                 final String row, final String col, final String... hashedStrings) {
1122             Set<Hash> set = hashes.get(row, col);
1123             if (set == null) {
1124                 set = Sets.newHashSet();
1125                 hashes.put(row, col, set);
1126             }
1127             set.add(Hash.murmur3(hashedStrings));
1128         }
1129 
1130         private static Table<String, String, Integer> countHashes(
1131                 final Table<String, String, Set<Hash>> hashes) {
1132             final Table<String, String, Integer> counts = HashBasedTable.create();
1133             for (final Cell<String, String, Set<Hash>> cell : hashes.cellSet()) {
1134                 counts.put(cell.getRowKey(), cell.getColumnKey(), cell.getValue().size());
1135             }
1136             return counts;
1137         }
1138 
1139     }
1140 
1141 }