1 package eu.fbk.dkm.premon.util;
2
3 import com.google.common.base.Charsets;
4 import com.google.common.base.Objects;
5 import com.google.common.base.Preconditions;
6 import com.google.common.collect.*;
7 import com.google.common.io.Resources;
8 import eu.fbk.rdfpro.util.Environment;
9 import net.didion.jwnl.JWNL;
10 import net.didion.jwnl.JWNLException;
11 import net.didion.jwnl.data.*;
12 import net.didion.jwnl.dictionary.Dictionary;
13
14 import javax.annotation.Nullable;
15 import java.io.ByteArrayInputStream;
16 import java.io.InputStream;
17 import java.util.*;
18
19 public final class WordNet {
20
21 public static final String POS_NOUN = "n";
22
23 public static final String POS_VERB = "v";
24
25 public static final String POS_ADJECTIVE = "a";
26
27 public static final String POS_ADVERB = "r";
28
29 private static final Map<String, String> BBN_TO_SYNSET;
30
31 private static final Map<String, List<String>> SYNSET_TO_BBN;
32
33 private static final Map<String, String> BBN_TO_SST;
34
35 private static final Map<String, String> SYNSET_TO_SST;
36
37
38 private static String dictionaryPath = Objects.firstNonNull(
39 Environment.getProperty("wordnet.home"), "wordnet");
40
41 private static Dictionary dictionary;
42
43 static {
44
45 final Map<String, String> bbnToSynset = Maps.newLinkedHashMap();
46 bbnToSynset.put("person", "00007846-n");
47 bbnToSynset.put("organization", "08008335-n");
48 bbnToSynset.put("gpe", "00027167-n");
49 bbnToSynset.put("location", "00027167-n");
50 bbnToSynset.put("event", "00029378-n");
51 bbnToSynset.put("product", "04007894-n");
52 bbnToSynset.put("fac", "03315023-n");
53 bbnToSynset.put("work_of_art", "02743547-n");
54 bbnToSynset.put("law", "06532330-n");
55 bbnToSynset.put("language", "06282651-n");
56 bbnToSynset.put("quantity", "00033615-n");
57 bbnToSynset.put("date", "15113229-n");
58 bbnToSynset.put("time", "15113229-n");
59 bbnToSynset.put("percent", "13817526-n");
60 bbnToSynset.put("money", "13384557-n");
61 bbnToSynset.put("ordinal", "14429985-n");
62 bbnToSynset.put("cardinal", "13582013-n");
63 BBN_TO_SYNSET = ImmutableMap.copyOf(bbnToSynset);
64
65 final Map<String, List<String>> synsetToBbn = Maps.newHashMap();
66 for (final Map.Entry<String, String> entry : bbnToSynset.entrySet()) {
67 final String bbn = entry.getKey();
68 final String synset = entry.getValue();
69 final List<String> list = synsetToBbn.get(synset);
70 if (list == null) {
71 synsetToBbn.put(synset, ImmutableList.of(bbn));
72 } else {
73 synsetToBbn.put(
74 synset,
75 Ordering.natural().immutableSortedCopy(
76 Iterables.concat(list, ImmutableList.of(bbn))));
77 }
78 }
79 SYNSET_TO_BBN = ImmutableMap.copyOf(synsetToBbn);
80
81 final Map<String, String> bbnToSst = Maps.newLinkedHashMap();
82 bbnToSst.put("person", "B-noun.person");
83 bbnToSst.put("organization", "B-noun.group");
84 bbnToSst.put("gpe", "B-noun.location");
85 bbnToSst.put("location", "B-noun.location");
86 bbnToSst.put("event", "B-noun.event");
87 bbnToSst.put("product", "B-noun.artifact");
88 bbnToSst.put("fac", "B-noun.artifact");
89 bbnToSst.put("work_of_art", "B-noun.artifact");
90 bbnToSst.put("law", "B-noun.communication");
91 bbnToSst.put("language", "B-noun.communication");
92 bbnToSst.put("quantity", "B-noun.quantity");
93 bbnToSst.put("date", "B-noun.time");
94 bbnToSst.put("time", "B-noun.time");
95 bbnToSst.put("percent", "B-noun.relation");
96 bbnToSst.put("money", "B-noun.possession");
97 bbnToSst.put("ordinal", "B-noun.state");
98 bbnToSst.put("cardinal", "B-noun.quantity");
99 BBN_TO_SST = ImmutableMap.copyOf(bbnToSst);
100
101 final Map<String, String> synsetToSst = Maps.newHashMap();
102 synsetToSst.put("00007846-n", "B-noun.person");
103 synsetToSst.put("00027167-n", "B-noun.location");
104 synsetToSst.put("00033615-n", "B-noun.quantity");
105 SYNSET_TO_SST = ImmutableMap.copyOf(synsetToSst);
106 }
107
108 public static Dictionary getDictionary() {
109 synchronized (WordNet.class) {
110 if (dictionary == null) {
111 JWNL.shutdown();
112 try {
113 final String properties = Resources.toString(
114 WordNet.class.getClassLoader().getResource("jwnl.xml"), Charsets.UTF_8).replace(
115 "DICTIONARY_PATH_PLACEHOLDER", dictionaryPath);
116 final InputStream stream = new ByteArrayInputStream(
117 properties.getBytes(Charsets.UTF_8));
118 JWNL.initialize(stream);
119 dictionary = Dictionary.getInstance();
120 } catch (final Throwable ex) {
121 JWNL.shutdown();
122 throw new Error("Cannot initialize JWNL using dictionary path '"
123 + dictionaryPath + "'", ex);
124 }
125 }
126 return dictionary;
127 }
128 }
129
130 private static void releaseDictionary() {
131 synchronized (WordNet.class) {
132 dictionary = null;
133 JWNL.shutdown();
134 }
135 }
136
137 private static Synset getSynset(final String id) {
138 final POS pos = POS.getPOSForKey(getPOS(id));
139 final long offset = getOffset(id);
140 try {
141 synchronized (WordNet.class) {
142 return getDictionary().getSynsetAt(pos, offset);
143 }
144 } catch (final JWNLException ex) {
145 throw new Error(ex);
146 }
147 }
148
149
150
151
152 public static void init() {
153 getDictionary();
154 }
155
156 public static List<String> getSynsetsForLemma(String lemma, String pos) {
157 try {
158 synchronized (WordNet.class) {
159 IndexWord indexWord = getDictionary().lookupIndexWord(POS.getPOSForKey(pos), lemma);
160 if (indexWord == null) {
161 return new ArrayList<>();
162 }
163 Synset[] synsets = indexWord.getSenses();
164 ArrayList<String> ret = new ArrayList<>();
165 for (int i = 0; i < synsets.length; i++) {
166 Synset synset = synsets[i];
167 ret.add(getSynsetID(synset.getOffset(), synset.getPOS().getKey()));
168 }
169
170 return ret;
171 }
172 } catch (final JWNLException ex) {
173 throw new Error(ex);
174 }
175 }
176
177 public static String getPath() {
178 synchronized (WordNet.class) {
179 return dictionaryPath;
180 }
181 }
182
183 public static void setPath(final String dictionaryPath) {
184 Preconditions.checkNotNull(dictionaryPath);
185 synchronized (WordNet.class) {
186 if (!WordNet.dictionaryPath.equals(dictionaryPath)) {
187 releaseDictionary();
188 WordNet.dictionaryPath = dictionaryPath;
189 }
190 }
191 }
192
193
194
195 public static String getSynsetID(final long offset, final String pos) {
196 return String.format("%08d-%s", offset, pos);
197 }
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212 @Nullable
213 public static String getSynsetID(@Nullable final String readableSynsetID) {
214 if (readableSynsetID == null) {
215 return null;
216 }
217 try {
218 final int length = readableSynsetID.length();
219 final int offset = readableSynsetID.lastIndexOf('-');
220 final String lemma = readableSynsetID.substring(0, offset);
221 final int index = Integer.parseInt(readableSynsetID.substring(offset + 1, length - 1)) - 1;
222 final POS pos = POS.getPOSForKey(readableSynsetID.substring(length - 1, length));
223 final IndexWord word;
224 synchronized (WordNet.class) {
225 word = getDictionary().getIndexWord(pos, lemma);
226 }
227 final Synset synset = word.getSenses()[index];
228 return getSynsetID(synset.getOffset(), pos.getKey());
229 } catch (final JWNLException ex) {
230 throw new Error(ex);
231 } catch (final Throwable ex) {
232 throw new IllegalArgumentException("Illegal (readable) synset ID " + readableSynsetID,
233 ex);
234 }
235 }
236
237 @Nullable
238 public static String getReadableSynsetID(@Nullable final String synsetID) {
239 if (synsetID == null) {
240 return null;
241 }
242 final Synset synset = getSynset(synsetID);
243 if (synset == null) {
244 throw new IllegalArgumentException("Illegal synset ID " + synsetID);
245 }
246 final String lemma = synset.getWords()[0].getLemma();
247 final POS pos = POS.getPOSForKey(getPOS(synsetID));
248 try {
249 final IndexWord word;
250 synchronized (WordNet.class) {
251 word = getDictionary().lookupIndexWord(pos, lemma);
252 }
253 final Synset[] senses = word.getSenses();
254 for (int i = 0; i < senses.length; ++i) {
255 if (senses[i].equals(synset)) {
256 return lemma + "-" + (i + 1) + pos.getKey();
257 }
258 }
259 throw new Error("Could not determine sense index for lemma " + lemma + " and synset "
260 + synsetID);
261 } catch (final JWNLException ex) {
262 throw new Error(ex);
263 }
264
265 }
266
267 public static String getPOS(final String synsetID) {
268 Preconditions.checkNotNull(synsetID);
269 final int index = synsetID.lastIndexOf('-');
270 if (index == synsetID.length() - 1 || synsetID.isEmpty()) {
271 throw new IllegalArgumentException("Cannot extract POS from '" + synsetID
272 + "' - invalid string");
273 }
274 return ""
275 + Character.toLowerCase(index < 0 ? synsetID.charAt(0) : synsetID
276 .charAt(index + 1));
277 }
278
279 public static long getOffset(String synsetID) {
280 Preconditions.checkNotNull(synsetID);
281 try {
282 final int index = synsetID.lastIndexOf('-');
283 if (index > 0) {
284 synsetID = synsetID.substring(0, index);
285 }
286 return Long.parseLong(synsetID);
287 } catch (final Throwable ex) {
288 throw new IllegalArgumentException("Cannot extract offset from '" + synsetID + "'", ex);
289 }
290 }
291
292 public static Set<String> getLemmas(final String synsetID) {
293 final Set<String> lemmas = Sets.newLinkedHashSet();
294 final Synset synset = getSynset(synsetID);
295 if (synset != null) {
296 for (final Word word : synset.getWords()) {
297 lemmas.add(word.getLemma());
298 }
299 }
300 return lemmas;
301 }
302
303 public static Set<String> getGenericSet(final String synsetID,
304 final PointerType... pointerTypes) {
305 final Set<String> ret = Sets.newHashSet();
306 final Synset synset = getSynset(synsetID);
307 if (synset != null) {
308 for (final PointerType pointerType : pointerTypes) {
309 for (final Pointer pointer : synset.getPointers(pointerType)) {
310 try {
311 final Synset target = pointer.getTargetSynset();
312 ret.add(getSynsetID(target.getOffset(), target.getPOS().getKey()));
313 } catch (final Throwable ex) {
314 throw new RuntimeException(ex);
315 }
316 }
317 }
318 }
319 return ret;
320 }
321
322 public static Set<String> getGenericSet(final String synsetID, final boolean recursive,
323 final PointerType... pointerTypes) {
324 if (!recursive) {
325 return getGenericSet(synsetID, pointerTypes);
326 }
327 final Set<String> result = Sets.newHashSet();
328 final List<String> queue = Lists.newArrayList(synsetID);
329 while (!queue.isEmpty()) {
330 final String id = queue.remove(0);
331 if (result.add(id)) {
332 queue.addAll(getGenericSet(id, pointerTypes));
333 }
334 }
335 return result;
336 }
337
338 public static Set<String> getHypernyms(final String synsetID) {
339 return getGenericSet(synsetID, PointerType.HYPERNYM);
340 }
341
342 public static Set<String> getHyponyms(final String synsetID) {
343 return getGenericSet(synsetID, PointerType.HYPONYM);
344 }
345
346 public static Set<String> getHypernyms(final String synsetID, final boolean recursive) {
347 return getGenericSet(synsetID, recursive, PointerType.HYPERNYM,
348 PointerType.INSTANCE_HYPERNYM);
349 }
350
351 public static Set<String> getHyponims(final String synsetID, final boolean recursive) {
352 return getGenericSet(synsetID, recursive, PointerType.HYPONYM,
353 PointerType.INSTANCES_HYPONYM);
354 }
355
356
357 @Nullable
358 public static String mapBBNToSynset(@Nullable final String bbn) {
359 return bbn == null ? null : BBN_TO_SYNSET.get(bbn.trim().toLowerCase());
360 }
361
362
363 @Nullable
364 public static String mapSynsetToBBN(@Nullable final String synsetID) {
365 final List<String> ids = Lists.newLinkedList();
366 ids.add(synsetID);
367 while (!ids.isEmpty()) {
368 final String id = ids.remove(0);
369 final List<String> bbns = SYNSET_TO_BBN.get(id);
370 if (bbns != null && !bbns.isEmpty()) {
371 return bbns.get(0);
372 }
373 try {
374 final Synset source = getSynset(id);
375 final List<String> hypernymIDs = Lists.newArrayList();
376 for (final PointerType type : new PointerType[] { PointerType.HYPERNYM,
377 PointerType.INSTANCE_HYPERNYM }) {
378 for (final Pointer pointer : source.getPointers(type)) {
379 final Synset target = pointer.getTargetSynset();
380 hypernymIDs.add(getSynsetID(target.getOffset(), target.getPOS().getKey()));
381 }
382 }
383 Collections.sort(hypernymIDs);
384 ids.addAll(hypernymIDs);
385 } catch (final JWNLException ex) {
386 throw new Error("Unexpected exception (!)", ex);
387 }
388 }
389 return null;
390 }
391
392 @Nullable
393 public static String mapSynsetToSST(@Nullable final String synsetID) {
394 if (synsetID != null) {
395 final String sst = SYNSET_TO_SST.get(synsetID);
396 if (sst != null) {
397 return sst;
398 }
399 return "B-" + getSynset(synsetID).getLexFileName();
400 }
401 return null;
402 }
403
404
405 @Nullable
406 public static String mapBBNToSST(@Nullable final String bbn) {
407 if (bbn != null) {
408 return BBN_TO_SST.get(bbn.trim().toLowerCase());
409 }
410 return null;
411 }
412 }