class WordDictionary extends AbstractDictionary
| Modifier and Type | Field and Description |
|---|---|
private char[] |
charIndexTable |
static int |
PRIME_INDEX_LENGTH
Large prime number for hash function
|
private static WordDictionary |
singleInstance |
private short[] |
wordIndexTable
wordIndexTable is guaranteed to hash all Chinese characters in Unicode into
an array of PRIME_INDEX_LENGTH entries.
|
private char[][][] |
wordItem_charArrayTable
To avoid taking too much space, the data structure needed to store the
lexicon requires two multidimensional arrays to store word and frequency.
|
private int[][] |
wordItem_frequencyTable |
CHAR_NUM_IN_FILE, GB2312_CHAR_NUM, GB2312_FIRST_CHAR
| Modifier | Constructor and Description |
|---|---|
private |
WordDictionary() |
| Modifier and Type | Method and Description |
|---|---|
private void |
expandDelimiterData()
The original lexicon puts all information with punctuation into a
chart (from 1 to 3755).
|
private int |
findInTable(short knownHashIndex,
char[] charArray)
Look up the entry corresponding to the word char array,
and return its position in the word list.
|
private short |
getAvaliableTableIndex(char c) |
int |
getFrequency(char[] charArray)
Get the frequency of a word from the dictionary
|
static WordDictionary |
getInstance()
Get the singleton dictionary instance.
|
int |
getPrefixMatch(char[] charArray)
Find the first word in the dictionary that starts with the supplied prefix
|
int |
getPrefixMatch(char[] charArray,
int knownStart)
Find the nth word in the dictionary that starts with the supplied prefix
|
private short |
getWordItemTableIndex(char c) |
boolean |
isEqual(char[] charArray,
int itemIndex)
Return true if the dictionary entry at itemIndex for table charArray[0] is charArray
|
void |
load()
Load coredict.mem internally from the jar file.
|
void |
load(java.lang.String dctFileRoot)
Attempt to load dictionary from provided directory, first trying coredict.mem, falling back on coredict.dct
|
private boolean |
loadFromObj(java.nio.file.Path serialObj) |
private void |
loadFromObjectInputStream(java.io.InputStream serialObjectInputStream) |
private int |
loadMainDataFromFile(java.lang.String dctFilePath)
Load the datafile into this WordDictionary
|
private void |
mergeSameWords() |
private void |
saveToObj(java.nio.file.Path serialObj) |
private boolean |
setTableIndex(char c,
int j) |
private void |
sortEachItems() |
getCCByGB2312Id, getGB2312Id, hash1, hash1, hash2, hash2
private static WordDictionary singleInstance
public static final int PRIME_INDEX_LENGTH
private short[] wordIndexTable
private char[] charIndexTable
private char[][][] wordItem_charArrayTable
private int[][] wordItem_frequencyTable
public static WordDictionary getInstance()
public void load(java.lang.String dctFileRoot)
dctFileRoot - path to dictionary directory
public void load()
throws java.io.IOException,
java.lang.ClassNotFoundException
java.io.IOException - If there is a low-level I/O error.
java.lang.ClassNotFoundException
private boolean loadFromObj(java.nio.file.Path serialObj)
private void loadFromObjectInputStream(java.io.InputStream serialObjectInputStream)
throws java.io.IOException,
java.lang.ClassNotFoundException
java.io.IOException
java.lang.ClassNotFoundException
private void saveToObj(java.nio.file.Path serialObj)
private int loadMainDataFromFile(java.lang.String dctFilePath)
throws java.io.IOException
dctFilePath - path to word dictionary (coredict.dct)
java.io.IOException - If there is a low-level I/O error.
private void expandDelimiterData()
private void mergeSameWords()
private void sortEachItems()
private boolean setTableIndex(char c,
int j)
private short getAvaliableTableIndex(char c)
private short getWordItemTableIndex(char c)
private int findInTable(short knownHashIndex,
char[] charArray)
knownHashIndex - the already-computed position of the word's first
symbol, charArray[0], in the hash table. If it has not been calculated yet,
the function int findInTable(char[] charArray) can be used instead.
charArray - the char array of the word to look up.
public int getPrefixMatch(char[] charArray)
charArray - input prefix
See also: getPrefixMatch(char[], int)
public int getPrefixMatch(char[] charArray,
int knownStart)
charArray - input prefix
knownStart - relative position in the dictionary to start
See also: getPrefixMatch(char[])
public int getFrequency(char[] charArray)
charArray - input word
public boolean isEqual(char[] charArray,
int itemIndex)
charArray - input word
itemIndex - item index for table charArray[0]