00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00044
#include "Dictionary.h"
00045
00046
namespace PLearn {
00047
using namespace std;
00048
00049 Dictionary::Dictionary()
00050 :
00051 dict_type(-1),
00052 update_mode(0),
00053 stem_mode(0),
00054 file_name_dict("")
00055 {
00056
00057
00058 }
00059
00060 Dictionary::Dictionary(
string file_name,
bool up_mode)
00061 {
00062
setStemMode(
NO_STEM);
00063
setUpdateMode(up_mode);
00064
setDictionaryType(
FILE_DICTIONARY);
00065
file_name_dict=file_name;
00066 }
00067
00068 Dictionary::Dictionary(
TVec<string> symbols,
bool up_mode)
00069 {
00070
setStemMode(
NO_STEM);
00071
setUpdateMode(up_mode);
00072
setDictionaryType(
VECTOR_DICTIONARY);
00073
vector_dict=symbols;
00074 }
00075
00076 Dictionary::Dictionary(
WordNetOntology *ont,
int ontology_type,
bool up_mode,
bool stem)
00077 {
00078
setStemMode(stem);
00079
setUpdateMode(up_mode);
00080
setDictionaryType(ontology_type);
00081
wno=ont;
00082 }
00083
00084
00085
00086
// PLearn object-implementation macro for Dictionary. The arguments are the
// class name, its one-line description and its multi-line help text (the
// help text is still the template placeholder).
PLEARN_IMPLEMENT_OBJECT(Dictionary,
                        "Mapping string->int and int->string",
                        "MULTI LINE\nHELP");
00090
00091 void Dictionary::declareOptions(
OptionList& ol)
00092 {
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
declareOption(ol,
"dict_type", &Dictionary::dict_type, OptionBase::buildoption,
"type of the dictionary");
00107
declareOption(ol,
"file_name_dict", &Dictionary::file_name_dict, OptionBase::buildoption,
"file name for the dictionary");
00108
declareOption(ol,
"vector_dict", &Dictionary::vector_dict, OptionBase::buildoption,
"vector for the dictionary");
00109
declareOption(ol,
"update_mode", &Dictionary::update_mode, OptionBase::buildoption,
"update_mode : 0(no_update)/1(update)");
00110
00111
00112 inherited::declareOptions(ol);
00113 }
00114
00115 void Dictionary::build_()
00116 {
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
int saved_up_mode=
update_mode;
00127
00128 update_mode =
UPDATE;
00129
string line;
00130
00131
00132
if(
dict_type ==
FILE_DICTIONARY){
00133 ifstream ifs(
file_name_dict.c_str());
00134
if (!ifs)
PLERROR(
"Cannot open file %s",
file_name_dict.c_str());
00135
while(!ifs.eof()){
00136 getline(ifs, line,
'\n');
00137
if(line ==
"")
continue;
00138
getId(line);
00139 }
00140 ifs.close();
00141 }
else if(
dict_type ==
VECTOR_DICTIONARY){
00142
for(
int i=0; i<
vector_dict.
size(); i++){
00143
getId(
vector_dict[i]);
00144 }
00145 }
else if(
dict_type ==
WORDNET_WORD_DICTIONARY){
00146
00147
if (update_mode==
NO_UPDATE){
00148
if (!
wno->
containsWord(
OOV_TAG)){
00149
wno->
extractWord(
OOV_TAG,
ALL_WN_TYPE,
true,
true,
false);
00150 }
00151 }
00152 }
else{
00153
PLERROR(
"Bad dictionary type %d",
dict_type);
00154 }
00155
00156
00157 update_mode=saved_up_mode;
00158
if(update_mode==
NO_UPDATE){
00159
00160
getId(
OOV_TAG);
00161 }
00162
00163
00164 }
00165
00166
00167 void Dictionary::build()
00168 {
00169 inherited::build();
00170
build_();
00171 }
00172
00173 int Dictionary::size()
00174 {
00175
if(
dict_type ==
VECTOR_DICTIONARY ||
dict_type ==
FILE_DICTIONARY)
00176 {
00177
return int_to_string.size();
00178 }
00179
00180
if(
dict_type ==
WORDNET_WORD_DICTIONARY)
00181 {
00182
return wno->
getVocSize();
00183 }
00184
00185
PLERROR(
"Dictionary is of incorrect type %d",
dict_type);
00186
return -1;
00187 }
00188
00189 void Dictionary::setUpdateMode(
bool up_mode)
00190 {
00191
update_mode =up_mode;
00192 }
00193
00194 void Dictionary::setStemMode(
bool stem)
00195 {
00196
stem_mode =stem;
00197 }
00198
00199 void Dictionary::setDictionaryType(
int type)
00200 {
00201
dict_type=type;
00202 }
00203
00204
00205
00206 int Dictionary::getId(
string symbol)
00207 {
00208
00209
00210
00211
00212
00213
if(
update_mode==
UPDATE){
00214
if(
dict_type ==
VECTOR_DICTIONARY ||
dict_type ==
FILE_DICTIONARY)
00215 {
00216
if(
string_to_int.find(symbol) ==
string_to_int.end()){
00217
00218
int index=
string_to_int.size();
00219
string_to_int[symbol] = index;
00220
int_to_string[index] = symbol;
00221 cout <<
"add "<< symbol <<
endl;
00222 }
00223
00224
return string_to_int[symbol];
00225 }
00226
00227
if(
dict_type ==
WORDNET_WORD_DICTIONARY){
00228
if(!
wno->
containsWord(symbol)){
00229
wno->
extractWord(symbol,
ALL_WN_TYPE,
true,
true,
false);
00230 }
00231
return wno->
getWordId(symbol);
00232 }
00233
if(
dict_type ==
WORDNET_SENSE_DICTIONARY){
00234
vector<string> tokens =
split(symbol,
"/");
00235
if(tokens.size()!=2)
PLERROR(
"Badly formed word for sense extraction %s",symbol.c_str());
00236
if(!
wno->
containsWord(tokens[0])){
00237
wno->
extractWord(symbol,
ALL_WN_TYPE,
true,
true,
false);
00238 }
00239
return wno->
getSynsetIDForSenseKey(
wno->
getWordId(tokens[0]),tokens[1]);
00240 }
00241
PLERROR(
" Dictionary::getId : bad dictionary type %d",
dict_type);
00242 }
else{
00243
if(
dict_type ==
VECTOR_DICTIONARY ||
dict_type ==
FILE_DICTIONARY){
00244
if(
string_to_int.find(symbol) ==
string_to_int.end()){
00245
00246
return string_to_int[
OOV_TAG];
00247 }
else{
00248
return string_to_int[symbol];
00249 }
00250 }
00251
if(
dict_type ==
WORDNET_WORD_DICTIONARY){
00252
return wno->
getWordId(symbol);
00253 }
00254
if(
dict_type ==
WORDNET_SENSE_DICTIONARY){
00255
vector<string> tokens =
split(symbol,
"/");
00256
if(tokens.size()!=2)
PLERROR(
"Badly formed word for sense extraction %s",symbol.c_str());
00257
return wno->
getSynsetIDForSenseKey(
wno->
getWordId(tokens[0]),tokens[1]);
00258 }
00259
PLERROR(
" Dictionary::getId : bad dictionary type %d",
dict_type);
00260 }
00261
return 1;
00262 }
00263
00264 string Dictionary::getSymbol(
int id)
00265 {
00266
if(
dict_type ==
VECTOR_DICTIONARY ||
dict_type ==
FILE_DICTIONARY)
00267 {
00268
if(
id >= 0 &&
id < (
int)
int_to_string.size())
00269
return int_to_string[
id];
00270
else
00271
PLERROR(
"Entry id is doesn't satisfy 0 <= %d < %d",
id,
int_to_string.size());
00272 }
00273
00274
if(
dict_type ==
WORDNET_WORD_DICTIONARY)
00275 {
00276
return wno->
getWord(
id);
00277 }
00278
00279
PLERROR(
"Dictionary is of incorrect type %d",
dict_type);
00280
return "";
00281 }
00282
00283
00284
00285 void Dictionary::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
00286 {
00287 inherited::makeDeepCopyFromShallowCopy(copies);
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297 }
00298
00299 }