Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

Dictionary.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // Dictionary.cc 00004 // 00005 // Copyright (C) 2004 Hugo Larochelle, Christopher Kermorvant 00006 // 00007 // Redistribution and use in source and binary forms, with or without 00008 // modification, are permitted provided that the following conditions are met: 00009 // 00010 // 1. Redistributions of source code must retain the above copyright 00011 // notice, this list of conditions and the following disclaimer. 00012 // 00013 // 2. Redistributions in binary form must reproduce the above copyright 00014 // notice, this list of conditions and the following disclaimer in the 00015 // documentation and/or other materials provided with the distribution. 00016 // 00017 // 3. The name of the authors may not be used to endorse or promote 00018 // products derived from this software without specific prior written 00019 // permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00024 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00026 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 // 00032 // This file is part of the PLearn library. For more information on the PLearn 00033 // library, go to the PLearn Web site at www.plearn.org 00034 00035 /* ******************************************************* 00036 * $Id: Dictionary.cc,v 1.2 2004/08/13 15:16:34 kermorvc Exp $ 00037 ******************************************************* */ 00038 00039 // Authors: Hugo Larochelle, Christopher Kermorvant 00040 00044 #include "Dictionary.h" 00045 00046 namespace PLearn { 00047 using namespace std; 00048 00049 Dictionary::Dictionary() 00050 : 00051 dict_type(-1), 00052 update_mode(0), 00053 stem_mode(0), 00054 file_name_dict("") 00055 { 00056 // ### You may or may not want to call build_() to finish building the object 00057 // build_(); 00058 } 00059 00060 Dictionary::Dictionary(string file_name, bool up_mode) 00061 { 00062 setStemMode(NO_STEM); 00063 setUpdateMode(up_mode); 00064 setDictionaryType(FILE_DICTIONARY); 00065 file_name_dict=file_name; 00066 } 00067 00068 Dictionary::Dictionary(TVec<string> symbols, bool up_mode) 00069 { 00070 setStemMode(NO_STEM); 00071 setUpdateMode(up_mode); 00072 setDictionaryType(VECTOR_DICTIONARY); 00073 vector_dict=symbols; 00074 } 00075 00076 Dictionary::Dictionary(WordNetOntology *ont,int ontology_type,bool up_mode, bool stem) 00077 { 00078 setStemMode(stem); 00079 setUpdateMode(up_mode); 00080 setDictionaryType(ontology_type); 00081 wno=ont; 00082 } 00083 00084 00085 00086 PLEARN_IMPLEMENT_OBJECT(Dictionary, 00087 "Mapping string->int and int->string", 00088 "MULTI LINE\nHELP" 00089 ); 00090 00091 void Dictionary::declareOptions(OptionList& ol) 00092 { 00093 // ### Declare all of this object's options here 00094 // ### For the "flags" of each option, you should typically specify 00095 // ### one of OptionBase::buildoption, OptionBase::learntoption or 00096 // ### OptionBase::tuningoption. Another possible flag to be combined with 00097 // ### is OptionBase::nosave 00098 00099 // ### ex: 00100 // declareOption(ol, "myoption", &Dictionary::myoption, OptionBase::buildoption, 00101 // "Help text describing this option"); 00102 // ... 00103 00104 // Now call the parent class' declareOptions 00105 00106 declareOption(ol, "dict_type", &Dictionary::dict_type, OptionBase::buildoption, "type of the dictionary"); 00107 declareOption(ol, "file_name_dict", &Dictionary::file_name_dict, OptionBase::buildoption, "file name for the dictionary"); 00108 declareOption(ol, "vector_dict", &Dictionary::vector_dict, OptionBase::buildoption, "vector for the dictionary"); 00109 declareOption(ol, "update_mode", &Dictionary::update_mode, OptionBase::buildoption, "update_mode : 0(no_update)/1(update)"); 00110 00111 00112 inherited::declareOptions(ol); 00113 } 00114 00115 void Dictionary::build_() 00116 { 00117 // ### This method should do the real building of the object, 00118 // ### according to set 'options', in *any* situation. 00119 // ### Typical situations include: 00120 // ### - Initial building of an object from a few user-specified options 00121 // ### - Building of a "reloaded" object: i.e. from the complete set of all serialised options. 00122 // ### - Updating or "re-building" of an object after a few "tuning" options have been modified. 00123 // ### You should assume that the parent class' build_() has already been called. 00124 00125 // save update mode for later 00126 int saved_up_mode=update_mode; 00127 // set the dictionary in update mode to insert the words 00128 update_mode = UPDATE; 00129 string line; 00130 00131 00132 if(dict_type == FILE_DICTIONARY){ 00133 ifstream ifs(file_name_dict.c_str()); 00134 if (!ifs) PLERROR("Cannot open file %s", file_name_dict.c_str()); 00135 while(!ifs.eof()){ 00136 getline(ifs, line, '\n'); 00137 if(line == "") continue; 00138 getId(line); 00139 } 00140 ifs.close(); 00141 }else if(dict_type == VECTOR_DICTIONARY){ 00142 for(int i=0; i<vector_dict.size(); i++){ 00143 getId(vector_dict[i]); 00144 } 00145 }else if(dict_type == WORDNET_WORD_DICTIONARY){ 00146 // Add OOV if necessary 00147 if (update_mode==NO_UPDATE){ 00148 if (!wno->containsWord(OOV_TAG)){ 00149 wno->extractWord(OOV_TAG, ALL_WN_TYPE, true, true, false); 00150 } 00151 } 00152 }else{ 00153 PLERROR("Bad dictionary type %d",dict_type); 00154 } 00155 00156 // restore update mode; 00157 update_mode=saved_up_mode; 00158 if(update_mode==NO_UPDATE){ 00159 // the dictionary must contain oov 00160 getId(OOV_TAG); 00161 } 00162 00163 00164 } 00165 00166 // ### Nothing to add here, simply calls build_ 00167 void Dictionary::build() 00168 { 00169 inherited::build(); 00170 build_(); 00171 } 00172 00173 int Dictionary::size() 00174 { 00175 if(dict_type == VECTOR_DICTIONARY || dict_type == FILE_DICTIONARY) 00176 { 00177 return int_to_string.size(); 00178 } 00179 00180 if(dict_type == WORDNET_WORD_DICTIONARY) 00181 { 00182 return wno->getVocSize(); 00183 } 00184 00185 PLERROR("Dictionary is of incorrect type %d", dict_type); 00186 return -1; 00187 } 00188 00189 void Dictionary::setUpdateMode(bool up_mode) 00190 { 00191 update_mode =up_mode; 00192 } 00193 00194 void Dictionary::setStemMode(bool stem) 00195 { 00196 stem_mode =stem; 00197 } 00198 00199 void Dictionary::setDictionaryType(int type) 00200 { 00201 dict_type=type; 00202 } 00203 00204 00205 00206 int Dictionary::getId(string symbol) 00207 { 00208 // Gives the id of a symbol in the dictionary 00209 // If the symbol is not in the dictionary, 00210 // returns index of OOV_TAG if update_mode = NO_UPDATE 00211 // insert the new word otherwise and return its index 00212 00213 if(update_mode== UPDATE){ 00214 if(dict_type == VECTOR_DICTIONARY || dict_type == FILE_DICTIONARY) 00215 { 00216 if(string_to_int.find(symbol) == string_to_int.end()){ 00217 // word not found, add it 00218 int index=string_to_int.size(); 00219 string_to_int[symbol] = index; 00220 int_to_string[index] = symbol; 00221 cout << "add "<< symbol <<endl; 00222 } 00223 00224 return string_to_int[symbol]; 00225 } 00226 00227 if(dict_type == WORDNET_WORD_DICTIONARY){ 00228 if(!wno->containsWord(symbol)){ 00229 wno->extractWord(symbol, ALL_WN_TYPE, true, true, false); 00230 } 00231 return wno->getWordId(symbol); 00232 } 00233 if(dict_type == WORDNET_SENSE_DICTIONARY){ 00234 vector<string> tokens = split(symbol, "/"); 00235 if(tokens.size()!=2)PLERROR("Badly formed word for sense extraction %s",symbol.c_str()); 00236 if(!wno->containsWord(tokens[0])){ 00237 wno->extractWord(symbol, ALL_WN_TYPE, true, true, false); 00238 } 00239 return wno->getSynsetIDForSenseKey( wno->getWordId(tokens[0]),tokens[1]); 00240 } 00241 PLERROR(" Dictionary::getId : bad dictionary type %d",dict_type); 00242 }else{ 00243 if(dict_type == VECTOR_DICTIONARY || dict_type == FILE_DICTIONARY){ 00244 if(string_to_int.find(symbol) == string_to_int.end()){ 00245 // word not found, return oov 00246 return string_to_int[OOV_TAG]; 00247 }else{ 00248 return string_to_int[symbol]; 00249 } 00250 } 00251 if(dict_type == WORDNET_WORD_DICTIONARY){ 00252 return wno->getWordId(symbol); 00253 } 00254 if(dict_type == WORDNET_SENSE_DICTIONARY){ 00255 vector<string> tokens = split(symbol, "/"); 00256 if(tokens.size()!=2)PLERROR("Badly formed word for sense extraction %s",symbol.c_str()); 00257 return wno->getSynsetIDForSenseKey( wno->getWordId(tokens[0]),tokens[1]); 00258 } 00259 PLERROR(" Dictionary::getId : bad dictionary type %d",dict_type); 00260 } 00261 return 1; 00262 } 00263 00264 string Dictionary::getSymbol(int id) 00265 { 00266 if(dict_type == VECTOR_DICTIONARY || dict_type == FILE_DICTIONARY) 00267 { 00268 if(id >= 0 && id < (int)int_to_string.size()) 00269 return int_to_string[id]; 00270 else 00271 PLERROR("Entry id is doesn't satisfy 0 <= %d < %d", id, int_to_string.size()); 00272 } 00273 00274 if(dict_type == WORDNET_WORD_DICTIONARY) 00275 { 00276 return wno->getWord(id); 00277 } 00278 00279 PLERROR("Dictionary is of incorrect type %d", dict_type); 00280 return ""; 00281 } 00282 00283 00284 00285 void Dictionary::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies) 00286 { 00287 inherited::makeDeepCopyFromShallowCopy(copies); 00288 00289 // ### Call deepCopyField on all "pointer-like" fields 00290 // ### that you wish to be deepCopied rather than 00291 // ### shallow-copied. 00292 // ### ex: 00293 // deepCopyField(trainvec, copies); 00294 00295 // ### Remove this line when you have fully implemented this method. 00296 //PLERROR("Dictionary::makeDeepCopyFromShallowCopy not fully (correctly) implemented yet!"); 00297 } 00298 00299 } // end of namespace PLearn

Generated on Tue Aug 17 15:51:19 2004 for PLearn by doxygen 1.3.7