00001 // -*- C++ -*- 00002 00003 // Dictionary.h 00004 // 00005 // Copyright (C) 2004 Hugo Larochelle Christopher Kermorvant 00006 // 00007 // Redistribution and use in source and binary forms, with or without 00008 // modification, are permitted provided that the following conditions are met: 00009 // 00010 // 1. Redistributions of source code must retain the above copyright 00011 // notice, this list of conditions and the following disclaimer. 00012 // 00013 // 2. Redistributions in binary form must reproduce the above copyright 00014 // notice, this list of conditions and the following disclaimer in the 00015 // documentation and/or other materials provided with the distribution. 00016 // 00017 // 3. The name of the authors may not be used to endorse or promote 00018 // products derived from this software without specific prior written 00019 // permission. 00020 // 00021 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00022 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00023 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00024 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00025 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00026 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00027 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00028 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00029 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 // 00032 // This file is part of the PLearn library. For more information on the PLearn 00033 // library, go to the PLearn Web site at www.plearn.org 00034 00035 /* ******************************************************* 00036 * $Id: Dictionary.h,v 1.2 2004/08/13 15:16:34 kermorvc Exp $ 00037 ******************************************************* */ 00038 00039 // Authors: Hugo Larochelle, Christopher Kermorvant 00040 00044 #ifndef Dictionary_INC 00045 #define Dictionary_INC 00046 #include <plearn/base/stringutils.h> 00047 #include <plearn/base/Object.h> 00048 #include <map> 00049 #include <string> 00050 #include <plearn_learners/language/WordNet/WordNetOntology.h> 00051 00052 #define VECTOR_DICTIONARY 1 00053 #define FILE_DICTIONARY 2 00054 #define WORDNET_WORD_DICTIONARY 3 00055 #define WORDNET_SENSE_DICTIONARY 4 00056 00057 #define NO_UPDATE 0 00058 #define UPDATE 1 00059 #define DEFAULT_UPDATE 0 00060 00061 // For words only 00062 #define NO_STEM 0 00063 #define STEM 1 00064 #define OOV_TAG "<oov>" 00065 00066 //for WordNet senses only 00067 // No sense exists for this word 00068 #define NO_SENSE -1 00069 // Sense exists but is hidden (un-known) 00070 #define HIDDEN_SENSE 0 00071 00072 namespace PLearn { 00073 using namespace std; 00074 00075 class Dictionary: public Object 00076 { 00077 00078 private: 00079 00080 typedef Object inherited; 00081 00082 protected: 00083 // ********************* 00084 // * protected options * 00085 // ********************* 00086 00087 // ### declare protected option fields (such as learnt parameters) here 00088 // ... 00089 00090 public: 00091 00092 // ************************ 00093 // * public build options * 00094 // ************************ 00095 00097 map<string,int> string_to_int; 00099 map<int,string> int_to_string; 00101 WordNetOntology *wno; 00103 int dict_type; 00105 int update_mode; 00107 int stem_mode; 00109 string file_name_dict; 00111 TVec<string> vector_dict; 00112 00113 // ### declare public option fields (such as build options) here 00114 // ... 00115 00116 // **************** 00117 // * Constructors * 00118 // **************** 00119 00121 // ### Make sure the implementation in the .cc 00122 // ### initializes all fields to reasonable default values. 00123 Dictionary(); 00124 00125 00127 00130 Dictionary(string file_name,bool up_mode=DEFAULT_UPDATE); 00131 00133 00136 Dictionary(TVec<string> symbols,bool up_mode=DEFAULT_UPDATE); 00137 00139 00143 Dictionary(WordNetOntology *ont,int ontology_type,bool up_mode=DEFAULT_UPDATE, bool stem =NO_STEM); 00144 00145 00146 // ****************** 00147 // * Object methods * 00148 // ****************** 00149 00150 private: 00152 void build_(); 00153 00154 protected: 00156 static void declareOptions(OptionList& ol); 00157 00158 public: 00159 // Declares other standard object methods. 00160 // ### If your class is not instantiatable (it has pure virtual methods) 00161 // ### you should replace this by PLEARN_DECLARE_ABSTRACT_OBJECT_METHODS 00162 PLEARN_DECLARE_OBJECT(Dictionary); 00163 00165 int size(); 00166 00168 void setUpdateMode(bool up_mode); 00169 00171 void setStemMode(bool stem); 00172 00175 void setDictionaryType(int type); 00176 00181 int getId(string symbol); 00182 00184 string getSymbol(int id); 00185 00186 // simply calls inherited::build() then build_() 00187 virtual void build(); 00188 00190 virtual void makeDeepCopyFromShallowCopy(map<const void*, void*>& copies); 00191 00192 }; 00193 00194 // Declares a few other classes and functions related to this class 00195 DECLARE_OBJECT_PTR(Dictionary); 00196 00197 } // end of namespace PLearn 00198 00199 #endif