// -*- C++ -*-

// PLearn (A C++ Machine Learning Library)
// Copyright (C) 1998 Pascal Vincent
// Copyright (C) 1999,2000 Pascal Vincent, Yoshua Bengio and University of Montreal
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library.
// For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org

/* *******************************************************
 * $Id: TypesNumeriques.h,v 1.2 2004/07/12 13:39:30 tihocan Exp $
 * AUTHORS: Steven Pigeon & Yoshua Bengio
 * This file is part of the PLearn library.
 ******************************************************* */

// Utilities to translate a text stream into a sequence of tokens
// that are more manageable to model for building a statistical
// language model. In particular, to convert numeric-looking words
// into a more compact representation where numbers are replaced by
// special codes (see "rules" below).

#ifndef MODULE_TYPES_NUMERIQUES
#define MODULE_TYPES_NUMERIQUES

#include <iostream>

namespace PLearn {
// NOTE(review): a using-directive in a header leaks `std` into every file
// that includes it. It is kept here only because existing includers may
// depend on it; consider removing it once they have been audited.
using namespace std;

// Character sets defined in the implementation file.
// NOTE(review): presumably the characters treated as digits and as letters
// by the classification routines below -- confirm against the definition.
extern const char DIGITsymbols[];
extern const char ALPHAsymbols[];

// NOTE(review): presumably returns true when `s` contains at least one of
// the characters listed in `symbols` -- confirm against the implementation.
bool containsChar(const char *s, const char *symbols);

// Bit flags describing the kind of numeric token a word represents.
// Every enumerator except NT_NOT_NUMERIC occupies its own bit, so several
// of them can be combined in a single int (see tRule::attributs).
typedef enum {
    NT_NOT_NUMERIC          = 0x0000,
    NT_ORDINAL              = 0x0001,
    NT_CARDINAL             = 0x0002,
    NT_CURRENCY             = 0x0004,
    NT_PREFIXED             = 0x0008,
    NT_SUFFIXED             = 0x0010,
    NT_RANGE                = 0x0020,
    NT_TIME                 = 0x0040,
    NT_CODE                 = 0x0080,
    NT_PERCENT              = 0x0100,
    NT_UNKNOWN_NUMERIC_TYPE = 0x8000
} eNumericType;

// One entry of the classification table: a compact pattern string and the
// combination of eNumericType flags associated with it.
typedef struct
{
    // Points at a string literal in `rules`, hence pointer-to-const:
    // binding a literal to a plain `char *` is ill-formed since C++11.
    const char *pattern;
    // Bitwise OR of eNumericType flags.
    int attributs;
} tRule;

// Table mapping compact patterns to their numeric-type flags.
// The last entry has an empty pattern and acts as the end-of-table
// sentinel; it carries NT_UNKNOWN_NUMERIC_TYPE.
// NOTE(review): the pattern alphabet ('#', 'n', 'a', 'r', 'o', '$', ':',
// '\'') is presumably what compactRepresentation() emits -- confirm.
const tRule rules[] =
{
    {"#an",    NT_CARDINAL | NT_PREFIXED             },
    {"#n",     NT_CARDINAL                           },
    {"#na",    NT_CARDINAL | NT_SUFFIXED             },
    {"#ar",    NT_RANGE    | NT_PREFIXED             },
    {"#r",     NT_RANGE                              },
    {"#ra",    NT_RANGE    | NT_SUFFIXED             },
    {"#n'a",   NT_ORDINAL  | NT_SUFFIXED             },
    {"#ao",    NT_ORDINAL  | NT_PREFIXED             },
    {"#o",     NT_ORDINAL                            },
    {"#oa",    NT_ORDINAL  | NT_SUFFIXED             },
    {"#o'a",   NT_ORDINAL  | NT_SUFFIXED             },
    {"#$n",    NT_CURRENCY                           },
    {"#$na",   NT_CURRENCY | NT_SUFFIXED             },
    {"#$r",    NT_CURRENCY | NT_RANGE                },
    {"#$ra",   NT_CURRENCY | NT_RANGE | NT_SUFFIXED  },
    {"#n:n",   NT_TIME                               },
    {"#n:n:n", NT_TIME                               },
    {"#r:n",   NT_CODE                               },
    {"#n:r",   NT_CODE                               },
    {"",       NT_UNKNOWN_NUMERIC_TYPE               }
};

// The functions below are defined in the implementation file; the notes
// are inferred from names and signatures only -- confirm before relying
// on them.

// NOTE(review): presumably returns a printable name for the flag set `a`.
const char *eNumericTypeNames(int a);

// NOTE(review): presumably returns the eNumericType flags matching `word`.
int numericType(const char *word);

// NOTE(review): presumably tells whether `s` looks like a numeric token.
bool looksNumeric(const char *s);

// NOTE(review): takes a mutable buffer, so presumably rewrites `t` in
// place into its compact pattern form.
void compactRepresentation(char *t);

} // end of namespace PLearn

#endif
//MODULE_TYPES_NUMERIQUES