Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

databases.h

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999,2000 Pascal Vincent, Yoshua Bengio and University of Montreal 00006 // 00007 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 00038 /* ******************************************************* 00039 * $Id: databases.h,v 1.9 2004/08/04 14:10:49 mariusmuja Exp $ 00040 * AUTHORS: Pascal Vincent 00041 * This file is part of the PLearn library. 00042 ******************************************************* */ 00043 00044 00047 #ifndef DATABASES_INC 00048 #define DATABASES_INC 00049 00050 #include "UCISpecification.h" 00051 #include <plearn/vmat/VMat.h> 00052 //#include "NistDB.h" 00053 00054 namespace PLearn { 00055 using namespace std; 00056 00057 00059 Mat input2dSet(const string& filename="data2d.amat"); 00060 00062 void normalizeDataSets(Mat& training_set, Mat& test_set); 00063 void normalizeDataSets(Mat& training_set, Mat& validation_set, Mat& test_set); 00064 void normalizeDataSets(VMat& training_set, VMat& validation_set, VMat& test_set); 00065 00066 // split the data_set into training_set, valid_set and test_set 00067 void splitTrainValidTest(VMat &data_set,VMat &train_set,VMat &valid_set, 00068 real valid_fraction,VMat &test_set, real test_fraction, 00069 bool normalize=true); 00070 00071 // reduce nb of inputs (usefull for rapid experimentation) 00072 VMat reduceInputSize(real fraction,VMat data); 00073 00074 // reduce nb of examples 00075 VMat reduceDataSetSize(real fraction,VMat data); 00076 00078 void remapClassnums(VMat& data, real remap_minval_to, real remap_maxval_to); 00079 00081 int loadBreastCancer(VMat& training_set, VMat& validation_set, VMat& test_set, int ntrain, int nvalid, bool uniq=true); 00082 int loadDiabetes(VMat& training_set, VMat& validation_set, VMat& test_set, int ntrain, int nvalid); 00083 int loadATT800(VMat& training_set, VMat& test_set); 00084 int loadLetters(VMat& training_set, VMat& validation_set, VMat& test_set, char* letters="ABCDEFGHIJKLMNOPQRSTUVWXYZ", real validation_fraction=.2, real test_fraction=.4,bool do_shuffle=true); 00085 int loadLetters(VMat& training_set, VMat& validation_set, VMat& test_set, int n_letters, real validation_fraction=.2, real test_fraction=.4,bool do_shuffle=true); 00086 00087 void loadCorelDatamat(int classnum, Mat& train, Mat& valid, Mat& test); 00088 Mat smoothCorelHisto(Mat& data); 00089 void loadCorel(Mat& training_set, Mat& validation_set, Mat& test_set, int negative_class=2, int positive_class=3); 00090 void loadCallxx(int year, VMat& d); 00091 00092 VMat loadBreastCancerWisconsin(bool normalize=true, bool uniq=true); 00093 VMat loadSonar(); 00094 VMat loadIonosphere(); 00095 VMat loadDiabetes(bool normalize=true); 00096 VMat loadPimaIndians(bool normalize=true); 00097 VMat loadLetters(const char* class0, const char* class1, bool normalize=true); 00098 VMat loadLetters(bool normalize=true); 00099 VMat loadLetters(int n_letters, bool do_shuffle); 00100 void loadUSPS(VMat& trainset, VMat& testset, bool use_smooth=true); 00101 VMat loadUSPS(bool use_smooth=true); 00102 VMat loadHousing(bool normalize=true); 00107 void loadUCI(VMat& trainset, VMat& testset, VMat& allset, string db_spec, string id, bool &normalize, const string& type); 00109 void loadUCISet(VMat& data, string file, PP<UCISpecification> uci_spec); 00111 void loadUCIAMat(VMat& data, string file, PP<UCISpecification> uci_spec); 00112 00113 00127 inline string loadClassificationDatasetHelp() 00128 { 00129 return " Preprogrammed datasets are: \n" 00130 " 2d \n" 00131 " letters \n" 00132 " breast \n" 00133 " usps \n" 00134 " mnist \n" 00135 " usps0 ... usps9 \n" 00136 " nist0 ... usps9 \n" 00137 " They can optionally be followed by :size in which case only the 'size' \n" 00138 " first rows will be kept. \n"; 00139 } 00140 00141 void loadClassificationDataset(const string& dbname, int& inputsize, int& nclasses, VMat& trainset, VMat& testset, bool normalizeinputs, VMat& allset); 00142 00143 00144 } // end of namespace PLearn 00145 00146 00147 #endif 00148 00149 00150 00151 00152 00153 00154

Generated on Tue Aug 17 15:50:55 2004 for PLearn by doxygen 1.3.7