Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

IntStream.h

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2002 Pascal Vincent, Yoshua Bengio and University of Montreal 00006 // 00007 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 /* IntStream.h 00037 00038 This class represents a read-only stream of integers (int). 00039 00040 It can be constructed from a set of files (using the 00041 FilesIntStream subclass). 00042 00043 */ 00044 00045 00048 #ifndef MODULE_INTSTREAM 00049 #define MODULE_INTSTREAM 00050 00051 //#include <cstdio> 00052 //#include "VMat.h" //!< to make a sub-class of VMat with an IntStream inside 00053 #include <plearn/vmat/RowBufferedVMatrix.h> 00054 00055 namespace PLearn { 00056 using namespace std; 00057 00058 class IntStream : public Object { 00059 00060 protected: 00061 long pos; 00062 00063 public: 00064 IntStream(int p=0) : pos(p) {} 00065 00067 virtual void seek(long position) { if (position!=0) PLERROR("IntStream::position should be 0"); } 00068 00071 virtual int next() { return 0; } 00072 00074 virtual int current() { return 0; } 00075 00077 virtual long size() { return 0; } 00078 00080 virtual int position() { return pos; } 00081 00083 virtual long remaining() { return size()-position(); } 00084 00089 virtual void reopen() {} 00090 00091 virtual ~IntStream() {} 00092 }; 00093 00094 // JS-HACK 00095 #define USE_JS_HACK 0 00096 #if USE_JS_HACK 00097 #define MAX_VOC_SIZE 3 00098 #endif 00099 // a VMat which is implemented with an IntStream 00100 class IntStreamVMatrix : public RowBufferedVMatrix { 00101 protected: 00102 PP<IntStream> stream; //<! where the data actually is 00103 mutable int position; //<! position in the stream corresponding to the current window 00104 00109 int dummy_input; 00114 int end_of_sequence_symbol; 00115 public: 00116 IntStreamVMatrix() {} 00117 IntStreamVMatrix(IntStream& s, int window_size, int dummy_input, int eos); 00118 virtual void getRow(int i, Vec v) const; 00119 }; 00120 00121 class FilesIntStream : public IntStream { 00122 00123 protected: 00124 int n_files; 00125 const char* *file_names; 00126 FILE** fp; 00127 int current_file; 00128 int next_pos_in_current_file; 00129 int* sizes; 00130 int total_size; 00131 int current_value; 00132 00133 FilesIntStream(FilesIntStream& x) { PLERROR("FilesIntStream can't be copied"); } 00134 00136 void read_current(); 00137 00138 public: 00139 FilesIntStream(int nfiles, const char* files[]); 00140 00142 virtual void seek(long position); 00143 00145 virtual int next(); 00146 00148 virtual int current(); 00149 00151 virtual long size(); 00152 00157 virtual void reopen(); 00158 00159 virtual ~FilesIntStream(); 00160 }; 00161 00162 class InMemoryIntStream : public IntStream { 00163 protected: 00164 int* data; 00165 int length; 00166 00167 public: 00169 InMemoryIntStream(IntStream& stream); 00170 00171 virtual void seek(long position) 00172 { 00173 pos = position; 00174 #ifdef BOUNDCHECK 00175 if (pos<0 || pos>=length) PLERROR("InMemoryIntStream::seek(%d) out of range (0,%d)", 00176 position,length-1); 00177 #endif 00178 } 00179 virtual int next() { 00180 int v=data[pos]; 00181 pos++; 00182 if (pos>=length) pos=0; 00183 return v; 00184 } 00185 virtual int current() { return data[pos]; } 00186 virtual long size() { return length; } 00187 00188 virtual ~InMemoryIntStream() { delete[] data; } 00189 }; 00190 00191 //*!< ***************************************************** 00197 FilesIntStream* word_sequences2files_int_stream(const char* word_sequences_file); 00198 00199 } // end of namespace PLearn 00200 00201 #endif

Generated on Tue Aug 17 15:55:40 2004 for PLearn by doxygen 1.3.7