Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

SentencesBlocks.cc

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2001 Pascal Vincent, Yoshua Bengio, Rejean Ducharme and University of Montreal 00006 // Copyright (C) 2002 Pascal Vincent, Julien Keable, Xavier Saint-Mleux 00007 // 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 /* ******************************************************* 00038 * $Id: SentencesBlocks.cc,v 1.3 2004/02/20 21:14:44 chrish42 Exp $ 00039 ******************************************************* */ 00040 00041 #include "SentencesBlocks.h" 00042 00043 namespace PLearn { 00044 using namespace std; 00045 00048 SentencesBlocks::SentencesBlocks(int n_blocks, VMat d, Vec separator) 00049 : TVec<VMat>(n_blocks) 00050 { 00051 if (n_blocks==1) 00052 { 00053 (*this)[0]=d; 00054 return; 00055 } 00056 int total_size = d->length(); 00057 if (total_size < n_blocks * 2) 00058 PLERROR("SentencesBlocks: can't have blocks of size < 2 in average"); 00059 Vec v(d->width()); 00060 int b=0; 00061 int previous_previous_block=0,previous_beginning_of_block = 0, previous_beginning_of_sentence=0; 00062 int next_target = (int)(total_size / (real)n_blocks); 00063 for (int i=0;i<total_size && b<n_blocks-1;i++) 00064 { 00065 d->getRow(i,v); 00066 if (v==separator) 00067 { 00068 if (i>=next_target) 00069 { 00070 int cut=0; 00071 if (i-next_target < next_target-previous_beginning_of_sentence || 00072 previous_beginning_of_sentence < previous_beginning_of_block) 00073 cut=i+1; 00074 else 00075 { 00076 cut=previous_beginning_of_sentence; 00077 previous_beginning_of_sentence = i+1; 00078 } 00079 (*this)[b++] = d.subMatRows(previous_beginning_of_block, 00080 cut-previous_beginning_of_block); 00081 previous_previous_block = previous_beginning_of_block; 00082 previous_beginning_of_block=cut; 00083 if (b<n_blocks) 00084 { 00085 if (b>n_blocks-3) 00086 next_target = (int)((total_size - cut) / (real)(n_blocks-b)); 00087 else 00088 next_target = (int)(total_size * (real)(b+1.0) / n_blocks); 00089 } 00090 } 00091 else 00092 previous_beginning_of_sentence=i+1; 00093 } 00094 } 00095 if (b==n_blocks-1) 00096 (*this)[b++] = d.subMatRows(previous_beginning_of_block, 00097 total_size-previous_beginning_of_block); 00098 if (b<n_blocks-1) // we have to backtrack, split previous block in two 00099 { 00100 if (b<n_blocks-2) 00101 PLERROR("SentencesBlocks: blocks are too small!"); 00102 if (previous_beginning_of_sentence<previous_beginning_of_block) 00103 PLERROR("SentencesBlocks: Blocks are too small!"); 00104 int cut = previous_beginning_of_sentence; 00105 (*this)[b++] = d.subMatRows(previous_beginning_of_block, 00106 cut-previous_beginning_of_block); 00107 previous_beginning_of_block=cut; 00108 (*this)[b++] = d.subMatRows(previous_beginning_of_block, 00109 total_size-previous_beginning_of_block); 00110 } 00111 } 00112 00113 } // end of namespace PLearn

Generated on Tue Aug 17 16:05:10 2004 for PLearn by doxygen 1.3.7