
EntropyContrast.cc

// -*- C++ -*-

// EntropyContrast.cc
//
// Copyright (C) 2004 Dan Popovici
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//  1. Redistributions of source code must retain the above copyright
//     notice, this list of conditions and the following disclaimer.
//
//  2. Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//
//  3. The name of the authors may not be used to endorse or promote
//     products derived from this software without specific prior written
//     permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
// NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
// TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// This file is part of the PLearn library. For more information on the PLearn
// library, go to the PLearn Web site at www.plearn.org

/* *******************************************************
 * $Id: EntropyContrast.cc,v 1.4 2004/08/17 16:25:57 mariusmuja Exp $
 ******************************************************* */

#include "EntropyContrast.h"
#include <plearn/vmat/VMat_maths.h>
//#include "TMat_maths.h"
#include <plearn/math/plapack.h>
#include <plearn/math/random.h>

namespace PLearn {
using namespace std;

EntropyContrast::EntropyContrast()
    : nconstraints(4) // TODO: change to input_size
{
    learning_rate = 0.001;
    decay_factor = 0;
    weight_real = weight_gen = weight_extra = 1;
    nconstraints = 0;
    n = 0;
    evaluate_every_n_epochs = 1;
    evaluate_first_epoch = true;
    evaluation_method = "no_evaluation";

    // Continuous
    nhidden = 0;
    alpha = 0.0;
}

PLEARN_IMPLEMENT_OBJECT(EntropyContrast,
                        "Performs an EntropyContrast search",
                        "Detailed Description");

void EntropyContrast::declareOptions(OptionList& ol)
{
    declareOption(ol, "nconstraints", &EntropyContrast::nconstraints, OptionBase::buildoption,
                  "The number of constraints to create (this is also the output size)");
    declareOption(ol, "learning_rate", &EntropyContrast::learning_rate, OptionBase::buildoption,
                  "The learning rate of the algorithm");
    declareOption(ol, "decay_factor", &EntropyContrast::decay_factor, OptionBase::buildoption,
                  "The decay factor of the learning rate");

    declareOption(ol, "weight_decay_hidden", &EntropyContrast::weight_decay_hidden, OptionBase::buildoption,
                  "The weight decay for the hidden-layer weights");
declareOption(ol, "weight_decay_output", &EntropyContrast::weight_decay_output, OptionBase::buildoption, 00082 "The decay factor for the output units"); 00083 00084 declareOption(ol, "cost_real", &EntropyContrast::cost_real, OptionBase::buildoption, 00085 "The method to compute the real cost"); 00086 declareOption(ol, "cost_gen", &EntropyContrast::cost_gen, OptionBase::buildoption, 00087 "The method to compute the cost for the generated cost"); 00088 declareOption(ol, "cost_extra", &EntropyContrast::cost_extra, OptionBase::buildoption, 00089 "The method to compute the extra cost"); 00090 declareOption(ol, "gen_method", &EntropyContrast::gen_method, OptionBase::buildoption, 00091 "Method used to generate new points"); 00092 declareOption(ol, "weight_real", &EntropyContrast::weight_real, OptionBase::buildoption, 00093 "the relative weight of the cost of the real data, by default it is 1"); 00094 declareOption(ol, "weight_gen", &EntropyContrast::weight_gen, OptionBase::buildoption, 00095 "the relative weight of the cost of the generated data, by default it is 1"); 00096 declareOption(ol, "weight_extra", &EntropyContrast::weight_extra, OptionBase::buildoption, 00097 "the relative weight of the extra cost, by default it is 1"); 00098 declareOption(ol, "evaluation_method", &EntropyContrast::evaluation_method, OptionBase::buildoption, 00099 "Method for evaluation of constraint learning"); 00100 declareOption(ol, "evaluate_every_n_epochs", &EntropyContrast::evaluate_every_n_epochs, OptionBase::buildoption, 00101 "Number of epochs after which the constraints evaluation is done"); 00102 declareOption(ol, "test_set", &EntropyContrast::test_set, OptionBase::buildoption, 00103 "VMat test set"); 00104 // Continuous options 00105 declareOption(ol, "nhidden", &EntropyContrast::nhidden, OptionBase::buildoption, 00106 "the number of hidden units"); 00107 00108 // Discrete options 00109 00110 // Now call the parent class' declareOptions 00111 inherited::declareOptions(ol); 00112 } 00113 00114 // Functions for the continuous case 00115 00119 void EntropyContrast::initialize_NNcontinuous() 00120 { 00121 fill_random_uniform(w,-1.0,1.0) ; 00122 fill_random_uniform(v,-1.0,1.0) ; 00123 00124 fill_random_uniform(bias_hidden,-1.0,1.0) ; 00125 fill_random_uniform(bias_output,-1.0,1.0) ; 00126 00127 mu_f.fill(0.0) ; 00128 sigma_f.fill(1.0) ; 00129 00130 mu_f_hat.fill(0.0) ; 00131 sigma_f_hat.fill(1.0) ; 00132 00133 // the extra_diversity constraint 00134 mu_g = 0.0 ; 00135 sigma_g = 1.0 ; 00136 sigma_g.fill(1.0) ; 00137 mu_g.fill(0.0) ; 00138 00139 mu_f_square.fill(0.0) ; 00140 sigma_f_square.fill(1.0) ; 00141 00142 00143 full = 1.0 ; 00144 } 00145 00149 void EntropyContrast::computeNNcontinuous_hidden(const Vec& input_units,Vec &hidden_units) 00150 { 00151 00152 for (int i = 0 ; i < nhidden ; ++i ) 00153 { 00154 hidden_units[i] = bias_hidden[i] ; 00155 for (int j = 0 ; j < n ; ++j) 00156 hidden_units[i] += v(i,j) * input_units[j] ; 00157 } 00158 compute_tanh(hidden_units,hidden_units) ; 00159 00160 } 00161 00162 00166 void EntropyContrast::computeNNcontinuous_constraints(Vec& hidden_units,Vec &output_units) 00167 { 00168 for (int i = 0 ; i < nconstraints ; ++i ) 00169 { 00170 output_units[i] = bias_output[i] ; 00171 for (int j = 0 ; j < nhidden ; ++j) 00172 output_units[i] += w(i,j) * hidden_units[j] ; 00173 } 00174 } 00175 00176 00180 void EntropyContrast::get_NNcontinuous_output(const Vec & input_units,Vec &output_units,Vec &hidden_units) 00181 { 00182 00183 computeNNcontinuous_hidden(input_units,hidden_units) ; // 
//! Fill the given vector with independent samples from N(0,1).
void EntropyContrast::gen_normal_0_1(Vec& output)
{
    for (int i = 0; i < output.length(); ++i) {
        output[i] = gaussian_01();
    }
}

//! Update the running mean and variance of the constraints with exponential forgetting.
void EntropyContrast::update_mu_sigma_f(const Vec& f_x, Vec& mu, Vec& sigma)
{
    // update the running mean
    mu = mu * alpha + f_x * (1 - alpha);

    // update the running variance
    sigma = alpha * sigma + (1 - alpha) * square(f_x - mu);
}

//! Update the forgetting factor alpha used by the running averages.
void EntropyContrast::update_alpha(int stage, int current_input_index)
{
    if (stage == 0)
        alpha = 1.0 - 1.0 / (current_input_index + 2);
    else
        alpha = 1.0 - 1.0 / inputsize;
}
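// The running statistics above are exponential moving averages with
// forgetting factor alpha:
//
//     mu    <- alpha * mu    + (1 - alpha) * f(x)
//     sigma <- alpha * sigma + (1 - alpha) * (f(x) - mu)^2
//
// During the first epoch alpha ramps up as 1 - 1/(t+2), so the first samples
// are weighted almost uniformly; afterwards it is held at 1 - 1/inputsize
// (inputsize being the number of training examples, see build_), giving an
// effective memory of roughly one pass over the training set. A scalar
// sketch of the same recursion, with hypothetical data f[0..T-1]:
//
//     real mu = 0.0, sigma = 1.0;
//     for (int t = 0; t < T; ++t) {
//         real a = 1.0 - 1.0 / (t + 2);              // first-epoch schedule
//         mu    = a * mu    + (1 - a) * f[t];
//         sigma = a * sigma + (1 - a) * square(f[t] - mu);
//     }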
//! Compute the diversity cost and its gradient w.r.t. the constraints f_i(x).
void EntropyContrast::compute_diversity_cost(const Vec& f_x, const Vec& cost, Vec& grad_C_extra_cost_wrt_f_x)
{
    cost.fill(0.0);
    for (int i = 0; i < nconstraints; ++i)
    {
        for (int j = 0; j <= i; ++j)
            cost[i] += pow(f_x[j], 2);

        cost[i] /= i + 1;
    }

    Vec full_sum(nconstraints);
    full_sum[0] = pow(f_x[0],2) - (sigma_f[0] + pow(mu_f[0],2));
    for (int i = 1; i < nconstraints; ++i)
    {
        full_sum[i] = full_sum[i-1] + (pow(f_x[i],2) - (sigma_f[i] + pow(mu_f[i],2)));
        grad_C_extra_cost_wrt_f_x[i] = full_sum[i-1] * f_x[i] / train_set.length();
    }
}

//! Compute the Jacobian df/dx of the constraints w.r.t. the input.
void EntropyContrast::compute_df_dx(Mat& df_dx, const Vec& input)
{
    Vec ones(nhidden);
    ones.fill(1);
    Vec hidden(nhidden);
    hidden = product(v, input);
    hidden = hidden + bias_hidden;
    Vec diag(nhidden);
    diag = ones - square(tanh(hidden));
    // df/dx = w * diag(1 - tanh^2(v x + b)) * v
    diagonalizedFactorsProduct(df_dx, w, diag, v);
}

//! Gradient of the variance cost w.r.t. the constraints, for the real data.
void EntropyContrast::get_real_grad_variance_wrt_f(const Vec& f_x, Vec& grad)
{
    for (int i = 0; i < f_x.length(); ++i)
    {
        grad[i] = (f_x[i] - mu_f[i]) / sigma_f[i];
    }
}

//! Gradient of the variance cost w.r.t. the constraints, for the generated data.
void EntropyContrast::get_gen_grad_variance_wrt_f(const Vec& f_x_hat, Vec& grad)
{
    for (int i = 0; i < f_x_hat.length(); ++i)
    {
        grad[i] = (f_x_hat[i] - mu_f_hat[i]) / sigma_f_hat[i];
    }
}

//! Backpropagate the gradient w.r.t. the constraints f(x) into gradients
//! w.r.t. the weights w, v and the biases.
void EntropyContrast::set_NNcontinuous_gradient(Vec& grad_C_real_wrt_f_x, Mat& grad_H_f_x_wrt_w, Mat& grad_H_f_x_wrt_v,
                                                Vec& hidden_units, Vec& input_units,
                                                Vec& grad_H_f_x_wrt_bias_hidden, Vec& grad_H_f_x_wrt_bias_output)
{
    // set the gradient grad_H_f_x_wrt_w
    for (int i = 0; i < nconstraints; ++i)
        for (int j = 0; j < nhidden; ++j)
        {
            grad_H_f_x_wrt_w(i,j) = grad_C_real_wrt_f_x[i] * hidden_units[j];
        }

    // set the gradient grad_H_f_x_wrt_bias_output
    for (int i = 0; i < nconstraints; ++i)
        grad_H_f_x_wrt_bias_output[i] = grad_C_real_wrt_f_x[i];

    // set the gradient grad_H_f_x_wrt_v
    real sum;      // holds sum_k v(i,k) * x[k]
    real grad_tmp; // holds sum_l grad_C_wrt_f[l] * w(l,i)
    for (int i = 0; i < nhidden; ++i)
    {
        sum = 0;
        for (int k = 0; k < n; ++k)
            sum += v(i,k) * input_units[k];

        grad_tmp = 0;
        for (int l = 0; l < nconstraints; ++l)
            grad_tmp += grad_C_real_wrt_f_x[l] * w(l,i);

        for (int j = 0; j < n; ++j)
            grad_H_f_x_wrt_v(i,j) = grad_tmp * (1 - tanh(bias_hidden[i] + sum) * tanh(bias_hidden[i] + sum)) * input_units[j];

        grad_H_f_x_wrt_bias_hidden[i] = grad_tmp * (1 - tanh(bias_hidden[i] + sum) * tanh(bias_hidden[i] + sum));
    }
}
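// The backpropagation above follows from the forward pass
// f(x) = W tanh(V x + b_h) + b_o. With dC/df the incoming gradient and
// z = tanh(V x + b_h), the assignments in set_NNcontinuous_gradient are
//
//     dC/dW   = (dC/df) z'
//     dC/db_o = dC/df
//     dC/dV   = diag(1 - z^2) W' (dC/df) x'
//     dC/db_h = diag(1 - z^2) W' (dC/df)
//
// where 1 - z_i^2 is the derivative of tanh, which appears in the code as
// 1 - tanh(bias_hidden[i] + sum)^2.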
//! Apply the gradients coming from the extra cost to the parameters.
void EntropyContrast::update_NNcontinuous_from_extra_cost()
{
    // TODO: maybe change the learning_rate used for the extra cost

    for (int i = 0; i < nhidden; ++i) {
        for (int j = 0; j < n; ++j) {
            v(i,j) -= learning_rate * grad_extra_wrt_v(i,j);
        }
    }

    for (int i = 0; i < nconstraints; ++i) {
        for (int j = 0; j < nhidden; ++j) {
            w(i,j) -= learning_rate * grad_extra_wrt_w(i,j);
        }
    }

    for (int j = 0; j < nhidden; ++j) {
        bias_hidden[j] -= learning_rate * grad_extra_wrt_bias_hidden[j];
    }
}

//! Gradient step: descend the cost on the real data, ascend it on the generated data.
void EntropyContrast::update_NNcontinuous()
{
    for (int i = 0; i < nhidden; ++i)
        for (int j = 0; j < n; ++j)
            v(i,j) -= learning_rate * (grad_H_f_x_wrt_v(i,j) - grad_H_f_x_hat_wrt_v(i,j)) + weight_decay_hidden * v(i,j);

    for (int i = 0; i < nconstraints; ++i)
        for (int j = 0; j < nhidden; ++j)
            w(i,j) -= learning_rate * (grad_H_f_x_wrt_w(i,j) - grad_H_f_x_hat_wrt_w(i,j)) + weight_decay_output * w(i,j);

    for (int j = 0; j < nhidden; ++j)
        bias_hidden[j] -= learning_rate * (grad_H_f_x_wrt_bias_hidden[j] - grad_H_f_x_hat_wrt_bias_hidden[j]);

    for (int j = 0; j < nconstraints; ++j)
        bias_output[j] -= learning_rate * (grad_H_f_x_wrt_bias_output[j] - grad_H_f_x_hat_wrt_bias_output[j]);
}

//! Gradient of the extra (diversity) cost w.r.t. the Jacobian df/dx.
void EntropyContrast::compute_extra_grad_wrt_df_dx(Mat& grad_C_wrt_df_dx)
{
    for (int i = 0; i < n; i++) {
        grad_C_wrt_df_dx[0][i] = 0.0;
    }

    // compute the dot products g_i . g_j between the rows of df/dx
    Mat dot_g(nconstraints, nconstraints);
    for (int i = 0; i < nconstraints; ++i)
        for (int j = 0; j < i; ++j)
            dot_g(i,j) = dot(df_dx(i), df_dx(j));

    for (int j = 1; j < nconstraints; ++j)
    {
        for (int k = 0; k < n; ++k)
        {
            grad_C_wrt_df_dx(j,k) = 0;
            for (int i = 0; i < j; ++i)
                grad_C_wrt_df_dx(j,k) += 2 * dot_g(j,i) * df_dx(i,k);
        }
    }
}

//! Backpropagate the gradient w.r.t. df/dx into gradients w.r.t. w, v and the hidden bias.
void EntropyContrast::set_NNcontinuous_gradient_from_extra_cost(Mat& grad_C_wrt_df_dx, const Vec& input)
{
    // compute a = diag(1 - tanh^2(v * x + bias)) and b = 1 - tanh(v * x + bias)
    Vec ones(nhidden);
    Vec b(nhidden);
    ones.fill(1);
    Vec hidden(nhidden);
    hidden = product(v, input);
    hidden = hidden + bias_hidden;
    Vec diag(nhidden);
    diag = ones - square(tanh(hidden));

    b = ones - tanh(hidden);

    Mat a(nhidden, nhidden);
    a.fill(0.0);
    addToDiagonal(a, diag);

    // compute dC/dw = dC/dg * v' * a
    Mat temp(nconstraints, nhidden);
    productTranspose(temp, grad_C_wrt_df_dx, v);
    product(grad_extra_wrt_w, temp, a);

    // compute dC/dv = a * w' * dC/dg - 2 * (dC/da * b * a) x'
    {
        Mat tmp(nhidden, nconstraints);
        product(tmp, a, transpose(w));
        product(grad_extra_wrt_v, tmp, grad_C_wrt_df_dx);
    }

    // compute dC/da
    {
        Vec grad_C_wrt_a;
        Mat tmp(nhidden, n);
        product(tmp, transpose(w), grad_C_wrt_df_dx);
        Mat tmp_a(nhidden, nhidden);
        product(tmp_a, tmp, transpose(v));

        // grad_extra_wrt_v += (-2 * diag * b * diag(tmp_a)) * transpose(input)
        Vec temp(nhidden);
        for (int i = 0; i < nhidden; ++i)
        {
            temp[i] = (-2) * tmp_a(i,i) * b[i] * a(i,i);

            for (int j = 0; j < n; ++j)
            {
                grad_extra_wrt_v(i,j) += temp[i] * input[j];
            }
        }
        grad_extra_wrt_bias_hidden = temp;
    }
}
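// The "derivative" extra cost penalizes overlap between the constraints'
// gradient fields. Writing g_i = df_i/dx for the i-th row of the Jacobian,
// the gradient computed by compute_extra_grad_wrt_df_dx,
//
//     dC/dg_j = sum_{i<j} 2 (g_i . g_j) g_i,
//
// is what one gets from the pairwise cost C_extra = sum_{i<j} (g_i . g_j)^2
// differentiated only through the later constraint of each pair: each new
// constraint is pushed to vary in directions orthogonal to the previous
// ones, while row 0 is left free.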
void EntropyContrast::build_()
{
    if (!train_set.isNull())
    {
        n = train_set->width();          // the input dimension

        inputsize = train_set->length(); // the number of training examples

        x.resize(n);                     // the current input sample

        f_x.resize(nconstraints);        // the constraints on the real sample

        grad_C_real_wrt_f_x.resize(nconstraints); // the gradient of the real cost w.r.t. the constraints

        x_hat.resize(n);                 // the current generated sample

        f_x_hat.resize(nconstraints);    // the constraints on the generated sample

        grad_C_generated_wrt_f_x_hat.resize(nconstraints); // the gradient of the generated cost w.r.t. the constraints

        grad_C_extra_cost_wrt_f_x.resize(nconstraints);

        starting_learning_rate = learning_rate;

        n_seen_examples = 0;

        // Continuous

        w.resize(nconstraints, nhidden); // the weights between the hidden layer and the output (the constraints)

        z_x.resize(nhidden);             // the hidden units for the real sample
        z_x_hat.resize(nhidden);         // the hidden units for the generated sample

        v.resize(nhidden, n);            // the weights between the input and the hidden units

        mu_f.resize(nconstraints);       // the running mean of the constraints, used in the computation of certain gradients

        mu_f_hat.resize(nconstraints);   // the running mean of the constraints on generated data

        sigma_f.resize(nconstraints);    // the running variance of the constraints, used in the computation of certain gradients

        sigma_f_hat.resize(nconstraints); // the running variance of the constraints on generated data

        mu_f_square.resize(nconstraints);
        sigma_f_square.resize(nconstraints);

        bias_hidden.resize(nhidden);
        bias_output.resize(nconstraints);

        grad_H_f_x_wrt_bias_output.resize(nconstraints);
        grad_H_f_x_wrt_bias_hidden.resize(nhidden);

        grad_H_f_x_hat_wrt_bias_output.resize(nconstraints);
        grad_H_f_x_hat_wrt_bias_hidden.resize(nhidden);

        grad_H_f_x_hat_wrt_w.resize(nconstraints, nhidden);
        grad_H_f_x_wrt_w.resize(nconstraints, nhidden);

        grad_H_g_wrt_w.resize(nconstraints, nhidden);

        grad_H_f_x_wrt_v.resize(nhidden, n);
        grad_H_f_x_hat_wrt_v.resize(nhidden, n);

        // used for the computation of the extra diversity constraints
        sigma_g.resize(nconstraints);
        mu_g.resize(nconstraints);
        g_x.resize(nconstraints);
        grad_C_wrt_df_dx.resize(nconstraints, n);
        df_dx.resize(nconstraints, n);

        grad_extra_wrt_w.resize(nconstraints, nhidden);
        grad_extra_wrt_v.resize(nhidden, n);

        full_sum.resize(nconstraints);
    }
}

// ### Nothing to add here, simply calls build_
void EntropyContrast::build()
{
    inherited::build();
    build_();
}

void EntropyContrast::makeDeepCopyFromShallowCopy(map<const void*, void*>& copies)
{
    inherited::makeDeepCopyFromShallowCopy(copies);
    // deepCopyField(eigenvecs, copies);
}

int EntropyContrast::outputsize() const
{
    return nconstraints;
}

void EntropyContrast::forget()
{
    // Initialization

    // continuous
    initialize_NNcontinuous();
}
void EntropyContrast::train()
{
    int t;
    manual_seed(12345678);
    forget();
    for (; stage < nstages; stage++)
    {
        cout << getInfo() << endl;
        cout << "Stage = " << stage << endl;
        cout << "Learning rate = " << learning_rate << endl;

        for (t = 0; t < train_set.length(); ++t)
        {
            update_alpha(stage, t); // used in the update of the running averages

            train_set->getRow(t, x);

            ///////////////////////
            // Real data section
            ///////////////////////

            // Get the constraint output for the real data (fill the f_x field);
            // this also computes the hidden units, which are needed later for the gradients.
            get_NNcontinuous_output(x, f_x, z_x);

            update_mu_sigma_f(f_x, mu_f, sigma_f);

            if (cost_real == "constraint_variance")
                update_mu_sigma_f(square(f_x), mu_f_square, sigma_f_square);

            // Get the gradient of the cost on the real data (fill grad_C_real_wrt_f_x)
            if (cost_real == "constraint_variance") {
                // compute the gradient of the cost w.r.t. f_x
                get_real_grad_variance_wrt_f(f_x, grad_C_real_wrt_f_x);
            }

            // Adjust the weight of the gradient
            grad_C_real_wrt_f_x *= weight_real;

            ///////////////////////
            // Extra cost function
            ///////////////////////

            if (cost_extra == "variance_sum_square_constraints") {
                // this also computes the gradient of the extra cost w.r.t. the constraints f_i(x)
                compute_diversity_cost(f_x, g_x, grad_C_extra_cost_wrt_f_x);

                grad_C_extra_cost_wrt_f_x *= weight_extra;
            }

            if (cost_extra == "derivative")
            {
                compute_df_dx(df_dx, x);
                compute_extra_grad_wrt_df_dx(grad_C_wrt_df_dx);

                grad_C_wrt_df_dx *= weight_extra;
            }

            // Set the gradient for the constraints using the real data:
            // the gradient of the cost w.r.t. the weights w, v and the biases
            set_NNcontinuous_gradient(grad_C_real_wrt_f_x, grad_H_f_x_wrt_w, grad_H_f_x_wrt_v, z_x, x,
                                      grad_H_f_x_wrt_bias_hidden, grad_H_f_x_wrt_bias_output);

            if (cost_extra == "derivative") {
                set_NNcontinuous_gradient_from_extra_cost(grad_C_wrt_df_dx, x);
            }

            if (cost_extra == "variance_sum_square_constraints") {
                // combine the real gradient and the extra gradient
                for (int it = 0; it < grad_C_real_wrt_f_x.length(); it++)
                    grad_C_real_wrt_f_x[it] += grad_C_extra_cost_wrt_f_x[it];
            }

            ///////////////////////////
            // Generated data section
            ///////////////////////////

            // Generate a new point (fill x_hat)
            if (gen_method == "N(0,1)") {
                gen_normal_0_1(x_hat);
            }

            // Get the constraint output for the generated data (fill the f_x_hat field)
            get_NNcontinuous_output(x_hat, f_x_hat, z_x_hat);
            update_mu_sigma_f(f_x_hat, mu_f_hat, sigma_f_hat);

            // Get the gradient of the cost on the generated data (fill grad_C_generated_wrt_f_x_hat)
            if (cost_gen == "constraint_variance") {
                get_gen_grad_variance_wrt_f(f_x_hat, grad_C_generated_wrt_f_x_hat);
            }

            // Adjust the weight of the gradient
            for (int it = 0; it < grad_C_generated_wrt_f_x_hat.length(); it++) {
                grad_C_generated_wrt_f_x_hat[it] *= weight_gen;
            }

            // Set the gradient for the constraints using the generated data
            set_NNcontinuous_gradient(grad_C_generated_wrt_f_x_hat, grad_H_f_x_hat_wrt_w, grad_H_f_x_hat_wrt_v, z_x_hat, x_hat,
                                      grad_H_f_x_hat_wrt_bias_hidden, grad_H_f_x_hat_wrt_bias_output);

            ///////////////////////
            // Update
            ///////////////////////

            update_NNcontinuous();
            if (cost_extra == "derivative") {
                update_NNcontinuous_from_extra_cost();
            }
            n_seen_examples++;

            full = alpha * full + (1-alpha) * (f_x[0] * f_x[0] - (sigma_f[0] + mu_f[0]*mu_f[0]))
                                            * (f_x[1] * f_x[1] - (sigma_f[1] + mu_f[1]*mu_f[1]));
        }

        learning_rate = starting_learning_rate / (1 + decay_factor * n_seen_examples);

        ///////////////////////
        // Train evaluation
        ///////////////////////

        if (stage % evaluate_every_n_epochs == 0 && !(!evaluate_first_epoch && stage == 0))
        {
            if (evaluation_method == "dump_all")
            {
                if (n_seen_examples == 250000)
                {
                    FILE* f1 = fopen("gen1.dat", "wt");
                    FILE* f2 = fopen("gen2.dat", "wt");
                    FILE* f3 = fopen("gen3.dat", "wt");

                    for (int i = -10; i <= 10; i += 2)
                        for (int j = -1; j <= 9; j += 2)
                            for (int k = -1; k <= 9; k += 3)
                            {
                                Mat res(2,3);
                                Vec input(3);
                                Vec ones(nhidden);
                                ones.fill(1);
                                input[0] = (real)i / 10;
                                input[1] = (real)j / 10;
                                input[2] = (real)k / 100;
                                Vec hidden(nhidden);
                                hidden = product(v, input);
                                Vec diag(nhidden);
                                diag = ones - square(tanh(hidden));
                                diagonalizedFactorsProduct(res, w, diag, v);
                                fprintf(f1, "%f %f %f %f %f %f\n", (real)i/10, (real)j/10, (real)k/100, res(0,0), res(0,1), res(0,2));
                                fprintf(f2, "%f %f %f %f %f %f\n", (real)i/10, (real)j/10, (real)k/100, res(1,0), res(1,1), res(1,2));
                                real norm0 = sqrt(res(0,0)*res(0,0) + res(0,1)*res(0,1) + res(0,2)*res(0,2));
                                real norm1 = sqrt(res(1,0)*res(1,0) + res(1,1)*res(1,1) + res(1,2)*res(1,2));
                                real angle = res(0,0)/norm0 * res(1,0)/norm1 + res(0,1)/norm0 * res(1,1)/norm1 + res(0,2)/norm0 * res(1,2)/norm1;
                                fprintf(f3, "%f %f %f %f\n", (real)i/10, (real)j/10, (real)k/100, angle);
                                // fprintf(f2, "%f %f %f %f\n", (real)i/10, (real)j/10, res(1,0), res(1,1));
                            }
                    fclose(f1);
                    fclose(f2);
                    fclose(f3);
                    /*
                    FILE* f3 = fopen("gen3.dat", "wt");
                    FILE* f4 = fopen("gen4.dat", "wt");
                    real eps = 0.001;
                    for (int j = 90; j >= -10; --j)
                    {
                        for (int i = -100; i <= 100; ++i)
                        {
                            bool close = false;
                            for (int k = 0; k < train_set.length(); ++k)
                            {
                                if (pow(train_set->get(k,0) - (real)i/100, 2) + pow(train_set->get(k,1) - (real)j/100, 2) < eps)
                                {
                                    close = true;
                                    break;
                                }
                            }
                            if (close)
                                fprintf(f4, "%f ", 1.0);
                            else
                            {
                                fprintf(f4, "%f ", 0.0);
                            }

                            Vec input(n);
                            input[0] = (real)i/100;
                            input[1] = (real)j/100;
                            Vec hidden(nhidden);
                            Vec output(nconstraints);
                            computeNNcontinuous_hidden(input, hidden);
                            computeNNcontinuous_constraints(hidden, output);
                            fprintf(f3, "%f ", output[0]);
                        }
                        fprintf(f3, "\n");
                        fprintf(f4, "\n");
                    }

                    fclose(f3);
                    fclose(f4);
                    // generate data that will be used to create a colormap
                    */

                    exit(0);
                }
                for (int i = 0; i < f_x.length(); ++i)
                    cout << f_x[i] << " ";
                cout << endl << "cov = " << full / train_set.length() << endl;
                cout << "var f_square: " << sigma_f_square[0] << " " << sigma_f_square[1] << endl;
                cout << "corr: " << full / sqrt(sigma_f_square[0] / sqrt(sigma_f_square[1])) << endl;
            }
            // discrete
        }
        cout << "--------------------------------" << endl;
    }
}

void EntropyContrast::computeOutput(const Vec& input, Vec& output) const
{
}

void EntropyContrast::reconstruct(const Vec& output, Vec& input) const
{
}

void EntropyContrast::computeCostsFromOutputs(const Vec& input, const Vec& output,
                                              const Vec& target, Vec& costs) const
{
}

TVec<string> EntropyContrast::getTestCostNames() const
{
    return TVec<string>(1, "squared_reconstruction_error");
}

TVec<string> EntropyContrast::getTrainCostNames() const
{
    return TVec<string>();
}

} // end of namespace PLearn
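// A hypothetical usage sketch (illustrative only, not from the PLearn
// sources): how one might train this learner from C++ under the usual
// PLearner conventions. The option names come from declareOptions() above;
// the dataset file name and all option values are made up.
//
//     #include "EntropyContrast.h"
//     #include <plearn/db/getDataSet.h>
//
//     using namespace PLearn;
//
//     int main()
//     {
//         PP<EntropyContrast> learner = new EntropyContrast();
//         learner->setOption("nconstraints", "2");
//         learner->setOption("nhidden", "10");
//         learner->setOption("learning_rate", "0.001");
//         learner->setOption("cost_real", "constraint_variance");
//         learner->setOption("cost_gen", "constraint_variance");
//         learner->setOption("gen_method", "N(0,1)");
//         learner->setOption("nstages", "50");       // number of epochs
//         learner->build();
//         VMat data = getDataSet("data.amat");       // hypothetical dataset
//         learner->setTrainingSet(data);             // triggers build_()
//         learner->train();
//         return 0;
//     }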
