#include "AffineTransformWeightPenalty.h"
00044
#include "Var_utils.h"
00045
00046
namespace PLearn {
00047
using namespace std;
00048
00049
00050
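// AffineTransformWeightPenalty is a scalar Var computing a weight-decay
// penalty over the parameters of an affine transformation. Its input is the
// parameter matrix of the transformation, with the biases in row 0 and the
// weights in the remaining rows. Depending on L1_penalty_, the penalty is a
// weighted sum of absolute values (L1) or of squares (L2), with separate
// coefficients for the weights (weight_decay_) and the biases (bias_decay_).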
PLEARN_IMPLEMENT_OBJECT(AffineTransformWeightPenalty,
                        "Weight decay penalty on the parameters of an affine transformation",
                        "NO HELP");

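// The penalty is a single scalar, so this Var is always of size 1x1.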
void AffineTransformWeightPenalty::recomputeSize(int& l, int& w) const
{ l=1; w=1; }

void AffineTransformWeightPenalty::declareOptions(OptionList &ol)
{
    declareOption(ol, "weight_decay_", &AffineTransformWeightPenalty::weight_decay_, OptionBase::buildoption,
                  "Coefficient of the penalty applied to the weights (all rows of the input matrix except the first).");
    declareOption(ol, "bias_decay_", &AffineTransformWeightPenalty::bias_decay_, OptionBase::buildoption,
                  "Coefficient of the penalty applied to the biases (first row of the input matrix).");
    declareOption(ol, "L1_penalty_", &AffineTransformWeightPenalty::L1_penalty_, OptionBase::buildoption,
                  "If true, use an L1 penalty (sum of absolute values); otherwise use an L2 penalty (sum of squares).");
    inherited::declareOptions(ol);
}

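// fprop computes the penalty from the parameter matrix held in input
// (row 0 = biases b, rows 1..length-1 = weights W):
//
//   L1: value = weight_decay_ * sum |W(i,j)| + bias_decay_ * sum |b(j)|
//   L2: value = weight_decay_ * sum W(i,j)^2 + bias_decay_ * sum b(j)^2
//
// The bias term is only added when bias_decay_ is nonzero.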
void AffineTransformWeightPenalty::fprop()
{
    if (L1_penalty_)
    {
        if (input->length()>1)
            valuedata[0] = weight_decay_*sumabs(input->matValue.subMatRows(1,input->length()-1));
        else
            valuedata[0] = 0;
        if (bias_decay_!=0)
            valuedata[0] += bias_decay_*sumabs(input->matValue(0));
    }
    else
    {
        if (input->length()>1)
            valuedata[0] = weight_decay_*sumsquare(input->matValue.subMatRows(1,input->length()-1));
        else
            valuedata[0] = 0;
        if (bias_decay_!=0)
            valuedata[0] += bias_decay_*sumsquare(input->matValue(0));
    }
}

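// bprop accumulates the penalty's gradient into the input's gradient,
// scaled by gradientdata[0], the gradient flowing into this (scalar) Var:
//
//   L1: d(penalty)/dW(i,j) = decay * sign(W(i,j))   (entries at exactly 0 are skipped)
//   L2: d(penalty)/dW(i,j) = 2 * decay * W(i,j)     (hence the two(...) factor below)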
void AffineTransformWeightPenalty::bprop()
{
    int l = input->length() - 1;
    if (L1_penalty_)
    {
        if (!input->matGradient.isCompact())
            PLERROR("AffineTransformWeightPenalty::bprop, L1 penalty currently not handling non-compact weight matrix");
        int n = input->width();
        if (weight_decay_!=0)
        {
            real delta = weight_decay_ * gradientdata[0];
            // Weights start at row 1 (row 0 holds the biases); since the
            // matrix is compact, all l*n weight entries can be scanned linearly.
            real* w = input->matValue[1];
            real* d_w = input->matGradient[1];
            int tot = l * n;
            for (int i = 0; i < tot; i++)
            {
                if (w[i] > 0)
                    d_w[i] += delta;
                else if (w[i] < 0)
                    d_w[i] -= delta;
            }
        }
        if (bias_decay_!=0)
        {
            real* d_biases = input->matGradient[0];
            real* biases = input->matValue[0];
            for (int i = 0; i < n; i++)
                if (biases[i] > 0)
                    d_biases[i] += bias_decay_*gradientdata[0];
                else if (biases[i] < 0)
                    d_biases[i] -= bias_decay_*gradientdata[0];
        }
    }
    else
    {
        multiplyAcc(input->matGradient.subMatRows(1,l), input->matValue.subMatRows(1,l),
                    two(weight_decay_)*gradientdata[0]);
        if (bias_decay_!=0)
            multiplyAcc(input->matGradient(0), input->matValue(0),
                        two(bias_decay_)*gradientdata[0]);
    }
}

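// A minimal usage sketch (assumptions flagged): Vars of this class are
// typically created through an inline helper declared in
// AffineTransformWeightPenalty.h and added to a network's cost. The helper
// name and signature below are assumed from the option names, not taken
// from this file; check the header for the actual declaration.
//
//     Var params(1 + n_inputs, n_outputs);  // row 0: biases, rows 1..: weights
//     Var penalty = affine_transform_weight_penalty(params,
//                                                   0.01,   // weight_decay
//                                                   0.0,    // bias_decay
//                                                   true);  // L1 penalty
//     Var total_cost = data_cost + penalty; // hypothetical data_cost Var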
} // namespace PLearn