// NOTE(review): lines 1-46 of the original listing (presumably the file's
// header comment) were lost in extraction, leaving only their line numbers.
// Restore the header from the upstream source if it is needed.
// Include guard for this header.
#ifndef VMat_maths_INC
#define VMat_maths_INC

// Standard headers used directly by the declarations below:
// <map> for the map<> return types, <iostream> for the ostream/cerr default
// argument, <string> for the string parameters.
#include <iostream>
#include <map>
#include <string>

#include <plearn/math/TMat.h>
#include <plearn/math/TMat_maths.h>
#include <plearn/math/StatsCollector.h>
#include <plearn/math/ConditionalStatsCollector.h>
#include "VMat.h"

namespace PLearn {
// NOTE(review): a using-directive in a header leaks std into every includer.
// It is kept because the declarations below use unqualified map, string,
// ostream and cerr, and client code may rely on it as well.
using namespace std;

// Forward declaration; this header only takes VecStatsCollector by reference.
class VecStatsCollector;

// Row indices 0..9 naming one statistic each.
// NOTE(review): presumably these index the rows of the Mat returned by
// computeBasicStats() -- confirm against the implementation.
#define MEAN_ROW 0
#define STDDEV_ROW 1
#define MIN_ROW 2
#define MAX_ROW 3
#define NMISSING_ROW 4
#define NZERO_ROW 5
#define NPOSITIVE_ROW 6
#define NNEGATIVE_ROW 7
#define MEANPOS_ROW 8
#define STDDEVPOS_ROW 9
00085 Mat
computeBasicStats(VMat m);
00086
00088 TVec<StatsCollector>
computeStats(VMat m,
int maxnvalues,
bool report_progress =
true);
00089
00090
void computeStats(VMat m, VecStatsCollector& st,
bool report_progress =
true);
00091
00092
00094 PP<ConditionalStatsCollector>
computeConditionalStats(VMat m,
int condfield, TVec<RealMapping> ranges);
00095
00096
void computeRowMean(VMat d, Vec& meanvec);
00097
void computeMean(VMat d, Vec& meanvec);
00098
void computeWeightedMean(Vec weights, VMat d, Vec& meanvec);
00099
void computeMeanAndVariance(VMat d, Vec& meanvec, Vec& variancevec);
00100
void computeMeanAndStddev(VMat d, Vec& meanvec, Vec& stddevvec);
00101
void computeMeanAndCovar(VMat d, Vec& meanvec, Mat& covarmat, ostream& logstream=cerr);
00102
void computeWeightedMeanAndCovar(Vec weights, VMat d, Vec& meanvec, Mat& covarmat);
00103
00104
// Declaration only. data is read-only (const reference); acf is a non-const
// reference, presumably receiving the result -- confirm.
void autocorrelation_function(const VMat& data, Mat& acf);
void computeInputMean(VMat d, Vec& meanvec);
00109
void computeInputMeanAndCovar(VMat d, Vec& meanvec, Mat& covarmat);
00110
void computeInputMeanAndVariance(VMat d, Vec& meanvec, Vec& var);
00111
00112
00113
// Declaration only. minvec and maxvec are non-const references.
// NOTE(review): presumably receive the per-column minimum and maximum of d
// -- confirm.
void computeRange(VMat d, Vec& minvec, Vec& maxvec);
// Declaration only. Overload without an explicit weights vector; returns a
// real and takes a threshold that defaults to 0.
// NOTE(review): presumably the weights come from d itself and the return
// value is related to their sum -- confirm against the implementation.
real computeWeightedMeanAndCovar(VMat d, Vec& meanvec, Mat& covarmat,
                                 real threshold=0);
00134 Array<Mat>
computeConditionalMeans(VMat trainset,
int targetsize, Mat& basic_stats);
00135
00141 VMat
normalize(VMat d, Vec meanvec, Vec stddevvec);
00142 VMat
normalize(VMat d,
int inputsize,
int ntrain);
00143 inline VMat normalize(
VMat d,
int inputsize) {
return normalize(d,inputsize,d.
length()); }
00144
00154 VMat
grep(VMat d,
int col, Vec values,
bool exclude=
false);
00155
00157 map< real, int>
countOccurencesInColumn(VMat m,
int col);
00158
00161 map< real, TVec<int> >
indicesOfOccurencesInColumn(VMat m,
int col);
00162
00168 VMat
grep(VMat d,
int col, Vec values,
const string& indexfile,
bool exclude=
false);
00169
00174 VMat
filter(VMat d,
const string& indexfile);
00175
00177 VMat
shuffle(VMat d);
00178
00185 VMat
bootstrap(VMat d,
bool reorder=
true,
bool norepeat=
true);
00186
00188 Mat
transposeProduct(VMat m1, VMat m2);
00189
00191 Mat
transposeProduct(VMat m);
00192
00194 Vec
transposeProduct(VMat m1, Vec v2);
00195
00197 Mat
productTranspose(VMat m1, VMat m2);
00198
00200 Mat
product(Mat m1, VMat m2);
00201
00203 VMat
transpose(VMat m1);
00204
00218
// Declaration only. Full-control linear regression entry point; returns real.
// Defaults: return_squared_loss=false, verbose_computation_every=0,
// cholesky=true. sum_squared_Y is a non-const reference.
// NOTE(review): presumably fits outputs on inputs with an L2 penalty of
// weight_decay, writes the coefficients into theta_t, and reuses XtX / XtY /
// sum_squared_Y when use_precomputed_XtX_XtY is true -- confirm against the
// implementation, including what the returned value represents.
real linearRegression(VMat inputs, VMat outputs, real weight_decay, Mat theta_t,
                      bool use_precomputed_XtX_XtY, Mat XtX, Mat XtY,
                      real& sum_squared_Y,
                      bool return_squared_loss=false,
                      int verbose_computation_every=0,
                      bool cholesky = true);
00225 Mat
linearRegression(VMat inputs, VMat outputs,
real weight_decay);
00226
00229
// Declaration only. Weighted counterpart of the full linearRegression()
// overload: adds the gammas VMat and the sum_gammas reference parameter.
// Defaults: return_squared_loss=false, verbose_computation_every=0,
// cholesky=true. sum_squared_Y and sum_gammas are non-const references.
// NOTE(review): presumably gammas holds one weight per example -- confirm
// against the implementation, including what the returned value represents.
real weightedLinearRegression(VMat inputs, VMat outputs, VMat gammas,
                              real weight_decay, Mat theta_t,
                              bool use_precomputed_XtX_XtY, Mat XtX, Mat XtY,
                              real& sum_squared_Y, real& sum_gammas,
                              bool return_squared_loss=false,
                              int verbose_computation_every=0,
                              bool cholesky = true);
00239 VMat
rebalanceNClasses(VMat inputs,
int nclasses,
const string& filename);
00240
00243
void fullyRebalance2Classes(VMat inputs,
const string& filename,
bool save_indices=
true);
00244
00246 Mat
weightedLinearRegression(VMat inputs, VMat outputs, VMat gammas,
real weight_decay);
00247
00253 VMat
temporalThreshold(VMat distr,
int threshold_date,
bool is_before,
00254
int yyyymmdd_col);
00255 VMat
temporalThreshold(VMat distr,
int threshold_date,
bool is_before,
00256
int yyyy_col,
int mm_col,
int dd_col);
00257
00260
// Declaration only. x and y are read-only (const references); r and pvalues
// are non-const references, presumably receiving the results.
// NOTE(review): presumably r holds correlations between columns of x and
// columns of y, and pvalues the matching p-values -- confirm.
void correlations(const VMat& x, const VMat& y, Mat& r, Mat& pvalues);
// Declaration only. neighbors is a non-const reference (presumably filled
// with row indices of dataset -- confirm). ignore_row defaults to -1.
// NOTE(review): presumably -1 means no row is skipped, and the number of
// neighbours is taken from the size of neighbors -- confirm.
void computeNearestNeighbors(VMat dataset, Vec x, TVec<int>& neighbors,
                             int ignore_row=-1);
} // end of namespace PLearn

#endif // VMat_maths_INC