00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00040
#include "TestDependencyCommand.h"
00041
#include <plearn/db/getDataSet.h>
00042
#include <plearn/math/stats_utils.h>
00043
#include <plearn/vmat/VMat_maths.h>
00044
#include <plearn/sys/procinfo.h>
00045
00046
namespace PLearn {
00047
using namespace std;
00048
00050
//! Static registry object for this command. It is constructed once, at static
//! initialisation time, from a freshly allocated TestDependencyCommand.
//! NOTE(review): presumably PLearnCommandRegistry's constructor records the
//! command so it can be looked up by name -- confirm in PLearnCommandRegistry.
PLearnCommandRegistry TestDependencyCommand::reg_(new TestDependencyCommand);
00051
00053 void TestDependencyCommand::run(
const vector<string>& args)
00054 {
00055
if(args.size()<3 || args.size()>3)
00056
PLERROR(
"test-dependencies expects 3 arguments, check the help");
00057
00058
VMat data =
getDataSet(args[0]);
00059
string x_spec = args[1];
00060
string y_spec = args[2];
00061
int x_col=0, y_col=0;
00062
if (x_spec[0]!=
'@')
00063 x_col =
toint(x_spec);
00064
else {
00065
string x_name = x_spec.substr(1,x_spec.length()-1);
00066 x_col = data->fieldIndex(x_name);
00067
if (x_col<0)
PLERROR(
"could not find field named %s in %s",x_name.c_str(),args[0].c_str());
00068 }
00069
if (y_spec[0]!=
'@')
00070 y_col =
toint(y_spec);
00071
else {
00072
string y_name = y_spec.substr(1,y_spec.length()-1);
00073 y_col = data->fieldIndex(y_name);
00074
if (y_col<0)
PLERROR(
"could not find field named %s in %s",y_name.c_str(),args[0].c_str());
00075 }
00076
00077
00078
TVec<int> columns(2);
00079 columns[0]=x_col;
00080 columns[1]=y_col;
00081
Mat xy_mat = data.
columns(columns).
toMat();
00082
VMat x =
VMat(xy_mat.
column(0));
00083 VMat y = VMat(xy_mat.
column(1));
00084
00085
Mat spearman_pvalue(1,1);
00086
Mat spearman_r(1,1);
00087
testSpearmanRankCorrelation(
x,y,spearman_r,spearman_pvalue);
00088
Mat linear_pvalue(1,1);
00089
Mat linear_r(1,1);
00090
correlations(
x,y,linear_r,linear_pvalue);
00091
00092 cout <<
"test-dependency between " << data->
fieldName(x_col) <<
" (column " << x_col <<
") and "
00093 << data->
fieldName(y_col) <<
" (column " << y_col <<
"):" <<
endl;
00094 cout <<
"rank correlation = " << spearman_r(0,0) <<
" {p-value = " << spearman_pvalue(0,0) <<
"}" <<
endl;
00095 cout <<
"linear correlation = " << linear_r(0,0) <<
" {p-value = " << linear_pvalue(0,0) <<
"}" <<
endl;
00096 }
00097
00098 }
00099