00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036 
00037 
00038 
00040 
#include "TestDependencyCommand.h"
00041 
#include <plearn/db/getDataSet.h>
00042 
#include <plearn/math/stats_utils.h>
00043 
#include <plearn/vmat/VMat_maths.h>
00044 
#include <plearn/sys/procinfo.h>
00045 
00046 
namespace PLearn {
00047 
using namespace std;
00048 
00050 
PLearnCommandRegistry TestDependencyCommand::reg_(
new TestDependencyCommand);
00051 
00053 void TestDependencyCommand::run(
const vector<string>& args)
00054 {
00055   
if(args.size()<3 || args.size()>3)
00056     
PLERROR(
"test-dependencies expects 3 arguments, check the help");
00057 
00058   
VMat data = 
getDataSet(args[0]);
00059   
string x_spec = args[1];
00060   
string y_spec = args[2];
00061   
int x_col=0, y_col=0;
00062   
if (x_spec[0]!=
'@')
00063     x_col = 
toint(x_spec);
00064   
else {
00065     
string x_name = x_spec.substr(1,x_spec.length()-1);
00066     x_col = data->fieldIndex(x_name);
00067     
if (x_col<0) 
PLERROR(
"could not find field named %s in %s",x_name.c_str(),args[0].c_str());
00068   }
00069   
if (y_spec[0]!=
'@')
00070     y_col = 
toint(y_spec);
00071   
else {
00072     
string y_name = y_spec.substr(1,y_spec.length()-1);
00073     y_col = data->fieldIndex(y_name);
00074     
if (y_col<0) 
PLERROR(
"could not find field named %s in %s",y_name.c_str(),args[0].c_str());
00075   }
00076 
00077   
00078   
TVec<int> columns(2);
00079   columns[0]=x_col;
00080   columns[1]=y_col;
00081   
Mat xy_mat = data.
columns(columns).
toMat();
00082   
VMat x = 
VMat(xy_mat.
column(0));
00083   VMat y = VMat(xy_mat.
column(1));
00084 
00085   
Mat spearman_pvalue(1,1);
00086   
Mat spearman_r(1,1);
00087   
testSpearmanRankCorrelation(
x,y,spearman_r,spearman_pvalue);
00088   
Mat linear_pvalue(1,1);
00089   
Mat linear_r(1,1);
00090   
correlations(
x,y,linear_r,linear_pvalue);
00091 
00092   cout << 
"test-dependency between " << data->
fieldName(x_col) << 
" (column " << x_col << 
") and "
00093        <<  data->
fieldName(y_col) << 
" (column " << y_col << 
"):" << 
endl;
00094   cout << 
"rank correlation = " << spearman_r(0,0) << 
" {p-value = " << spearman_pvalue(0,0) << 
"}" << 
endl;
00095   cout << 
"linear correlation = " << linear_r(0,0) << 
" {p-value = " << linear_pvalue(0,0) << 
"}" << 
endl;
00096 }
00097 
00098 } 
00099