00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00041
#include "VMatCommand.h"
00042
#include <plearn/db/getDataSet.h>
00043
#include <plearn/misc/vmatmain.h>
00044
00045
namespace PLearn {
00046
using namespace std;
00047
00049
//! Static registry entry for the 'vmat' command: constructed with a freshly
//! allocated VMatCommand instance.
//! NOTE(review): presumably PLearnCommandRegistry's constructor records the
//! command and takes ownership of the pointer -- confirm against its declaration.
PLearnCommandRegistry VMatCommand::reg_(new VMatCommand());
00050
00051 VMatCommand::VMatCommand():
00052
PLearnCommand("vmat",
00053
00054 "Examination and manipulation of vmat datasets",
00055
00056 "Usage: vmat info <dataset> \n"
00057 " Will info about dataset (size, etc..)\n"
00058 " or: vmat fields <dataset> \n"
00059 " To list the fields with their names \n"
00060 " or: vmat fieldinfo <dataset> <fieldname_or_num>\n"
00061 " To display statistics for that field \n"
00062 " or: vmat cat <dataset> [<optional_vpl_filtering_code>]\n"
00063 " To display the dataset \n"
00064 " or: vmat sascat <dataset.vmat> <dataset.txt>\n"
00065 " To output in <filename.txt> the dataset in SAS-like tab-separated format with field names on the first line\n"
00066 " or: vmat view <dataset> [<line#> [<column#>]]\n"
00067 " Interactive display to browse on the data. \n"
00068 " or: vmat stats <dataset> \n"
00069 " Will display basic statistics for each field \n"
00070 " or: vmat convert <source> <destination> \n"
00071 " To convert any dataset into a .amat .pmat or .dmat format \n"
00072 " The extension of the destination is used to determine the format you want \n"
00073 " or: vmat gendef <source> [binnum1 binnum2 ...] \n"
00074 " Generate stats for dataset (will put them in its associated metadatadir). \n"
00075 " or: vmat genvmat <source_dataset> <dest_vmat> [binned{num} |
onehot{num} | normalized]\n
"
00076
" Will generate a
template .vmat file with all the fields of the source preprocessed\n
"
00077
" with the processing you specify\n
"
00078
" or: vmat genkfold <source_dataset> <fileprefix> <kvalue>\n
"
00079
" Will generate <kvalue> pairs of .vmat that are splitted so they can be used
for kfold trainings\n
"
00080
" The first .vmat-pair will be named <fileprefix>_train_1.vmat (all source_dataset except the first 1/k)\n
"
00081
" and <fileprefix>_test_1.vmat (the first 1/k of <source_dataset>\n
"
00082
" or: vmat diff <dataset1> <dataset2> [tolerance] \n
"
00083
" Will report all elements that differ by more than tolerance (defauts to 1e-6) \n
"
00084
" or: vmat cdf <dataset> [<dataset> ...] \n
"
00085
" To interactively display cumulative density function
for each field \n
"
00086
" along with its basic statistics \n
"
00087
" or: vmat cond <dataset> <condfield#> \n
"
00088
" Interactive display of coditional statistics conditioned on the \n
"
00089
" conditioning field <condfield#> \n
"
00090
" or: vmat diststat <dataset> <inputsize>\n
"
00091
" Will compute and output basic statistics on the euclidean distance \n
"
00092
" between two consecutive input points \n\n
"
00093
"<dataset> is a parameter understandable by getDataSet: \n
"
00094
+ getDataSetHelp()
00095
)
00096
{}
00097
00098
00100 void VMatCommand::run(const vector<string>& args)
00101
{
00102
// Dirty hack to plug into old vmatmain code
00103
// Eventually, should get vmatmain code in here and clean
00104
00105
int argc = (int)args.size()+1;
00106
char** argv = new char*[argc];
00107
string commandname = "vmat
";
00108
argv[0] = const_cast<char*>(commandname.c_str());
00109
for(int i=1 ; i<argc; i++)
00110
argv[i] = const_cast<char*>(args[i-1].c_str());
00111
vmatmain(argc, argv);
00112
delete[] argv;
00113
}
00114
00115
} // end of namespace PLearn
00116