Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

stop_word_filter.cc

Go to the documentation of this file.
00001 #include <plearn/base/general.h> 00002 #include <plearn/base/stringutils.h> 00003 00004 using namespace PLearn; 00005 00006 // usage : stop_word_filter proper_nouns stop_words < in.text > filtered_out.text 00007 00008 set<string> extractWordSet(string file) 00009 { 00010 set<string> words; 00011 ifstream in(file.c_str()); 00012 while (!in.eof()) 00013 { 00014 string line = pgetline(in); 00015 if (line == "") continue; 00016 words.insert(line); 00017 } 00018 in.close(); 00019 return words; 00020 } 00021 00022 int main(int argc, char** argv) 00023 { 00024 if (argc != 2) 00025 PLERROR("usage : stop_word_filter stop_words < in.text > filtered_out.text"); 00026 set<string> stop_words = extractWordSet(argv[1]); 00027 string word; 00028 while (true) 00029 { 00030 cin >> word; 00031 if (!cin) break; 00032 word = lowerstring(word); 00033 bool is_stop_word = stop_words.find(word) != stop_words.end(); 00034 if (is_stop_word) 00035 cout << "<stop>" << endl; 00036 else 00037 cout << word << endl; 00038 } 00039 }

Generated on Tue Aug 17 16:07:31 2004 for PLearn by doxygen 1.3.7