更新时间:2021-07-08 14:59:09
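算法的思路是:先用一个以单词为键的表统计每个单词出现的次数;然后遍历统计结果,把 (次数, 单词) 依次放入一个小顶堆,堆中元素超过 K 个(代码中 K 取 10)时就弹出堆顶,也就是当前最小的那个;遍历结束后,堆里剩下的就是出现次数最多的 K 个单词,最后按次数从小到大依次输出。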
具体实现和结果如下:
// Top-K most frequent words: entry point for the console application.
//
// Reads whitespace-separated words from "modern c.txt", counts how often each
// one occurs and prints the k most frequent words together with their counts,
// followed by the processor time the whole run took.
//
// Only the standard library is used, so the program builds with any C++11
// compiler. NOTE(review): when this file is compiled inside an MSVC project
// that has precompiled headers enabled, the project's PCH include must be
// added back as the very first line.

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <functional>
#include <iostream>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

/// Prints the k most frequent words of "modern c.txt" to standard output.
///
/// Output, in order:
///   1. the number of distinct words,
///   2. up to k lines of the form "<word> <count>", least frequent first
///      (ties are ordered by the word itself),
///   3. the elapsed processor time in seconds.
///
/// If the file cannot be opened, an error line is printed and the function
/// returns without producing any other output.
///
/// @param k  How many of the most frequent words to report (default 10).
void top_k_words(std::size_t k = 10)
{
    // Processor-time stamp taken up front; the elapsed time is reported at
    // the end as the difference to a second std::clock() reading.
    const std::clock_t start = std::clock();

    std::ifstream fin("modern c.txt");
    if (!fin) {
        // Stop here: extractions from a stream that failed to open never set
        // eofbit, so a read loop waiting for end-of-file would not terminate.
        std::cout << "can not open file" << std::endl;
        return;
    }

    std::unordered_map<std::string, int> countwords;
    std::string word;
    // Use the extraction itself as the loop condition so that a failed read
    // (end of file, possibly after trailing whitespace) is never counted.
    while (fin >> word) {
        ++countwords[word];
    }
    std::cout << "单词总数 (重复的不计数):" << countwords.size() << std::endl;

    // Min-heap ordered by (count, word): its top is always the weakest
    // candidate, so evicting the top whenever the heap grows past k leaves
    // exactly the k largest entries once every word has been offered.
    using Entry = std::pair<int, std::string>;
    std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry>> countmax;
    for (const auto& entry : countwords) {
        countmax.emplace(entry.second, entry.first);
        if (countmax.size() > k) {
            countmax.pop();
        }
    }

    while (!countmax.empty()) {
        std::cout << countmax.top().second << " " << countmax.top().first << '\n';
        countmax.pop();
    }

    // std::endl flushes, so everything is visible before main() runs "pause".
    std::cout << "time elapsed "
              << static_cast<double>(std::clock() - start) / CLOCKS_PER_SEC
              << std::endl;
}

int main(int argc, char* argv[])
{
    top_k_words();
    // Keeps the console window open when the program is started from an IDE.
    system("pause");
    return 0;
}
Linux 下不能使用 hash_map(它是非标准扩展,并不属于 C++ 标准库),因此改用 std::map 来统计单词的个数(C++11 起也可以使用标准的 std::unordered_map):
// Top-K most frequent words: entry point for the console application.
//
// Reads whitespace-separated words from "modern c.txt", counts how often each
// one occurs and prints the k most frequent words together with their counts.

#include <algorithm>
#include <cstddef>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <queue>
#include <string>
#include <utility>
#include <vector>

/// Prints the k most frequent words of "modern c.txt" to standard output.
///
/// Output, in order:
///   1. the number of distinct words,
///   2. up to k lines of the form "<word> <count>", least frequent first
///      (ties are ordered by the word itself).
///
/// If the file cannot be opened, an error line is printed and the function
/// returns without producing any other output.
///
/// @param k  How many of the most frequent words to report (default 10).
void top_k_words(std::size_t k = 10)
{
    std::ifstream fin("modern c.txt");
    if (!fin) {
        // Stop here: extractions from a stream that failed to open never set
        // eofbit, so a read loop waiting for end-of-file would not terminate.
        std::cout << "can not open file" << std::endl;
        return;
    }

    std::map<std::string, int> countwords;
    std::string word;
    // Use the extraction itself as the loop condition so that a failed read
    // (end of file, possibly after trailing whitespace) is never counted.
    while (fin >> word) {
        ++countwords[word];
    }
    std::cout << "单词总数 (重复的不计数):" << countwords.size() << std::endl;

    // Min-heap ordered by (count, word): its top is always the weakest
    // candidate, so evicting the top whenever the heap grows past k leaves
    // exactly the k largest entries once every word has been offered.
    using Entry = std::pair<int, std::string>;
    std::priority_queue<Entry, std::vector<Entry>, std::greater<Entry>> countmax;
    for (const auto& entry : countwords) {
        countmax.emplace(entry.second, entry.first);
        if (countmax.size() > k) {
            countmax.pop();
        }
    }

    while (!countmax.empty()) {
        std::cout << countmax.top().second << " " << countmax.top().first << '\n';
        countmax.pop();
    }
}

int main(int argc, char* argv[])
{
    top_k_words();
    return 0;
}