From 50ee9af073a92ddff5486c6d24dc46067411dc9a Mon Sep 17 00:00:00 2001 From: karinashin Date: Mon, 2 May 2022 22:35:37 -0500 Subject: [PATCH] changed ranking to map --- DocParser.cpp | 4 ++-- UserInterface.cpp | 25 +++++++++++++------------ UserInterface.h | 6 ++---- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/DocParser.cpp b/DocParser.cpp index 486259f..60b6393 100644 --- a/DocParser.cpp +++ b/DocParser.cpp @@ -66,13 +66,13 @@ void DocParser::parse(const string& filename, StopWord& stop) { space = text.find(" "); Word curr(text.substr(0, space)); curr.toLower();//remove caps + curr.removePunc();//remove punctuation + curr.stemming(); if (stop.isStopWord(curr.getStr())){ text = text.substr(space + 1);//cut off curr word space = text.find(" "); continue;//don't add to tree } - curr.removePunc();//remove punctuation - curr.stemming(); if (curr.getStr().empty()){//don't insert an empty string text = text.substr(space + 1); diff --git a/UserInterface.cpp b/UserInterface.cpp index a2b163f..8a0e784 100644 --- a/UserInterface.cpp +++ b/UserInterface.cpp @@ -150,8 +150,7 @@ void UserInterface::topWordsHelper(Node* n) { if (n != nullptr){ topWordsHelper(n->getLeft()); - frequency.push_back(n->getData().getTotal()); - all.push_back(n->getData()); + bestWords.insert(std::pair(n->getData().getTotal(), n->getData())); topWordsHelper(n->getRight()); } } @@ -164,21 +163,23 @@ void UserInterface::getTopWords()//go through tree and get the frequency of each //get the top 25 most frequent words cout << "Top 25 Most Frequent Words: " << endl; + map::iterator it = bestWords.begin(); + map::iterator curr = bestWords.begin(); for (int n = 0; n < 25; n++){ - int highest = frequency.at(0); - int index = 0; - if (n > frequency.size())//less than 25 total words + it = bestWords.begin(); + int highest = it->first; + if (n > bestWords.size()) break; - for (int i = 1; i < frequency.size(); i++)//find the next highest freq + while (it != bestWords.end()) { - if (frequency.at(i) > highest){//get highest freq - highest = frequency.at(i); - index = i; + if(it->first > highest){ + highest = it->first; + curr = it; } + it++; } - cout << all.at(index) << ": " << frequency.at(index) << endl; - frequency.erase(frequency.begin() + index); - all.erase(all.begin() + index); + cout << curr->second << ": " << curr->first << endl; + bestWords.erase(curr); } } diff --git a/UserInterface.h b/UserInterface.h index 774c130..1c23ea8 100644 --- a/UserInterface.h +++ b/UserInterface.h @@ -7,6 +7,7 @@ #include #include +#include #include "DocParser.h" #include "QueryProcessor.h" #include "DSAVLTree.h" @@ -22,10 +23,7 @@ class UserInterface { DocParser docReader; QueryProcessor process; StopWord stops; - - //for ranking - vector frequency;//frequencies of words - vector all;//vector of all words + map bestWords;//for ranking public: UserInterface();