Skip to content

Commit

Permalink
changed ranking to map
Browse files Browse the repository at this point in the history
  • Loading branch information
karinashin committed May 3, 2022
1 parent fedb87a commit 50ee9af
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 18 deletions.
4 changes: 2 additions & 2 deletions DocParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ void DocParser::parse(const string& filename, StopWord& stop) {
space = text.find(" ");
Word curr(text.substr(0, space));
curr.toLower();//remove caps
curr.removePunc();//remove punctuation
curr.stemming();
if (stop.isStopWord(curr.getStr())){
text = text.substr(space + 1);//cut off curr word
space = text.find(" ");
continue;//don't add to tree
}
curr.removePunc();//remove punctuation
curr.stemming();

if (curr.getStr().empty()){//don't insert an empty string
text = text.substr(space + 1);
Expand Down
25 changes: 13 additions & 12 deletions UserInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,7 @@ void UserInterface::topWordsHelper(Node<Word>* n)
{
// Recursive in-order walk of the subtree rooted at n: left child first, then
// n itself, then the right child. A null n ends the recursion.
// NOTE(review): this listing looks like a diff rendered without +/- markers,
// so the two push_back lines are presumably the pre-change code and the
// bestWords.insert line their replacement -- confirm against the real file.
if (n != nullptr){
topWordsHelper(n->getLeft());
// Appends this node's getTotal() value and the node's data to two vectors,
// so the same index in `frequency` and `all` refers to the same node.
frequency.push_back(n->getData().getTotal());
all.push_back(n->getData());
// Stores the node's data in bestWords under its getTotal() value.
// NOTE(review): if bestWords is a std::map<int, Word>, insert() leaves the
// existing entry in place when the key is already present, so only the first
// word seen for each total is kept and later words with the same total are
// dropped. A std::multimap may be what is intended -- confirm.
bestWords.insert(std::pair<int, Word>(n->getData().getTotal(), n->getData()));
topWordsHelper(n->getRight());
}
}
Expand All @@ -164,21 +163,23 @@ void UserInterface::getTopWords()//go through tree and get the frequency of each

//get the top 25 most frequent words
cout << "Top 25 Most Frequent Words: " << endl;
map<int, Word>::iterator it = bestWords.begin();
map<int, Word>::iterator curr = bestWords.begin();
for (int n = 0; n < 25; n++){
int highest = frequency.at(0);
int index = 0;
if (n > frequency.size())//less than 25 total words
it = bestWords.begin();
int highest = it->first;
if (n > bestWords.size())
break;
for (int i = 1; i < frequency.size(); i++)//find the next highest freq
while (it != bestWords.end())
{
if (frequency.at(i) > highest){//get highest freq
highest = frequency.at(i);
index = i;
if(it->first > highest){
highest = it->first;
curr = it;
}
it++;
}
cout << all.at(index) << ": " << frequency.at(index) << endl;
frequency.erase(frequency.begin() + index);
all.erase(all.begin() + index);
cout << curr->second << ": " << curr->first << endl;
bestWords.erase(curr);
}
}

Expand Down
6 changes: 2 additions & 4 deletions UserInterface.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <iostream>
#include <chrono>
#include <map>
#include "DocParser.h"
#include "QueryProcessor.h"
#include "DSAVLTree.h"
Expand All @@ -22,10 +23,7 @@ class UserInterface {
DocParser docReader;
QueryProcessor process;
StopWord stops;

//for ranking
vector<int> frequency;//frequencies of words
vector<Word> all;//vector of all words
map<int, Word> bestWords;//for ranking

public:
UserInterface();
Expand Down

0 comments on commit 50ee9af

Please sign in to comment.