Skip to content

Commit

Permalink
updated ranking, tested
Browse files Browse the repository at this point in the history
  • Loading branch information
karinashin committed May 2, 2022
1 parent 27e9a68 commit 45f3f59
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 27 deletions.
6 changes: 6 additions & 0 deletions DocParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,12 @@ void DocParser::parse(const string& filename, StopWord& stop) {
curr.removePunc();//remove punctuation
curr.stemming();

if (curr.getStr().empty()){//don't insert an empty string
text = text.substr(space + 1);
space = text.find(" ");
continue;
}

//put unique words into the avl tree
if (!words.contains(curr)){//if the word is not already in the tree/new unique word
curr.incrFreq(currDoc);
Expand Down
15 changes: 9 additions & 6 deletions QueryProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ void QueryProcessor::parseQuery(string& q, DSAVLTree<Word>& words, DSAVLTree<Wor
space = query.find(" ");//to check if youve reached the end of the query
}

// rankIndex();//TODO
rankIndex();//TODO
}

vector<Word> QueryProcessor::parseAndOr()
Expand Down Expand Up @@ -268,6 +268,7 @@ void QueryProcessor::addPersonOrg(vector<Document>& a)//remove any docs from fin

void QueryProcessor::rankIndex()
{
//TODO: the NOT operator does not work correctly with the ranking system
cout << "Rank index" << endl;
// cout << "finalIndex size " << finalIndex.size() << endl;
// cout << "query words size: " << queryWords.size() << endl;
Expand All @@ -279,20 +280,22 @@ void QueryProcessor::rankIndex()
{
//get each word's frequency in the current doc and add them all together
sum += queryWords.at(i).getDocFreq(finalIndex.at(queryIndex));//add total freq of each word for this doc
cout << queryWords.at(i).getDocFreq(finalIndex.at(queryIndex)) << " " << finalIndex.at(queryIndex).getPath() << endl;
// cout << queryWords.at(i).getDocFreq(finalIndex.at(queryIndex)) << " " << finalIndex.at(queryIndex).getPath() << endl;
}
freqs.push_back(sum);
// cout << "sum: " << sum << endl;
}
//result: total frequency for each doc

cout << "Frequency" << endl;
for (int i = 0; i < freqs.size(); i++)
cout << freqs.at(i) << " " << finalIndex.at(i).getPath() << endl;
// cout << "Frequency" << endl;
// for (int i = 0; i < freqs.size(); i++)
// cout << freqs.at(i) << " " << finalIndex.at(i).getPath() << endl;


//get the top 15 docs with the highest freq
for (int n = 0; n < 15; n++){
// if (n > freqs.size() || freqs.size() == 0)//less that 15 docs in the finalIndex
// break;
int highest = freqs.at(0);
int index = 0;
if (n > freqs.size())//less that 15 docs in the finalIndex
Expand All @@ -305,7 +308,7 @@ void QueryProcessor::rankIndex()
}
}
best.push_back(finalIndex.at(index));//get the corresponding doc for that freq
cout << "next higheset frequency: " << freqs.at(index) << endl;
// cout << "next higheset frequency: " << freqs.at(index) << endl;
freqs.erase(freqs.begin() + index);
finalIndex.erase(finalIndex.begin() + index);
}
Expand Down
46 changes: 25 additions & 21 deletions UserInterface.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//
// Created by 18476 on 4/23/2022.
// Created by Karina Shin on 4/23/2022.
//

#include "UserInterface.h"
Expand All @@ -19,7 +19,11 @@ void UserInterface::run(const string& file)
cin >> choice;
if (choice == "1"){
cout << "parsing..." << endl;
start = std::chrono::high_resolution_clock::now();
docReader.getFiles(file, stops);
end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_in_seconds = end - start;
cout << std::fixed << "Parsing Time: " << time_in_seconds.count() << endl;
cout << "done!" << endl;
}
else if (choice == "2"){
Expand Down Expand Up @@ -97,36 +101,36 @@ void UserInterface::parseDocs(const string& direct)
std::cout << "done parsing!" << std::endl;
}

//void UserInterface::displayResults()//with ranking
//{
// if (process.getBest().size() == 0)
// cout << "No results found" << endl;
//
// for (int i = 0; i < process.getBest().size(); i++)
// {
// if (i == 15)
// break;
// cout << i + 1 << ") ";
// cout << "Title: " << process.getBest().at(i).getTitle() << ", " << process.getBest().at(i).getPub() << ", Date: " << process.getBest().at(i).getDate() << endl;
// cout << "Path: " << process.getBest().at(i).getPath() << endl;
// }
//}

void UserInterface::displayResults()//without ranking
void UserInterface::displayResults()//with ranking
{
if (process.getFinal().size() == 0)
if (process.getBest().size() == 0)
cout << "No results found" << endl;

for (int i = 0; i < process.getFinal().size(); i++)
for (int i = 0; i < process.getBest().size(); i++)
{
if (i == 15)
break;
cout << i + 1 << ") ";
cout << "Title: " << process.getFinal().at(i).getTitle() << ", " << process.getFinal().at(i).getPub() << ", Date: " << process.getFinal().at(i).getDate() << endl;
cout << "Path: " << process.getFinal().at(i).getPath() << endl;
cout << "Title: " << process.getBest().at(i).getTitle() << ", " << process.getBest().at(i).getPub() << ", Date: " << process.getBest().at(i).getDate() << endl;
cout << "Path: " << process.getBest().at(i).getPath() << endl;
}
}

//void UserInterface::displayResults()//without ranking
//{
// if (process.getFinal().size() == 0)
// cout << "No results found" << endl;
//
// for (int i = 0; i < process.getFinal().size(); i++)
// {
// if (i == 15)
// break;
// cout << i + 1 << ") ";
// cout << "Title: " << process.getFinal().at(i).getTitle() << ", " << process.getFinal().at(i).getPub() << ", Date: " << process.getFinal().at(i).getDate() << endl;
// cout << "Path: " << process.getFinal().at(i).getPath() << endl;
// }
//}

void UserInterface::showText(Document& d)
{
rapidjson::Document doc;
Expand Down

0 comments on commit 45f3f59

Please sign in to comment.