Skip to content

Commit

Permalink
slightly fixed person/org, added stats for top 25
Browse files Browse the repository at this point in the history
  • Loading branch information
karinashin committed May 1, 2022
1 parent c68453a commit 9c989c0
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 7 deletions.
22 changes: 16 additions & 6 deletions QueryProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,19 +265,28 @@ void QueryProcessor::complement(vector<Document>& a)//delete set a from finalInd

// Restricts finalIndex to documents that also appear in the person/org list `a`;
// when finalIndex is empty, documents from `a` are added to it instead.
// NOTE(review): this text is a diff view in which removed and added lines are
// interleaved (two consecutive for-headers, two declarations of `it`) and part of
// the else-branch is hidden behind a collapsed hunk marker, so it does not read as
// one compilable function as shown.
void QueryProcessor::addPersonOrg(vector<Document>& a)//remove any docs from final that don't include the person/org
{//doesn't work with AND EX: AND investor stock PERSON cramer
cout << "Add person/org" << endl;
cout << "Add person/org" << endl;//TODO 21883 is being printed with random chars
//if finalIndex already has values, remove any docs that don't contain person/org
//else: query only has person/org keywords, just add the files that contain the person/org
vector<Document> personList;
if (finalIndex.size() > 0)
{
for (int i = 0; i < finalIndex.size(); i++)
cout << "person if" << endl;
for (int i = 0; i < a.size(); i++)//used to be finalIndex.size();
{//get person/org document index list from doc parser (a), each file in finalIndex should be in the person/orgs index
vector<Document>::iterator it = find(a.begin(), a.end(), finalIndex.at(i));
if (it == a.end()){//doc of final index does NOT exist in person/org doc list
finalIndex.erase(finalIndex.begin() + i);//remove the file that doesn't contain person/org
i--;//account for file lost
cout << "person for" << endl;
// vector<Document>::iterator it = find(a.begin(), a.end(), finalIndex.at(i));
vector<Document>::iterator it = find(finalIndex.begin(), finalIndex.end(), a.at(i));
// NOTE(review): `it` was obtained by searching finalIndex, so the "not found"
// sentinel for this search is finalIndex.end(); the comparison below uses a.end().
// As written, a miss is not detected before *it and it->getPath() are evaluated.
if (it != a.end()){//doc of person/org list exists in final index, keep
// cout << "does not exist in doc list" << endl;
personList.push_back(*it);
cout << "added " << it->getPath() << endl;
// finalIndex.erase(finalIndex.begin() + i);//remove the file that doesn't contain person/org
// i--;//account for file lost
}
}
//replace finalIndex with only the documents collected in personList
finalIndex.clear();
finalIndex = personList;
}
else{//finalIndex doesn't have any other values in it
for (int i = 0; i < a.size(); i++)
Expand All @@ -287,6 +296,7 @@ void QueryProcessor::addPersonOrg(vector<Document>& a)//remove any docs from fin
finalIndex.push_back(a.at(i));//add files with person/org
}
}
cout << "person done" << endl;
}

void QueryProcessor::rankIndex()
Expand Down
31 changes: 30 additions & 1 deletion UserInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,36 @@ void UserInterface::stats()

/**
 * Prints the highest word frequencies and collects the matching words.
 *
 * Repeatedly picks the largest remaining entry of `freqs`, appends the word at
 * the same position of `all` to `top`, prints that frequency, and removes the
 * entry from both vectors. Stops after 25 words or as soon as no candidates
 * remain, whichever comes first. Ties keep the earliest entry.
 *
 * NOTE: `freqs` and `all` are not populated yet (see the TODO below), so until
 * the traversal is implemented only the "Get top words" banner is printed.
 * Previously the function read freqs.at(0) unconditionally and therefore
 * threw std::out_of_range on every call.
 */
void UserInterface::getTopWords()
{
    cout << "Get top words" << endl;

    // `top` is meant to hold the top 25 words; the earlier loop bound of 15
    // did not match that.
    const size_t topWordCount = 25;

    vector<Word> top;   // most frequent words, highest frequency first
    vector<int> freqs;  // total frequency of each candidate word
    vector<Word> all;   // candidate words; all.at(i) pairs with freqs.at(i)

    // TODO: go through the tree and fill `all`/`freqs` with every word and its
    // total frequency (in order).

    // Selection by repeated maximum. The emptiness check runs before any
    // element access, so fewer than topWordCount candidates (including none)
    // simply ends the loop instead of throwing or stopping half-way.
    while (top.size() < topWordCount && !freqs.empty()) {
        size_t index = 0;  // position of the highest remaining frequency
        for (size_t i = 1; i < freqs.size(); i++) {
            if (freqs.at(i) > freqs.at(index))
                index = i;
        }

        top.push_back(all.at(index));  // word that corresponds to that frequency
        cout << "next higheset frequency: " << freqs.at(index) << endl;

        // Drop the chosen entry from both parallel vectors so the next pass
        // finds the next-highest frequency.
        freqs.erase(freqs.begin() + index);
        all.erase(all.begin() + index);
    }
}

// Accessor: returns a reference to docReader.
DocParser& UserInterface::getDocParser()
{
    return docReader;
}
Expand Down

0 comments on commit 9c989c0

Please sign in to comment.