Skip to content

Commit

Permalink
cleaned comments
Browse files Browse the repository at this point in the history
  • Loading branch information
karinashin committed May 1, 2022
1 parent c5af51a commit 5a2759b
Show file tree
Hide file tree
Showing 11 changed files with 14 additions and 114 deletions.
33 changes: 1 addition & 32 deletions DSAVLTree.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,6 @@ template <typename T>
class DSAVLTree {
private:
Node<T>* root;
// Node<T>* left;
// Node<T>* right;
// int height;//height of entire tree
int count;//total number of nodes

bool contains(Node<T>* n, T& val);//TODO test private functions?
Expand All @@ -126,11 +123,9 @@ class DSAVLTree {
DSAVLTree& operator= (const DSAVLTree<T>& copy);
Node<T>& copyHelper(Node<T>*& node);

// int getHeight(Node<T>* node);//get height from any starting node
void insert(T& x);//TODO doesn't accept an int
void insert(T& x);
bool contains(T& val) { return contains(root, val); }
T& find(Node<T>* node, T& val);//given a value, find the matching object in the tree
// Node<T>*& find(Node<T>* node, T& val);//iterative

void balanceTree(Node<T>*& node);//balance the tree using right/left rotate
void rightRotate(Node<T>*& k1);
Expand All @@ -148,9 +143,6 @@ template <typename T>
// Default constructor: starts with an empty tree (no root node, zero nodes counted).
DSAVLTree<T>::DSAVLTree()
    : root(nullptr), count(0)
{
}

Expand Down Expand Up @@ -220,7 +212,6 @@ void DSAVLTree<T>::insert(T& x)//public
template <typename T>
bool DSAVLTree<T>::contains(Node<T>* n, T& val)
{
// std::cout << "contains function" << std::endl;
if (n == nullptr)
return false;
else if (n->getData() == val)
Expand All @@ -234,7 +225,6 @@ bool DSAVLTree<T>::contains(Node<T>* n, T& val)
template <typename T>
void DSAVLTree<T>::insert(Node<T>*& n, T& val)//private
{
// std::cout << "insert function" << std::endl;
if (n == nullptr){//tree is empty or at the end of a leaf
n = new Node<T>(val);//make new node to insert
}
Expand All @@ -258,27 +248,6 @@ T& DSAVLTree<T>::find(Node<T>* node, T& val)//TODO add an edge case for when the
else
return find(node->getRight(), val);
}
//Node<T>*& DSAVLTree<T>::find(Node<T>* node, T& val)//iterative TODO seg fault
//{
// Node<T>* empty;
// std::cout << "find function" << std::endl;
// if (node == nullptr)
// return empty;
// while (node != nullptr)
// {
// std::cout << "while loop" << std::endl;
// if (node->getData() == val){
// std::cout << "Found node" << std::endl;
// return node;
// }
// else if (val < node->getData())
// node = node->getLeft();
// else
// node = node->getRight();
// }
// std::cout << "not found" << std::endl;
// return empty;
//}

template <typename T>
void DSAVLTree<T>::balanceTree(Node<T>*& node)
Expand Down
13 changes: 1 addition & 12 deletions DocParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
// Default constructor: performs no work of its own; members use their own default initialization.
DocParser::DocParser() = default;

void DocParser::parse(const string& filename, StopWord& stop) {
// cout << "NEW DOC: " << filename << endl;
numDocs++;

//TODO write to persistence file
Expand Down Expand Up @@ -67,27 +66,22 @@ void DocParser::parse(const string& filename, StopWord& stop) {
{
space = text.find(" ");
Word curr(text.substr(0, space));
// cout << "current: " << curr.getStr() << endl;
curr.toLower();//remove caps
if (stop.isStopWord(curr.getStr())){
// cout << "stop word found" << endl;
text = text.substr(space + 1);//cut off curr word
space = text.find(" ");
continue;//don't add to tree
}
curr.removePunc();//remove punctuation
curr.stemming();
// cout << "current: " << curr.getStr() << endl;
//put unique words into the avl tree

//put unique words into the avl tree
if (!words.contains(curr)){//if the word is not already in the tree/new unique word
curr.incrFreq(currDoc);
words.insert(curr);
// cout << "inserted " << curr.getStr() << endl;
}
else{
words.find(words.getRoot(), curr).incrFreq(currDoc);//index document on object in tree
// curr.incrFreq(currDoc);//indexes a temporary variable, not the actual Word object in the tree
}

text = text.substr(space + 1);//cut off curr word
Expand All @@ -111,11 +105,6 @@ void DocParser::persistenceIndex()//read in persistence file to index words

}

// Placeholder for the "top 25 most frequent words" feature.
// Currently a no-op: the only statement — inserting `w` into the `top` map with
// the size of w.getDocs() as its value — is commented out and marked as an ERROR,
// so calling this function has no effect and `w` is unused.
void DocParser::order(Word& w)//for top 25 most frequent words
{
// top.insert(pair<Word, int> (w, w.getDocs().size())); // ERROR
}

// Returns a mutable reference to the `words` tree held by this parser.
DSAVLTree<Word>& DocParser::getWordTree()
{
    return words;
}
// Returns a mutable reference to the `orgs` tree held by this parser.
DSAVLTree<Word>& DocParser::getOrgTree()
{
    return orgs;
}
// Returns a mutable reference to the `people` tree held by this parser.
DSAVLTree<Word>& DocParser::getPersonTree()
{
    return people;
}
Expand Down
2 changes: 0 additions & 2 deletions DocParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,12 @@ class DocParser {
DSAVLTree<Word> words;
DSAVLTree<Word> orgs;
DSAVLTree<Word> people;
map<Word, int> top;
int numDocs = 0;

public:
DocParser();
void parse(const string& filename, StopWord& stop);//parse the documents for unique words
void getFiles(const string& directory, StopWord& stop);//returns filenames for traversal through directory
void order(Word& w);//order top 25 more frequent words
void persistenceIndex();//read in persistence file to index words

DSAVLTree<Word>& getWordTree();
Expand Down
2 changes: 2 additions & 0 deletions Document.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ Document& Document::operator= (const Document& copy)
date = copy.date;
filePath = copy.filePath;
uuid = copy.uuid;

return *this;
}

bool Document::operator==(const Document& d)
Expand Down
29 changes: 0 additions & 29 deletions QueryProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,12 @@ QueryProcessor::QueryProcessor(){}
void QueryProcessor::parseQuery(string& q, DSAVLTree<Word>& words, DSAVLTree<Word>& orgs, DSAVLTree<Word>& people, StopWord& stop)//parse query
{
this->query = q;
// std::cout << "NEW QUERY: " << query << std::endl;
int space;
while (space != -1)
{
space = query.find(" ");//for first word
Word curr(query.substr(0, space));
curr.toLower();
cout << curr.getStr() << endl;
if (specialStopCheck(stop, curr.getStr()))
{
query = query.substr(space + 1);//cut off word
Expand All @@ -25,15 +23,12 @@ void QueryProcessor::parseQuery(string& q, DSAVLTree<Word>& words, DSAVLTree<Wor

if (curr.getStr() == "and" || curr.getStr() == "or"){
if (curr.getStr() == "and"){//perform corresponding set operation
// std::cout << "intersection" << std::endl;
vector<Word> wordList = parseAndOr();
for (int i = 0; i < wordList.size(); i++){
cout << wordList.at(i).getStr() << endl;
intersection(wordList.at(i), words);
}
}
else{
// std::cout << "union" << std::endl;
setUnion(parseAndOr(), words);
}
}
Expand Down Expand Up @@ -68,20 +63,16 @@ void QueryProcessor::parseQuery(string& q, DSAVLTree<Word>& words, DSAVLTree<Wor
}
else
cout << org.getStr() << " is not found." << endl;
// addPersonOrg(orgs.find(orgs.getRoot(), org)->getData().getDocs());
}
else if (curr.getStr() == "person"){
query = query.substr(space + 1);//cut off operator KEEP
// cout << "query: " << query << endl;
Word person(findPersonOrg());
cout << "person " << person.getStr() << std::endl;
if (people.contains(person)){
addPersonOrg(people.find(people.getRoot(), person).getDocs());//index has to include only those that have this person
queryWords.push_back(people.find(people.getRoot(), person));
}
else
cout << person.getStr() << " is not found." << endl;
//addPersonOrg(people.find(people.getRoot(), person)->getData().getDocs());
}
else{//just a term
Word term(curr);
Expand All @@ -92,7 +83,6 @@ void QueryProcessor::parseQuery(string& q, DSAVLTree<Word>& words, DSAVLTree<Wor
}
else
cout << term.getStr() << " is not found." << endl;
// addTerm(words.find(words.getRoot(), term)->getData().getDocs());
query = query.substr(space + 1);
}
space = query.find(" ");//to check if youve reached the end of the query
Expand All @@ -108,12 +98,10 @@ vector<Word> QueryProcessor::parseAndOr()

int space = query.find(" ");
query = query.substr(space + 1);//cut off operator
// cout << "query: " << query << endl;
while (check)
{
Word word;
space = query.find(" ");
// cout << space << endl;
if (space != -1){//not at the end of the line
if (query.substr(0, space) != "AND" && query.substr(0, space) != "OR" && query.substr(0, space) != "NOT" && query.substr(0, space) != "PERSON" && query.substr(0, space) != "ORG")
{//if its not a key word
Expand Down Expand Up @@ -184,7 +172,6 @@ void QueryProcessor::setUnion(vector<Word> a, DSAVLTree<Word>& tree)//OR keyword
cout << a.at(i).getStr() << " is not found." << endl;
continue;
}
// vector<Document> temp = tree.find(tree.getRoot(), a.at(i))->getData().getDocs();
for (int d = 0; d < temp.size(); d++)//for every doc in the Word objects index
{
vector<Document>::iterator it = find(finalIndex.begin(), finalIndex.end(), temp.at(d));
Expand All @@ -200,7 +187,6 @@ void QueryProcessor::intersection(Word& word, DSAVLTree<Word>& tree)//AND keywor
vector<Document> finalList;
if (finalIndex.size() == 0)//first word
{
cout << "if statement" << endl;
if (tree.contains(word))
{
queryWords.push_back(tree.find(tree.getRoot(), word));
Expand All @@ -220,31 +206,25 @@ void QueryProcessor::intersection(Word& word, DSAVLTree<Word>& tree)//AND keywor
}
else
cout << word.getStr() << " not found" << endl;
// cout << "else statment" << endl;
for (int i = 0; i < temp.size(); i++)
{
// cout << "inner for loop running" << endl;
vector<Document>::iterator it = find(finalIndex.begin(), finalIndex.end(), temp.at(i));
if (it != finalIndex.end()){//doc of a exists in final, only add docs that are in word.getDocs and final
finalList.push_back(*it);//add the docs that contain the word from the finalIndex
// cout << "added to final list" << endl;
}
}
// cout << "Done" << endl;
finalIndex.clear();
finalIndex = finalList;//should only contain docs that contain the word
}
}

// Merges a single term's documents into finalIndex.
// Each document in `a` is appended to finalIndex unless an equal document
// (as decided by Document's operator==) is already there, so repeated calls
// never introduce duplicates. Documents are appended in the order they occur in `a`.
//
// a: the documents associated with one query term; it is only read here.
void QueryProcessor::addTerm(vector<Document>& a)
{
    // Range-for replaces the int-indexed loop, which compared a signed index
    // against the unsigned a.size() and looked each element up twice via at().
    for (const Document& doc : a)
    {
        // Append only when the search reaches the end, i.e. doc is not yet in finalIndex.
        if (find(finalIndex.begin(), finalIndex.end(), doc) == finalIndex.end())
            finalIndex.push_back(doc);
    }
}
Expand All @@ -261,24 +241,16 @@ void QueryProcessor::complement(vector<Document>& a)//delete set a from finalInd

void QueryProcessor::addPersonOrg(vector<Document>& a)//remove any docs from final that don't include the person/org
{
cout << "Add person/org" << endl;
//if finalIndex already has values, remove any docs that don't contain person/org
//else: query only has person/org keywords, just add the files that contain the person/org
vector<Document> personList;
if (finalIndex.size() > 0)
{
cout << "person if" << endl;
for (int i = 0; i < a.size(); i++)//used to be finalIndex.size();
{//get person/org document index list from doc parser (a), each file in finalIndex should be in the person/orgs index
cout << "person for" << endl;
// vector<Document>::iterator it = find(a.begin(), a.end(), finalIndex.at(i));
vector<Document>::iterator it = find(finalIndex.begin(), finalIndex.end(), a.at(i));
if (it != finalIndex.end()){//doc of person/org list exists in final index, keep
// cout << "does not exist in doc list" << endl;
personList.push_back(*it);
cout << "added " << it->getPath() << endl;
// finalIndex.erase(finalIndex.begin() + i);//remove the file that doens't contain person/org
// i--;//account for file lost
}
}
finalIndex.clear();
Expand All @@ -292,7 +264,6 @@ void QueryProcessor::addPersonOrg(vector<Document>& a)//remove any docs from fin
finalIndex.push_back(a.at(i));//add files with person/org
}
}
cout << "person done" << endl;
}

void QueryProcessor::rankIndex()
Expand Down
6 changes: 4 additions & 2 deletions QueryProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,20 @@ class QueryProcessor {

public:
QueryProcessor();

void parseQuery(string& query, DSAVLTree<Word>& words, DSAVLTree<Word>& orgs, DSAVLTree<Word>& people, StopWord& stop);//parse query
vector<Word> parseAndOr();
Word findPersonOrg();//get the full name of the person being searched for (accounts for those w/ first + last names)
// void intersection(vector<Document>& a, vector<Document>&b);//AND keyword

void setUnion(vector<Word> a, DSAVLTree<Word>& tree);//OR keyword
void intersection(Word& word, DSAVLTree<Word> & tree);//AND keyword
// void intersection(vector<Word> a, DSAVLTree<Word>& tree);//AND keyword
void addTerm(vector<Document>& a);//add a single term's docs to the final
void complement(vector<Document>& a);//set subtraction
void addPersonOrg(vector<Document>& a);//remove any docs that don't include the given person or org

void rankIndex();//rank the documents in the final index by relevancy/frequency
bool specialStopCheck(StopWord& stop, string& word);

void clearFinal();//resets all vectors for next query
vector<Document>& getFinal();
vector<Document>& getBest();//returns the top 15 ranked documents
Expand Down
4 changes: 2 additions & 2 deletions StopWord.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ StopWord::StopWord()
{
ifstream stop;//make the stop words AVL tree
stop.open("stopWords.txt");
// if (stop.is_open())
// cout << "OPened stops" << endl;
if (stop.is_open())
cout << "OPened stops" << endl;
string curr;
while (getline(stop, curr))//make an avl tree of stop words
{
Expand Down
3 changes: 0 additions & 3 deletions UserInterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,13 @@ void UserInterface::run(const string& file)
cout << "Search: " << endl;
string query;
getline(cin, query);
// cout << "query: " << query << endl;
start = std::chrono::high_resolution_clock::now();
process.parseQuery(query, docReader.getWordTree(), docReader.getOrgTree(), docReader.getPersonTree(), stops);
end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> time_in_seconds = end - start;
cout << std::fixed << "Query Execution Time: " << time_in_seconds.count() << endl;

displayResults();
// cout << "displayed" << endl;

choice = -1;
while (choice != "0"){
Expand Down Expand Up @@ -90,7 +88,6 @@ void UserInterface::clearIndex()
docReader.getWordTree().deleteTree(docReader.getWordTree().getRoot());
docReader.getOrgTree().deleteTree(docReader.getOrgTree().getRoot());
docReader.getPersonTree().deleteTree(docReader.getPersonTree().getRoot());
//TODO also erase contents of persistence file?
}

void UserInterface::parseDocs(const string& direct)
Expand Down
14 changes: 1 addition & 13 deletions Word.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,12 @@ Word::Word()
{
str = "";
}

// Constructs a Word whose string value is a copy of `word`.
Word::Word(string word)
    : str(word)
{
}

//Word& Word::operator=(const Word& w)
//{
// str = w.str;
// docs = w.docs;
// frequency = w.frequency;
// total = w.total;
//}

bool Word::operator<(const Word& w)
{
if (str < w.str)
Expand All @@ -32,11 +25,6 @@ bool Word::operator==(const Word& w)
{
return str == w.str;//if theyre the same string, theyre the same word
}
//
//void Word::sort()
//{
// //TODO
//}

void Word::toLower()
{
Expand Down
Loading

0 comments on commit 5a2759b

Please sign in to comment.