forked from SMUCSE2341/22s-final-project-karinashin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Word.cpp
99 lines (84 loc) · 2.05 KB
/
Word.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
//
// Created by Karina Shin on 4/9/2022.
//
#include "Word.h"
/**
 * Default constructor: builds a Word whose text is the empty string and
 * whose running occurrence count starts at zero.
 */
Word::Word()
{
    str = "";
    // incrFreq() increments total and nothing else in this file assigns it,
    // so give it a defined starting value here.
    total = 0;
}
/**
 * Builds a Word from the given text, with the running occurrence count
 * starting at zero.
 * @param word text to store as this word's string
 */
Word::Word(string word)
{
    str = word;
    // incrFreq() increments total and nothing else in this file assigns it,
    // so give it a defined starting value here.
    total = 0;
}
/**
 * Orders two words by comparing their stored strings.
 * @param w word on the right-hand side of the comparison
 * @return true when this word's string compares less than w's string
 */
bool Word::operator<(const Word& w)
{
    return str < w.str;
}
/**
 * Two words are equal exactly when their stored strings are equal; the
 * document list and frequencies are not part of the comparison.
 * @param w word on the right-hand side of the comparison
 * @return true when both words hold the same string
 */
bool Word::operator==(const Word& w)
{
    const bool sameText = (str == w.str);
    return sameText;
}
/**
 * Converts every character of the stored string to lower case, in place.
 */
void Word::toLower()
{
    for (char& ch : str)
    {
        // tolower() is only defined for values representable as unsigned char
        // (or EOF); passing a negative plain char (e.g. a non-ASCII byte) is
        // undefined behavior, so go through unsigned char first.
        ch = static_cast<char>(tolower(static_cast<unsigned char>(ch)));
    }
}
void Word::removePunc()
{
string buffer;//no punc string
for (int i = 0; i < str.length(); ++i)
{
if (str.at(i) != '.' && str.at(i) != '!' && str.at(i) != '?' && str.at(i) != ',' && str.at(i) != ';' && str.at(i) != ':' && str.at(i) != '\"' && str.at(i) != '\n')//only add if its a letter
buffer += str.at(i);
}
str = buffer;
}
void Word::stemming()
{
Porter2Stemmer::trim(str);
Porter2Stemmer::stem(str);
}
/**
 * @return a mutable reference to this word's stored string
 */
string& Word::getStr()
{
    return this->str;
}
/**
 * @return a mutable reference to the list of documents recorded for this word
 */
vector<Document>& Word::getDocs()
{
    return this->docs;
}
void Word::printDocs()
{
for (int i = 0; i < docs.size(); i++)
{
// cout << docs.at(i).getID() << endl;
cout << docs.at(i).getPath() << endl;
}
cout << endl;
}
/**
 * Records one more occurrence of this word in the given document.
 * docs and frequency are parallel vectors: frequency.at(i) is the count
 * for docs.at(i).
 * @param doc document in which the word was seen
 */
void Word::incrFreq(Document& doc)
{
    // Find the first slot holding an equal document, if any.
    vector<Document>::size_type slot = 0;
    while (slot < docs.size() && !(docs.at(slot) == doc))
        ++slot;

    if (slot < docs.size())
    {
        // Known document: bump its existing counter.
        frequency.at(slot)++;
    }
    else
    {
        // First sighting in this document: append it with a count of one.
        docs.push_back(doc);
        frequency.push_back(1);
    }

    // Either way the word has been seen once more overall.
    total++;
}
/**
 * Looks up how many times this word was recorded for the given document.
 * @param doc document to look up
 * @return the frequency stored alongside the first equal document, or 0
 *         (after printing "Doc not found." to cout) when the document has
 *         not been recorded for this word
 */
int Word::getDocFreq(Document& doc)
{
    for (vector<Document>::size_type i = 0; i < docs.size(); i++)
    {
        if (docs.at(i) == doc)
        {
            return frequency.at(i); // count that corresponds to this document
        }
    }
    cout << "Doc not found." << endl;
    // Flowing off the end of a non-void function is undefined behavior;
    // an unrecorded document has a frequency of zero.
    return 0;
}
/**
 * @return a mutable reference to the per-document frequency list
 */
vector<int>& Word::getFrequency()
{
    return this->frequency;
}
int Word::getTotal() { return total; }
/**
 * Streams a word by writing its stored string.
 * @param out destination stream
 * @param w   word to write
 * @return out, so insertions can be chained
 */
std::ostream& operator<< (std::ostream& out, const Word& w)
{
    return out << w.str;
}