forked from SMUCSE2341/22s-final-project-karinashin
-
Notifications
You must be signed in to change notification settings - Fork 0
/
DocParser.h
44 lines (35 loc) · 1 KB
/
DocParser.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
//
// Created by Karina Shin on 4/9/2022.
//
#ifndef INC_22S_FINAL_PROJ_DOCPARSER_H
#define INC_22S_FINAL_PROJ_DOCPARSER_H
#include <iostream>
#include <fstream>
#include <dirent.h>
#include <filesystem>
#include <vector>
#include <map>
#include <sys/stat.h>
#include "DSAVLTree.h"
#include "Word.h"
#include "include/rapidjson/document.h"
#include "StopWord.h"
using namespace std;
namespace fs = std::filesystem;
// DocParser owns three DSAVLTree<Word> indexes (words, orgs, people) and a
// document counter, and declares the entry points that fill and expose them.
// Only declarations are visible in this header; the definitions live elsewhere,
// so the behavioral notes below either restate the original author's comments
// or are explicitly marked as assumptions to confirm against the .cpp.
class DocParser {
private:
// Tree of Word entries for general terms.
// NOTE(review): presumably the tree handed out by getWordTree() - confirm.
DSAVLTree<Word> words;
// Tree of Word entries for organizations.
// NOTE(review): presumably the tree handed out by getOrgTree() - confirm.
DSAVLTree<Word> orgs;
// Tree of Word entries for people.
// NOTE(review): presumably the tree handed out by getPersonTree() - confirm.
DSAVLTree<Word> people;
// Document counter; starts at 0 via the in-class initializer.
// NOTE(review): presumably bumped once per parsed document - confirm.
int numDocs = 0;
public:
// Default constructor (defined out of line).
DocParser();
// Author's note: parse the documents for unique words.
// Takes the path/name of one file and a StopWord object by non-const
// reference. NOTE(review): presumably `stop` is used to filter stop words
// and `filename` is a JSON document (rapidjson is included) - confirm.
void parse(const string& filename, StopWord& stop);
// Author's note: "returns filenames for traversal through directory".
// The declared return type is void, so nothing is returned to the caller.
// NOTE(review): presumably walks `directory` and forwards each file it
// finds, together with `stop`, to parse() - confirm.
void getFiles(const string& directory, StopWord& stop);
// Author's note: read in persistence file to index words.
// Takes no arguments, so the persistence file location is not supplied by
// the caller through this interface.
void persistenceIndex();
// Accessors returning a mutable (non-const) reference to a DSAVLTree<Word>.
// Callers can modify the returned tree through the reference.
// NOTE(review): names suggest words / orgs / people respectively - confirm.
DSAVLTree<Word>& getWordTree();
DSAVLTree<Word>& getOrgTree();
DSAVLTree<Word>& getPersonTree();
// Returns an int by value. NOTE(review): presumably numDocs - confirm.
int getNumDocs();
};
#endif //INC_22S_FINAL_PROJ_DOCPARSER_H