From 4ccacc85dc413668b13e2fdc994218f109710bf0 Mon Sep 17 00:00:00 2001 From: karinashin Date: Mon, 18 Apr 2022 17:12:51 -0500 Subject: [PATCH] trying to parse larger dataset --- DocParser.cpp | 2 +- main.cpp | 47 ++--------------------------------------------- 2 files changed, 3 insertions(+), 46 deletions(-) diff --git a/DocParser.cpp b/DocParser.cpp index 033c81b..35a2b26 100644 --- a/DocParser.cpp +++ b/DocParser.cpp @@ -33,7 +33,7 @@ void DocParser::parse(const string& filename) { stream.close(); doc.Parse(wholeFile.c_str()); - if (!doc.IsObject()) cout << "somethings wrong" << endl; +// if (!doc.IsObject()) cout << "somethings wrong" << endl; //make Document object for current file // string title = doc["title"].GetString(); TODO add back later diff --git a/main.cpp b/main.cpp index 3a49e08..b6ba11e 100644 --- a/main.cpp +++ b/main.cpp @@ -14,51 +14,8 @@ int main(int argc, char** argv) { cout << "done!" << endl; // cout << parse.getWordTree().getRoot()->getData().getStr() << endl; // cout << parse.getWordTree().getCount(); - Word w("investors"); +// Word w("investors"); + Word w(argv[2]); w.stemming(); -// if (parse.getWordTree().contains(w)){ -// cout << "true" << endl; -// parse.getWordTree().find(parse.getWordTree().getRoot(), w).printDocs(); -// } parse.getWordTree().find(parse.getWordTree().getRoot(), w).printDocs(); - - //data folder took 1:45 4/17 -// DSAVLTree tree; -// Word a("a"); -// Word z("z"); -// Word b("b"); -// tree.insert(a); -// tree.insert(z); -// tree.insert(b); -// Word check("b"); -// if (tree.contains(check)) -// cout << "true"; - -// rapidjson::Document doc; -// ifstream stream; -// stream.open("blogs_0000001.json"); -// if (stream.is_open()) -// cout << "open" << endl; -// -// string wholeFile; -// string temp; -// while (getline(stream, temp))//not reading anything -// { -// cout << temp << endl; -// wholeFile += temp; -// } -// stream.close(); -// cout << wholeFile << endl; -// -// doc.Parse(wholeFile.c_str()); -// if (doc.IsObject()) cout << "ITS AN OBJECT" << endl; -// -//// string notArr = doc["root"]["entities"]["persons"][0]["name"].GetString(); -// string notArr = doc["url"].GetString(); -// cout << "notArr: " << notArr << endl; -// -// doc["entities"]["persons"].IsObject(); -// doc["entities"]["persons"].IsArray(); -// for (auto& v : doc["entities"]["persons"].GetArray()) -// cout << "Value: " << v["name"].GetString() << endl; }