Skip to content

Commit

Permalink
Merge pull request #5 from Arkorri/version-1.1
Browse files Browse the repository at this point in the history
Version 1.1
  • Loading branch information
Arkorri authored Oct 20, 2020
2 parents 2aabffd + 1178f4e commit 492e23b
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 29 deletions.
26 changes: 17 additions & 9 deletions MLdigits/src/DataHandler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@ bool dataHandler::read_feature_vector(std::string path){
if(file){
for(unsigned int i = 0; i < 4; i++){
if(fread(bytes, sizeof(bytes), 1, file)){
header[i] = convert_to_little_endian(bytes);
if(isIntel){
header[i] = convert_to_little_endian(bytes);
} else {
header[i] = (uint32_t)bytes;
}//if/else
}//if
}//for
std::cout << "Finished getting data file header" << std::endl;
Expand All @@ -87,7 +91,6 @@ bool dataHandler::read_feature_vector(std::string path){
image->append_to_vector(element[0]);
} else {
std::cerr << "Error reading from file in dataHandler.read_feature_vector()" << std::endl;
this->unload();
return false;
}//if/else
}//for
Expand All @@ -96,7 +99,6 @@ bool dataHandler::read_feature_vector(std::string path){
std::cout << "Successfully read file and stored data points. " << this->data_array->size() << std::endl;
} else {
std::cerr << "Unable to read file in dataHandler.read_feature_vector()" << std::endl;
this->unload();
return false;
}//if/else
return true;
Expand All @@ -109,7 +111,11 @@ uint32_t header[2];// |MAGIC|NUM IMAGES|
if(file){
for(unsigned int i = 0; i < 2; i++){
if(fread(bytes, sizeof(bytes), 1, file)){
header[i] = convert_to_little_endian(bytes);
if(isIntel){
header[i] = convert_to_little_endian(bytes);
} else {
header[i] = (uint32_t)bytes;
}//if/else
}//if
}//for
std::cout << "Finished getting label file header" << std::endl;
Expand All @@ -119,14 +125,12 @@ uint32_t header[2];// |MAGIC|NUM IMAGES|
this->data_array->at(i)->set_lable(element[0]);
} else {
std::cerr << "Error reading from file in dataHandler.read_feature_labels()" << std::endl;
this->unload();
return false;
}//if/else
}//for
std::cout << "Successfully read file and stored labels. " << std::endl;
} else {
std::cerr << "Unable to read file in dataHandler.read_feature_labels()" << std::endl;
this->unload();
return false;
}//if/else
return true;
Expand Down Expand Up @@ -220,14 +224,14 @@ bool dataHandler::setFeaturePath(void){
std::string extension = ".idx3-ubyte";
std::string path = "";
do{
path = getString(64);
path = getString(1,64);
if(path.length() > extension.length()){
if(0 == path.compare(path.length() - extension.length(), extension.length(), extension)){
this->featurePath = path;
return true;
} else {
std::cerr << "incorrect file type, please try again (must end in " << extension << ")" << std::endl;
}//if/e;se
}//if/else
}else {
std::cerr << "incorrect file type, please try again (must end in " << extension << ")" << std::endl;
}//if/else
Expand All @@ -244,7 +248,7 @@ bool dataHandler::setLabelPath(void){
std::string extension = ".idx1-ubyte";
std::string path = "";
do{
path = getString(64);
path = getString(1,64);
if(path.length() > extension.length()){
if(0 == path.compare(path.length() - extension.length(), extension.length(), extension)){
this->labelPath = path;
Expand All @@ -264,3 +268,7 @@ bool dataHandler::setLabelPath(std::string path){//overloaded for default
return true;
}//setLabelPath

void dataHandler::setIsIntel(bool x){
this->isIntel = x;
}//setIsIntel

7 changes: 7 additions & 0 deletions MLdigits/src/DataHandler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,12 @@ class dataHandler {
// -returns true if successful, false if not
bool setLabelPath(std::string);

//setIsIntel
//Description: sets whether the system is using an Intel processor or not
//Preconditions: -none
//Postconditions: -isIntel is set as true or false
void setIsIntel(bool);

private:
std::vector<data *> *data_array; //array holding all data from the file
std::vector<data *> *training_data; //array holding a portion of data_array
Expand All @@ -181,6 +187,7 @@ class dataHandler {
std::string featurePath; //directory path for the file holding class data
std::string labelPath; //directory path for the file holding class names
bool loaded; //keeps track of if the class is holding data
bool isIntel; //keeps track of if the system is using an Intel processor

//used for data splits
//double represents a % of data to be used for each set of data
Expand Down
2 changes: 2 additions & 0 deletions MLdigits/src/MLdigits.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ int main() {
// unsigned int best_k = 1;

do{
std::cout << "Is your system using an Intel processor?" << std::endl;
dh->setIsIntel(yn());
std::cout << "Do you want to use default data or use custom data?" << std::endl;
std::cout << "1. Default Data (recommended)" << std::endl;
std::cout << "2. Custom Data" << std::endl;
Expand Down
71 changes: 60 additions & 11 deletions MLdigits/src/Utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,26 +7,46 @@

#include "Utilities.hpp"
#include "climits"
#include <cstring>

std::string getString(unsigned int maxLength){
//std::string getString(unsigned int maxLength){
// std::string str = "";
// do{
// std::cout << "Please enter a string less than " << std::to_string(maxLength) << " characters" << std::endl;
// std::getline(std::cin, str);
// if(str.length() <= maxLength){
// return str;
// } else {
// std::cout << "Invalid value, please try again" << std::endl;
// }//if/else
// }while(str.length() > maxLength);//do-while
// return "";
//}//getString

//clearCin
//Description: resets the error flags on std::cin, then discards whatever
//             is left of the current input line (up to and including '\n')
void clearCin(void){
    std::istream &input = std::cin;
    input.clear();
    input.ignore(INT_MAX, '\n');
}//clearCin

//getString
//Description: prompts until the user enters a line whose length is between
//             min and max (inclusive), then returns that line with leading
//             and trailing whitespace removed
//Preconditions: -min <= max
//Postconditions: -the trimmed line is returned
//                -the length check is applied to the line as typed, so a
//                 line made only of whitespace is accepted and comes back as ""
//                -returns "" as default when no more input can be read
std::string getString(unsigned int min, unsigned int max) {
    const char *whitespace = " \t\v\r\n";
    std::string str = "";
    for (;;) {
        std::cout << "Please enter a string between " << std::to_string(min) <<
                " and " << std::to_string(max) << std::endl;
        if (!std::getline(std::cin, str)) {
            return "";//input exhausted or stream failed: do not prompt forever
        }//if
        if (str.length() < min || str.length() > max) {
            std::cout << "Invalid value, please try again" << std::endl;
            continue;
        }//if
        const std::string::size_type start = str.find_first_not_of(whitespace);
        if (start == std::string::npos) {
            return std::string();//nothing but whitespace
        }//if
        //compare against npos rather than start == end: a single visible
        //character has start == end and must still be returned
        const std::string::size_type end = str.find_last_not_of(whitespace);
        return str.substr(start, end - start + 1);
    }//for
}//getString

//clearCin
//Description: resets the error flags on std::cin, then discards whatever
//             is left of the current input line (up to and including '\n')
void clearCin(void){
    std::istream &input = std::cin;
    input.clear();
    input.ignore(INT_MAX, '\n');
}//clearCin

int getInt(unsigned int min, unsigned int max){
unsigned int num = 0;
do{
Expand All @@ -44,3 +64,32 @@ int getInt(unsigned int min, unsigned int max){
return -1;
}//getInt

//getChar
//Description: reads lines from std::cin until one holds at most a single
//             character, and returns that character
//Preconditions: -none
//Postconditions: -returns the character entered
//                -returns '\0' when the line entered is empty
//                -returns '0' as default when no more input can be read
char getChar(void) {
    //std::string grows to fit the line; the earlier fixed 32-byte buffer was
    //filled with a limit of INT_MAX and overflowed on long input
    std::string line;
    while (std::getline(std::cin, line)) {
        if (line.length() > 1) {
            std::cout << "Invalid value, please enter a single character" << std::endl;
        } else {
            return line.empty() ? '\0' : line[0];
        }//if/else
    }//while
    return '0';//input exhausted or stream failed
}//getChar

//yn
//Description: asks the user a yes/no question and keeps asking until the
//             answer is 'y' or 'n' (either case)
//Preconditions: -none
//Postconditions: -returns true if y
//                -returns false if n
//                -returns false when std::cin can no longer be read, so the
//                 prompt cannot loop forever on a closed input
bool yn(void) {
    for (;;) {
        std::cout << "(y/n)" << std::endl;
        const char raw = getChar();
        //toupper is only defined for values representable as unsigned char
        //(or EOF), so convert before calling it
        const int choice = toupper(static_cast<unsigned char>(raw));
        if (choice == 'Y') {
            return true;
        }//if
        if (choice == 'N') {
            return false;
        }//if
        if (!std::cin) {
            return false;//no further answers can arrive
        }//if
        std::cout << "Invalid input, please try again (y/n)" << std::endl;
    }//for
}//yn

35 changes: 34 additions & 1 deletion MLdigits/src/Utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
#include <iostream>
#include <string>

//function pre-fix template
//Description:
//Preconditions:
//Postconditions:

//error struct for try/catch blocks
struct error{
int errorCode;
Expand All @@ -25,7 +30,19 @@
//Preconditions: -maxLength is positive
//Postconditions: -a string is returned
// -returns "" as default
std::string getString(unsigned int maxLength);
//std::string getString(unsigned int maxLength);

//getString
//Description: gets a string from the user
//             whose length is between a min
//             and max value. Continues to ask
//             until the value is valid
//Preconditions: -max is positive
//               -min is positive
//Postconditions: -a string is returned
//                -returns "" as default
//                -leading/trailing whitespace removed
std::string getString(unsigned int min, unsigned int max);

//getInt
//Description: gets an integer from the user between
Expand All @@ -42,4 +59,20 @@
//Postconditions: -cin is empty
void clearCin(void);

//getChar
//Description: gets and returns a single character
//             input from the user
//Preconditions: -none
//Postconditions: -returns a char
//                -returns '0' as default
char getChar(void);

//function yn
//Description: asks the user for a 'y' or 'n' answer
//             (upper or lower case) and returns it
//             as a bool
//Preconditions: -none
//Postconditions: -returns true if y
//                -false if n
bool yn(void);

#endif /* UTILITIES_HPP_ */
20 changes: 12 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ Intel(R) Core(TM) i7-10710U CPU @ 1.10GHz, 1608 Mhz, 6 Core(s), 12 Logical Proce
-If you are using eclipse, the zip file can be **imported** as an **existing project**

## How to use the provided main
**1. On program start it will ask if you want to use custom data, or provided MNIST data**\
**1. On program start it will ask if you are using an Intel processor, reply with a 'y' or 'n'**\
\
**2. It will then ask if you want to use custom data, or provided MNIST data**\
-It is highly recommended to use the provided data\
-If you want to use custom data, when the prompts you for the path you must input the full path starting from the directory in which the .exe is called\
-If you want to use custom data, when prompted for the file path you must input the full path starting from the directory in which the .exe is called\
-Custom file(s) must also have the extension idx3-ubyte (for image files) or idx1-ubyte (for label files)\
\
**2. Input the desired K value when prompted**\
Expand All @@ -30,24 +32,26 @@ Intel(R) Core(TM) i7-10710U CPU @ 1.10GHz, 1608 Mhz, 6 Core(s), 12 Logical Proce
\
**1. Create a new DataHandler object**\
\
**2. Set the file paths using the DataHandler setLabelPath(string) and setFeaturePath(string) function**\
**2. Use the DataHandler setIsIntel(bool) function to say whether or not an Intel processor is being used**\
\
**3. Set the file paths using the DataHandler setLabelPath(string) and setFeaturePath(string) function**\
-Paths must be defined from the location in which the program was launched\
-i.e. if launching from eclipse with the files in the project folder ("./filename")\
\
**3. Load the data with the DataHandler load() function**\
**4. Load the data with the DataHandler load() function**\
-This will return true if the load was successful, or false if it failed\
-If the load failed, the class will automatically unload data\
\
**4. Create a new Knn object**\
**5. Create a new Knn object**\
-This can be done with the default constructor, or can be given a k value by passing an integer\
-If the default constructor was called you must first call Knn->set_k(int) before step 6\
\
**5. Call the Knn .load(DataHandler) function by passing it a pointer to the previously created DataHandler object**\
**6. Call the Knn .load(DataHandler) function by passing it a pointer to the previously created DataHandler object**\
-This will return true if successful, or false if the load failed\
\
**6. You can now call the Knn validate_performance() and test_performance() functions**\
**7. You can now call the Knn validate_performance() and test_performance() functions**\
-These will run the algorithm and print results to the console.\
-After they finish they will return a double containing the overall performance as a percent\
-These functions take a long time to complete\
\
**7. If you wish to input new data into the classes you must call the unload() function for each object first**
**8. If you wish to input new data into the classes you must call the unload() function for each object first**

0 comments on commit 492e23b

Please sign in to comment.