Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handling unrecoverable ABRT signals in HDF5 Library #126

Merged
merged 8 commits into from
Nov 8, 2024
7 changes: 7 additions & 0 deletions api/bag_dataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <regex>
#include <string>
#include <memory>
#include <csignal>


namespace BAG {
Expand Down Expand Up @@ -1055,6 +1056,11 @@ std::tuple<double, double> Dataset::gridToGeo(
return {x, y};
}

void handleAbrt(int signum) {

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this runs into some problems that @selimnairb and I talked about during the last meeting; I'm working on some code now to handle it "safely" within the specification. The worry is that using std::cerr and streams allocates, which isn't well-defined behavior within an abort handler. I wonder if there's a way that we can tell aborts apart from segfaults?

std::cerr << "\nUnrecoverable HDF5 Error \n";
exit(signum);
}

//! Read an existing BAG.
/*!
\param fileName
Expand All @@ -1066,6 +1072,7 @@ void Dataset::readDataset(
const std::string& fileName,
OpenMode openMode)
{
signal(SIGABRT, handleAbrt);
m_pH5file = std::unique_ptr<::H5::H5File, DeleteH5File>(new ::H5::H5File{
fileName.c_str(),
(openMode == BAG_OPEN_READONLY) ? H5F_ACC_RDONLY : H5F_ACC_RDWR},
Expand Down
17 changes: 15 additions & 2 deletions api/bag_metadata_import.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -859,6 +859,7 @@ bool decodeDataQualityInfo(
else if (schemaVersion == 2)
{
//gmd:DQ_DataQuality/gmd:scope/gmd:DQ_Scope/gmd:level

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be better here to change BagDataQuality::scope to be a std::string instead of a char*? It would mean we wouldn't have to use delete[] manually everywhere, which might also help with bugs.

delete[] dataQualityInfo.scope;
dataQualityInfo.scope = getContentsAsCharStar(node,
"gmd:DQ_DataQuality/gmd:scope/gmd:DQ_Scope/gmd:level/gmd:MD_ScopeCode");

Expand Down Expand Up @@ -986,6 +987,7 @@ bool decodeSpatialRepresentationInfo(
"gmd:MD_Georectified/gmd:axisDimensionProperties/gmd:MD_Dimension/gmd:dimensionName/gmd:MD_DimensionNameTypeCode[@codeListValue='column']/parent::*/parent::*/gmd:resolution/gco:Measure", "uom");

//gmd:MD_Georectified/gmd:cellGeometry
delete[] spatialRepresentationInfo.cellGeometry;
spatialRepresentationInfo.cellGeometry = getContentsAsCharStar(node,
"gmd:MD_Georectified/gmd:cellGeometry/gmd:MD_CellGeometryCode");

Expand Down Expand Up @@ -1660,18 +1662,22 @@ BagError bagImportMetadataFromXmlV2(
return BAG_METADTA_NOT_INITIALIZED;

//gmd:fileIdentifier
delete[] metadata.fileIdentifier;
metadata.fileIdentifier = getContentsAsCharStar(*pRoot,
"/gmi:MI_Metadata/gmd:fileIdentifier/gco:CharacterString");

//gmd:language
delete[] metadata.language;
metadata.language = getContentsAsCharStar(*pRoot,
"/gmi:MI_Metadata/gmd:language/gmd:LanguageCode");

//gmd:characterSet
delete[] metadata.characterSet;
metadata.characterSet = getContentsAsCharStar(*pRoot,
"/gmi:MI_Metadata/gmd:characterSet/gmd:MD_CharacterSetCode");

//gmd:hierarchyLevel
delete[] metadata.hierarchyLevel;
metadata.hierarchyLevel = getContentsAsCharStar(*pRoot,
"/gmi:MI_Metadata/gmd:hierarchyLevel/gmd:MD_ScopeCode");

Expand All @@ -1686,14 +1692,17 @@ BagError bagImportMetadataFromXmlV2(
}

//gmd:dateStamp
delete[] metadata.dateStamp;
metadata.dateStamp = getContentsAsCharStar(*pRoot,
"/gmi:MI_Metadata/gmd:dateStamp/gco:Date");

//gmd:metadataStandardName
delete[] metadata.metadataStandardName;
metadata.metadataStandardName = getContentsAsCharStar(*pRoot,
"/gmi:MI_Metadata/gmd:metadataStandardName/gco:CharacterString");

//gmd:metadataStandardVersion
delete[] metadata.metadataStandardVersion;
metadata.metadataStandardVersion = getContentsAsCharStar(*pRoot,
"/gmi:MI_Metadata/gmd:metadataStandardVersion/gco:CharacterString");

Expand Down Expand Up @@ -1849,7 +1858,9 @@ BagError bagImportMetadataFromXmlBuffer(
if (!pDocument)
return BAG_METADTA_NOT_INITIALIZED;

return bagImportMetadataFromXml(*pDocument, metadata, doValidation);
const BagError err = bagImportMetadataFromXml(*pDocument, metadata, doValidation);
xmlFreeDoc(pDocument);
return err;
}

//************************************************************************
Expand Down Expand Up @@ -1880,7 +1891,9 @@ BagError bagImportMetadataFromXmlFile(
if (!pDocument)
return BAG_METADTA_NOT_INITIALIZED;

return bagImportMetadataFromXml(*pDocument, metadata, doValidation);
const BagError err = bagImportMetadataFromXml(*pDocument, metadata, doValidation);
xmlFreeDoc(pDocument);
return err;
}

} // namespace BAG
Expand Down
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ set(examples
bag_read
bag_vr_create
bag_vr_read
driver
)

foreach(example ${examples})
Expand Down
43 changes: 43 additions & 0 deletions examples/driver.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#include <cstring>
#include <iostream>
#include <fstream>
#include <string>
#include <ios>
#include <vector>
#include <stdint.h>
#include <stddef.h>
#include <unistd.h>
#include <cassert>
#include "extended_fuzzer.cpp"

//! Stand-alone driver that replays a single input file through the fuzz target.
/*!
    Reports the size of the file given on the command line and then hands the
    file name to LLVMFuzzerTestOneInputByFile().

\param argc
    Number of command-line arguments; exactly 2 are expected (program name
    and input file).
\param argv
    Command-line arguments; argv[1] is the path of the file to replay.

\return
    1 when the command line is malformed, 0 otherwise.  The status returned
    by the fuzz target is intentionally not propagated.
*/
int main(int argc, char** argv)
{
    // Checked at runtime instead of with assert(): assert() is compiled out
    // under NDEBUG, which would let a null argv[1] reach std::string.
    if (argc != 2)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "driver")
            << " <input-file>\n";
        return 1;
    }

    const std::string filename = argv[1];

    // https://stackoverflow.com/questions/7880/how-do-you-open-a-file-in-c
    std::ifstream input(filename, std::ios::binary);
    if (!input)
        std::cerr << "warning: could not open '" << filename << "' for reading\n";

    // Unformatted get() is used on purpose: operator>> skips whitespace even
    // on a stream opened in binary mode, which would drop bytes such as
    // 0x20 or 0x0A and under-report the size.
    std::vector<uint8_t> bytes;
    char ch = 0;
    while (input.get(ch))
        bytes.push_back(static_cast<uint8_t>(ch));

    // std::endl flushes, so the size is visible even if the target crashes.
    std::cout << "got " << bytes.size() << " bytes" << std::endl;

    // The fuzz target re-opens the file itself; only its name is passed on.
    LLVMFuzzerTestOneInputByFile(filename.c_str());

    return 0;
}
118 changes: 118 additions & 0 deletions examples/extended_fuzzer.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#include <iostream>
#include <iomanip>

#include <stddef.h>
#include <stdint.h>
#include <unistd.h>

#include "bag_dataset.h"

using BAG::Dataset;


//! Write the properties of a layer descriptor to standard output.
/*!
\param descriptor
    The layer descriptor whose properties are printed, one per line.
*/
void printLayerDescriptor(
    const BAG::LayerDescriptor& descriptor)
{
    // Prints one "<label><value>" line; the label carries its own indentation.
    const auto emit = [](const char* label, const auto& value) {
        std::cout << label << value << '\n';
    };

    emit("\t\tchunkSize == ", descriptor.getChunkSize());
    emit("\t\tcompression level == ", descriptor.getCompressionLevel());
    emit("\t\tdata type == ", descriptor.getDataType());
    // Unary plus promotes the element size so it prints as a number.
    emit("\t\telement size == ", +descriptor.getElementSize());
    emit("\t\tinternalPath == ", descriptor.getInternalPath());
    emit("\t\tlayer type == ", descriptor.getLayerType());

    const auto range = descriptor.getMinMax();
    const auto& lowest = std::get<0>(range);
    const auto& highest = std::get<1>(range);
    std::cout << "\t\tmin max == (" << lowest << ", " << highest << ")\n";
}

//! Open a BAG file read-only and print a summary of its contents.
/*!
    Prints the descriptor properties, every layer descriptor and every
    tracking list item of the dataset to standard output, then closes the
    dataset.

\param filename
    Path of the BAG file to open.

\return
    EXIT_FAILURE when the dataset could not be opened, EXIT_SUCCESS otherwise.
*/
extern "C" int LLVMFuzzerTestOneInputByFile(const char* filename) {
    std::ostream& out = std::cout;

    auto dataset = Dataset::open(filename, BAG_OPEN_READONLY);
    if (!dataset)
        return EXIT_FAILURE;

    const auto& info = dataset->getDescriptor();

    // Grid extent, widened to 64 bits before it is used in arithmetic.
    const auto extent = info.getDims();
    const uint64_t rowCount = std::get<0>(extent);
    const uint64_t colCount = std::get<1>(extent);
    out << "\trows, columns == " << rowCount << ", " << colCount << '\n';

    // Geographic position of the first and the last grid cell.
    const std::tuple<double, double> lowerLeft = dataset->gridToGeo(0, 0);
    const std::tuple<double, double> upperRight =
        dataset->gridToGeo(rowCount - 1, colCount - 1);

    out << "\tgrid cover (llx, lly), (urx, ury) == (";
    out << std::setprecision(10) << std::get<0>(lowerLeft) << ", ";
    out << std::get<1>(lowerLeft) << "), (" << std::get<0>(upperRight);
    out << ", " << std::get<1>(upperRight) << ")\n";

    const auto& size = info.getDims();
    out << "\tdims == (" << std::get<0>(size) << ", ";
    out << std::get<1>(size) << ")\n";

    const auto& spacing = info.getGridSpacing();
    out << "\tgrid spacing == (" << std::get<0>(spacing) << ", ";
    out << std::get<1>(spacing) << ")\n";

    const auto& gridOrigin = info.getOrigin();
    out << "\torigin == (" << std::get<0>(gridOrigin) << ", ";
    out << std::get<1>(gridOrigin) << ")\n";

    const auto& cover = info.getProjectedCover();
    out << "\tprojected cover (llx, lly), (urx, ury) == (" << std::get<0>(cover) << ", ";
    out << std::get<1>(cover) << "), (" << std::get<2>(cover) << ", ";
    out << std::get<3>(cover) << ")\n";

    out << "\tversion == " << info.getVersion() << '\n';

    out << "\thorizontal reference system ==\n";
    out << info.getHorizontalReferenceSystem() << "\n\n";

    out << "\tvertical reference system ==\n";
    out << info.getVerticalReferenceSystem() << '\n';

    out << "\nLayers:\n";

    for (const auto& entry : dataset->getLayers())
    {
        auto layerInfo = entry->getDescriptor();

        out << "\t" << layerInfo->getName() << " Layer .. id(";
        out << layerInfo->getId() << ")\n";

        printLayerDescriptor(*layerInfo);
    }

    const auto& tracking = dataset->getTrackingList();
    out << "\nTracking List: (" << tracking.size() << " items)\n";

    size_t index = 0;
    for (const auto& record : tracking)
    {
        out << "\tTracking list item #" << index << '\n';
        ++index;

        out << "\t\trow == " << record.row << '\n';
        out << "\t\tcol == " << record.col << '\n';
        out << "\t\tdepth == " << record.depth << '\n';
        out << "\t\tuncertainty == " << record.uncertainty << '\n';
        out << "\t\ttrack_code == " << record.track_code << '\n';
        out << "\t\tlist_series == " << record.list_series << '\n';
    }

    dataset->close();

    return EXIT_SUCCESS;
}

//! libFuzzer entry point: run one fuzz input through the file-based target.
/*!
    The input is written to a per-process temporary file because the target
    opens its input by file name.  The temporary file is removed again before
    returning.

\param buf
    The bytes supplied by the fuzzer.
\param len
    The number of bytes in \e buf.

\return
    Always 0.  libFuzzer only defines the return values 0 and -1, so the
    status of LLVMFuzzerTestOneInputByFile() (which may be EXIT_FAILURE for
    an unreadable file) is deliberately not forwarded.
*/
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *buf, size_t len) {
    char filename[256];
    snprintf(filename, sizeof(filename), "/tmp/libfuzzer.%d",
        static_cast<int>(getpid()));

    // Save input file to temporary file so that we can read it.
    FILE *fp = fopen(filename, "wb");
    if (!fp) {
        return 0;
    }

    // Element size 1 so the return value is the number of bytes written
    // (and a zero-length input is still a successful write).
    const size_t written = fwrite(buf, 1, len, fp);
    const bool closedOk = (fclose(fp) == 0);

    // Only run the target when the file holds exactly the fuzzer's input;
    // a truncated file would attribute findings to the wrong input.
    if (written == len && closedOk) {
        LLVMFuzzerTestOneInputByFile(filename);
    }

    // Do not leave one stale file per fuzzing process behind in /tmp.
    unlink(filename);

    return 0;
}
Loading