Skip to content

Commit

Permalink
Merge pull request #4478 from BSBandme/proto2_to_proto3_tools
Browse files Browse the repository at this point in the history
Add gogo benchmark
  • Loading branch information
BSBandme authored Apr 10, 2018
2 parents 5fb73f8 + e3af023 commit 320d56c
Show file tree
Hide file tree
Showing 6 changed files with 584 additions and 53 deletions.
286 changes: 235 additions & 51 deletions benchmarks/Makefile.am

Large diffs are not rendered by default.

Empty file added benchmarks/python/__init__.py
Empty file.
6 changes: 4 additions & 2 deletions benchmarks/python/py_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,9 @@ def run_benchmark(self, test_method_args='', setup_method_args=''):
print "Message %s of dataset file %s" % \
(result["message_name"], result["filename"])
print "Average time for parse_from_benchmark: %.2f ns" % \
(result["benchmarks"]["parse_from_benchmark"])
(result["benchmarks"][ \
args.behavior_prefix + "_parse_from_benchmark"])
print "Average time for serialize_to_benchmark: %.2f ns" % \
(result["benchmarks"]["serialize_to_benchmark"])
(result["benchmarks"][ \
args.behavior_prefix + "_serialize_to_benchmark"])
print ""
105 changes: 105 additions & 0 deletions benchmarks/util/gogo_data_scrubber.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#include "benchmarks.pb.h"
#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
#include "datasets/google_message2/benchmark_message2.pb.h"
#include "datasets/google_message3/benchmark_message3.pb.h"
#include "datasets/google_message4/benchmark_message4.pb.h"

#include "google/protobuf/message.h"
#include "google/protobuf/descriptor.h"

#include <fstream>

using google::protobuf::FieldDescriptor;
using google::protobuf::Message;
using google::protobuf::Reflection;


class DataGroupStripper {
public:
static void StripMessage(Message *message) {
std::vector<const FieldDescriptor*> set_fields;
const Reflection* reflection = message->GetReflection();
reflection->ListFields(*message, &set_fields);

for (size_t i = 0; i < set_fields.size(); i++) {
const FieldDescriptor* field = set_fields[i];
if (field->type() == FieldDescriptor::TYPE_GROUP) {
reflection->ClearField(message, field);
}
if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
if (field->is_repeated()) {
for (int j = 0; j < reflection->FieldSize(*message, field); j++) {
StripMessage(reflection->MutableRepeatedMessage(message, field, j));
}
} else {
StripMessage(reflection->MutableMessage(message, field));
}
}
}

reflection->MutableUnknownFields(message)->Clear();
}
};

std::string ReadFile(const std::string& name) {
std::ifstream file(name.c_str());
GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
<< name
<< "', please make sure you are running this command from the benchmarks"
<< " directory.\n";
return std::string((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
}

int main(int argc, char *argv[]) {
if (argc % 2 == 0 || argc == 1) {
std::cerr << "Usage: [input_files] [output_file_names] where " <<
"input_files are one to one mapping to output_file_names." <<
std::endl;
return 1;
}

for (int i = argc / 2; i > 0; i--) {
const std::string &input_file = argv[i];
const std::string &output_file = argv[i + argc / 2];

std::cerr << "Generating " << input_file
<< " to " << output_file << std::endl;
benchmarks::BenchmarkDataset dataset;
Message* message;
std::string dataset_payload = ReadFile(input_file);
GOOGLE_CHECK(dataset.ParseFromString(dataset_payload))
<< "Can' t parse data file " << input_file;

if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") {
message = new benchmarks::proto3::GoogleMessage1;
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") {
message = new benchmarks::proto2::GoogleMessage1;
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") {
message = new benchmarks::proto2::GoogleMessage2;
} else if (dataset.message_name() ==
"benchmarks.google_message3.GoogleMessage3") {
message = new benchmarks::google_message3::GoogleMessage3;
} else if (dataset.message_name() ==
"benchmarks.google_message4.GoogleMessage4") {
message = new benchmarks::google_message4::GoogleMessage4;
} else {
std::cerr << "Unknown message type: " << dataset.message_name();
exit(1);
}

for (int i = 0; i < dataset.payload_size(); i++) {
message->ParseFromString(dataset.payload(i));
DataGroupStripper::StripMessage(message);
dataset.set_payload(i, message->SerializeAsString());
}

std::ofstream ofs(output_file);
ofs << dataset.SerializeAsString();
ofs.close();
}


return 0;
}
103 changes: 103 additions & 0 deletions benchmarks/util/protoc-gen-gogoproto.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#include "google/protobuf/compiler/code_generator.h"
#include "google/protobuf/io/zero_copy_stream.h"
#include "google/protobuf/io/printer.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"
#include "schema_proto2_to_proto3_util.h"

#include "google/protobuf/compiler/plugin.h"

using google::protobuf::FileDescriptorProto;
using google::protobuf::FileDescriptor;
using google::protobuf::DescriptorPool;
using google::protobuf::io::Printer;
using google::protobuf::util::SchemaGroupStripper;
using google::protobuf::util::SchemaAddZeroEnumValue;

namespace google {
namespace protobuf {
namespace compiler {

namespace {

string StripProto(string filename) {
if (filename.substr(filename.size() - 11) == ".protodevel") {
// .protodevel
return filename.substr(0, filename.size() - 11);
} else {
// .proto
return filename.substr(0, filename.size() - 6);
}
}

DescriptorPool new_pool_;

} // namespace

class GoGoProtoGenerator : public CodeGenerator {
public:
virtual bool GenerateAll(const std::vector<const FileDescriptor*>& files,
const string& parameter,
GeneratorContext* context,
string* error) const {
for (int i = 0; i < files.size(); i++) {
for (auto file : files) {
bool can_generate =
(new_pool_.FindFileByName(file->name()) == nullptr);
for (int j = 0; j < file->dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->dependency(j)->name()) != nullptr);
}
for (int j = 0; j < file->public_dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->public_dependency(j)->name()) != nullptr);
}
for (int j = 0; j < file->weak_dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->weak_dependency(j)->name()) != nullptr);
}
if (can_generate) {
Generate(file, parameter, context, error);
break;
}
}
}

return true;
}

virtual bool Generate(const FileDescriptor* file,
const string& parameter,
GeneratorContext* context,
string* error) const {
FileDescriptorProto new_file;
file->CopyTo(&new_file);
SchemaGroupStripper::StripFile(file, &new_file);

SchemaAddZeroEnumValue enum_scrubber;
enum_scrubber.ScrubFile(&new_file);

string filename = file->name();
string basename = StripProto(filename);

std::vector<std::pair<string,string>> option_pairs;
ParseGeneratorParameter(parameter, &option_pairs);

std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output(
context->Open(basename + ".proto"));
string content = new_pool_.BuildFile(new_file)->DebugString();
Printer printer(output.get(), '$');
printer.WriteRaw(content.c_str(), content.size());

return true;
}
};

} // namespace compiler
} // namespace protobuf
} // namespace google

int main(int argc, char* argv[]) {
google::protobuf::compiler::GoGoProtoGenerator generator;
return google::protobuf::compiler::PluginMain(argc, argv, &generator);
}
137 changes: 137 additions & 0 deletions benchmarks/util/schema_proto2_to_proto3_util.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#ifndef PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
#define PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_

#include "google/protobuf/message.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"

#include <sstream>
#include <algorithm>

using google::protobuf::Descriptor;
using google::protobuf::DescriptorProto;
using google::protobuf::FileDescriptorProto;
using google::protobuf::FieldDescriptorProto;
using google::protobuf::Message;
using google::protobuf::EnumValueDescriptorProto;

namespace google {
namespace protobuf {
namespace util {

class SchemaGroupStripper {

public:
static void StripFile(const FileDescriptor* old_file,
FileDescriptorProto *file) {
for (int i = file->mutable_message_type()->size() - 1; i >= 0; i--) {
if (IsMessageSet(old_file->message_type(i))) {
file->mutable_message_type()->DeleteSubrange(i, 1);
continue;
}
StripMessage(old_file->message_type(i), file->mutable_message_type(i));
}
for (int i = file->mutable_extension()->size() - 1; i >= 0; i--) {
auto field = old_file->extension(i);
if (field->type() == FieldDescriptor::TYPE_GROUP ||
IsMessageSet(field->message_type()) ||
IsMessageSet(field->containing_type())) {
file->mutable_extension()->DeleteSubrange(i, 1);
}
}
}

private:
static bool IsMessageSet(const Descriptor *descriptor) {
if (descriptor != nullptr
&& descriptor->options().message_set_wire_format()) {
return true;
}
return false;
}

static void StripMessage(const Descriptor *old_message,
DescriptorProto *new_message) {
for (int i = new_message->mutable_field()->size() - 1; i >= 0; i--) {
if (old_message->field(i)->type() == FieldDescriptor::TYPE_GROUP ||
IsMessageSet(old_message->field(i)->message_type())) {
new_message->mutable_field()->DeleteSubrange(i, 1);
}
}
for (int i = new_message->mutable_extension()->size() - 1; i >= 0; i--) {
auto field_type_name = new_message->mutable_extension(i)->type_name();
if (old_message->extension(i)->type() == FieldDescriptor::TYPE_GROUP ||
IsMessageSet(old_message->extension(i)->containing_type()) ||
IsMessageSet(old_message->extension(i)->message_type())) {
new_message->mutable_extension()->DeleteSubrange(i, 1);
}
}
for (int i = 0; i < new_message->mutable_nested_type()->size(); i++) {
StripMessage(old_message->nested_type(i),
new_message->mutable_nested_type(i));
}
}

};

class SchemaAddZeroEnumValue {

public:
SchemaAddZeroEnumValue()
: total_added_(0) {
}

void ScrubFile(FileDescriptorProto *file) {
for (int i = 0; i < file->enum_type_size(); i++) {
ScrubEnum(file->mutable_enum_type(i));
}
for (int i = 0; i < file->mutable_message_type()->size(); i++) {
ScrubMessage(file->mutable_message_type(i));
}
}

private:
void ScrubEnum(EnumDescriptorProto *enum_type) {
if (enum_type->value(0).number() != 0) {
bool has_zero = false;
for (int j = 0; j < enum_type->value().size(); j++) {
if (enum_type->value(j).number() == 0) {
EnumValueDescriptorProto temp_enum_value;
temp_enum_value.CopyFrom(enum_type->value(j));
enum_type->mutable_value(j)->CopyFrom(enum_type->value(0));
enum_type->mutable_value(0)->CopyFrom(temp_enum_value);
has_zero = true;
break;
}
}
if (!has_zero) {
enum_type->mutable_value()->Add();
for (int i = enum_type->mutable_value()->size() - 1; i > 0; i--) {
enum_type->mutable_value(i)->CopyFrom(
*enum_type->mutable_value(i - 1));
}
enum_type->mutable_value(0)->set_number(0);
enum_type->mutable_value(0)->set_name("ADDED_ZERO_VALUE_" +
std::to_string(total_added_++));
}
}

}

void ScrubMessage(DescriptorProto *message_type) {
for (int i = 0; i < message_type->mutable_enum_type()->size(); i++) {
ScrubEnum(message_type->mutable_enum_type(i));
}
for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) {
ScrubMessage(message_type->mutable_nested_type(i));
}
}

int total_added_;
};

} // namespace util
} // namespace protobuf
} // namespace google

#endif // PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_

0 comments on commit 320d56c

Please sign in to comment.