From b17ec3ca11ed13cc0d984f6d8be112c246b1994d Mon Sep 17 00:00:00 2001 From: Feng Xiao Date: Sun, 23 Aug 2015 17:50:38 -0700 Subject: [PATCH] Down-integrate from internal code base. --- cmake/tests.cmake | 17 +++- java/pom.xml | 1 + .../java/com/google/protobuf/AnyTest.java | 92 +++++++++++++++++++ .../java/com/google/protobuf/any_test.proto | 42 +++++++++ python/google/protobuf/pyext/descriptor.cc | 66 +++++++++++-- .../google/protobuf/pyext/descriptor_pool.cc | 46 ++++++++-- .../google/protobuf/pyext/descriptor_pool.h | 15 +-- src/Makefile.am | 41 +++++++-- .../protobuf/compiler/cpp/cpp_helpers.cc | 88 ++++++++++++++++++ .../protobuf/compiler/cpp/cpp_helpers.h | 19 +++- .../protobuf/compiler/cpp/cpp_map_field.cc | 63 ++++++++++++- .../protobuf/compiler/cpp/cpp_string_field.cc | 84 +++++++---------- src/google/protobuf/compiler/parser.cc | 36 ++++++++ src/google/protobuf/lite_arena_unittest.cc | 83 +++++++++++++++++ src/google/protobuf/wire_format.cc | 56 +++++------ src/google/protobuf/wire_format.h | 19 ++-- src/google/protobuf/wire_format_lite.cc | 31 +++++++ src/google/protobuf/wire_format_lite.h | 10 ++ update_file_lists.sh | 4 + 19 files changed, 669 insertions(+), 144 deletions(-) create mode 100644 java/src/test/java/com/google/protobuf/AnyTest.java create mode 100644 java/src/test/java/com/google/protobuf/any_test.proto create mode 100644 src/google/protobuf/lite_arena_unittest.cc diff --git a/cmake/tests.cmake b/cmake/tests.cmake index 921510873734e..ef4dfe0221acb 100644 --- a/cmake/tests.cmake +++ b/cmake/tests.cmake @@ -99,6 +99,12 @@ set(common_test_files ${protobuf_source_dir}/src/google/protobuf/testing/googletest.cc ) +set(common_lite_test_files + ${protobuf_source_dir}/src/google/protobuf/arena_test_util.cc + ${protobuf_source_dir}/src/google/protobuf/map_lite_test_util.cc + ${protobuf_source_dir}/src/google/protobuf/test_util_lite.cc +) + set(tests_files ${protobuf_source_dir}/src/google/protobuf/any_test.cc ${protobuf_source_dir}/src/google/protobuf/arena_unittest.cc @@ -179,10 +185,13 @@ add_executable(test_plugin ${test_plugin_files}) target_link_libraries(test_plugin libprotoc libprotobuf gmock) set(lite_test_files - ${protobuf_source_dir}/src/google/protobuf/arena_test_util.cc ${protobuf_source_dir}/src/google/protobuf/lite_unittest.cc - ${protobuf_source_dir}/src/google/protobuf/map_lite_test_util.cc - ${protobuf_source_dir}/src/google/protobuf/test_util_lite.cc ) -add_executable(lite-test ${lite_test_files} ${lite_test_proto_files}) +add_executable(lite-test ${lite_test_files} ${common_lite_test_files} ${lite_test_proto_files}) target_link_libraries(lite-test libprotobuf-lite) + +set(lite_arena_test_files + ${protobuf_source_dir}/src/google/protobuf/lite_arena_unittest.cc +) +add_executable(lite-arena-test ${lite_arena_test_files} ${common_lite_test_files} ${lite_test_proto_files}) +target_link_libraries(lite-arena-test libprotobuf-lite gmock_main) diff --git a/java/pom.xml b/java/pom.xml index fb7e4168e95f7..49099b4ae8f7e 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -142,6 +142,7 @@ + diff --git a/java/src/test/java/com/google/protobuf/AnyTest.java b/java/src/test/java/com/google/protobuf/AnyTest.java new file mode 100644 index 0000000000000..e169f69d2453b --- /dev/null +++ b/java/src/test/java/com/google/protobuf/AnyTest.java @@ -0,0 +1,92 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package com.google.protobuf; + +import any_test.AnyTestProto.TestAny; +import protobuf_unittest.UnittestProto.TestAllTypes; + +import junit.framework.TestCase; + +/** + * Unit tests for Any message. + */ +public class AnyTest extends TestCase { + public void testAnyGeneratedApi() throws Exception { + TestAllTypes.Builder builder = TestAllTypes.newBuilder(); + TestUtil.setAllFields(builder); + TestAllTypes message = builder.build(); + + TestAny container = TestAny.newBuilder() + .setValue(Any.pack(message)).build(); + + assertTrue(container.getValue().is(TestAllTypes.class)); + assertFalse(container.getValue().is(TestAny.class)); + + TestAllTypes result = container.getValue().unpack(TestAllTypes.class); + TestUtil.assertAllFieldsSet(result); + + + // Unpacking to a wrong type will throw an exception. + try { + TestAny wrongMessage = container.getValue().unpack(TestAny.class); + fail("Exception is expected."); + } catch (InvalidProtocolBufferException e) { + // expected. + } + + // Test that unpacking throws an exception if parsing fails. + TestAny.Builder containerBuilder = container.toBuilder(); + containerBuilder.getValueBuilder().setValue( + ByteString.copyFrom(new byte[]{0x11})); + container = containerBuilder.build(); + try { + TestAllTypes parsingFailed = container.getValue().unpack(TestAllTypes.class); + fail("Exception is expected."); + } catch (InvalidProtocolBufferException e) { + // expected. + } + } + + public void testCachedUnpackResult() throws Exception { + TestAllTypes.Builder builder = TestAllTypes.newBuilder(); + TestUtil.setAllFields(builder); + TestAllTypes message = builder.build(); + + TestAny container = TestAny.newBuilder() + .setValue(Any.pack(message)).build(); + + assertTrue(container.getValue().is(TestAllTypes.class)); + + TestAllTypes result1 = container.getValue().unpack(TestAllTypes.class); + TestAllTypes result2 = container.getValue().unpack(TestAllTypes.class); + assertTrue(result1 == result2); + } +} diff --git a/java/src/test/java/com/google/protobuf/any_test.proto b/java/src/test/java/com/google/protobuf/any_test.proto new file mode 100644 index 0000000000000..80173d8a0e422 --- /dev/null +++ b/java/src/test/java/com/google/protobuf/any_test.proto @@ -0,0 +1,42 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +syntax = "proto3"; + +package any_test; + +option java_package = "any_test"; +option java_outer_classname = "AnyTestProto"; + +import "google/protobuf/any.proto"; + +message TestAny { + google.protobuf.Any value = 1; +} diff --git a/python/google/protobuf/pyext/descriptor.cc b/python/google/protobuf/pyext/descriptor.cc index 8581f529e2bb3..3806643f11483 100644 --- a/python/google/protobuf/pyext/descriptor.cc +++ b/python/google/protobuf/pyext/descriptor.cc @@ -62,6 +62,14 @@ namespace google { namespace protobuf { namespace python { +// Store interned descriptors, so that the same C++ descriptor yields the same +// Python object. Objects are not immortal: this map does not own the +// references, and items are deleted when the last reference to the object is +// released. +// This is enough to support the "is" operator on live objects. +// All descriptors are stored here. +hash_map interned_descriptors; + PyObject* PyString_FromCppString(const string& str) { return PyString_FromStringAndSize(str.c_str(), str.size()); } @@ -147,6 +155,24 @@ static int CheckCalledFromGeneratedFile(const char* attr_name) { // Helper functions for descriptor objects. +// A set of templates to retrieve the C++ FileDescriptor of any descriptor. +template +const FileDescriptor* GetFileDescriptor(const DescriptorClass* descriptor) { + return descriptor->file(); +} +template<> +const FileDescriptor* GetFileDescriptor(const FileDescriptor* descriptor) { + return descriptor; +} +template<> +const FileDescriptor* GetFileDescriptor(const EnumValueDescriptor* descriptor) { + return descriptor->type()->file(); +} +template<> +const FileDescriptor* GetFileDescriptor(const OneofDescriptor* descriptor) { + return descriptor->containing_type()->file(); +} + // Converts options into a Python protobuf, and cache the result. // // This is a bit tricky because options can contain extension fields defined in @@ -156,8 +182,13 @@ static int CheckCalledFromGeneratedFile(const char* attr_name) { // Always returns a new reference. template static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) { + // Options (and their extensions) are completely resolved in the proto file + // containing the descriptor. + PyDescriptorPool* pool = GetDescriptorPool_FromPool( + GetFileDescriptor(descriptor)->pool()); + hash_map* descriptor_options = - GetDescriptorPool()->descriptor_options; + pool->descriptor_options; // First search in the cache. if (descriptor_options->find(descriptor) != descriptor_options->end()) { PyObject *value = (*descriptor_options)[descriptor]; @@ -170,7 +201,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) { const Message& options(descriptor->options()); const Descriptor *message_type = options.GetDescriptor(); PyObject* message_class(cdescriptor_pool::GetMessageClass( - GetDescriptorPool(), message_type)); + pool, message_type)); if (message_class == NULL) { PyErr_Format(PyExc_TypeError, "Could not retrieve class for Options: %s", message_type->full_name().c_str()); @@ -192,7 +223,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) { options.SerializeToString(&serialized); io::CodedInputStream input( reinterpret_cast(serialized.c_str()), serialized.size()); - input.SetExtensionRegistry(GetDescriptorPool()->pool, + input.SetExtensionRegistry(pool->pool, GetDescriptorPool()->message_factory); bool success = cmsg->message->MergePartialFromCodedStream(&input); if (!success) { @@ -203,7 +234,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) { // Cache the result. Py_INCREF(value); - (*GetDescriptorPool()->descriptor_options)[descriptor] = value.get(); + (*pool->descriptor_options)[descriptor] = value.get(); return value.release(); } @@ -237,6 +268,9 @@ typedef struct PyBaseDescriptor { // Pointer to the C++ proto2 descriptor. // Like all descriptors, it is owned by the global DescriptorPool. const void* descriptor; + + // Owned reference to the DescriptorPool, to ensure it is kept alive. + PyDescriptorPool* pool; } PyBaseDescriptor; @@ -258,7 +292,9 @@ namespace descriptor { // 'was_created' is an optional pointer to a bool, and is set to true if a new // object was allocated. // Always return a new reference. -PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor, +template +PyObject* NewInternedDescriptor(PyTypeObject* type, + const DescriptorClass* descriptor, bool* was_created) { if (was_created) { *was_created = false; @@ -270,8 +306,8 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor, // See if the object is in the map of interned descriptors hash_map::iterator it = - GetDescriptorPool()->interned_descriptors->find(descriptor); - if (it != GetDescriptorPool()->interned_descriptors->end()) { + interned_descriptors.find(descriptor); + if (it != interned_descriptors.end()) { GOOGLE_DCHECK(Py_TYPE(it->second) == type); Py_INCREF(it->second); return it->second; @@ -283,10 +319,21 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor, return NULL; } py_descriptor->descriptor = descriptor; + // and cache it. - GetDescriptorPool()->interned_descriptors->insert( + interned_descriptors.insert( std::make_pair(descriptor, reinterpret_cast(py_descriptor))); + // Ensures that the DescriptorPool stays alive. + PyDescriptorPool* pool = GetDescriptorPool_FromPool( + GetFileDescriptor(descriptor)->pool()); + if (pool == NULL) { + Py_DECREF(py_descriptor); + return NULL; + } + Py_INCREF(pool); + py_descriptor->pool = pool; + if (was_created) { *was_created = true; } @@ -295,7 +342,8 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor, static void Dealloc(PyBaseDescriptor* self) { // Remove from interned dictionary - GetDescriptorPool()->interned_descriptors->erase(self->descriptor); + interned_descriptors.erase(self->descriptor); + Py_CLEAR(self->pool); Py_TYPE(self)->tp_free(reinterpret_cast(self)); } diff --git a/python/google/protobuf/pyext/descriptor_pool.cc b/python/google/protobuf/pyext/descriptor_pool.cc index d5ba2b6ffb9fd..7aed651d3f758 100644 --- a/python/google/protobuf/pyext/descriptor_pool.cc +++ b/python/google/protobuf/pyext/descriptor_pool.cc @@ -54,9 +54,13 @@ namespace google { namespace protobuf { namespace python { +// A map to cache Python Pools per C++ pointer. +// Pointers are not owned here, and belong to the PyDescriptorPool. +static hash_map descriptor_pool_map; + namespace cdescriptor_pool { -PyDescriptorPool* NewDescriptorPool() { +static PyDescriptorPool* NewDescriptorPool() { PyDescriptorPool* cdescriptor_pool = PyObject_New( PyDescriptorPool, &PyDescriptorPool_Type); if (cdescriptor_pool == NULL) { @@ -77,22 +81,27 @@ PyDescriptorPool* NewDescriptorPool() { // storage. cdescriptor_pool->classes_by_descriptor = new PyDescriptorPool::ClassesByMessageMap(); - cdescriptor_pool->interned_descriptors = - new hash_map(); cdescriptor_pool->descriptor_options = new hash_map(); + if (!descriptor_pool_map.insert( + std::make_pair(cdescriptor_pool->pool, cdescriptor_pool)).second) { + // Should never happen -- would indicate an internal error / bug. + PyErr_SetString(PyExc_ValueError, "DescriptorPool already registered"); + return NULL; + } + return cdescriptor_pool; } static void Dealloc(PyDescriptorPool* self) { typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator; + descriptor_pool_map.erase(self->pool); for (iterator it = self->classes_by_descriptor->begin(); it != self->classes_by_descriptor->end(); ++it) { Py_DECREF(it->second); } delete self->classes_by_descriptor; - delete self->interned_descriptors; // its references were borrowed. for (hash_map::iterator it = self->descriptor_options->begin(); it != self->descriptor_options->end(); ++it) { @@ -391,22 +400,43 @@ PyTypeObject PyDescriptorPool_Type = { PyObject_Del, // tp_free }; -static PyDescriptorPool* global_cdescriptor_pool = NULL; +// This is the DescriptorPool which contains all the definitions from the +// generated _pb2.py modules. +static PyDescriptorPool* python_generated_pool = NULL; bool InitDescriptorPool() { if (PyType_Ready(&PyDescriptorPool_Type) < 0) return false; - global_cdescriptor_pool = cdescriptor_pool::NewDescriptorPool(); - if (global_cdescriptor_pool == NULL) { + python_generated_pool = cdescriptor_pool::NewDescriptorPool(); + if (python_generated_pool == NULL) { return false; } + // Register this pool to be found for C++-generated descriptors. + descriptor_pool_map.insert( + std::make_pair(DescriptorPool::generated_pool(), + python_generated_pool)); return true; } PyDescriptorPool* GetDescriptorPool() { - return global_cdescriptor_pool; + return python_generated_pool; +} + +PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool) { + // Fast path for standard descriptors. + if (pool == python_generated_pool->pool || + pool == DescriptorPool::generated_pool()) { + return python_generated_pool; + } + hash_map::iterator it = + descriptor_pool_map.find(pool); + if (it != descriptor_pool_map.end()) { + PyErr_SetString(PyExc_KeyError, "Unknown descriptor pool"); + return NULL; + } + return it->second; } } // namespace python diff --git a/python/google/protobuf/pyext/descriptor_pool.h b/python/google/protobuf/pyext/descriptor_pool.h index 6f6c5cdb5252d..541d920bc820b 100644 --- a/python/google/protobuf/pyext/descriptor_pool.h +++ b/python/google/protobuf/pyext/descriptor_pool.h @@ -72,14 +72,6 @@ typedef struct PyDescriptorPool { typedef hash_map ClassesByMessageMap; ClassesByMessageMap* classes_by_descriptor; - // Store interned descriptors, so that the same C++ descriptor yields the same - // Python object. Objects are not immortal: this map does not own the - // references, and items are deleted when the last reference to the object is - // released. - // This is enough to support the "is" operator on live objects. - // All descriptors are stored here. - hash_map* interned_descriptors; - // Cache the options for any kind of descriptor. // Descriptor pointers are owned by the DescriptorPool above. // Python objects are owned by the map. @@ -91,9 +83,6 @@ extern PyTypeObject PyDescriptorPool_Type; namespace cdescriptor_pool { -// Builds a new DescriptorPool. Normally called only once per process. -PyDescriptorPool* NewDescriptorPool(); - // Looks up a message by name. // Returns a message Descriptor, or NULL if not found. const Descriptor* FindMessageTypeByName(PyDescriptorPool* self, @@ -150,6 +139,10 @@ PyObject* FindOneofByName(PyDescriptorPool* self, PyObject* arg); // Returns a *borrowed* reference. PyDescriptorPool* GetDescriptorPool(); +// Retrieve the python descriptor pool owning a C++ descriptor pool. +// Returns a *borrowed* reference. +PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool); + // Initialize objects used by this module. bool InitDescriptorPool(); diff --git a/src/Makefile.am b/src/Makefile.am index 4c8919d705ce2..caae2933e494a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -191,6 +191,7 @@ libprotobuf_lite_la_SOURCES = \ google/protobuf/stubs/stringpiece.h \ google/protobuf/stubs/stringprintf.cc \ google/protobuf/stubs/stringprintf.h \ + google/protobuf/stubs/structurally_valid.cc \ google/protobuf/stubs/strutil.cc \ google/protobuf/stubs/strutil.h \ google/protobuf/stubs/time.cc \ @@ -232,7 +233,6 @@ libprotobuf_la_SOURCES = \ google/protobuf/service.cc \ google/protobuf/source_context.pb.cc \ google/protobuf/struct.pb.cc \ - google/protobuf/stubs/structurally_valid.cc \ google/protobuf/stubs/substitute.cc \ google/protobuf/stubs/substitute.h \ google/protobuf/text_format.cc \ @@ -659,7 +659,8 @@ COMMON_TEST_SOURCES = \ google/protobuf/testing/file.h check_PROGRAMS = protoc protobuf-test protobuf-lazy-descriptor-test \ - protobuf-lite-test test_plugin $(GZCHECKPROGRAMS) + protobuf-lite-test test_plugin protobuf-lite-arena-test \ + $(GZCHECKPROGRAMS) protobuf_test_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la \ ../gmock/gtest/lib/libgtest.la \ ../gmock/lib/libgmock.la \ @@ -756,21 +757,40 @@ protobuf_lazy_descriptor_test_SOURCES = \ $(COMMON_TEST_SOURCES) nodist_protobuf_lazy_descriptor_test_SOURCES = $(protoc_outputs) -# Build lite_unittest separately, since it doesn't use gtest. -protobuf_lite_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la -protobuf_lite_test_CXXFLAGS = $(NO_OPT_CXXFLAGS) -protobuf_lite_test_SOURCES = \ +COMMON_LITE_TEST_SOURCES = \ google/protobuf/arena_test_util.cc \ google/protobuf/arena_test_util.h \ - google/protobuf/lite_unittest.cc \ google/protobuf/map_lite_test_util.cc \ google/protobuf/map_lite_test_util.h \ google/protobuf/test_util_lite.cc \ google/protobuf/test_util_lite.h - # TODO(teboring) add the file back and make the test build. - # google/protobuf/map_lite_test.cc + +# Build lite_unittest separately, since it doesn't use gtest. It can't +# depend on gtest because our internal version of gtest depend on proto +# full runtime and we want to make sure this test builds without full +# runtime. +protobuf_lite_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la +protobuf_lite_test_CXXFLAGS = $(NO_OPT_CXXFLAGS) +protobuf_lite_test_SOURCES = \ + google/protobuf/lite_unittest.cc \ + $(COMMON_LITE_TEST_SOURCES) nodist_protobuf_lite_test_SOURCES = $(protoc_lite_outputs) +# lite_arena_unittest depends on gtest because teboring@ found that without +# gtest when building the test internally our memory sanitizer doesn't detect +# memory leaks (don't know why). +protobuf_lite_arena_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la \ + ../gmock/gtest/lib/libgtest.la \ + ../gmock/lib/libgmock.la \ + ../gmock/lib/libgmock_main.la +protobuf_lite_arena_test_CPPFLAGS = -I$(srcdir)/../gmock/include \ + -I$(srcdir)/../gmock/gtest/include +protobuf_lite_arena_test_CXXFLAGS = $(NO_OPT_CXXFLAGS) +protobuf_lite_arena_test_SOURCES = \ + google/protobuf/lite_arena_unittest.cc \ + $(COMMON_LITE_TEST_SOURCES) +nodist_protobuf_lite_arena_test_SOURCES = $(protoc_lite_outputs) + # Test plugin binary. test_plugin_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la \ ../gmock/gtest/lib/libgtest.la @@ -790,4 +810,5 @@ zcgunzip_SOURCES = google/protobuf/testing/zcgunzip.cc endif TESTS = protobuf-test protobuf-lazy-descriptor-test protobuf-lite-test \ - google/protobuf/compiler/zip_output_unittest.sh $(GZTESTS) + google/protobuf/compiler/zip_output_unittest.sh $(GZTESTS) \ + protobuf-lite-arena-test diff --git a/src/google/protobuf/compiler/cpp/cpp_helpers.cc b/src/google/protobuf/compiler/cpp/cpp_helpers.cc index 678a995aeb74a..09845458b9ae0 100644 --- a/src/google/protobuf/compiler/cpp/cpp_helpers.cc +++ b/src/google/protobuf/compiler/cpp/cpp_helpers.cc @@ -600,6 +600,94 @@ bool IsAnyMessage(const Descriptor* descriptor) { descriptor->file()->name() == kAnyProtoFile; } +enum Utf8CheckMode { + STRICT = 0, // Parsing will fail if non UTF-8 data is in string fields. + VERIFY = 1, // Only log an error but parsing will succeed. + NONE = 2, // No UTF-8 check. +}; + +// Which level of UTF-8 enforcemant is placed on this file. +static Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field) { + if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3) { + return STRICT; + } else if (field->file()->options().optimize_for() != + FileOptions::LITE_RUNTIME) { + return VERIFY; + } else { + return NONE; + } +} + +static void GenerateUtf8CheckCode(const FieldDescriptor* field, + bool for_parse, + const map& variables, + const char* parameters, + const char* strict_function, + const char* verify_function, + io::Printer* printer) { + switch (GetUtf8CheckMode(field)) { + case STRICT: { + if (for_parse) { + printer->Print("DO_("); + } + printer->Print( + "::google::protobuf::internal::WireFormatLite::$function$(\n", + "function", strict_function); + printer->Indent(); + printer->Print(variables, parameters); + if (for_parse) { + printer->Print("::google::protobuf::internal::WireFormatLite::PARSE,\n"); + } else { + printer->Print("::google::protobuf::internal::WireFormatLite::SERIALIZE,\n"); + } + printer->Print("\"$full_name$\")", "full_name", field->full_name()); + if (for_parse) { + printer->Print(")"); + } + printer->Print(";\n"); + printer->Outdent(); + break; + } + case VERIFY: { + printer->Print( + "::google::protobuf::internal::WireFormat::$function$(\n", + "function", verify_function); + printer->Indent(); + printer->Print(variables, parameters); + if (for_parse) { + printer->Print("::google::protobuf::internal::WireFormat::PARSE,\n"); + } else { + printer->Print("::google::protobuf::internal::WireFormat::SERIALIZE,\n"); + } + printer->Print("\"$full_name$\");\n", "full_name", field->full_name()); + printer->Outdent(); + break; + } + case NONE: + break; + } +} + +void GenerateUtf8CheckCodeForString(const FieldDescriptor* field, + bool for_parse, + const map& variables, + const char* parameters, + io::Printer* printer) { + GenerateUtf8CheckCode(field, for_parse, variables, parameters, + "VerifyUtf8String", "VerifyUTF8StringNamedField", + printer); +} + +void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field, + bool for_parse, + const map& variables, + const char* parameters, + io::Printer* printer) { + GenerateUtf8CheckCode(field, for_parse, variables, parameters, + "VerifyUtf8Cord", "VerifyUTF8CordNamedField", + printer); +} + } // namespace cpp } // namespace compiler } // namespace protobuf diff --git a/src/google/protobuf/compiler/cpp/cpp_helpers.h b/src/google/protobuf/compiler/cpp/cpp_helpers.h index 29c1f90bf71e9..985cb04cae330 100644 --- a/src/google/protobuf/compiler/cpp/cpp_helpers.h +++ b/src/google/protobuf/compiler/cpp/cpp_helpers.h @@ -202,11 +202,6 @@ inline bool HasGenericServices(const FileDescriptor* file) { file->options().cc_generic_services(); } -// Should string fields in this file verify that their contents are UTF-8? -inline bool HasUtf8Verification(const FileDescriptor* file) { - return file->options().optimize_for() != FileOptions::LITE_RUNTIME; -} - // Should we generate a separate, super-optimized code path for serializing to // flat arrays? We don't do this in Lite mode because we'd rather reduce code // size. @@ -270,6 +265,20 @@ inline bool SupportsArenas(const FieldDescriptor* field) { bool IsAnyMessage(const FileDescriptor* descriptor); bool IsAnyMessage(const Descriptor* descriptor); +void GenerateUtf8CheckCodeForString( + const FieldDescriptor* field, + bool for_parse, + const map& variables, + const char* parameters, + io::Printer* printer); + +void GenerateUtf8CheckCodeForCord( + const FieldDescriptor* field, + bool for_parse, + const map& variables, + const char* parameters, + io::Printer* printer); + } // namespace cpp } // namespace compiler } // namespace protobuf diff --git a/src/google/protobuf/compiler/cpp/cpp_map_field.cc b/src/google/protobuf/compiler/cpp/cpp_map_field.cc index a14d8986395d5..25acc61bedccf 100644 --- a/src/google/protobuf/compiler/cpp/cpp_map_field.cc +++ b/src/google/protobuf/compiler/cpp/cpp_map_field.cc @@ -234,6 +234,20 @@ GenerateMergeFromCodedStream(io::Printer* printer) const { "}\n"); } + const FieldDescriptor* key_field = + descriptor_->message_type()->FindFieldByName("key"); + if (key_field->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + key_field, true, variables_, + "entry->key().data(), entry->key().length(),\n", printer); + } + if (value_field->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + value_field, true, variables_, + "entry->mutable_value()->data(),\n" + "entry->mutable_value()->length(),\n", printer); + } + // If entry is allocated by arena, its desctructor should be avoided. if (SupportsArenas(descriptor_)) { printer->Print(variables_, @@ -261,7 +275,30 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const { printer->Print(variables_, " entry.reset($name$_.New$wrapper$(it->first, it->second));\n" " ::google::protobuf::internal::WireFormatLite::Write$stream_writer$(\n" - " $number$, *entry, output);\n" + " $number$, *entry, output);\n"); + + printer->Indent(); + printer->Indent(); + + const FieldDescriptor* key_field = + descriptor_->message_type()->FindFieldByName("key"); + const FieldDescriptor* value_field = + descriptor_->message_type()->FindFieldByName("value"); + if (key_field->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + key_field, false, variables_, + "it->first.data(), it->first.length(),\n", printer); + } + if (value_field->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + value_field, false, variables_, + "it->second.data(), it->second.length(),\n", printer); + } + + printer->Outdent(); + printer->Outdent(); + + printer->Print( " }\n"); // If entry is allocated by arena, its desctructor should be avoided. @@ -296,7 +333,29 @@ GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const { " entry.reset($name$_.New$wrapper$(it->first, it->second));\n" " target = ::google::protobuf::internal::WireFormatLite::\n" " Write$declared_type$NoVirtualToArray(\n" - " $number$, *entry, target);\n" + " $number$, *entry, target);\n"); + + printer->Indent(); + printer->Indent(); + + const FieldDescriptor* key_field = + descriptor_->message_type()->FindFieldByName("key"); + const FieldDescriptor* value_field = + descriptor_->message_type()->FindFieldByName("value"); + if (key_field->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + key_field, false, variables_, + "it->first.data(), it->first.length(),\n", printer); + } + if (value_field->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + value_field, false, variables_, + "it->second.data(), it->second.length(),\n", printer); + } + + printer->Outdent(); + printer->Outdent(); + printer->Print( " }\n"); // If entry is allocated by arena, its desctructor should be avoided. diff --git a/src/google/protobuf/compiler/cpp/cpp_string_field.cc b/src/google/protobuf/compiler/cpp/cpp_string_field.cc index d1af6ddaf7f13..6b0821a6c1772 100644 --- a/src/google/protobuf/compiler/cpp/cpp_string_field.cc +++ b/src/google/protobuf/compiler/cpp/cpp_string_field.cc @@ -367,25 +367,19 @@ GenerateMergeFromCodedStream(io::Printer* printer) const { "DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n" " input, this->mutable_$name$()));\n"); - if (HasUtf8Verification(descriptor_->file()) && - descriptor_->type() == FieldDescriptor::TYPE_STRING) { - printer->Print(variables_, - "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" - " this->$name$().data(), this->$name$().length(),\n" - " ::google::protobuf::internal::WireFormat::PARSE,\n" - " \"$full_name$\");\n"); + if (descriptor_->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + descriptor_, true, variables_, + "this->$name$().data(), this->$name$().length(),\n", printer); } } void StringFieldGenerator:: GenerateSerializeWithCachedSizes(io::Printer* printer) const { - if (HasUtf8Verification(descriptor_->file()) && - descriptor_->type() == FieldDescriptor::TYPE_STRING) { - printer->Print(variables_, - "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" - " this->$name$().data(), this->$name$().length(),\n" - " ::google::protobuf::internal::WireFormat::SERIALIZE,\n" - " \"$full_name$\");\n"); + if (descriptor_->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + descriptor_, false, variables_, + "this->$name$().data(), this->$name$().length(),\n", printer); } printer->Print(variables_, "::google::protobuf::internal::WireFormatLite::Write$declared_type$MaybeAliased(\n" @@ -394,13 +388,10 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const { void StringFieldGenerator:: GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const { - if (HasUtf8Verification(descriptor_->file()) && - descriptor_->type() == FieldDescriptor::TYPE_STRING) { - printer->Print(variables_, - "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" - " this->$name$().data(), this->$name$().length(),\n" - " ::google::protobuf::internal::WireFormat::SERIALIZE,\n" - " \"$full_name$\");\n"); + if (descriptor_->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + descriptor_, false, variables_, + "this->$name$().data(), this->$name$().length(),\n", printer); } printer->Print(variables_, "target =\n" @@ -665,13 +656,10 @@ GenerateMergeFromCodedStream(io::Printer* printer) const { "DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n" " input, this->mutable_$name$()));\n"); - if (HasUtf8Verification(descriptor_->file()) && - descriptor_->type() == FieldDescriptor::TYPE_STRING) { - printer->Print(variables_, - "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" - " this->$name$().data(), this->$name$().length(),\n" - " ::google::protobuf::internal::WireFormat::PARSE,\n" - " \"$full_name$\");\n"); + if (descriptor_->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + descriptor_, true, variables_, + "this->$name$().data(), this->$name$().length(),\n", printer); } } @@ -817,14 +805,12 @@ GenerateMergeFromCodedStream(io::Printer* printer) const { printer->Print(variables_, "DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n" " input, this->add_$name$()));\n"); - if (HasUtf8Verification(descriptor_->file()) && - descriptor_->type() == FieldDescriptor::TYPE_STRING) { - printer->Print(variables_, - "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" - " this->$name$(this->$name$_size() - 1).data(),\n" - " this->$name$(this->$name$_size() - 1).length(),\n" - " ::google::protobuf::internal::WireFormat::PARSE,\n" - " \"$full_name$\");\n"); + if (descriptor_->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + descriptor_, true, variables_, + "this->$name$(this->$name$_size() - 1).data(),\n" + "this->$name$(this->$name$_size() - 1).length(),\n", + printer); } } @@ -832,14 +818,13 @@ void RepeatedStringFieldGenerator:: GenerateSerializeWithCachedSizes(io::Printer* printer) const { printer->Print(variables_, "for (int i = 0; i < this->$name$_size(); i++) {\n"); - if (HasUtf8Verification(descriptor_->file()) && - descriptor_->type() == FieldDescriptor::TYPE_STRING) { - printer->Print(variables_, - "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" - " this->$name$(i).data(), this->$name$(i).length(),\n" - " ::google::protobuf::internal::WireFormat::SERIALIZE,\n" - " \"$full_name$\");\n"); + printer->Indent(); + if (descriptor_->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + descriptor_, false, variables_, + "this->$name$(i).data(), this->$name$(i).length(),\n", printer); } + printer->Outdent(); printer->Print(variables_, " ::google::protobuf::internal::WireFormatLite::Write$declared_type$(\n" " $number$, this->$name$(i), output);\n" @@ -850,14 +835,13 @@ void RepeatedStringFieldGenerator:: GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const { printer->Print(variables_, "for (int i = 0; i < this->$name$_size(); i++) {\n"); - if (HasUtf8Verification(descriptor_->file()) && - descriptor_->type() == FieldDescriptor::TYPE_STRING) { - printer->Print(variables_, - " ::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n" - " this->$name$(i).data(), this->$name$(i).length(),\n" - " ::google::protobuf::internal::WireFormat::SERIALIZE,\n" - " \"$full_name$\");\n"); + printer->Indent(); + if (descriptor_->type() == FieldDescriptor::TYPE_STRING) { + GenerateUtf8CheckCodeForString( + descriptor_, false, variables_, + "this->$name$(i).data(), this->$name$(i).length(),\n", printer); } + printer->Outdent(); printer->Print(variables_, " target = ::google::protobuf::internal::WireFormatLite::\n" " Write$declared_type$ToArray($number$, this->$name$(i), target);\n" diff --git a/src/google/protobuf/compiler/parser.cc b/src/google/protobuf/compiler/parser.cc index 895ff34a10a6b..4d0184252bd72 100644 --- a/src/google/protobuf/compiler/parser.cc +++ b/src/google/protobuf/compiler/parser.cc @@ -939,6 +939,42 @@ void Parser::GenerateMapEntry(const MapField& map_field, } else { value_field->set_type_name(map_field.value_type_name); } + // Propagate the "enforce_utf8" option to key and value fields if they + // are strings. This helps simplify the implementation of code generators + // and also reflection-based parsing code. + // + // The following definition: + // message Foo { + // map value = 1 [enforce_utf8 = false]; + // } + // will be interpreted as: + // message Foo { + // message ValueEntry { + // option map_entry = true; + // string key = 1 [enforce_utf8 = false]; + // string value = 2 [enforce_utf8 = false]; + // } + // repeated ValueEntry value = 1 [enforce_utf8 = false]; + // } + // + // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed + // from protocol compiler. + for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) { + const UninterpretedOption& option = + field->options().uninterpreted_option(i); + if (option.name_size() == 1 && + option.name(0).name_part() == "enforce_utf8" && + !option.name(0).is_extension()) { + if (key_field->type() == FieldDescriptorProto::TYPE_STRING) { + key_field->mutable_options()->add_uninterpreted_option() + ->CopyFrom(option); + } + if (value_field->type() == FieldDescriptorProto::TYPE_STRING) { + value_field->mutable_options()->add_uninterpreted_option() + ->CopyFrom(option); + } + } + } } bool Parser::ParseFieldOptions(FieldDescriptorProto* field, diff --git a/src/google/protobuf/lite_arena_unittest.cc b/src/google/protobuf/lite_arena_unittest.cc new file mode 100644 index 0000000000000..f0bee880aa669 --- /dev/null +++ b/src/google/protobuf/lite_arena_unittest.cc @@ -0,0 +1,83 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include +#include +#include +#include + +namespace google { +namespace protobuf { +namespace { + +TEST(LiteArenaTest, MapNoHeapAllocation) { + // Allocate a large initial block to avoid mallocs during hooked test. + std::vector arena_block(128 * 1024); + google::protobuf::ArenaOptions options; + options.initial_block = &arena_block[0]; + options.initial_block_size = arena_block.size(); + google::protobuf::Arena arena(options); + string data; + data.reserve(128 * 1024); + + { + // TODO(teboring): Enable no heap check when ArenaStringPtr is used in + // Map. + // google::protobuf::internal::NoHeapChecker no_heap; + + protobuf_unittest::TestArenaMapLite* from = + google::protobuf::Arena::CreateMessage(&arena); + google::protobuf::MapLiteTestUtil::SetArenaMapFields(from); + from->SerializeToString(&data); + + protobuf_unittest::TestArenaMapLite* to = + google::protobuf::Arena::CreateMessage(&arena); + to->ParseFromString(data); + google::protobuf::MapLiteTestUtil::ExpectArenaMapFieldsSet(*to); + } +} + +TEST(LiteArenaTest, UnknownFieldMemLeak) { + google::protobuf::Arena arena; + protobuf_unittest::ForeignMessageArenaLite* message = + google::protobuf::Arena::CreateMessage( + &arena); + string data = "\012\000"; + int original_capacity = data.capacity(); + while (data.capacity() <= original_capacity) { + data.append("a"); + } + data[1] = data.size() - 2; + message->ParseFromString(data); +} + +} // namespace +} // namespace protobuf +} // namespace google diff --git a/src/google/protobuf/wire_format.cc b/src/google/protobuf/wire_format.cc index 8fbd116b33e36..5ee4e25d4935b 100644 --- a/src/google/protobuf/wire_format.cc +++ b/src/google/protobuf/wire_format.cc @@ -461,6 +461,10 @@ bool WireFormat::ParseAndMergeMessageSetField(uint32 field_number, } } +static bool StrictUtf8Check(const FieldDescriptor* field) { + return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3; +} + bool WireFormat::ParseAndMergeField( uint32 tag, const FieldDescriptor* field, // May be NULL for unknown @@ -633,10 +637,19 @@ bool WireFormat::ParseAndMergeField( // Handle strings separately so that we can optimize the ctype=CORD case. case FieldDescriptor::TYPE_STRING: { + bool strict_utf8_check = StrictUtf8Check(field); string value; if (!WireFormatLite::ReadString(input, &value)) return false; - VerifyUTF8StringNamedField(value.data(), value.length(), PARSE, - field->name().c_str()); + if (strict_utf8_check) { + if (!WireFormatLite::VerifyUtf8String( + value.data(), value.length(), WireFormatLite::PARSE, + field->full_name().c_str())) { + return false; + } + } else { + VerifyUTF8StringNamedField(value.data(), value.length(), PARSE, + field->full_name().c_str()); + } if (field->is_repeated()) { message_reflection->AddString(message, field, value); } else { @@ -894,13 +907,20 @@ void WireFormat::SerializeFieldWithCachedSizes( // Handle strings separately so that we can get string references // instead of copying. case FieldDescriptor::TYPE_STRING: { + bool strict_utf8_check = StrictUtf8Check(field); string scratch; const string& value = field->is_repeated() ? message_reflection->GetRepeatedStringReference( message, field, j, &scratch) : message_reflection->GetStringReference(message, field, &scratch); - VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE, - field->name().c_str()); + if (strict_utf8_check) { + WireFormatLite::VerifyUtf8String(value.data(), value.length(), + WireFormatLite::SERIALIZE, + field->full_name().c_str()); + } else { + VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE, + field->full_name().c_str()); + } WireFormatLite::WriteString(field->number(), value, output); break; } @@ -1108,34 +1128,6 @@ int WireFormat::MessageSetItemByteSize( return our_size; } -void WireFormat::VerifyUTF8StringFallback(const char* data, - int size, - Operation op, - const char* field_name) { - if (!IsStructurallyValidUTF8(data, size)) { - const char* operation_str = NULL; - switch (op) { - case PARSE: - operation_str = "parsing"; - break; - case SERIALIZE: - operation_str = "serializing"; - break; - // no default case: have the compiler warn if a case is not covered. - } - string quoted_field_name = ""; - if (field_name != NULL) { - quoted_field_name = StringPrintf(" '%s'", field_name); - } - // no space below to avoid double space when the field name is missing. - GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid " - << "UTF-8 data when " << operation_str << " a protocol " - << "buffer. Use the 'bytes' type if you intend to send raw " - << "bytes. "; - } -} - - } // namespace internal } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/wire_format.h b/src/google/protobuf/wire_format.h index 84270fee7e907..941be75ba1e2a 100644 --- a/src/google/protobuf/wire_format.h +++ b/src/google/protobuf/wire_format.h @@ -231,8 +231,8 @@ class LIBPROTOBUF_EXPORT WireFormat { const Message& message); enum Operation { - PARSE, - SERIALIZE, + PARSE = 0, + SERIALIZE = 1, }; // Verifies that a string field is valid UTF8, logging an error if not. @@ -247,13 +247,6 @@ class LIBPROTOBUF_EXPORT WireFormat { const char* field_name); private: - // Verifies that a string field is valid UTF8, logging an error if not. - static void VerifyUTF8StringFallback( - const char* data, - int size, - Operation op, - const char* field_name); - // Skip a MessageSet field. static bool SkipMessageSetField(io::CodedInputStream* input, uint32 field_number, @@ -265,8 +258,6 @@ class LIBPROTOBUF_EXPORT WireFormat { Message* message, io::CodedInputStream* input); - - GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat); }; @@ -321,7 +312,8 @@ inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) { inline void WireFormat::VerifyUTF8String(const char* data, int size, WireFormat::Operation op) { #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED - WireFormat::VerifyUTF8StringFallback(data, size, op, NULL); + WireFormatLite::VerifyUtf8String( + data, size, static_cast(op), NULL); #else // Avoid the compiler warning about unsued variables. (void)data; (void)size; (void)op; @@ -332,7 +324,8 @@ inline void WireFormat::VerifyUTF8StringNamedField( const char* data, int size, WireFormat::Operation op, const char* field_name) { #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED - WireFormat::VerifyUTF8StringFallback(data, size, op, field_name); + WireFormatLite::VerifyUtf8String( + data, size, static_cast(op), field_name); #endif } diff --git a/src/google/protobuf/wire_format_lite.cc b/src/google/protobuf/wire_format_lite.cc index dade41afd547b..847e35007a0d2 100644 --- a/src/google/protobuf/wire_format_lite.cc +++ b/src/google/protobuf/wire_format_lite.cc @@ -39,10 +39,12 @@ #include #include #include +#include #include #include #include + namespace google { namespace protobuf { namespace internal { @@ -505,6 +507,35 @@ bool WireFormatLite::ReadBytes(io::CodedInputStream* input, string** p) { return ReadBytesToString(input, *p); } +bool WireFormatLite::VerifyUtf8String(const char* data, + int size, + Operation op, + const char* field_name) { + if (!IsStructurallyValidUTF8(data, size)) { + const char* operation_str = NULL; + switch (op) { + case PARSE: + operation_str = "parsing"; + break; + case SERIALIZE: + operation_str = "serializing"; + break; + // no default case: have the compiler warn if a case is not covered. + } + string quoted_field_name = ""; + if (field_name != NULL) { + quoted_field_name = StringPrintf(" '%s'", field_name); + } + // no space below to avoid double space when the field name is missing. + GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid " + << "UTF-8 data when " << operation_str << " a protocol " + << "buffer. Use the 'bytes' type if you intend to send raw " + << "bytes. "; + return false; + } + return true; +} + } // namespace internal } // namespace protobuf } // namespace google diff --git a/src/google/protobuf/wire_format_lite.h b/src/google/protobuf/wire_format_lite.h index 19fbc2c591451..55fc7ecdfc580 100644 --- a/src/google/protobuf/wire_format_lite.h +++ b/src/google/protobuf/wire_format_lite.h @@ -316,6 +316,16 @@ class LIBPROTOBUF_EXPORT WireFormatLite { static bool ReadBytes(input, string** p); + enum Operation { + PARSE = 0, + SERIALIZE = 1, + }; + + // Returns true if the data is valid UTF-8. + static bool VerifyUtf8String(const char* data, int size, + Operation op, + const char* field_name); + static inline bool ReadGroup (field_number, input, MessageLite* value); static inline bool ReadMessage(input, MessageLite* value); diff --git a/update_file_lists.sh b/update_file_lists.sh index 206be4399c534..bc2911d2ae53e 100755 --- a/update_file_lists.sh +++ b/update_file_lists.sh @@ -55,8 +55,10 @@ LITE_PROTOS=$(get_proto_files $MAKEFILE protoc_lite_outputs) PROTOS=$(get_proto_files $MAKEFILE protoc_outputs) WKT_PROTOS=$(get_variable_value $MAKEFILE nobase_dist_proto_DATA) COMMON_TEST_SOURCES=$(get_source_files $MAKEFILE COMMON_TEST_SOURCES) +COMMON_LITE_TEST_SOURCES=$(get_source_files $MAKEFILE COMMON_LITE_TEST_SOURCES) TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_test_SOURCES) LITE_TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_lite_test_SOURCES) +LITE_ARENA_TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_lite_arena_test_SOURCES) TEST_PLUGIN_SOURCES=$(get_source_files $MAKEFILE test_plugin_SOURCES) ################################################################################ @@ -112,8 +114,10 @@ set_cmake_value $CMAKE_DIR/libprotoc.cmake libprotoc_files $CMAKE_PREFIX $LIBPRO set_cmake_value $CMAKE_DIR/tests.cmake lite_test_protos "" $LITE_PROTOS set_cmake_value $CMAKE_DIR/tests.cmake tests_protos "" $PROTOS set_cmake_value $CMAKE_DIR/tests.cmake common_test_files $CMAKE_PREFIX $COMMON_TEST_SOURCES +set_cmake_value $CMAKE_DIR/tests.cmake common_lite_test_files $CMAKE_PREFIX $COMMON_LITE_TEST_SOURCES set_cmake_value $CMAKE_DIR/tests.cmake tests_files $CMAKE_PREFIX $TEST_SOURCES set_cmake_value $CMAKE_DIR/tests.cmake lite_test_files $CMAKE_PREFIX $LITE_TEST_SOURCES +set_cmake_value $CMAKE_DIR/tests.cmake lite_arena_test_files $CMAKE_PREFIX $LITE_ARENA_TEST_SOURCES # Generate extract_includes.bat echo "mkdir include" > $EXTRACT_INCLUDES_BAT