diff --git a/cmake/tests.cmake b/cmake/tests.cmake
index 921510873734e..ef4dfe0221acb 100644
--- a/cmake/tests.cmake
+++ b/cmake/tests.cmake
@@ -99,6 +99,12 @@ set(common_test_files
${protobuf_source_dir}/src/google/protobuf/testing/googletest.cc
)
+set(common_lite_test_files
+ ${protobuf_source_dir}/src/google/protobuf/arena_test_util.cc
+ ${protobuf_source_dir}/src/google/protobuf/map_lite_test_util.cc
+ ${protobuf_source_dir}/src/google/protobuf/test_util_lite.cc
+)
+
set(tests_files
${protobuf_source_dir}/src/google/protobuf/any_test.cc
${protobuf_source_dir}/src/google/protobuf/arena_unittest.cc
@@ -179,10 +185,13 @@ add_executable(test_plugin ${test_plugin_files})
target_link_libraries(test_plugin libprotoc libprotobuf gmock)
set(lite_test_files
- ${protobuf_source_dir}/src/google/protobuf/arena_test_util.cc
${protobuf_source_dir}/src/google/protobuf/lite_unittest.cc
- ${protobuf_source_dir}/src/google/protobuf/map_lite_test_util.cc
- ${protobuf_source_dir}/src/google/protobuf/test_util_lite.cc
)
-add_executable(lite-test ${lite_test_files} ${lite_test_proto_files})
+add_executable(lite-test ${lite_test_files} ${common_lite_test_files} ${lite_test_proto_files})
target_link_libraries(lite-test libprotobuf-lite)
+
+set(lite_arena_test_files
+ ${protobuf_source_dir}/src/google/protobuf/lite_arena_unittest.cc
+)
+add_executable(lite-arena-test ${lite_arena_test_files} ${common_lite_test_files} ${lite_test_proto_files})
+target_link_libraries(lite-arena-test libprotobuf-lite gmock_main)
diff --git a/java/pom.xml b/java/pom.xml
index fb7e4168e95f7..49099b4ae8f7e 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -142,6 +142,7 @@
+
diff --git a/java/src/test/java/com/google/protobuf/AnyTest.java b/java/src/test/java/com/google/protobuf/AnyTest.java
new file mode 100644
index 0000000000000..e169f69d2453b
--- /dev/null
+++ b/java/src/test/java/com/google/protobuf/AnyTest.java
@@ -0,0 +1,92 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package com.google.protobuf;
+
+import any_test.AnyTestProto.TestAny;
+import protobuf_unittest.UnittestProto.TestAllTypes;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for Any message.
+ */
+public class AnyTest extends TestCase {
+ public void testAnyGeneratedApi() throws Exception {
+ TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+ TestUtil.setAllFields(builder);
+ TestAllTypes message = builder.build();
+
+ TestAny container = TestAny.newBuilder()
+ .setValue(Any.pack(message)).build();
+
+ assertTrue(container.getValue().is(TestAllTypes.class));
+ assertFalse(container.getValue().is(TestAny.class));
+
+ TestAllTypes result = container.getValue().unpack(TestAllTypes.class);
+ TestUtil.assertAllFieldsSet(result);
+
+
+ // Unpacking to a wrong type will throw an exception.
+ try {
+ TestAny wrongMessage = container.getValue().unpack(TestAny.class);
+ fail("Exception is expected.");
+ } catch (InvalidProtocolBufferException e) {
+ // expected.
+ }
+
+ // Test that unpacking throws an exception if parsing fails.
+ TestAny.Builder containerBuilder = container.toBuilder();
+ containerBuilder.getValueBuilder().setValue(
+ ByteString.copyFrom(new byte[]{0x11}));
+ container = containerBuilder.build();
+ try {
+ TestAllTypes parsingFailed = container.getValue().unpack(TestAllTypes.class);
+ fail("Exception is expected.");
+ } catch (InvalidProtocolBufferException e) {
+ // expected.
+ }
+ }
+
+ public void testCachedUnpackResult() throws Exception {
+ TestAllTypes.Builder builder = TestAllTypes.newBuilder();
+ TestUtil.setAllFields(builder);
+ TestAllTypes message = builder.build();
+
+ TestAny container = TestAny.newBuilder()
+ .setValue(Any.pack(message)).build();
+
+ assertTrue(container.getValue().is(TestAllTypes.class));
+
+ TestAllTypes result1 = container.getValue().unpack(TestAllTypes.class);
+ TestAllTypes result2 = container.getValue().unpack(TestAllTypes.class);
+ assertTrue(result1 == result2);
+ }
+}
diff --git a/java/src/test/java/com/google/protobuf/any_test.proto b/java/src/test/java/com/google/protobuf/any_test.proto
new file mode 100644
index 0000000000000..80173d8a0e422
--- /dev/null
+++ b/java/src/test/java/com/google/protobuf/any_test.proto
@@ -0,0 +1,42 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+syntax = "proto3";
+
+package any_test;
+
+option java_package = "any_test";
+option java_outer_classname = "AnyTestProto";
+
+import "google/protobuf/any.proto";
+
+message TestAny {
+ google.protobuf.Any value = 1;
+}
diff --git a/python/google/protobuf/pyext/descriptor.cc b/python/google/protobuf/pyext/descriptor.cc
index 8581f529e2bb3..3806643f11483 100644
--- a/python/google/protobuf/pyext/descriptor.cc
+++ b/python/google/protobuf/pyext/descriptor.cc
@@ -62,6 +62,14 @@ namespace google {
namespace protobuf {
namespace python {
+// Store interned descriptors, so that the same C++ descriptor yields the same
+// Python object. Objects are not immortal: this map does not own the
+// references, and items are deleted when the last reference to the object is
+// released.
+// This is enough to support the "is" operator on live objects.
+// All descriptors are stored here.
+hash_map interned_descriptors;
+
PyObject* PyString_FromCppString(const string& str) {
return PyString_FromStringAndSize(str.c_str(), str.size());
}
@@ -147,6 +155,24 @@ static int CheckCalledFromGeneratedFile(const char* attr_name) {
// Helper functions for descriptor objects.
+// A set of templates to retrieve the C++ FileDescriptor of any descriptor.
+template
+const FileDescriptor* GetFileDescriptor(const DescriptorClass* descriptor) {
+ return descriptor->file();
+}
+template<>
+const FileDescriptor* GetFileDescriptor(const FileDescriptor* descriptor) {
+ return descriptor;
+}
+template<>
+const FileDescriptor* GetFileDescriptor(const EnumValueDescriptor* descriptor) {
+ return descriptor->type()->file();
+}
+template<>
+const FileDescriptor* GetFileDescriptor(const OneofDescriptor* descriptor) {
+ return descriptor->containing_type()->file();
+}
+
// Converts options into a Python protobuf, and cache the result.
//
// This is a bit tricky because options can contain extension fields defined in
@@ -156,8 +182,13 @@ static int CheckCalledFromGeneratedFile(const char* attr_name) {
// Always returns a new reference.
template
static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
+ // Options (and their extensions) are completely resolved in the proto file
+ // containing the descriptor.
+ PyDescriptorPool* pool = GetDescriptorPool_FromPool(
+ GetFileDescriptor(descriptor)->pool());
+
hash_map* descriptor_options =
- GetDescriptorPool()->descriptor_options;
+ pool->descriptor_options;
// First search in the cache.
if (descriptor_options->find(descriptor) != descriptor_options->end()) {
PyObject *value = (*descriptor_options)[descriptor];
@@ -170,7 +201,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
const Message& options(descriptor->options());
const Descriptor *message_type = options.GetDescriptor();
PyObject* message_class(cdescriptor_pool::GetMessageClass(
- GetDescriptorPool(), message_type));
+ pool, message_type));
if (message_class == NULL) {
PyErr_Format(PyExc_TypeError, "Could not retrieve class for Options: %s",
message_type->full_name().c_str());
@@ -192,7 +223,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
options.SerializeToString(&serialized);
io::CodedInputStream input(
reinterpret_cast(serialized.c_str()), serialized.size());
- input.SetExtensionRegistry(GetDescriptorPool()->pool,
+ input.SetExtensionRegistry(pool->pool,
GetDescriptorPool()->message_factory);
bool success = cmsg->message->MergePartialFromCodedStream(&input);
if (!success) {
@@ -203,7 +234,7 @@ static PyObject* GetOrBuildOptions(const DescriptorClass *descriptor) {
// Cache the result.
Py_INCREF(value);
- (*GetDescriptorPool()->descriptor_options)[descriptor] = value.get();
+ (*pool->descriptor_options)[descriptor] = value.get();
return value.release();
}
@@ -237,6 +268,9 @@ typedef struct PyBaseDescriptor {
// Pointer to the C++ proto2 descriptor.
// Like all descriptors, it is owned by the global DescriptorPool.
const void* descriptor;
+
+ // Owned reference to the DescriptorPool, to ensure it is kept alive.
+ PyDescriptorPool* pool;
} PyBaseDescriptor;
@@ -258,7 +292,9 @@ namespace descriptor {
// 'was_created' is an optional pointer to a bool, and is set to true if a new
// object was allocated.
// Always return a new reference.
-PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
+template
+PyObject* NewInternedDescriptor(PyTypeObject* type,
+ const DescriptorClass* descriptor,
bool* was_created) {
if (was_created) {
*was_created = false;
@@ -270,8 +306,8 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
// See if the object is in the map of interned descriptors
hash_map::iterator it =
- GetDescriptorPool()->interned_descriptors->find(descriptor);
- if (it != GetDescriptorPool()->interned_descriptors->end()) {
+ interned_descriptors.find(descriptor);
+ if (it != interned_descriptors.end()) {
GOOGLE_DCHECK(Py_TYPE(it->second) == type);
Py_INCREF(it->second);
return it->second;
@@ -283,10 +319,21 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
return NULL;
}
py_descriptor->descriptor = descriptor;
+
// and cache it.
- GetDescriptorPool()->interned_descriptors->insert(
+ interned_descriptors.insert(
std::make_pair(descriptor, reinterpret_cast(py_descriptor)));
+ // Ensures that the DescriptorPool stays alive.
+ PyDescriptorPool* pool = GetDescriptorPool_FromPool(
+ GetFileDescriptor(descriptor)->pool());
+ if (pool == NULL) {
+ Py_DECREF(py_descriptor);
+ return NULL;
+ }
+ Py_INCREF(pool);
+ py_descriptor->pool = pool;
+
if (was_created) {
*was_created = true;
}
@@ -295,7 +342,8 @@ PyObject* NewInternedDescriptor(PyTypeObject* type, const void* descriptor,
static void Dealloc(PyBaseDescriptor* self) {
// Remove from interned dictionary
- GetDescriptorPool()->interned_descriptors->erase(self->descriptor);
+ interned_descriptors.erase(self->descriptor);
+ Py_CLEAR(self->pool);
Py_TYPE(self)->tp_free(reinterpret_cast(self));
}
diff --git a/python/google/protobuf/pyext/descriptor_pool.cc b/python/google/protobuf/pyext/descriptor_pool.cc
index d5ba2b6ffb9fd..7aed651d3f758 100644
--- a/python/google/protobuf/pyext/descriptor_pool.cc
+++ b/python/google/protobuf/pyext/descriptor_pool.cc
@@ -54,9 +54,13 @@ namespace google {
namespace protobuf {
namespace python {
+// A map to cache Python Pools per C++ pointer.
+// Pointers are not owned here, and belong to the PyDescriptorPool.
+static hash_map descriptor_pool_map;
+
namespace cdescriptor_pool {
-PyDescriptorPool* NewDescriptorPool() {
+static PyDescriptorPool* NewDescriptorPool() {
PyDescriptorPool* cdescriptor_pool = PyObject_New(
PyDescriptorPool, &PyDescriptorPool_Type);
if (cdescriptor_pool == NULL) {
@@ -77,22 +81,27 @@ PyDescriptorPool* NewDescriptorPool() {
// storage.
cdescriptor_pool->classes_by_descriptor =
new PyDescriptorPool::ClassesByMessageMap();
- cdescriptor_pool->interned_descriptors =
- new hash_map();
cdescriptor_pool->descriptor_options =
new hash_map();
+ if (!descriptor_pool_map.insert(
+ std::make_pair(cdescriptor_pool->pool, cdescriptor_pool)).second) {
+ // Should never happen -- would indicate an internal error / bug.
+ PyErr_SetString(PyExc_ValueError, "DescriptorPool already registered");
+ return NULL;
+ }
+
return cdescriptor_pool;
}
static void Dealloc(PyDescriptorPool* self) {
typedef PyDescriptorPool::ClassesByMessageMap::iterator iterator;
+ descriptor_pool_map.erase(self->pool);
for (iterator it = self->classes_by_descriptor->begin();
it != self->classes_by_descriptor->end(); ++it) {
Py_DECREF(it->second);
}
delete self->classes_by_descriptor;
- delete self->interned_descriptors; // its references were borrowed.
for (hash_map::iterator it =
self->descriptor_options->begin();
it != self->descriptor_options->end(); ++it) {
@@ -391,22 +400,43 @@ PyTypeObject PyDescriptorPool_Type = {
PyObject_Del, // tp_free
};
-static PyDescriptorPool* global_cdescriptor_pool = NULL;
+// This is the DescriptorPool which contains all the definitions from the
+// generated _pb2.py modules.
+static PyDescriptorPool* python_generated_pool = NULL;
bool InitDescriptorPool() {
if (PyType_Ready(&PyDescriptorPool_Type) < 0)
return false;
- global_cdescriptor_pool = cdescriptor_pool::NewDescriptorPool();
- if (global_cdescriptor_pool == NULL) {
+ python_generated_pool = cdescriptor_pool::NewDescriptorPool();
+ if (python_generated_pool == NULL) {
return false;
}
+ // Register this pool to be found for C++-generated descriptors.
+ descriptor_pool_map.insert(
+ std::make_pair(DescriptorPool::generated_pool(),
+ python_generated_pool));
return true;
}
PyDescriptorPool* GetDescriptorPool() {
- return global_cdescriptor_pool;
+ return python_generated_pool;
+}
+
+PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool) {
+ // Fast path for standard descriptors.
+ if (pool == python_generated_pool->pool ||
+ pool == DescriptorPool::generated_pool()) {
+ return python_generated_pool;
+ }
+ hash_map::iterator it =
+ descriptor_pool_map.find(pool);
+ if (it != descriptor_pool_map.end()) {
+ PyErr_SetString(PyExc_KeyError, "Unknown descriptor pool");
+ return NULL;
+ }
+ return it->second;
}
} // namespace python
diff --git a/python/google/protobuf/pyext/descriptor_pool.h b/python/google/protobuf/pyext/descriptor_pool.h
index 6f6c5cdb5252d..541d920bc820b 100644
--- a/python/google/protobuf/pyext/descriptor_pool.h
+++ b/python/google/protobuf/pyext/descriptor_pool.h
@@ -72,14 +72,6 @@ typedef struct PyDescriptorPool {
typedef hash_map ClassesByMessageMap;
ClassesByMessageMap* classes_by_descriptor;
- // Store interned descriptors, so that the same C++ descriptor yields the same
- // Python object. Objects are not immortal: this map does not own the
- // references, and items are deleted when the last reference to the object is
- // released.
- // This is enough to support the "is" operator on live objects.
- // All descriptors are stored here.
- hash_map* interned_descriptors;
-
// Cache the options for any kind of descriptor.
// Descriptor pointers are owned by the DescriptorPool above.
// Python objects are owned by the map.
@@ -91,9 +83,6 @@ extern PyTypeObject PyDescriptorPool_Type;
namespace cdescriptor_pool {
-// Builds a new DescriptorPool. Normally called only once per process.
-PyDescriptorPool* NewDescriptorPool();
-
// Looks up a message by name.
// Returns a message Descriptor, or NULL if not found.
const Descriptor* FindMessageTypeByName(PyDescriptorPool* self,
@@ -150,6 +139,10 @@ PyObject* FindOneofByName(PyDescriptorPool* self, PyObject* arg);
// Returns a *borrowed* reference.
PyDescriptorPool* GetDescriptorPool();
+// Retrieve the python descriptor pool owning a C++ descriptor pool.
+// Returns a *borrowed* reference.
+PyDescriptorPool* GetDescriptorPool_FromPool(const DescriptorPool* pool);
+
// Initialize objects used by this module.
bool InitDescriptorPool();
diff --git a/src/Makefile.am b/src/Makefile.am
index 4c8919d705ce2..caae2933e494a 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -191,6 +191,7 @@ libprotobuf_lite_la_SOURCES = \
google/protobuf/stubs/stringpiece.h \
google/protobuf/stubs/stringprintf.cc \
google/protobuf/stubs/stringprintf.h \
+ google/protobuf/stubs/structurally_valid.cc \
google/protobuf/stubs/strutil.cc \
google/protobuf/stubs/strutil.h \
google/protobuf/stubs/time.cc \
@@ -232,7 +233,6 @@ libprotobuf_la_SOURCES = \
google/protobuf/service.cc \
google/protobuf/source_context.pb.cc \
google/protobuf/struct.pb.cc \
- google/protobuf/stubs/structurally_valid.cc \
google/protobuf/stubs/substitute.cc \
google/protobuf/stubs/substitute.h \
google/protobuf/text_format.cc \
@@ -659,7 +659,8 @@ COMMON_TEST_SOURCES = \
google/protobuf/testing/file.h
check_PROGRAMS = protoc protobuf-test protobuf-lazy-descriptor-test \
- protobuf-lite-test test_plugin $(GZCHECKPROGRAMS)
+ protobuf-lite-test test_plugin protobuf-lite-arena-test \
+ $(GZCHECKPROGRAMS)
protobuf_test_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la \
../gmock/gtest/lib/libgtest.la \
../gmock/lib/libgmock.la \
@@ -756,21 +757,40 @@ protobuf_lazy_descriptor_test_SOURCES = \
$(COMMON_TEST_SOURCES)
nodist_protobuf_lazy_descriptor_test_SOURCES = $(protoc_outputs)
-# Build lite_unittest separately, since it doesn't use gtest.
-protobuf_lite_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la
-protobuf_lite_test_CXXFLAGS = $(NO_OPT_CXXFLAGS)
-protobuf_lite_test_SOURCES = \
+COMMON_LITE_TEST_SOURCES = \
google/protobuf/arena_test_util.cc \
google/protobuf/arena_test_util.h \
- google/protobuf/lite_unittest.cc \
google/protobuf/map_lite_test_util.cc \
google/protobuf/map_lite_test_util.h \
google/protobuf/test_util_lite.cc \
google/protobuf/test_util_lite.h
- # TODO(teboring) add the file back and make the test build.
- # google/protobuf/map_lite_test.cc
+
+# Build lite_unittest separately, since it doesn't use gtest. It can't
+# depend on gtest because our internal version of gtest depend on proto
+# full runtime and we want to make sure this test builds without full
+# runtime.
+protobuf_lite_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la
+protobuf_lite_test_CXXFLAGS = $(NO_OPT_CXXFLAGS)
+protobuf_lite_test_SOURCES = \
+ google/protobuf/lite_unittest.cc \
+ $(COMMON_LITE_TEST_SOURCES)
nodist_protobuf_lite_test_SOURCES = $(protoc_lite_outputs)
+# lite_arena_unittest depends on gtest because teboring@ found that without
+# gtest when building the test internally our memory sanitizer doesn't detect
+# memory leaks (don't know why).
+protobuf_lite_arena_test_LDADD = $(PTHREAD_LIBS) libprotobuf-lite.la \
+ ../gmock/gtest/lib/libgtest.la \
+ ../gmock/lib/libgmock.la \
+ ../gmock/lib/libgmock_main.la
+protobuf_lite_arena_test_CPPFLAGS = -I$(srcdir)/../gmock/include \
+ -I$(srcdir)/../gmock/gtest/include
+protobuf_lite_arena_test_CXXFLAGS = $(NO_OPT_CXXFLAGS)
+protobuf_lite_arena_test_SOURCES = \
+ google/protobuf/lite_arena_unittest.cc \
+ $(COMMON_LITE_TEST_SOURCES)
+nodist_protobuf_lite_arena_test_SOURCES = $(protoc_lite_outputs)
+
# Test plugin binary.
test_plugin_LDADD = $(PTHREAD_LIBS) libprotobuf.la libprotoc.la \
../gmock/gtest/lib/libgtest.la
@@ -790,4 +810,5 @@ zcgunzip_SOURCES = google/protobuf/testing/zcgunzip.cc
endif
TESTS = protobuf-test protobuf-lazy-descriptor-test protobuf-lite-test \
- google/protobuf/compiler/zip_output_unittest.sh $(GZTESTS)
+ google/protobuf/compiler/zip_output_unittest.sh $(GZTESTS) \
+ protobuf-lite-arena-test
diff --git a/src/google/protobuf/compiler/cpp/cpp_helpers.cc b/src/google/protobuf/compiler/cpp/cpp_helpers.cc
index 678a995aeb74a..09845458b9ae0 100644
--- a/src/google/protobuf/compiler/cpp/cpp_helpers.cc
+++ b/src/google/protobuf/compiler/cpp/cpp_helpers.cc
@@ -600,6 +600,94 @@ bool IsAnyMessage(const Descriptor* descriptor) {
descriptor->file()->name() == kAnyProtoFile;
}
+enum Utf8CheckMode {
+ STRICT = 0, // Parsing will fail if non UTF-8 data is in string fields.
+ VERIFY = 1, // Only log an error but parsing will succeed.
+ NONE = 2, // No UTF-8 check.
+};
+
+// Which level of UTF-8 enforcemant is placed on this file.
+static Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field) {
+ if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3) {
+ return STRICT;
+ } else if (field->file()->options().optimize_for() !=
+ FileOptions::LITE_RUNTIME) {
+ return VERIFY;
+ } else {
+ return NONE;
+ }
+}
+
+static void GenerateUtf8CheckCode(const FieldDescriptor* field,
+ bool for_parse,
+ const map& variables,
+ const char* parameters,
+ const char* strict_function,
+ const char* verify_function,
+ io::Printer* printer) {
+ switch (GetUtf8CheckMode(field)) {
+ case STRICT: {
+ if (for_parse) {
+ printer->Print("DO_(");
+ }
+ printer->Print(
+ "::google::protobuf::internal::WireFormatLite::$function$(\n",
+ "function", strict_function);
+ printer->Indent();
+ printer->Print(variables, parameters);
+ if (for_parse) {
+ printer->Print("::google::protobuf::internal::WireFormatLite::PARSE,\n");
+ } else {
+ printer->Print("::google::protobuf::internal::WireFormatLite::SERIALIZE,\n");
+ }
+ printer->Print("\"$full_name$\")", "full_name", field->full_name());
+ if (for_parse) {
+ printer->Print(")");
+ }
+ printer->Print(";\n");
+ printer->Outdent();
+ break;
+ }
+ case VERIFY: {
+ printer->Print(
+ "::google::protobuf::internal::WireFormat::$function$(\n",
+ "function", verify_function);
+ printer->Indent();
+ printer->Print(variables, parameters);
+ if (for_parse) {
+ printer->Print("::google::protobuf::internal::WireFormat::PARSE,\n");
+ } else {
+ printer->Print("::google::protobuf::internal::WireFormat::SERIALIZE,\n");
+ }
+ printer->Print("\"$full_name$\");\n", "full_name", field->full_name());
+ printer->Outdent();
+ break;
+ }
+ case NONE:
+ break;
+ }
+}
+
+void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
+ bool for_parse,
+ const map& variables,
+ const char* parameters,
+ io::Printer* printer) {
+ GenerateUtf8CheckCode(field, for_parse, variables, parameters,
+ "VerifyUtf8String", "VerifyUTF8StringNamedField",
+ printer);
+}
+
+void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
+ bool for_parse,
+ const map& variables,
+ const char* parameters,
+ io::Printer* printer) {
+ GenerateUtf8CheckCode(field, for_parse, variables, parameters,
+ "VerifyUtf8Cord", "VerifyUTF8CordNamedField",
+ printer);
+}
+
} // namespace cpp
} // namespace compiler
} // namespace protobuf
diff --git a/src/google/protobuf/compiler/cpp/cpp_helpers.h b/src/google/protobuf/compiler/cpp/cpp_helpers.h
index 29c1f90bf71e9..985cb04cae330 100644
--- a/src/google/protobuf/compiler/cpp/cpp_helpers.h
+++ b/src/google/protobuf/compiler/cpp/cpp_helpers.h
@@ -202,11 +202,6 @@ inline bool HasGenericServices(const FileDescriptor* file) {
file->options().cc_generic_services();
}
-// Should string fields in this file verify that their contents are UTF-8?
-inline bool HasUtf8Verification(const FileDescriptor* file) {
- return file->options().optimize_for() != FileOptions::LITE_RUNTIME;
-}
-
// Should we generate a separate, super-optimized code path for serializing to
// flat arrays? We don't do this in Lite mode because we'd rather reduce code
// size.
@@ -270,6 +265,20 @@ inline bool SupportsArenas(const FieldDescriptor* field) {
bool IsAnyMessage(const FileDescriptor* descriptor);
bool IsAnyMessage(const Descriptor* descriptor);
+void GenerateUtf8CheckCodeForString(
+ const FieldDescriptor* field,
+ bool for_parse,
+ const map& variables,
+ const char* parameters,
+ io::Printer* printer);
+
+void GenerateUtf8CheckCodeForCord(
+ const FieldDescriptor* field,
+ bool for_parse,
+ const map& variables,
+ const char* parameters,
+ io::Printer* printer);
+
} // namespace cpp
} // namespace compiler
} // namespace protobuf
diff --git a/src/google/protobuf/compiler/cpp/cpp_map_field.cc b/src/google/protobuf/compiler/cpp/cpp_map_field.cc
index a14d8986395d5..25acc61bedccf 100644
--- a/src/google/protobuf/compiler/cpp/cpp_map_field.cc
+++ b/src/google/protobuf/compiler/cpp/cpp_map_field.cc
@@ -234,6 +234,20 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
"}\n");
}
+ const FieldDescriptor* key_field =
+ descriptor_->message_type()->FindFieldByName("key");
+ if (key_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ key_field, true, variables_,
+ "entry->key().data(), entry->key().length(),\n", printer);
+ }
+ if (value_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ value_field, true, variables_,
+ "entry->mutable_value()->data(),\n"
+ "entry->mutable_value()->length(),\n", printer);
+ }
+
// If entry is allocated by arena, its desctructor should be avoided.
if (SupportsArenas(descriptor_)) {
printer->Print(variables_,
@@ -261,7 +275,30 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const {
printer->Print(variables_,
" entry.reset($name$_.New$wrapper$(it->first, it->second));\n"
" ::google::protobuf::internal::WireFormatLite::Write$stream_writer$(\n"
- " $number$, *entry, output);\n"
+ " $number$, *entry, output);\n");
+
+ printer->Indent();
+ printer->Indent();
+
+ const FieldDescriptor* key_field =
+ descriptor_->message_type()->FindFieldByName("key");
+ const FieldDescriptor* value_field =
+ descriptor_->message_type()->FindFieldByName("value");
+ if (key_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ key_field, false, variables_,
+ "it->first.data(), it->first.length(),\n", printer);
+ }
+ if (value_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ value_field, false, variables_,
+ "it->second.data(), it->second.length(),\n", printer);
+ }
+
+ printer->Outdent();
+ printer->Outdent();
+
+ printer->Print(
" }\n");
// If entry is allocated by arena, its desctructor should be avoided.
@@ -296,7 +333,29 @@ GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
" entry.reset($name$_.New$wrapper$(it->first, it->second));\n"
" target = ::google::protobuf::internal::WireFormatLite::\n"
" Write$declared_type$NoVirtualToArray(\n"
- " $number$, *entry, target);\n"
+ " $number$, *entry, target);\n");
+
+ printer->Indent();
+ printer->Indent();
+
+ const FieldDescriptor* key_field =
+ descriptor_->message_type()->FindFieldByName("key");
+ const FieldDescriptor* value_field =
+ descriptor_->message_type()->FindFieldByName("value");
+ if (key_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ key_field, false, variables_,
+ "it->first.data(), it->first.length(),\n", printer);
+ }
+ if (value_field->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ value_field, false, variables_,
+ "it->second.data(), it->second.length(),\n", printer);
+ }
+
+ printer->Outdent();
+ printer->Outdent();
+ printer->Print(
" }\n");
// If entry is allocated by arena, its desctructor should be avoided.
diff --git a/src/google/protobuf/compiler/cpp/cpp_string_field.cc b/src/google/protobuf/compiler/cpp/cpp_string_field.cc
index d1af6ddaf7f13..6b0821a6c1772 100644
--- a/src/google/protobuf/compiler/cpp/cpp_string_field.cc
+++ b/src/google/protobuf/compiler/cpp/cpp_string_field.cc
@@ -367,25 +367,19 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
"DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
" input, this->mutable_$name$()));\n");
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$().data(), this->$name$().length(),\n"
- " ::google::protobuf::internal::WireFormat::PARSE,\n"
- " \"$full_name$\");\n");
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, true, variables_,
+ "this->$name$().data(), this->$name$().length(),\n", printer);
}
}
void StringFieldGenerator::
GenerateSerializeWithCachedSizes(io::Printer* printer) const {
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$().data(), this->$name$().length(),\n"
- " ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
- " \"$full_name$\");\n");
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, false, variables_,
+ "this->$name$().data(), this->$name$().length(),\n", printer);
}
printer->Print(variables_,
"::google::protobuf::internal::WireFormatLite::Write$declared_type$MaybeAliased(\n"
@@ -394,13 +388,10 @@ GenerateSerializeWithCachedSizes(io::Printer* printer) const {
void StringFieldGenerator::
GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$().data(), this->$name$().length(),\n"
- " ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
- " \"$full_name$\");\n");
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, false, variables_,
+ "this->$name$().data(), this->$name$().length(),\n", printer);
}
printer->Print(variables_,
"target =\n"
@@ -665,13 +656,10 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
"DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
" input, this->mutable_$name$()));\n");
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$().data(), this->$name$().length(),\n"
- " ::google::protobuf::internal::WireFormat::PARSE,\n"
- " \"$full_name$\");\n");
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, true, variables_,
+ "this->$name$().data(), this->$name$().length(),\n", printer);
}
}
@@ -817,14 +805,12 @@ GenerateMergeFromCodedStream(io::Printer* printer) const {
printer->Print(variables_,
"DO_(::google::protobuf::internal::WireFormatLite::Read$declared_type$(\n"
" input, this->add_$name$()));\n");
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$(this->$name$_size() - 1).data(),\n"
- " this->$name$(this->$name$_size() - 1).length(),\n"
- " ::google::protobuf::internal::WireFormat::PARSE,\n"
- " \"$full_name$\");\n");
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, true, variables_,
+ "this->$name$(this->$name$_size() - 1).data(),\n"
+ "this->$name$(this->$name$_size() - 1).length(),\n",
+ printer);
}
}
@@ -832,14 +818,13 @@ void RepeatedStringFieldGenerator::
GenerateSerializeWithCachedSizes(io::Printer* printer) const {
printer->Print(variables_,
"for (int i = 0; i < this->$name$_size(); i++) {\n");
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- "::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$(i).data(), this->$name$(i).length(),\n"
- " ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
- " \"$full_name$\");\n");
+ printer->Indent();
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, false, variables_,
+ "this->$name$(i).data(), this->$name$(i).length(),\n", printer);
}
+ printer->Outdent();
printer->Print(variables_,
" ::google::protobuf::internal::WireFormatLite::Write$declared_type$(\n"
" $number$, this->$name$(i), output);\n"
@@ -850,14 +835,13 @@ void RepeatedStringFieldGenerator::
GenerateSerializeWithCachedSizesToArray(io::Printer* printer) const {
printer->Print(variables_,
"for (int i = 0; i < this->$name$_size(); i++) {\n");
- if (HasUtf8Verification(descriptor_->file()) &&
- descriptor_->type() == FieldDescriptor::TYPE_STRING) {
- printer->Print(variables_,
- " ::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(\n"
- " this->$name$(i).data(), this->$name$(i).length(),\n"
- " ::google::protobuf::internal::WireFormat::SERIALIZE,\n"
- " \"$full_name$\");\n");
+ printer->Indent();
+ if (descriptor_->type() == FieldDescriptor::TYPE_STRING) {
+ GenerateUtf8CheckCodeForString(
+ descriptor_, false, variables_,
+ "this->$name$(i).data(), this->$name$(i).length(),\n", printer);
}
+ printer->Outdent();
printer->Print(variables_,
" target = ::google::protobuf::internal::WireFormatLite::\n"
" Write$declared_type$ToArray($number$, this->$name$(i), target);\n"
diff --git a/src/google/protobuf/compiler/parser.cc b/src/google/protobuf/compiler/parser.cc
index 895ff34a10a6b..4d0184252bd72 100644
--- a/src/google/protobuf/compiler/parser.cc
+++ b/src/google/protobuf/compiler/parser.cc
@@ -939,6 +939,42 @@ void Parser::GenerateMapEntry(const MapField& map_field,
} else {
value_field->set_type_name(map_field.value_type_name);
}
+ // Propagate the "enforce_utf8" option to key and value fields if they
+ // are strings. This helps simplify the implementation of code generators
+ // and also reflection-based parsing code.
+ //
+ // The following definition:
+ // message Foo {
+ // map value = 1 [enforce_utf8 = false];
+ // }
+ // will be interpreted as:
+ // message Foo {
+ // message ValueEntry {
+ // option map_entry = true;
+ // string key = 1 [enforce_utf8 = false];
+ // string value = 2 [enforce_utf8 = false];
+ // }
+ // repeated ValueEntry value = 1 [enforce_utf8 = false];
+ // }
+ //
+ // TODO(xiaofeng): Remove this when the "enforce_utf8" option is removed
+ // from protocol compiler.
+ for (int i = 0; i < field->options().uninterpreted_option_size(); ++i) {
+ const UninterpretedOption& option =
+ field->options().uninterpreted_option(i);
+ if (option.name_size() == 1 &&
+ option.name(0).name_part() == "enforce_utf8" &&
+ !option.name(0).is_extension()) {
+ if (key_field->type() == FieldDescriptorProto::TYPE_STRING) {
+ key_field->mutable_options()->add_uninterpreted_option()
+ ->CopyFrom(option);
+ }
+ if (value_field->type() == FieldDescriptorProto::TYPE_STRING) {
+ value_field->mutable_options()->add_uninterpreted_option()
+ ->CopyFrom(option);
+ }
+ }
+ }
}
bool Parser::ParseFieldOptions(FieldDescriptorProto* field,
diff --git a/src/google/protobuf/lite_arena_unittest.cc b/src/google/protobuf/lite_arena_unittest.cc
new file mode 100644
index 0000000000000..f0bee880aa669
--- /dev/null
+++ b/src/google/protobuf/lite_arena_unittest.cc
@@ -0,0 +1,83 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include
+#include
+#include
+#include
+
+namespace google {
+namespace protobuf {
+namespace {
+
+TEST(LiteArenaTest, MapNoHeapAllocation) {
+ // Allocate a large initial block to avoid mallocs during hooked test.
+ std::vector arena_block(128 * 1024);
+ google::protobuf::ArenaOptions options;
+ options.initial_block = &arena_block[0];
+ options.initial_block_size = arena_block.size();
+ google::protobuf::Arena arena(options);
+ string data;
+ data.reserve(128 * 1024);
+
+ {
+ // TODO(teboring): Enable no heap check when ArenaStringPtr is used in
+ // Map.
+ // google::protobuf::internal::NoHeapChecker no_heap;
+
+ protobuf_unittest::TestArenaMapLite* from =
+ google::protobuf::Arena::CreateMessage(&arena);
+ google::protobuf::MapLiteTestUtil::SetArenaMapFields(from);
+ from->SerializeToString(&data);
+
+ protobuf_unittest::TestArenaMapLite* to =
+ google::protobuf::Arena::CreateMessage(&arena);
+ to->ParseFromString(data);
+ google::protobuf::MapLiteTestUtil::ExpectArenaMapFieldsSet(*to);
+ }
+}
+
+TEST(LiteArenaTest, UnknownFieldMemLeak) {
+ google::protobuf::Arena arena;
+ protobuf_unittest::ForeignMessageArenaLite* message =
+ google::protobuf::Arena::CreateMessage(
+ &arena);
+ string data = "\012\000";
+ int original_capacity = data.capacity();
+ while (data.capacity() <= original_capacity) {
+ data.append("a");
+ }
+ data[1] = data.size() - 2;
+ message->ParseFromString(data);
+}
+
+} // namespace
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/wire_format.cc b/src/google/protobuf/wire_format.cc
index 8fbd116b33e36..5ee4e25d4935b 100644
--- a/src/google/protobuf/wire_format.cc
+++ b/src/google/protobuf/wire_format.cc
@@ -461,6 +461,10 @@ bool WireFormat::ParseAndMergeMessageSetField(uint32 field_number,
}
}
+static bool StrictUtf8Check(const FieldDescriptor* field) {
+ return field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3;
+}
+
bool WireFormat::ParseAndMergeField(
uint32 tag,
const FieldDescriptor* field, // May be NULL for unknown
@@ -633,10 +637,19 @@ bool WireFormat::ParseAndMergeField(
// Handle strings separately so that we can optimize the ctype=CORD case.
case FieldDescriptor::TYPE_STRING: {
+ bool strict_utf8_check = StrictUtf8Check(field);
string value;
if (!WireFormatLite::ReadString(input, &value)) return false;
- VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
- field->name().c_str());
+ if (strict_utf8_check) {
+ if (!WireFormatLite::VerifyUtf8String(
+ value.data(), value.length(), WireFormatLite::PARSE,
+ field->full_name().c_str())) {
+ return false;
+ }
+ } else {
+ VerifyUTF8StringNamedField(value.data(), value.length(), PARSE,
+ field->full_name().c_str());
+ }
if (field->is_repeated()) {
message_reflection->AddString(message, field, value);
} else {
@@ -894,13 +907,20 @@ void WireFormat::SerializeFieldWithCachedSizes(
// Handle strings separately so that we can get string references
// instead of copying.
case FieldDescriptor::TYPE_STRING: {
+ bool strict_utf8_check = StrictUtf8Check(field);
string scratch;
const string& value = field->is_repeated() ?
message_reflection->GetRepeatedStringReference(
message, field, j, &scratch) :
message_reflection->GetStringReference(message, field, &scratch);
- VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
- field->name().c_str());
+ if (strict_utf8_check) {
+ WireFormatLite::VerifyUtf8String(value.data(), value.length(),
+ WireFormatLite::SERIALIZE,
+ field->full_name().c_str());
+ } else {
+ VerifyUTF8StringNamedField(value.data(), value.length(), SERIALIZE,
+ field->full_name().c_str());
+ }
WireFormatLite::WriteString(field->number(), value, output);
break;
}
@@ -1108,34 +1128,6 @@ int WireFormat::MessageSetItemByteSize(
return our_size;
}
-void WireFormat::VerifyUTF8StringFallback(const char* data,
- int size,
- Operation op,
- const char* field_name) {
- if (!IsStructurallyValidUTF8(data, size)) {
- const char* operation_str = NULL;
- switch (op) {
- case PARSE:
- operation_str = "parsing";
- break;
- case SERIALIZE:
- operation_str = "serializing";
- break;
- // no default case: have the compiler warn if a case is not covered.
- }
- string quoted_field_name = "";
- if (field_name != NULL) {
- quoted_field_name = StringPrintf(" '%s'", field_name);
- }
- // no space below to avoid double space when the field name is missing.
- GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
- << "UTF-8 data when " << operation_str << " a protocol "
- << "buffer. Use the 'bytes' type if you intend to send raw "
- << "bytes. ";
- }
-}
-
-
} // namespace internal
} // namespace protobuf
} // namespace google
diff --git a/src/google/protobuf/wire_format.h b/src/google/protobuf/wire_format.h
index 84270fee7e907..941be75ba1e2a 100644
--- a/src/google/protobuf/wire_format.h
+++ b/src/google/protobuf/wire_format.h
@@ -231,8 +231,8 @@ class LIBPROTOBUF_EXPORT WireFormat {
const Message& message);
enum Operation {
- PARSE,
- SERIALIZE,
+ PARSE = 0,
+ SERIALIZE = 1,
};
// Verifies that a string field is valid UTF8, logging an error if not.
@@ -247,13 +247,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
const char* field_name);
private:
- // Verifies that a string field is valid UTF8, logging an error if not.
- static void VerifyUTF8StringFallback(
- const char* data,
- int size,
- Operation op,
- const char* field_name);
-
// Skip a MessageSet field.
static bool SkipMessageSetField(io::CodedInputStream* input,
uint32 field_number,
@@ -265,8 +258,6 @@ class LIBPROTOBUF_EXPORT WireFormat {
Message* message,
io::CodedInputStream* input);
-
-
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormat);
};
@@ -321,7 +312,8 @@ inline int WireFormat::TagSize(int field_number, FieldDescriptor::Type type) {
inline void WireFormat::VerifyUTF8String(const char* data, int size,
WireFormat::Operation op) {
#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
- WireFormat::VerifyUTF8StringFallback(data, size, op, NULL);
+ WireFormatLite::VerifyUtf8String(
+ data, size, static_cast(op), NULL);
#else
// Avoid the compiler warning about unsued variables.
(void)data; (void)size; (void)op;
@@ -332,7 +324,8 @@ inline void WireFormat::VerifyUTF8StringNamedField(
const char* data, int size, WireFormat::Operation op,
const char* field_name) {
#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED
- WireFormat::VerifyUTF8StringFallback(data, size, op, field_name);
+ WireFormatLite::VerifyUtf8String(
+ data, size, static_cast(op), field_name);
#endif
}
diff --git a/src/google/protobuf/wire_format_lite.cc b/src/google/protobuf/wire_format_lite.cc
index dade41afd547b..847e35007a0d2 100644
--- a/src/google/protobuf/wire_format_lite.cc
+++ b/src/google/protobuf/wire_format_lite.cc
@@ -39,10 +39,12 @@
#include
#include
#include
+#include
#include
#include
#include
+
namespace google {
namespace protobuf {
namespace internal {
@@ -505,6 +507,35 @@ bool WireFormatLite::ReadBytes(io::CodedInputStream* input, string** p) {
return ReadBytesToString(input, *p);
}
+bool WireFormatLite::VerifyUtf8String(const char* data,
+ int size,
+ Operation op,
+ const char* field_name) {
+ if (!IsStructurallyValidUTF8(data, size)) {
+ const char* operation_str = NULL;
+ switch (op) {
+ case PARSE:
+ operation_str = "parsing";
+ break;
+ case SERIALIZE:
+ operation_str = "serializing";
+ break;
+ // no default case: have the compiler warn if a case is not covered.
+ }
+ string quoted_field_name = "";
+ if (field_name != NULL) {
+ quoted_field_name = StringPrintf(" '%s'", field_name);
+ }
+ // no space below to avoid double space when the field name is missing.
+ GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid "
+ << "UTF-8 data when " << operation_str << " a protocol "
+ << "buffer. Use the 'bytes' type if you intend to send raw "
+ << "bytes. ";
+ return false;
+ }
+ return true;
+}
+
} // namespace internal
} // namespace protobuf
} // namespace google
diff --git a/src/google/protobuf/wire_format_lite.h b/src/google/protobuf/wire_format_lite.h
index 19fbc2c591451..55fc7ecdfc580 100644
--- a/src/google/protobuf/wire_format_lite.h
+++ b/src/google/protobuf/wire_format_lite.h
@@ -316,6 +316,16 @@ class LIBPROTOBUF_EXPORT WireFormatLite {
static bool ReadBytes(input, string** p);
+ enum Operation {
+ PARSE = 0,
+ SERIALIZE = 1,
+ };
+
+ // Returns true if the data is valid UTF-8.
+ static bool VerifyUtf8String(const char* data, int size,
+ Operation op,
+ const char* field_name);
+
static inline bool ReadGroup (field_number, input, MessageLite* value);
static inline bool ReadMessage(input, MessageLite* value);
diff --git a/update_file_lists.sh b/update_file_lists.sh
index 206be4399c534..bc2911d2ae53e 100755
--- a/update_file_lists.sh
+++ b/update_file_lists.sh
@@ -55,8 +55,10 @@ LITE_PROTOS=$(get_proto_files $MAKEFILE protoc_lite_outputs)
PROTOS=$(get_proto_files $MAKEFILE protoc_outputs)
WKT_PROTOS=$(get_variable_value $MAKEFILE nobase_dist_proto_DATA)
COMMON_TEST_SOURCES=$(get_source_files $MAKEFILE COMMON_TEST_SOURCES)
+COMMON_LITE_TEST_SOURCES=$(get_source_files $MAKEFILE COMMON_LITE_TEST_SOURCES)
TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_test_SOURCES)
LITE_TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_lite_test_SOURCES)
+LITE_ARENA_TEST_SOURCES=$(get_source_files $MAKEFILE protobuf_lite_arena_test_SOURCES)
TEST_PLUGIN_SOURCES=$(get_source_files $MAKEFILE test_plugin_SOURCES)
################################################################################
@@ -112,8 +114,10 @@ set_cmake_value $CMAKE_DIR/libprotoc.cmake libprotoc_files $CMAKE_PREFIX $LIBPRO
set_cmake_value $CMAKE_DIR/tests.cmake lite_test_protos "" $LITE_PROTOS
set_cmake_value $CMAKE_DIR/tests.cmake tests_protos "" $PROTOS
set_cmake_value $CMAKE_DIR/tests.cmake common_test_files $CMAKE_PREFIX $COMMON_TEST_SOURCES
+set_cmake_value $CMAKE_DIR/tests.cmake common_lite_test_files $CMAKE_PREFIX $COMMON_LITE_TEST_SOURCES
set_cmake_value $CMAKE_DIR/tests.cmake tests_files $CMAKE_PREFIX $TEST_SOURCES
set_cmake_value $CMAKE_DIR/tests.cmake lite_test_files $CMAKE_PREFIX $LITE_TEST_SOURCES
+set_cmake_value $CMAKE_DIR/tests.cmake lite_arena_test_files $CMAKE_PREFIX $LITE_ARENA_TEST_SOURCES
# Generate extract_includes.bat
echo "mkdir include" > $EXTRACT_INCLUDES_BAT