diff --git a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java index ef5bc52aaee7a..8420512624368 100644 --- a/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java +++ b/metadata-integration/java/datahub-protobuf/src/main/java/datahub/protobuf/ProtobufUtils.java @@ -7,7 +7,6 @@ import com.linkedin.util.Pair; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; -import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; import java.util.LinkedList; @@ -22,8 +21,7 @@ public class ProtobufUtils { private ProtobufUtils() {} public static String collapseLocationComments(DescriptorProtos.SourceCodeInfo.Location location) { - String orig = - Stream.concat( + return Stream.concat( location.getLeadingDetachedCommentsList().stream(), Stream.of(location.getLeadingComments(), location.getTrailingComments())) .filter(Objects::nonNull) @@ -31,14 +29,6 @@ public static String collapseLocationComments(DescriptorProtos.SourceCodeInfo.Lo .map(line -> line.replaceFirst("^[*/ ]+", "")) .collect(Collectors.joining("\n")) .trim(); - - /* - * Sometimes DataHub doesn't like these strings. Not sure if its DataHub - * or protobuf issue: https://github.com/protocolbuffers/protobuf/issues/4691 - * - * We essentially smash utf8 chars to ascii here - */ - return new String(orig.getBytes(StandardCharsets.ISO_8859_1)); } /* diff --git a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java index 9bf649041e035..78a90048bca59 100644 --- a/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java +++ b/metadata-integration/java/datahub-protobuf/src/test/java/datahub/protobuf/ProtobufUtilsTest.java @@ -8,6 +8,7 @@ import com.google.protobuf.ExtensionRegistry; import datahub.protobuf.model.ProtobufGraph; import java.io.IOException; +import java.util.Arrays; import org.testng.annotations.Test; public class ProtobufUtilsTest { @@ -44,4 +45,18 @@ public void registryTest() throws IOException, IllegalArgumentException { + "[meta.msg.repeat_enum]: EVENT\n", graph.root().messageProto().getOptions().toString()); } -} + + @Test + public void testCollapseLocationCommentsWithUTF8() { + DescriptorProtos.SourceCodeInfo.Location location = DescriptorProtos.SourceCodeInfo.Location.newBuilder() + .addAllLeadingDetachedComments(Arrays.asList("/* Emoji 😊 */", "/* Accented é */")) + .setLeadingComments("/* Chinese 你好 */\n// Russian Привет") + .setTrailingComments("// Korean 안녕") + .build(); + + String actual = ProtobufUtils.collapseLocationComments(location); + String expected = "Emoji 😊 */\nAccented é */\nChinese 你好 */\nRussian Привет\nKorean 안녕"; + + assertEquals(expected, actual); + } +} \ No newline at end of file