diff --git a/.github/workflows/cifuzz.yml b/.github/workflows/cifuzz.yml new file mode 100644 index 00000000..7aed5e1a --- /dev/null +++ b/.github/workflows/cifuzz.yml @@ -0,0 +1,26 @@ +name: CIFuzz +on: [pull_request] +jobs: + Fuzzing: + runs-on: ubuntu-latest + steps: + - name: Build Fuzzers + id: build + uses: google/oss-fuzz/infra/cifuzz/actions/build_fuzzers@master + with: + oss-fuzz-project-name: 'sentencepiece' + dry-run: false + language: c++ + - name: Run Fuzzers + uses: google/oss-fuzz/infra/cifuzz/actions/run_fuzzers@master + with: + oss-fuzz-project-name: 'sentencepiece' + fuzz-seconds: 300 + dry-run: false + language: c++ + - name: Upload Crash + uses: actions/upload-artifact@v3 + if: failure() && steps.build.outcome == 'success' + with: + name: artifacts + path: ./out/artifacts diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml new file mode 100644 index 00000000..eabb2925 --- /dev/null +++ b/.github/workflows/cmake.yml @@ -0,0 +1,77 @@ +name: CI for general build + +on: + push: + branches: [ master ] + tags: + - 'v*' + pull_request: + branches: [ master ] + +jobs: + build: + strategy: + matrix: + os: [ ubuntu-latest, ubuntu-20.04, windows-latest, macOS-11 ] + arch: [ x64 ] + include: + - os: windows-latest + arch: x86 + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.x' + architecture: ${{matrix.arch}} + + - name: Config for Windows + if: runner.os == 'Windows' + run: | + if ("${{matrix.arch}}" -eq "x64") { + $msbuildPlatform = "x64" + } else { + $msbuildPlatform = "Win32" + } + cmake -A $msbuildPlatform -B ${{github.workspace}}/build -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build/root + + - name: Config for Unix + if: runner.os != 'Windows' + run: cmake -B ${{github.workspace}}/build -DSPM_BUILD_TEST=ON -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build/root + + - name: Build + run: cmake --build 
${{github.workspace}}/build --config Release --target install --parallel 8 + + - name: Test + working-directory: ${{github.workspace}}/build + run: ctest -C Release --output-on-failure + + - name: Package + working-directory: ${{github.workspace}}/build + run: cpack + + - name: Build Python wrapper + working-directory: ${{github.workspace}}/python + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + python setup.py test + python setup.py bdist_wheel + + - name: Upload artifacts + uses: actions/upload-artifact@v3 + with: + path: ./build/*.7z + + - name: Upload Release Assets + if: startsWith(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: ./build/*.7z + tag: ${{ github.ref }} + overwrite: true + prerelease: true + file_glob: true + body: "This is my release text" diff --git a/.github/workflows/cross_build.yml b/.github/workflows/cross_build.yml new file mode 100644 index 00000000..5fc6d3a3 --- /dev/null +++ b/.github/workflows/cross_build.yml @@ -0,0 +1,41 @@ +name: CrossBuild + +on: + push: + branches: [ master ] + tags: + - 'v*' + pull_request: + branches: [ master ] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + arch: [ i686, arm, aarch64, riscv64, powerpc, powerpc64, powerpc64le, s390x, sparc64, m68k, sh4, alpha ] + + steps: + - uses: actions/checkout@v3 + + - name: Install cross tools + run: | + sudo apt-get install -y sudo qemu-user gdb zstd dwarfdump {gcc,g++}-10-{i686,aarch64,riscv64,powerpc,powerpc64,powerpc64le,s390x,sparc64,m68k,sh4,alpha}-linux-gnu {gcc,g++}-10-arm-linux-gnueabihf + sudo ln -sf /usr/bin/arm-linux-gnueabihf-gcc-10 /usr/bin/arm-linux-gnu-gcc-10 + sudo ln -sf /usr/bin/arm-linux-gnueabihf-g++-10 /usr/bin/arm-linux-gnu-g++-10 + sudo ln -sf /usr/arm-linux-gnueabihf /usr/arm-linux-gnu + + - name: Build + run: | + mkdir -p ${{github.workspace}}/build + cd ${{github.workspace}}/build + env 
CXX=/usr/bin/${{matrix.arch}}-linux-gnu-g++-10 CC=/usr/bin/${{matrix.arch}}-linux-gnu-gcc-10 cmake .. -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_FIND_ROOT_PATH=/usr/${{matrix.arch}}-linux-gnu -DSPM_CROSS_SYSTEM_PROCESSOR=${{matrix.arch}} + make -j$(nproc) + + - name: Test on QEMU + if: matrix.arch != 'sparc64' && matrix.arch != 'm68k' && matrix.arch != 'sh4' + run: | + cd ${{github.workspace}}/build + qemu_arch=`echo ${{matrix.arch}} | sed -e s/powerpc/ppc/ -e s/686/386/` + qemu-${qemu_arch} -L /usr/${{matrix.arch}}-linux-gnu src/spm_test diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml new file mode 100644 index 00000000..48d584d3 --- /dev/null +++ b/.github/workflows/wheel.yml @@ -0,0 +1,147 @@ +name: Build Wheels + +on: + push: + branches: [ master ] + tags: + - 'v*' + pull_request: + branches: [ master ] + +jobs: + build_wheels: + outputs: + digests-linux: ${{ steps.hash-linux.outputs.digests }} + digests-macos: ${{ steps.hash-macos.outputs.digests }} + digests-windows: ${{ steps.hash-windows.outputs.digests }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macOS-11] + runs-on: ${{ matrix.os }} + name: Build wheels on ${{ matrix.os }} + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" + + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v2 + with: + platforms: arm64 + + - name: Build for Windows + if: runner.os == 'Windows' + run: | + cmake -A Win32 -B ${{github.workspace}}/build_win32 -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build/root_win32 + cmake --build ${{github.workspace}}/build_win32 --config Release --target install --parallel 8 + cmake -A x64 -B ${{github.workspace}}/build_amd64 -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build/root_amd64 + cmake --build ${{github.workspace}}/build_amd64 --config Release --target install --parallel 8 + + - name: Build for Mac + if: 
runner.os == 'macOS' + run: | + cmake -B ${{github.workspace}}/build -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/build/root + cmake --build ${{github.workspace}}/build --config Release --target install --parallel 8 + env: + CMAKE_OSX_ARCHITECTURES: arm64;x86_64 + + - name: Install cibuildwheel + working-directory: ${{github.workspace}}/python + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + python -m pip install cibuildwheel==2.12.0 + + - name: Build wheels + working-directory: ${{github.workspace}}/python + run: python -m cibuildwheel --output-dir wheelhouse + env: + CIBW_ARCHS_LINUX: auto aarch64 + CIBW_ARCHS_MACOS: x86_64 universal2 arm64 + CIBW_SKIP: "pp* *-musllinux_*" + CIBW_BUILD_VERBOSITY: 1 + + - name: Build sdist archive + working-directory: ${{github.workspace}}/python + run: sh build_sdist.sh + + - name: Fetch sdist archive + uses: tj-actions/glob@v17 + id: sdist + with: + files: ./python/dist/*.tar.gz + + - name: Build wheel from sdist + run: python -m pip wheel "${{ steps.sdist.outputs.paths }}" --verbose + + - name: Copy sdist + working-directory: ${{github.workspace}}/python + if: runner.os == 'macOS' + run: cp -f dist/*.tar.gz wheelhouse/ + + - name: Upload artifact + uses: actions/upload-artifact@v3 + with: + path: | + ./python/wheelhouse/*.whl + ./python/wheelhouse/*.tar.gz + + - name: Upload wheel release + if: startsWith(github.ref, 'refs/tags/') + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: ./python/wheelhouse/* + tag: ${{ github.ref }} + overwrite: true + prerelease: true + file_glob: true + + - name: Generate SLSA subjects - Macos + id: hash-macos + if: runner.os == 'macOS' + run: echo "digests=$(shasum -a 256 ./python/wheelhouse/* | base64)" >> $GITHUB_OUTPUT + + - name: Generate SLSA subjects - Linux + id: hash-linux + if: runner.os == 'Linux' + run: echo "digests=$(sha256sum ./python/wheelhouse/* | base64 -w0)" >> 
$GITHUB_OUTPUT + + - name: Generate SLSA subjects - Windows + id: hash-windows + if: runner.os == 'Windows' + run: echo "digests=$(sha256sum ./python/wheelhouse/* | base64 -w0)" >> $GITHUB_OUTPUT + + gather-disgests: + needs: [build_wheels] + outputs: + digests: ${{ steps.hash.outputs.digests }} + runs-on: ubuntu-latest + steps: + - name: Merge results + id: hash + env: + LINUX_DIGESTS: "${{ needs.build_wheels.outputs.digests-linux }}" + MACOS_DIGESTS: "${{ needs.build_wheels.outputs.digests-macos }}" + WINDOWS_DIGESTS: "${{ needs.build_wheels.outputs.digests-windows }}" + run: | + set -euo pipefail + echo "$LINUX_DIGESTS" | base64 -d > checksums.txt + echo "$MACOS_DIGESTS" | base64 -d >> checksums.txt + echo "$WINDOWS_DIGESTS" | base64 -d >> checksums.txt + echo "digests=$(cat checksums.txt | base64 -w0)" >> $GITHUB_OUTPUT + + provenance: + if: startsWith(github.ref, 'refs/tags/') + needs: [build_wheels, gather-disgests] + permissions: + actions: read # To read the workflow path. + id-token: write # To sign the provenance. + contents: write # To add assets to a release. 
+ uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v1.2.0 + with: + base64-subjects: "${{ needs.gather-disgests.outputs.digests }}" + upload-assets: true # Optional: Upload to a new release diff --git a/.gitignore b/.gitignore index aac6692b..743769de 100644 --- a/.gitignore +++ b/.gitignore @@ -72,3 +72,6 @@ libsentencepiece.so* libsentencepiece_train.so* python/bundled _sentencepiece.*.so +third_party/abseil-cpp + +python/sentencepiece diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index b99be8f0..00000000 --- a/.travis.yml +++ /dev/null @@ -1,98 +0,0 @@ -language: cpp -matrix: - include: - - os: linux - env: IMAGE=ubuntu:rolling COMMAND=build_linux_gcc_coverall_ubuntu RELEASE_FILES="$TRAVIS_BUILD_DIR/build/*.xz" - services: docker - - os: linux - env: IMAGE=i386/ubuntu:rolling COMMAND=build_linux_gcc_ubuntu - services: docker - - os: linux - env: IMAGE=ubuntu:bionic COMMAND=build_linux_gcc_ubuntu - services: docker - - os: linux - env: IMAGE=ubuntu:xenial COMMAND=build_linux_gcc_ubuntu - services: docker - - os: linux - env: IMAGE=ubuntu:trusty COMMAND=build_linux_gcc_ubuntu - services: docker - - os: linux - env: IMAGE=debian:stable COMMAND=build_linux_gcc_debian - services: docker - - os: linux - env: IMAGE=fedora:latest COMMAND=build_linux_gcc_fedora - services: docker - - os: linux - env: IMAGE=ubuntu:rolling COMMAND=build_linux_clang_ubuntu - services: docker - - os: linux - arch: arm64 - env: IMAGE=arm64v8/ubuntu:rolling COMMAND=build_linux_gcc_ubuntu - services: docker - - os: linux - arch: ppc64le - env: IMAGE=ppc64le/ubuntu:rolling COMMAND=build_linux_gcc_ubuntu - services: docker - - os: linux - arch: s390x - env: IMAGE=s390x/ubuntu:rolling COMMAND=build_linux_gcc_ubuntu - services: docker - - os: linux - env: IMAGE=x86_64 COMMAND=make_py_wheel_py RELEASE_FILES="$TRAVIS_BUILD_DIR/python/dist/*manylinux*.whl" - script: - - $TRAVIS_BUILD_DIR/python/make_py_wheel.sh ${IMAGE} - - if [[ 
"$RELEASE_FILES" != "" ]]; then ls -l $RELEASE_FILES ; fi - services: docker - - os: linux - env: IMAGE=i686 COMMAND=make_py_wheel_py RELEASE_FILES="$TRAVIS_BUILD_DIR/python/dist/*manylinux*.whl" - script: - - $TRAVIS_BUILD_DIR/python/make_py_wheel.sh ${IMAGE} - - if [[ "$RELEASE_FILES" != "" ]]; then ls -l $RELEASE_FILES ; fi - services: docker - - os: linux - arch: arm64 - env: IMAGE=aarch64 COMMAND=make_py_wheel_py RELEASE_FILES="$TRAVIS_BUILD_DIR/python/dist/*manylinux*.whl" - script: - - $TRAVIS_BUILD_DIR/python/make_py_wheel.sh ${IMAGE} - - if [[ "$RELEASE_FILES" != "" ]]; then ls -l $RELEASE_FILES ; fi - services: docker - - os: linux - arch: ppc64le - env: IMAGE=ppc64le COMMAND=make_py_wheel_py RELEASE_FILES="$TRAVIS_BUILD_DIR/python/dist/*manylinux*.whl" - script: - - $TRAVIS_BUILD_DIR/python/make_py_wheel.sh ${IMAGE} - - if [[ "$RELEASE_FILES" != "" ]]; then ls -l $RELEASE_FILES ; fi - services: docker - - os: linux - arch: s390x - env: IMAGE=s390x COMMAND=make_py_wheel_py RELEASE_FILES="$TRAVIS_BUILD_DIR/python/dist/*manylinux*.whl" - script: - - $TRAVIS_BUILD_DIR/python/make_py_wheel.sh ${IMAGE} - - if [[ "$RELEASE_FILES" != "" ]]; then ls -l $RELEASE_FILES ; fi - services: docker - - os: osx - osx_image: xcode9.4 - env: IMAGE=native COMMAND=build_osx - - os: osx - osx_image: xcode9.4 - env: IMAGE=native COMMAND=make_py_wheel_mac_py RELEASE_FILES="$TRAVIS_BUILD_DIR/python/dist/delocated_wheel/*.whl" - script: - - $TRAVIS_BUILD_DIR/python/make_py_wheel_mac.sh - - if [[ "$RELEASE_FILES" != "" ]]; then ls -l $RELEASE_FILES ; fi -script: - - $TRAVIS_BUILD_DIR/test.sh ${IMAGE} ${COMMAND} - - if [[ "$RELEASE_FILES" != "" ]]; then ls -l $RELEASE_FILES ; fi -deploy: - provider: releases - skip_cleanup: true - api_key: - secure: 
WnrgfoRVSoi+E2YwFDgpQlxldfYQycN8DmMqbJab6uP0FWTmPptS9nmXWVGsXJS1u+sTsx/E+lM5xggl31u88hUJYsEUg+xPszSf+eiLfmdoEY+qYj2Vsuh7cT7P1tBScVMUiEQsoCcg9gZbHFHkSYJ74gyQxQhqJ52UmCJ1aNcp3nbtzgjBGvtsi2WBUdG1jSW0qwRj9gcq9eOWA4zkeHj9QKWhBtRD7fhpUiUDWVqaDSMu1E10QLNjkZ//qwbrWXb4MBzCa1ISla/ZoKv4TMQQrzYEwqxmbX2bxk1lMkJD3sKt3Wq/qNWDYaPKk9gz/cU9nAKwzSlJzus5c9pac6U/mh0IU8JhEGlkzFb1Ng3cHLdYT0hk0jAW15Ptcijqt+UGs0Arb1pdKvQV2e5bLEBrujCNGF8NFdsE23WDofEM/VKXuMNWW/j6b+VLESf05rz5p07IBMczLfW/Qs8mY5cqR9WaqPbYxMZlgwxtD+MiKERHlq1qVdK25M1UuB0wH/EbstVuEX2iNZRvffT9A+NglriLR74vNiCnfRlzGx4U4/Z79r2mwFrJTGupgq9N/jvKMs92qrT200VRtIto3JLEd3cnlM/9Gpv39SsYKA0seHKBpyFz/pGfXkOStv+14hzmEmXIFwG1QRTeFsZIUzmvvfMuhaG8Jjhdwpfvr68= - file_glob: true - file: "${RELEASE_FILES}" - on: - branch: master - tags: true - condition: $RELEASE_FILES != "" -env: - global: - secure: J52dK8uM1haWOP5Ktz01VETiYdpyOKtnGZXcZjxEXI7RV+44/MpkSSpKFrIex1jHDodn01Tv+/otmxotaz1HOPv4DgT2gg8FbHlpvnc6+B1/dEaeCDvnd33odmARoOszP0MNFTZdlvg6zGeJwPDYFfITn1jiFBtjazu19VIbQE4D1CSKkWsMXeyH1WjTb0LEtxhYwUcFgNqDb6trArx8xlvZNrh2/j5nPgAzvmuT0JuzwcRz9swwZftKcMjaK5JooSBTydtAzgVpVMZf1q+pF0nR9VlYIY34qQLsWirBjWHGRKdkgAEEN4vEMD1BKbhkIn7TjEpWLrH3BZuJY8uXAfnxvT8KXns2fhA1EDjlP/5n2y1jXAjqCZX8o1dC2fn6qxpL1Qg1WE0n9mhOZLMpbzCpJjBumjQPPUsviggRUs4awSYv3JrYuavvXQZ9rFM634O7CLIDVmbqssVyIYMhgIqLFAWgDxTyAxt+67vUy5ONsAenMOJ6bO36pYZHWH53isCRblUD5nq6Dj6WrW9P7lQhAdhvZ+Hyt+zyVCCblDY9lAv1KetU4i9sDSNYUkQtFTPVBw8LE4JmEctuM7iC6YqeneffPzzDLsGZ70m66VT1L4MYg5h2fGbtRuQ1nPz0+k2CNibN7NegaY35d7gUosnJJF04AeOUcea4+rgQkVM= diff --git a/CMakeLists.txt b/CMakeLists.txt index d214ee41..9a71b082 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,10 @@ message(STATUS "VERSION: ${SPM_VERSION}") SET(SPM_ARTIFACT_NAME "sentencepiece" CACHE STRING "Default name of the \ generated artifacts. 
Override to avoid name conflicts.") +if(POLICY CMP0091) + cmake_policy(SET CMP0091 NEW) +endif() + project(${SPM_ARTIFACT_NAME} VERSION ${SPM_VERSION} LANGUAGES C CXX) option(SPM_ENABLE_NFKC_COMPILE "Enables NFKC compile" OFF) @@ -29,10 +33,29 @@ option(SPM_ENABLE_TCMALLOC "Enable TCMalloc if available." ON) option(SPM_TCMALLOC_STATIC "Link static library of TCMALLOC." OFF) option(SPM_NO_THREADLOCAL "Disable thread_local operator" OFF) option(SPM_USE_BUILTIN_PROTOBUF "Use built-in protobuf" ON) +option(SPM_USE_EXTERNAL_ABSL "Use external abseil" OFF) +option(SPM_ENABLE_MSVC_MT_BUILD "Use /MT flag in MSVC build" OFF) +set(SPM_CROSS_SYSTEM_PROCESSOR "" CACHE STRING "Override system processor") + +if (SPM_CROSS_SYSTEM_PROCESSOR) + set(CMAKE_SYSTEM_PROCESSOR ${SPM_CROSS_SYSTEM_PROCESSOR}) +endif() -set(CMAKE_CXX_STANDARD 11) +# Disable shared build on windows +if(WIN32) + set(SPM_ENABLE_SHARED OFF) +endif() + +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) +if((CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND + CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 10.0) OR + (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND + CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0)) + string(APPEND CMAKE_CXX_FLAGS " -fmacro-prefix-map=${CMAKE_SOURCE_DIR}/=''") +endif() + if (UNIX) include(GNUInstallDirs) set(prefix ${CMAKE_INSTALL_PREFIX}) @@ -47,18 +70,24 @@ else() endif() set(GNUCXX_STD_SUPPORT_VERSION "4.3") +if(${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD") +add_definitions(-D_FREEBSD) +endif() + if (SPM_USE_BUILTIN_PROTOBUF) set(libprotobuf_lite "") else() - set(libprotobuf_lite "-lprotobuf-lite") + set(libprotobuf_lite "protobuf-lite") endif() if (MSVC) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_MINSIZEREL ${CMAKE_CXX_FLAGS_MINSIZEREL}) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) - string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) 
add_definitions("/wd4267 /wd4244 /wd4305 /Zc:strictStrings /utf-8") + if (SPM_ENABLE_MSVC_MT_BUILD) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_DEBUG ${CMAKE_CXX_FLAGS_DEBUG}) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_MINSIZEREL ${CMAKE_CXX_FLAGS_MINSIZEREL}) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE}) + string(REPLACE "/MD" "/MT" CMAKE_CXX_FLAGS_RELWITHDEBINFO ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}) + endif() endif() if (APPLE) @@ -85,10 +114,36 @@ if (NOT DEFINED CMAKE_INSTALL_INCDIR) set(CMAKE_INSTALL_INCDIR include) endif() +# SPDX-License-Identifier: (MIT OR CC0-1.0) +# Copyright 2020 Jan Tojnar +# https://github.com/jtojnar/cmake-snips +# +# Modelled after Python’s os.path.join +# https://docs.python.org/3.7/library/os.path.html#os.path.join +# Windows not supported +function(join_paths joined_path first_path_segment) + set(temp_path "${first_path_segment}") + foreach(current_segment IN LISTS ARGN) + if(NOT ("${current_segment}" STREQUAL "")) + if(IS_ABSOLUTE "${current_segment}") + set(temp_path "${current_segment}") + else() + set(temp_path "${temp_path}/${current_segment}") + endif() + endif() + endforeach() + set(${joined_path} "${temp_path}" PARENT_SCOPE) +endfunction() + +join_paths(libdir_for_pc_file "\${exec_prefix}" "${CMAKE_INSTALL_LIBDIR}") +join_paths(includedir_for_pc_file "\${prefix}" "${CMAKE_INSTALL_INCLUDEDIR}") + configure_file("${PROJECT_SOURCE_DIR}/config.h.in" "config.h") configure_file("${PROJECT_SOURCE_DIR}/sentencepiece.pc.in" "sentencepiece.pc" @ONLY) if (NOT MSVC) + # suppress warning for C++11 features. 
+# add_definitions("-Wno-deprecated-declarations -Wno-deprecated-enum-enum-conversion") install(FILES "${CMAKE_CURRENT_BINARY_DIR}/sentencepiece.pc" DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) endif() @@ -98,6 +153,10 @@ if (SPM_BUILD_TEST) enable_testing() endif() +if (SPM_USE_EXTERNAL_ABSL) + add_subdirectory(third_party/abseil-cpp) +endif() + add_subdirectory(src) add_subdirectory(third_party) diff --git a/README.md b/README.md index 1adddfa9..76acb3e6 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,18 @@ # SentencePiece -[![Build Status](https://travis-ci.org/google/sentencepiece.svg?branch=master)](https://travis-ci.org/google/sentencepiece) -[![Build status](https://ci.appveyor.com/api/projects/status/vxoub3qx4fwpysyq?svg=true)](https://ci.appveyor.com/project/taku910/sentencepiece) -[![Coverage Status](https://coveralls.io/repos/github/google/sentencepiece/badge.svg?branch=master)](https://coveralls.io/github/google/sentencepiece?branch=master) +[![Build C++](https://github.com/google/sentencepiece/actions/workflows/cmake.yml/badge.svg)](https://github.com/google/sentencepiece/actions/workflows/cmake.yml) +[![Build Wheels](https://github.com/google/sentencepiece/actions/workflows/wheel.yml/badge.svg)](https://github.com/google/sentencepiece/actions/workflows/wheel.yml) [![GitHub Issues](https://img.shields.io/github/issues/google/sentencepiece.svg)](https://github.com/google/sentencepiece/issues) -[![Codacy Badge](https://api.codacy.com/project/badge/Grade/5851945fc54947fc9e964f78c3b6bdfa)](https://app.codacy.com/app/taku910/sentencepiece?utm_source=github.com&utm_medium=referral&utm_content=google/sentencepiece&utm_campaign=Badge_Grade_Dashboard) [![PyPI version](https://badge.fury.io/py/sentencepiece.svg)](https://badge.fury.io/py/sentencepiece) [![PyPi downloads](https://img.shields.io/pypi/dm/sentencepiece?style=flat-square&logo=pypi&logoColor=white)](https://pypi.org/project/sentencepiece/) [![Contributions 
welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md) [![License](https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg)](https://opensource.org/licenses/Apache-2.0) +[![SLSA 3](https://slsa.dev/images/gh-badge-level3.svg)](https://slsa.dev) SentencePiece is an unsupervised text tokenizer and detokenizer mainly for Neural Network-based text generation systems where the vocabulary size is predetermined prior to the neural model training. SentencePiece implements -**subword units** (e.g., **byte-pair-encoding (BPE)** [[Sennrich et al.](http://www.aclweb.org/anthology/P16-1162)]) and +**subword units** (e.g., **byte-pair-encoding (BPE)** [[Sennrich et al.](https://www.aclweb.org/anthology/P16-1162)]) and **unigram language model** [[Kudo.](https://arxiv.org/abs/1804.10959)]) with the extension of direct training from raw sentences. SentencePiece allows us to make a purely end-to-end system that does not depend on language-specific pre/postprocessing. @@ -23,19 +22,22 @@ with the extension of direct training from raw sentences. SentencePiece allows u - **Purely data driven**: SentencePiece trains tokenization and detokenization models from sentences. Pre-tokenization ([Moses tokenizer](https://github.com/moses-smt/mosesdecoder/blob/master/scripts/tokenizer/tokenizer.perl)/[MeCab](http://taku910.github.io/mecab/)/[KyTea](http://www.phontron.com/kytea/)) is not always required. - **Language independent**: SentencePiece treats the sentences just as sequences of Unicode characters. There is no language-dependent logic. -- **Multiple subword algorithms**: **BPE** [[Sennrich et al.](http://www.aclweb.org/anthology/P16-1162)] and **unigram language model** [[Kudo.](https://arxiv.org/abs/1804.10959)] are supported. +- **Multiple subword algorithms**: **BPE** [[Sennrich et al.](https://www.aclweb.org/anthology/P16-1162)] and **unigram language model** [[Kudo.](https://arxiv.org/abs/1804.10959)] are supported. 
- **Subword regularization**: SentencePiece implements subword sampling for [subword regularization](https://arxiv.org/abs/1804.10959) and [BPE-dropout](https://arxiv.org/abs/1910.13267) which help to improve the robustness and accuracy of NMT models. - **Fast and lightweight**: Segmentation speed is around 50k sentences/sec, and memory footprint is around 6MB. - **Self-contained**: The same tokenization/detokenization is obtained as long as the same model file is used. - **Direct vocabulary id generation**: SentencePiece manages vocabulary to id mapping and can directly generate vocabulary id sequences from raw sentences. - **NFKC-based normalization**: SentencePiece performs NFKC-based text normalization. +For those unfamiliar with SentencePiece as a software/algorithm, one can read [a gentle introduction here](https://medium.com/@jacky2wong/understanding-sentencepiece-under-standing-sentence-piece-ac8da59f6b08). + + ## Comparisons with other implementations |Feature|SentencePiece|[subword-nmt](https://github.com/rsennrich/subword-nmt)|[WordPiece](https://arxiv.org/pdf/1609.08144.pdf)| |:---|:---:|:---:|:---:| |Supported algorithm|BPE, unigram, char, word|BPE|BPE*| |OSS?|Yes|Yes|Google internal| -|Subword regularization|[Yes](#subword-regularization)|No|No| +|Subword regularization|[Yes](#subword-regularization-and-bpe-dropout)|No|No| |Python Library (pip)|[Yes](python/README.md)|No|N/A| |C++ Library|[Yes](doc/api.md)|No|N/A| |Pre-segmentation required?|[No](#whitespace-is-treated-as-a-basic-symbol)|Yes|Yes| @@ -98,7 +100,7 @@ special symbol. 
Tokenized sequences do not preserve the necessary information to * (ja) こんにちは世界。 → [こんにちは] [世界] [。] \(No space between こんにちは and 世界\) #### Subword regularization and BPE-dropout -Subword regularization [[Kudo.](https://arxiv.org/abs/1804.10959)] and BPE-droptout [Provilkov et al](https://arxiv.org/abs/1910.13267) are simple regularization methods +Subword regularization [[Kudo.](https://arxiv.org/abs/1804.10959)] and BPE-dropout [Provilkov et al](https://arxiv.org/abs/1910.13267) are simple regularization methods that virtually augment training data with on-the-fly subword sampling, which helps to improve the accuracy as well as robustness of NMT models. To enable subword regularization, you would like to integrate SentencePiece library @@ -108,7 +110,7 @@ To enable subword regularization, you would like to integrate SentencePiece libr >>> import sentencepiece as spm >>> s = spm.SentencePieceProcessor(model_file='spm.model') >>> for n in range(5): -... s.encode('New York', out_type=str, enable_sampling=True, alpha=0.1, nbest=-1) +... s.encode('New York', out_type=str, enable_sampling=True, alpha=0.1, nbest_size=-1) ... ['▁', 'N', 'e', 'w', '▁York'] ['▁', 'New', '▁York'] @@ -119,29 +121,17 @@ To enable subword regularization, you would like to integrate SentencePiece libr ## Installation -### Building sentencepiece - Using vcpkg - -You can download and install sentencepiece using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager: - - git clone https://github.com/Microsoft/vcpkg.git - cd vcpkg - ./bootstrap-vcpkg.sh - ./vcpkg integrate install - ./vcpkg install sentencepiece - -The sentencepiece port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. - ### Python module SentencePiece provides Python wrapper that supports both SentencePiece training and segmentation. 
You can install Python binary package of SentencePiece with. ``` -% pip install sentencepiece +pip install sentencepiece ``` For more detail, see [Python module](python/README.md) -### C++ (from source) +### Build and install SentencePiece command line tools from C++ source The following tools and libraries are required to build SentencePiece: * [cmake](https://cmake.org/) @@ -152,9 +142,11 @@ On Ubuntu, the build tools can be installed with apt-get: ``` % sudo apt-get install cmake build-essential pkg-config libgoogle-perftools-dev ``` -#### Build and Install SentencePiece + +Then, you can build and install command line tools as follows. ``` -% cd /path/to/sentencepiece +% git clone https://github.com/google/sentencepiece.git +% cd sentencepiece % mkdir build % cd build % cmake .. @@ -164,8 +156,30 @@ On Ubuntu, the build tools can be installed with apt-get: ``` On OSX/macOS, replace the last command with `sudo update_dyld_shared_cache` -### TensorFlow module -See [tensorflow/README.md](tensorflow/README.md) +### Build and install using vcpkg + +You can download and install sentencepiece using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager: + + git clone https://github.com/Microsoft/vcpkg.git + cd vcpkg + ./bootstrap-vcpkg.sh + ./vcpkg integrate install + ./vcpkg install sentencepiece + +The sentencepiece port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. + +### Download and install SentencePiece from signed released wheels + +You can download the wheel from the [GitHub releases page](https://github.com/google/sentencepiece/releases/latest). +We generate [SLSA3 signatures](https://slsa.dev) using the OpenSSF's [slsa-framework/slsa-github-generator](https://github.com/slsa-framework/slsa-github-generator) during the release process. To verify a release binary: +1. 
Install the verification tool from [slsa-framework/slsa-verifier#installation](https://github.com/slsa-framework/slsa-verifier#installation). +2. Download the provenance file `attestation.intoto.jsonl` from the [GitHub releases page](https://github.com/google/sentencepiece/releases/latest). +3. Run the verifier: +```shell +slsa-verifier -artifact-path -provenance attestation.intoto.jsonl -source github.com/google/sentencepiece -tag +``` + +pip install wheel_file.whl ## Usage instructions ### Train SentencePiece Model @@ -177,7 +191,7 @@ See [tensorflow/README.md](tensorflow/README.md) the input with Unicode NFKC. You can pass a comma-separated list of files. * `--model_prefix`: output model name prefix. `.model` and `.vocab` are generated. * `--vocab_size`: vocabulary size, e.g., 8000, 16000, or 32000 -* `--character_coverage`: amount of characters covered by the model, good defaults are: `0.9995` for languages with rich character set like Japanse or Chinese and `1.0` for other languages with small character set. +* `--character_coverage`: amount of characters covered by the model, good defaults are: `0.9995` for languages with rich character set like Japanese or Chinese and `1.0` for other languages with small character set. * `--model_type`: model type. Choose from `unigram` (default), `bpe`, `char`, or `word`. The input sentence must be pretokenized when using `word` type. Use `--help` flag to display all parameters for training, or see [here](doc/options.md) for an overview. @@ -244,7 +258,7 @@ You can find that the original input sentence is restored from the vocabulary id ``` % spm_train --bos_id=0 --eos_id=1 --unk_id=5 --input=... --model_prefix=... --character_coverage=... ``` -When setting -1 id e.g., ```bos_id=-1```, this special token is disabled. Note that the unknow id cannot be disabled. We can define an id for padding (<pad>) as ```--pad_id=3```.   +When setting -1 id e.g., ```bos_id=-1```, this special token is disabled. 
Note that the unknown id cannot be disabled. We can define an id for padding (<pad>) as ```--pad_id=3```.   If you want to assign another special tokens, please see [Use custom symbols](doc/special_symbols.md). @@ -276,5 +290,5 @@ Then segment train/test corpus with ```--vocabulary``` option * [Use custom text normalization rules](doc/normalization.md) * [Use custom symbols](doc/special_symbols.md) * [Python Module](python/README.md) -* [TensorFlow Module](tensorflow/README.md) * [Segmentation and training algorithms in detail] + diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index df1faa4a..00000000 --- a/appveyor.yml +++ /dev/null @@ -1,27 +0,0 @@ -version: '{branch} build {build}' -image: Visual Studio 2015 -platform: - - x64 - - Win32 -configuration: Release -clone_depth: 50 -clone_folder: c:\projects\sentencepiece -#init: -# - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) -#on_finish: -# - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1')) -build_script: -- cmd: call test.bat %platform% -artifacts: - - path: build\sentencepiece*.7z - - path: python\dist\*.whl -deploy: - description: 'SentencePiece Windows release' - provider: GitHub - auth_token: - secure: Aq4jHo/HY6WFFKs1h9cCWfi3U4ZsVTooUEhtgBfcJM6SUhnZdPVazIcKCtiR32kc - draft: false - prerelease: false - on: - branch: master - appveyor_repo_tag: true diff --git a/cmake/ios.toolchain.cmake b/cmake/ios.toolchain.cmake new file mode 100644 index 00000000..04ae05d0 --- /dev/null +++ b/cmake/ios.toolchain.cmake @@ -0,0 +1,1020 @@ +# This file is part of the ios-cmake project. 
It was retrieved from +# https://github.com/leetal/ios-cmake.git, which is a fork of +# https://github.com/gerstrong/ios-cmake.git, which is a fork of +# https://github.com/cristeab/ios-cmake.git, which is a fork of +# https://code.google.com/p/ios-cmake/. Which in turn is based off of +# the Platform/Darwin.cmake and Platform/UnixPaths.cmake files which +# are included with CMake 2.8.4 +# +# The ios-cmake project is licensed under the new BSD license. +# +# Copyright (c) 2014, Bogdan Cristea and LTE Engineering Software, +# Kitware, Inc., Insight Software Consortium. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# This file is based off of the Platform/Darwin.cmake and +# Platform/UnixPaths.cmake files which are included with CMake 2.8.4 +# It has been altered for iOS development. +# +# Updated by Alex Stewart (alexs.mac@gmail.com) +# +# ***************************************************************************** +# Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com) +# under the BSD-3-Clause license +# https://github.com/leetal/ios-cmake +# ***************************************************************************** +# +# INFORMATION / HELP +# +############################################################################### +# OPTIONS # +############################################################################### +# +# PLATFORM: (default "OS64") +# OS = Build for iPhoneOS. +# OS64 = Build for arm64 iphoneOS. +# OS64COMBINED = Build for arm64 x86_64 iphoneOS + iphoneOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) +# SIMULATOR = Build for x86 i386 iphoneOS Simulator. +# SIMULATOR64 = Build for x86_64 iphoneOS Simulator. +# SIMULATORARM64 = Build for arm64 iphoneOS Simulator. +# TVOS = Build for arm64 tvOS. +# TVOSCOMBINED = Build for arm64 x86_64 tvOS + tvOS Simulator. 
Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) +# SIMULATOR_TVOS = Build for x86_64 tvOS Simulator. +# WATCHOS = Build for armv7k arm64_32 for watchOS. +# WATCHOSCOMBINED = Build for armv7k arm64_32 x86_64 watchOS + watchOS Simulator. Combined into FAT STATIC lib (only supported on 3.14+ of CMake with "-G Xcode" argument in combination with the "cmake --install" CMake build step) +# SIMULATOR_WATCHOS = Build for x86_64 for watchOS Simulator. +# MAC = Build for x86_64 macOS. +# MAC_ARM64 = Build for Apple Silicon macOS. +# MAC_CATALYST = Build for x86_64 macOS with Catalyst support (iOS toolchain on macOS). +# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS +# MAC_CATALYST_ARM64 = Build for Apple Silicon macOS with Catalyst support (iOS toolchain on macOS). +# Note: The build argument "MACOSX_DEPLOYMENT_TARGET" can be used to control min-version of macOS +# +# CMAKE_OSX_SYSROOT: Path to the SDK to use. By default this is +# automatically determined from PLATFORM and xcodebuild, but +# can also be manually specified (although this should not be required). +# +# CMAKE_DEVELOPER_ROOT: Path to the Developer directory for the platform +# being compiled for. By default this is automatically determined from +# CMAKE_OSX_SYSROOT, but can also be manually specified (although this should +# not be required). +# +# DEPLOYMENT_TARGET: Minimum SDK version to target. Default 4.0 on watchOS, 10.13 on macOS (11.0 on Apple Silicon macOS), 13.1 on Mac Catalyst and 11.0 on tvOS+iOS +# +# NAMED_LANGUAGE_SUPPORT: +# ON (default) = Will require "enable_language(OBJC) and/or enable_language(OBJCXX)" for full OBJC|OBJCXX support +# OFF = Will embed the OBJC and OBJCXX flags into the CMAKE_C_FLAGS and CMAKE_CXX_FLAGS (legacy behaviour, CMake version < 3.16) +# +# ENABLE_BITCODE: (ON|OFF) Enables or disables bitcode support. Default ON +# +# ENABLE_ARC: (ON|OFF) Enables or disables ARC support. 
Default ON (ARC enabled by default) +# +# ENABLE_VISIBILITY: (ON|OFF) Enables or disables symbol visibility support. Default OFF (visibility hidden by default) +# +# ENABLE_STRICT_TRY_COMPILE: (ON|OFF) Enables or disables strict try_compile() on all Check* directives (will run linker +# to actually check if linking is possible). Default OFF (will set CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY) +# +# ARCHS: (armv7 armv7s armv7k arm64 arm64_32 i386 x86_64) If specified, will override the default architectures for the given PLATFORM +# OS = armv7 armv7s arm64 (if applicable) +# OS64 = arm64 (if applicable) +# SIMULATOR = i386 +# SIMULATOR64 = x86_64 +# SIMULATORARM64 = arm64 +# TVOS = arm64 +# SIMULATOR_TVOS = x86_64 (i386 has since long been deprecated) +# WATCHOS = armv7k arm64_32 (if applicable) +# SIMULATOR_WATCHOS = x86_64 (i386 has since long been deprecated) +# MAC = x86_64 +# MAC_ARM64 = arm64 +# MAC_CATALYST = x86_64 +# MAC_CATALYST_ARM64 = arm64 +# +# NOTE: When manually specifying ARCHS, put a semi-colon between the entries. E.g., -DARCHS="armv7;arm64" +# +############################################################################### +# END OPTIONS # +############################################################################### +# +# This toolchain defines the following properties (available via get_property()) for use externally: +# +# PLATFORM: The currently targeted platform. +# XCODE_VERSION: Version number (not including Build version) of Xcode detected. +# SDK_VERSION: Version of SDK being used. +# OSX_ARCHITECTURES: Architectures being compiled for (generated from PLATFORM). +# APPLE_TARGET_TRIPLE: Used by autoconf build systems. NOTE: If "ARCHS" are overridden, this will *NOT* be set! +# +# This toolchain defines the following macros for use externally: +# +# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT) +# A convenience macro for setting xcode specific properties on targets. 
+# Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel +# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all"). +# +# find_host_package (PROGRAM ARGS) +# A macro used to find executable programs on the host system, not within the +# environment. Thanks to the android-cmake project for providing the +# command. +# + +cmake_minimum_required(VERSION 3.8.0) + +# CMake invokes the toolchain file twice during the first build, but only once during subsequent rebuilds. +if(DEFINED ENV{_IOS_TOOLCHAIN_HAS_RUN}) + return() +endif() +set(ENV{_IOS_TOOLCHAIN_HAS_RUN} true) + +# List of supported platform values +list(APPEND _supported_platforms + "OS" "OS64" "OS64COMBINED" "SIMULATOR" "SIMULATOR64" "SIMULATORARM64" + "TVOS" "TVOSCOMBINED" "SIMULATOR_TVOS" + "WATCHOS" "WATCHOSCOMBINED" "SIMULATOR_WATCHOS" + "MAC" "MAC_ARM64" + "MAC_CATALYST" "MAC_CATALYST_ARM64") + +# Cache what generator is used +set(USED_CMAKE_GENERATOR "${CMAKE_GENERATOR}") + +# Check if using a CMake version capable of building combined FAT builds (simulator and target slices combined in one static lib) +if(${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.14") + set(MODERN_CMAKE YES) +endif() + +# Get the Xcode version being used. +# Problem: CMake runs toolchain files multiple times, but can't read cache variables on some runs. +# Workaround: On first run (in which cache variables are always accessible), set an intermediary environment variable. +# +# NOTE: This pattern is used in many places in this toolchain to speed up checks of all sorts +if(DEFINED XCODE_VERSION_INT) + # Environment variables are always preserved. + set(ENV{_XCODE_VERSION_INT} "${XCODE_VERSION_INT}") +elseif(DEFINED ENV{_XCODE_VERSION_INT}) + set(XCODE_VERSION_INT "$ENV{_XCODE_VERSION_INT}") +elseif(NOT DEFINED XCODE_VERSION_INT) + find_program(XCODEBUILD_EXECUTABLE xcodebuild) + if(NOT XCODEBUILD_EXECUTABLE) + message(FATAL_ERROR "xcodebuild not found. 
Please install either the standalone commandline tools or Xcode.") + endif() + execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version + OUTPUT_VARIABLE XCODE_VERSION_INT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION_INT "${XCODE_VERSION_INT}") + string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION_INT "${XCODE_VERSION_INT}") + set(XCODE_VERSION_INT "${XCODE_VERSION_INT}" CACHE INTERNAL "") +endif() + +# Assuming that xcode 12.0 is installed you most probably have ios sdk 14.0 or later installed (tested on Big Sur) +# if you don't set a deployment target it will be set the way you only get 64-bit builds +if(NOT DEFINED DEPLOYMENT_TARGET AND XCODE_VERSION_INT VERSION_GREATER 12.0) + # Temporarily fix the arm64 issues in CMake install-combined by excluding arm64 for simulator builds (needed for Apple Silicon...) + set(CMAKE_XCODE_ATTRIBUTE_EXCLUDED_ARCHS[sdk=iphonesimulator*] "arm64") +endif() + +# Check if the platform variable is set +if(DEFINED PLATFORM) + # Environment variables are always preserved. + set(ENV{_PLATFORM} "${PLATFORM}") +elseif(DEFINED ENV{_PLATFORM}) + set(PLATFORM "$ENV{_PLATFORM}") +elseif(NOT DEFINED PLATFORM) + message(FATAL_ERROR "PLATFORM argument not set. Bailing configure since I don't know what target you want to build for!") +endif () + +if(PLATFORM MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") + message(FATAL_ERROR "The combined builds support requires Xcode to be used as generator via '-G Xcode' command-line argument in CMake") +endif() + +# Safeguard that the platform value is set and is one of the supported values +list(FIND _supported_platforms ${PLATFORM} contains_PLATFORM) +if("${contains_PLATFORM}" EQUAL "-1") + string(REPLACE ";" "\n * " _supported_platforms_formatted "${_supported_platforms}") + message(FATAL_ERROR " Invalid PLATFORM specified! 
Current value: ${PLATFORM}.\n" + " Supported PLATFORM values: \n * ${_supported_platforms_formatted}") +endif() + +# Check if Apple Silicon is supported +if(PLATFORM MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$" AND ${CMAKE_VERSION} VERSION_LESS "3.19.5") + message(FATAL_ERROR "Apple Silicon builds requires a minimum of CMake 3.19.5") +endif() + +# Touch toolchain variable to suppress "unused variable" warning. +# This happens if CMake is invoked with the same command line the second time. +if(CMAKE_TOOLCHAIN_FILE) +endif() + +# Fix for PThread library not in path +set(CMAKE_THREAD_LIBS_INIT "-lpthread") +set(CMAKE_HAVE_THREADS_LIBRARY 1) +set(CMAKE_USE_WIN32_THREADS_INIT 0) +set(CMAKE_USE_PTHREADS_INIT 1) + +# Specify named language support defaults. +if(NOT DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.16") + set(NAMED_LANGUAGE_SUPPORT ON) + message(STATUS "[DEFAULTS] Using explicit named language support! E.g., enable_language(CXX) is needed in the project files.") +elseif(NOT DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16") + set(NAMED_LANGUAGE_SUPPORT OFF) + message(STATUS "[DEFAULTS] Disabling explicit named language support. Falling back to legacy behaviour.") +elseif(DEFINED NAMED_LANGUAGE_SUPPORT AND ${CMAKE_VERSION} VERSION_LESS "3.16") + message(FATAL_ERROR "CMake named language support for OBJC and OBJCXX was added in CMake 3.16.") +endif() +set(NAMED_LANGUAGE_SUPPORT_INT ${NAMED_LANGUAGE_SUPPORT} CACHE BOOL + "Whether or not to enable explicit named language support" FORCE) + +# Specify minimum version of deployment target. +if(NOT DEFINED DEPLOYMENT_TARGET) + if (PLATFORM MATCHES "WATCHOS") + # Unless specified, SDK version 4.0 is used by default as minimum target version (watchOS). + set(DEPLOYMENT_TARGET "4.0") + elseif(PLATFORM STREQUAL "MAC") + # Unless specified, SDK version 10.13 (High sierra) is used by default as minimum target version (macos). 
+ set(DEPLOYMENT_TARGET "10.13") + elseif(PLATFORM STREQUAL "MAC_ARM64") + # Unless specified, SDK version 11.0 (Big Sur) is used by default as minimum target version (macos on arm). + set(DEPLOYMENT_TARGET "11.0") + elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") + # Unless specified, SDK version 13.1 is used by default as minimum target version (mac catalyst minimum requirement). + set(DEPLOYMENT_TARGET "13.1") + else() + # Unless specified, SDK version 11.0 is used by default as minimum target version (iOS, tvOS). + set(DEPLOYMENT_TARGET "11.0") + endif() + message(STATUS "[DEFAULTS] Using the default min-version since DEPLOYMENT_TARGET not provided!") +elseif(DEFINED DEPLOYMENT_TARGET AND PLATFORM MATCHES "^MAC_CATALYST" AND ${DEPLOYMENT_TARGET} VERSION_LESS "13.1") + message(FATAL_ERROR "Mac Catalyst builds requires a minimum deployment target of 13.1!") +endif() + +# Store the DEPLOYMENT_TARGET in the cache +set(DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}" CACHE INTERNAL "") + +# Handle the case where we are targeting iOS and a version above 10.3.4 (32-bit support dropped officially) +if(PLATFORM STREQUAL "OS" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) + set(PLATFORM "OS64") + message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.") +elseif(PLATFORM STREQUAL "SIMULATOR" AND DEPLOYMENT_TARGET VERSION_GREATER_EQUAL 10.3.4) + set(PLATFORM "SIMULATOR64") + message(STATUS "Targeting minimum SDK version ${DEPLOYMENT_TARGET}. Dropping 32-bit support.") +endif() + +set(PLATFORM_INT "${PLATFORM}") + +if(DEFINED ARCHS) + string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") +endif() + +# Determine the platform name and architectures for use in xcodebuild commands +# from the specified PLATFORM_INT name. 
+if(PLATFORM_INT STREQUAL "OS") + set(SDK_NAME iphoneos) + if(NOT ARCHS) + set(ARCHS armv7 armv7s arm64) + set(APPLE_TARGET_TRIPLE_INT arm-apple-ios${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "OS64") + set(SDK_NAME iphoneos) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 10.0) + set(ARCHS arm64) # FIXME: Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missing bitcode markers for example + else() + set(ARCHS arm64) + endif() + set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "OS64COMBINED") + set(SDK_NAME iphoneos) + if(MODERN_CMAKE) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 10.0) + set(ARCHS arm64 x86_64) # FIXME: Add arm64e when Apple have fixed the integration issues with it, libarclite_iphoneos.a is currently missing bitcode markers for example + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") + else() + set(ARCHS arm64 x86_64) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=iphonesimulator*] "x86_64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphoneos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=iphonesimulator*] "x86_64") + endif() + set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-ios${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}) + endif() + else() + message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the OS64COMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR") + 
set(SDK_NAME iphonesimulator) + if(NOT ARCHS) + set(ARCHS i386) + set(APPLE_TARGET_TRIPLE_INT i386-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() + message(DEPRECATION "SIMULATOR IS DEPRECATED. Consider using SIMULATOR64 instead.") +elseif(PLATFORM_INT STREQUAL "SIMULATOR64") + set(SDK_NAME iphonesimulator) + if(NOT ARCHS) + set(ARCHS x86_64) + set(APPLE_TARGET_TRIPLE_INT x86_64-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATORARM64") + set(SDK_NAME iphonesimulator) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT aarch64-apple-ios${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "TVOS") + set(SDK_NAME appletvos) + if(NOT ARCHS) + set(ARCHS arm64) + set(APPLE_TARGET_TRIPLE_INT aarch64-apple-tvos${DEPLOYMENT_TARGET}) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}) + endif() +elseif (PLATFORM_INT STREQUAL "TVOSCOMBINED") + set(SDK_NAME appletvos) + if(MODERN_CMAKE) + if(NOT ARCHS) + set(ARCHS arm64 x86_64) + set(APPLE_TARGET_TRIPLE_INT aarch64-x86_64-apple-tvos${DEPLOYMENT_TARGET}) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=appletvsimulator*] "x86_64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvos*] "arm64") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=appletvsimulator*] "x86_64") + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}) + endif() + else() + message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the TVOSCOMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") + set(SDK_NAME appletvsimulator) + if(NOT ARCHS) + 
set(ARCHS x86_64) + set(APPLE_TARGET_TRIPLE_INT x86_64-apple-tvos${DEPLOYMENT_TARGET}-simulator) + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-tvos${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "WATCHOS") + set(SDK_NAME watchos) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 10.0) + set(ARCHS armv7k arm64_32) + set(APPLE_TARGET_TRIPLE_INT aarch64_32-apple-watchos${DEPLOYMENT_TARGET}) + else() + set(ARCHS armv7k) + set(APPLE_TARGET_TRIPLE_INT arm-apple-watchos${DEPLOYMENT_TARGET}) + endif() + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}) + endif() +elseif(PLATFORM_INT STREQUAL "WATCHOSCOMBINED") + set(SDK_NAME watchos) + if(MODERN_CMAKE) + if(NOT ARCHS) + if (XCODE_VERSION_INT VERSION_GREATER 10.0) + set(ARCHS armv7k arm64_32 i386) + set(APPLE_TARGET_TRIPLE_INT aarch64_32-i386-apple-watchos${DEPLOYMENT_TARGET}) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k arm64_32") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k arm64_32") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") + else() + set(ARCHS armv7k i386) + set(APPLE_TARGET_TRIPLE_INT arm-i386-apple-watchos${DEPLOYMENT_TARGET}) + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchos*] "armv7k") + set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=watchsimulator*] "i386") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchos*] "armv7k") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=watchsimulator*] "i386") + endif() + else() + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}) + endif() + else() + message(FATAL_ERROR "Please make sure that you are running CMake 3.14+ to make the WATCHOSCOMBINED setting work") + endif() +elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") + set(SDK_NAME watchsimulator) + if(NOT ARCHS) + set(ARCHS i386) + set(APPLE_TARGET_TRIPLE_INT i386-apple-watchos${DEPLOYMENT_TARGET}-simulator) + else() + 
set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-watchos${DEPLOYMENT_TARGET}-simulator) + endif() +elseif(PLATFORM_INT STREQUAL "MAC" OR PLATFORM_INT STREQUAL "MAC_CATALYST") + set(SDK_NAME macosx) + if(NOT ARCHS) + set(ARCHS x86_64) + endif() + string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") + if(PLATFORM_INT STREQUAL "MAC") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) + elseif(PLATFORM_INT STREQUAL "MAC_CATALYST") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) + endif() +elseif(PLATFORM_INT MATCHES "^(MAC_ARM64)$|^(MAC_CATALYST_ARM64)$") + set(SDK_NAME macosx) + if(NOT ARCHS) + set(ARCHS arm64) + endif() + string(REPLACE ";" "-" ARCHS_SPLIT "${ARCHS}") + if(PLATFORM_INT STREQUAL "MAC_ARM64") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-macosx${DEPLOYMENT_TARGET}) + elseif(PLATFORM_INT STREQUAL "MAC_CATALYST_ARM64") + set(APPLE_TARGET_TRIPLE_INT ${ARCHS_SPLIT}-apple-ios${DEPLOYMENT_TARGET}-macabi) + endif() +else() + message(FATAL_ERROR "Invalid PLATFORM: ${PLATFORM_INT}") +endif() + +string(REPLACE ";" " " ARCHS_SPACED "${ARCHS}") + +if(MODERN_CMAKE AND PLATFORM_INT MATCHES ".*COMBINED" AND NOT CMAKE_GENERATOR MATCHES "Xcode") + message(FATAL_ERROR "The COMBINED options only work with Xcode generator, -G Xcode") +endif() + +if(CMAKE_GENERATOR MATCHES "Xcode" AND PLATFORM_INT MATCHES "^MAC_CATALYST") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") + set(CMAKE_XCODE_ATTRIBUTE_SUPPORTED_PLATFORMS "macosx") + set(CMAKE_XCODE_EFFECTIVE_PLATFORMS "-maccatalyst") + if(NOT DEFINED MACOSX_DEPLOYMENT_TARGET) + set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "10.15") + else() + set(CMAKE_XCODE_ATTRIBUTE_MACOSX_DEPLOYMENT_TARGET "${MACOSX_DEPLOYMENT_TARGET}") + endif() +elseif(CMAKE_GENERATOR MATCHES "Xcode") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") + set(CMAKE_XCODE_ATTRIBUTE_IPHONEOS_DEPLOYMENT_TARGET "${DEPLOYMENT_TARGET}") + if(NOT PLATFORM_INT MATCHES ".*COMBINED") + 
set(CMAKE_XCODE_ATTRIBUTE_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}") + set(CMAKE_XCODE_ATTRIBUTE_VALID_ARCHS[sdk=${SDK_NAME}*] "${ARCHS_SPACED}") + endif() +endif() + +# If user did not specify the SDK root to use, then query xcodebuild for it. +if(DEFINED CMAKE_OSX_SYSROOT_INT) + # Environment variables are always preserved. + set(ENV{_CMAKE_OSX_SYSROOT_INT} "${CMAKE_OSX_SYSROOT_INT}") +elseif(DEFINED ENV{_CMAKE_OSX_SYSROOT_INT}) + set(CMAKE_OSX_SYSROOT_INT "$ENV{_CMAKE_OSX_SYSROOT_INT}") +elseif(NOT DEFINED CMAKE_OSX_SYSROOT_INT) + execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -version -sdk ${SDK_NAME} Path + OUTPUT_VARIABLE CMAKE_OSX_SYSROOT_INT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() + +if (NOT DEFINED CMAKE_OSX_SYSROOT_INT AND NOT DEFINED CMAKE_OSX_SYSROOT) + message(SEND_ERROR "Please make sure that Xcode is installed and that the toolchain" + "is pointing to the correct path. Please run:" + "sudo xcode-select -s /Applications/Xcode.app/Contents/Developer" + "and see if that fixes the problem for you.") + message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} " + "does not exist.") +elseif(DEFINED CMAKE_OSX_SYSROOT_INT) + set(CMAKE_OSX_SYSROOT_INT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") + # Specify the location or name of the platform SDK to be used in CMAKE_OSX_SYSROOT. + set(CMAKE_OSX_SYSROOT "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") +endif() + +# Use bitcode or not +if(NOT DEFINED ENABLE_BITCODE AND NOT ARCHS MATCHES "((^|;|, )(i386|x86_64))+") + # Unless specified, enable bitcode support by default + message(STATUS "[DEFAULTS] Enabling bitcode support by default. ENABLE_BITCODE not provided!") + set(ENABLE_BITCODE ON) +elseif(NOT DEFINED ENABLE_BITCODE) + message(STATUS "[DEFAULTS] Disabling bitcode support by default on simulators. 
ENABLE_BITCODE not provided for override!") + set(ENABLE_BITCODE OFF) +endif() +set(ENABLE_BITCODE_INT ${ENABLE_BITCODE} CACHE BOOL + "Whether or not to enable bitcode" FORCE) +# Use ARC or not +if(NOT DEFINED ENABLE_ARC) + # Unless specified, enable ARC support by default + set(ENABLE_ARC ON) + message(STATUS "[DEFAULTS] Enabling ARC support by default. ENABLE_ARC not provided!") +endif() +set(ENABLE_ARC_INT ${ENABLE_ARC} CACHE BOOL "Whether or not to enable ARC" FORCE) +# Use hidden visibility or not +if(NOT DEFINED ENABLE_VISIBILITY) + # Unless specified, disable symbols visibility by default + set(ENABLE_VISIBILITY OFF) + message(STATUS "[DEFAULTS] Hiding symbols visibility by default. ENABLE_VISIBILITY not provided!") +endif() +set(ENABLE_VISIBILITY_INT ${ENABLE_VISIBILITY} CACHE BOOL "Whether or not to hide symbols from the dynamic linker (-fvisibility=hidden)" FORCE) +# Set strict compiler checks or not +if(NOT DEFINED ENABLE_STRICT_TRY_COMPILE) + # Unless specified, disable strict try_compile() + set(ENABLE_STRICT_TRY_COMPILE OFF) + message(STATUS "[DEFAULTS] Using NON-strict compiler checks by default. ENABLE_STRICT_TRY_COMPILE not provided!") +endif() +set(ENABLE_STRICT_TRY_COMPILE_INT ${ENABLE_STRICT_TRY_COMPILE} CACHE BOOL + "Whether or not to use strict compiler checks" FORCE) + +# Get the SDK version information. +if(DEFINED SDK_VERSION) + # Environment variables are always preserved. + set(ENV{_SDK_VERSION} "${SDK_VERSION}") +elseif(DEFINED ENV{_SDK_VERSION}) + set(SDK_VERSION "$ENV{_SDK_VERSION}") +elseif(NOT DEFINED SDK_VERSION) + execute_process(COMMAND ${XCODEBUILD_EXECUTABLE} -sdk ${CMAKE_OSX_SYSROOT_INT} -version SDKVersion + OUTPUT_VARIABLE SDK_VERSION + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() + +# Find the Developer root for the specific iOS platform being compiled for +# from CMAKE_OSX_SYSROOT. Should be ../../ from SDK specified in +# CMAKE_OSX_SYSROOT. 
There does not appear to be a direct way to obtain +# this information from xcrun or xcodebuild. +if (NOT DEFINED CMAKE_DEVELOPER_ROOT AND NOT CMAKE_GENERATOR MATCHES "Xcode") + get_filename_component(PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT_INT} PATH) + get_filename_component(CMAKE_DEVELOPER_ROOT ${PLATFORM_SDK_DIR} PATH) + if (NOT EXISTS "${CMAKE_DEVELOPER_ROOT}") + message(FATAL_ERROR "Invalid CMAKE_DEVELOPER_ROOT: ${CMAKE_DEVELOPER_ROOT} does not exist.") + endif() +endif() + +# Find the C & C++ compilers for the specified SDK. +if(DEFINED CMAKE_C_COMPILER) + # Environment variables are always preserved. + set(ENV{_CMAKE_C_COMPILER} "${CMAKE_C_COMPILER}") +elseif(DEFINED ENV{_CMAKE_C_COMPILER}) + set(CMAKE_C_COMPILER "$ENV{_CMAKE_C_COMPILER}") + set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) +elseif(NOT DEFINED CMAKE_C_COMPILER) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang + OUTPUT_VARIABLE CMAKE_C_COMPILER + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(CMAKE_ASM_COMPILER ${CMAKE_C_COMPILER}) +endif() +if(DEFINED CMAKE_CXX_COMPILER) + # Environment variables are always preserved. + set(ENV{_CMAKE_CXX_COMPILER} "${CMAKE_CXX_COMPILER}") +elseif(DEFINED ENV{_CMAKE_CXX_COMPILER}) + set(CMAKE_CXX_COMPILER "$ENV{_CMAKE_CXX_COMPILER}") +elseif(NOT DEFINED CMAKE_CXX_COMPILER) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find clang++ + OUTPUT_VARIABLE CMAKE_CXX_COMPILER + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +# Find (Apple's) libtool. +if(DEFINED BUILD_LIBTOOL) + # Environment variables are always preserved. 
+ set(ENV{_BUILD_LIBTOOL} "${BUILD_LIBTOOL}") +elseif(DEFINED ENV{_BUILD_LIBTOOL}) + set(BUILD_LIBTOOL "$ENV{_BUILD_LIBTOOL}") +elseif(NOT DEFINED BUILD_LIBTOOL) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find libtool + OUTPUT_VARIABLE BUILD_LIBTOOL + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +endif() +# Find the toolchain's provided install_name_tool if none is found on the host +if(DEFINED CMAKE_INSTALL_NAME_TOOL) + # Environment variables are always preserved. + set(ENV{_CMAKE_INSTALL_NAME_TOOL} "${CMAKE_INSTALL_NAME_TOOL}") +elseif(DEFINED ENV{_CMAKE_INSTALL_NAME_TOOL}) + set(CMAKE_INSTALL_NAME_TOOL "$ENV{_CMAKE_INSTALL_NAME_TOOL}") +elseif(NOT DEFINED CMAKE_INSTALL_NAME_TOOL) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT_INT} -find install_name_tool + OUTPUT_VARIABLE CMAKE_INSTALL_NAME_TOOL_INT + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(CMAKE_INSTALL_NAME_TOOL ${CMAKE_INSTALL_NAME_TOOL_INT} CACHE INTERNAL "") +endif() + +# Configure libtool to be used instead of ar + ranlib to build static libraries. +# This is required on Xcode 7+, but should also work on previous versions of +# Xcode. +get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES) +foreach(lang ${languages}) + set(CMAKE_${lang}_CREATE_STATIC_LIBRARY "${BUILD_LIBTOOL} -static -o " CACHE INTERNAL "") +endforeach() + +# CMake 3.14+ support building for iOS, watchOS and tvOS out of the box. 
+if(MODERN_CMAKE) + if(SDK_NAME MATCHES "iphone") + set(CMAKE_SYSTEM_NAME iOS) + elseif(SDK_NAME MATCHES "macosx") + set(CMAKE_SYSTEM_NAME Darwin) + elseif(SDK_NAME MATCHES "appletv") + set(CMAKE_SYSTEM_NAME tvOS) + elseif(SDK_NAME MATCHES "watch") + set(CMAKE_SYSTEM_NAME watchOS) + endif() + # Provide flags for a combined FAT library build on newer CMake versions + if(PLATFORM_INT MATCHES ".*COMBINED") + set(CMAKE_XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO") + set(CMAKE_IOS_INSTALL_COMBINED YES) + endif() +elseif(NOT DEFINED CMAKE_SYSTEM_NAME AND ${CMAKE_VERSION} VERSION_GREATER_EQUAL "3.10") + # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified + set(CMAKE_SYSTEM_NAME iOS) +elseif(NOT DEFINED CMAKE_SYSTEM_NAME) + # Legacy code path prior to CMake 3.14 or fallback if no CMAKE_SYSTEM_NAME specified + set(CMAKE_SYSTEM_NAME Darwin) +endif() +# Standard settings. +set(CMAKE_SYSTEM_VERSION ${SDK_VERSION} CACHE INTERNAL "") +set(UNIX ON CACHE BOOL "") +set(APPLE ON CACHE BOOL "") +if(PLATFORM STREQUAL "MAC" OR PLATFORM STREQUAL "MAC_ARM64") + set(IOS OFF CACHE BOOL "") + set(MACOS ON CACHE BOOL "") +elseif(PLATFORM STREQUAL "MAC_CATALYST" OR PLATFORM STREQUAL "MAC_CATALYST_ARM64") + set(IOS ON CACHE BOOL "") + set(MACOS ON CACHE BOOL "") +else() + set(IOS ON CACHE BOOL "") +endif() +set(CMAKE_AR ar CACHE FILEPATH "" FORCE) +set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) +set(CMAKE_STRIP strip CACHE FILEPATH "" FORCE) +# Set the architectures for which to build. +set(CMAKE_OSX_ARCHITECTURES ${ARCHS} CACHE INTERNAL "") +# Change the type of target generated for try_compile() so it'll work when cross-compiling, weak compiler checks +if(NOT ENABLE_STRICT_TRY_COMPILE_INT) + set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) +endif() +# All iOS/Darwin specific settings - some may be redundant. 
+set(CMAKE_MACOSX_BUNDLE YES) +set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "NO") +set(CMAKE_SHARED_LIBRARY_PREFIX "lib") +set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib") +set(CMAKE_SHARED_MODULE_PREFIX "lib") +set(CMAKE_SHARED_MODULE_SUFFIX ".so") +set(CMAKE_C_COMPILER_ABI ELF) +set(CMAKE_CXX_COMPILER_ABI ELF) +set(CMAKE_C_HAS_ISYSROOT 1) +set(CMAKE_CXX_HAS_ISYSROOT 1) +set(CMAKE_MODULE_EXISTS 1) +set(CMAKE_DL_LIBS "") +set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ") +set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ") +set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}") +set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}") + +if(ARCHS MATCHES "((^|;|, )(arm64|arm64e|x86_64))+") + set(CMAKE_C_SIZEOF_DATA_PTR 8) + set(CMAKE_CXX_SIZEOF_DATA_PTR 8) + if(ARCHS MATCHES "((^|;|, )(arm64|arm64e))+") + set(CMAKE_SYSTEM_PROCESSOR "aarch64") + else() + set(CMAKE_SYSTEM_PROCESSOR "x86_64") + endif() +else() + set(CMAKE_C_SIZEOF_DATA_PTR 4) + set(CMAKE_CXX_SIZEOF_DATA_PTR 4) + set(CMAKE_SYSTEM_PROCESSOR "arm") +endif() + +# Note that only Xcode 7+ supports the newer more specific: +# -m${SDK_NAME}-version-min flags, older versions of Xcode use: +# -m(ios/ios-simulator)-version-min instead. +if(${CMAKE_VERSION} VERSION_LESS "3.11") + if(PLATFORM_INT STREQUAL "OS" OR PLATFORM_INT STREQUAL "OS64") + if(XCODE_VERSION_INT VERSION_LESS 7.0) + set(SDK_NAME_VERSION_FLAGS + "-mios-version-min=${DEPLOYMENT_TARGET}") + else() + # Xcode 7.0+ uses flags we can build directly from SDK_NAME. 
+ set(SDK_NAME_VERSION_FLAGS + "-m${SDK_NAME}-version-min=${DEPLOYMENT_TARGET}") + endif() + elseif(PLATFORM_INT STREQUAL "TVOS") + set(SDK_NAME_VERSION_FLAGS + "-mtvos-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "SIMULATOR_TVOS") + set(SDK_NAME_VERSION_FLAGS + "-mtvos-simulator-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "WATCHOS") + set(SDK_NAME_VERSION_FLAGS + "-mwatchos-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "SIMULATOR_WATCHOS") + set(SDK_NAME_VERSION_FLAGS + "-mwatchos-simulator-version-min=${DEPLOYMENT_TARGET}") + elseif(PLATFORM_INT STREQUAL "MAC") + set(SDK_NAME_VERSION_FLAGS + "-mmacosx-version-min=${DEPLOYMENT_TARGET}") + else() + # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min. + set(SDK_NAME_VERSION_FLAGS + "-mios-simulator-version-min=${DEPLOYMENT_TARGET}") + endif() +elseif(NOT PLATFORM_INT MATCHES "^MAC_CATALYST") + # Newer versions of CMake sets the version min flags correctly, skip this for Mac Catalyst targets + set(CMAKE_OSX_DEPLOYMENT_TARGET ${DEPLOYMENT_TARGET}) +endif() + +if(DEFINED APPLE_TARGET_TRIPLE_INT) + set(APPLE_TARGET_TRIPLE ${APPLE_TARGET_TRIPLE_INT} CACHE INTERNAL "") + set(CMAKE_C_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) + set(CMAKE_CXX_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) + set(CMAKE_ASM_COMPILER_TARGET ${APPLE_TARGET_TRIPLE}) +endif() + +if(PLATFORM_INT MATCHES "^MAC_CATALYST") + set(C_TARGET_FLAGS "-isystem ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/usr/include -iframework ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks") +endif() + +if(ENABLE_BITCODE_INT) + set(BITCODE "-fembed-bitcode") + set(CMAKE_XCODE_ATTRIBUTE_BITCODE_GENERATION_MODE "bitcode") + set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "YES") +else() + set(BITCODE "") + set(CMAKE_XCODE_ATTRIBUTE_ENABLE_BITCODE "NO") +endif() + +if(ENABLE_ARC_INT) + set(FOBJC_ARC "-fobjc-arc") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES") +else() + set(FOBJC_ARC 
"-fno-objc-arc") + set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "NO") +endif() + +if(NAMED_LANGUAGE_SUPPORT_INT) + set(OBJC_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0") + set(OBJC_LEGACY_VARS "") +else() + set(OBJC_VARS "") + set(OBJC_LEGACY_VARS "-fobjc-abi-version=2 -DOBJC_OLD_DISPATCH_PROTOTYPES=0") +endif() + +if(NOT ENABLE_VISIBILITY_INT) + foreach(lang ${languages}) + set(CMAKE_${lang}_VISIBILITY_PRESET "hidden" CACHE INTERNAL "") + endforeach() + set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "YES") + set(VISIBILITY "-fvisibility=hidden -fvisibility-inlines-hidden") +else() + foreach(lang ${languages}) + set(CMAKE_${lang}_VISIBILITY_PRESET "default" CACHE INTERNAL "") + endforeach() + set(CMAKE_XCODE_ATTRIBUTE_GCC_SYMBOLS_PRIVATE_EXTERN "NO") + set(VISIBILITY "-fvisibility=default") +endif() + +if(DEFINED APPLE_TARGET_TRIPLE) + set(APPLE_TARGET_TRIPLE_FLAG "-target ${APPLE_TARGET_TRIPLE}") +endif() + +#Check if Xcode generator is used, since that will handle these flags automagically +if(CMAKE_GENERATOR MATCHES "Xcode") + message(STATUS "Not setting any manual command-line buildflags, since Xcode is selected as generator. 
Modifying the Xcode build-settings directly instead.") +else() + set(CMAKE_C_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_C_FLAGS}") + set(CMAKE_C_FLAGS_DEBUG "-O0 -g ${CMAKE_C_FLAGS_DEBUG}") + set(CMAKE_C_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_C_FLAGS_MINSIZEREL}") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_C_FLAGS_RELWITHDEBINFO}") + set(CMAKE_C_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_C_FLAGS_RELEASE}") + set(CMAKE_CXX_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${OBJC_LEGACY_VARS} ${BITCODE} ${VISIBILITY} ${CMAKE_CXX_FLAGS}") + set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g ${CMAKE_CXX_FLAGS_DEBUG}") + set(CMAKE_CXX_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_CXX_FLAGS_MINSIZEREL}") + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_CXX_FLAGS_RELWITHDEBINFO}") + set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_CXX_FLAGS_RELEASE}") + if(NAMED_LANGUAGE_SUPPORT_INT) + set(CMAKE_OBJC_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_OBJC_FLAGS}") + set(CMAKE_OBJC_FLAGS_DEBUG "-O0 -g ${CMAKE_OBJC_FLAGS_DEBUG}") + set(CMAKE_OBJC_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_OBJC_FLAGS_MINSIZEREL}") + set(CMAKE_OBJC_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_OBJC_FLAGS_RELWITHDEBINFO}") + set(CMAKE_OBJC_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_OBJC_FLAGS_RELEASE}") + set(CMAKE_OBJCXX_FLAGS "${C_TARGET_FLAGS} ${APPLE_TARGET_TRIPLE_FLAG} ${SDK_NAME_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} ${FOBJC_ARC} ${OBJC_VARS} ${CMAKE_OBJCXX_FLAGS}") + set(CMAKE_OBJCXX_FLAGS_DEBUG "-O0 -g ${CMAKE_OBJCXX_FLAGS_DEBUG}") + set(CMAKE_OBJCXX_FLAGS_MINSIZEREL "-DNDEBUG -Os ${CMAKE_OBJCXX_FLAGS_MINSIZEREL}") + set(CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO "-DNDEBUG -O2 -g ${CMAKE_OBJCXX_FLAGS_RELWITHDEBINFO}") + set(CMAKE_OBJCXX_FLAGS_RELEASE "-DNDEBUG -O3 ${CMAKE_OBJCXX_FLAGS_RELEASE}") + 
endif() + set(CMAKE_C_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_C_LINK_FLAGS}") + set(CMAKE_CXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_CXX_LINK_FLAGS}") + if(NAMED_LANGUAGE_SUPPORT_INT) + set(CMAKE_OBJC_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_OBJC_LINK_FLAGS}") + set(CMAKE_OBJCXX_LINK_FLAGS "${C_TARGET_FLAGS} ${SDK_NAME_VERSION_FLAGS} -Wl,-search_paths_first ${CMAKE_OBJCXX_LINK_FLAGS}") + endif() + set(CMAKE_ASM_FLAGS "${CMAKE_C_FLAGS} -x assembler-with-cpp -arch ${CMAKE_OSX_ARCHITECTURES} ${APPLE_TARGET_TRIPLE_FLAG}") +endif() + +## Print status messages to inform of the current state +message(STATUS "Configuring ${SDK_NAME} build for platform: ${PLATFORM_INT}, architecture(s): ${ARCHS}") +message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT_INT}") +message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}") +message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}") +message(STATUS "Using libtool: ${BUILD_LIBTOOL}") +message(STATUS "Using install name tool: ${CMAKE_INSTALL_NAME_TOOL}") +if(DEFINED APPLE_TARGET_TRIPLE) + message(STATUS "Autoconf target triple: ${APPLE_TARGET_TRIPLE}") +endif() +message(STATUS "Using minimum deployment version: ${DEPLOYMENT_TARGET}" + " (SDK version: ${SDK_VERSION})") +if(MODERN_CMAKE) + message(STATUS "Merging integrated CMake 3.14+ iOS,tvOS,watchOS,macOS toolchain(s) with this toolchain!") + if(PLATFORM_INT MATCHES ".*COMBINED") + message(STATUS "Will combine built (static) artifacts into FAT lib...") + endif() +endif() +if(CMAKE_GENERATOR MATCHES "Xcode") + message(STATUS "Using Xcode version: ${XCODE_VERSION_INT}") +endif() +message(STATUS "CMake version: ${CMAKE_VERSION}") +if(DEFINED SDK_NAME_VERSION_FLAGS) + message(STATUS "Using version flags: ${SDK_NAME_VERSION_FLAGS}") +endif() +message(STATUS "Using a data_ptr size of: ${CMAKE_CXX_SIZEOF_DATA_PTR}") +if(ENABLE_BITCODE_INT) + message(STATUS 
"Bitcode: Enabled") +else() + message(STATUS "Bitcode: Disabled") +endif() + +if(ENABLE_ARC_INT) + message(STATUS "ARC: Enabled") +else() + message(STATUS "ARC: Disabled") +endif() + +if(ENABLE_VISIBILITY_INT) + message(STATUS "Hiding symbols: Disabled") +else() + message(STATUS "Hiding symbols: Enabled") +endif() + +# Set global properties +set_property(GLOBAL PROPERTY PLATFORM "${PLATFORM}") +set_property(GLOBAL PROPERTY APPLE_TARGET_TRIPLE "${APPLE_TARGET_TRIPLE_INT}") +set_property(GLOBAL PROPERTY SDK_VERSION "${SDK_VERSION}") +set_property(GLOBAL PROPERTY XCODE_VERSION "${XCODE_VERSION_INT}") +set_property(GLOBAL PROPERTY OSX_ARCHITECTURES "${CMAKE_OSX_ARCHITECTURES}") + +# Export configurable variables for the try_compile() command. +set(CMAKE_TRY_COMPILE_PLATFORM_VARIABLES + PLATFORM + XCODE_VERSION_INT + SDK_VERSION + NAMED_LANGUAGE_SUPPORT + DEPLOYMENT_TARGET + CMAKE_DEVELOPER_ROOT + CMAKE_OSX_SYSROOT_INT + ENABLE_BITCODE + ENABLE_ARC + CMAKE_ASM_COMPILER + CMAKE_C_COMPILER + CMAKE_C_COMPILER_TARGET + CMAKE_CXX_COMPILER + CMAKE_CXX_COMPILER_TARGET + BUILD_LIBTOOL + CMAKE_INSTALL_NAME_TOOL + CMAKE_C_FLAGS + CMAKE_C_DEBUG + CMAKE_C_MINSIZEREL + CMAKE_C_RELWITHDEBINFO + CMAKE_C_RELEASE + CMAKE_CXX_FLAGS + CMAKE_CXX_FLAGS_DEBUG + CMAKE_CXX_FLAGS_MINSIZEREL + CMAKE_CXX_FLAGS_RELWITHDEBINFO + CMAKE_CXX_FLAGS_RELEASE + CMAKE_C_LINK_FLAGS + CMAKE_CXX_LINK_FLAGS + CMAKE_ASM_FLAGS +) + +if(NAMED_LANGUAGE_SUPPORT_INT) + list(APPEND CMAKE_TRY_COMPILE_PLATFORM_VARIABLES + CMAKE_OBJC_FLAGS + CMAKE_OBJC_DEBUG + CMAKE_OBJC_MINSIZEREL + CMAKE_OBJC_RELWITHDEBINFO + CMAKE_OBJC_RELEASE + CMAKE_OBJCXX_FLAGS + CMAKE_OBJCXX_DEBUG + CMAKE_OBJCXX_MINSIZEREL + CMAKE_OBJCXX_RELWITHDEBINFO + CMAKE_OBJCXX_RELEASE + CMAKE_OBJC_LINK_FLAGS + CMAKE_OBJCXX_LINK_FLAGS + ) +endif() + +set(CMAKE_PLATFORM_HAS_INSTALLNAME 1) +set(CMAKE_SHARED_LINKER_FLAGS "-rpath @executable_path/Frameworks -rpath @loader_path/Frameworks") +set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib 
-Wl,-headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle -Wl,-headerpad_max_install_names") +set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,") +set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,") +set(CMAKE_FIND_LIBRARY_SUFFIXES ".tbd" ".dylib" ".so" ".a") +set(CMAKE_SHARED_LIBRARY_SONAME_C_FLAG "-install_name") + +# Set the find root to the SDK developer roots. +# Note: CMAKE_FIND_ROOT_PATH is only useful when cross-compiling. Thus, do not set on macOS builds. +if(NOT PLATFORM_INT MATCHES "^MAC.*$") + list(APPEND CMAKE_FIND_ROOT_PATH "${CMAKE_OSX_SYSROOT_INT}" CACHE INTERNAL "") + set(CMAKE_IGNORE_PATH "/System/Library/Frameworks;/usr/local/lib" CACHE INTERNAL "") +endif() + +# Default to searching for frameworks first. +set(CMAKE_FIND_FRAMEWORK FIRST) + +# Set up the default search directories for frameworks. +if(PLATFORM_INT MATCHES "^MAC_CATALYST") + set(CMAKE_FRAMEWORK_PATH + ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks + ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks + ${CMAKE_OSX_SYSROOT_INT}/System/iOSSupport/System/Library/Frameworks + ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "") +else() + set(CMAKE_FRAMEWORK_PATH + ${CMAKE_DEVELOPER_ROOT}/Library/PrivateFrameworks + ${CMAKE_OSX_SYSROOT_INT}/System/Library/Frameworks + ${CMAKE_FRAMEWORK_PATH} CACHE INTERNAL "") +endif() + +# By default, search both the specified iOS SDK and the remainder of the host filesystem. 
+if(NOT CMAKE_FIND_ROOT_PATH_MODE_PROGRAM) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH CACHE INTERNAL "") +endif() +if(NOT CMAKE_FIND_ROOT_PATH_MODE_LIBRARY) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH CACHE INTERNAL "") +endif() +if(NOT CMAKE_FIND_ROOT_PATH_MODE_INCLUDE) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH CACHE INTERNAL "") +endif() +if(NOT CMAKE_FIND_ROOT_PATH_MODE_PACKAGE) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH CACHE INTERNAL "") +endif() + +# +# Some helper-macros below to simplify and beautify the CMakeFile +# + +# This little macro lets you set any Xcode specific property. +macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION) + set(XCODE_RELVERSION_I "${XCODE_RELVERSION}") + if(XCODE_RELVERSION_I STREQUAL "All") + set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}") + else() + set_property(TARGET ${TARGET} PROPERTY XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}") + endif() +endmacro(set_xcode_property) + +# This macro lets you find executable programs on the host system. 
+macro(find_host_package) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE NEVER) + set(_TOOLCHAIN_IOS ${IOS}) + set(IOS OFF) + find_package(${ARGN}) + set(IOS ${_TOOLCHAIN_IOS}) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH) +endmacro(find_host_package) +if (NOT DEFINED CMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM AND NOT DEFINED CMAKE_XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY) + set(CMAKE_XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED NO) +endif() + +SET(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_MODULES "YES") +SET(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES") diff --git a/data/nfkc.tsv b/data/nfkc.tsv index dfcefbfd..2e4c40a1 100644 --- a/data/nfkc.tsv +++ b/data/nfkc.tsv @@ -155269,6 +155269,7 @@ FB9 F90 FB5 # ྐྵ => ྐྵ 32FE 30F2 # ㋾ => ヲ 32FE 3099 30FA # ㋾゙ => ヺ 32FE FF9E 30FA # ㋾゙ => ヺ +32FF 4EE4 548C # ㋿ => 令和 3300 30A2 30D1 30FC 30C8 # ㌀ => アパート 3301 30A2 30EB 30D5 30A1 # ㌁ => アルファ 3302 30A2 30F3 30DA 30A2 # ㌂ => アンペア @@ -155534,6 +155535,7 @@ AB5C A727 # ꭜ => ꜧ AB5D AB37 # ꭝ => ꬷ AB5E 26B # ꭞ => ɫ AB5F AB52 # ꭟ => ꭒ +AB69 28D # ꭩ => ʍ F900 8C48 # 豈 => 豈 F901 66F4 # 更 => 更 F902 8ECA # 車 => 車 @@ -212815,6 +212817,7 @@ FFEE 25CB # ○ => ○ 114B9 114BD 114BE # 𑒾 => 𑒾 115B8 115AF 115BA # 𑖺 => 𑖺 115B9 115AF 115BB # 𑖻 => 𑖻 +11935 11930 11938 # 𑤸 => 𑤸 1D15E 1D157 1D165 # 𝅗𝅥 => 𝅗𝅥 1D15F 1D158 1D165 # 𝅘𝅥 => 𝅘𝅥 1D160 1D158 1D165 1D16E # 𝅘𝅥𝅮 => 𝅘𝅥𝅮 @@ -224063,6 +224066,7 @@ FFEE 25CB # ○ => ○ 1F14F 57 43 # 🅏 => WC 1F16A 4D 43 # 🅪 => MC 1F16B 4D 44 # 🅫 => MD +1F16C 4D 52 # 🅬 => MR 1F190 44 4A # 🆐 => DJ 1F200 307B 304B # 🈀 => ほか 1F201 30B3 30B3 # 🈁 => ココ @@ -224112,6 +224116,7 @@ FFEE 25CB # ○ => ○ 1F238 7533 # 🈸 => 申 1F239 5272 # 🈹 => 割 1F23A 55B6 # 🈺 => 営 +1F23B 914D # 🈻 => 配 1F240 3014 672C 3015 # 🉀 => 〔本〕 1F241 3014 4E09 3015 # 🉁 
=> 〔三〕 1F242 3014 4E8C 3015 # 🉂 => 〔二〕 @@ -224123,6 +224128,16 @@ FFEE 25CB # ○ => ○ 1F248 3014 6557 3015 # 🉈 => 〔敗〕 1F250 5F97 # 🉐 => 得 1F251 53EF # 🉑 => 可 +1FBF0 30 # 🯰 => 0 +1FBF1 31 # 🯱 => 1 +1FBF2 32 # 🯲 => 2 +1FBF3 33 # 🯳 => 3 +1FBF4 34 # 🯴 => 4 +1FBF5 35 # 🯵 => 5 +1FBF6 36 # 🯶 => 6 +1FBF7 37 # 🯷 => 7 +1FBF8 38 # 🯸 => 8 +1FBF9 39 # 🯹 => 9 2F800 4E3D # 丽 => 丽 2F801 4E38 # 丸 => 丸 2F802 4E41 # 乁 => 乁 diff --git a/data/nfkc_cf.tsv b/data/nfkc_cf.tsv index 4814f6a5..b164315e 100644 --- a/data/nfkc_cf.tsv +++ b/data/nfkc_cf.tsv @@ -57031,6 +57031,61 @@ FB9 F90 FB5 # ྐྵ => ྐྵ 1B3E 1B35 1B40 # ᭀ => ᭀ 1B3F 1B35 1B41 # ᭁ => ᭁ 1B42 1B35 1B43 # ᭃ => ᭃ +1C80 432 # ᲀ => в +1C81 434 # ᲁ => д +1C82 43E # ᲂ => о +1C83 441 # ᲃ => с +1C84 442 # ᲄ => т +1C85 442 # ᲅ => т +1C86 44A # ᲆ => ъ +1C87 463 # ᲇ => ѣ +1C88 A64B # ᲈ => ꙋ +1C90 10D0 # Ა => ა +1C91 10D1 # Ბ => ბ +1C92 10D2 # Გ => გ +1C93 10D3 # Დ => დ +1C94 10D4 # Ე => ე +1C95 10D5 # Ვ => ვ +1C96 10D6 # Ზ => ზ +1C97 10D7 # Თ => თ +1C98 10D8 # Ი => ი +1C99 10D9 # Კ => კ +1C9A 10DA # Ლ => ლ +1C9B 10DB # Მ => მ +1C9C 10DC # Ნ => ნ +1C9D 10DD # Ო => ო +1C9E 10DE # Პ => პ +1C9F 10DF # Ჟ => ჟ +1CA0 10E0 # Რ => რ +1CA1 10E1 # Ს => ს +1CA2 10E2 # Ტ => ტ +1CA3 10E3 # Უ => უ +1CA4 10E4 # Ფ => ფ +1CA5 10E5 # Ქ => ქ +1CA6 10E6 # Ღ => ღ +1CA7 10E7 # Ყ => ყ +1CA8 10E8 # Შ => შ +1CA9 10E9 # Ჩ => ჩ +1CAA 10EA # Ც => ც +1CAB 10EB # Ძ => ძ +1CAC 10EC # Წ => წ +1CAD 10ED # Ჭ => ჭ +1CAE 10EE # Ხ => ხ +1CAF 10EF # Ჯ => ჯ +1CB0 10F0 # Ჰ => ჰ +1CB1 10F1 # Ჱ => ჱ +1CB2 10F2 # Ჲ => ჲ +1CB3 10F3 # Ჳ => ჳ +1CB4 10F4 # Ჴ => ჴ +1CB5 10F5 # Ჵ => ჵ +1CB6 10F6 # Ჶ => ჶ +1CB7 10F7 # Ჷ => ჷ +1CB8 10F8 # Ჸ => ჸ +1CB9 10F9 # Ჹ => ჹ +1CBA 10FA # Ჺ => ჺ +1CBD 10FD # Ჽ => ჽ +1CBE 10FE # Ჾ => ჾ +1CBF 10FF # Ჿ => ჿ 1D2C 61 # ᴬ => a 1D2C 300 E0 # ᴬ̀ => à 1D2C 301 E1 # ᴬ́ => á @@ -156103,6 +156158,7 @@ FB9 F90 FB5 # ྐྵ => ྐྵ 32FE 30F2 # ㋾ => ヲ 32FE 3099 30FA # ㋾゙ => ヺ 32FE FF9E 30FA # ㋾゙ => ヺ +32FF 4EE4 548C # ㋿ => 令和 3300 30A2 30D1 30FC 30C8 # ㌀ => アパート 3301 30A2 
30EB 30D5 30A1 # ㌁ => アルファ 3302 30A2 30F3 30DA 30A2 # ㌂ => アンペア @@ -156463,18 +156519,31 @@ A7AA 266 # Ɦ => ɦ A7AB 25C # Ɜ => ɜ A7AC 261 # Ɡ => ɡ A7AD 26C # Ɬ => ɬ +A7AE 26A # Ɪ => ɪ A7B0 29E # Ʞ => ʞ A7B1 287 # Ʇ => ʇ A7B2 29D # Ʝ => ʝ A7B3 AB53 # Ꭓ => ꭓ A7B4 A7B5 # Ꞵ => ꞵ A7B6 A7B7 # Ꞷ => ꞷ +A7B8 A7B9 # Ꞹ => ꞹ +A7BA A7BB # Ꞻ => ꞻ +A7BC A7BD # Ꞽ => ꞽ +A7BE A7BF # Ꞿ => ꞿ +A7C2 A7C3 # Ꟃ => ꟃ +A7C4 A794 # Ꞔ => ꞔ +A7C5 282 # Ʂ => ʂ +A7C6 1D8E # Ᶎ => ᶎ +A7C7 A7C8 # Ꟈ => ꟈ +A7C9 A7CA # Ꟊ => ꟊ +A7F5 A7F6 # Ꟶ => ꟶ A7F8 127 # ꟸ => ħ A7F9 153 # ꟹ => œ AB5C A727 # ꭜ => ꜧ AB5D AB37 # ꭝ => ꬷ AB5E 26B # ꭞ => ɫ AB5F AB52 # ꭟ => ꭒ +AB69 28D # ꭩ => ʍ AB70 13A0 # ꭰ => Ꭰ AB71 13A1 # ꭱ => Ꭱ AB72 13A2 # ꭲ => Ꭲ @@ -213864,6 +213933,42 @@ FFEE 25CB # ○ => ○ 10425 1044D # 𐐥 => 𐑍 10426 1044E # 𐐦 => 𐑎 10427 1044F # 𐐧 => 𐑏 +104B0 104D8 # 𐒰 => 𐓘 +104B1 104D9 # 𐒱 => 𐓙 +104B2 104DA # 𐒲 => 𐓚 +104B3 104DB # 𐒳 => 𐓛 +104B4 104DC # 𐒴 => 𐓜 +104B5 104DD # 𐒵 => 𐓝 +104B6 104DE # 𐒶 => 𐓞 +104B7 104DF # 𐒷 => 𐓟 +104B8 104E0 # 𐒸 => 𐓠 +104B9 104E1 # 𐒹 => 𐓡 +104BA 104E2 # 𐒺 => 𐓢 +104BB 104E3 # 𐒻 => 𐓣 +104BC 104E4 # 𐒼 => 𐓤 +104BD 104E5 # 𐒽 => 𐓥 +104BE 104E6 # 𐒾 => 𐓦 +104BF 104E7 # 𐒿 => 𐓧 +104C0 104E8 # 𐓀 => 𐓨 +104C1 104E9 # 𐓁 => 𐓩 +104C2 104EA # 𐓂 => 𐓪 +104C3 104EB # 𐓃 => 𐓫 +104C4 104EC # 𐓄 => 𐓬 +104C5 104ED # 𐓅 => 𐓭 +104C6 104EE # 𐓆 => 𐓮 +104C7 104EF # 𐓇 => 𐓯 +104C8 104F0 # 𐓈 => 𐓰 +104C9 104F1 # 𐓉 => 𐓱 +104CA 104F2 # 𐓊 => 𐓲 +104CB 104F3 # 𐓋 => 𐓳 +104CC 104F4 # 𐓌 => 𐓴 +104CD 104F5 # 𐓍 => 𐓵 +104CE 104F6 # 𐓎 => 𐓶 +104CF 104F7 # 𐓏 => 𐓷 +104D0 104F8 # 𐓐 => 𐓸 +104D1 104F9 # 𐓑 => 𐓹 +104D2 104FA # 𐓒 => 𐓺 +104D3 104FB # 𐓓 => 𐓻 10C80 10CC0 # 𐲀 => 𐳀 10C81 10CC1 # 𐲁 => 𐳁 10C82 10CC2 # 𐲂 => 𐳂 @@ -213959,6 +214064,39 @@ FFEE 25CB # ○ => ○ 118BD 118DD # 𑢽 => 𑣝 118BE 118DE # 𑢾 => 𑣞 118BF 118DF # 𑢿 => 𑣟 +11935 11930 11938 # 𑤸 => 𑤸 +16E40 16E60 # 𖹀 => 𖹠 +16E41 16E61 # 𖹁 => 𖹡 +16E42 16E62 # 𖹂 => 𖹢 +16E43 16E63 # 𖹃 => 𖹣 +16E44 16E64 # 𖹄 => 𖹤 +16E45 16E65 # 𖹅 => 𖹥 +16E46 16E66 # 𖹆 => 𖹦 +16E47 16E67 # 𖹇 => 𖹧 +16E48 16E68 # 𖹈 => 𖹨 
+16E49 16E69 # 𖹉 => 𖹩 +16E4A 16E6A # 𖹊 => 𖹪 +16E4B 16E6B # 𖹋 => 𖹫 +16E4C 16E6C # 𖹌 => 𖹬 +16E4D 16E6D # 𖹍 => 𖹭 +16E4E 16E6E # 𖹎 => 𖹮 +16E4F 16E6F # 𖹏 => 𖹯 +16E50 16E70 # 𖹐 => 𖹰 +16E51 16E71 # 𖹑 => 𖹱 +16E52 16E72 # 𖹒 => 𖹲 +16E53 16E73 # 𖹓 => 𖹳 +16E54 16E74 # 𖹔 => 𖹴 +16E55 16E75 # 𖹕 => 𖹵 +16E56 16E76 # 𖹖 => 𖹶 +16E57 16E77 # 𖹗 => 𖹷 +16E58 16E78 # 𖹘 => 𖹸 +16E59 16E79 # 𖹙 => 𖹹 +16E5A 16E7A # 𖹚 => 𖹺 +16E5B 16E7B # 𖹛 => 𖹻 +16E5C 16E7C # 𖹜 => 𖹼 +16E5D 16E7D # 𖹝 => 𖹽 +16E5E 16E7E # 𖹞 => 𖹾 +16E5F 16E7F # 𖹟 => 𖹿 1D15E 1D157 1D165 # 𝅗𝅥 => 𝅗𝅥 1D15F 1D158 1D165 # 𝅘𝅥 => 𝅘𝅥 1D160 1D158 1D165 1D16E # 𝅘𝅥𝅮 => 𝅘𝅥𝅮 @@ -224666,6 +224804,40 @@ FFEE 25CB # ○ => ○ 1D7FD 37 # 𝟽 => 7 1D7FE 38 # 𝟾 => 8 1D7FF 39 # 𝟿 => 9 +1E900 1E922 # 𞤀 => 𞤢 +1E901 1E923 # 𞤁 => 𞤣 +1E902 1E924 # 𞤂 => 𞤤 +1E903 1E925 # 𞤃 => 𞤥 +1E904 1E926 # 𞤄 => 𞤦 +1E905 1E927 # 𞤅 => 𞤧 +1E906 1E928 # 𞤆 => 𞤨 +1E907 1E929 # 𞤇 => 𞤩 +1E908 1E92A # 𞤈 => 𞤪 +1E909 1E92B # 𞤉 => 𞤫 +1E90A 1E92C # 𞤊 => 𞤬 +1E90B 1E92D # 𞤋 => 𞤭 +1E90C 1E92E # 𞤌 => 𞤮 +1E90D 1E92F # 𞤍 => 𞤯 +1E90E 1E930 # 𞤎 => 𞤰 +1E90F 1E931 # 𞤏 => 𞤱 +1E910 1E932 # 𞤐 => 𞤲 +1E911 1E933 # 𞤑 => 𞤳 +1E912 1E934 # 𞤒 => 𞤴 +1E913 1E935 # 𞤓 => 𞤵 +1E914 1E936 # 𞤔 => 𞤶 +1E915 1E937 # 𞤕 => 𞤷 +1E916 1E938 # 𞤖 => 𞤸 +1E917 1E939 # 𞤗 => 𞤹 +1E918 1E93A # 𞤘 => 𞤺 +1E919 1E93B # 𞤙 => 𞤻 +1E91A 1E93C # 𞤚 => 𞤼 +1E91B 1E93D # 𞤛 => 𞤽 +1E91C 1E93E # 𞤜 => 𞤾 +1E91D 1E93F # 𞤝 => 𞤿 +1E91E 1E940 # 𞤞 => 𞥀 +1E91F 1E941 # 𞤟 => 𞥁 +1E920 1E942 # 𞤠 => 𞥂 +1E921 1E943 # 𞤡 => 𞥃 1EE00 627 # 𞸀 => ا 1EE00 653 622 # 𞸀ٓ => آ 1EE00 654 623 # 𞸀ٔ => أ @@ -225207,6 +225379,7 @@ FFEE 25CB # ○ => ○ 1F14F 77 63 # 🅏 => wc 1F16A 6D 63 # 🅪 => mc 1F16B 6D 64 # 🅫 => md +1F16C 6D 72 # 🅬 => mr 1F190 64 6A # 🆐 => dj 1F200 307B 304B # 🈀 => ほか 1F201 30B3 30B3 # 🈁 => ココ @@ -225256,6 +225429,7 @@ FFEE 25CB # ○ => ○ 1F238 7533 # 🈸 => 申 1F239 5272 # 🈹 => 割 1F23A 55B6 # 🈺 => 営 +1F23B 914D # 🈻 => 配 1F240 3014 672C 3015 # 🉀 => 〔本〕 1F241 3014 4E09 3015 # 🉁 => 〔三〕 1F242 3014 4E8C 3015 # 🉂 => 〔二〕 @@ -225267,6 +225441,16 @@ FFEE 25CB # ○ => ○ 1F248 
3014 6557 3015 # 🉈 => 〔敗〕 1F250 5F97 # 🉐 => 得 1F251 53EF # 🉑 => 可 +1FBF0 30 # 🯰 => 0 +1FBF1 31 # 🯱 => 1 +1FBF2 32 # 🯲 => 2 +1FBF3 33 # 🯳 => 3 +1FBF4 34 # 🯴 => 4 +1FBF5 35 # 🯵 => 5 +1FBF6 36 # 🯶 => 6 +1FBF7 37 # 🯷 => 7 +1FBF8 38 # 🯸 => 8 +1FBF9 39 # 🯹 => 9 2F800 4E3D # 丽 => 丽 2F801 4E38 # 丸 => 丸 2F802 4E41 # 乁 => 乁 diff --git a/data/nfkd.tsv b/data/nfkd.tsv new file mode 100644 index 00000000..e459b4aa --- /dev/null +++ b/data/nfkd.tsv @@ -0,0 +1,16908 @@ +A0 20 #   => +A8 20 308 # ¨ => ̈ +AA 61 # ª => a +AF 20 304 # ¯ => ̄ +B2 32 # ² => 2 +B3 33 # ³ => 3 +B4 20 301 # ´ => ́ +B5 3BC # µ => μ +B8 20 327 # ¸ => ̧ +B9 31 # ¹ => 1 +BA 6F # º => o +BC 31 2044 34 # ¼ => 1⁄4 +BD 31 2044 32 # ½ => 1⁄2 +BE 33 2044 34 # ¾ => 3⁄4 +C0 41 300 # À => À +C1 41 301 # Á => Á +C2 41 302 #  =>  +C3 41 303 # à => à +C4 41 308 # Ä => Ä +C5 41 30A # Å => Å +C7 43 327 # Ç => Ç +C8 45 300 # È => È +C9 45 301 # É => É +CA 45 302 # Ê => Ê +CB 45 308 # Ë => Ë +CC 49 300 # Ì => Ì +CD 49 301 # Í => Í +CE 49 302 # Î => Î +CF 49 308 # Ï => Ï +D1 4E 303 # Ñ => Ñ +D2 4F 300 # Ò => Ò +D3 4F 301 # Ó => Ó +D4 4F 302 # Ô => Ô +D5 4F 303 # Õ => Õ +D6 4F 308 # Ö => Ö +D9 55 300 # Ù => Ù +DA 55 301 # Ú => Ú +DB 55 302 # Û => Û +DC 55 308 # Ü => Ü +DD 59 301 # Ý => Ý +E0 61 300 # à => à +E1 61 301 # á => á +E2 61 302 # â => â +E3 61 303 # ã => ã +E4 61 308 # ä => ä +E5 61 30A # å => å +E7 63 327 # ç => ç +E8 65 300 # è => è +E9 65 301 # é => é +EA 65 302 # ê => ê +EB 65 308 # ë => ë +EC 69 300 # ì => ì +ED 69 301 # í => í +EE 69 302 # î => î +EF 69 308 # ï => ï +F1 6E 303 # ñ => ñ +F2 6F 300 # ò => ò +F3 6F 301 # ó => ó +F4 6F 302 # ô => ô +F5 6F 303 # õ => õ +F6 6F 308 # ö => ö +F9 75 300 # ù => ù +FA 75 301 # ú => ú +FB 75 302 # û => û +FC 75 308 # ü => ü +FD 79 301 # ý => ý +FF 79 308 # ÿ => ÿ +100 41 304 # Ā => Ā +101 61 304 # ā => ā +102 41 306 # Ă => Ă +103 61 306 # ă => ă +104 41 328 # Ą => Ą +105 61 328 # ą => ą +106 43 301 # Ć => Ć +107 63 301 # ć => ć +108 43 302 # Ĉ => Ĉ +109 63 302 # ĉ => ĉ 
+10A 43 307 # Ċ => Ċ +10B 63 307 # ċ => ċ +10C 43 30C # Č => Č +10D 63 30C # č => č +10E 44 30C # Ď => Ď +10F 64 30C # ď => ď +112 45 304 # Ē => Ē +113 65 304 # ē => ē +114 45 306 # Ĕ => Ĕ +115 65 306 # ĕ => ĕ +116 45 307 # Ė => Ė +117 65 307 # ė => ė +118 45 328 # Ę => Ę +119 65 328 # ę => ę +11A 45 30C # Ě => Ě +11B 65 30C # ě => ě +11C 47 302 # Ĝ => Ĝ +11D 67 302 # ĝ => ĝ +11E 47 306 # Ğ => Ğ +11F 67 306 # ğ => ğ +120 47 307 # Ġ => Ġ +121 67 307 # ġ => ġ +122 47 327 # Ģ => Ģ +123 67 327 # ģ => ģ +124 48 302 # Ĥ => Ĥ +125 68 302 # ĥ => ĥ +128 49 303 # Ĩ => Ĩ +129 69 303 # ĩ => ĩ +12A 49 304 # Ī => Ī +12B 69 304 # ī => ī +12C 49 306 # Ĭ => Ĭ +12D 69 306 # ĭ => ĭ +12E 49 328 # Į => Į +12F 69 328 # į => į +130 49 307 # İ => İ +132 49 4A # IJ => IJ +133 69 6A # ij => ij +134 4A 302 # Ĵ => Ĵ +135 6A 302 # ĵ => ĵ +136 4B 327 # Ķ => Ķ +137 6B 327 # ķ => ķ +139 4C 301 # Ĺ => Ĺ +13A 6C 301 # ĺ => ĺ +13B 4C 327 # Ļ => Ļ +13C 6C 327 # ļ => ļ +13D 4C 30C # Ľ => Ľ +13E 6C 30C # ľ => ľ +13F 4C B7 # Ŀ => L· +140 6C B7 # ŀ => l· +143 4E 301 # Ń => Ń +144 6E 301 # ń => ń +145 4E 327 # Ņ => Ņ +146 6E 327 # ņ => ņ +147 4E 30C # Ň => Ň +148 6E 30C # ň => ň +149 2BC 6E # ʼn => ʼn +14C 4F 304 # Ō => Ō +14D 6F 304 # ō => ō +14E 4F 306 # Ŏ => Ŏ +14F 6F 306 # ŏ => ŏ +150 4F 30B # Ő => Ő +151 6F 30B # ő => ő +154 52 301 # Ŕ => Ŕ +155 72 301 # ŕ => ŕ +156 52 327 # Ŗ => Ŗ +157 72 327 # ŗ => ŗ +158 52 30C # Ř => Ř +159 72 30C # ř => ř +15A 53 301 # Ś => Ś +15B 73 301 # ś => ś +15C 53 302 # Ŝ => Ŝ +15D 73 302 # ŝ => ŝ +15E 53 327 # Ş => Ş +15F 73 327 # ş => ş +160 53 30C # Š => Š +161 73 30C # š => š +162 54 327 # Ţ => Ţ +163 74 327 # ţ => ţ +164 54 30C # Ť => Ť +165 74 30C # ť => ť +168 55 303 # Ũ => Ũ +169 75 303 # ũ => ũ +16A 55 304 # Ū => Ū +16B 75 304 # ū => ū +16C 55 306 # Ŭ => Ŭ +16D 75 306 # ŭ => ŭ +16E 55 30A # Ů => Ů +16F 75 30A # ů => ů +170 55 30B # Ű => Ű +171 75 30B # ű => ű +172 55 328 # Ų => Ų +173 75 328 # ų => ų +174 57 302 # Ŵ => Ŵ +175 77 302 # ŵ => ŵ +176 59 302 # Ŷ => Ŷ 
+177 79 302 # ŷ => ŷ +178 59 308 # Ÿ => Ÿ +179 5A 301 # Ź => Ź +17A 7A 301 # ź => ź +17B 5A 307 # Ż => Ż +17C 7A 307 # ż => ż +17D 5A 30C # Ž => Ž +17E 7A 30C # ž => ž +17F 73 # ſ => s +1A0 4F 31B # Ơ => Ơ +1A1 6F 31B # ơ => ơ +1AF 55 31B # Ư => Ư +1B0 75 31B # ư => ư +1C4 44 5A 30C # DŽ => DŽ +1C5 44 7A 30C # Dž => Dž +1C6 64 7A 30C # dž => dž +1C7 4C 4A # LJ => LJ +1C8 4C 6A # Lj => Lj +1C9 6C 6A # lj => lj +1CA 4E 4A # NJ => NJ +1CB 4E 6A # Nj => Nj +1CC 6E 6A # nj => nj +1CD 41 30C # Ǎ => Ǎ +1CE 61 30C # ǎ => ǎ +1CF 49 30C # Ǐ => Ǐ +1D0 69 30C # ǐ => ǐ +1D1 4F 30C # Ǒ => Ǒ +1D2 6F 30C # ǒ => ǒ +1D3 55 30C # Ǔ => Ǔ +1D4 75 30C # ǔ => ǔ +1D5 55 308 304 # Ǖ => Ǖ +1D6 75 308 304 # ǖ => ǖ +1D7 55 308 301 # Ǘ => Ǘ +1D8 75 308 301 # ǘ => ǘ +1D9 55 308 30C # Ǚ => Ǚ +1DA 75 308 30C # ǚ => ǚ +1DB 55 308 300 # Ǜ => Ǜ +1DC 75 308 300 # ǜ => ǜ +1DE 41 308 304 # Ǟ => Ǟ +1DF 61 308 304 # ǟ => ǟ +1E0 41 307 304 # Ǡ => Ǡ +1E1 61 307 304 # ǡ => ǡ +1E2 C6 304 # Ǣ => Ǣ +1E3 E6 304 # ǣ => ǣ +1E6 47 30C # Ǧ => Ǧ +1E7 67 30C # ǧ => ǧ +1E8 4B 30C # Ǩ => Ǩ +1E9 6B 30C # ǩ => ǩ +1EA 4F 328 # Ǫ => Ǫ +1EB 6F 328 # ǫ => ǫ +1EC 4F 328 304 # Ǭ => Ǭ +1ED 6F 328 304 # ǭ => ǭ +1EE 1B7 30C # Ǯ => Ǯ +1EF 292 30C # ǯ => ǯ +1F0 6A 30C # ǰ => ǰ +1F1 44 5A # DZ => DZ +1F2 44 7A # Dz => Dz +1F3 64 7A # dz => dz +1F4 47 301 # Ǵ => Ǵ +1F5 67 301 # ǵ => ǵ +1F8 4E 300 # Ǹ => Ǹ +1F9 6E 300 # ǹ => ǹ +1FA 41 30A 301 # Ǻ => Ǻ +1FB 61 30A 301 # ǻ => ǻ +1FC C6 301 # Ǽ => Ǽ +1FD E6 301 # ǽ => ǽ +1FE D8 301 # Ǿ => Ǿ +1FF F8 301 # ǿ => ǿ +200 41 30F # Ȁ => Ȁ +201 61 30F # ȁ => ȁ +202 41 311 # Ȃ => Ȃ +203 61 311 # ȃ => ȃ +204 45 30F # Ȅ => Ȅ +205 65 30F # ȅ => ȅ +206 45 311 # Ȇ => Ȇ +207 65 311 # ȇ => ȇ +208 49 30F # Ȉ => Ȉ +209 69 30F # ȉ => ȉ +20A 49 311 # Ȋ => Ȋ +20B 69 311 # ȋ => ȋ +20C 4F 30F # Ȍ => Ȍ +20D 6F 30F # ȍ => ȍ +20E 4F 311 # Ȏ => Ȏ +20F 6F 311 # ȏ => ȏ +210 52 30F # Ȑ => Ȑ +211 72 30F # ȑ => ȑ +212 52 311 # Ȓ => Ȓ +213 72 311 # ȓ => ȓ +214 55 30F # Ȕ => Ȕ +215 75 30F # ȕ => ȕ +216 55 311 # Ȗ => Ȗ 
+217 75 311 # ȗ => ȗ +218 53 326 # Ș => Ș +219 73 326 # ș => ș +21A 54 326 # Ț => Ț +21B 74 326 # ț => ț +21E 48 30C # Ȟ => Ȟ +21F 68 30C # ȟ => ȟ +226 41 307 # Ȧ => Ȧ +227 61 307 # ȧ => ȧ +228 45 327 # Ȩ => Ȩ +229 65 327 # ȩ => ȩ +22A 4F 308 304 # Ȫ => Ȫ +22B 6F 308 304 # ȫ => ȫ +22C 4F 303 304 # Ȭ => Ȭ +22D 6F 303 304 # ȭ => ȭ +22E 4F 307 # Ȯ => Ȯ +22F 6F 307 # ȯ => ȯ +230 4F 307 304 # Ȱ => Ȱ +231 6F 307 304 # ȱ => ȱ +232 59 304 # Ȳ => Ȳ +233 79 304 # ȳ => ȳ +2B0 68 # ʰ => h +2B1 266 # ʱ => ɦ +2B2 6A # ʲ => j +2B3 72 # ʳ => r +2B4 279 # ʴ => ɹ +2B5 27B # ʵ => ɻ +2B6 281 # ʶ => ʁ +2B7 77 # ʷ => w +2B8 79 # ʸ => y +2D8 20 306 # ˘ => ̆ +2D9 20 307 # ˙ => ̇ +2DA 20 30A # ˚ => ̊ +2DB 20 328 # ˛ => ̨ +2DC 20 303 # ˜ => ̃ +2DD 20 30B # ˝ => ̋ +2E0 263 # ˠ => ɣ +2E1 6C # ˡ => l +2E2 73 # ˢ => s +2E3 78 # ˣ => x +2E4 295 # ˤ => ʕ +340 300 # ̀ => ̀ +341 301 # ́ => ́ +343 313 # ̓ => ̓ +344 308 301 # ̈́ => ̈́ +374 2B9 # ʹ => ʹ +37A 20 345 # ͺ => ͅ +37E 3B # ; => ; +384 20 301 # ΄ => ́ +385 20 308 301 # ΅ => ̈́ +386 391 301 # Ά => Ά +387 B7 # · => · +388 395 301 # Έ => Έ +389 397 301 # Ή => Ή +38A 399 301 # Ί => Ί +38C 39F 301 # Ό => Ό +38E 3A5 301 # Ύ => Ύ +38F 3A9 301 # Ώ => Ώ +390 3B9 308 301 # ΐ => ΐ +3AA 399 308 # Ϊ => Ϊ +3AB 3A5 308 # Ϋ => Ϋ +3AC 3B1 301 # ά => ά +3AD 3B5 301 # έ => έ +3AE 3B7 301 # ή => ή +3AF 3B9 301 # ί => ί +3B0 3C5 308 301 # ΰ => ΰ +3CA 3B9 308 # ϊ => ϊ +3CB 3C5 308 # ϋ => ϋ +3CC 3BF 301 # ό => ό +3CD 3C5 301 # ύ => ύ +3CE 3C9 301 # ώ => ώ +3D0 3B2 # ϐ => β +3D1 3B8 # ϑ => θ +3D2 3A5 # ϒ => Υ +3D3 3A5 301 # ϓ => Ύ +3D4 3A5 308 # ϔ => Ϋ +3D5 3C6 # ϕ => φ +3D6 3C0 # ϖ => π +3F0 3BA # ϰ => κ +3F1 3C1 # ϱ => ρ +3F2 3C2 # ϲ => ς +3F4 398 # ϴ => Θ +3F5 3B5 # ϵ => ε +3F9 3A3 # Ϲ => Σ +400 415 300 # Ѐ => Ѐ +401 415 308 # Ё => Ё +403 413 301 # Ѓ => Ѓ +407 406 308 # Ї => Ї +40C 41A 301 # Ќ => Ќ +40D 418 300 # Ѝ => Ѝ +40E 423 306 # Ў => Ў +419 418 306 # Й => Й +439 438 306 # й => й +450 435 300 # ѐ => ѐ +451 435 308 # ё => ё +453 433 301 # ѓ => ѓ +457 456 308 
# ї => ї +45C 43A 301 # ќ => ќ +45D 438 300 # ѝ => ѝ +45E 443 306 # ў => ў +476 474 30F # Ѷ => Ѷ +477 475 30F # ѷ => ѷ +4C1 416 306 # Ӂ => Ӂ +4C2 436 306 # ӂ => ӂ +4D0 410 306 # Ӑ => Ӑ +4D1 430 306 # ӑ => ӑ +4D2 410 308 # Ӓ => Ӓ +4D3 430 308 # ӓ => ӓ +4D6 415 306 # Ӗ => Ӗ +4D7 435 306 # ӗ => ӗ +4DA 4D8 308 # Ӛ => Ӛ +4DB 4D9 308 # ӛ => ӛ +4DC 416 308 # Ӝ => Ӝ +4DD 436 308 # ӝ => ӝ +4DE 417 308 # Ӟ => Ӟ +4DF 437 308 # ӟ => ӟ +4E2 418 304 # Ӣ => Ӣ +4E3 438 304 # ӣ => ӣ +4E4 418 308 # Ӥ => Ӥ +4E5 438 308 # ӥ => ӥ +4E6 41E 308 # Ӧ => Ӧ +4E7 43E 308 # ӧ => ӧ +4EA 4E8 308 # Ӫ => Ӫ +4EB 4E9 308 # ӫ => ӫ +4EC 42D 308 # Ӭ => Ӭ +4ED 44D 308 # ӭ => ӭ +4EE 423 304 # Ӯ => Ӯ +4EF 443 304 # ӯ => ӯ +4F0 423 308 # Ӱ => Ӱ +4F1 443 308 # ӱ => ӱ +4F2 423 30B # Ӳ => Ӳ +4F3 443 30B # ӳ => ӳ +4F4 427 308 # Ӵ => Ӵ +4F5 447 308 # ӵ => ӵ +4F8 42B 308 # Ӹ => Ӹ +4F9 44B 308 # ӹ => ӹ +587 565 582 # և => եւ +622 627 653 # آ => آ +623 627 654 # أ => أ +624 648 654 # ؤ => ؤ +625 627 655 # إ => إ +626 64A 654 # ئ => ئ +675 627 674 # ٵ => اٴ +676 648 674 # ٶ => وٴ +677 6C7 674 # ٷ => ۇٴ +678 64A 674 # ٸ => يٴ +6C0 6D5 654 # ۀ => ۀ +6C2 6C1 654 # ۂ => ۂ +6D3 6D2 654 # ۓ => ۓ +929 928 93C # ऩ => ऩ +931 930 93C # ऱ => ऱ +934 933 93C # ऴ => ऴ +958 915 93C # क़ => क़ +959 916 93C # ख़ => ख़ +95A 917 93C # ग़ => ग़ +95B 91C 93C # ज़ => ज़ +95C 921 93C # ड़ => ड़ +95D 922 93C # ढ़ => ढ़ +95E 92B 93C # फ़ => फ़ +95F 92F 93C # य़ => य़ +9CB 9C7 9BE # ো => ো +9CC 9C7 9D7 # ৌ => ৌ +9DC 9A1 9BC # ড় => ড় +9DD 9A2 9BC # ঢ় => ঢ় +9DF 9AF 9BC # য় => য় +A33 A32 A3C # ਲ਼ => ਲ਼ +A36 A38 A3C # ਸ਼ => ਸ਼ +A59 A16 A3C # ਖ਼ => ਖ਼ +A5A A17 A3C # ਗ਼ => ਗ਼ +A5B A1C A3C # ਜ਼ => ਜ਼ +A5E A2B A3C # ਫ਼ => ਫ਼ +B48 B47 B56 # ୈ => ୈ +B4B B47 B3E # ୋ => ୋ +B4C B47 B57 # ୌ => ୌ +B5C B21 B3C # ଡ଼ => ଡ଼ +B5D B22 B3C # ଢ଼ => ଢ଼ +B94 B92 BD7 # ஔ => ஔ +BCA BC6 BBE # ொ => ொ +BCB BC7 BBE # ோ => ோ +BCC BC6 BD7 # ௌ => ௌ +C48 C46 C56 # ై => ై +CC0 CBF CD5 # ೀ => ೀ +CC7 CC6 CD5 # ೇ => ೇ +CC8 CC6 CD6 # ೈ => ೈ +CCA CC6 CC2 # ೊ => ೊ +CCB CC6 
CC2 CD5 # ೋ => ೋ +D4A D46 D3E # ൊ => ൊ +D4B D47 D3E # ോ => ോ +D4C D46 D57 # ൌ => ൌ +DDA DD9 DCA # ේ => ේ +DDC DD9 DCF # ො => ො +DDD DD9 DCF DCA # ෝ => ෝ +DDE DD9 DDF # ෞ => ෞ +E33 E4D E32 # ำ => ํา +EB3 ECD EB2 # ຳ => ໍາ +EDC EAB E99 # ໜ => ຫນ +EDD EAB EA1 # ໝ => ຫມ +F0C F0B # ༌ => ་ +F43 F42 FB7 # གྷ => གྷ +F4D F4C FB7 # ཌྷ => ཌྷ +F52 F51 FB7 # དྷ => དྷ +F57 F56 FB7 # བྷ => བྷ +F5C F5B FB7 # ཛྷ => ཛྷ +F69 F40 FB5 # ཀྵ => ཀྵ +F73 F71 F72 # ཱི => ཱི +F75 F71 F74 # ཱུ => ཱུ +F76 FB2 F80 # ྲྀ => ྲྀ +F77 FB2 F71 F80 # ཷ => ྲཱྀ +F78 FB3 F80 # ླྀ => ླྀ +F79 FB3 F71 F80 # ཹ => ླཱྀ +F81 F71 F80 # ཱྀ => ཱྀ +F93 F92 FB7 # ྒྷ => ྒྷ +F9D F9C FB7 # ྜྷ => ྜྷ +FA2 FA1 FB7 # ྡྷ => ྡྷ +FA7 FA6 FB7 # ྦྷ => ྦྷ +FAC FAB FB7 # ྫྷ => ྫྷ +FB9 F90 FB5 # ྐྵ => ྐྵ +1026 1025 102E # ဦ => ဦ +10FC 10DC # ჼ => ნ +1B06 1B05 1B35 # ᬆ => ᬆ +1B08 1B07 1B35 # ᬈ => ᬈ +1B0A 1B09 1B35 # ᬊ => ᬊ +1B0C 1B0B 1B35 # ᬌ => ᬌ +1B0E 1B0D 1B35 # ᬎ => ᬎ +1B12 1B11 1B35 # ᬒ => ᬒ +1B3B 1B3A 1B35 # ᬻ => ᬻ +1B3D 1B3C 1B35 # ᬽ => ᬽ +1B40 1B3E 1B35 # ᭀ => ᭀ +1B41 1B3F 1B35 # ᭁ => ᭁ +1B43 1B42 1B35 # ᭃ => ᭃ +1D2C 41 # ᴬ => A +1D2D C6 # ᴭ => Æ +1D2E 42 # ᴮ => B +1D30 44 # ᴰ => D +1D31 45 # ᴱ => E +1D32 18E # ᴲ => Ǝ +1D33 47 # ᴳ => G +1D34 48 # ᴴ => H +1D35 49 # ᴵ => I +1D36 4A # ᴶ => J +1D37 4B # ᴷ => K +1D38 4C # ᴸ => L +1D39 4D # ᴹ => M +1D3A 4E # ᴺ => N +1D3C 4F # ᴼ => O +1D3D 222 # ᴽ => Ȣ +1D3E 50 # ᴾ => P +1D3F 52 # ᴿ => R +1D40 54 # ᵀ => T +1D41 55 # ᵁ => U +1D42 57 # ᵂ => W +1D43 61 # ᵃ => a +1D44 250 # ᵄ => ɐ +1D45 251 # ᵅ => ɑ +1D46 1D02 # ᵆ => ᴂ +1D47 62 # ᵇ => b +1D48 64 # ᵈ => d +1D49 65 # ᵉ => e +1D4A 259 # ᵊ => ə +1D4B 25B # ᵋ => ɛ +1D4C 25C # ᵌ => ɜ +1D4D 67 # ᵍ => g +1D4F 6B # ᵏ => k +1D50 6D # ᵐ => m +1D51 14B # ᵑ => ŋ +1D52 6F # ᵒ => o +1D53 254 # ᵓ => ɔ +1D54 1D16 # ᵔ => ᴖ +1D55 1D17 # ᵕ => ᴗ +1D56 70 # ᵖ => p +1D57 74 # ᵗ => t +1D58 75 # ᵘ => u +1D59 1D1D # ᵙ => ᴝ +1D5A 26F # ᵚ => ɯ +1D5B 76 # ᵛ => v +1D5C 1D25 # ᵜ => ᴥ +1D5D 3B2 # ᵝ => β +1D5E 3B3 # ᵞ => γ +1D5F 3B4 # ᵟ => δ +1D60 3C6 # ᵠ => φ +1D61 
3C7 # ᵡ => χ +1D62 69 # ᵢ => i +1D63 72 # ᵣ => r +1D64 75 # ᵤ => u +1D65 76 # ᵥ => v +1D66 3B2 # ᵦ => β +1D67 3B3 # ᵧ => γ +1D68 3C1 # ᵨ => ρ +1D69 3C6 # ᵩ => φ +1D6A 3C7 # ᵪ => χ +1D78 43D # ᵸ => н +1D9B 252 # ᶛ => ɒ +1D9C 63 # ᶜ => c +1D9D 255 # ᶝ => ɕ +1D9E F0 # ᶞ => ð +1D9F 25C # ᶟ => ɜ +1DA0 66 # ᶠ => f +1DA1 25F # ᶡ => ɟ +1DA2 261 # ᶢ => ɡ +1DA3 265 # ᶣ => ɥ +1DA4 268 # ᶤ => ɨ +1DA5 269 # ᶥ => ɩ +1DA6 26A # ᶦ => ɪ +1DA7 1D7B # ᶧ => ᵻ +1DA8 29D # ᶨ => ʝ +1DA9 26D # ᶩ => ɭ +1DAA 1D85 # ᶪ => ᶅ +1DAB 29F # ᶫ => ʟ +1DAC 271 # ᶬ => ɱ +1DAD 270 # ᶭ => ɰ +1DAE 272 # ᶮ => ɲ +1DAF 273 # ᶯ => ɳ +1DB0 274 # ᶰ => ɴ +1DB1 275 # ᶱ => ɵ +1DB2 278 # ᶲ => ɸ +1DB3 282 # ᶳ => ʂ +1DB4 283 # ᶴ => ʃ +1DB5 1AB # ᶵ => ƫ +1DB6 289 # ᶶ => ʉ +1DB7 28A # ᶷ => ʊ +1DB8 1D1C # ᶸ => ᴜ +1DB9 28B # ᶹ => ʋ +1DBA 28C # ᶺ => ʌ +1DBB 7A # ᶻ => z +1DBC 290 # ᶼ => ʐ +1DBD 291 # ᶽ => ʑ +1DBE 292 # ᶾ => ʒ +1DBF 3B8 # ᶿ => θ +1E00 41 325 # Ḁ => Ḁ +1E01 61 325 # ḁ => ḁ +1E02 42 307 # Ḃ => Ḃ +1E03 62 307 # ḃ => ḃ +1E04 42 323 # Ḅ => Ḅ +1E05 62 323 # ḅ => ḅ +1E06 42 331 # Ḇ => Ḇ +1E07 62 331 # ḇ => ḇ +1E08 43 327 301 # Ḉ => Ḉ +1E09 63 327 301 # ḉ => ḉ +1E0A 44 307 # Ḋ => Ḋ +1E0B 64 307 # ḋ => ḋ +1E0C 44 323 # Ḍ => Ḍ +1E0D 64 323 # ḍ => ḍ +1E0E 44 331 # Ḏ => Ḏ +1E0F 64 331 # ḏ => ḏ +1E10 44 327 # Ḑ => Ḑ +1E11 64 327 # ḑ => ḑ +1E12 44 32D # Ḓ => Ḓ +1E13 64 32D # ḓ => ḓ +1E14 45 304 300 # Ḕ => Ḕ +1E15 65 304 300 # ḕ => ḕ +1E16 45 304 301 # Ḗ => Ḗ +1E17 65 304 301 # ḗ => ḗ +1E18 45 32D # Ḙ => Ḙ +1E19 65 32D # ḙ => ḙ +1E1A 45 330 # Ḛ => Ḛ +1E1B 65 330 # ḛ => ḛ +1E1C 45 327 306 # Ḝ => Ḝ +1E1D 65 327 306 # ḝ => ḝ +1E1E 46 307 # Ḟ => Ḟ +1E1F 66 307 # ḟ => ḟ +1E20 47 304 # Ḡ => Ḡ +1E21 67 304 # ḡ => ḡ +1E22 48 307 # Ḣ => Ḣ +1E23 68 307 # ḣ => ḣ +1E24 48 323 # Ḥ => Ḥ +1E25 68 323 # ḥ => ḥ +1E26 48 308 # Ḧ => Ḧ +1E27 68 308 # ḧ => ḧ +1E28 48 327 # Ḩ => Ḩ +1E29 68 327 # ḩ => ḩ +1E2A 48 32E # Ḫ => Ḫ +1E2B 68 32E # ḫ => ḫ +1E2C 49 330 # Ḭ => Ḭ +1E2D 69 330 # ḭ => ḭ +1E2E 49 308 301 # Ḯ => Ḯ +1E2F 69 308 301 # ḯ => ḯ 
+1E30 4B 301 # Ḱ => Ḱ +1E31 6B 301 # ḱ => ḱ +1E32 4B 323 # Ḳ => Ḳ +1E33 6B 323 # ḳ => ḳ +1E34 4B 331 # Ḵ => Ḵ +1E35 6B 331 # ḵ => ḵ +1E36 4C 323 # Ḷ => Ḷ +1E37 6C 323 # ḷ => ḷ +1E38 4C 323 304 # Ḹ => Ḹ +1E39 6C 323 304 # ḹ => ḹ +1E3A 4C 331 # Ḻ => Ḻ +1E3B 6C 331 # ḻ => ḻ +1E3C 4C 32D # Ḽ => Ḽ +1E3D 6C 32D # ḽ => ḽ +1E3E 4D 301 # Ḿ => Ḿ +1E3F 6D 301 # ḿ => ḿ +1E40 4D 307 # Ṁ => Ṁ +1E41 6D 307 # ṁ => ṁ +1E42 4D 323 # Ṃ => Ṃ +1E43 6D 323 # ṃ => ṃ +1E44 4E 307 # Ṅ => Ṅ +1E45 6E 307 # ṅ => ṅ +1E46 4E 323 # Ṇ => Ṇ +1E47 6E 323 # ṇ => ṇ +1E48 4E 331 # Ṉ => Ṉ +1E49 6E 331 # ṉ => ṉ +1E4A 4E 32D # Ṋ => Ṋ +1E4B 6E 32D # ṋ => ṋ +1E4C 4F 303 301 # Ṍ => Ṍ +1E4D 6F 303 301 # ṍ => ṍ +1E4E 4F 303 308 # Ṏ => Ṏ +1E4F 6F 303 308 # ṏ => ṏ +1E50 4F 304 300 # Ṑ => Ṑ +1E51 6F 304 300 # ṑ => ṑ +1E52 4F 304 301 # Ṓ => Ṓ +1E53 6F 304 301 # ṓ => ṓ +1E54 50 301 # Ṕ => Ṕ +1E55 70 301 # ṕ => ṕ +1E56 50 307 # Ṗ => Ṗ +1E57 70 307 # ṗ => ṗ +1E58 52 307 # Ṙ => Ṙ +1E59 72 307 # ṙ => ṙ +1E5A 52 323 # Ṛ => Ṛ +1E5B 72 323 # ṛ => ṛ +1E5C 52 323 304 # Ṝ => Ṝ +1E5D 72 323 304 # ṝ => ṝ +1E5E 52 331 # Ṟ => Ṟ +1E5F 72 331 # ṟ => ṟ +1E60 53 307 # Ṡ => Ṡ +1E61 73 307 # ṡ => ṡ +1E62 53 323 # Ṣ => Ṣ +1E63 73 323 # ṣ => ṣ +1E64 53 301 307 # Ṥ => Ṥ +1E65 73 301 307 # ṥ => ṥ +1E66 53 30C 307 # Ṧ => Ṧ +1E67 73 30C 307 # ṧ => ṧ +1E68 53 323 307 # Ṩ => Ṩ +1E69 73 323 307 # ṩ => ṩ +1E6A 54 307 # Ṫ => Ṫ +1E6B 74 307 # ṫ => ṫ +1E6C 54 323 # Ṭ => Ṭ +1E6D 74 323 # ṭ => ṭ +1E6E 54 331 # Ṯ => Ṯ +1E6F 74 331 # ṯ => ṯ +1E70 54 32D # Ṱ => Ṱ +1E71 74 32D # ṱ => ṱ +1E72 55 324 # Ṳ => Ṳ +1E73 75 324 # ṳ => ṳ +1E74 55 330 # Ṵ => Ṵ +1E75 75 330 # ṵ => ṵ +1E76 55 32D # Ṷ => Ṷ +1E77 75 32D # ṷ => ṷ +1E78 55 303 301 # Ṹ => Ṹ +1E79 75 303 301 # ṹ => ṹ +1E7A 55 304 308 # Ṻ => Ṻ +1E7B 75 304 308 # ṻ => ṻ +1E7C 56 303 # Ṽ => Ṽ +1E7D 76 303 # ṽ => ṽ +1E7E 56 323 # Ṿ => Ṿ +1E7F 76 323 # ṿ => ṿ +1E80 57 300 # Ẁ => Ẁ +1E81 77 300 # ẁ => ẁ +1E82 57 301 # Ẃ => Ẃ +1E83 77 301 # ẃ => ẃ +1E84 57 308 # Ẅ => Ẅ +1E85 77 308 # ẅ => ẅ +1E86 57 307 # Ẇ => 
Ẇ +1E87 77 307 # ẇ => ẇ +1E88 57 323 # Ẉ => Ẉ +1E89 77 323 # ẉ => ẉ +1E8A 58 307 # Ẋ => Ẋ +1E8B 78 307 # ẋ => ẋ +1E8C 58 308 # Ẍ => Ẍ +1E8D 78 308 # ẍ => ẍ +1E8E 59 307 # Ẏ => Ẏ +1E8F 79 307 # ẏ => ẏ +1E90 5A 302 # Ẑ => Ẑ +1E91 7A 302 # ẑ => ẑ +1E92 5A 323 # Ẓ => Ẓ +1E93 7A 323 # ẓ => ẓ +1E94 5A 331 # Ẕ => Ẕ +1E95 7A 331 # ẕ => ẕ +1E96 68 331 # ẖ => ẖ +1E97 74 308 # ẗ => ẗ +1E98 77 30A # ẘ => ẘ +1E99 79 30A # ẙ => ẙ +1E9A 61 2BE # ẚ => aʾ +1E9B 73 307 # ẛ => ṡ +1EA0 41 323 # Ạ => Ạ +1EA1 61 323 # ạ => ạ +1EA2 41 309 # Ả => Ả +1EA3 61 309 # ả => ả +1EA4 41 302 301 # Ấ => Ấ +1EA5 61 302 301 # ấ => ấ +1EA6 41 302 300 # Ầ => Ầ +1EA7 61 302 300 # ầ => ầ +1EA8 41 302 309 # Ẩ => Ẩ +1EA9 61 302 309 # ẩ => ẩ +1EAA 41 302 303 # Ẫ => Ẫ +1EAB 61 302 303 # ẫ => ẫ +1EAC 41 323 302 # Ậ => Ậ +1EAD 61 323 302 # ậ => ậ +1EAE 41 306 301 # Ắ => Ắ +1EAF 61 306 301 # ắ => ắ +1EB0 41 306 300 # Ằ => Ằ +1EB1 61 306 300 # ằ => ằ +1EB2 41 306 309 # Ẳ => Ẳ +1EB3 61 306 309 # ẳ => ẳ +1EB4 41 306 303 # Ẵ => Ẵ +1EB5 61 306 303 # ẵ => ẵ +1EB6 41 323 306 # Ặ => Ặ +1EB7 61 323 306 # ặ => ặ +1EB8 45 323 # Ẹ => Ẹ +1EB9 65 323 # ẹ => ẹ +1EBA 45 309 # Ẻ => Ẻ +1EBB 65 309 # ẻ => ẻ +1EBC 45 303 # Ẽ => Ẽ +1EBD 65 303 # ẽ => ẽ +1EBE 45 302 301 # Ế => Ế +1EBF 65 302 301 # ế => ế +1EC0 45 302 300 # Ề => Ề +1EC1 65 302 300 # ề => ề +1EC2 45 302 309 # Ể => Ể +1EC3 65 302 309 # ể => ể +1EC4 45 302 303 # Ễ => Ễ +1EC5 65 302 303 # ễ => ễ +1EC6 45 323 302 # Ệ => Ệ +1EC7 65 323 302 # ệ => ệ +1EC8 49 309 # Ỉ => Ỉ +1EC9 69 309 # ỉ => ỉ +1ECA 49 323 # Ị => Ị +1ECB 69 323 # ị => ị +1ECC 4F 323 # Ọ => Ọ +1ECD 6F 323 # ọ => ọ +1ECE 4F 309 # Ỏ => Ỏ +1ECF 6F 309 # ỏ => ỏ +1ED0 4F 302 301 # Ố => Ố +1ED1 6F 302 301 # ố => ố +1ED2 4F 302 300 # Ồ => Ồ +1ED3 6F 302 300 # ồ => ồ +1ED4 4F 302 309 # Ổ => Ổ +1ED5 6F 302 309 # ổ => ổ +1ED6 4F 302 303 # Ỗ => Ỗ +1ED7 6F 302 303 # ỗ => ỗ +1ED8 4F 323 302 # Ộ => Ộ +1ED9 6F 323 302 # ộ => ộ +1EDA 4F 31B 301 # Ớ => Ớ +1EDB 6F 31B 301 # ớ => ớ +1EDC 4F 31B 300 # Ờ => Ờ +1EDD 6F 31B 300 # ờ 
=> ờ +1EDE 4F 31B 309 # Ở => Ở +1EDF 6F 31B 309 # ở => ở +1EE0 4F 31B 303 # Ỡ => Ỡ +1EE1 6F 31B 303 # ỡ => ỡ +1EE2 4F 31B 323 # Ợ => Ợ +1EE3 6F 31B 323 # ợ => ợ +1EE4 55 323 # Ụ => Ụ +1EE5 75 323 # ụ => ụ +1EE6 55 309 # Ủ => Ủ +1EE7 75 309 # ủ => ủ +1EE8 55 31B 301 # Ứ => Ứ +1EE9 75 31B 301 # ứ => ứ +1EEA 55 31B 300 # Ừ => Ừ +1EEB 75 31B 300 # ừ => ừ +1EEC 55 31B 309 # Ử => Ử +1EED 75 31B 309 # ử => ử +1EEE 55 31B 303 # Ữ => Ữ +1EEF 75 31B 303 # ữ => ữ +1EF0 55 31B 323 # Ự => Ự +1EF1 75 31B 323 # ự => ự +1EF2 59 300 # Ỳ => Ỳ +1EF3 79 300 # ỳ => ỳ +1EF4 59 323 # Ỵ => Ỵ +1EF5 79 323 # ỵ => ỵ +1EF6 59 309 # Ỷ => Ỷ +1EF7 79 309 # ỷ => ỷ +1EF8 59 303 # Ỹ => Ỹ +1EF9 79 303 # ỹ => ỹ +1F00 3B1 313 # ἀ => ἀ +1F01 3B1 314 # ἁ => ἁ +1F02 3B1 313 300 # ἂ => ἂ +1F03 3B1 314 300 # ἃ => ἃ +1F04 3B1 313 301 # ἄ => ἄ +1F05 3B1 314 301 # ἅ => ἅ +1F06 3B1 313 342 # ἆ => ἆ +1F07 3B1 314 342 # ἇ => ἇ +1F08 391 313 # Ἀ => Ἀ +1F09 391 314 # Ἁ => Ἁ +1F0A 391 313 300 # Ἂ => Ἂ +1F0B 391 314 300 # Ἃ => Ἃ +1F0C 391 313 301 # Ἄ => Ἄ +1F0D 391 314 301 # Ἅ => Ἅ +1F0E 391 313 342 # Ἆ => Ἆ +1F0F 391 314 342 # Ἇ => Ἇ +1F10 3B5 313 # ἐ => ἐ +1F11 3B5 314 # ἑ => ἑ +1F12 3B5 313 300 # ἒ => ἒ +1F13 3B5 314 300 # ἓ => ἓ +1F14 3B5 313 301 # ἔ => ἔ +1F15 3B5 314 301 # ἕ => ἕ +1F18 395 313 # Ἐ => Ἐ +1F19 395 314 # Ἑ => Ἑ +1F1A 395 313 300 # Ἒ => Ἒ +1F1B 395 314 300 # Ἓ => Ἓ +1F1C 395 313 301 # Ἔ => Ἔ +1F1D 395 314 301 # Ἕ => Ἕ +1F20 3B7 313 # ἠ => ἠ +1F21 3B7 314 # ἡ => ἡ +1F22 3B7 313 300 # ἢ => ἢ +1F23 3B7 314 300 # ἣ => ἣ +1F24 3B7 313 301 # ἤ => ἤ +1F25 3B7 314 301 # ἥ => ἥ +1F26 3B7 313 342 # ἦ => ἦ +1F27 3B7 314 342 # ἧ => ἧ +1F28 397 313 # Ἠ => Ἠ +1F29 397 314 # Ἡ => Ἡ +1F2A 397 313 300 # Ἢ => Ἢ +1F2B 397 314 300 # Ἣ => Ἣ +1F2C 397 313 301 # Ἤ => Ἤ +1F2D 397 314 301 # Ἥ => Ἥ +1F2E 397 313 342 # Ἦ => Ἦ +1F2F 397 314 342 # Ἧ => Ἧ +1F30 3B9 313 # ἰ => ἰ +1F31 3B9 314 # ἱ => ἱ +1F32 3B9 313 300 # ἲ => ἲ +1F33 3B9 314 300 # ἳ => ἳ +1F34 3B9 313 301 # ἴ => ἴ +1F35 3B9 314 301 # ἵ => ἵ +1F36 3B9 313 342 # 
ἶ => ἶ +1F37 3B9 314 342 # ἷ => ἷ +1F38 399 313 # Ἰ => Ἰ +1F39 399 314 # Ἱ => Ἱ +1F3A 399 313 300 # Ἲ => Ἲ +1F3B 399 314 300 # Ἳ => Ἳ +1F3C 399 313 301 # Ἴ => Ἴ +1F3D 399 314 301 # Ἵ => Ἵ +1F3E 399 313 342 # Ἶ => Ἶ +1F3F 399 314 342 # Ἷ => Ἷ +1F40 3BF 313 # ὀ => ὀ +1F41 3BF 314 # ὁ => ὁ +1F42 3BF 313 300 # ὂ => ὂ +1F43 3BF 314 300 # ὃ => ὃ +1F44 3BF 313 301 # ὄ => ὄ +1F45 3BF 314 301 # ὅ => ὅ +1F48 39F 313 # Ὀ => Ὀ +1F49 39F 314 # Ὁ => Ὁ +1F4A 39F 313 300 # Ὂ => Ὂ +1F4B 39F 314 300 # Ὃ => Ὃ +1F4C 39F 313 301 # Ὄ => Ὄ +1F4D 39F 314 301 # Ὅ => Ὅ +1F50 3C5 313 # ὐ => ὐ +1F51 3C5 314 # ὑ => ὑ +1F52 3C5 313 300 # ὒ => ὒ +1F53 3C5 314 300 # ὓ => ὓ +1F54 3C5 313 301 # ὔ => ὔ +1F55 3C5 314 301 # ὕ => ὕ +1F56 3C5 313 342 # ὖ => ὖ +1F57 3C5 314 342 # ὗ => ὗ +1F59 3A5 314 # Ὑ => Ὑ +1F5B 3A5 314 300 # Ὓ => Ὓ +1F5D 3A5 314 301 # Ὕ => Ὕ +1F5F 3A5 314 342 # Ὗ => Ὗ +1F60 3C9 313 # ὠ => ὠ +1F61 3C9 314 # ὡ => ὡ +1F62 3C9 313 300 # ὢ => ὢ +1F63 3C9 314 300 # ὣ => ὣ +1F64 3C9 313 301 # ὤ => ὤ +1F65 3C9 314 301 # ὥ => ὥ +1F66 3C9 313 342 # ὦ => ὦ +1F67 3C9 314 342 # ὧ => ὧ +1F68 3A9 313 # Ὠ => Ὠ +1F69 3A9 314 # Ὡ => Ὡ +1F6A 3A9 313 300 # Ὢ => Ὢ +1F6B 3A9 314 300 # Ὣ => Ὣ +1F6C 3A9 313 301 # Ὤ => Ὤ +1F6D 3A9 314 301 # Ὥ => Ὥ +1F6E 3A9 313 342 # Ὦ => Ὦ +1F6F 3A9 314 342 # Ὧ => Ὧ +1F70 3B1 300 # ὰ => ὰ +1F71 3B1 301 # ά => ά +1F72 3B5 300 # ὲ => ὲ +1F73 3B5 301 # έ => έ +1F74 3B7 300 # ὴ => ὴ +1F75 3B7 301 # ή => ή +1F76 3B9 300 # ὶ => ὶ +1F77 3B9 301 # ί => ί +1F78 3BF 300 # ὸ => ὸ +1F79 3BF 301 # ό => ό +1F7A 3C5 300 # ὺ => ὺ +1F7B 3C5 301 # ύ => ύ +1F7C 3C9 300 # ὼ => ὼ +1F7D 3C9 301 # ώ => ώ +1F80 3B1 313 345 # ᾀ => ᾀ +1F81 3B1 314 345 # ᾁ => ᾁ +1F82 3B1 313 300 345 # ᾂ => ᾂ +1F83 3B1 314 300 345 # ᾃ => ᾃ +1F84 3B1 313 301 345 # ᾄ => ᾄ +1F85 3B1 314 301 345 # ᾅ => ᾅ +1F86 3B1 313 342 345 # ᾆ => ᾆ +1F87 3B1 314 342 345 # ᾇ => ᾇ +1F88 391 313 345 # ᾈ => ᾈ +1F89 391 314 345 # ᾉ => ᾉ +1F8A 391 313 300 345 # ᾊ => ᾊ +1F8B 391 314 300 345 # ᾋ => ᾋ +1F8C 391 313 301 345 # ᾌ => ᾌ +1F8D 391 
314 301 345 # ᾍ => ᾍ +1F8E 391 313 342 345 # ᾎ => ᾎ +1F8F 391 314 342 345 # ᾏ => ᾏ +1F90 3B7 313 345 # ᾐ => ᾐ +1F91 3B7 314 345 # ᾑ => ᾑ +1F92 3B7 313 300 345 # ᾒ => ᾒ +1F93 3B7 314 300 345 # ᾓ => ᾓ +1F94 3B7 313 301 345 # ᾔ => ᾔ +1F95 3B7 314 301 345 # ᾕ => ᾕ +1F96 3B7 313 342 345 # ᾖ => ᾖ +1F97 3B7 314 342 345 # ᾗ => ᾗ +1F98 397 313 345 # ᾘ => ᾘ +1F99 397 314 345 # ᾙ => ᾙ +1F9A 397 313 300 345 # ᾚ => ᾚ +1F9B 397 314 300 345 # ᾛ => ᾛ +1F9C 397 313 301 345 # ᾜ => ᾜ +1F9D 397 314 301 345 # ᾝ => ᾝ +1F9E 397 313 342 345 # ᾞ => ᾞ +1F9F 397 314 342 345 # ᾟ => ᾟ +1FA0 3C9 313 345 # ᾠ => ᾠ +1FA1 3C9 314 345 # ᾡ => ᾡ +1FA2 3C9 313 300 345 # ᾢ => ᾢ +1FA3 3C9 314 300 345 # ᾣ => ᾣ +1FA4 3C9 313 301 345 # ᾤ => ᾤ +1FA5 3C9 314 301 345 # ᾥ => ᾥ +1FA6 3C9 313 342 345 # ᾦ => ᾦ +1FA7 3C9 314 342 345 # ᾧ => ᾧ +1FA8 3A9 313 345 # ᾨ => ᾨ +1FA9 3A9 314 345 # ᾩ => ᾩ +1FAA 3A9 313 300 345 # ᾪ => ᾪ +1FAB 3A9 314 300 345 # ᾫ => ᾫ +1FAC 3A9 313 301 345 # ᾬ => ᾬ +1FAD 3A9 314 301 345 # ᾭ => ᾭ +1FAE 3A9 313 342 345 # ᾮ => ᾮ +1FAF 3A9 314 342 345 # ᾯ => ᾯ +1FB0 3B1 306 # ᾰ => ᾰ +1FB1 3B1 304 # ᾱ => ᾱ +1FB2 3B1 300 345 # ᾲ => ᾲ +1FB3 3B1 345 # ᾳ => ᾳ +1FB4 3B1 301 345 # ᾴ => ᾴ +1FB6 3B1 342 # ᾶ => ᾶ +1FB7 3B1 342 345 # ᾷ => ᾷ +1FB8 391 306 # Ᾰ => Ᾰ +1FB9 391 304 # Ᾱ => Ᾱ +1FBA 391 300 # Ὰ => Ὰ +1FBB 391 301 # Ά => Ά +1FBC 391 345 # ᾼ => ᾼ +1FBD 20 313 # ᾽ => ̓ +1FBE 3B9 # ι => ι +1FBF 20 313 # ᾿ => ̓ +1FC0 20 342 # ῀ => ͂ +1FC1 20 308 342 # ῁ => ̈͂ +1FC2 3B7 300 345 # ῂ => ῂ +1FC3 3B7 345 # ῃ => ῃ +1FC4 3B7 301 345 # ῄ => ῄ +1FC6 3B7 342 # ῆ => ῆ +1FC7 3B7 342 345 # ῇ => ῇ +1FC8 395 300 # Ὲ => Ὲ +1FC9 395 301 # Έ => Έ +1FCA 397 300 # Ὴ => Ὴ +1FCB 397 301 # Ή => Ή +1FCC 397 345 # ῌ => ῌ +1FCD 20 313 300 # ῍ => ̓̀ +1FCE 20 313 301 # ῎ => ̓́ +1FCF 20 313 342 # ῏ => ̓͂ +1FD0 3B9 306 # ῐ => ῐ +1FD1 3B9 304 # ῑ => ῑ +1FD2 3B9 308 300 # ῒ => ῒ +1FD3 3B9 308 301 # ΐ => ΐ +1FD6 3B9 342 # ῖ => ῖ +1FD7 3B9 308 342 # ῗ => ῗ +1FD8 399 306 # Ῐ => Ῐ +1FD9 399 304 # Ῑ => Ῑ +1FDA 399 300 # Ὶ => Ὶ +1FDB 399 301 
# Ί => Ί +1FDD 20 314 300 # ῝ => ̔̀ +1FDE 20 314 301 # ῞ => ̔́ +1FDF 20 314 342 # ῟ => ̔͂ +1FE0 3C5 306 # ῠ => ῠ +1FE1 3C5 304 # ῡ => ῡ +1FE2 3C5 308 300 # ῢ => ῢ +1FE3 3C5 308 301 # ΰ => ΰ +1FE4 3C1 313 # ῤ => ῤ +1FE5 3C1 314 # ῥ => ῥ +1FE6 3C5 342 # ῦ => ῦ +1FE7 3C5 308 342 # ῧ => ῧ +1FE8 3A5 306 # Ῠ => Ῠ +1FE9 3A5 304 # Ῡ => Ῡ +1FEA 3A5 300 # Ὺ => Ὺ +1FEB 3A5 301 # Ύ => Ύ +1FEC 3A1 314 # Ῥ => Ῥ +1FED 20 308 300 # ῭ => ̈̀ +1FEE 20 308 301 # ΅ => ̈́ +1FEF 60 # ` => ` +1FF2 3C9 300 345 # ῲ => ῲ +1FF3 3C9 345 # ῳ => ῳ +1FF4 3C9 301 345 # ῴ => ῴ +1FF6 3C9 342 # ῶ => ῶ +1FF7 3C9 342 345 # ῷ => ῷ +1FF8 39F 300 # Ὸ => Ὸ +1FF9 39F 301 # Ό => Ό +1FFA 3A9 300 # Ὼ => Ὼ +1FFB 3A9 301 # Ώ => Ώ +1FFC 3A9 345 # ῼ => ῼ +1FFD 20 301 # ´ => ́ +1FFE 20 314 # ῾ => ̔ +2000 20 #   => +2001 20 #   => +2002 20 #   => +2003 20 #   => +2004 20 #   => +2005 20 #   => +2006 20 #   => +2007 20 #   => +2008 20 #   => +2009 20 #   => +200A 20 #   => +2011 2010 # ‑ => ‐ +2017 20 333 # ‗ => ̳ +2024 2E # ․ => . +2025 2E 2E # ‥ => .. +2026 2E 2E 2E # … => ... +202F 20 #   => +2033 2032 2032 # ″ => ′′ +2034 2032 2032 2032 # ‴ => ′′′ +2036 2035 2035 # ‶ => ‵‵ +2037 2035 2035 2035 # ‷ => ‵‵‵ +203C 21 21 # ‼ => !! +203E 20 305 # ‾ => ̅ +2047 3F 3F # ⁇ => ?? +2048 3F 21 # ⁈ => ?! +2049 21 3F # ⁉ => !? 
+2057 2032 2032 2032 2032 # ⁗ => ′′′′ +205F 20 #   => +2070 30 # ⁰ => 0 +2071 69 # ⁱ => i +2074 34 # ⁴ => 4 +2075 35 # ⁵ => 5 +2076 36 # ⁶ => 6 +2077 37 # ⁷ => 7 +2078 38 # ⁸ => 8 +2079 39 # ⁹ => 9 +207A 2B # ⁺ => + +207B 2212 # ⁻ => − +207C 3D # ⁼ => = +207D 28 # ⁽ => ( +207E 29 # ⁾ => ) +207F 6E # ⁿ => n +2080 30 # ₀ => 0 +2081 31 # ₁ => 1 +2082 32 # ₂ => 2 +2083 33 # ₃ => 3 +2084 34 # ₄ => 4 +2085 35 # ₅ => 5 +2086 36 # ₆ => 6 +2087 37 # ₇ => 7 +2088 38 # ₈ => 8 +2089 39 # ₉ => 9 +208A 2B # ₊ => + +208B 2212 # ₋ => − +208C 3D # ₌ => = +208D 28 # ₍ => ( +208E 29 # ₎ => ) +2090 61 # ₐ => a +2091 65 # ₑ => e +2092 6F # ₒ => o +2093 78 # ₓ => x +2094 259 # ₔ => ə +2095 68 # ₕ => h +2096 6B # ₖ => k +2097 6C # ₗ => l +2098 6D # ₘ => m +2099 6E # ₙ => n +209A 70 # ₚ => p +209B 73 # ₛ => s +209C 74 # ₜ => t +20A8 52 73 # ₨ => Rs +2100 61 2F 63 # ℀ => a/c +2101 61 2F 73 # ℁ => a/s +2102 43 # ℂ => C +2103 B0 43 # ℃ => °C +2105 63 2F 6F # ℅ => c/o +2106 63 2F 75 # ℆ => c/u +2107 190 # ℇ => Ɛ +2109 B0 46 # ℉ => °F +210A 67 # ℊ => g +210B 48 # ℋ => H +210C 48 # ℌ => H +210D 48 # ℍ => H +210E 68 # ℎ => h +210F 127 # ℏ => ħ +2110 49 # ℐ => I +2111 49 # ℑ => I +2112 4C # ℒ => L +2113 6C # ℓ => l +2115 4E # ℕ => N +2116 4E 6F # № => No +2119 50 # ℙ => P +211A 51 # ℚ => Q +211B 52 # ℛ => R +211C 52 # ℜ => R +211D 52 # ℝ => R +2120 53 4D # ℠ => SM +2121 54 45 4C # ℡ => TEL +2122 54 4D # ™ => TM +2124 5A # ℤ => Z +2126 3A9 # Ω => Ω +2128 5A # ℨ => Z +212A 4B # K => K +212B 41 30A # Å => Å +212C 42 # ℬ => B +212D 43 # ℭ => C +212F 65 # ℯ => e +2130 45 # ℰ => E +2131 46 # ℱ => F +2133 4D # ℳ => M +2134 6F # ℴ => o +2135 5D0 # ℵ => א +2136 5D1 # ℶ => ב +2137 5D2 # ℷ => ג +2138 5D3 # ℸ => ד +2139 69 # ℹ => i +213B 46 41 58 # ℻ => FAX +213C 3C0 # ℼ => π +213D 3B3 # ℽ => γ +213E 393 # ℾ => Γ +213F 3A0 # ℿ => Π +2140 2211 # ⅀ => ∑ +2145 44 # ⅅ => D +2146 64 # ⅆ => d +2147 65 # ⅇ => e +2148 69 # ⅈ => i +2149 6A # ⅉ => j +2150 31 2044 37 # ⅐ => 1⁄7 +2151 31 2044 39 # ⅑ => 1⁄9 +2152 31 2044 
31 30 # ⅒ => 1⁄10 +2153 31 2044 33 # ⅓ => 1⁄3 +2154 32 2044 33 # ⅔ => 2⁄3 +2155 31 2044 35 # ⅕ => 1⁄5 +2156 32 2044 35 # ⅖ => 2⁄5 +2157 33 2044 35 # ⅗ => 3⁄5 +2158 34 2044 35 # ⅘ => 4⁄5 +2159 31 2044 36 # ⅙ => 1⁄6 +215A 35 2044 36 # ⅚ => 5⁄6 +215B 31 2044 38 # ⅛ => 1⁄8 +215C 33 2044 38 # ⅜ => 3⁄8 +215D 35 2044 38 # ⅝ => 5⁄8 +215E 37 2044 38 # ⅞ => 7⁄8 +215F 31 2044 # ⅟ => 1⁄ +2160 49 # Ⅰ => I +2161 49 49 # Ⅱ => II +2162 49 49 49 # Ⅲ => III +2163 49 56 # Ⅳ => IV +2164 56 # Ⅴ => V +2165 56 49 # Ⅵ => VI +2166 56 49 49 # Ⅶ => VII +2167 56 49 49 49 # Ⅷ => VIII +2168 49 58 # Ⅸ => IX +2169 58 # Ⅹ => X +216A 58 49 # Ⅺ => XI +216B 58 49 49 # Ⅻ => XII +216C 4C # Ⅼ => L +216D 43 # Ⅽ => C +216E 44 # Ⅾ => D +216F 4D # Ⅿ => M +2170 69 # ⅰ => i +2171 69 69 # ⅱ => ii +2172 69 69 69 # ⅲ => iii +2173 69 76 # ⅳ => iv +2174 76 # ⅴ => v +2175 76 69 # ⅵ => vi +2176 76 69 69 # ⅶ => vii +2177 76 69 69 69 # ⅷ => viii +2178 69 78 # ⅸ => ix +2179 78 # ⅹ => x +217A 78 69 # ⅺ => xi +217B 78 69 69 # ⅻ => xii +217C 6C # ⅼ => l +217D 63 # ⅽ => c +217E 64 # ⅾ => d +217F 6D # ⅿ => m +2189 30 2044 33 # ↉ => 0⁄3 +219A 2190 338 # ↚ => ↚ +219B 2192 338 # ↛ => ↛ +21AE 2194 338 # ↮ => ↮ +21CD 21D0 338 # ⇍ => ⇍ +21CE 21D4 338 # ⇎ => ⇎ +21CF 21D2 338 # ⇏ => ⇏ +2204 2203 338 # ∄ => ∄ +2209 2208 338 # ∉ => ∉ +220C 220B 338 # ∌ => ∌ +2224 2223 338 # ∤ => ∤ +2226 2225 338 # ∦ => ∦ +222C 222B 222B # ∬ => ∫∫ +222D 222B 222B 222B # ∭ => ∫∫∫ +222F 222E 222E # ∯ => ∮∮ +2230 222E 222E 222E # ∰ => ∮∮∮ +2241 223C 338 # ≁ => ≁ +2244 2243 338 # ≄ => ≄ +2247 2245 338 # ≇ => ≇ +2249 2248 338 # ≉ => ≉ +2260 3D 338 # ≠ => ≠ +2262 2261 338 # ≢ => ≢ +226D 224D 338 # ≭ => ≭ +226E 3C 338 # ≮ => ≮ +226F 3E 338 # ≯ => ≯ +2270 2264 338 # ≰ => ≰ +2271 2265 338 # ≱ => ≱ +2274 2272 338 # ≴ => ≴ +2275 2273 338 # ≵ => ≵ +2278 2276 338 # ≸ => ≸ +2279 2277 338 # ≹ => ≹ +2280 227A 338 # ⊀ => ⊀ +2281 227B 338 # ⊁ => ⊁ +2284 2282 338 # ⊄ => ⊄ +2285 2283 338 # ⊅ => ⊅ +2288 2286 338 # ⊈ => ⊈ +2289 2287 338 # ⊉ => ⊉ +22AC 22A2 338 # ⊬ => ⊬ 
+22AD 22A8 338 # ⊭ => ⊭ +22AE 22A9 338 # ⊮ => ⊮ +22AF 22AB 338 # ⊯ => ⊯ +22E0 227C 338 # ⋠ => ⋠ +22E1 227D 338 # ⋡ => ⋡ +22E2 2291 338 # ⋢ => ⋢ +22E3 2292 338 # ⋣ => ⋣ +22EA 22B2 338 # ⋪ => ⋪ +22EB 22B3 338 # ⋫ => ⋫ +22EC 22B4 338 # ⋬ => ⋬ +22ED 22B5 338 # ⋭ => ⋭ +2329 3008 # 〈 => 〈 +232A 3009 # 〉 => 〉 +2460 31 # ① => 1 +2461 32 # ② => 2 +2462 33 # ③ => 3 +2463 34 # ④ => 4 +2464 35 # ⑤ => 5 +2465 36 # ⑥ => 6 +2466 37 # ⑦ => 7 +2467 38 # ⑧ => 8 +2468 39 # ⑨ => 9 +2469 31 30 # ⑩ => 10 +246A 31 31 # ⑪ => 11 +246B 31 32 # ⑫ => 12 +246C 31 33 # ⑬ => 13 +246D 31 34 # ⑭ => 14 +246E 31 35 # ⑮ => 15 +246F 31 36 # ⑯ => 16 +2470 31 37 # ⑰ => 17 +2471 31 38 # ⑱ => 18 +2472 31 39 # ⑲ => 19 +2473 32 30 # ⑳ => 20 +2474 28 31 29 # ⑴ => (1) +2475 28 32 29 # ⑵ => (2) +2476 28 33 29 # ⑶ => (3) +2477 28 34 29 # ⑷ => (4) +2478 28 35 29 # ⑸ => (5) +2479 28 36 29 # ⑹ => (6) +247A 28 37 29 # ⑺ => (7) +247B 28 38 29 # ⑻ => (8) +247C 28 39 29 # ⑼ => (9) +247D 28 31 30 29 # ⑽ => (10) +247E 28 31 31 29 # ⑾ => (11) +247F 28 31 32 29 # ⑿ => (12) +2480 28 31 33 29 # ⒀ => (13) +2481 28 31 34 29 # ⒁ => (14) +2482 28 31 35 29 # ⒂ => (15) +2483 28 31 36 29 # ⒃ => (16) +2484 28 31 37 29 # ⒄ => (17) +2485 28 31 38 29 # ⒅ => (18) +2486 28 31 39 29 # ⒆ => (19) +2487 28 32 30 29 # ⒇ => (20) +2488 31 2E # ⒈ => 1. +2489 32 2E # ⒉ => 2. +248A 33 2E # ⒊ => 3. +248B 34 2E # ⒋ => 4. +248C 35 2E # ⒌ => 5. +248D 36 2E # ⒍ => 6. +248E 37 2E # ⒎ => 7. +248F 38 2E # ⒏ => 8. +2490 39 2E # ⒐ => 9. +2491 31 30 2E # ⒑ => 10. +2492 31 31 2E # ⒒ => 11. +2493 31 32 2E # ⒓ => 12. +2494 31 33 2E # ⒔ => 13. +2495 31 34 2E # ⒕ => 14. +2496 31 35 2E # ⒖ => 15. +2497 31 36 2E # ⒗ => 16. +2498 31 37 2E # ⒘ => 17. +2499 31 38 2E # ⒙ => 18. +249A 31 39 2E # ⒚ => 19. +249B 32 30 2E # ⒛ => 20. 
+249C 28 61 29 # ⒜ => (a) +249D 28 62 29 # ⒝ => (b) +249E 28 63 29 # ⒞ => (c) +249F 28 64 29 # ⒟ => (d) +24A0 28 65 29 # ⒠ => (e) +24A1 28 66 29 # ⒡ => (f) +24A2 28 67 29 # ⒢ => (g) +24A3 28 68 29 # ⒣ => (h) +24A4 28 69 29 # ⒤ => (i) +24A5 28 6A 29 # ⒥ => (j) +24A6 28 6B 29 # ⒦ => (k) +24A7 28 6C 29 # ⒧ => (l) +24A8 28 6D 29 # ⒨ => (m) +24A9 28 6E 29 # ⒩ => (n) +24AA 28 6F 29 # ⒪ => (o) +24AB 28 70 29 # ⒫ => (p) +24AC 28 71 29 # ⒬ => (q) +24AD 28 72 29 # ⒭ => (r) +24AE 28 73 29 # ⒮ => (s) +24AF 28 74 29 # ⒯ => (t) +24B0 28 75 29 # ⒰ => (u) +24B1 28 76 29 # ⒱ => (v) +24B2 28 77 29 # ⒲ => (w) +24B3 28 78 29 # ⒳ => (x) +24B4 28 79 29 # ⒴ => (y) +24B5 28 7A 29 # ⒵ => (z) +24B6 41 # Ⓐ => A +24B7 42 # Ⓑ => B +24B8 43 # Ⓒ => C +24B9 44 # Ⓓ => D +24BA 45 # Ⓔ => E +24BB 46 # Ⓕ => F +24BC 47 # Ⓖ => G +24BD 48 # Ⓗ => H +24BE 49 # Ⓘ => I +24BF 4A # Ⓙ => J +24C0 4B # Ⓚ => K +24C1 4C # Ⓛ => L +24C2 4D # Ⓜ => M +24C3 4E # Ⓝ => N +24C4 4F # Ⓞ => O +24C5 50 # Ⓟ => P +24C6 51 # Ⓠ => Q +24C7 52 # Ⓡ => R +24C8 53 # Ⓢ => S +24C9 54 # Ⓣ => T +24CA 55 # Ⓤ => U +24CB 56 # Ⓥ => V +24CC 57 # Ⓦ => W +24CD 58 # Ⓧ => X +24CE 59 # Ⓨ => Y +24CF 5A # Ⓩ => Z +24D0 61 # ⓐ => a +24D1 62 # ⓑ => b +24D2 63 # ⓒ => c +24D3 64 # ⓓ => d +24D4 65 # ⓔ => e +24D5 66 # ⓕ => f +24D6 67 # ⓖ => g +24D7 68 # ⓗ => h +24D8 69 # ⓘ => i +24D9 6A # ⓙ => j +24DA 6B # ⓚ => k +24DB 6C # ⓛ => l +24DC 6D # ⓜ => m +24DD 6E # ⓝ => n +24DE 6F # ⓞ => o +24DF 70 # ⓟ => p +24E0 71 # ⓠ => q +24E1 72 # ⓡ => r +24E2 73 # ⓢ => s +24E3 74 # ⓣ => t +24E4 75 # ⓤ => u +24E5 76 # ⓥ => v +24E6 77 # ⓦ => w +24E7 78 # ⓧ => x +24E8 79 # ⓨ => y +24E9 7A # ⓩ => z +24EA 30 # ⓪ => 0 +2A0C 222B 222B 222B 222B # ⨌ => ∫∫∫∫ +2A74 3A 3A 3D # ⩴ => ::= +2A75 3D 3D # ⩵ => == +2A76 3D 3D 3D # ⩶ => === +2ADC 2ADD 338 # ⫝̸ => ⫝̸ +2C7C 6A # ⱼ => j +2C7D 56 # ⱽ => V +2D6F 2D61 # ⵯ => ⵡ +2E9F 6BCD # ⺟ => 母 +2EF3 9F9F # ⻳ => 龟 +2F00 4E00 # ⼀ => 一 +2F01 4E28 # ⼁ => 丨 +2F02 4E36 # ⼂ => 丶 +2F03 4E3F # ⼃ => 丿 +2F04 4E59 # ⼄ => 乙 +2F05 4E85 # ⼅ => 亅 +2F06 4E8C # ⼆ 
=> 二 +2F07 4EA0 # ⼇ => 亠 +2F08 4EBA # ⼈ => 人 +2F09 513F # ⼉ => 儿 +2F0A 5165 # ⼊ => 入 +2F0B 516B # ⼋ => 八 +2F0C 5182 # ⼌ => 冂 +2F0D 5196 # ⼍ => 冖 +2F0E 51AB # ⼎ => 冫 +2F0F 51E0 # ⼏ => 几 +2F10 51F5 # ⼐ => 凵 +2F11 5200 # ⼑ => 刀 +2F12 529B # ⼒ => 力 +2F13 52F9 # ⼓ => 勹 +2F14 5315 # ⼔ => 匕 +2F15 531A # ⼕ => 匚 +2F16 5338 # ⼖ => 匸 +2F17 5341 # ⼗ => 十 +2F18 535C # ⼘ => 卜 +2F19 5369 # ⼙ => 卩 +2F1A 5382 # ⼚ => 厂 +2F1B 53B6 # ⼛ => 厶 +2F1C 53C8 # ⼜ => 又 +2F1D 53E3 # ⼝ => 口 +2F1E 56D7 # ⼞ => 囗 +2F1F 571F # ⼟ => 土 +2F20 58EB # ⼠ => 士 +2F21 5902 # ⼡ => 夂 +2F22 590A # ⼢ => 夊 +2F23 5915 # ⼣ => 夕 +2F24 5927 # ⼤ => 大 +2F25 5973 # ⼥ => 女 +2F26 5B50 # ⼦ => 子 +2F27 5B80 # ⼧ => 宀 +2F28 5BF8 # ⼨ => 寸 +2F29 5C0F # ⼩ => 小 +2F2A 5C22 # ⼪ => 尢 +2F2B 5C38 # ⼫ => 尸 +2F2C 5C6E # ⼬ => 屮 +2F2D 5C71 # ⼭ => 山 +2F2E 5DDB # ⼮ => 巛 +2F2F 5DE5 # ⼯ => 工 +2F30 5DF1 # ⼰ => 己 +2F31 5DFE # ⼱ => 巾 +2F32 5E72 # ⼲ => 干 +2F33 5E7A # ⼳ => 幺 +2F34 5E7F # ⼴ => 广 +2F35 5EF4 # ⼵ => 廴 +2F36 5EFE # ⼶ => 廾 +2F37 5F0B # ⼷ => 弋 +2F38 5F13 # ⼸ => 弓 +2F39 5F50 # ⼹ => 彐 +2F3A 5F61 # ⼺ => 彡 +2F3B 5F73 # ⼻ => 彳 +2F3C 5FC3 # ⼼ => 心 +2F3D 6208 # ⼽ => 戈 +2F3E 6236 # ⼾ => 戶 +2F3F 624B # ⼿ => 手 +2F40 652F # ⽀ => 支 +2F41 6534 # ⽁ => 攴 +2F42 6587 # ⽂ => 文 +2F43 6597 # ⽃ => 斗 +2F44 65A4 # ⽄ => 斤 +2F45 65B9 # ⽅ => 方 +2F46 65E0 # ⽆ => 无 +2F47 65E5 # ⽇ => 日 +2F48 66F0 # ⽈ => 曰 +2F49 6708 # ⽉ => 月 +2F4A 6728 # ⽊ => 木 +2F4B 6B20 # ⽋ => 欠 +2F4C 6B62 # ⽌ => 止 +2F4D 6B79 # ⽍ => 歹 +2F4E 6BB3 # ⽎ => 殳 +2F4F 6BCB # ⽏ => 毋 +2F50 6BD4 # ⽐ => 比 +2F51 6BDB # ⽑ => 毛 +2F52 6C0F # ⽒ => 氏 +2F53 6C14 # ⽓ => 气 +2F54 6C34 # ⽔ => 水 +2F55 706B # ⽕ => 火 +2F56 722A # ⽖ => 爪 +2F57 7236 # ⽗ => 父 +2F58 723B # ⽘ => 爻 +2F59 723F # ⽙ => 爿 +2F5A 7247 # ⽚ => 片 +2F5B 7259 # ⽛ => 牙 +2F5C 725B # ⽜ => 牛 +2F5D 72AC # ⽝ => 犬 +2F5E 7384 # ⽞ => 玄 +2F5F 7389 # ⽟ => 玉 +2F60 74DC # ⽠ => 瓜 +2F61 74E6 # ⽡ => 瓦 +2F62 7518 # ⽢ => 甘 +2F63 751F # ⽣ => 生 +2F64 7528 # ⽤ => 用 +2F65 7530 # ⽥ => 田 +2F66 758B # ⽦ => 疋 +2F67 7592 # ⽧ => 疒 +2F68 7676 # ⽨ => 癶 +2F69 767D # ⽩ => 白 +2F6A 76AE # ⽪ 
=> 皮 +2F6B 76BF # ⽫ => 皿 +2F6C 76EE # ⽬ => 目 +2F6D 77DB # ⽭ => 矛 +2F6E 77E2 # ⽮ => 矢 +2F6F 77F3 # ⽯ => 石 +2F70 793A # ⽰ => 示 +2F71 79B8 # ⽱ => 禸 +2F72 79BE # ⽲ => 禾 +2F73 7A74 # ⽳ => 穴 +2F74 7ACB # ⽴ => 立 +2F75 7AF9 # ⽵ => 竹 +2F76 7C73 # ⽶ => 米 +2F77 7CF8 # ⽷ => 糸 +2F78 7F36 # ⽸ => 缶 +2F79 7F51 # ⽹ => 网 +2F7A 7F8A # ⽺ => 羊 +2F7B 7FBD # ⽻ => 羽 +2F7C 8001 # ⽼ => 老 +2F7D 800C # ⽽ => 而 +2F7E 8012 # ⽾ => 耒 +2F7F 8033 # ⽿ => 耳 +2F80 807F # ⾀ => 聿 +2F81 8089 # ⾁ => 肉 +2F82 81E3 # ⾂ => 臣 +2F83 81EA # ⾃ => 自 +2F84 81F3 # ⾄ => 至 +2F85 81FC # ⾅ => 臼 +2F86 820C # ⾆ => 舌 +2F87 821B # ⾇ => 舛 +2F88 821F # ⾈ => 舟 +2F89 826E # ⾉ => 艮 +2F8A 8272 # ⾊ => 色 +2F8B 8278 # ⾋ => 艸 +2F8C 864D # ⾌ => 虍 +2F8D 866B # ⾍ => 虫 +2F8E 8840 # ⾎ => 血 +2F8F 884C # ⾏ => 行 +2F90 8863 # ⾐ => 衣 +2F91 897E # ⾑ => 襾 +2F92 898B # ⾒ => 見 +2F93 89D2 # ⾓ => 角 +2F94 8A00 # ⾔ => 言 +2F95 8C37 # ⾕ => 谷 +2F96 8C46 # ⾖ => 豆 +2F97 8C55 # ⾗ => 豕 +2F98 8C78 # ⾘ => 豸 +2F99 8C9D # ⾙ => 貝 +2F9A 8D64 # ⾚ => 赤 +2F9B 8D70 # ⾛ => 走 +2F9C 8DB3 # ⾜ => 足 +2F9D 8EAB # ⾝ => 身 +2F9E 8ECA # ⾞ => 車 +2F9F 8F9B # ⾟ => 辛 +2FA0 8FB0 # ⾠ => 辰 +2FA1 8FB5 # ⾡ => 辵 +2FA2 9091 # ⾢ => 邑 +2FA3 9149 # ⾣ => 酉 +2FA4 91C6 # ⾤ => 釆 +2FA5 91CC # ⾥ => 里 +2FA6 91D1 # ⾦ => 金 +2FA7 9577 # ⾧ => 長 +2FA8 9580 # ⾨ => 門 +2FA9 961C # ⾩ => 阜 +2FAA 96B6 # ⾪ => 隶 +2FAB 96B9 # ⾫ => 隹 +2FAC 96E8 # ⾬ => 雨 +2FAD 9751 # ⾭ => 靑 +2FAE 975E # ⾮ => 非 +2FAF 9762 # ⾯ => 面 +2FB0 9769 # ⾰ => 革 +2FB1 97CB # ⾱ => 韋 +2FB2 97ED # ⾲ => 韭 +2FB3 97F3 # ⾳ => 音 +2FB4 9801 # ⾴ => 頁 +2FB5 98A8 # ⾵ => 風 +2FB6 98DB # ⾶ => 飛 +2FB7 98DF # ⾷ => 食 +2FB8 9996 # ⾸ => 首 +2FB9 9999 # ⾹ => 香 +2FBA 99AC # ⾺ => 馬 +2FBB 9AA8 # ⾻ => 骨 +2FBC 9AD8 # ⾼ => 高 +2FBD 9ADF # ⾽ => 髟 +2FBE 9B25 # ⾾ => 鬥 +2FBF 9B2F # ⾿ => 鬯 +2FC0 9B32 # ⿀ => 鬲 +2FC1 9B3C # ⿁ => 鬼 +2FC2 9B5A # ⿂ => 魚 +2FC3 9CE5 # ⿃ => 鳥 +2FC4 9E75 # ⿄ => 鹵 +2FC5 9E7F # ⿅ => 鹿 +2FC6 9EA5 # ⿆ => 麥 +2FC7 9EBB # ⿇ => 麻 +2FC8 9EC3 # ⿈ => 黃 +2FC9 9ECD # ⿉ => 黍 +2FCA 9ED1 # ⿊ => 黑 +2FCB 9EF9 # ⿋ => 黹 +2FCC 9EFD # ⿌ => 黽 +2FCD 9F0E # ⿍ => 鼎 +2FCE 9F13 # ⿎ 
=> 鼓 +2FCF 9F20 # ⿏ => 鼠 +2FD0 9F3B # ⿐ => 鼻 +2FD1 9F4A # ⿑ => 齊 +2FD2 9F52 # ⿒ => 齒 +2FD3 9F8D # ⿓ => 龍 +2FD4 9F9C # ⿔ => 龜 +2FD5 9FA0 # ⿕ => 龠 +3000 20 #   => +3036 3012 # 〶 => 〒 +3038 5341 # 〸 => 十 +3039 5344 # 〹 => 卄 +303A 5345 # 〺 => 卅 +304C 304B 3099 # が => が +304E 304D 3099 # ぎ => ぎ +3050 304F 3099 # ぐ => ぐ +3052 3051 3099 # げ => げ +3054 3053 3099 # ご => ご +3056 3055 3099 # ざ => ざ +3058 3057 3099 # じ => じ +305A 3059 3099 # ず => ず +305C 305B 3099 # ぜ => ぜ +305E 305D 3099 # ぞ => ぞ +3060 305F 3099 # だ => だ +3062 3061 3099 # ぢ => ぢ +3065 3064 3099 # づ => づ +3067 3066 3099 # で => で +3069 3068 3099 # ど => ど +3070 306F 3099 # ば => ば +3071 306F 309A # ぱ => ぱ +3073 3072 3099 # び => び +3074 3072 309A # ぴ => ぴ +3076 3075 3099 # ぶ => ぶ +3077 3075 309A # ぷ => ぷ +3079 3078 3099 # べ => べ +307A 3078 309A # ぺ => ぺ +307C 307B 3099 # ぼ => ぼ +307D 307B 309A # ぽ => ぽ +3094 3046 3099 # ゔ => ゔ +309B 20 3099 # ゛ => ゙ +309C 20 309A # ゜ => ゚ +309E 309D 3099 # ゞ => ゞ +309F 3088 308A # ゟ => より +30AC 30AB 3099 # ガ => ガ +30AE 30AD 3099 # ギ => ギ +30B0 30AF 3099 # グ => グ +30B2 30B1 3099 # ゲ => ゲ +30B4 30B3 3099 # ゴ => ゴ +30B6 30B5 3099 # ザ => ザ +30B8 30B7 3099 # ジ => ジ +30BA 30B9 3099 # ズ => ズ +30BC 30BB 3099 # ゼ => ゼ +30BE 30BD 3099 # ゾ => ゾ +30C0 30BF 3099 # ダ => ダ +30C2 30C1 3099 # ヂ => ヂ +30C5 30C4 3099 # ヅ => ヅ +30C7 30C6 3099 # デ => デ +30C9 30C8 3099 # ド => ド +30D0 30CF 3099 # バ => バ +30D1 30CF 309A # パ => パ +30D3 30D2 3099 # ビ => ビ +30D4 30D2 309A # ピ => ピ +30D6 30D5 3099 # ブ => ブ +30D7 30D5 309A # プ => プ +30D9 30D8 3099 # ベ => ベ +30DA 30D8 309A # ペ => ペ +30DC 30DB 3099 # ボ => ボ +30DD 30DB 309A # ポ => ポ +30F4 30A6 3099 # ヴ => ヴ +30F7 30EF 3099 # ヷ => ヷ +30F8 30F0 3099 # ヸ => ヸ +30F9 30F1 3099 # ヹ => ヹ +30FA 30F2 3099 # ヺ => ヺ +30FE 30FD 3099 # ヾ => ヾ +30FF 30B3 30C8 # ヿ => コト +3131 1100 # ㄱ => ᄀ +3132 1101 # ㄲ => ᄁ +3133 11AA # ㄳ => ᆪ +3134 1102 # ㄴ => ᄂ +3135 11AC # ㄵ => ᆬ +3136 11AD # ㄶ => ᆭ +3137 1103 # ㄷ => ᄃ +3138 1104 # ㄸ => ᄄ +3139 1105 # ㄹ => ᄅ +313A 11B0 # ㄺ => ᆰ +313B 11B1 
# ㄻ => ᆱ +313C 11B2 # ㄼ => ᆲ +313D 11B3 # ㄽ => ᆳ +313E 11B4 # ㄾ => ᆴ +313F 11B5 # ㄿ => ᆵ +3140 111A # ㅀ => ᄚ +3141 1106 # ㅁ => ᄆ +3142 1107 # ㅂ => ᄇ +3143 1108 # ㅃ => ᄈ +3144 1121 # ㅄ => ᄡ +3145 1109 # ㅅ => ᄉ +3146 110A # ㅆ => ᄊ +3147 110B # ㅇ => ᄋ +3148 110C # ㅈ => ᄌ +3149 110D # ㅉ => ᄍ +314A 110E # ㅊ => ᄎ +314B 110F # ㅋ => ᄏ +314C 1110 # ㅌ => ᄐ +314D 1111 # ㅍ => ᄑ +314E 1112 # ㅎ => ᄒ +314F 1161 # ㅏ => ᅡ +3150 1162 # ㅐ => ᅢ +3151 1163 # ㅑ => ᅣ +3152 1164 # ㅒ => ᅤ +3153 1165 # ㅓ => ᅥ +3154 1166 # ㅔ => ᅦ +3155 1167 # ㅕ => ᅧ +3156 1168 # ㅖ => ᅨ +3157 1169 # ㅗ => ᅩ +3158 116A # ㅘ => ᅪ +3159 116B # ㅙ => ᅫ +315A 116C # ㅚ => ᅬ +315B 116D # ㅛ => ᅭ +315C 116E # ㅜ => ᅮ +315D 116F # ㅝ => ᅯ +315E 1170 # ㅞ => ᅰ +315F 1171 # ㅟ => ᅱ +3160 1172 # ㅠ => ᅲ +3161 1173 # ㅡ => ᅳ +3162 1174 # ㅢ => ᅴ +3163 1175 # ㅣ => ᅵ +3164 1160 # ㅤ => ᅠ +3165 1114 # ㅥ => ᄔ +3166 1115 # ㅦ => ᄕ +3167 11C7 # ㅧ => ᇇ +3168 11C8 # ㅨ => ᇈ +3169 11CC # ㅩ => ᇌ +316A 11CE # ㅪ => ᇎ +316B 11D3 # ㅫ => ᇓ +316C 11D7 # ㅬ => ᇗ +316D 11D9 # ㅭ => ᇙ +316E 111C # ㅮ => ᄜ +316F 11DD # ㅯ => ᇝ +3170 11DF # ㅰ => ᇟ +3171 111D # ㅱ => ᄝ +3172 111E # ㅲ => ᄞ +3173 1120 # ㅳ => ᄠ +3174 1122 # ㅴ => ᄢ +3175 1123 # ㅵ => ᄣ +3176 1127 # ㅶ => ᄧ +3177 1129 # ㅷ => ᄩ +3178 112B # ㅸ => ᄫ +3179 112C # ㅹ => ᄬ +317A 112D # ㅺ => ᄭ +317B 112E # ㅻ => ᄮ +317C 112F # ㅼ => ᄯ +317D 1132 # ㅽ => ᄲ +317E 1136 # ㅾ => ᄶ +317F 1140 # ㅿ => ᅀ +3180 1147 # ㆀ => ᅇ +3181 114C # ㆁ => ᅌ +3182 11F1 # ㆂ => ᇱ +3183 11F2 # ㆃ => ᇲ +3184 1157 # ㆄ => ᅗ +3185 1158 # ㆅ => ᅘ +3186 1159 # ㆆ => ᅙ +3187 1184 # ㆇ => ᆄ +3188 1185 # ㆈ => ᆅ +3189 1188 # ㆉ => ᆈ +318A 1191 # ㆊ => ᆑ +318B 1192 # ㆋ => ᆒ +318C 1194 # ㆌ => ᆔ +318D 119E # ㆍ => ᆞ +318E 11A1 # ㆎ => ᆡ +3192 4E00 # ㆒ => 一 +3193 4E8C # ㆓ => 二 +3194 4E09 # ㆔ => 三 +3195 56DB # ㆕ => 四 +3196 4E0A # ㆖ => 上 +3197 4E2D # ㆗ => 中 +3198 4E0B # ㆘ => 下 +3199 7532 # ㆙ => 甲 +319A 4E59 # ㆚ => 乙 +319B 4E19 # ㆛ => 丙 +319C 4E01 # ㆜ => 丁 +319D 5929 # ㆝ => 天 +319E 5730 # ㆞ => 地 +319F 4EBA # ㆟ => 人 +3200 28 1100 29 # ㈀ => (ᄀ) +3201 28 1102 29 # ㈁ 
=> (ᄂ) +3202 28 1103 29 # ㈂ => (ᄃ) +3203 28 1105 29 # ㈃ => (ᄅ) +3204 28 1106 29 # ㈄ => (ᄆ) +3205 28 1107 29 # ㈅ => (ᄇ) +3206 28 1109 29 # ㈆ => (ᄉ) +3207 28 110B 29 # ㈇ => (ᄋ) +3208 28 110C 29 # ㈈ => (ᄌ) +3209 28 110E 29 # ㈉ => (ᄎ) +320A 28 110F 29 # ㈊ => (ᄏ) +320B 28 1110 29 # ㈋ => (ᄐ) +320C 28 1111 29 # ㈌ => (ᄑ) +320D 28 1112 29 # ㈍ => (ᄒ) +320E 28 1100 1161 29 # ㈎ => (가) +320F 28 1102 1161 29 # ㈏ => (나) +3210 28 1103 1161 29 # ㈐ => (다) +3211 28 1105 1161 29 # ㈑ => (라) +3212 28 1106 1161 29 # ㈒ => (마) +3213 28 1107 1161 29 # ㈓ => (바) +3214 28 1109 1161 29 # ㈔ => (사) +3215 28 110B 1161 29 # ㈕ => (아) +3216 28 110C 1161 29 # ㈖ => (자) +3217 28 110E 1161 29 # ㈗ => (차) +3218 28 110F 1161 29 # ㈘ => (카) +3219 28 1110 1161 29 # ㈙ => (타) +321A 28 1111 1161 29 # ㈚ => (파) +321B 28 1112 1161 29 # ㈛ => (하) +321C 28 110C 116E 29 # ㈜ => (주) +321D 28 110B 1169 110C 1165 11AB 29 # ㈝ => (오전) +321E 28 110B 1169 1112 116E 29 # ㈞ => (오후) +3220 28 4E00 29 # ㈠ => (一) +3221 28 4E8C 29 # ㈡ => (二) +3222 28 4E09 29 # ㈢ => (三) +3223 28 56DB 29 # ㈣ => (四) +3224 28 4E94 29 # ㈤ => (五) +3225 28 516D 29 # ㈥ => (六) +3226 28 4E03 29 # ㈦ => (七) +3227 28 516B 29 # ㈧ => (八) +3228 28 4E5D 29 # ㈨ => (九) +3229 28 5341 29 # ㈩ => (十) +322A 28 6708 29 # ㈪ => (月) +322B 28 706B 29 # ㈫ => (火) +322C 28 6C34 29 # ㈬ => (水) +322D 28 6728 29 # ㈭ => (木) +322E 28 91D1 29 # ㈮ => (金) +322F 28 571F 29 # ㈯ => (土) +3230 28 65E5 29 # ㈰ => (日) +3231 28 682A 29 # ㈱ => (株) +3232 28 6709 29 # ㈲ => (有) +3233 28 793E 29 # ㈳ => (社) +3234 28 540D 29 # ㈴ => (名) +3235 28 7279 29 # ㈵ => (特) +3236 28 8CA1 29 # ㈶ => (財) +3237 28 795D 29 # ㈷ => (祝) +3238 28 52B4 29 # ㈸ => (労) +3239 28 4EE3 29 # ㈹ => (代) +323A 28 547C 29 # ㈺ => (呼) +323B 28 5B66 29 # ㈻ => (学) +323C 28 76E3 29 # ㈼ => (監) +323D 28 4F01 29 # ㈽ => (企) +323E 28 8CC7 29 # ㈾ => (資) +323F 28 5354 29 # ㈿ => (協) +3240 28 796D 29 # ㉀ => (祭) +3241 28 4F11 29 # ㉁ => (休) +3242 28 81EA 29 # ㉂ => (自) +3243 28 81F3 29 # ㉃ => (至) +3244 554F # ㉄ => 問 +3245 5E7C # ㉅ => 幼 +3246 6587 # ㉆ => 文 
+3247 7B8F # ㉇ => 箏 +3250 50 54 45 # ㉐ => PTE +3251 32 31 # ㉑ => 21 +3252 32 32 # ㉒ => 22 +3253 32 33 # ㉓ => 23 +3254 32 34 # ㉔ => 24 +3255 32 35 # ㉕ => 25 +3256 32 36 # ㉖ => 26 +3257 32 37 # ㉗ => 27 +3258 32 38 # ㉘ => 28 +3259 32 39 # ㉙ => 29 +325A 33 30 # ㉚ => 30 +325B 33 31 # ㉛ => 31 +325C 33 32 # ㉜ => 32 +325D 33 33 # ㉝ => 33 +325E 33 34 # ㉞ => 34 +325F 33 35 # ㉟ => 35 +3260 1100 # ㉠ => ᄀ +3261 1102 # ㉡ => ᄂ +3262 1103 # ㉢ => ᄃ +3263 1105 # ㉣ => ᄅ +3264 1106 # ㉤ => ᄆ +3265 1107 # ㉥ => ᄇ +3266 1109 # ㉦ => ᄉ +3267 110B # ㉧ => ᄋ +3268 110C # ㉨ => ᄌ +3269 110E # ㉩ => ᄎ +326A 110F # ㉪ => ᄏ +326B 1110 # ㉫ => ᄐ +326C 1111 # ㉬ => ᄑ +326D 1112 # ㉭ => ᄒ +326E 1100 1161 # ㉮ => 가 +326F 1102 1161 # ㉯ => 나 +3270 1103 1161 # ㉰ => 다 +3271 1105 1161 # ㉱ => 라 +3272 1106 1161 # ㉲ => 마 +3273 1107 1161 # ㉳ => 바 +3274 1109 1161 # ㉴ => 사 +3275 110B 1161 # ㉵ => 아 +3276 110C 1161 # ㉶ => 자 +3277 110E 1161 # ㉷ => 차 +3278 110F 1161 # ㉸ => 카 +3279 1110 1161 # ㉹ => 타 +327A 1111 1161 # ㉺ => 파 +327B 1112 1161 # ㉻ => 하 +327C 110E 1161 11B7 1100 1169 # ㉼ => 참고 +327D 110C 116E 110B 1174 # ㉽ => 주의 +327E 110B 116E # ㉾ => 우 +3280 4E00 # ㊀ => 一 +3281 4E8C # ㊁ => 二 +3282 4E09 # ㊂ => 三 +3283 56DB # ㊃ => 四 +3284 4E94 # ㊄ => 五 +3285 516D # ㊅ => 六 +3286 4E03 # ㊆ => 七 +3287 516B # ㊇ => 八 +3288 4E5D # ㊈ => 九 +3289 5341 # ㊉ => 十 +328A 6708 # ㊊ => 月 +328B 706B # ㊋ => 火 +328C 6C34 # ㊌ => 水 +328D 6728 # ㊍ => 木 +328E 91D1 # ㊎ => 金 +328F 571F # ㊏ => 土 +3290 65E5 # ㊐ => 日 +3291 682A # ㊑ => 株 +3292 6709 # ㊒ => 有 +3293 793E # ㊓ => 社 +3294 540D # ㊔ => 名 +3295 7279 # ㊕ => 特 +3296 8CA1 # ㊖ => 財 +3297 795D # ㊗ => 祝 +3298 52B4 # ㊘ => 労 +3299 79D8 # ㊙ => 秘 +329A 7537 # ㊚ => 男 +329B 5973 # ㊛ => 女 +329C 9069 # ㊜ => 適 +329D 512A # ㊝ => 優 +329E 5370 # ㊞ => 印 +329F 6CE8 # ㊟ => 注 +32A0 9805 # ㊠ => 項 +32A1 4F11 # ㊡ => 休 +32A2 5199 # ㊢ => 写 +32A3 6B63 # ㊣ => 正 +32A4 4E0A # ㊤ => 上 +32A5 4E2D # ㊥ => 中 +32A6 4E0B # ㊦ => 下 +32A7 5DE6 # ㊧ => 左 +32A8 53F3 # ㊨ => 右 +32A9 533B # ㊩ => 医 +32AA 5B97 # ㊪ => 宗 +32AB 5B66 # ㊫ => 学 +32AC 76E3 
# ㊬ => 監 +32AD 4F01 # ㊭ => 企 +32AE 8CC7 # ㊮ => 資 +32AF 5354 # ㊯ => 協 +32B0 591C # ㊰ => 夜 +32B1 33 36 # ㊱ => 36 +32B2 33 37 # ㊲ => 37 +32B3 33 38 # ㊳ => 38 +32B4 33 39 # ㊴ => 39 +32B5 34 30 # ㊵ => 40 +32B6 34 31 # ㊶ => 41 +32B7 34 32 # ㊷ => 42 +32B8 34 33 # ㊸ => 43 +32B9 34 34 # ㊹ => 44 +32BA 34 35 # ㊺ => 45 +32BB 34 36 # ㊻ => 46 +32BC 34 37 # ㊼ => 47 +32BD 34 38 # ㊽ => 48 +32BE 34 39 # ㊾ => 49 +32BF 35 30 # ㊿ => 50 +32C0 31 6708 # ㋀ => 1月 +32C1 32 6708 # ㋁ => 2月 +32C2 33 6708 # ㋂ => 3月 +32C3 34 6708 # ㋃ => 4月 +32C4 35 6708 # ㋄ => 5月 +32C5 36 6708 # ㋅ => 6月 +32C6 37 6708 # ㋆ => 7月 +32C7 38 6708 # ㋇ => 8月 +32C8 39 6708 # ㋈ => 9月 +32C9 31 30 6708 # ㋉ => 10月 +32CA 31 31 6708 # ㋊ => 11月 +32CB 31 32 6708 # ㋋ => 12月 +32CC 48 67 # ㋌ => Hg +32CD 65 72 67 # ㋍ => erg +32CE 65 56 # ㋎ => eV +32CF 4C 54 44 # ㋏ => LTD +32D0 30A2 # ㋐ => ア +32D1 30A4 # ㋑ => イ +32D2 30A6 # ㋒ => ウ +32D3 30A8 # ㋓ => エ +32D4 30AA # ㋔ => オ +32D5 30AB # ㋕ => カ +32D6 30AD # ㋖ => キ +32D7 30AF # ㋗ => ク +32D8 30B1 # ㋘ => ケ +32D9 30B3 # ㋙ => コ +32DA 30B5 # ㋚ => サ +32DB 30B7 # ㋛ => シ +32DC 30B9 # ㋜ => ス +32DD 30BB # ㋝ => セ +32DE 30BD # ㋞ => ソ +32DF 30BF # ㋟ => タ +32E0 30C1 # ㋠ => チ +32E1 30C4 # ㋡ => ツ +32E2 30C6 # ㋢ => テ +32E3 30C8 # ㋣ => ト +32E4 30CA # ㋤ => ナ +32E5 30CB # ㋥ => ニ +32E6 30CC # ㋦ => ヌ +32E7 30CD # ㋧ => ネ +32E8 30CE # ㋨ => ノ +32E9 30CF # ㋩ => ハ +32EA 30D2 # ㋪ => ヒ +32EB 30D5 # ㋫ => フ +32EC 30D8 # ㋬ => ヘ +32ED 30DB # ㋭ => ホ +32EE 30DE # ㋮ => マ +32EF 30DF # ㋯ => ミ +32F0 30E0 # ㋰ => ム +32F1 30E1 # ㋱ => メ +32F2 30E2 # ㋲ => モ +32F3 30E4 # ㋳ => ヤ +32F4 30E6 # ㋴ => ユ +32F5 30E8 # ㋵ => ヨ +32F6 30E9 # ㋶ => ラ +32F7 30EA # ㋷ => リ +32F8 30EB # ㋸ => ル +32F9 30EC # ㋹ => レ +32FA 30ED # ㋺ => ロ +32FB 30EF # ㋻ => ワ +32FC 30F0 # ㋼ => ヰ +32FD 30F1 # ㋽ => ヱ +32FE 30F2 # ㋾ => ヲ +32FF 4EE4 548C # ㋿ => 令和 +3300 30A2 30CF 309A 30FC 30C8 # ㌀ => アパート +3301 30A2 30EB 30D5 30A1 # ㌁ => アルファ +3302 30A2 30F3 30D8 309A 30A2 # ㌂ => アンペア +3303 30A2 30FC 30EB # ㌃ => アール +3304 30A4 30CB 30F3 30AF 3099 # ㌄ => イニング +3305 30A4 30F3 
30C1 # ㌅ => インチ +3306 30A6 30A9 30F3 # ㌆ => ウォン +3307 30A8 30B9 30AF 30FC 30C8 3099 # ㌇ => エスクード +3308 30A8 30FC 30AB 30FC # ㌈ => エーカー +3309 30AA 30F3 30B9 # ㌉ => オンス +330A 30AA 30FC 30E0 # ㌊ => オーム +330B 30AB 30A4 30EA # ㌋ => カイリ +330C 30AB 30E9 30C3 30C8 # ㌌ => カラット +330D 30AB 30ED 30EA 30FC # ㌍ => カロリー +330E 30AB 3099 30ED 30F3 # ㌎ => ガロン +330F 30AB 3099 30F3 30DE # ㌏ => ガンマ +3310 30AD 3099 30AB 3099 # ㌐ => ギガ +3311 30AD 3099 30CB 30FC # ㌑ => ギニー +3312 30AD 30E5 30EA 30FC # ㌒ => キュリー +3313 30AD 3099 30EB 30BF 3099 30FC # ㌓ => ギルダー +3314 30AD 30ED # ㌔ => キロ +3315 30AD 30ED 30AF 3099 30E9 30E0 # ㌕ => キログラム +3316 30AD 30ED 30E1 30FC 30C8 30EB # ㌖ => キロメートル +3317 30AD 30ED 30EF 30C3 30C8 # ㌗ => キロワット +3318 30AF 3099 30E9 30E0 # ㌘ => グラム +3319 30AF 3099 30E9 30E0 30C8 30F3 # ㌙ => グラムトン +331A 30AF 30EB 30BB 3099 30A4 30ED # ㌚ => クルゼイロ +331B 30AF 30ED 30FC 30CD # ㌛ => クローネ +331C 30B1 30FC 30B9 # ㌜ => ケース +331D 30B3 30EB 30CA # ㌝ => コルナ +331E 30B3 30FC 30DB 309A # ㌞ => コーポ +331F 30B5 30A4 30AF 30EB # ㌟ => サイクル +3320 30B5 30F3 30C1 30FC 30E0 # ㌠ => サンチーム +3321 30B7 30EA 30F3 30AF 3099 # ㌡ => シリング +3322 30BB 30F3 30C1 # ㌢ => センチ +3323 30BB 30F3 30C8 # ㌣ => セント +3324 30BF 3099 30FC 30B9 # ㌤ => ダース +3325 30C6 3099 30B7 # ㌥ => デシ +3326 30C8 3099 30EB # ㌦ => ドル +3327 30C8 30F3 # ㌧ => トン +3328 30CA 30CE # ㌨ => ナノ +3329 30CE 30C3 30C8 # ㌩ => ノット +332A 30CF 30A4 30C4 # ㌪ => ハイツ +332B 30CF 309A 30FC 30BB 30F3 30C8 # ㌫ => パーセント +332C 30CF 309A 30FC 30C4 # ㌬ => パーツ +332D 30CF 3099 30FC 30EC 30EB # ㌭ => バーレル +332E 30D2 309A 30A2 30B9 30C8 30EB # ㌮ => ピアストル +332F 30D2 309A 30AF 30EB # ㌯ => ピクル +3330 30D2 309A 30B3 # ㌰ => ピコ +3331 30D2 3099 30EB # ㌱ => ビル +3332 30D5 30A1 30E9 30C3 30C8 3099 # ㌲ => ファラッド +3333 30D5 30A3 30FC 30C8 # ㌳ => フィート +3334 30D5 3099 30C3 30B7 30A7 30EB # ㌴ => ブッシェル +3335 30D5 30E9 30F3 # ㌵ => フラン +3336 30D8 30AF 30BF 30FC 30EB # ㌶ => ヘクタール +3337 30D8 309A 30BD # ㌷ => ペソ +3338 30D8 309A 30CB 30D2 # ㌸ => ペニヒ +3339 30D8 30EB 30C4 # ㌹ => ヘルツ +333A 30D8 309A 30F3 
30B9 # ㌺ => ペンス +333B 30D8 309A 30FC 30B7 3099 # ㌻ => ページ +333C 30D8 3099 30FC 30BF # ㌼ => ベータ +333D 30DB 309A 30A4 30F3 30C8 # ㌽ => ポイント +333E 30DB 3099 30EB 30C8 # ㌾ => ボルト +333F 30DB 30F3 # ㌿ => ホン +3340 30DB 309A 30F3 30C8 3099 # ㍀ => ポンド +3341 30DB 30FC 30EB # ㍁ => ホール +3342 30DB 30FC 30F3 # ㍂ => ホーン +3343 30DE 30A4 30AF 30ED # ㍃ => マイクロ +3344 30DE 30A4 30EB # ㍄ => マイル +3345 30DE 30C3 30CF # ㍅ => マッハ +3346 30DE 30EB 30AF # ㍆ => マルク +3347 30DE 30F3 30B7 30E7 30F3 # ㍇ => マンション +3348 30DF 30AF 30ED 30F3 # ㍈ => ミクロン +3349 30DF 30EA # ㍉ => ミリ +334A 30DF 30EA 30CF 3099 30FC 30EB # ㍊ => ミリバール +334B 30E1 30AB 3099 # ㍋ => メガ +334C 30E1 30AB 3099 30C8 30F3 # ㍌ => メガトン +334D 30E1 30FC 30C8 30EB # ㍍ => メートル +334E 30E4 30FC 30C8 3099 # ㍎ => ヤード +334F 30E4 30FC 30EB # ㍏ => ヤール +3350 30E6 30A2 30F3 # ㍐ => ユアン +3351 30EA 30C3 30C8 30EB # ㍑ => リットル +3352 30EA 30E9 # ㍒ => リラ +3353 30EB 30D2 309A 30FC # ㍓ => ルピー +3354 30EB 30FC 30D5 3099 30EB # ㍔ => ルーブル +3355 30EC 30E0 # ㍕ => レム +3356 30EC 30F3 30C8 30B1 3099 30F3 # ㍖ => レントゲン +3357 30EF 30C3 30C8 # ㍗ => ワット +3358 30 70B9 # ㍘ => 0点 +3359 31 70B9 # ㍙ => 1点 +335A 32 70B9 # ㍚ => 2点 +335B 33 70B9 # ㍛ => 3点 +335C 34 70B9 # ㍜ => 4点 +335D 35 70B9 # ㍝ => 5点 +335E 36 70B9 # ㍞ => 6点 +335F 37 70B9 # ㍟ => 7点 +3360 38 70B9 # ㍠ => 8点 +3361 39 70B9 # ㍡ => 9点 +3362 31 30 70B9 # ㍢ => 10点 +3363 31 31 70B9 # ㍣ => 11点 +3364 31 32 70B9 # ㍤ => 12点 +3365 31 33 70B9 # ㍥ => 13点 +3366 31 34 70B9 # ㍦ => 14点 +3367 31 35 70B9 # ㍧ => 15点 +3368 31 36 70B9 # ㍨ => 16点 +3369 31 37 70B9 # ㍩ => 17点 +336A 31 38 70B9 # ㍪ => 18点 +336B 31 39 70B9 # ㍫ => 19点 +336C 32 30 70B9 # ㍬ => 20点 +336D 32 31 70B9 # ㍭ => 21点 +336E 32 32 70B9 # ㍮ => 22点 +336F 32 33 70B9 # ㍯ => 23点 +3370 32 34 70B9 # ㍰ => 24点 +3371 68 50 61 # ㍱ => hPa +3372 64 61 # ㍲ => da +3373 41 55 # ㍳ => AU +3374 62 61 72 # ㍴ => bar +3375 6F 56 # ㍵ => oV +3376 70 63 # ㍶ => pc +3377 64 6D # ㍷ => dm +3378 64 6D 32 # ㍸ => dm2 +3379 64 6D 33 # ㍹ => dm3 +337A 49 55 # ㍺ => IU +337B 5E73 6210 # ㍻ => 平成 +337C 662D 548C 
# ㍼ => 昭和 +337D 5927 6B63 # ㍽ => 大正 +337E 660E 6CBB # ㍾ => 明治 +337F 682A 5F0F 4F1A 793E # ㍿ => 株式会社 +3380 70 41 # ㎀ => pA +3381 6E 41 # ㎁ => nA +3382 3BC 41 # ㎂ => μA +3383 6D 41 # ㎃ => mA +3384 6B 41 # ㎄ => kA +3385 4B 42 # ㎅ => KB +3386 4D 42 # ㎆ => MB +3387 47 42 # ㎇ => GB +3388 63 61 6C # ㎈ => cal +3389 6B 63 61 6C # ㎉ => kcal +338A 70 46 # ㎊ => pF +338B 6E 46 # ㎋ => nF +338C 3BC 46 # ㎌ => μF +338D 3BC 67 # ㎍ => μg +338E 6D 67 # ㎎ => mg +338F 6B 67 # ㎏ => kg +3390 48 7A # ㎐ => Hz +3391 6B 48 7A # ㎑ => kHz +3392 4D 48 7A # ㎒ => MHz +3393 47 48 7A # ㎓ => GHz +3394 54 48 7A # ㎔ => THz +3395 3BC 6C # ㎕ => μl +3396 6D 6C # ㎖ => ml +3397 64 6C # ㎗ => dl +3398 6B 6C # ㎘ => kl +3399 66 6D # ㎙ => fm +339A 6E 6D # ㎚ => nm +339B 3BC 6D # ㎛ => μm +339C 6D 6D # ㎜ => mm +339D 63 6D # ㎝ => cm +339E 6B 6D # ㎞ => km +339F 6D 6D 32 # ㎟ => mm2 +33A0 63 6D 32 # ㎠ => cm2 +33A1 6D 32 # ㎡ => m2 +33A2 6B 6D 32 # ㎢ => km2 +33A3 6D 6D 33 # ㎣ => mm3 +33A4 63 6D 33 # ㎤ => cm3 +33A5 6D 33 # ㎥ => m3 +33A6 6B 6D 33 # ㎦ => km3 +33A7 6D 2215 73 # ㎧ => m∕s +33A8 6D 2215 73 32 # ㎨ => m∕s2 +33A9 50 61 # ㎩ => Pa +33AA 6B 50 61 # ㎪ => kPa +33AB 4D 50 61 # ㎫ => MPa +33AC 47 50 61 # ㎬ => GPa +33AD 72 61 64 # ㎭ => rad +33AE 72 61 64 2215 73 # ㎮ => rad∕s +33AF 72 61 64 2215 73 32 # ㎯ => rad∕s2 +33B0 70 73 # ㎰ => ps +33B1 6E 73 # ㎱ => ns +33B2 3BC 73 # ㎲ => μs +33B3 6D 73 # ㎳ => ms +33B4 70 56 # ㎴ => pV +33B5 6E 56 # ㎵ => nV +33B6 3BC 56 # ㎶ => μV +33B7 6D 56 # ㎷ => mV +33B8 6B 56 # ㎸ => kV +33B9 4D 56 # ㎹ => MV +33BA 70 57 # ㎺ => pW +33BB 6E 57 # ㎻ => nW +33BC 3BC 57 # ㎼ => μW +33BD 6D 57 # ㎽ => mW +33BE 6B 57 # ㎾ => kW +33BF 4D 57 # ㎿ => MW +33C0 6B 3A9 # ㏀ => kΩ +33C1 4D 3A9 # ㏁ => MΩ +33C2 61 2E 6D 2E # ㏂ => a.m. +33C3 42 71 # ㏃ => Bq +33C4 63 63 # ㏄ => cc +33C5 63 64 # ㏅ => cd +33C6 43 2215 6B 67 # ㏆ => C∕kg +33C7 43 6F 2E # ㏇ => Co. 
+33C8 64 42 # ㏈ => dB +33C9 47 79 # ㏉ => Gy +33CA 68 61 # ㏊ => ha +33CB 48 50 # ㏋ => HP +33CC 69 6E # ㏌ => in +33CD 4B 4B # ㏍ => KK +33CE 4B 4D # ㏎ => KM +33CF 6B 74 # ㏏ => kt +33D0 6C 6D # ㏐ => lm +33D1 6C 6E # ㏑ => ln +33D2 6C 6F 67 # ㏒ => log +33D3 6C 78 # ㏓ => lx +33D4 6D 62 # ㏔ => mb +33D5 6D 69 6C # ㏕ => mil +33D6 6D 6F 6C # ㏖ => mol +33D7 50 48 # ㏗ => PH +33D8 70 2E 6D 2E # ㏘ => p.m. +33D9 50 50 4D # ㏙ => PPM +33DA 50 52 # ㏚ => PR +33DB 73 72 # ㏛ => sr +33DC 53 76 # ㏜ => Sv +33DD 57 62 # ㏝ => Wb +33DE 56 2215 6D # ㏞ => V∕m +33DF 41 2215 6D # ㏟ => A∕m +33E0 31 65E5 # ㏠ => 1日 +33E1 32 65E5 # ㏡ => 2日 +33E2 33 65E5 # ㏢ => 3日 +33E3 34 65E5 # ㏣ => 4日 +33E4 35 65E5 # ㏤ => 5日 +33E5 36 65E5 # ㏥ => 6日 +33E6 37 65E5 # ㏦ => 7日 +33E7 38 65E5 # ㏧ => 8日 +33E8 39 65E5 # ㏨ => 9日 +33E9 31 30 65E5 # ㏩ => 10日 +33EA 31 31 65E5 # ㏪ => 11日 +33EB 31 32 65E5 # ㏫ => 12日 +33EC 31 33 65E5 # ㏬ => 13日 +33ED 31 34 65E5 # ㏭ => 14日 +33EE 31 35 65E5 # ㏮ => 15日 +33EF 31 36 65E5 # ㏯ => 16日 +33F0 31 37 65E5 # ㏰ => 17日 +33F1 31 38 65E5 # ㏱ => 18日 +33F2 31 39 65E5 # ㏲ => 19日 +33F3 32 30 65E5 # ㏳ => 20日 +33F4 32 31 65E5 # ㏴ => 21日 +33F5 32 32 65E5 # ㏵ => 22日 +33F6 32 33 65E5 # ㏶ => 23日 +33F7 32 34 65E5 # ㏷ => 24日 +33F8 32 35 65E5 # ㏸ => 25日 +33F9 32 36 65E5 # ㏹ => 26日 +33FA 32 37 65E5 # ㏺ => 27日 +33FB 32 38 65E5 # ㏻ => 28日 +33FC 32 39 65E5 # ㏼ => 29日 +33FD 33 30 65E5 # ㏽ => 30日 +33FE 33 31 65E5 # ㏾ => 31日 +33FF 67 61 6C # ㏿ => gal +A69C 44A # ꚜ => ъ +A69D 44C # ꚝ => ь +A770 A76F # ꝰ => ꝯ +A7F8 126 # ꟸ => Ħ +A7F9 153 # ꟹ => œ +AB5C A727 # ꭜ => ꜧ +AB5D AB37 # ꭝ => ꬷ +AB5E 26B # ꭞ => ɫ +AB5F AB52 # ꭟ => ꭒ +AB69 28D # ꭩ => ʍ +AC00 1100 1161 # 가 => 가 +AC01 1100 1161 11A8 # 각 => 각 +AC02 1100 1161 11A9 # 갂 => 갂 +AC03 1100 1161 11AA # 갃 => 갃 +AC04 1100 1161 11AB # 간 => 간 +AC05 1100 1161 11AC # 갅 => 갅 +AC06 1100 1161 11AD # 갆 => 갆 +AC07 1100 1161 11AE # 갇 => 갇 +AC08 1100 1161 11AF # 갈 => 갈 +AC09 1100 1161 11B0 # 갉 => 갉 +AC0A 1100 1161 11B1 # 갊 => 갊 +AC0B 1100 1161 11B2 # 갋 => 갋 +AC0C 1100 1161 11B3 # 갌 => 
갌 +AC0D 1100 1161 11B4 # 갍 => 갍 +AC0E 1100 1161 11B5 # 갎 => 갎 +AC0F 1100 1161 11B6 # 갏 => 갏 +AC10 1100 1161 11B7 # 감 => 감 +AC11 1100 1161 11B8 # 갑 => 갑 +AC12 1100 1161 11B9 # 값 => 값 +AC13 1100 1161 11BA # 갓 => 갓 +AC14 1100 1161 11BB # 갔 => 갔 +AC15 1100 1161 11BC # 강 => 강 +AC16 1100 1161 11BD # 갖 => 갖 +AC17 1100 1161 11BE # 갗 => 갗 +AC18 1100 1161 11BF # 갘 => 갘 +AC19 1100 1161 11C0 # 같 => 같 +AC1A 1100 1161 11C1 # 갚 => 갚 +AC1B 1100 1161 11C2 # 갛 => 갛 +AC1C 1100 1162 # 개 => 개 +AC1D 1100 1162 11A8 # 객 => 객 +AC1E 1100 1162 11A9 # 갞 => 갞 +AC1F 1100 1162 11AA # 갟 => 갟 +AC20 1100 1162 11AB # 갠 => 갠 +AC21 1100 1162 11AC # 갡 => 갡 +AC22 1100 1162 11AD # 갢 => 갢 +AC23 1100 1162 11AE # 갣 => 갣 +AC24 1100 1162 11AF # 갤 => 갤 +AC25 1100 1162 11B0 # 갥 => 갥 +AC26 1100 1162 11B1 # 갦 => 갦 +AC27 1100 1162 11B2 # 갧 => 갧 +AC28 1100 1162 11B3 # 갨 => 갨 +AC29 1100 1162 11B4 # 갩 => 갩 +AC2A 1100 1162 11B5 # 갪 => 갪 +AC2B 1100 1162 11B6 # 갫 => 갫 +AC2C 1100 1162 11B7 # 갬 => 갬 +AC2D 1100 1162 11B8 # 갭 => 갭 +AC2E 1100 1162 11B9 # 갮 => 갮 +AC2F 1100 1162 11BA # 갯 => 갯 +AC30 1100 1162 11BB # 갰 => 갰 +AC31 1100 1162 11BC # 갱 => 갱 +AC32 1100 1162 11BD # 갲 => 갲 +AC33 1100 1162 11BE # 갳 => 갳 +AC34 1100 1162 11BF # 갴 => 갴 +AC35 1100 1162 11C0 # 갵 => 갵 +AC36 1100 1162 11C1 # 갶 => 갶 +AC37 1100 1162 11C2 # 갷 => 갷 +AC38 1100 1163 # 갸 => 갸 +AC39 1100 1163 11A8 # 갹 => 갹 +AC3A 1100 1163 11A9 # 갺 => 갺 +AC3B 1100 1163 11AA # 갻 => 갻 +AC3C 1100 1163 11AB # 갼 => 갼 +AC3D 1100 1163 11AC # 갽 => 갽 +AC3E 1100 1163 11AD # 갾 => 갾 +AC3F 1100 1163 11AE # 갿 => 갿 +AC40 1100 1163 11AF # 걀 => 걀 +AC41 1100 1163 11B0 # 걁 => 걁 +AC42 1100 1163 11B1 # 걂 => 걂 +AC43 1100 1163 11B2 # 걃 => 걃 +AC44 1100 1163 11B3 # 걄 => 걄 +AC45 1100 1163 11B4 # 걅 => 걅 +AC46 1100 1163 11B5 # 걆 => 걆 +AC47 1100 1163 11B6 # 걇 => 걇 +AC48 1100 1163 11B7 # 걈 => 걈 +AC49 1100 1163 11B8 # 걉 => 걉 +AC4A 1100 1163 11B9 # 걊 => 걊 +AC4B 1100 1163 11BA # 걋 => 걋 +AC4C 1100 1163 11BB # 걌 => 걌 +AC4D 1100 1163 11BC # 걍 => 걍 +AC4E 1100 1163 11BD # 걎 => 걎 +AC4F 1100 1163 11BE # 걏 => 
걏 +AC50 1100 1163 11BF # 걐 => 걐 +AC51 1100 1163 11C0 # 걑 => 걑 +AC52 1100 1163 11C1 # 걒 => 걒 +AC53 1100 1163 11C2 # 걓 => 걓 +AC54 1100 1164 # 걔 => 걔 +AC55 1100 1164 11A8 # 걕 => 걕 +AC56 1100 1164 11A9 # 걖 => 걖 +AC57 1100 1164 11AA # 걗 => 걗 +AC58 1100 1164 11AB # 걘 => 걘 +AC59 1100 1164 11AC # 걙 => 걙 +AC5A 1100 1164 11AD # 걚 => 걚 +AC5B 1100 1164 11AE # 걛 => 걛 +AC5C 1100 1164 11AF # 걜 => 걜 +AC5D 1100 1164 11B0 # 걝 => 걝 +AC5E 1100 1164 11B1 # 걞 => 걞 +AC5F 1100 1164 11B2 # 걟 => 걟 +AC60 1100 1164 11B3 # 걠 => 걠 +AC61 1100 1164 11B4 # 걡 => 걡 +AC62 1100 1164 11B5 # 걢 => 걢 +AC63 1100 1164 11B6 # 걣 => 걣 +AC64 1100 1164 11B7 # 걤 => 걤 +AC65 1100 1164 11B8 # 걥 => 걥 +AC66 1100 1164 11B9 # 걦 => 걦 +AC67 1100 1164 11BA # 걧 => 걧 +AC68 1100 1164 11BB # 걨 => 걨 +AC69 1100 1164 11BC # 걩 => 걩 +AC6A 1100 1164 11BD # 걪 => 걪 +AC6B 1100 1164 11BE # 걫 => 걫 +AC6C 1100 1164 11BF # 걬 => 걬 +AC6D 1100 1164 11C0 # 걭 => 걭 +AC6E 1100 1164 11C1 # 걮 => 걮 +AC6F 1100 1164 11C2 # 걯 => 걯 +AC70 1100 1165 # 거 => 거 +AC71 1100 1165 11A8 # 걱 => 걱 +AC72 1100 1165 11A9 # 걲 => 걲 +AC73 1100 1165 11AA # 걳 => 걳 +AC74 1100 1165 11AB # 건 => 건 +AC75 1100 1165 11AC # 걵 => 걵 +AC76 1100 1165 11AD # 걶 => 걶 +AC77 1100 1165 11AE # 걷 => 걷 +AC78 1100 1165 11AF # 걸 => 걸 +AC79 1100 1165 11B0 # 걹 => 걹 +AC7A 1100 1165 11B1 # 걺 => 걺 +AC7B 1100 1165 11B2 # 걻 => 걻 +AC7C 1100 1165 11B3 # 걼 => 걼 +AC7D 1100 1165 11B4 # 걽 => 걽 +AC7E 1100 1165 11B5 # 걾 => 걾 +AC7F 1100 1165 11B6 # 걿 => 걿 +AC80 1100 1165 11B7 # 검 => 검 +AC81 1100 1165 11B8 # 겁 => 겁 +AC82 1100 1165 11B9 # 겂 => 겂 +AC83 1100 1165 11BA # 것 => 것 +AC84 1100 1165 11BB # 겄 => 겄 +AC85 1100 1165 11BC # 겅 => 겅 +AC86 1100 1165 11BD # 겆 => 겆 +AC87 1100 1165 11BE # 겇 => 겇 +AC88 1100 1165 11BF # 겈 => 겈 +AC89 1100 1165 11C0 # 겉 => 겉 +AC8A 1100 1165 11C1 # 겊 => 겊 +AC8B 1100 1165 11C2 # 겋 => 겋 +AC8C 1100 1166 # 게 => 게 +AC8D 1100 1166 11A8 # 겍 => 겍 +AC8E 1100 1166 11A9 # 겎 => 겎 +AC8F 1100 1166 11AA # 겏 => 겏 +AC90 1100 1166 11AB # 겐 => 겐 +AC91 1100 1166 11AC # 겑 => 겑 +AC92 1100 1166 11AD # 겒 => 겒 
+AC93 1100 1166 11AE # 겓 => 겓 +AC94 1100 1166 11AF # 겔 => 겔 +AC95 1100 1166 11B0 # 겕 => 겕 +AC96 1100 1166 11B1 # 겖 => 겖 +AC97 1100 1166 11B2 # 겗 => 겗 +AC98 1100 1166 11B3 # 겘 => 겘 +AC99 1100 1166 11B4 # 겙 => 겙 +AC9A 1100 1166 11B5 # 겚 => 겚 +AC9B 1100 1166 11B6 # 겛 => 겛 +AC9C 1100 1166 11B7 # 겜 => 겜 +AC9D 1100 1166 11B8 # 겝 => 겝 +AC9E 1100 1166 11B9 # 겞 => 겞 +AC9F 1100 1166 11BA # 겟 => 겟 +ACA0 1100 1166 11BB # 겠 => 겠 +ACA1 1100 1166 11BC # 겡 => 겡 +ACA2 1100 1166 11BD # 겢 => 겢 +ACA3 1100 1166 11BE # 겣 => 겣 +ACA4 1100 1166 11BF # 겤 => 겤 +ACA5 1100 1166 11C0 # 겥 => 겥 +ACA6 1100 1166 11C1 # 겦 => 겦 +ACA7 1100 1166 11C2 # 겧 => 겧 +ACA8 1100 1167 # 겨 => 겨 +ACA9 1100 1167 11A8 # 격 => 격 +ACAA 1100 1167 11A9 # 겪 => 겪 +ACAB 1100 1167 11AA # 겫 => 겫 +ACAC 1100 1167 11AB # 견 => 견 +ACAD 1100 1167 11AC # 겭 => 겭 +ACAE 1100 1167 11AD # 겮 => 겮 +ACAF 1100 1167 11AE # 겯 => 겯 +ACB0 1100 1167 11AF # 결 => 결 +ACB1 1100 1167 11B0 # 겱 => 겱 +ACB2 1100 1167 11B1 # 겲 => 겲 +ACB3 1100 1167 11B2 # 겳 => 겳 +ACB4 1100 1167 11B3 # 겴 => 겴 +ACB5 1100 1167 11B4 # 겵 => 겵 +ACB6 1100 1167 11B5 # 겶 => 겶 +ACB7 1100 1167 11B6 # 겷 => 겷 +ACB8 1100 1167 11B7 # 겸 => 겸 +ACB9 1100 1167 11B8 # 겹 => 겹 +ACBA 1100 1167 11B9 # 겺 => 겺 +ACBB 1100 1167 11BA # 겻 => 겻 +ACBC 1100 1167 11BB # 겼 => 겼 +ACBD 1100 1167 11BC # 경 => 경 +ACBE 1100 1167 11BD # 겾 => 겾 +ACBF 1100 1167 11BE # 겿 => 겿 +ACC0 1100 1167 11BF # 곀 => 곀 +ACC1 1100 1167 11C0 # 곁 => 곁 +ACC2 1100 1167 11C1 # 곂 => 곂 +ACC3 1100 1167 11C2 # 곃 => 곃 +ACC4 1100 1168 # 계 => 계 +ACC5 1100 1168 11A8 # 곅 => 곅 +ACC6 1100 1168 11A9 # 곆 => 곆 +ACC7 1100 1168 11AA # 곇 => 곇 +ACC8 1100 1168 11AB # 곈 => 곈 +ACC9 1100 1168 11AC # 곉 => 곉 +ACCA 1100 1168 11AD # 곊 => 곊 +ACCB 1100 1168 11AE # 곋 => 곋 +ACCC 1100 1168 11AF # 곌 => 곌 +ACCD 1100 1168 11B0 # 곍 => 곍 +ACCE 1100 1168 11B1 # 곎 => 곎 +ACCF 1100 1168 11B2 # 곏 => 곏 +ACD0 1100 1168 11B3 # 곐 => 곐 +ACD1 1100 1168 11B4 # 곑 => 곑 +ACD2 1100 1168 11B5 # 곒 => 곒 +ACD3 1100 1168 11B6 # 곓 => 곓 +ACD4 1100 1168 11B7 # 곔 => 곔 +ACD5 1100 1168 11B8 # 곕 => 곕 
+ACD6 1100 1168 11B9 # 곖 => 곖 +ACD7 1100 1168 11BA # 곗 => 곗 +ACD8 1100 1168 11BB # 곘 => 곘 +ACD9 1100 1168 11BC # 곙 => 곙 +ACDA 1100 1168 11BD # 곚 => 곚 +ACDB 1100 1168 11BE # 곛 => 곛 +ACDC 1100 1168 11BF # 곜 => 곜 +ACDD 1100 1168 11C0 # 곝 => 곝 +ACDE 1100 1168 11C1 # 곞 => 곞 +ACDF 1100 1168 11C2 # 곟 => 곟 +ACE0 1100 1169 # 고 => 고 +ACE1 1100 1169 11A8 # 곡 => 곡 +ACE2 1100 1169 11A9 # 곢 => 곢 +ACE3 1100 1169 11AA # 곣 => 곣 +ACE4 1100 1169 11AB # 곤 => 곤 +ACE5 1100 1169 11AC # 곥 => 곥 +ACE6 1100 1169 11AD # 곦 => 곦 +ACE7 1100 1169 11AE # 곧 => 곧 +ACE8 1100 1169 11AF # 골 => 골 +ACE9 1100 1169 11B0 # 곩 => 곩 +ACEA 1100 1169 11B1 # 곪 => 곪 +ACEB 1100 1169 11B2 # 곫 => 곫 +ACEC 1100 1169 11B3 # 곬 => 곬 +ACED 1100 1169 11B4 # 곭 => 곭 +ACEE 1100 1169 11B5 # 곮 => 곮 +ACEF 1100 1169 11B6 # 곯 => 곯 +ACF0 1100 1169 11B7 # 곰 => 곰 +ACF1 1100 1169 11B8 # 곱 => 곱 +ACF2 1100 1169 11B9 # 곲 => 곲 +ACF3 1100 1169 11BA # 곳 => 곳 +ACF4 1100 1169 11BB # 곴 => 곴 +ACF5 1100 1169 11BC # 공 => 공 +ACF6 1100 1169 11BD # 곶 => 곶 +ACF7 1100 1169 11BE # 곷 => 곷 +ACF8 1100 1169 11BF # 곸 => 곸 +ACF9 1100 1169 11C0 # 곹 => 곹 +ACFA 1100 1169 11C1 # 곺 => 곺 +ACFB 1100 1169 11C2 # 곻 => 곻 +ACFC 1100 116A # 과 => 과 +ACFD 1100 116A 11A8 # 곽 => 곽 +ACFE 1100 116A 11A9 # 곾 => 곾 +ACFF 1100 116A 11AA # 곿 => 곿 +AD00 1100 116A 11AB # 관 => 관 +AD01 1100 116A 11AC # 괁 => 괁 +AD02 1100 116A 11AD # 괂 => 괂 +AD03 1100 116A 11AE # 괃 => 괃 +AD04 1100 116A 11AF # 괄 => 괄 +AD05 1100 116A 11B0 # 괅 => 괅 +AD06 1100 116A 11B1 # 괆 => 괆 +AD07 1100 116A 11B2 # 괇 => 괇 +AD08 1100 116A 11B3 # 괈 => 괈 +AD09 1100 116A 11B4 # 괉 => 괉 +AD0A 1100 116A 11B5 # 괊 => 괊 +AD0B 1100 116A 11B6 # 괋 => 괋 +AD0C 1100 116A 11B7 # 괌 => 괌 +AD0D 1100 116A 11B8 # 괍 => 괍 +AD0E 1100 116A 11B9 # 괎 => 괎 +AD0F 1100 116A 11BA # 괏 => 괏 +AD10 1100 116A 11BB # 괐 => 괐 +AD11 1100 116A 11BC # 광 => 광 +AD12 1100 116A 11BD # 괒 => 괒 +AD13 1100 116A 11BE # 괓 => 괓 +AD14 1100 116A 11BF # 괔 => 괔 +AD15 1100 116A 11C0 # 괕 => 괕 +AD16 1100 116A 11C1 # 괖 => 괖 +AD17 1100 116A 11C2 # 괗 => 괗 +AD18 1100 116B # 괘 => 괘 
+AD19 1100 116B 11A8 # 괙 => 괙 +AD1A 1100 116B 11A9 # 괚 => 괚 +AD1B 1100 116B 11AA # 괛 => 괛 +AD1C 1100 116B 11AB # 괜 => 괜 +AD1D 1100 116B 11AC # 괝 => 괝 +AD1E 1100 116B 11AD # 괞 => 괞 +AD1F 1100 116B 11AE # 괟 => 괟 +AD20 1100 116B 11AF # 괠 => 괠 +AD21 1100 116B 11B0 # 괡 => 괡 +AD22 1100 116B 11B1 # 괢 => 괢 +AD23 1100 116B 11B2 # 괣 => 괣 +AD24 1100 116B 11B3 # 괤 => 괤 +AD25 1100 116B 11B4 # 괥 => 괥 +AD26 1100 116B 11B5 # 괦 => 괦 +AD27 1100 116B 11B6 # 괧 => 괧 +AD28 1100 116B 11B7 # 괨 => 괨 +AD29 1100 116B 11B8 # 괩 => 괩 +AD2A 1100 116B 11B9 # 괪 => 괪 +AD2B 1100 116B 11BA # 괫 => 괫 +AD2C 1100 116B 11BB # 괬 => 괬 +AD2D 1100 116B 11BC # 괭 => 괭 +AD2E 1100 116B 11BD # 괮 => 괮 +AD2F 1100 116B 11BE # 괯 => 괯 +AD30 1100 116B 11BF # 괰 => 괰 +AD31 1100 116B 11C0 # 괱 => 괱 +AD32 1100 116B 11C1 # 괲 => 괲 +AD33 1100 116B 11C2 # 괳 => 괳 +AD34 1100 116C # 괴 => 괴 +AD35 1100 116C 11A8 # 괵 => 괵 +AD36 1100 116C 11A9 # 괶 => 괶 +AD37 1100 116C 11AA # 괷 => 괷 +AD38 1100 116C 11AB # 괸 => 괸 +AD39 1100 116C 11AC # 괹 => 괹 +AD3A 1100 116C 11AD # 괺 => 괺 +AD3B 1100 116C 11AE # 괻 => 괻 +AD3C 1100 116C 11AF # 괼 => 괼 +AD3D 1100 116C 11B0 # 괽 => 괽 +AD3E 1100 116C 11B1 # 괾 => 괾 +AD3F 1100 116C 11B2 # 괿 => 괿 +AD40 1100 116C 11B3 # 굀 => 굀 +AD41 1100 116C 11B4 # 굁 => 굁 +AD42 1100 116C 11B5 # 굂 => 굂 +AD43 1100 116C 11B6 # 굃 => 굃 +AD44 1100 116C 11B7 # 굄 => 굄 +AD45 1100 116C 11B8 # 굅 => 굅 +AD46 1100 116C 11B9 # 굆 => 굆 +AD47 1100 116C 11BA # 굇 => 굇 +AD48 1100 116C 11BB # 굈 => 굈 +AD49 1100 116C 11BC # 굉 => 굉 +AD4A 1100 116C 11BD # 굊 => 굊 +AD4B 1100 116C 11BE # 굋 => 굋 +AD4C 1100 116C 11BF # 굌 => 굌 +AD4D 1100 116C 11C0 # 굍 => 굍 +AD4E 1100 116C 11C1 # 굎 => 굎 +AD4F 1100 116C 11C2 # 굏 => 굏 +AD50 1100 116D # 교 => 교 +AD51 1100 116D 11A8 # 굑 => 굑 +AD52 1100 116D 11A9 # 굒 => 굒 +AD53 1100 116D 11AA # 굓 => 굓 +AD54 1100 116D 11AB # 굔 => 굔 +AD55 1100 116D 11AC # 굕 => 굕 +AD56 1100 116D 11AD # 굖 => 굖 +AD57 1100 116D 11AE # 굗 => 굗 +AD58 1100 116D 11AF # 굘 => 굘 +AD59 1100 116D 11B0 # 굙 => 굙 +AD5A 1100 116D 11B1 # 굚 => 굚 +AD5B 1100 116D 11B2 # 굛 => 굛 
+AD5C 1100 116D 11B3 # 굜 => 굜 +AD5D 1100 116D 11B4 # 굝 => 굝 +AD5E 1100 116D 11B5 # 굞 => 굞 +AD5F 1100 116D 11B6 # 굟 => 굟 +AD60 1100 116D 11B7 # 굠 => 굠 +AD61 1100 116D 11B8 # 굡 => 굡 +AD62 1100 116D 11B9 # 굢 => 굢 +AD63 1100 116D 11BA # 굣 => 굣 +AD64 1100 116D 11BB # 굤 => 굤 +AD65 1100 116D 11BC # 굥 => 굥 +AD66 1100 116D 11BD # 굦 => 굦 +AD67 1100 116D 11BE # 굧 => 굧 +AD68 1100 116D 11BF # 굨 => 굨 +AD69 1100 116D 11C0 # 굩 => 굩 +AD6A 1100 116D 11C1 # 굪 => 굪 +AD6B 1100 116D 11C2 # 굫 => 굫 +AD6C 1100 116E # 구 => 구 +AD6D 1100 116E 11A8 # 국 => 국 +AD6E 1100 116E 11A9 # 굮 => 굮 +AD6F 1100 116E 11AA # 굯 => 굯 +AD70 1100 116E 11AB # 군 => 군 +AD71 1100 116E 11AC # 굱 => 굱 +AD72 1100 116E 11AD # 굲 => 굲 +AD73 1100 116E 11AE # 굳 => 굳 +AD74 1100 116E 11AF # 굴 => 굴 +AD75 1100 116E 11B0 # 굵 => 굵 +AD76 1100 116E 11B1 # 굶 => 굶 +AD77 1100 116E 11B2 # 굷 => 굷 +AD78 1100 116E 11B3 # 굸 => 굸 +AD79 1100 116E 11B4 # 굹 => 굹 +AD7A 1100 116E 11B5 # 굺 => 굺 +AD7B 1100 116E 11B6 # 굻 => 굻 +AD7C 1100 116E 11B7 # 굼 => 굼 +AD7D 1100 116E 11B8 # 굽 => 굽 +AD7E 1100 116E 11B9 # 굾 => 굾 +AD7F 1100 116E 11BA # 굿 => 굿 +AD80 1100 116E 11BB # 궀 => 궀 +AD81 1100 116E 11BC # 궁 => 궁 +AD82 1100 116E 11BD # 궂 => 궂 +AD83 1100 116E 11BE # 궃 => 궃 +AD84 1100 116E 11BF # 궄 => 궄 +AD85 1100 116E 11C0 # 궅 => 궅 +AD86 1100 116E 11C1 # 궆 => 궆 +AD87 1100 116E 11C2 # 궇 => 궇 +AD88 1100 116F # 궈 => 궈 +AD89 1100 116F 11A8 # 궉 => 궉 +AD8A 1100 116F 11A9 # 궊 => 궊 +AD8B 1100 116F 11AA # 궋 => 궋 +AD8C 1100 116F 11AB # 권 => 권 +AD8D 1100 116F 11AC # 궍 => 궍 +AD8E 1100 116F 11AD # 궎 => 궎 +AD8F 1100 116F 11AE # 궏 => 궏 +AD90 1100 116F 11AF # 궐 => 궐 +AD91 1100 116F 11B0 # 궑 => 궑 +AD92 1100 116F 11B1 # 궒 => 궒 +AD93 1100 116F 11B2 # 궓 => 궓 +AD94 1100 116F 11B3 # 궔 => 궔 +AD95 1100 116F 11B4 # 궕 => 궕 +AD96 1100 116F 11B5 # 궖 => 궖 +AD97 1100 116F 11B6 # 궗 => 궗 +AD98 1100 116F 11B7 # 궘 => 궘 +AD99 1100 116F 11B8 # 궙 => 궙 +AD9A 1100 116F 11B9 # 궚 => 궚 +AD9B 1100 116F 11BA # 궛 => 궛 +AD9C 1100 116F 11BB # 궜 => 궜 +AD9D 1100 116F 11BC # 궝 => 궝 +AD9E 1100 116F 11BD # 궞 => 궞 
+AD9F 1100 116F 11BE # 궟 => 궟 +ADA0 1100 116F 11BF # 궠 => 궠 +ADA1 1100 116F 11C0 # 궡 => 궡 +ADA2 1100 116F 11C1 # 궢 => 궢 +ADA3 1100 116F 11C2 # 궣 => 궣 +ADA4 1100 1170 # 궤 => 궤 +ADA5 1100 1170 11A8 # 궥 => 궥 +ADA6 1100 1170 11A9 # 궦 => 궦 +ADA7 1100 1170 11AA # 궧 => 궧 +ADA8 1100 1170 11AB # 궨 => 궨 +ADA9 1100 1170 11AC # 궩 => 궩 +ADAA 1100 1170 11AD # 궪 => 궪 +ADAB 1100 1170 11AE # 궫 => 궫 +ADAC 1100 1170 11AF # 궬 => 궬 +ADAD 1100 1170 11B0 # 궭 => 궭 +ADAE 1100 1170 11B1 # 궮 => 궮 +ADAF 1100 1170 11B2 # 궯 => 궯 +ADB0 1100 1170 11B3 # 궰 => 궰 +ADB1 1100 1170 11B4 # 궱 => 궱 +ADB2 1100 1170 11B5 # 궲 => 궲 +ADB3 1100 1170 11B6 # 궳 => 궳 +ADB4 1100 1170 11B7 # 궴 => 궴 +ADB5 1100 1170 11B8 # 궵 => 궵 +ADB6 1100 1170 11B9 # 궶 => 궶 +ADB7 1100 1170 11BA # 궷 => 궷 +ADB8 1100 1170 11BB # 궸 => 궸 +ADB9 1100 1170 11BC # 궹 => 궹 +ADBA 1100 1170 11BD # 궺 => 궺 +ADBB 1100 1170 11BE # 궻 => 궻 +ADBC 1100 1170 11BF # 궼 => 궼 +ADBD 1100 1170 11C0 # 궽 => 궽 +ADBE 1100 1170 11C1 # 궾 => 궾 +ADBF 1100 1170 11C2 # 궿 => 궿 +ADC0 1100 1171 # 귀 => 귀 +ADC1 1100 1171 11A8 # 귁 => 귁 +ADC2 1100 1171 11A9 # 귂 => 귂 +ADC3 1100 1171 11AA # 귃 => 귃 +ADC4 1100 1171 11AB # 귄 => 귄 +ADC5 1100 1171 11AC # 귅 => 귅 +ADC6 1100 1171 11AD # 귆 => 귆 +ADC7 1100 1171 11AE # 귇 => 귇 +ADC8 1100 1171 11AF # 귈 => 귈 +ADC9 1100 1171 11B0 # 귉 => 귉 +ADCA 1100 1171 11B1 # 귊 => 귊 +ADCB 1100 1171 11B2 # 귋 => 귋 +ADCC 1100 1171 11B3 # 귌 => 귌 +ADCD 1100 1171 11B4 # 귍 => 귍 +ADCE 1100 1171 11B5 # 귎 => 귎 +ADCF 1100 1171 11B6 # 귏 => 귏 +ADD0 1100 1171 11B7 # 귐 => 귐 +ADD1 1100 1171 11B8 # 귑 => 귑 +ADD2 1100 1171 11B9 # 귒 => 귒 +ADD3 1100 1171 11BA # 귓 => 귓 +ADD4 1100 1171 11BB # 귔 => 귔 +ADD5 1100 1171 11BC # 귕 => 귕 +ADD6 1100 1171 11BD # 귖 => 귖 +ADD7 1100 1171 11BE # 귗 => 귗 +ADD8 1100 1171 11BF # 귘 => 귘 +ADD9 1100 1171 11C0 # 귙 => 귙 +ADDA 1100 1171 11C1 # 귚 => 귚 +ADDB 1100 1171 11C2 # 귛 => 귛 +ADDC 1100 1172 # 규 => 규 +ADDD 1100 1172 11A8 # 귝 => 귝 +ADDE 1100 1172 11A9 # 귞 => 귞 +ADDF 1100 1172 11AA # 귟 => 귟 +ADE0 1100 1172 11AB # 균 => 균 +ADE1 1100 1172 11AC # 귡 => 귡 
+ADE2 1100 1172 11AD # 귢 => 귢 +ADE3 1100 1172 11AE # 귣 => 귣 +ADE4 1100 1172 11AF # 귤 => 귤 +ADE5 1100 1172 11B0 # 귥 => 귥 +ADE6 1100 1172 11B1 # 귦 => 귦 +ADE7 1100 1172 11B2 # 귧 => 귧 +ADE8 1100 1172 11B3 # 귨 => 귨 +ADE9 1100 1172 11B4 # 귩 => 귩 +ADEA 1100 1172 11B5 # 귪 => 귪 +ADEB 1100 1172 11B6 # 귫 => 귫 +ADEC 1100 1172 11B7 # 귬 => 귬 +ADED 1100 1172 11B8 # 귭 => 귭 +ADEE 1100 1172 11B9 # 귮 => 귮 +ADEF 1100 1172 11BA # 귯 => 귯 +ADF0 1100 1172 11BB # 귰 => 귰 +ADF1 1100 1172 11BC # 귱 => 귱 +ADF2 1100 1172 11BD # 귲 => 귲 +ADF3 1100 1172 11BE # 귳 => 귳 +ADF4 1100 1172 11BF # 귴 => 귴 +ADF5 1100 1172 11C0 # 귵 => 귵 +ADF6 1100 1172 11C1 # 귶 => 귶 +ADF7 1100 1172 11C2 # 귷 => 귷 +ADF8 1100 1173 # 그 => 그 +ADF9 1100 1173 11A8 # 극 => 극 +ADFA 1100 1173 11A9 # 귺 => 귺 +ADFB 1100 1173 11AA # 귻 => 귻 +ADFC 1100 1173 11AB # 근 => 근 +ADFD 1100 1173 11AC # 귽 => 귽 +ADFE 1100 1173 11AD # 귾 => 귾 +ADFF 1100 1173 11AE # 귿 => 귿 +AE00 1100 1173 11AF # 글 => 글 +AE01 1100 1173 11B0 # 긁 => 긁 +AE02 1100 1173 11B1 # 긂 => 긂 +AE03 1100 1173 11B2 # 긃 => 긃 +AE04 1100 1173 11B3 # 긄 => 긄 +AE05 1100 1173 11B4 # 긅 => 긅 +AE06 1100 1173 11B5 # 긆 => 긆 +AE07 1100 1173 11B6 # 긇 => 긇 +AE08 1100 1173 11B7 # 금 => 금 +AE09 1100 1173 11B8 # 급 => 급 +AE0A 1100 1173 11B9 # 긊 => 긊 +AE0B 1100 1173 11BA # 긋 => 긋 +AE0C 1100 1173 11BB # 긌 => 긌 +AE0D 1100 1173 11BC # 긍 => 긍 +AE0E 1100 1173 11BD # 긎 => 긎 +AE0F 1100 1173 11BE # 긏 => 긏 +AE10 1100 1173 11BF # 긐 => 긐 +AE11 1100 1173 11C0 # 긑 => 긑 +AE12 1100 1173 11C1 # 긒 => 긒 +AE13 1100 1173 11C2 # 긓 => 긓 +AE14 1100 1174 # 긔 => 긔 +AE15 1100 1174 11A8 # 긕 => 긕 +AE16 1100 1174 11A9 # 긖 => 긖 +AE17 1100 1174 11AA # 긗 => 긗 +AE18 1100 1174 11AB # 긘 => 긘 +AE19 1100 1174 11AC # 긙 => 긙 +AE1A 1100 1174 11AD # 긚 => 긚 +AE1B 1100 1174 11AE # 긛 => 긛 +AE1C 1100 1174 11AF # 긜 => 긜 +AE1D 1100 1174 11B0 # 긝 => 긝 +AE1E 1100 1174 11B1 # 긞 => 긞 +AE1F 1100 1174 11B2 # 긟 => 긟 +AE20 1100 1174 11B3 # 긠 => 긠 +AE21 1100 1174 11B4 # 긡 => 긡 +AE22 1100 1174 11B5 # 긢 => 긢 +AE23 1100 1174 11B6 # 긣 => 긣 +AE24 1100 1174 11B7 # 긤 => 긤 
+AE25 1100 1174 11B8 # 긥 => 긥 +AE26 1100 1174 11B9 # 긦 => 긦 +AE27 1100 1174 11BA # 긧 => 긧 +AE28 1100 1174 11BB # 긨 => 긨 +AE29 1100 1174 11BC # 긩 => 긩 +AE2A 1100 1174 11BD # 긪 => 긪 +AE2B 1100 1174 11BE # 긫 => 긫 +AE2C 1100 1174 11BF # 긬 => 긬 +AE2D 1100 1174 11C0 # 긭 => 긭 +AE2E 1100 1174 11C1 # 긮 => 긮 +AE2F 1100 1174 11C2 # 긯 => 긯 +AE30 1100 1175 # 기 => 기 +AE31 1100 1175 11A8 # 긱 => 긱 +AE32 1100 1175 11A9 # 긲 => 긲 +AE33 1100 1175 11AA # 긳 => 긳 +AE34 1100 1175 11AB # 긴 => 긴 +AE35 1100 1175 11AC # 긵 => 긵 +AE36 1100 1175 11AD # 긶 => 긶 +AE37 1100 1175 11AE # 긷 => 긷 +AE38 1100 1175 11AF # 길 => 길 +AE39 1100 1175 11B0 # 긹 => 긹 +AE3A 1100 1175 11B1 # 긺 => 긺 +AE3B 1100 1175 11B2 # 긻 => 긻 +AE3C 1100 1175 11B3 # 긼 => 긼 +AE3D 1100 1175 11B4 # 긽 => 긽 +AE3E 1100 1175 11B5 # 긾 => 긾 +AE3F 1100 1175 11B6 # 긿 => 긿 +AE40 1100 1175 11B7 # 김 => 김 +AE41 1100 1175 11B8 # 깁 => 깁 +AE42 1100 1175 11B9 # 깂 => 깂 +AE43 1100 1175 11BA # 깃 => 깃 +AE44 1100 1175 11BB # 깄 => 깄 +AE45 1100 1175 11BC # 깅 => 깅 +AE46 1100 1175 11BD # 깆 => 깆 +AE47 1100 1175 11BE # 깇 => 깇 +AE48 1100 1175 11BF # 깈 => 깈 +AE49 1100 1175 11C0 # 깉 => 깉 +AE4A 1100 1175 11C1 # 깊 => 깊 +AE4B 1100 1175 11C2 # 깋 => 깋 +AE4C 1101 1161 # 까 => 까 +AE4D 1101 1161 11A8 # 깍 => 깍 +AE4E 1101 1161 11A9 # 깎 => 깎 +AE4F 1101 1161 11AA # 깏 => 깏 +AE50 1101 1161 11AB # 깐 => 깐 +AE51 1101 1161 11AC # 깑 => 깑 +AE52 1101 1161 11AD # 깒 => 깒 +AE53 1101 1161 11AE # 깓 => 깓 +AE54 1101 1161 11AF # 깔 => 깔 +AE55 1101 1161 11B0 # 깕 => 깕 +AE56 1101 1161 11B1 # 깖 => 깖 +AE57 1101 1161 11B2 # 깗 => 깗 +AE58 1101 1161 11B3 # 깘 => 깘 +AE59 1101 1161 11B4 # 깙 => 깙 +AE5A 1101 1161 11B5 # 깚 => 깚 +AE5B 1101 1161 11B6 # 깛 => 깛 +AE5C 1101 1161 11B7 # 깜 => 깜 +AE5D 1101 1161 11B8 # 깝 => 깝 +AE5E 1101 1161 11B9 # 깞 => 깞 +AE5F 1101 1161 11BA # 깟 => 깟 +AE60 1101 1161 11BB # 깠 => 깠 +AE61 1101 1161 11BC # 깡 => 깡 +AE62 1101 1161 11BD # 깢 => 깢 +AE63 1101 1161 11BE # 깣 => 깣 +AE64 1101 1161 11BF # 깤 => 깤 +AE65 1101 1161 11C0 # 깥 => 깥 +AE66 1101 1161 11C1 # 깦 => 깦 +AE67 1101 1161 11C2 # 깧 => 깧 
+AE68 1101 1162 # 깨 => 깨 +AE69 1101 1162 11A8 # 깩 => 깩 +AE6A 1101 1162 11A9 # 깪 => 깪 +AE6B 1101 1162 11AA # 깫 => 깫 +AE6C 1101 1162 11AB # 깬 => 깬 +AE6D 1101 1162 11AC # 깭 => 깭 +AE6E 1101 1162 11AD # 깮 => 깮 +AE6F 1101 1162 11AE # 깯 => 깯 +AE70 1101 1162 11AF # 깰 => 깰 +AE71 1101 1162 11B0 # 깱 => 깱 +AE72 1101 1162 11B1 # 깲 => 깲 +AE73 1101 1162 11B2 # 깳 => 깳 +AE74 1101 1162 11B3 # 깴 => 깴 +AE75 1101 1162 11B4 # 깵 => 깵 +AE76 1101 1162 11B5 # 깶 => 깶 +AE77 1101 1162 11B6 # 깷 => 깷 +AE78 1101 1162 11B7 # 깸 => 깸 +AE79 1101 1162 11B8 # 깹 => 깹 +AE7A 1101 1162 11B9 # 깺 => 깺 +AE7B 1101 1162 11BA # 깻 => 깻 +AE7C 1101 1162 11BB # 깼 => 깼 +AE7D 1101 1162 11BC # 깽 => 깽 +AE7E 1101 1162 11BD # 깾 => 깾 +AE7F 1101 1162 11BE # 깿 => 깿 +AE80 1101 1162 11BF # 꺀 => 꺀 +AE81 1101 1162 11C0 # 꺁 => 꺁 +AE82 1101 1162 11C1 # 꺂 => 꺂 +AE83 1101 1162 11C2 # 꺃 => 꺃 +AE84 1101 1163 # 꺄 => 꺄 +AE85 1101 1163 11A8 # 꺅 => 꺅 +AE86 1101 1163 11A9 # 꺆 => 꺆 +AE87 1101 1163 11AA # 꺇 => 꺇 +AE88 1101 1163 11AB # 꺈 => 꺈 +AE89 1101 1163 11AC # 꺉 => 꺉 +AE8A 1101 1163 11AD # 꺊 => 꺊 +AE8B 1101 1163 11AE # 꺋 => 꺋 +AE8C 1101 1163 11AF # 꺌 => 꺌 +AE8D 1101 1163 11B0 # 꺍 => 꺍 +AE8E 1101 1163 11B1 # 꺎 => 꺎 +AE8F 1101 1163 11B2 # 꺏 => 꺏 +AE90 1101 1163 11B3 # 꺐 => 꺐 +AE91 1101 1163 11B4 # 꺑 => 꺑 +AE92 1101 1163 11B5 # 꺒 => 꺒 +AE93 1101 1163 11B6 # 꺓 => 꺓 +AE94 1101 1163 11B7 # 꺔 => 꺔 +AE95 1101 1163 11B8 # 꺕 => 꺕 +AE96 1101 1163 11B9 # 꺖 => 꺖 +AE97 1101 1163 11BA # 꺗 => 꺗 +AE98 1101 1163 11BB # 꺘 => 꺘 +AE99 1101 1163 11BC # 꺙 => 꺙 +AE9A 1101 1163 11BD # 꺚 => 꺚 +AE9B 1101 1163 11BE # 꺛 => 꺛 +AE9C 1101 1163 11BF # 꺜 => 꺜 +AE9D 1101 1163 11C0 # 꺝 => 꺝 +AE9E 1101 1163 11C1 # 꺞 => 꺞 +AE9F 1101 1163 11C2 # 꺟 => 꺟 +AEA0 1101 1164 # 꺠 => 꺠 +AEA1 1101 1164 11A8 # 꺡 => 꺡 +AEA2 1101 1164 11A9 # 꺢 => 꺢 +AEA3 1101 1164 11AA # 꺣 => 꺣 +AEA4 1101 1164 11AB # 꺤 => 꺤 +AEA5 1101 1164 11AC # 꺥 => 꺥 +AEA6 1101 1164 11AD # 꺦 => 꺦 +AEA7 1101 1164 11AE # 꺧 => 꺧 +AEA8 1101 1164 11AF # 꺨 => 꺨 +AEA9 1101 1164 11B0 # 꺩 => 꺩 +AEAA 1101 1164 11B1 # 꺪 => 꺪 
+AEAB 1101 1164 11B2 # 꺫 => 꺫 +AEAC 1101 1164 11B3 # 꺬 => 꺬 +AEAD 1101 1164 11B4 # 꺭 => 꺭 +AEAE 1101 1164 11B5 # 꺮 => 꺮 +AEAF 1101 1164 11B6 # 꺯 => 꺯 +AEB0 1101 1164 11B7 # 꺰 => 꺰 +AEB1 1101 1164 11B8 # 꺱 => 꺱 +AEB2 1101 1164 11B9 # 꺲 => 꺲 +AEB3 1101 1164 11BA # 꺳 => 꺳 +AEB4 1101 1164 11BB # 꺴 => 꺴 +AEB5 1101 1164 11BC # 꺵 => 꺵 +AEB6 1101 1164 11BD # 꺶 => 꺶 +AEB7 1101 1164 11BE # 꺷 => 꺷 +AEB8 1101 1164 11BF # 꺸 => 꺸 +AEB9 1101 1164 11C0 # 꺹 => 꺹 +AEBA 1101 1164 11C1 # 꺺 => 꺺 +AEBB 1101 1164 11C2 # 꺻 => 꺻 +AEBC 1101 1165 # 꺼 => 꺼 +AEBD 1101 1165 11A8 # 꺽 => 꺽 +AEBE 1101 1165 11A9 # 꺾 => 꺾 +AEBF 1101 1165 11AA # 꺿 => 꺿 +AEC0 1101 1165 11AB # 껀 => 껀 +AEC1 1101 1165 11AC # 껁 => 껁 +AEC2 1101 1165 11AD # 껂 => 껂 +AEC3 1101 1165 11AE # 껃 => 껃 +AEC4 1101 1165 11AF # 껄 => 껄 +AEC5 1101 1165 11B0 # 껅 => 껅 +AEC6 1101 1165 11B1 # 껆 => 껆 +AEC7 1101 1165 11B2 # 껇 => 껇 +AEC8 1101 1165 11B3 # 껈 => 껈 +AEC9 1101 1165 11B4 # 껉 => 껉 +AECA 1101 1165 11B5 # 껊 => 껊 +AECB 1101 1165 11B6 # 껋 => 껋 +AECC 1101 1165 11B7 # 껌 => 껌 +AECD 1101 1165 11B8 # 껍 => 껍 +AECE 1101 1165 11B9 # 껎 => 껎 +AECF 1101 1165 11BA # 껏 => 껏 +AED0 1101 1165 11BB # 껐 => 껐 +AED1 1101 1165 11BC # 껑 => 껑 +AED2 1101 1165 11BD # 껒 => 껒 +AED3 1101 1165 11BE # 껓 => 껓 +AED4 1101 1165 11BF # 껔 => 껔 +AED5 1101 1165 11C0 # 껕 => 껕 +AED6 1101 1165 11C1 # 껖 => 껖 +AED7 1101 1165 11C2 # 껗 => 껗 +AED8 1101 1166 # 께 => 께 +AED9 1101 1166 11A8 # 껙 => 껙 +AEDA 1101 1166 11A9 # 껚 => 껚 +AEDB 1101 1166 11AA # 껛 => 껛 +AEDC 1101 1166 11AB # 껜 => 껜 +AEDD 1101 1166 11AC # 껝 => 껝 +AEDE 1101 1166 11AD # 껞 => 껞 +AEDF 1101 1166 11AE # 껟 => 껟 +AEE0 1101 1166 11AF # 껠 => 껠 +AEE1 1101 1166 11B0 # 껡 => 껡 +AEE2 1101 1166 11B1 # 껢 => 껢 +AEE3 1101 1166 11B2 # 껣 => 껣 +AEE4 1101 1166 11B3 # 껤 => 껤 +AEE5 1101 1166 11B4 # 껥 => 껥 +AEE6 1101 1166 11B5 # 껦 => 껦 +AEE7 1101 1166 11B6 # 껧 => 껧 +AEE8 1101 1166 11B7 # 껨 => 껨 +AEE9 1101 1166 11B8 # 껩 => 껩 +AEEA 1101 1166 11B9 # 껪 => 껪 +AEEB 1101 1166 11BA # 껫 => 껫 +AEEC 1101 1166 11BB # 껬 => 껬 +AEED 1101 1166 11BC # 껭 => 껭 
+AEEE 1101 1166 11BD # 껮 => 껮 +AEEF 1101 1166 11BE # 껯 => 껯 +AEF0 1101 1166 11BF # 껰 => 껰 +AEF1 1101 1166 11C0 # 껱 => 껱 +AEF2 1101 1166 11C1 # 껲 => 껲 +AEF3 1101 1166 11C2 # 껳 => 껳 +AEF4 1101 1167 # 껴 => 껴 +AEF5 1101 1167 11A8 # 껵 => 껵 +AEF6 1101 1167 11A9 # 껶 => 껶 +AEF7 1101 1167 11AA # 껷 => 껷 +AEF8 1101 1167 11AB # 껸 => 껸 +AEF9 1101 1167 11AC # 껹 => 껹 +AEFA 1101 1167 11AD # 껺 => 껺 +AEFB 1101 1167 11AE # 껻 => 껻 +AEFC 1101 1167 11AF # 껼 => 껼 +AEFD 1101 1167 11B0 # 껽 => 껽 +AEFE 1101 1167 11B1 # 껾 => 껾 +AEFF 1101 1167 11B2 # 껿 => 껿 +AF00 1101 1167 11B3 # 꼀 => 꼀 +AF01 1101 1167 11B4 # 꼁 => 꼁 +AF02 1101 1167 11B5 # 꼂 => 꼂 +AF03 1101 1167 11B6 # 꼃 => 꼃 +AF04 1101 1167 11B7 # 꼄 => 꼄 +AF05 1101 1167 11B8 # 꼅 => 꼅 +AF06 1101 1167 11B9 # 꼆 => 꼆 +AF07 1101 1167 11BA # 꼇 => 꼇 +AF08 1101 1167 11BB # 꼈 => 꼈 +AF09 1101 1167 11BC # 꼉 => 꼉 +AF0A 1101 1167 11BD # 꼊 => 꼊 +AF0B 1101 1167 11BE # 꼋 => 꼋 +AF0C 1101 1167 11BF # 꼌 => 꼌 +AF0D 1101 1167 11C0 # 꼍 => 꼍 +AF0E 1101 1167 11C1 # 꼎 => 꼎 +AF0F 1101 1167 11C2 # 꼏 => 꼏 +AF10 1101 1168 # 꼐 => 꼐 +AF11 1101 1168 11A8 # 꼑 => 꼑 +AF12 1101 1168 11A9 # 꼒 => 꼒 +AF13 1101 1168 11AA # 꼓 => 꼓 +AF14 1101 1168 11AB # 꼔 => 꼔 +AF15 1101 1168 11AC # 꼕 => 꼕 +AF16 1101 1168 11AD # 꼖 => 꼖 +AF17 1101 1168 11AE # 꼗 => 꼗 +AF18 1101 1168 11AF # 꼘 => 꼘 +AF19 1101 1168 11B0 # 꼙 => 꼙 +AF1A 1101 1168 11B1 # 꼚 => 꼚 +AF1B 1101 1168 11B2 # 꼛 => 꼛 +AF1C 1101 1168 11B3 # 꼜 => 꼜 +AF1D 1101 1168 11B4 # 꼝 => 꼝 +AF1E 1101 1168 11B5 # 꼞 => 꼞 +AF1F 1101 1168 11B6 # 꼟 => 꼟 +AF20 1101 1168 11B7 # 꼠 => 꼠 +AF21 1101 1168 11B8 # 꼡 => 꼡 +AF22 1101 1168 11B9 # 꼢 => 꼢 +AF23 1101 1168 11BA # 꼣 => 꼣 +AF24 1101 1168 11BB # 꼤 => 꼤 +AF25 1101 1168 11BC # 꼥 => 꼥 +AF26 1101 1168 11BD # 꼦 => 꼦 +AF27 1101 1168 11BE # 꼧 => 꼧 +AF28 1101 1168 11BF # 꼨 => 꼨 +AF29 1101 1168 11C0 # 꼩 => 꼩 +AF2A 1101 1168 11C1 # 꼪 => 꼪 +AF2B 1101 1168 11C2 # 꼫 => 꼫 +AF2C 1101 1169 # 꼬 => 꼬 +AF2D 1101 1169 11A8 # 꼭 => 꼭 +AF2E 1101 1169 11A9 # 꼮 => 꼮 +AF2F 1101 1169 11AA # 꼯 => 꼯 +AF30 1101 1169 11AB # 꼰 => 꼰 
+AF31 1101 1169 11AC # 꼱 => 꼱 +AF32 1101 1169 11AD # 꼲 => 꼲 +AF33 1101 1169 11AE # 꼳 => 꼳 +AF34 1101 1169 11AF # 꼴 => 꼴 +AF35 1101 1169 11B0 # 꼵 => 꼵 +AF36 1101 1169 11B1 # 꼶 => 꼶 +AF37 1101 1169 11B2 # 꼷 => 꼷 +AF38 1101 1169 11B3 # 꼸 => 꼸 +AF39 1101 1169 11B4 # 꼹 => 꼹 +AF3A 1101 1169 11B5 # 꼺 => 꼺 +AF3B 1101 1169 11B6 # 꼻 => 꼻 +AF3C 1101 1169 11B7 # 꼼 => 꼼 +AF3D 1101 1169 11B8 # 꼽 => 꼽 +AF3E 1101 1169 11B9 # 꼾 => 꼾 +AF3F 1101 1169 11BA # 꼿 => 꼿 +AF40 1101 1169 11BB # 꽀 => 꽀 +AF41 1101 1169 11BC # 꽁 => 꽁 +AF42 1101 1169 11BD # 꽂 => 꽂 +AF43 1101 1169 11BE # 꽃 => 꽃 +AF44 1101 1169 11BF # 꽄 => 꽄 +AF45 1101 1169 11C0 # 꽅 => 꽅 +AF46 1101 1169 11C1 # 꽆 => 꽆 +AF47 1101 1169 11C2 # 꽇 => 꽇 +AF48 1101 116A # 꽈 => 꽈 +AF49 1101 116A 11A8 # 꽉 => 꽉 +AF4A 1101 116A 11A9 # 꽊 => 꽊 +AF4B 1101 116A 11AA # 꽋 => 꽋 +AF4C 1101 116A 11AB # 꽌 => 꽌 +AF4D 1101 116A 11AC # 꽍 => 꽍 +AF4E 1101 116A 11AD # 꽎 => 꽎 +AF4F 1101 116A 11AE # 꽏 => 꽏 +AF50 1101 116A 11AF # 꽐 => 꽐 +AF51 1101 116A 11B0 # 꽑 => 꽑 +AF52 1101 116A 11B1 # 꽒 => 꽒 +AF53 1101 116A 11B2 # 꽓 => 꽓 +AF54 1101 116A 11B3 # 꽔 => 꽔 +AF55 1101 116A 11B4 # 꽕 => 꽕 +AF56 1101 116A 11B5 # 꽖 => 꽖 +AF57 1101 116A 11B6 # 꽗 => 꽗 +AF58 1101 116A 11B7 # 꽘 => 꽘 +AF59 1101 116A 11B8 # 꽙 => 꽙 +AF5A 1101 116A 11B9 # 꽚 => 꽚 +AF5B 1101 116A 11BA # 꽛 => 꽛 +AF5C 1101 116A 11BB # 꽜 => 꽜 +AF5D 1101 116A 11BC # 꽝 => 꽝 +AF5E 1101 116A 11BD # 꽞 => 꽞 +AF5F 1101 116A 11BE # 꽟 => 꽟 +AF60 1101 116A 11BF # 꽠 => 꽠 +AF61 1101 116A 11C0 # 꽡 => 꽡 +AF62 1101 116A 11C1 # 꽢 => 꽢 +AF63 1101 116A 11C2 # 꽣 => 꽣 +AF64 1101 116B # 꽤 => 꽤 +AF65 1101 116B 11A8 # 꽥 => 꽥 +AF66 1101 116B 11A9 # 꽦 => 꽦 +AF67 1101 116B 11AA # 꽧 => 꽧 +AF68 1101 116B 11AB # 꽨 => 꽨 +AF69 1101 116B 11AC # 꽩 => 꽩 +AF6A 1101 116B 11AD # 꽪 => 꽪 +AF6B 1101 116B 11AE # 꽫 => 꽫 +AF6C 1101 116B 11AF # 꽬 => 꽬 +AF6D 1101 116B 11B0 # 꽭 => 꽭 +AF6E 1101 116B 11B1 # 꽮 => 꽮 +AF6F 1101 116B 11B2 # 꽯 => 꽯 +AF70 1101 116B 11B3 # 꽰 => 꽰 +AF71 1101 116B 11B4 # 꽱 => 꽱 +AF72 1101 116B 11B5 # 꽲 => 꽲 +AF73 1101 116B 11B6 # 꽳 => 꽳 
+AF74 1101 116B 11B7 # 꽴 => 꽴 +AF75 1101 116B 11B8 # 꽵 => 꽵 +AF76 1101 116B 11B9 # 꽶 => 꽶 +AF77 1101 116B 11BA # 꽷 => 꽷 +AF78 1101 116B 11BB # 꽸 => 꽸 +AF79 1101 116B 11BC # 꽹 => 꽹 +AF7A 1101 116B 11BD # 꽺 => 꽺 +AF7B 1101 116B 11BE # 꽻 => 꽻 +AF7C 1101 116B 11BF # 꽼 => 꽼 +AF7D 1101 116B 11C0 # 꽽 => 꽽 +AF7E 1101 116B 11C1 # 꽾 => 꽾 +AF7F 1101 116B 11C2 # 꽿 => 꽿 +AF80 1101 116C # 꾀 => 꾀 +AF81 1101 116C 11A8 # 꾁 => 꾁 +AF82 1101 116C 11A9 # 꾂 => 꾂 +AF83 1101 116C 11AA # 꾃 => 꾃 +AF84 1101 116C 11AB # 꾄 => 꾄 +AF85 1101 116C 11AC # 꾅 => 꾅 +AF86 1101 116C 11AD # 꾆 => 꾆 +AF87 1101 116C 11AE # 꾇 => 꾇 +AF88 1101 116C 11AF # 꾈 => 꾈 +AF89 1101 116C 11B0 # 꾉 => 꾉 +AF8A 1101 116C 11B1 # 꾊 => 꾊 +AF8B 1101 116C 11B2 # 꾋 => 꾋 +AF8C 1101 116C 11B3 # 꾌 => 꾌 +AF8D 1101 116C 11B4 # 꾍 => 꾍 +AF8E 1101 116C 11B5 # 꾎 => 꾎 +AF8F 1101 116C 11B6 # 꾏 => 꾏 +AF90 1101 116C 11B7 # 꾐 => 꾐 +AF91 1101 116C 11B8 # 꾑 => 꾑 +AF92 1101 116C 11B9 # 꾒 => 꾒 +AF93 1101 116C 11BA # 꾓 => 꾓 +AF94 1101 116C 11BB # 꾔 => 꾔 +AF95 1101 116C 11BC # 꾕 => 꾕 +AF96 1101 116C 11BD # 꾖 => 꾖 +AF97 1101 116C 11BE # 꾗 => 꾗 +AF98 1101 116C 11BF # 꾘 => 꾘 +AF99 1101 116C 11C0 # 꾙 => 꾙 +AF9A 1101 116C 11C1 # 꾚 => 꾚 +AF9B 1101 116C 11C2 # 꾛 => 꾛 +AF9C 1101 116D # 꾜 => 꾜 +AF9D 1101 116D 11A8 # 꾝 => 꾝 +AF9E 1101 116D 11A9 # 꾞 => 꾞 +AF9F 1101 116D 11AA # 꾟 => 꾟 +AFA0 1101 116D 11AB # 꾠 => 꾠 +AFA1 1101 116D 11AC # 꾡 => 꾡 +AFA2 1101 116D 11AD # 꾢 => 꾢 +AFA3 1101 116D 11AE # 꾣 => 꾣 +AFA4 1101 116D 11AF # 꾤 => 꾤 +AFA5 1101 116D 11B0 # 꾥 => 꾥 +AFA6 1101 116D 11B1 # 꾦 => 꾦 +AFA7 1101 116D 11B2 # 꾧 => 꾧 +AFA8 1101 116D 11B3 # 꾨 => 꾨 +AFA9 1101 116D 11B4 # 꾩 => 꾩 +AFAA 1101 116D 11B5 # 꾪 => 꾪 +AFAB 1101 116D 11B6 # 꾫 => 꾫 +AFAC 1101 116D 11B7 # 꾬 => 꾬 +AFAD 1101 116D 11B8 # 꾭 => 꾭 +AFAE 1101 116D 11B9 # 꾮 => 꾮 +AFAF 1101 116D 11BA # 꾯 => 꾯 +AFB0 1101 116D 11BB # 꾰 => 꾰 +AFB1 1101 116D 11BC # 꾱 => 꾱 +AFB2 1101 116D 11BD # 꾲 => 꾲 +AFB3 1101 116D 11BE # 꾳 => 꾳 +AFB4 1101 116D 11BF # 꾴 => 꾴 +AFB5 1101 116D 11C0 # 꾵 => 꾵 +AFB6 1101 116D 11C1 # 꾶 => 꾶 
+AFB7 1101 116D 11C2 # 꾷 => 꾷 +AFB8 1101 116E # 꾸 => 꾸 +AFB9 1101 116E 11A8 # 꾹 => 꾹 +AFBA 1101 116E 11A9 # 꾺 => 꾺 +AFBB 1101 116E 11AA # 꾻 => 꾻 +AFBC 1101 116E 11AB # 꾼 => 꾼 +AFBD 1101 116E 11AC # 꾽 => 꾽 +AFBE 1101 116E 11AD # 꾾 => 꾾 +AFBF 1101 116E 11AE # 꾿 => 꾿 +AFC0 1101 116E 11AF # 꿀 => 꿀 +AFC1 1101 116E 11B0 # 꿁 => 꿁 +AFC2 1101 116E 11B1 # 꿂 => 꿂 +AFC3 1101 116E 11B2 # 꿃 => 꿃 +AFC4 1101 116E 11B3 # 꿄 => 꿄 +AFC5 1101 116E 11B4 # 꿅 => 꿅 +AFC6 1101 116E 11B5 # 꿆 => 꿆 +AFC7 1101 116E 11B6 # 꿇 => 꿇 +AFC8 1101 116E 11B7 # 꿈 => 꿈 +AFC9 1101 116E 11B8 # 꿉 => 꿉 +AFCA 1101 116E 11B9 # 꿊 => 꿊 +AFCB 1101 116E 11BA # 꿋 => 꿋 +AFCC 1101 116E 11BB # 꿌 => 꿌 +AFCD 1101 116E 11BC # 꿍 => 꿍 +AFCE 1101 116E 11BD # 꿎 => 꿎 +AFCF 1101 116E 11BE # 꿏 => 꿏 +AFD0 1101 116E 11BF # 꿐 => 꿐 +AFD1 1101 116E 11C0 # 꿑 => 꿑 +AFD2 1101 116E 11C1 # 꿒 => 꿒 +AFD3 1101 116E 11C2 # 꿓 => 꿓 +AFD4 1101 116F # 꿔 => 꿔 +AFD5 1101 116F 11A8 # 꿕 => 꿕 +AFD6 1101 116F 11A9 # 꿖 => 꿖 +AFD7 1101 116F 11AA # 꿗 => 꿗 +AFD8 1101 116F 11AB # 꿘 => 꿘 +AFD9 1101 116F 11AC # 꿙 => 꿙 +AFDA 1101 116F 11AD # 꿚 => 꿚 +AFDB 1101 116F 11AE # 꿛 => 꿛 +AFDC 1101 116F 11AF # 꿜 => 꿜 +AFDD 1101 116F 11B0 # 꿝 => 꿝 +AFDE 1101 116F 11B1 # 꿞 => 꿞 +AFDF 1101 116F 11B2 # 꿟 => 꿟 +AFE0 1101 116F 11B3 # 꿠 => 꿠 +AFE1 1101 116F 11B4 # 꿡 => 꿡 +AFE2 1101 116F 11B5 # 꿢 => 꿢 +AFE3 1101 116F 11B6 # 꿣 => 꿣 +AFE4 1101 116F 11B7 # 꿤 => 꿤 +AFE5 1101 116F 11B8 # 꿥 => 꿥 +AFE6 1101 116F 11B9 # 꿦 => 꿦 +AFE7 1101 116F 11BA # 꿧 => 꿧 +AFE8 1101 116F 11BB # 꿨 => 꿨 +AFE9 1101 116F 11BC # 꿩 => 꿩 +AFEA 1101 116F 11BD # 꿪 => 꿪 +AFEB 1101 116F 11BE # 꿫 => 꿫 +AFEC 1101 116F 11BF # 꿬 => 꿬 +AFED 1101 116F 11C0 # 꿭 => 꿭 +AFEE 1101 116F 11C1 # 꿮 => 꿮 +AFEF 1101 116F 11C2 # 꿯 => 꿯 +AFF0 1101 1170 # 꿰 => 꿰 +AFF1 1101 1170 11A8 # 꿱 => 꿱 +AFF2 1101 1170 11A9 # 꿲 => 꿲 +AFF3 1101 1170 11AA # 꿳 => 꿳 +AFF4 1101 1170 11AB # 꿴 => 꿴 +AFF5 1101 1170 11AC # 꿵 => 꿵 +AFF6 1101 1170 11AD # 꿶 => 꿶 +AFF7 1101 1170 11AE # 꿷 => 꿷 +AFF8 1101 1170 11AF # 꿸 => 꿸 +AFF9 1101 1170 11B0 # 꿹 => 꿹 
+AFFA 1101 1170 11B1 # 꿺 => 꿺 +AFFB 1101 1170 11B2 # 꿻 => 꿻 +AFFC 1101 1170 11B3 # 꿼 => 꿼 +AFFD 1101 1170 11B4 # 꿽 => 꿽 +AFFE 1101 1170 11B5 # 꿾 => 꿾 +AFFF 1101 1170 11B6 # 꿿 => 꿿 +B000 1101 1170 11B7 # 뀀 => 뀀 +B001 1101 1170 11B8 # 뀁 => 뀁 +B002 1101 1170 11B9 # 뀂 => 뀂 +B003 1101 1170 11BA # 뀃 => 뀃 +B004 1101 1170 11BB # 뀄 => 뀄 +B005 1101 1170 11BC # 뀅 => 뀅 +B006 1101 1170 11BD # 뀆 => 뀆 +B007 1101 1170 11BE # 뀇 => 뀇 +B008 1101 1170 11BF # 뀈 => 뀈 +B009 1101 1170 11C0 # 뀉 => 뀉 +B00A 1101 1170 11C1 # 뀊 => 뀊 +B00B 1101 1170 11C2 # 뀋 => 뀋 +B00C 1101 1171 # 뀌 => 뀌 +B00D 1101 1171 11A8 # 뀍 => 뀍 +B00E 1101 1171 11A9 # 뀎 => 뀎 +B00F 1101 1171 11AA # 뀏 => 뀏 +B010 1101 1171 11AB # 뀐 => 뀐 +B011 1101 1171 11AC # 뀑 => 뀑 +B012 1101 1171 11AD # 뀒 => 뀒 +B013 1101 1171 11AE # 뀓 => 뀓 +B014 1101 1171 11AF # 뀔 => 뀔 +B015 1101 1171 11B0 # 뀕 => 뀕 +B016 1101 1171 11B1 # 뀖 => 뀖 +B017 1101 1171 11B2 # 뀗 => 뀗 +B018 1101 1171 11B3 # 뀘 => 뀘 +B019 1101 1171 11B4 # 뀙 => 뀙 +B01A 1101 1171 11B5 # 뀚 => 뀚 +B01B 1101 1171 11B6 # 뀛 => 뀛 +B01C 1101 1171 11B7 # 뀜 => 뀜 +B01D 1101 1171 11B8 # 뀝 => 뀝 +B01E 1101 1171 11B9 # 뀞 => 뀞 +B01F 1101 1171 11BA # 뀟 => 뀟 +B020 1101 1171 11BB # 뀠 => 뀠 +B021 1101 1171 11BC # 뀡 => 뀡 +B022 1101 1171 11BD # 뀢 => 뀢 +B023 1101 1171 11BE # 뀣 => 뀣 +B024 1101 1171 11BF # 뀤 => 뀤 +B025 1101 1171 11C0 # 뀥 => 뀥 +B026 1101 1171 11C1 # 뀦 => 뀦 +B027 1101 1171 11C2 # 뀧 => 뀧 +B028 1101 1172 # 뀨 => 뀨 +B029 1101 1172 11A8 # 뀩 => 뀩 +B02A 1101 1172 11A9 # 뀪 => 뀪 +B02B 1101 1172 11AA # 뀫 => 뀫 +B02C 1101 1172 11AB # 뀬 => 뀬 +B02D 1101 1172 11AC # 뀭 => 뀭 +B02E 1101 1172 11AD # 뀮 => 뀮 +B02F 1101 1172 11AE # 뀯 => 뀯 +B030 1101 1172 11AF # 뀰 => 뀰 +B031 1101 1172 11B0 # 뀱 => 뀱 +B032 1101 1172 11B1 # 뀲 => 뀲 +B033 1101 1172 11B2 # 뀳 => 뀳 +B034 1101 1172 11B3 # 뀴 => 뀴 +B035 1101 1172 11B4 # 뀵 => 뀵 +B036 1101 1172 11B5 # 뀶 => 뀶 +B037 1101 1172 11B6 # 뀷 => 뀷 +B038 1101 1172 11B7 # 뀸 => 뀸 +B039 1101 1172 11B8 # 뀹 => 뀹 +B03A 1101 1172 11B9 # 뀺 => 뀺 +B03B 1101 1172 11BA # 뀻 => 뀻 +B03C 1101 1172 11BB # 뀼 => 뀼 
+B03D 1101 1172 11BC # 뀽 => 뀽 +B03E 1101 1172 11BD # 뀾 => 뀾 +B03F 1101 1172 11BE # 뀿 => 뀿 +B040 1101 1172 11BF # 끀 => 끀 +B041 1101 1172 11C0 # 끁 => 끁 +B042 1101 1172 11C1 # 끂 => 끂 +B043 1101 1172 11C2 # 끃 => 끃 +B044 1101 1173 # 끄 => 끄 +B045 1101 1173 11A8 # 끅 => 끅 +B046 1101 1173 11A9 # 끆 => 끆 +B047 1101 1173 11AA # 끇 => 끇 +B048 1101 1173 11AB # 끈 => 끈 +B049 1101 1173 11AC # 끉 => 끉 +B04A 1101 1173 11AD # 끊 => 끊 +B04B 1101 1173 11AE # 끋 => 끋 +B04C 1101 1173 11AF # 끌 => 끌 +B04D 1101 1173 11B0 # 끍 => 끍 +B04E 1101 1173 11B1 # 끎 => 끎 +B04F 1101 1173 11B2 # 끏 => 끏 +B050 1101 1173 11B3 # 끐 => 끐 +B051 1101 1173 11B4 # 끑 => 끑 +B052 1101 1173 11B5 # 끒 => 끒 +B053 1101 1173 11B6 # 끓 => 끓 +B054 1101 1173 11B7 # 끔 => 끔 +B055 1101 1173 11B8 # 끕 => 끕 +B056 1101 1173 11B9 # 끖 => 끖 +B057 1101 1173 11BA # 끗 => 끗 +B058 1101 1173 11BB # 끘 => 끘 +B059 1101 1173 11BC # 끙 => 끙 +B05A 1101 1173 11BD # 끚 => 끚 +B05B 1101 1173 11BE # 끛 => 끛 +B05C 1101 1173 11BF # 끜 => 끜 +B05D 1101 1173 11C0 # 끝 => 끝 +B05E 1101 1173 11C1 # 끞 => 끞 +B05F 1101 1173 11C2 # 끟 => 끟 +B060 1101 1174 # 끠 => 끠 +B061 1101 1174 11A8 # 끡 => 끡 +B062 1101 1174 11A9 # 끢 => 끢 +B063 1101 1174 11AA # 끣 => 끣 +B064 1101 1174 11AB # 끤 => 끤 +B065 1101 1174 11AC # 끥 => 끥 +B066 1101 1174 11AD # 끦 => 끦 +B067 1101 1174 11AE # 끧 => 끧 +B068 1101 1174 11AF # 끨 => 끨 +B069 1101 1174 11B0 # 끩 => 끩 +B06A 1101 1174 11B1 # 끪 => 끪 +B06B 1101 1174 11B2 # 끫 => 끫 +B06C 1101 1174 11B3 # 끬 => 끬 +B06D 1101 1174 11B4 # 끭 => 끭 +B06E 1101 1174 11B5 # 끮 => 끮 +B06F 1101 1174 11B6 # 끯 => 끯 +B070 1101 1174 11B7 # 끰 => 끰 +B071 1101 1174 11B8 # 끱 => 끱 +B072 1101 1174 11B9 # 끲 => 끲 +B073 1101 1174 11BA # 끳 => 끳 +B074 1101 1174 11BB # 끴 => 끴 +B075 1101 1174 11BC # 끵 => 끵 +B076 1101 1174 11BD # 끶 => 끶 +B077 1101 1174 11BE # 끷 => 끷 +B078 1101 1174 11BF # 끸 => 끸 +B079 1101 1174 11C0 # 끹 => 끹 +B07A 1101 1174 11C1 # 끺 => 끺 +B07B 1101 1174 11C2 # 끻 => 끻 +B07C 1101 1175 # 끼 => 끼 +B07D 1101 1175 11A8 # 끽 => 끽 +B07E 1101 1175 11A9 # 끾 => 끾 +B07F 1101 1175 11AA # 끿 => 끿 
+B080 1101 1175 11AB # 낀 => 낀 +B081 1101 1175 11AC # 낁 => 낁 +B082 1101 1175 11AD # 낂 => 낂 +B083 1101 1175 11AE # 낃 => 낃 +B084 1101 1175 11AF # 낄 => 낄 +B085 1101 1175 11B0 # 낅 => 낅 +B086 1101 1175 11B1 # 낆 => 낆 +B087 1101 1175 11B2 # 낇 => 낇 +B088 1101 1175 11B3 # 낈 => 낈 +B089 1101 1175 11B4 # 낉 => 낉 +B08A 1101 1175 11B5 # 낊 => 낊 +B08B 1101 1175 11B6 # 낋 => 낋 +B08C 1101 1175 11B7 # 낌 => 낌 +B08D 1101 1175 11B8 # 낍 => 낍 +B08E 1101 1175 11B9 # 낎 => 낎 +B08F 1101 1175 11BA # 낏 => 낏 +B090 1101 1175 11BB # 낐 => 낐 +B091 1101 1175 11BC # 낑 => 낑 +B092 1101 1175 11BD # 낒 => 낒 +B093 1101 1175 11BE # 낓 => 낓 +B094 1101 1175 11BF # 낔 => 낔 +B095 1101 1175 11C0 # 낕 => 낕 +B096 1101 1175 11C1 # 낖 => 낖 +B097 1101 1175 11C2 # 낗 => 낗 +B098 1102 1161 # 나 => 나 +B099 1102 1161 11A8 # 낙 => 낙 +B09A 1102 1161 11A9 # 낚 => 낚 +B09B 1102 1161 11AA # 낛 => 낛 +B09C 1102 1161 11AB # 난 => 난 +B09D 1102 1161 11AC # 낝 => 낝 +B09E 1102 1161 11AD # 낞 => 낞 +B09F 1102 1161 11AE # 낟 => 낟 +B0A0 1102 1161 11AF # 날 => 날 +B0A1 1102 1161 11B0 # 낡 => 낡 +B0A2 1102 1161 11B1 # 낢 => 낢 +B0A3 1102 1161 11B2 # 낣 => 낣 +B0A4 1102 1161 11B3 # 낤 => 낤 +B0A5 1102 1161 11B4 # 낥 => 낥 +B0A6 1102 1161 11B5 # 낦 => 낦 +B0A7 1102 1161 11B6 # 낧 => 낧 +B0A8 1102 1161 11B7 # 남 => 남 +B0A9 1102 1161 11B8 # 납 => 납 +B0AA 1102 1161 11B9 # 낪 => 낪 +B0AB 1102 1161 11BA # 낫 => 낫 +B0AC 1102 1161 11BB # 났 => 났 +B0AD 1102 1161 11BC # 낭 => 낭 +B0AE 1102 1161 11BD # 낮 => 낮 +B0AF 1102 1161 11BE # 낯 => 낯 +B0B0 1102 1161 11BF # 낰 => 낰 +B0B1 1102 1161 11C0 # 낱 => 낱 +B0B2 1102 1161 11C1 # 낲 => 낲 +B0B3 1102 1161 11C2 # 낳 => 낳 +B0B4 1102 1162 # 내 => 내 +B0B5 1102 1162 11A8 # 낵 => 낵 +B0B6 1102 1162 11A9 # 낶 => 낶 +B0B7 1102 1162 11AA # 낷 => 낷 +B0B8 1102 1162 11AB # 낸 => 낸 +B0B9 1102 1162 11AC # 낹 => 낹 +B0BA 1102 1162 11AD # 낺 => 낺 +B0BB 1102 1162 11AE # 낻 => 낻 +B0BC 1102 1162 11AF # 낼 => 낼 +B0BD 1102 1162 11B0 # 낽 => 낽 +B0BE 1102 1162 11B1 # 낾 => 낾 +B0BF 1102 1162 11B2 # 낿 => 낿 +B0C0 1102 1162 11B3 # 냀 => 냀 +B0C1 1102 1162 11B4 # 냁 => 냁 +B0C2 1102 1162 11B5 # 냂 => 냂 
+B0C3 1102 1162 11B6 # 냃 => 냃 +B0C4 1102 1162 11B7 # 냄 => 냄 +B0C5 1102 1162 11B8 # 냅 => 냅 +B0C6 1102 1162 11B9 # 냆 => 냆 +B0C7 1102 1162 11BA # 냇 => 냇 +B0C8 1102 1162 11BB # 냈 => 냈 +B0C9 1102 1162 11BC # 냉 => 냉 +B0CA 1102 1162 11BD # 냊 => 냊 +B0CB 1102 1162 11BE # 냋 => 냋 +B0CC 1102 1162 11BF # 냌 => 냌 +B0CD 1102 1162 11C0 # 냍 => 냍 +B0CE 1102 1162 11C1 # 냎 => 냎 +B0CF 1102 1162 11C2 # 냏 => 냏 +B0D0 1102 1163 # 냐 => 냐 +B0D1 1102 1163 11A8 # 냑 => 냑 +B0D2 1102 1163 11A9 # 냒 => 냒 +B0D3 1102 1163 11AA # 냓 => 냓 +B0D4 1102 1163 11AB # 냔 => 냔 +B0D5 1102 1163 11AC # 냕 => 냕 +B0D6 1102 1163 11AD # 냖 => 냖 +B0D7 1102 1163 11AE # 냗 => 냗 +B0D8 1102 1163 11AF # 냘 => 냘 +B0D9 1102 1163 11B0 # 냙 => 냙 +B0DA 1102 1163 11B1 # 냚 => 냚 +B0DB 1102 1163 11B2 # 냛 => 냛 +B0DC 1102 1163 11B3 # 냜 => 냜 +B0DD 1102 1163 11B4 # 냝 => 냝 +B0DE 1102 1163 11B5 # 냞 => 냞 +B0DF 1102 1163 11B6 # 냟 => 냟 +B0E0 1102 1163 11B7 # 냠 => 냠 +B0E1 1102 1163 11B8 # 냡 => 냡 +B0E2 1102 1163 11B9 # 냢 => 냢 +B0E3 1102 1163 11BA # 냣 => 냣 +B0E4 1102 1163 11BB # 냤 => 냤 +B0E5 1102 1163 11BC # 냥 => 냥 +B0E6 1102 1163 11BD # 냦 => 냦 +B0E7 1102 1163 11BE # 냧 => 냧 +B0E8 1102 1163 11BF # 냨 => 냨 +B0E9 1102 1163 11C0 # 냩 => 냩 +B0EA 1102 1163 11C1 # 냪 => 냪 +B0EB 1102 1163 11C2 # 냫 => 냫 +B0EC 1102 1164 # 냬 => 냬 +B0ED 1102 1164 11A8 # 냭 => 냭 +B0EE 1102 1164 11A9 # 냮 => 냮 +B0EF 1102 1164 11AA # 냯 => 냯 +B0F0 1102 1164 11AB # 냰 => 냰 +B0F1 1102 1164 11AC # 냱 => 냱 +B0F2 1102 1164 11AD # 냲 => 냲 +B0F3 1102 1164 11AE # 냳 => 냳 +B0F4 1102 1164 11AF # 냴 => 냴 +B0F5 1102 1164 11B0 # 냵 => 냵 +B0F6 1102 1164 11B1 # 냶 => 냶 +B0F7 1102 1164 11B2 # 냷 => 냷 +B0F8 1102 1164 11B3 # 냸 => 냸 +B0F9 1102 1164 11B4 # 냹 => 냹 +B0FA 1102 1164 11B5 # 냺 => 냺 +B0FB 1102 1164 11B6 # 냻 => 냻 +B0FC 1102 1164 11B7 # 냼 => 냼 +B0FD 1102 1164 11B8 # 냽 => 냽 +B0FE 1102 1164 11B9 # 냾 => 냾 +B0FF 1102 1164 11BA # 냿 => 냿 +B100 1102 1164 11BB # 넀 => 넀 +B101 1102 1164 11BC # 넁 => 넁 +B102 1102 1164 11BD # 넂 => 넂 +B103 1102 1164 11BE # 넃 => 넃 +B104 1102 1164 11BF # 넄 => 넄 +B105 1102 1164 11C0 # 넅 => 넅 
+B106 1102 1164 11C1 # 넆 => 넆 +B107 1102 1164 11C2 # 넇 => 넇 +B108 1102 1165 # 너 => 너 +B109 1102 1165 11A8 # 넉 => 넉 +B10A 1102 1165 11A9 # 넊 => 넊 +B10B 1102 1165 11AA # 넋 => 넋 +B10C 1102 1165 11AB # 넌 => 넌 +B10D 1102 1165 11AC # 넍 => 넍 +B10E 1102 1165 11AD # 넎 => 넎 +B10F 1102 1165 11AE # 넏 => 넏 +B110 1102 1165 11AF # 널 => 널 +B111 1102 1165 11B0 # 넑 => 넑 +B112 1102 1165 11B1 # 넒 => 넒 +B113 1102 1165 11B2 # 넓 => 넓 +B114 1102 1165 11B3 # 넔 => 넔 +B115 1102 1165 11B4 # 넕 => 넕 +B116 1102 1165 11B5 # 넖 => 넖 +B117 1102 1165 11B6 # 넗 => 넗 +B118 1102 1165 11B7 # 넘 => 넘 +B119 1102 1165 11B8 # 넙 => 넙 +B11A 1102 1165 11B9 # 넚 => 넚 +B11B 1102 1165 11BA # 넛 => 넛 +B11C 1102 1165 11BB # 넜 => 넜 +B11D 1102 1165 11BC # 넝 => 넝 +B11E 1102 1165 11BD # 넞 => 넞 +B11F 1102 1165 11BE # 넟 => 넟 +B120 1102 1165 11BF # 넠 => 넠 +B121 1102 1165 11C0 # 넡 => 넡 +B122 1102 1165 11C1 # 넢 => 넢 +B123 1102 1165 11C2 # 넣 => 넣 +B124 1102 1166 # 네 => 네 +B125 1102 1166 11A8 # 넥 => 넥 +B126 1102 1166 11A9 # 넦 => 넦 +B127 1102 1166 11AA # 넧 => 넧 +B128 1102 1166 11AB # 넨 => 넨 +B129 1102 1166 11AC # 넩 => 넩 +B12A 1102 1166 11AD # 넪 => 넪 +B12B 1102 1166 11AE # 넫 => 넫 +B12C 1102 1166 11AF # 넬 => 넬 +B12D 1102 1166 11B0 # 넭 => 넭 +B12E 1102 1166 11B1 # 넮 => 넮 +B12F 1102 1166 11B2 # 넯 => 넯 +B130 1102 1166 11B3 # 넰 => 넰 +B131 1102 1166 11B4 # 넱 => 넱 +B132 1102 1166 11B5 # 넲 => 넲 +B133 1102 1166 11B6 # 넳 => 넳 +B134 1102 1166 11B7 # 넴 => 넴 +B135 1102 1166 11B8 # 넵 => 넵 +B136 1102 1166 11B9 # 넶 => 넶 +B137 1102 1166 11BA # 넷 => 넷 +B138 1102 1166 11BB # 넸 => 넸 +B139 1102 1166 11BC # 넹 => 넹 +B13A 1102 1166 11BD # 넺 => 넺 +B13B 1102 1166 11BE # 넻 => 넻 +B13C 1102 1166 11BF # 넼 => 넼 +B13D 1102 1166 11C0 # 넽 => 넽 +B13E 1102 1166 11C1 # 넾 => 넾 +B13F 1102 1166 11C2 # 넿 => 넿 +B140 1102 1167 # 녀 => 녀 +B141 1102 1167 11A8 # 녁 => 녁 +B142 1102 1167 11A9 # 녂 => 녂 +B143 1102 1167 11AA # 녃 => 녃 +B144 1102 1167 11AB # 년 => 년 +B145 1102 1167 11AC # 녅 => 녅 +B146 1102 1167 11AD # 녆 => 녆 +B147 1102 1167 11AE # 녇 => 녇 +B148 1102 1167 11AF # 녈 => 녈 
+B149 1102 1167 11B0 # 녉 => 녉 +B14A 1102 1167 11B1 # 녊 => 녊 +B14B 1102 1167 11B2 # 녋 => 녋 +B14C 1102 1167 11B3 # 녌 => 녌 +B14D 1102 1167 11B4 # 녍 => 녍 +B14E 1102 1167 11B5 # 녎 => 녎 +B14F 1102 1167 11B6 # 녏 => 녏 +B150 1102 1167 11B7 # 념 => 념 +B151 1102 1167 11B8 # 녑 => 녑 +B152 1102 1167 11B9 # 녒 => 녒 +B153 1102 1167 11BA # 녓 => 녓 +B154 1102 1167 11BB # 녔 => 녔 +B155 1102 1167 11BC # 녕 => 녕 +B156 1102 1167 11BD # 녖 => 녖 +B157 1102 1167 11BE # 녗 => 녗 +B158 1102 1167 11BF # 녘 => 녘 +B159 1102 1167 11C0 # 녙 => 녙 +B15A 1102 1167 11C1 # 녚 => 녚 +B15B 1102 1167 11C2 # 녛 => 녛 +B15C 1102 1168 # 녜 => 녜 +B15D 1102 1168 11A8 # 녝 => 녝 +B15E 1102 1168 11A9 # 녞 => 녞 +B15F 1102 1168 11AA # 녟 => 녟 +B160 1102 1168 11AB # 녠 => 녠 +B161 1102 1168 11AC # 녡 => 녡 +B162 1102 1168 11AD # 녢 => 녢 +B163 1102 1168 11AE # 녣 => 녣 +B164 1102 1168 11AF # 녤 => 녤 +B165 1102 1168 11B0 # 녥 => 녥 +B166 1102 1168 11B1 # 녦 => 녦 +B167 1102 1168 11B2 # 녧 => 녧 +B168 1102 1168 11B3 # 녨 => 녨 +B169 1102 1168 11B4 # 녩 => 녩 +B16A 1102 1168 11B5 # 녪 => 녪 +B16B 1102 1168 11B6 # 녫 => 녫 +B16C 1102 1168 11B7 # 녬 => 녬 +B16D 1102 1168 11B8 # 녭 => 녭 +B16E 1102 1168 11B9 # 녮 => 녮 +B16F 1102 1168 11BA # 녯 => 녯 +B170 1102 1168 11BB # 녰 => 녰 +B171 1102 1168 11BC # 녱 => 녱 +B172 1102 1168 11BD # 녲 => 녲 +B173 1102 1168 11BE # 녳 => 녳 +B174 1102 1168 11BF # 녴 => 녴 +B175 1102 1168 11C0 # 녵 => 녵 +B176 1102 1168 11C1 # 녶 => 녶 +B177 1102 1168 11C2 # 녷 => 녷 +B178 1102 1169 # 노 => 노 +B179 1102 1169 11A8 # 녹 => 녹 +B17A 1102 1169 11A9 # 녺 => 녺 +B17B 1102 1169 11AA # 녻 => 녻 +B17C 1102 1169 11AB # 논 => 논 +B17D 1102 1169 11AC # 녽 => 녽 +B17E 1102 1169 11AD # 녾 => 녾 +B17F 1102 1169 11AE # 녿 => 녿 +B180 1102 1169 11AF # 놀 => 놀 +B181 1102 1169 11B0 # 놁 => 놁 +B182 1102 1169 11B1 # 놂 => 놂 +B183 1102 1169 11B2 # 놃 => 놃 +B184 1102 1169 11B3 # 놄 => 놄 +B185 1102 1169 11B4 # 놅 => 놅 +B186 1102 1169 11B5 # 놆 => 놆 +B187 1102 1169 11B6 # 놇 => 놇 +B188 1102 1169 11B7 # 놈 => 놈 +B189 1102 1169 11B8 # 놉 => 놉 +B18A 1102 1169 11B9 # 놊 => 놊 +B18B 1102 1169 11BA # 놋 => 놋 
+B18C 1102 1169 11BB # 놌 => 놌 +B18D 1102 1169 11BC # 농 => 농 +B18E 1102 1169 11BD # 놎 => 놎 +B18F 1102 1169 11BE # 놏 => 놏 +B190 1102 1169 11BF # 놐 => 놐 +B191 1102 1169 11C0 # 놑 => 놑 +B192 1102 1169 11C1 # 높 => 높 +B193 1102 1169 11C2 # 놓 => 놓 +B194 1102 116A # 놔 => 놔 +B195 1102 116A 11A8 # 놕 => 놕 +B196 1102 116A 11A9 # 놖 => 놖 +B197 1102 116A 11AA # 놗 => 놗 +B198 1102 116A 11AB # 놘 => 놘 +B199 1102 116A 11AC # 놙 => 놙 +B19A 1102 116A 11AD # 놚 => 놚 +B19B 1102 116A 11AE # 놛 => 놛 +B19C 1102 116A 11AF # 놜 => 놜 +B19D 1102 116A 11B0 # 놝 => 놝 +B19E 1102 116A 11B1 # 놞 => 놞 +B19F 1102 116A 11B2 # 놟 => 놟 +B1A0 1102 116A 11B3 # 놠 => 놠 +B1A1 1102 116A 11B4 # 놡 => 놡 +B1A2 1102 116A 11B5 # 놢 => 놢 +B1A3 1102 116A 11B6 # 놣 => 놣 +B1A4 1102 116A 11B7 # 놤 => 놤 +B1A5 1102 116A 11B8 # 놥 => 놥 +B1A6 1102 116A 11B9 # 놦 => 놦 +B1A7 1102 116A 11BA # 놧 => 놧 +B1A8 1102 116A 11BB # 놨 => 놨 +B1A9 1102 116A 11BC # 놩 => 놩 +B1AA 1102 116A 11BD # 놪 => 놪 +B1AB 1102 116A 11BE # 놫 => 놫 +B1AC 1102 116A 11BF # 놬 => 놬 +B1AD 1102 116A 11C0 # 놭 => 놭 +B1AE 1102 116A 11C1 # 놮 => 놮 +B1AF 1102 116A 11C2 # 놯 => 놯 +B1B0 1102 116B # 놰 => 놰 +B1B1 1102 116B 11A8 # 놱 => 놱 +B1B2 1102 116B 11A9 # 놲 => 놲 +B1B3 1102 116B 11AA # 놳 => 놳 +B1B4 1102 116B 11AB # 놴 => 놴 +B1B5 1102 116B 11AC # 놵 => 놵 +B1B6 1102 116B 11AD # 놶 => 놶 +B1B7 1102 116B 11AE # 놷 => 놷 +B1B8 1102 116B 11AF # 놸 => 놸 +B1B9 1102 116B 11B0 # 놹 => 놹 +B1BA 1102 116B 11B1 # 놺 => 놺 +B1BB 1102 116B 11B2 # 놻 => 놻 +B1BC 1102 116B 11B3 # 놼 => 놼 +B1BD 1102 116B 11B4 # 놽 => 놽 +B1BE 1102 116B 11B5 # 놾 => 놾 +B1BF 1102 116B 11B6 # 놿 => 놿 +B1C0 1102 116B 11B7 # 뇀 => 뇀 +B1C1 1102 116B 11B8 # 뇁 => 뇁 +B1C2 1102 116B 11B9 # 뇂 => 뇂 +B1C3 1102 116B 11BA # 뇃 => 뇃 +B1C4 1102 116B 11BB # 뇄 => 뇄 +B1C5 1102 116B 11BC # 뇅 => 뇅 +B1C6 1102 116B 11BD # 뇆 => 뇆 +B1C7 1102 116B 11BE # 뇇 => 뇇 +B1C8 1102 116B 11BF # 뇈 => 뇈 +B1C9 1102 116B 11C0 # 뇉 => 뇉 +B1CA 1102 116B 11C1 # 뇊 => 뇊 +B1CB 1102 116B 11C2 # 뇋 => 뇋 +B1CC 1102 116C # 뇌 => 뇌 +B1CD 1102 116C 11A8 # 뇍 => 뇍 +B1CE 1102 116C 11A9 # 뇎 => 뇎 
+B1CF 1102 116C 11AA # 뇏 => 뇏 +B1D0 1102 116C 11AB # 뇐 => 뇐 +B1D1 1102 116C 11AC # 뇑 => 뇑 +B1D2 1102 116C 11AD # 뇒 => 뇒 +B1D3 1102 116C 11AE # 뇓 => 뇓 +B1D4 1102 116C 11AF # 뇔 => 뇔 +B1D5 1102 116C 11B0 # 뇕 => 뇕 +B1D6 1102 116C 11B1 # 뇖 => 뇖 +B1D7 1102 116C 11B2 # 뇗 => 뇗 +B1D8 1102 116C 11B3 # 뇘 => 뇘 +B1D9 1102 116C 11B4 # 뇙 => 뇙 +B1DA 1102 116C 11B5 # 뇚 => 뇚 +B1DB 1102 116C 11B6 # 뇛 => 뇛 +B1DC 1102 116C 11B7 # 뇜 => 뇜 +B1DD 1102 116C 11B8 # 뇝 => 뇝 +B1DE 1102 116C 11B9 # 뇞 => 뇞 +B1DF 1102 116C 11BA # 뇟 => 뇟 +B1E0 1102 116C 11BB # 뇠 => 뇠 +B1E1 1102 116C 11BC # 뇡 => 뇡 +B1E2 1102 116C 11BD # 뇢 => 뇢 +B1E3 1102 116C 11BE # 뇣 => 뇣 +B1E4 1102 116C 11BF # 뇤 => 뇤 +B1E5 1102 116C 11C0 # 뇥 => 뇥 +B1E6 1102 116C 11C1 # 뇦 => 뇦 +B1E7 1102 116C 11C2 # 뇧 => 뇧 +B1E8 1102 116D # 뇨 => 뇨 +B1E9 1102 116D 11A8 # 뇩 => 뇩 +B1EA 1102 116D 11A9 # 뇪 => 뇪 +B1EB 1102 116D 11AA # 뇫 => 뇫 +B1EC 1102 116D 11AB # 뇬 => 뇬 +B1ED 1102 116D 11AC # 뇭 => 뇭 +B1EE 1102 116D 11AD # 뇮 => 뇮 +B1EF 1102 116D 11AE # 뇯 => 뇯 +B1F0 1102 116D 11AF # 뇰 => 뇰 +B1F1 1102 116D 11B0 # 뇱 => 뇱 +B1F2 1102 116D 11B1 # 뇲 => 뇲 +B1F3 1102 116D 11B2 # 뇳 => 뇳 +B1F4 1102 116D 11B3 # 뇴 => 뇴 +B1F5 1102 116D 11B4 # 뇵 => 뇵 +B1F6 1102 116D 11B5 # 뇶 => 뇶 +B1F7 1102 116D 11B6 # 뇷 => 뇷 +B1F8 1102 116D 11B7 # 뇸 => 뇸 +B1F9 1102 116D 11B8 # 뇹 => 뇹 +B1FA 1102 116D 11B9 # 뇺 => 뇺 +B1FB 1102 116D 11BA # 뇻 => 뇻 +B1FC 1102 116D 11BB # 뇼 => 뇼 +B1FD 1102 116D 11BC # 뇽 => 뇽 +B1FE 1102 116D 11BD # 뇾 => 뇾 +B1FF 1102 116D 11BE # 뇿 => 뇿 +B200 1102 116D 11BF # 눀 => 눀 +B201 1102 116D 11C0 # 눁 => 눁 +B202 1102 116D 11C1 # 눂 => 눂 +B203 1102 116D 11C2 # 눃 => 눃 +B204 1102 116E # 누 => 누 +B205 1102 116E 11A8 # 눅 => 눅 +B206 1102 116E 11A9 # 눆 => 눆 +B207 1102 116E 11AA # 눇 => 눇 +B208 1102 116E 11AB # 눈 => 눈 +B209 1102 116E 11AC # 눉 => 눉 +B20A 1102 116E 11AD # 눊 => 눊 +B20B 1102 116E 11AE # 눋 => 눋 +B20C 1102 116E 11AF # 눌 => 눌 +B20D 1102 116E 11B0 # 눍 => 눍 +B20E 1102 116E 11B1 # 눎 => 눎 +B20F 1102 116E 11B2 # 눏 => 눏 +B210 1102 116E 11B3 # 눐 => 눐 +B211 1102 116E 11B4 # 눑 => 눑 
+B212 1102 116E 11B5 # 눒 => 눒 +B213 1102 116E 11B6 # 눓 => 눓 +B214 1102 116E 11B7 # 눔 => 눔 +B215 1102 116E 11B8 # 눕 => 눕 +B216 1102 116E 11B9 # 눖 => 눖 +B217 1102 116E 11BA # 눗 => 눗 +B218 1102 116E 11BB # 눘 => 눘 +B219 1102 116E 11BC # 눙 => 눙 +B21A 1102 116E 11BD # 눚 => 눚 +B21B 1102 116E 11BE # 눛 => 눛 +B21C 1102 116E 11BF # 눜 => 눜 +B21D 1102 116E 11C0 # 눝 => 눝 +B21E 1102 116E 11C1 # 눞 => 눞 +B21F 1102 116E 11C2 # 눟 => 눟 +B220 1102 116F # 눠 => 눠 +B221 1102 116F 11A8 # 눡 => 눡 +B222 1102 116F 11A9 # 눢 => 눢 +B223 1102 116F 11AA # 눣 => 눣 +B224 1102 116F 11AB # 눤 => 눤 +B225 1102 116F 11AC # 눥 => 눥 +B226 1102 116F 11AD # 눦 => 눦 +B227 1102 116F 11AE # 눧 => 눧 +B228 1102 116F 11AF # 눨 => 눨 +B229 1102 116F 11B0 # 눩 => 눩 +B22A 1102 116F 11B1 # 눪 => 눪 +B22B 1102 116F 11B2 # 눫 => 눫 +B22C 1102 116F 11B3 # 눬 => 눬 +B22D 1102 116F 11B4 # 눭 => 눭 +B22E 1102 116F 11B5 # 눮 => 눮 +B22F 1102 116F 11B6 # 눯 => 눯 +B230 1102 116F 11B7 # 눰 => 눰 +B231 1102 116F 11B8 # 눱 => 눱 +B232 1102 116F 11B9 # 눲 => 눲 +B233 1102 116F 11BA # 눳 => 눳 +B234 1102 116F 11BB # 눴 => 눴 +B235 1102 116F 11BC # 눵 => 눵 +B236 1102 116F 11BD # 눶 => 눶 +B237 1102 116F 11BE # 눷 => 눷 +B238 1102 116F 11BF # 눸 => 눸 +B239 1102 116F 11C0 # 눹 => 눹 +B23A 1102 116F 11C1 # 눺 => 눺 +B23B 1102 116F 11C2 # 눻 => 눻 +B23C 1102 1170 # 눼 => 눼 +B23D 1102 1170 11A8 # 눽 => 눽 +B23E 1102 1170 11A9 # 눾 => 눾 +B23F 1102 1170 11AA # 눿 => 눿 +B240 1102 1170 11AB # 뉀 => 뉀 +B241 1102 1170 11AC # 뉁 => 뉁 +B242 1102 1170 11AD # 뉂 => 뉂 +B243 1102 1170 11AE # 뉃 => 뉃 +B244 1102 1170 11AF # 뉄 => 뉄 +B245 1102 1170 11B0 # 뉅 => 뉅 +B246 1102 1170 11B1 # 뉆 => 뉆 +B247 1102 1170 11B2 # 뉇 => 뉇 +B248 1102 1170 11B3 # 뉈 => 뉈 +B249 1102 1170 11B4 # 뉉 => 뉉 +B24A 1102 1170 11B5 # 뉊 => 뉊 +B24B 1102 1170 11B6 # 뉋 => 뉋 +B24C 1102 1170 11B7 # 뉌 => 뉌 +B24D 1102 1170 11B8 # 뉍 => 뉍 +B24E 1102 1170 11B9 # 뉎 => 뉎 +B24F 1102 1170 11BA # 뉏 => 뉏 +B250 1102 1170 11BB # 뉐 => 뉐 +B251 1102 1170 11BC # 뉑 => 뉑 +B252 1102 1170 11BD # 뉒 => 뉒 +B253 1102 1170 11BE # 뉓 => 뉓 +B254 1102 1170 11BF # 뉔 => 뉔 
+B255 1102 1170 11C0 # 뉕 => 뉕 +B256 1102 1170 11C1 # 뉖 => 뉖 +B257 1102 1170 11C2 # 뉗 => 뉗 +B258 1102 1171 # 뉘 => 뉘 +B259 1102 1171 11A8 # 뉙 => 뉙 +B25A 1102 1171 11A9 # 뉚 => 뉚 +B25B 1102 1171 11AA # 뉛 => 뉛 +B25C 1102 1171 11AB # 뉜 => 뉜 +B25D 1102 1171 11AC # 뉝 => 뉝 +B25E 1102 1171 11AD # 뉞 => 뉞 +B25F 1102 1171 11AE # 뉟 => 뉟 +B260 1102 1171 11AF # 뉠 => 뉠 +B261 1102 1171 11B0 # 뉡 => 뉡 +B262 1102 1171 11B1 # 뉢 => 뉢 +B263 1102 1171 11B2 # 뉣 => 뉣 +B264 1102 1171 11B3 # 뉤 => 뉤 +B265 1102 1171 11B4 # 뉥 => 뉥 +B266 1102 1171 11B5 # 뉦 => 뉦 +B267 1102 1171 11B6 # 뉧 => 뉧 +B268 1102 1171 11B7 # 뉨 => 뉨 +B269 1102 1171 11B8 # 뉩 => 뉩 +B26A 1102 1171 11B9 # 뉪 => 뉪 +B26B 1102 1171 11BA # 뉫 => 뉫 +B26C 1102 1171 11BB # 뉬 => 뉬 +B26D 1102 1171 11BC # 뉭 => 뉭 +B26E 1102 1171 11BD # 뉮 => 뉮 +B26F 1102 1171 11BE # 뉯 => 뉯 +B270 1102 1171 11BF # 뉰 => 뉰 +B271 1102 1171 11C0 # 뉱 => 뉱 +B272 1102 1171 11C1 # 뉲 => 뉲 +B273 1102 1171 11C2 # 뉳 => 뉳 +B274 1102 1172 # 뉴 => 뉴 +B275 1102 1172 11A8 # 뉵 => 뉵 +B276 1102 1172 11A9 # 뉶 => 뉶 +B277 1102 1172 11AA # 뉷 => 뉷 +B278 1102 1172 11AB # 뉸 => 뉸 +B279 1102 1172 11AC # 뉹 => 뉹 +B27A 1102 1172 11AD # 뉺 => 뉺 +B27B 1102 1172 11AE # 뉻 => 뉻 +B27C 1102 1172 11AF # 뉼 => 뉼 +B27D 1102 1172 11B0 # 뉽 => 뉽 +B27E 1102 1172 11B1 # 뉾 => 뉾 +B27F 1102 1172 11B2 # 뉿 => 뉿 +B280 1102 1172 11B3 # 늀 => 늀 +B281 1102 1172 11B4 # 늁 => 늁 +B282 1102 1172 11B5 # 늂 => 늂 +B283 1102 1172 11B6 # 늃 => 늃 +B284 1102 1172 11B7 # 늄 => 늄 +B285 1102 1172 11B8 # 늅 => 늅 +B286 1102 1172 11B9 # 늆 => 늆 +B287 1102 1172 11BA # 늇 => 늇 +B288 1102 1172 11BB # 늈 => 늈 +B289 1102 1172 11BC # 늉 => 늉 +B28A 1102 1172 11BD # 늊 => 늊 +B28B 1102 1172 11BE # 늋 => 늋 +B28C 1102 1172 11BF # 늌 => 늌 +B28D 1102 1172 11C0 # 늍 => 늍 +B28E 1102 1172 11C1 # 늎 => 늎 +B28F 1102 1172 11C2 # 늏 => 늏 +B290 1102 1173 # 느 => 느 +B291 1102 1173 11A8 # 늑 => 늑 +B292 1102 1173 11A9 # 늒 => 늒 +B293 1102 1173 11AA # 늓 => 늓 +B294 1102 1173 11AB # 는 => 는 +B295 1102 1173 11AC # 늕 => 늕 +B296 1102 1173 11AD # 늖 => 늖 +B297 1102 1173 11AE # 늗 => 늗 
+B298 1102 1173 11AF # 늘 => 늘 +B299 1102 1173 11B0 # 늙 => 늙 +B29A 1102 1173 11B1 # 늚 => 늚 +B29B 1102 1173 11B2 # 늛 => 늛 +B29C 1102 1173 11B3 # 늜 => 늜 +B29D 1102 1173 11B4 # 늝 => 늝 +B29E 1102 1173 11B5 # 늞 => 늞 +B29F 1102 1173 11B6 # 늟 => 늟 +B2A0 1102 1173 11B7 # 늠 => 늠 +B2A1 1102 1173 11B8 # 늡 => 늡 +B2A2 1102 1173 11B9 # 늢 => 늢 +B2A3 1102 1173 11BA # 늣 => 늣 +B2A4 1102 1173 11BB # 늤 => 늤 +B2A5 1102 1173 11BC # 능 => 능 +B2A6 1102 1173 11BD # 늦 => 늦 +B2A7 1102 1173 11BE # 늧 => 늧 +B2A8 1102 1173 11BF # 늨 => 늨 +B2A9 1102 1173 11C0 # 늩 => 늩 +B2AA 1102 1173 11C1 # 늪 => 늪 +B2AB 1102 1173 11C2 # 늫 => 늫 +B2AC 1102 1174 # 늬 => 늬 +B2AD 1102 1174 11A8 # 늭 => 늭 +B2AE 1102 1174 11A9 # 늮 => 늮 +B2AF 1102 1174 11AA # 늯 => 늯 +B2B0 1102 1174 11AB # 늰 => 늰 +B2B1 1102 1174 11AC # 늱 => 늱 +B2B2 1102 1174 11AD # 늲 => 늲 +B2B3 1102 1174 11AE # 늳 => 늳 +B2B4 1102 1174 11AF # 늴 => 늴 +B2B5 1102 1174 11B0 # 늵 => 늵 +B2B6 1102 1174 11B1 # 늶 => 늶 +B2B7 1102 1174 11B2 # 늷 => 늷 +B2B8 1102 1174 11B3 # 늸 => 늸 +B2B9 1102 1174 11B4 # 늹 => 늹 +B2BA 1102 1174 11B5 # 늺 => 늺 +B2BB 1102 1174 11B6 # 늻 => 늻 +B2BC 1102 1174 11B7 # 늼 => 늼 +B2BD 1102 1174 11B8 # 늽 => 늽 +B2BE 1102 1174 11B9 # 늾 => 늾 +B2BF 1102 1174 11BA # 늿 => 늿 +B2C0 1102 1174 11BB # 닀 => 닀 +B2C1 1102 1174 11BC # 닁 => 닁 +B2C2 1102 1174 11BD # 닂 => 닂 +B2C3 1102 1174 11BE # 닃 => 닃 +B2C4 1102 1174 11BF # 닄 => 닄 +B2C5 1102 1174 11C0 # 닅 => 닅 +B2C6 1102 1174 11C1 # 닆 => 닆 +B2C7 1102 1174 11C2 # 닇 => 닇 +B2C8 1102 1175 # 니 => 니 +B2C9 1102 1175 11A8 # 닉 => 닉 +B2CA 1102 1175 11A9 # 닊 => 닊 +B2CB 1102 1175 11AA # 닋 => 닋 +B2CC 1102 1175 11AB # 닌 => 닌 +B2CD 1102 1175 11AC # 닍 => 닍 +B2CE 1102 1175 11AD # 닎 => 닎 +B2CF 1102 1175 11AE # 닏 => 닏 +B2D0 1102 1175 11AF # 닐 => 닐 +B2D1 1102 1175 11B0 # 닑 => 닑 +B2D2 1102 1175 11B1 # 닒 => 닒 +B2D3 1102 1175 11B2 # 닓 => 닓 +B2D4 1102 1175 11B3 # 닔 => 닔 +B2D5 1102 1175 11B4 # 닕 => 닕 +B2D6 1102 1175 11B5 # 닖 => 닖 +B2D7 1102 1175 11B6 # 닗 => 닗 +B2D8 1102 1175 11B7 # 님 => 님 +B2D9 1102 1175 11B8 # 닙 => 닙 +B2DA 1102 1175 11B9 # 닚 => 닚 
+B2DB 1102 1175 11BA # 닛 => 닛 +B2DC 1102 1175 11BB # 닜 => 닜 +B2DD 1102 1175 11BC # 닝 => 닝 +B2DE 1102 1175 11BD # 닞 => 닞 +B2DF 1102 1175 11BE # 닟 => 닟 +B2E0 1102 1175 11BF # 닠 => 닠 +B2E1 1102 1175 11C0 # 닡 => 닡 +B2E2 1102 1175 11C1 # 닢 => 닢 +B2E3 1102 1175 11C2 # 닣 => 닣 +B2E4 1103 1161 # 다 => 다 +B2E5 1103 1161 11A8 # 닥 => 닥 +B2E6 1103 1161 11A9 # 닦 => 닦 +B2E7 1103 1161 11AA # 닧 => 닧 +B2E8 1103 1161 11AB # 단 => 단 +B2E9 1103 1161 11AC # 닩 => 닩 +B2EA 1103 1161 11AD # 닪 => 닪 +B2EB 1103 1161 11AE # 닫 => 닫 +B2EC 1103 1161 11AF # 달 => 달 +B2ED 1103 1161 11B0 # 닭 => 닭 +B2EE 1103 1161 11B1 # 닮 => 닮 +B2EF 1103 1161 11B2 # 닯 => 닯 +B2F0 1103 1161 11B3 # 닰 => 닰 +B2F1 1103 1161 11B4 # 닱 => 닱 +B2F2 1103 1161 11B5 # 닲 => 닲 +B2F3 1103 1161 11B6 # 닳 => 닳 +B2F4 1103 1161 11B7 # 담 => 담 +B2F5 1103 1161 11B8 # 답 => 답 +B2F6 1103 1161 11B9 # 닶 => 닶 +B2F7 1103 1161 11BA # 닷 => 닷 +B2F8 1103 1161 11BB # 닸 => 닸 +B2F9 1103 1161 11BC # 당 => 당 +B2FA 1103 1161 11BD # 닺 => 닺 +B2FB 1103 1161 11BE # 닻 => 닻 +B2FC 1103 1161 11BF # 닼 => 닼 +B2FD 1103 1161 11C0 # 닽 => 닽 +B2FE 1103 1161 11C1 # 닾 => 닾 +B2FF 1103 1161 11C2 # 닿 => 닿 +B300 1103 1162 # 대 => 대 +B301 1103 1162 11A8 # 댁 => 댁 +B302 1103 1162 11A9 # 댂 => 댂 +B303 1103 1162 11AA # 댃 => 댃 +B304 1103 1162 11AB # 댄 => 댄 +B305 1103 1162 11AC # 댅 => 댅 +B306 1103 1162 11AD # 댆 => 댆 +B307 1103 1162 11AE # 댇 => 댇 +B308 1103 1162 11AF # 댈 => 댈 +B309 1103 1162 11B0 # 댉 => 댉 +B30A 1103 1162 11B1 # 댊 => 댊 +B30B 1103 1162 11B2 # 댋 => 댋 +B30C 1103 1162 11B3 # 댌 => 댌 +B30D 1103 1162 11B4 # 댍 => 댍 +B30E 1103 1162 11B5 # 댎 => 댎 +B30F 1103 1162 11B6 # 댏 => 댏 +B310 1103 1162 11B7 # 댐 => 댐 +B311 1103 1162 11B8 # 댑 => 댑 +B312 1103 1162 11B9 # 댒 => 댒 +B313 1103 1162 11BA # 댓 => 댓 +B314 1103 1162 11BB # 댔 => 댔 +B315 1103 1162 11BC # 댕 => 댕 +B316 1103 1162 11BD # 댖 => 댖 +B317 1103 1162 11BE # 댗 => 댗 +B318 1103 1162 11BF # 댘 => 댘 +B319 1103 1162 11C0 # 댙 => 댙 +B31A 1103 1162 11C1 # 댚 => 댚 +B31B 1103 1162 11C2 # 댛 => 댛 +B31C 1103 1163 # 댜 => 댜 +B31D 1103 1163 11A8 # 댝 => 댝 
+B31E 1103 1163 11A9 # 댞 => 댞 +B31F 1103 1163 11AA # 댟 => 댟 +B320 1103 1163 11AB # 댠 => 댠 +B321 1103 1163 11AC # 댡 => 댡 +B322 1103 1163 11AD # 댢 => 댢 +B323 1103 1163 11AE # 댣 => 댣 +B324 1103 1163 11AF # 댤 => 댤 +B325 1103 1163 11B0 # 댥 => 댥 +B326 1103 1163 11B1 # 댦 => 댦 +B327 1103 1163 11B2 # 댧 => 댧 +B328 1103 1163 11B3 # 댨 => 댨 +B329 1103 1163 11B4 # 댩 => 댩 +B32A 1103 1163 11B5 # 댪 => 댪 +B32B 1103 1163 11B6 # 댫 => 댫 +B32C 1103 1163 11B7 # 댬 => 댬 +B32D 1103 1163 11B8 # 댭 => 댭 +B32E 1103 1163 11B9 # 댮 => 댮 +B32F 1103 1163 11BA # 댯 => 댯 +B330 1103 1163 11BB # 댰 => 댰 +B331 1103 1163 11BC # 댱 => 댱 +B332 1103 1163 11BD # 댲 => 댲 +B333 1103 1163 11BE # 댳 => 댳 +B334 1103 1163 11BF # 댴 => 댴 +B335 1103 1163 11C0 # 댵 => 댵 +B336 1103 1163 11C1 # 댶 => 댶 +B337 1103 1163 11C2 # 댷 => 댷 +B338 1103 1164 # 댸 => 댸 +B339 1103 1164 11A8 # 댹 => 댹 +B33A 1103 1164 11A9 # 댺 => 댺 +B33B 1103 1164 11AA # 댻 => 댻 +B33C 1103 1164 11AB # 댼 => 댼 +B33D 1103 1164 11AC # 댽 => 댽 +B33E 1103 1164 11AD # 댾 => 댾 +B33F 1103 1164 11AE # 댿 => 댿 +B340 1103 1164 11AF # 덀 => 덀 +B341 1103 1164 11B0 # 덁 => 덁 +B342 1103 1164 11B1 # 덂 => 덂 +B343 1103 1164 11B2 # 덃 => 덃 +B344 1103 1164 11B3 # 덄 => 덄 +B345 1103 1164 11B4 # 덅 => 덅 +B346 1103 1164 11B5 # 덆 => 덆 +B347 1103 1164 11B6 # 덇 => 덇 +B348 1103 1164 11B7 # 덈 => 덈 +B349 1103 1164 11B8 # 덉 => 덉 +B34A 1103 1164 11B9 # 덊 => 덊 +B34B 1103 1164 11BA # 덋 => 덋 +B34C 1103 1164 11BB # 덌 => 덌 +B34D 1103 1164 11BC # 덍 => 덍 +B34E 1103 1164 11BD # 덎 => 덎 +B34F 1103 1164 11BE # 덏 => 덏 +B350 1103 1164 11BF # 덐 => 덐 +B351 1103 1164 11C0 # 덑 => 덑 +B352 1103 1164 11C1 # 덒 => 덒 +B353 1103 1164 11C2 # 덓 => 덓 +B354 1103 1165 # 더 => 더 +B355 1103 1165 11A8 # 덕 => 덕 +B356 1103 1165 11A9 # 덖 => 덖 +B357 1103 1165 11AA # 덗 => 덗 +B358 1103 1165 11AB # 던 => 던 +B359 1103 1165 11AC # 덙 => 덙 +B35A 1103 1165 11AD # 덚 => 덚 +B35B 1103 1165 11AE # 덛 => 덛 +B35C 1103 1165 11AF # 덜 => 덜 +B35D 1103 1165 11B0 # 덝 => 덝 +B35E 1103 1165 11B1 # 덞 => 덞 +B35F 1103 1165 11B2 # 덟 => 덟 +B360 1103 1165 11B3 # 덠 => 덠 
+B361 1103 1165 11B4 # 덡 => 덡 +B362 1103 1165 11B5 # 덢 => 덢 +B363 1103 1165 11B6 # 덣 => 덣 +B364 1103 1165 11B7 # 덤 => 덤 +B365 1103 1165 11B8 # 덥 => 덥 +B366 1103 1165 11B9 # 덦 => 덦 +B367 1103 1165 11BA # 덧 => 덧 +B368 1103 1165 11BB # 덨 => 덨 +B369 1103 1165 11BC # 덩 => 덩 +B36A 1103 1165 11BD # 덪 => 덪 +B36B 1103 1165 11BE # 덫 => 덫 +B36C 1103 1165 11BF # 덬 => 덬 +B36D 1103 1165 11C0 # 덭 => 덭 +B36E 1103 1165 11C1 # 덮 => 덮 +B36F 1103 1165 11C2 # 덯 => 덯 +B370 1103 1166 # 데 => 데 +B371 1103 1166 11A8 # 덱 => 덱 +B372 1103 1166 11A9 # 덲 => 덲 +B373 1103 1166 11AA # 덳 => 덳 +B374 1103 1166 11AB # 덴 => 덴 +B375 1103 1166 11AC # 덵 => 덵 +B376 1103 1166 11AD # 덶 => 덶 +B377 1103 1166 11AE # 덷 => 덷 +B378 1103 1166 11AF # 델 => 델 +B379 1103 1166 11B0 # 덹 => 덹 +B37A 1103 1166 11B1 # 덺 => 덺 +B37B 1103 1166 11B2 # 덻 => 덻 +B37C 1103 1166 11B3 # 덼 => 덼 +B37D 1103 1166 11B4 # 덽 => 덽 +B37E 1103 1166 11B5 # 덾 => 덾 +B37F 1103 1166 11B6 # 덿 => 덿 +B380 1103 1166 11B7 # 뎀 => 뎀 +B381 1103 1166 11B8 # 뎁 => 뎁 +B382 1103 1166 11B9 # 뎂 => 뎂 +B383 1103 1166 11BA # 뎃 => 뎃 +B384 1103 1166 11BB # 뎄 => 뎄 +B385 1103 1166 11BC # 뎅 => 뎅 +B386 1103 1166 11BD # 뎆 => 뎆 +B387 1103 1166 11BE # 뎇 => 뎇 +B388 1103 1166 11BF # 뎈 => 뎈 +B389 1103 1166 11C0 # 뎉 => 뎉 +B38A 1103 1166 11C1 # 뎊 => 뎊 +B38B 1103 1166 11C2 # 뎋 => 뎋 +B38C 1103 1167 # 뎌 => 뎌 +B38D 1103 1167 11A8 # 뎍 => 뎍 +B38E 1103 1167 11A9 # 뎎 => 뎎 +B38F 1103 1167 11AA # 뎏 => 뎏 +B390 1103 1167 11AB # 뎐 => 뎐 +B391 1103 1167 11AC # 뎑 => 뎑 +B392 1103 1167 11AD # 뎒 => 뎒 +B393 1103 1167 11AE # 뎓 => 뎓 +B394 1103 1167 11AF # 뎔 => 뎔 +B395 1103 1167 11B0 # 뎕 => 뎕 +B396 1103 1167 11B1 # 뎖 => 뎖 +B397 1103 1167 11B2 # 뎗 => 뎗 +B398 1103 1167 11B3 # 뎘 => 뎘 +B399 1103 1167 11B4 # 뎙 => 뎙 +B39A 1103 1167 11B5 # 뎚 => 뎚 +B39B 1103 1167 11B6 # 뎛 => 뎛 +B39C 1103 1167 11B7 # 뎜 => 뎜 +B39D 1103 1167 11B8 # 뎝 => 뎝 +B39E 1103 1167 11B9 # 뎞 => 뎞 +B39F 1103 1167 11BA # 뎟 => 뎟 +B3A0 1103 1167 11BB # 뎠 => 뎠 +B3A1 1103 1167 11BC # 뎡 => 뎡 +B3A2 1103 1167 11BD # 뎢 => 뎢 +B3A3 1103 1167 11BE # 뎣 => 뎣 
+B3A4 1103 1167 11BF # 뎤 => 뎤 +B3A5 1103 1167 11C0 # 뎥 => 뎥 +B3A6 1103 1167 11C1 # 뎦 => 뎦 +B3A7 1103 1167 11C2 # 뎧 => 뎧 +B3A8 1103 1168 # 뎨 => 뎨 +B3A9 1103 1168 11A8 # 뎩 => 뎩 +B3AA 1103 1168 11A9 # 뎪 => 뎪 +B3AB 1103 1168 11AA # 뎫 => 뎫 +B3AC 1103 1168 11AB # 뎬 => 뎬 +B3AD 1103 1168 11AC # 뎭 => 뎭 +B3AE 1103 1168 11AD # 뎮 => 뎮 +B3AF 1103 1168 11AE # 뎯 => 뎯 +B3B0 1103 1168 11AF # 뎰 => 뎰 +B3B1 1103 1168 11B0 # 뎱 => 뎱 +B3B2 1103 1168 11B1 # 뎲 => 뎲 +B3B3 1103 1168 11B2 # 뎳 => 뎳 +B3B4 1103 1168 11B3 # 뎴 => 뎴 +B3B5 1103 1168 11B4 # 뎵 => 뎵 +B3B6 1103 1168 11B5 # 뎶 => 뎶 +B3B7 1103 1168 11B6 # 뎷 => 뎷 +B3B8 1103 1168 11B7 # 뎸 => 뎸 +B3B9 1103 1168 11B8 # 뎹 => 뎹 +B3BA 1103 1168 11B9 # 뎺 => 뎺 +B3BB 1103 1168 11BA # 뎻 => 뎻 +B3BC 1103 1168 11BB # 뎼 => 뎼 +B3BD 1103 1168 11BC # 뎽 => 뎽 +B3BE 1103 1168 11BD # 뎾 => 뎾 +B3BF 1103 1168 11BE # 뎿 => 뎿 +B3C0 1103 1168 11BF # 돀 => 돀 +B3C1 1103 1168 11C0 # 돁 => 돁 +B3C2 1103 1168 11C1 # 돂 => 돂 +B3C3 1103 1168 11C2 # 돃 => 돃 +B3C4 1103 1169 # 도 => 도 +B3C5 1103 1169 11A8 # 독 => 독 +B3C6 1103 1169 11A9 # 돆 => 돆 +B3C7 1103 1169 11AA # 돇 => 돇 +B3C8 1103 1169 11AB # 돈 => 돈 +B3C9 1103 1169 11AC # 돉 => 돉 +B3CA 1103 1169 11AD # 돊 => 돊 +B3CB 1103 1169 11AE # 돋 => 돋 +B3CC 1103 1169 11AF # 돌 => 돌 +B3CD 1103 1169 11B0 # 돍 => 돍 +B3CE 1103 1169 11B1 # 돎 => 돎 +B3CF 1103 1169 11B2 # 돏 => 돏 +B3D0 1103 1169 11B3 # 돐 => 돐 +B3D1 1103 1169 11B4 # 돑 => 돑 +B3D2 1103 1169 11B5 # 돒 => 돒 +B3D3 1103 1169 11B6 # 돓 => 돓 +B3D4 1103 1169 11B7 # 돔 => 돔 +B3D5 1103 1169 11B8 # 돕 => 돕 +B3D6 1103 1169 11B9 # 돖 => 돖 +B3D7 1103 1169 11BA # 돗 => 돗 +B3D8 1103 1169 11BB # 돘 => 돘 +B3D9 1103 1169 11BC # 동 => 동 +B3DA 1103 1169 11BD # 돚 => 돚 +B3DB 1103 1169 11BE # 돛 => 돛 +B3DC 1103 1169 11BF # 돜 => 돜 +B3DD 1103 1169 11C0 # 돝 => 돝 +B3DE 1103 1169 11C1 # 돞 => 돞 +B3DF 1103 1169 11C2 # 돟 => 돟 +B3E0 1103 116A # 돠 => 돠 +B3E1 1103 116A 11A8 # 돡 => 돡 +B3E2 1103 116A 11A9 # 돢 => 돢 +B3E3 1103 116A 11AA # 돣 => 돣 +B3E4 1103 116A 11AB # 돤 => 돤 +B3E5 1103 116A 11AC # 돥 => 돥 +B3E6 1103 116A 11AD # 돦 => 돦 
+B3E7 1103 116A 11AE # 돧 => 돧 +B3E8 1103 116A 11AF # 돨 => 돨 +B3E9 1103 116A 11B0 # 돩 => 돩 +B3EA 1103 116A 11B1 # 돪 => 돪 +B3EB 1103 116A 11B2 # 돫 => 돫 +B3EC 1103 116A 11B3 # 돬 => 돬 +B3ED 1103 116A 11B4 # 돭 => 돭 +B3EE 1103 116A 11B5 # 돮 => 돮 +B3EF 1103 116A 11B6 # 돯 => 돯 +B3F0 1103 116A 11B7 # 돰 => 돰 +B3F1 1103 116A 11B8 # 돱 => 돱 +B3F2 1103 116A 11B9 # 돲 => 돲 +B3F3 1103 116A 11BA # 돳 => 돳 +B3F4 1103 116A 11BB # 돴 => 돴 +B3F5 1103 116A 11BC # 돵 => 돵 +B3F6 1103 116A 11BD # 돶 => 돶 +B3F7 1103 116A 11BE # 돷 => 돷 +B3F8 1103 116A 11BF # 돸 => 돸 +B3F9 1103 116A 11C0 # 돹 => 돹 +B3FA 1103 116A 11C1 # 돺 => 돺 +B3FB 1103 116A 11C2 # 돻 => 돻 +B3FC 1103 116B # 돼 => 돼 +B3FD 1103 116B 11A8 # 돽 => 돽 +B3FE 1103 116B 11A9 # 돾 => 돾 +B3FF 1103 116B 11AA # 돿 => 돿 +B400 1103 116B 11AB # 됀 => 됀 +B401 1103 116B 11AC # 됁 => 됁 +B402 1103 116B 11AD # 됂 => 됂 +B403 1103 116B 11AE # 됃 => 됃 +B404 1103 116B 11AF # 됄 => 됄 +B405 1103 116B 11B0 # 됅 => 됅 +B406 1103 116B 11B1 # 됆 => 됆 +B407 1103 116B 11B2 # 됇 => 됇 +B408 1103 116B 11B3 # 됈 => 됈 +B409 1103 116B 11B4 # 됉 => 됉 +B40A 1103 116B 11B5 # 됊 => 됊 +B40B 1103 116B 11B6 # 됋 => 됋 +B40C 1103 116B 11B7 # 됌 => 됌 +B40D 1103 116B 11B8 # 됍 => 됍 +B40E 1103 116B 11B9 # 됎 => 됎 +B40F 1103 116B 11BA # 됏 => 됏 +B410 1103 116B 11BB # 됐 => 됐 +B411 1103 116B 11BC # 됑 => 됑 +B412 1103 116B 11BD # 됒 => 됒 +B413 1103 116B 11BE # 됓 => 됓 +B414 1103 116B 11BF # 됔 => 됔 +B415 1103 116B 11C0 # 됕 => 됕 +B416 1103 116B 11C1 # 됖 => 됖 +B417 1103 116B 11C2 # 됗 => 됗 +B418 1103 116C # 되 => 되 +B419 1103 116C 11A8 # 됙 => 됙 +B41A 1103 116C 11A9 # 됚 => 됚 +B41B 1103 116C 11AA # 됛 => 됛 +B41C 1103 116C 11AB # 된 => 된 +B41D 1103 116C 11AC # 됝 => 됝 +B41E 1103 116C 11AD # 됞 => 됞 +B41F 1103 116C 11AE # 됟 => 됟 +B420 1103 116C 11AF # 될 => 될 +B421 1103 116C 11B0 # 됡 => 됡 +B422 1103 116C 11B1 # 됢 => 됢 +B423 1103 116C 11B2 # 됣 => 됣 +B424 1103 116C 11B3 # 됤 => 됤 +B425 1103 116C 11B4 # 됥 => 됥 +B426 1103 116C 11B5 # 됦 => 됦 +B427 1103 116C 11B6 # 됧 => 됧 +B428 1103 116C 11B7 # 됨 => 됨 +B429 1103 116C 11B8 # 됩 => 됩 
+B42A 1103 116C 11B9 # 됪 => 됪 +B42B 1103 116C 11BA # 됫 => 됫 +B42C 1103 116C 11BB # 됬 => 됬 +B42D 1103 116C 11BC # 됭 => 됭 +B42E 1103 116C 11BD # 됮 => 됮 +B42F 1103 116C 11BE # 됯 => 됯 +B430 1103 116C 11BF # 됰 => 됰 +B431 1103 116C 11C0 # 됱 => 됱 +B432 1103 116C 11C1 # 됲 => 됲 +B433 1103 116C 11C2 # 됳 => 됳 +B434 1103 116D # 됴 => 됴 +B435 1103 116D 11A8 # 됵 => 됵 +B436 1103 116D 11A9 # 됶 => 됶 +B437 1103 116D 11AA # 됷 => 됷 +B438 1103 116D 11AB # 됸 => 됸 +B439 1103 116D 11AC # 됹 => 됹 +B43A 1103 116D 11AD # 됺 => 됺 +B43B 1103 116D 11AE # 됻 => 됻 +B43C 1103 116D 11AF # 됼 => 됼 +B43D 1103 116D 11B0 # 됽 => 됽 +B43E 1103 116D 11B1 # 됾 => 됾 +B43F 1103 116D 11B2 # 됿 => 됿 +B440 1103 116D 11B3 # 둀 => 둀 +B441 1103 116D 11B4 # 둁 => 둁 +B442 1103 116D 11B5 # 둂 => 둂 +B443 1103 116D 11B6 # 둃 => 둃 +B444 1103 116D 11B7 # 둄 => 둄 +B445 1103 116D 11B8 # 둅 => 둅 +B446 1103 116D 11B9 # 둆 => 둆 +B447 1103 116D 11BA # 둇 => 둇 +B448 1103 116D 11BB # 둈 => 둈 +B449 1103 116D 11BC # 둉 => 둉 +B44A 1103 116D 11BD # 둊 => 둊 +B44B 1103 116D 11BE # 둋 => 둋 +B44C 1103 116D 11BF # 둌 => 둌 +B44D 1103 116D 11C0 # 둍 => 둍 +B44E 1103 116D 11C1 # 둎 => 둎 +B44F 1103 116D 11C2 # 둏 => 둏 +B450 1103 116E # 두 => 두 +B451 1103 116E 11A8 # 둑 => 둑 +B452 1103 116E 11A9 # 둒 => 둒 +B453 1103 116E 11AA # 둓 => 둓 +B454 1103 116E 11AB # 둔 => 둔 +B455 1103 116E 11AC # 둕 => 둕 +B456 1103 116E 11AD # 둖 => 둖 +B457 1103 116E 11AE # 둗 => 둗 +B458 1103 116E 11AF # 둘 => 둘 +B459 1103 116E 11B0 # 둙 => 둙 +B45A 1103 116E 11B1 # 둚 => 둚 +B45B 1103 116E 11B2 # 둛 => 둛 +B45C 1103 116E 11B3 # 둜 => 둜 +B45D 1103 116E 11B4 # 둝 => 둝 +B45E 1103 116E 11B5 # 둞 => 둞 +B45F 1103 116E 11B6 # 둟 => 둟 +B460 1103 116E 11B7 # 둠 => 둠 +B461 1103 116E 11B8 # 둡 => 둡 +B462 1103 116E 11B9 # 둢 => 둢 +B463 1103 116E 11BA # 둣 => 둣 +B464 1103 116E 11BB # 둤 => 둤 +B465 1103 116E 11BC # 둥 => 둥 +B466 1103 116E 11BD # 둦 => 둦 +B467 1103 116E 11BE # 둧 => 둧 +B468 1103 116E 11BF # 둨 => 둨 +B469 1103 116E 11C0 # 둩 => 둩 +B46A 1103 116E 11C1 # 둪 => 둪 +B46B 1103 116E 11C2 # 둫 => 둫 +B46C 1103 116F # 둬 => 둬 
+B46D 1103 116F 11A8 # 둭 => 둭 +B46E 1103 116F 11A9 # 둮 => 둮 +B46F 1103 116F 11AA # 둯 => 둯 +B470 1103 116F 11AB # 둰 => 둰 +B471 1103 116F 11AC # 둱 => 둱 +B472 1103 116F 11AD # 둲 => 둲 +B473 1103 116F 11AE # 둳 => 둳 +B474 1103 116F 11AF # 둴 => 둴 +B475 1103 116F 11B0 # 둵 => 둵 +B476 1103 116F 11B1 # 둶 => 둶 +B477 1103 116F 11B2 # 둷 => 둷 +B478 1103 116F 11B3 # 둸 => 둸 +B479 1103 116F 11B4 # 둹 => 둹 +B47A 1103 116F 11B5 # 둺 => 둺 +B47B 1103 116F 11B6 # 둻 => 둻 +B47C 1103 116F 11B7 # 둼 => 둼 +B47D 1103 116F 11B8 # 둽 => 둽 +B47E 1103 116F 11B9 # 둾 => 둾 +B47F 1103 116F 11BA # 둿 => 둿 +B480 1103 116F 11BB # 뒀 => 뒀 +B481 1103 116F 11BC # 뒁 => 뒁 +B482 1103 116F 11BD # 뒂 => 뒂 +B483 1103 116F 11BE # 뒃 => 뒃 +B484 1103 116F 11BF # 뒄 => 뒄 +B485 1103 116F 11C0 # 뒅 => 뒅 +B486 1103 116F 11C1 # 뒆 => 뒆 +B487 1103 116F 11C2 # 뒇 => 뒇 +B488 1103 1170 # 뒈 => 뒈 +B489 1103 1170 11A8 # 뒉 => 뒉 +B48A 1103 1170 11A9 # 뒊 => 뒊 +B48B 1103 1170 11AA # 뒋 => 뒋 +B48C 1103 1170 11AB # 뒌 => 뒌 +B48D 1103 1170 11AC # 뒍 => 뒍 +B48E 1103 1170 11AD # 뒎 => 뒎 +B48F 1103 1170 11AE # 뒏 => 뒏 +B490 1103 1170 11AF # 뒐 => 뒐 +B491 1103 1170 11B0 # 뒑 => 뒑 +B492 1103 1170 11B1 # 뒒 => 뒒 +B493 1103 1170 11B2 # 뒓 => 뒓 +B494 1103 1170 11B3 # 뒔 => 뒔 +B495 1103 1170 11B4 # 뒕 => 뒕 +B496 1103 1170 11B5 # 뒖 => 뒖 +B497 1103 1170 11B6 # 뒗 => 뒗 +B498 1103 1170 11B7 # 뒘 => 뒘 +B499 1103 1170 11B8 # 뒙 => 뒙 +B49A 1103 1170 11B9 # 뒚 => 뒚 +B49B 1103 1170 11BA # 뒛 => 뒛 +B49C 1103 1170 11BB # 뒜 => 뒜 +B49D 1103 1170 11BC # 뒝 => 뒝 +B49E 1103 1170 11BD # 뒞 => 뒞 +B49F 1103 1170 11BE # 뒟 => 뒟 +B4A0 1103 1170 11BF # 뒠 => 뒠 +B4A1 1103 1170 11C0 # 뒡 => 뒡 +B4A2 1103 1170 11C1 # 뒢 => 뒢 +B4A3 1103 1170 11C2 # 뒣 => 뒣 +B4A4 1103 1171 # 뒤 => 뒤 +B4A5 1103 1171 11A8 # 뒥 => 뒥 +B4A6 1103 1171 11A9 # 뒦 => 뒦 +B4A7 1103 1171 11AA # 뒧 => 뒧 +B4A8 1103 1171 11AB # 뒨 => 뒨 +B4A9 1103 1171 11AC # 뒩 => 뒩 +B4AA 1103 1171 11AD # 뒪 => 뒪 +B4AB 1103 1171 11AE # 뒫 => 뒫 +B4AC 1103 1171 11AF # 뒬 => 뒬 +B4AD 1103 1171 11B0 # 뒭 => 뒭 +B4AE 1103 1171 11B1 # 뒮 => 뒮 +B4AF 1103 1171 11B2 # 뒯 => 뒯 
+B4B0 1103 1171 11B3 # 뒰 => 뒰 +B4B1 1103 1171 11B4 # 뒱 => 뒱 +B4B2 1103 1171 11B5 # 뒲 => 뒲 +B4B3 1103 1171 11B6 # 뒳 => 뒳 +B4B4 1103 1171 11B7 # 뒴 => 뒴 +B4B5 1103 1171 11B8 # 뒵 => 뒵 +B4B6 1103 1171 11B9 # 뒶 => 뒶 +B4B7 1103 1171 11BA # 뒷 => 뒷 +B4B8 1103 1171 11BB # 뒸 => 뒸 +B4B9 1103 1171 11BC # 뒹 => 뒹 +B4BA 1103 1171 11BD # 뒺 => 뒺 +B4BB 1103 1171 11BE # 뒻 => 뒻 +B4BC 1103 1171 11BF # 뒼 => 뒼 +B4BD 1103 1171 11C0 # 뒽 => 뒽 +B4BE 1103 1171 11C1 # 뒾 => 뒾 +B4BF 1103 1171 11C2 # 뒿 => 뒿 +B4C0 1103 1172 # 듀 => 듀 +B4C1 1103 1172 11A8 # 듁 => 듁 +B4C2 1103 1172 11A9 # 듂 => 듂 +B4C3 1103 1172 11AA # 듃 => 듃 +B4C4 1103 1172 11AB # 듄 => 듄 +B4C5 1103 1172 11AC # 듅 => 듅 +B4C6 1103 1172 11AD # 듆 => 듆 +B4C7 1103 1172 11AE # 듇 => 듇 +B4C8 1103 1172 11AF # 듈 => 듈 +B4C9 1103 1172 11B0 # 듉 => 듉 +B4CA 1103 1172 11B1 # 듊 => 듊 +B4CB 1103 1172 11B2 # 듋 => 듋 +B4CC 1103 1172 11B3 # 듌 => 듌 +B4CD 1103 1172 11B4 # 듍 => 듍 +B4CE 1103 1172 11B5 # 듎 => 듎 +B4CF 1103 1172 11B6 # 듏 => 듏 +B4D0 1103 1172 11B7 # 듐 => 듐 +B4D1 1103 1172 11B8 # 듑 => 듑 +B4D2 1103 1172 11B9 # 듒 => 듒 +B4D3 1103 1172 11BA # 듓 => 듓 +B4D4 1103 1172 11BB # 듔 => 듔 +B4D5 1103 1172 11BC # 듕 => 듕 +B4D6 1103 1172 11BD # 듖 => 듖 +B4D7 1103 1172 11BE # 듗 => 듗 +B4D8 1103 1172 11BF # 듘 => 듘 +B4D9 1103 1172 11C0 # 듙 => 듙 +B4DA 1103 1172 11C1 # 듚 => 듚 +B4DB 1103 1172 11C2 # 듛 => 듛 +B4DC 1103 1173 # 드 => 드 +B4DD 1103 1173 11A8 # 득 => 득 +B4DE 1103 1173 11A9 # 듞 => 듞 +B4DF 1103 1173 11AA # 듟 => 듟 +B4E0 1103 1173 11AB # 든 => 든 +B4E1 1103 1173 11AC # 듡 => 듡 +B4E2 1103 1173 11AD # 듢 => 듢 +B4E3 1103 1173 11AE # 듣 => 듣 +B4E4 1103 1173 11AF # 들 => 들 +B4E5 1103 1173 11B0 # 듥 => 듥 +B4E6 1103 1173 11B1 # 듦 => 듦 +B4E7 1103 1173 11B2 # 듧 => 듧 +B4E8 1103 1173 11B3 # 듨 => 듨 +B4E9 1103 1173 11B4 # 듩 => 듩 +B4EA 1103 1173 11B5 # 듪 => 듪 +B4EB 1103 1173 11B6 # 듫 => 듫 +B4EC 1103 1173 11B7 # 듬 => 듬 +B4ED 1103 1173 11B8 # 듭 => 듭 +B4EE 1103 1173 11B9 # 듮 => 듮 +B4EF 1103 1173 11BA # 듯 => 듯 +B4F0 1103 1173 11BB # 듰 => 듰 +B4F1 1103 1173 11BC # 등 => 등 +B4F2 1103 1173 11BD # 듲 => 듲 
+B4F3 1103 1173 11BE # 듳 => 듳 +B4F4 1103 1173 11BF # 듴 => 듴 +B4F5 1103 1173 11C0 # 듵 => 듵 +B4F6 1103 1173 11C1 # 듶 => 듶 +B4F7 1103 1173 11C2 # 듷 => 듷 +B4F8 1103 1174 # 듸 => 듸 +B4F9 1103 1174 11A8 # 듹 => 듹 +B4FA 1103 1174 11A9 # 듺 => 듺 +B4FB 1103 1174 11AA # 듻 => 듻 +B4FC 1103 1174 11AB # 듼 => 듼 +B4FD 1103 1174 11AC # 듽 => 듽 +B4FE 1103 1174 11AD # 듾 => 듾 +B4FF 1103 1174 11AE # 듿 => 듿 +B500 1103 1174 11AF # 딀 => 딀 +B501 1103 1174 11B0 # 딁 => 딁 +B502 1103 1174 11B1 # 딂 => 딂 +B503 1103 1174 11B2 # 딃 => 딃 +B504 1103 1174 11B3 # 딄 => 딄 +B505 1103 1174 11B4 # 딅 => 딅 +B506 1103 1174 11B5 # 딆 => 딆 +B507 1103 1174 11B6 # 딇 => 딇 +B508 1103 1174 11B7 # 딈 => 딈 +B509 1103 1174 11B8 # 딉 => 딉 +B50A 1103 1174 11B9 # 딊 => 딊 +B50B 1103 1174 11BA # 딋 => 딋 +B50C 1103 1174 11BB # 딌 => 딌 +B50D 1103 1174 11BC # 딍 => 딍 +B50E 1103 1174 11BD # 딎 => 딎 +B50F 1103 1174 11BE # 딏 => 딏 +B510 1103 1174 11BF # 딐 => 딐 +B511 1103 1174 11C0 # 딑 => 딑 +B512 1103 1174 11C1 # 딒 => 딒 +B513 1103 1174 11C2 # 딓 => 딓 +B514 1103 1175 # 디 => 디 +B515 1103 1175 11A8 # 딕 => 딕 +B516 1103 1175 11A9 # 딖 => 딖 +B517 1103 1175 11AA # 딗 => 딗 +B518 1103 1175 11AB # 딘 => 딘 +B519 1103 1175 11AC # 딙 => 딙 +B51A 1103 1175 11AD # 딚 => 딚 +B51B 1103 1175 11AE # 딛 => 딛 +B51C 1103 1175 11AF # 딜 => 딜 +B51D 1103 1175 11B0 # 딝 => 딝 +B51E 1103 1175 11B1 # 딞 => 딞 +B51F 1103 1175 11B2 # 딟 => 딟 +B520 1103 1175 11B3 # 딠 => 딠 +B521 1103 1175 11B4 # 딡 => 딡 +B522 1103 1175 11B5 # 딢 => 딢 +B523 1103 1175 11B6 # 딣 => 딣 +B524 1103 1175 11B7 # 딤 => 딤 +B525 1103 1175 11B8 # 딥 => 딥 +B526 1103 1175 11B9 # 딦 => 딦 +B527 1103 1175 11BA # 딧 => 딧 +B528 1103 1175 11BB # 딨 => 딨 +B529 1103 1175 11BC # 딩 => 딩 +B52A 1103 1175 11BD # 딪 => 딪 +B52B 1103 1175 11BE # 딫 => 딫 +B52C 1103 1175 11BF # 딬 => 딬 +B52D 1103 1175 11C0 # 딭 => 딭 +B52E 1103 1175 11C1 # 딮 => 딮 +B52F 1103 1175 11C2 # 딯 => 딯 +B530 1104 1161 # 따 => 따 +B531 1104 1161 11A8 # 딱 => 딱 +B532 1104 1161 11A9 # 딲 => 딲 +B533 1104 1161 11AA # 딳 => 딳 +B534 1104 1161 11AB # 딴 => 딴 +B535 1104 1161 11AC # 딵 => 딵 
+B536 1104 1161 11AD # 딶 => 딶 +B537 1104 1161 11AE # 딷 => 딷 +B538 1104 1161 11AF # 딸 => 딸 +B539 1104 1161 11B0 # 딹 => 딹 +B53A 1104 1161 11B1 # 딺 => 딺 +B53B 1104 1161 11B2 # 딻 => 딻 +B53C 1104 1161 11B3 # 딼 => 딼 +B53D 1104 1161 11B4 # 딽 => 딽 +B53E 1104 1161 11B5 # 딾 => 딾 +B53F 1104 1161 11B6 # 딿 => 딿 +B540 1104 1161 11B7 # 땀 => 땀 +B541 1104 1161 11B8 # 땁 => 땁 +B542 1104 1161 11B9 # 땂 => 땂 +B543 1104 1161 11BA # 땃 => 땃 +B544 1104 1161 11BB # 땄 => 땄 +B545 1104 1161 11BC # 땅 => 땅 +B546 1104 1161 11BD # 땆 => 땆 +B547 1104 1161 11BE # 땇 => 땇 +B548 1104 1161 11BF # 땈 => 땈 +B549 1104 1161 11C0 # 땉 => 땉 +B54A 1104 1161 11C1 # 땊 => 땊 +B54B 1104 1161 11C2 # 땋 => 땋 +B54C 1104 1162 # 때 => 때 +B54D 1104 1162 11A8 # 땍 => 땍 +B54E 1104 1162 11A9 # 땎 => 땎 +B54F 1104 1162 11AA # 땏 => 땏 +B550 1104 1162 11AB # 땐 => 땐 +B551 1104 1162 11AC # 땑 => 땑 +B552 1104 1162 11AD # 땒 => 땒 +B553 1104 1162 11AE # 땓 => 땓 +B554 1104 1162 11AF # 땔 => 땔 +B555 1104 1162 11B0 # 땕 => 땕 +B556 1104 1162 11B1 # 땖 => 땖 +B557 1104 1162 11B2 # 땗 => 땗 +B558 1104 1162 11B3 # 땘 => 땘 +B559 1104 1162 11B4 # 땙 => 땙 +B55A 1104 1162 11B5 # 땚 => 땚 +B55B 1104 1162 11B6 # 땛 => 땛 +B55C 1104 1162 11B7 # 땜 => 땜 +B55D 1104 1162 11B8 # 땝 => 땝 +B55E 1104 1162 11B9 # 땞 => 땞 +B55F 1104 1162 11BA # 땟 => 땟 +B560 1104 1162 11BB # 땠 => 땠 +B561 1104 1162 11BC # 땡 => 땡 +B562 1104 1162 11BD # 땢 => 땢 +B563 1104 1162 11BE # 땣 => 땣 +B564 1104 1162 11BF # 땤 => 땤 +B565 1104 1162 11C0 # 땥 => 땥 +B566 1104 1162 11C1 # 땦 => 땦 +B567 1104 1162 11C2 # 땧 => 땧 +B568 1104 1163 # 땨 => 땨 +B569 1104 1163 11A8 # 땩 => 땩 +B56A 1104 1163 11A9 # 땪 => 땪 +B56B 1104 1163 11AA # 땫 => 땫 +B56C 1104 1163 11AB # 땬 => 땬 +B56D 1104 1163 11AC # 땭 => 땭 +B56E 1104 1163 11AD # 땮 => 땮 +B56F 1104 1163 11AE # 땯 => 땯 +B570 1104 1163 11AF # 땰 => 땰 +B571 1104 1163 11B0 # 땱 => 땱 +B572 1104 1163 11B1 # 땲 => 땲 +B573 1104 1163 11B2 # 땳 => 땳 +B574 1104 1163 11B3 # 땴 => 땴 +B575 1104 1163 11B4 # 땵 => 땵 +B576 1104 1163 11B5 # 땶 => 땶 +B577 1104 1163 11B6 # 땷 => 땷 +B578 1104 1163 11B7 # 땸 => 땸 
+B579 1104 1163 11B8 # 땹 => 땹 +B57A 1104 1163 11B9 # 땺 => 땺 +B57B 1104 1163 11BA # 땻 => 땻 +B57C 1104 1163 11BB # 땼 => 땼 +B57D 1104 1163 11BC # 땽 => 땽 +B57E 1104 1163 11BD # 땾 => 땾 +B57F 1104 1163 11BE # 땿 => 땿 +B580 1104 1163 11BF # 떀 => 떀 +B581 1104 1163 11C0 # 떁 => 떁 +B582 1104 1163 11C1 # 떂 => 떂 +B583 1104 1163 11C2 # 떃 => 떃 +B584 1104 1164 # 떄 => 떄 +B585 1104 1164 11A8 # 떅 => 떅 +B586 1104 1164 11A9 # 떆 => 떆 +B587 1104 1164 11AA # 떇 => 떇 +B588 1104 1164 11AB # 떈 => 떈 +B589 1104 1164 11AC # 떉 => 떉 +B58A 1104 1164 11AD # 떊 => 떊 +B58B 1104 1164 11AE # 떋 => 떋 +B58C 1104 1164 11AF # 떌 => 떌 +B58D 1104 1164 11B0 # 떍 => 떍 +B58E 1104 1164 11B1 # 떎 => 떎 +B58F 1104 1164 11B2 # 떏 => 떏 +B590 1104 1164 11B3 # 떐 => 떐 +B591 1104 1164 11B4 # 떑 => 떑 +B592 1104 1164 11B5 # 떒 => 떒 +B593 1104 1164 11B6 # 떓 => 떓 +B594 1104 1164 11B7 # 떔 => 떔 +B595 1104 1164 11B8 # 떕 => 떕 +B596 1104 1164 11B9 # 떖 => 떖 +B597 1104 1164 11BA # 떗 => 떗 +B598 1104 1164 11BB # 떘 => 떘 +B599 1104 1164 11BC # 떙 => 떙 +B59A 1104 1164 11BD # 떚 => 떚 +B59B 1104 1164 11BE # 떛 => 떛 +B59C 1104 1164 11BF # 떜 => 떜 +B59D 1104 1164 11C0 # 떝 => 떝 +B59E 1104 1164 11C1 # 떞 => 떞 +B59F 1104 1164 11C2 # 떟 => 떟 +B5A0 1104 1165 # 떠 => 떠 +B5A1 1104 1165 11A8 # 떡 => 떡 +B5A2 1104 1165 11A9 # 떢 => 떢 +B5A3 1104 1165 11AA # 떣 => 떣 +B5A4 1104 1165 11AB # 떤 => 떤 +B5A5 1104 1165 11AC # 떥 => 떥 +B5A6 1104 1165 11AD # 떦 => 떦 +B5A7 1104 1165 11AE # 떧 => 떧 +B5A8 1104 1165 11AF # 떨 => 떨 +B5A9 1104 1165 11B0 # 떩 => 떩 +B5AA 1104 1165 11B1 # 떪 => 떪 +B5AB 1104 1165 11B2 # 떫 => 떫 +B5AC 1104 1165 11B3 # 떬 => 떬 +B5AD 1104 1165 11B4 # 떭 => 떭 +B5AE 1104 1165 11B5 # 떮 => 떮 +B5AF 1104 1165 11B6 # 떯 => 떯 +B5B0 1104 1165 11B7 # 떰 => 떰 +B5B1 1104 1165 11B8 # 떱 => 떱 +B5B2 1104 1165 11B9 # 떲 => 떲 +B5B3 1104 1165 11BA # 떳 => 떳 +B5B4 1104 1165 11BB # 떴 => 떴 +B5B5 1104 1165 11BC # 떵 => 떵 +B5B6 1104 1165 11BD # 떶 => 떶 +B5B7 1104 1165 11BE # 떷 => 떷 +B5B8 1104 1165 11BF # 떸 => 떸 +B5B9 1104 1165 11C0 # 떹 => 떹 +B5BA 1104 1165 11C1 # 떺 => 떺 +B5BB 1104 1165 11C2 # 떻 => 떻 
+B5BC 1104 1166 # 떼 => 떼 +B5BD 1104 1166 11A8 # 떽 => 떽 +B5BE 1104 1166 11A9 # 떾 => 떾 +B5BF 1104 1166 11AA # 떿 => 떿 +B5C0 1104 1166 11AB # 뗀 => 뗀 +B5C1 1104 1166 11AC # 뗁 => 뗁 +B5C2 1104 1166 11AD # 뗂 => 뗂 +B5C3 1104 1166 11AE # 뗃 => 뗃 +B5C4 1104 1166 11AF # 뗄 => 뗄 +B5C5 1104 1166 11B0 # 뗅 => 뗅 +B5C6 1104 1166 11B1 # 뗆 => 뗆 +B5C7 1104 1166 11B2 # 뗇 => 뗇 +B5C8 1104 1166 11B3 # 뗈 => 뗈 +B5C9 1104 1166 11B4 # 뗉 => 뗉 +B5CA 1104 1166 11B5 # 뗊 => 뗊 +B5CB 1104 1166 11B6 # 뗋 => 뗋 +B5CC 1104 1166 11B7 # 뗌 => 뗌 +B5CD 1104 1166 11B8 # 뗍 => 뗍 +B5CE 1104 1166 11B9 # 뗎 => 뗎 +B5CF 1104 1166 11BA # 뗏 => 뗏 +B5D0 1104 1166 11BB # 뗐 => 뗐 +B5D1 1104 1166 11BC # 뗑 => 뗑 +B5D2 1104 1166 11BD # 뗒 => 뗒 +B5D3 1104 1166 11BE # 뗓 => 뗓 +B5D4 1104 1166 11BF # 뗔 => 뗔 +B5D5 1104 1166 11C0 # 뗕 => 뗕 +B5D6 1104 1166 11C1 # 뗖 => 뗖 +B5D7 1104 1166 11C2 # 뗗 => 뗗 +B5D8 1104 1167 # 뗘 => 뗘 +B5D9 1104 1167 11A8 # 뗙 => 뗙 +B5DA 1104 1167 11A9 # 뗚 => 뗚 +B5DB 1104 1167 11AA # 뗛 => 뗛 +B5DC 1104 1167 11AB # 뗜 => 뗜 +B5DD 1104 1167 11AC # 뗝 => 뗝 +B5DE 1104 1167 11AD # 뗞 => 뗞 +B5DF 1104 1167 11AE # 뗟 => 뗟 +B5E0 1104 1167 11AF # 뗠 => 뗠 +B5E1 1104 1167 11B0 # 뗡 => 뗡 +B5E2 1104 1167 11B1 # 뗢 => 뗢 +B5E3 1104 1167 11B2 # 뗣 => 뗣 +B5E4 1104 1167 11B3 # 뗤 => 뗤 +B5E5 1104 1167 11B4 # 뗥 => 뗥 +B5E6 1104 1167 11B5 # 뗦 => 뗦 +B5E7 1104 1167 11B6 # 뗧 => 뗧 +B5E8 1104 1167 11B7 # 뗨 => 뗨 +B5E9 1104 1167 11B8 # 뗩 => 뗩 +B5EA 1104 1167 11B9 # 뗪 => 뗪 +B5EB 1104 1167 11BA # 뗫 => 뗫 +B5EC 1104 1167 11BB # 뗬 => 뗬 +B5ED 1104 1167 11BC # 뗭 => 뗭 +B5EE 1104 1167 11BD # 뗮 => 뗮 +B5EF 1104 1167 11BE # 뗯 => 뗯 +B5F0 1104 1167 11BF # 뗰 => 뗰 +B5F1 1104 1167 11C0 # 뗱 => 뗱 +B5F2 1104 1167 11C1 # 뗲 => 뗲 +B5F3 1104 1167 11C2 # 뗳 => 뗳 +B5F4 1104 1168 # 뗴 => 뗴 +B5F5 1104 1168 11A8 # 뗵 => 뗵 +B5F6 1104 1168 11A9 # 뗶 => 뗶 +B5F7 1104 1168 11AA # 뗷 => 뗷 +B5F8 1104 1168 11AB # 뗸 => 뗸 +B5F9 1104 1168 11AC # 뗹 => 뗹 +B5FA 1104 1168 11AD # 뗺 => 뗺 +B5FB 1104 1168 11AE # 뗻 => 뗻 +B5FC 1104 1168 11AF # 뗼 => 뗼 +B5FD 1104 1168 11B0 # 뗽 => 뗽 +B5FE 1104 1168 11B1 # 뗾 => 뗾 
+B5FF 1104 1168 11B2 # 뗿 => 뗿 +B600 1104 1168 11B3 # 똀 => 똀 +B601 1104 1168 11B4 # 똁 => 똁 +B602 1104 1168 11B5 # 똂 => 똂 +B603 1104 1168 11B6 # 똃 => 똃 +B604 1104 1168 11B7 # 똄 => 똄 +B605 1104 1168 11B8 # 똅 => 똅 +B606 1104 1168 11B9 # 똆 => 똆 +B607 1104 1168 11BA # 똇 => 똇 +B608 1104 1168 11BB # 똈 => 똈 +B609 1104 1168 11BC # 똉 => 똉 +B60A 1104 1168 11BD # 똊 => 똊 +B60B 1104 1168 11BE # 똋 => 똋 +B60C 1104 1168 11BF # 똌 => 똌 +B60D 1104 1168 11C0 # 똍 => 똍 +B60E 1104 1168 11C1 # 똎 => 똎 +B60F 1104 1168 11C2 # 똏 => 똏 +B610 1104 1169 # 또 => 또 +B611 1104 1169 11A8 # 똑 => 똑 +B612 1104 1169 11A9 # 똒 => 똒 +B613 1104 1169 11AA # 똓 => 똓 +B614 1104 1169 11AB # 똔 => 똔 +B615 1104 1169 11AC # 똕 => 똕 +B616 1104 1169 11AD # 똖 => 똖 +B617 1104 1169 11AE # 똗 => 똗 +B618 1104 1169 11AF # 똘 => 똘 +B619 1104 1169 11B0 # 똙 => 똙 +B61A 1104 1169 11B1 # 똚 => 똚 +B61B 1104 1169 11B2 # 똛 => 똛 +B61C 1104 1169 11B3 # 똜 => 똜 +B61D 1104 1169 11B4 # 똝 => 똝 +B61E 1104 1169 11B5 # 똞 => 똞 +B61F 1104 1169 11B6 # 똟 => 똟 +B620 1104 1169 11B7 # 똠 => 똠 +B621 1104 1169 11B8 # 똡 => 똡 +B622 1104 1169 11B9 # 똢 => 똢 +B623 1104 1169 11BA # 똣 => 똣 +B624 1104 1169 11BB # 똤 => 똤 +B625 1104 1169 11BC # 똥 => 똥 +B626 1104 1169 11BD # 똦 => 똦 +B627 1104 1169 11BE # 똧 => 똧 +B628 1104 1169 11BF # 똨 => 똨 +B629 1104 1169 11C0 # 똩 => 똩 +B62A 1104 1169 11C1 # 똪 => 똪 +B62B 1104 1169 11C2 # 똫 => 똫 +B62C 1104 116A # 똬 => 똬 +B62D 1104 116A 11A8 # 똭 => 똭 +B62E 1104 116A 11A9 # 똮 => 똮 +B62F 1104 116A 11AA # 똯 => 똯 +B630 1104 116A 11AB # 똰 => 똰 +B631 1104 116A 11AC # 똱 => 똱 +B632 1104 116A 11AD # 똲 => 똲 +B633 1104 116A 11AE # 똳 => 똳 +B634 1104 116A 11AF # 똴 => 똴 +B635 1104 116A 11B0 # 똵 => 똵 +B636 1104 116A 11B1 # 똶 => 똶 +B637 1104 116A 11B2 # 똷 => 똷 +B638 1104 116A 11B3 # 똸 => 똸 +B639 1104 116A 11B4 # 똹 => 똹 +B63A 1104 116A 11B5 # 똺 => 똺 +B63B 1104 116A 11B6 # 똻 => 똻 +B63C 1104 116A 11B7 # 똼 => 똼 +B63D 1104 116A 11B8 # 똽 => 똽 +B63E 1104 116A 11B9 # 똾 => 똾 +B63F 1104 116A 11BA # 똿 => 똿 +B640 1104 116A 11BB # 뙀 => 뙀 +B641 1104 116A 11BC # 뙁 => 뙁 
+B642 1104 116A 11BD # 뙂 => 뙂 +B643 1104 116A 11BE # 뙃 => 뙃 +B644 1104 116A 11BF # 뙄 => 뙄 +B645 1104 116A 11C0 # 뙅 => 뙅 +B646 1104 116A 11C1 # 뙆 => 뙆 +B647 1104 116A 11C2 # 뙇 => 뙇 +B648 1104 116B # 뙈 => 뙈 +B649 1104 116B 11A8 # 뙉 => 뙉 +B64A 1104 116B 11A9 # 뙊 => 뙊 +B64B 1104 116B 11AA # 뙋 => 뙋 +B64C 1104 116B 11AB # 뙌 => 뙌 +B64D 1104 116B 11AC # 뙍 => 뙍 +B64E 1104 116B 11AD # 뙎 => 뙎 +B64F 1104 116B 11AE # 뙏 => 뙏 +B650 1104 116B 11AF # 뙐 => 뙐 +B651 1104 116B 11B0 # 뙑 => 뙑 +B652 1104 116B 11B1 # 뙒 => 뙒 +B653 1104 116B 11B2 # 뙓 => 뙓 +B654 1104 116B 11B3 # 뙔 => 뙔 +B655 1104 116B 11B4 # 뙕 => 뙕 +B656 1104 116B 11B5 # 뙖 => 뙖 +B657 1104 116B 11B6 # 뙗 => 뙗 +B658 1104 116B 11B7 # 뙘 => 뙘 +B659 1104 116B 11B8 # 뙙 => 뙙 +B65A 1104 116B 11B9 # 뙚 => 뙚 +B65B 1104 116B 11BA # 뙛 => 뙛 +B65C 1104 116B 11BB # 뙜 => 뙜 +B65D 1104 116B 11BC # 뙝 => 뙝 +B65E 1104 116B 11BD # 뙞 => 뙞 +B65F 1104 116B 11BE # 뙟 => 뙟 +B660 1104 116B 11BF # 뙠 => 뙠 +B661 1104 116B 11C0 # 뙡 => 뙡 +B662 1104 116B 11C1 # 뙢 => 뙢 +B663 1104 116B 11C2 # 뙣 => 뙣 +B664 1104 116C # 뙤 => 뙤 +B665 1104 116C 11A8 # 뙥 => 뙥 +B666 1104 116C 11A9 # 뙦 => 뙦 +B667 1104 116C 11AA # 뙧 => 뙧 +B668 1104 116C 11AB # 뙨 => 뙨 +B669 1104 116C 11AC # 뙩 => 뙩 +B66A 1104 116C 11AD # 뙪 => 뙪 +B66B 1104 116C 11AE # 뙫 => 뙫 +B66C 1104 116C 11AF # 뙬 => 뙬 +B66D 1104 116C 11B0 # 뙭 => 뙭 +B66E 1104 116C 11B1 # 뙮 => 뙮 +B66F 1104 116C 11B2 # 뙯 => 뙯 +B670 1104 116C 11B3 # 뙰 => 뙰 +B671 1104 116C 11B4 # 뙱 => 뙱 +B672 1104 116C 11B5 # 뙲 => 뙲 +B673 1104 116C 11B6 # 뙳 => 뙳 +B674 1104 116C 11B7 # 뙴 => 뙴 +B675 1104 116C 11B8 # 뙵 => 뙵 +B676 1104 116C 11B9 # 뙶 => 뙶 +B677 1104 116C 11BA # 뙷 => 뙷 +B678 1104 116C 11BB # 뙸 => 뙸 +B679 1104 116C 11BC # 뙹 => 뙹 +B67A 1104 116C 11BD # 뙺 => 뙺 +B67B 1104 116C 11BE # 뙻 => 뙻 +B67C 1104 116C 11BF # 뙼 => 뙼 +B67D 1104 116C 11C0 # 뙽 => 뙽 +B67E 1104 116C 11C1 # 뙾 => 뙾 +B67F 1104 116C 11C2 # 뙿 => 뙿 +B680 1104 116D # 뚀 => 뚀 +B681 1104 116D 11A8 # 뚁 => 뚁 +B682 1104 116D 11A9 # 뚂 => 뚂 +B683 1104 116D 11AA # 뚃 => 뚃 +B684 1104 116D 11AB # 뚄 => 뚄 
+B685 1104 116D 11AC # 뚅 => 뚅 +B686 1104 116D 11AD # 뚆 => 뚆 +B687 1104 116D 11AE # 뚇 => 뚇 +B688 1104 116D 11AF # 뚈 => 뚈 +B689 1104 116D 11B0 # 뚉 => 뚉 +B68A 1104 116D 11B1 # 뚊 => 뚊 +B68B 1104 116D 11B2 # 뚋 => 뚋 +B68C 1104 116D 11B3 # 뚌 => 뚌 +B68D 1104 116D 11B4 # 뚍 => 뚍 +B68E 1104 116D 11B5 # 뚎 => 뚎 +B68F 1104 116D 11B6 # 뚏 => 뚏 +B690 1104 116D 11B7 # 뚐 => 뚐 +B691 1104 116D 11B8 # 뚑 => 뚑 +B692 1104 116D 11B9 # 뚒 => 뚒 +B693 1104 116D 11BA # 뚓 => 뚓 +B694 1104 116D 11BB # 뚔 => 뚔 +B695 1104 116D 11BC # 뚕 => 뚕 +B696 1104 116D 11BD # 뚖 => 뚖 +B697 1104 116D 11BE # 뚗 => 뚗 +B698 1104 116D 11BF # 뚘 => 뚘 +B699 1104 116D 11C0 # 뚙 => 뚙 +B69A 1104 116D 11C1 # 뚚 => 뚚 +B69B 1104 116D 11C2 # 뚛 => 뚛 +B69C 1104 116E # 뚜 => 뚜 +B69D 1104 116E 11A8 # 뚝 => 뚝 +B69E 1104 116E 11A9 # 뚞 => 뚞 +B69F 1104 116E 11AA # 뚟 => 뚟 +B6A0 1104 116E 11AB # 뚠 => 뚠 +B6A1 1104 116E 11AC # 뚡 => 뚡 +B6A2 1104 116E 11AD # 뚢 => 뚢 +B6A3 1104 116E 11AE # 뚣 => 뚣 +B6A4 1104 116E 11AF # 뚤 => 뚤 +B6A5 1104 116E 11B0 # 뚥 => 뚥 +B6A6 1104 116E 11B1 # 뚦 => 뚦 +B6A7 1104 116E 11B2 # 뚧 => 뚧 +B6A8 1104 116E 11B3 # 뚨 => 뚨 +B6A9 1104 116E 11B4 # 뚩 => 뚩 +B6AA 1104 116E 11B5 # 뚪 => 뚪 +B6AB 1104 116E 11B6 # 뚫 => 뚫 +B6AC 1104 116E 11B7 # 뚬 => 뚬 +B6AD 1104 116E 11B8 # 뚭 => 뚭 +B6AE 1104 116E 11B9 # 뚮 => 뚮 +B6AF 1104 116E 11BA # 뚯 => 뚯 +B6B0 1104 116E 11BB # 뚰 => 뚰 +B6B1 1104 116E 11BC # 뚱 => 뚱 +B6B2 1104 116E 11BD # 뚲 => 뚲 +B6B3 1104 116E 11BE # 뚳 => 뚳 +B6B4 1104 116E 11BF # 뚴 => 뚴 +B6B5 1104 116E 11C0 # 뚵 => 뚵 +B6B6 1104 116E 11C1 # 뚶 => 뚶 +B6B7 1104 116E 11C2 # 뚷 => 뚷 +B6B8 1104 116F # 뚸 => 뚸 +B6B9 1104 116F 11A8 # 뚹 => 뚹 +B6BA 1104 116F 11A9 # 뚺 => 뚺 +B6BB 1104 116F 11AA # 뚻 => 뚻 +B6BC 1104 116F 11AB # 뚼 => 뚼 +B6BD 1104 116F 11AC # 뚽 => 뚽 +B6BE 1104 116F 11AD # 뚾 => 뚾 +B6BF 1104 116F 11AE # 뚿 => 뚿 +B6C0 1104 116F 11AF # 뛀 => 뛀 +B6C1 1104 116F 11B0 # 뛁 => 뛁 +B6C2 1104 116F 11B1 # 뛂 => 뛂 +B6C3 1104 116F 11B2 # 뛃 => 뛃 +B6C4 1104 116F 11B3 # 뛄 => 뛄 +B6C5 1104 116F 11B4 # 뛅 => 뛅 +B6C6 1104 116F 11B5 # 뛆 => 뛆 +B6C7 1104 116F 11B6 # 뛇 => 뛇 
+B6C8 1104 116F 11B7 # 뛈 => 뛈 +B6C9 1104 116F 11B8 # 뛉 => 뛉 +B6CA 1104 116F 11B9 # 뛊 => 뛊 +B6CB 1104 116F 11BA # 뛋 => 뛋 +B6CC 1104 116F 11BB # 뛌 => 뛌 +B6CD 1104 116F 11BC # 뛍 => 뛍 +B6CE 1104 116F 11BD # 뛎 => 뛎 +B6CF 1104 116F 11BE # 뛏 => 뛏 +B6D0 1104 116F 11BF # 뛐 => 뛐 +B6D1 1104 116F 11C0 # 뛑 => 뛑 +B6D2 1104 116F 11C1 # 뛒 => 뛒 +B6D3 1104 116F 11C2 # 뛓 => 뛓 +B6D4 1104 1170 # 뛔 => 뛔 +B6D5 1104 1170 11A8 # 뛕 => 뛕 +B6D6 1104 1170 11A9 # 뛖 => 뛖 +B6D7 1104 1170 11AA # 뛗 => 뛗 +B6D8 1104 1170 11AB # 뛘 => 뛘 +B6D9 1104 1170 11AC # 뛙 => 뛙 +B6DA 1104 1170 11AD # 뛚 => 뛚 +B6DB 1104 1170 11AE # 뛛 => 뛛 +B6DC 1104 1170 11AF # 뛜 => 뛜 +B6DD 1104 1170 11B0 # 뛝 => 뛝 +B6DE 1104 1170 11B1 # 뛞 => 뛞 +B6DF 1104 1170 11B2 # 뛟 => 뛟 +B6E0 1104 1170 11B3 # 뛠 => 뛠 +B6E1 1104 1170 11B4 # 뛡 => 뛡 +B6E2 1104 1170 11B5 # 뛢 => 뛢 +B6E3 1104 1170 11B6 # 뛣 => 뛣 +B6E4 1104 1170 11B7 # 뛤 => 뛤 +B6E5 1104 1170 11B8 # 뛥 => 뛥 +B6E6 1104 1170 11B9 # 뛦 => 뛦 +B6E7 1104 1170 11BA # 뛧 => 뛧 +B6E8 1104 1170 11BB # 뛨 => 뛨 +B6E9 1104 1170 11BC # 뛩 => 뛩 +B6EA 1104 1170 11BD # 뛪 => 뛪 +B6EB 1104 1170 11BE # 뛫 => 뛫 +B6EC 1104 1170 11BF # 뛬 => 뛬 +B6ED 1104 1170 11C0 # 뛭 => 뛭 +B6EE 1104 1170 11C1 # 뛮 => 뛮 +B6EF 1104 1170 11C2 # 뛯 => 뛯 +B6F0 1104 1171 # 뛰 => 뛰 +B6F1 1104 1171 11A8 # 뛱 => 뛱 +B6F2 1104 1171 11A9 # 뛲 => 뛲 +B6F3 1104 1171 11AA # 뛳 => 뛳 +B6F4 1104 1171 11AB # 뛴 => 뛴 +B6F5 1104 1171 11AC # 뛵 => 뛵 +B6F6 1104 1171 11AD # 뛶 => 뛶 +B6F7 1104 1171 11AE # 뛷 => 뛷 +B6F8 1104 1171 11AF # 뛸 => 뛸 +B6F9 1104 1171 11B0 # 뛹 => 뛹 +B6FA 1104 1171 11B1 # 뛺 => 뛺 +B6FB 1104 1171 11B2 # 뛻 => 뛻 +B6FC 1104 1171 11B3 # 뛼 => 뛼 +B6FD 1104 1171 11B4 # 뛽 => 뛽 +B6FE 1104 1171 11B5 # 뛾 => 뛾 +B6FF 1104 1171 11B6 # 뛿 => 뛿 +B700 1104 1171 11B7 # 뜀 => 뜀 +B701 1104 1171 11B8 # 뜁 => 뜁 +B702 1104 1171 11B9 # 뜂 => 뜂 +B703 1104 1171 11BA # 뜃 => 뜃 +B704 1104 1171 11BB # 뜄 => 뜄 +B705 1104 1171 11BC # 뜅 => 뜅 +B706 1104 1171 11BD # 뜆 => 뜆 +B707 1104 1171 11BE # 뜇 => 뜇 +B708 1104 1171 11BF # 뜈 => 뜈 +B709 1104 1171 11C0 # 뜉 => 뜉 +B70A 1104 1171 11C1 # 뜊 => 뜊 
+B70B 1104 1171 11C2 # 뜋 => 뜋 +B70C 1104 1172 # 뜌 => 뜌 +B70D 1104 1172 11A8 # 뜍 => 뜍 +B70E 1104 1172 11A9 # 뜎 => 뜎 +B70F 1104 1172 11AA # 뜏 => 뜏 +B710 1104 1172 11AB # 뜐 => 뜐 +B711 1104 1172 11AC # 뜑 => 뜑 +B712 1104 1172 11AD # 뜒 => 뜒 +B713 1104 1172 11AE # 뜓 => 뜓 +B714 1104 1172 11AF # 뜔 => 뜔 +B715 1104 1172 11B0 # 뜕 => 뜕 +B716 1104 1172 11B1 # 뜖 => 뜖 +B717 1104 1172 11B2 # 뜗 => 뜗 +B718 1104 1172 11B3 # 뜘 => 뜘 +B719 1104 1172 11B4 # 뜙 => 뜙 +B71A 1104 1172 11B5 # 뜚 => 뜚 +B71B 1104 1172 11B6 # 뜛 => 뜛 +B71C 1104 1172 11B7 # 뜜 => 뜜 +B71D 1104 1172 11B8 # 뜝 => 뜝 +B71E 1104 1172 11B9 # 뜞 => 뜞 +B71F 1104 1172 11BA # 뜟 => 뜟 +B720 1104 1172 11BB # 뜠 => 뜠 +B721 1104 1172 11BC # 뜡 => 뜡 +B722 1104 1172 11BD # 뜢 => 뜢 +B723 1104 1172 11BE # 뜣 => 뜣 +B724 1104 1172 11BF # 뜤 => 뜤 +B725 1104 1172 11C0 # 뜥 => 뜥 +B726 1104 1172 11C1 # 뜦 => 뜦 +B727 1104 1172 11C2 # 뜧 => 뜧 +B728 1104 1173 # 뜨 => 뜨 +B729 1104 1173 11A8 # 뜩 => 뜩 +B72A 1104 1173 11A9 # 뜪 => 뜪 +B72B 1104 1173 11AA # 뜫 => 뜫 +B72C 1104 1173 11AB # 뜬 => 뜬 +B72D 1104 1173 11AC # 뜭 => 뜭 +B72E 1104 1173 11AD # 뜮 => 뜮 +B72F 1104 1173 11AE # 뜯 => 뜯 +B730 1104 1173 11AF # 뜰 => 뜰 +B731 1104 1173 11B0 # 뜱 => 뜱 +B732 1104 1173 11B1 # 뜲 => 뜲 +B733 1104 1173 11B2 # 뜳 => 뜳 +B734 1104 1173 11B3 # 뜴 => 뜴 +B735 1104 1173 11B4 # 뜵 => 뜵 +B736 1104 1173 11B5 # 뜶 => 뜶 +B737 1104 1173 11B6 # 뜷 => 뜷 +B738 1104 1173 11B7 # 뜸 => 뜸 +B739 1104 1173 11B8 # 뜹 => 뜹 +B73A 1104 1173 11B9 # 뜺 => 뜺 +B73B 1104 1173 11BA # 뜻 => 뜻 +B73C 1104 1173 11BB # 뜼 => 뜼 +B73D 1104 1173 11BC # 뜽 => 뜽 +B73E 1104 1173 11BD # 뜾 => 뜾 +B73F 1104 1173 11BE # 뜿 => 뜿 +B740 1104 1173 11BF # 띀 => 띀 +B741 1104 1173 11C0 # 띁 => 띁 +B742 1104 1173 11C1 # 띂 => 띂 +B743 1104 1173 11C2 # 띃 => 띃 +B744 1104 1174 # 띄 => 띄 +B745 1104 1174 11A8 # 띅 => 띅 +B746 1104 1174 11A9 # 띆 => 띆 +B747 1104 1174 11AA # 띇 => 띇 +B748 1104 1174 11AB # 띈 => 띈 +B749 1104 1174 11AC # 띉 => 띉 +B74A 1104 1174 11AD # 띊 => 띊 +B74B 1104 1174 11AE # 띋 => 띋 +B74C 1104 1174 11AF # 띌 => 띌 +B74D 1104 1174 11B0 # 띍 => 띍 
+B74E 1104 1174 11B1 # 띎 => 띎 +B74F 1104 1174 11B2 # 띏 => 띏 +B750 1104 1174 11B3 # 띐 => 띐 +B751 1104 1174 11B4 # 띑 => 띑 +B752 1104 1174 11B5 # 띒 => 띒 +B753 1104 1174 11B6 # 띓 => 띓 +B754 1104 1174 11B7 # 띔 => 띔 +B755 1104 1174 11B8 # 띕 => 띕 +B756 1104 1174 11B9 # 띖 => 띖 +B757 1104 1174 11BA # 띗 => 띗 +B758 1104 1174 11BB # 띘 => 띘 +B759 1104 1174 11BC # 띙 => 띙 +B75A 1104 1174 11BD # 띚 => 띚 +B75B 1104 1174 11BE # 띛 => 띛 +B75C 1104 1174 11BF # 띜 => 띜 +B75D 1104 1174 11C0 # 띝 => 띝 +B75E 1104 1174 11C1 # 띞 => 띞 +B75F 1104 1174 11C2 # 띟 => 띟 +B760 1104 1175 # 띠 => 띠 +B761 1104 1175 11A8 # 띡 => 띡 +B762 1104 1175 11A9 # 띢 => 띢 +B763 1104 1175 11AA # 띣 => 띣 +B764 1104 1175 11AB # 띤 => 띤 +B765 1104 1175 11AC # 띥 => 띥 +B766 1104 1175 11AD # 띦 => 띦 +B767 1104 1175 11AE # 띧 => 띧 +B768 1104 1175 11AF # 띨 => 띨 +B769 1104 1175 11B0 # 띩 => 띩 +B76A 1104 1175 11B1 # 띪 => 띪 +B76B 1104 1175 11B2 # 띫 => 띫 +B76C 1104 1175 11B3 # 띬 => 띬 +B76D 1104 1175 11B4 # 띭 => 띭 +B76E 1104 1175 11B5 # 띮 => 띮 +B76F 1104 1175 11B6 # 띯 => 띯 +B770 1104 1175 11B7 # 띰 => 띰 +B771 1104 1175 11B8 # 띱 => 띱 +B772 1104 1175 11B9 # 띲 => 띲 +B773 1104 1175 11BA # 띳 => 띳 +B774 1104 1175 11BB # 띴 => 띴 +B775 1104 1175 11BC # 띵 => 띵 +B776 1104 1175 11BD # 띶 => 띶 +B777 1104 1175 11BE # 띷 => 띷 +B778 1104 1175 11BF # 띸 => 띸 +B779 1104 1175 11C0 # 띹 => 띹 +B77A 1104 1175 11C1 # 띺 => 띺 +B77B 1104 1175 11C2 # 띻 => 띻 +B77C 1105 1161 # 라 => 라 +B77D 1105 1161 11A8 # 락 => 락 +B77E 1105 1161 11A9 # 띾 => 띾 +B77F 1105 1161 11AA # 띿 => 띿 +B780 1105 1161 11AB # 란 => 란 +B781 1105 1161 11AC # 랁 => 랁 +B782 1105 1161 11AD # 랂 => 랂 +B783 1105 1161 11AE # 랃 => 랃 +B784 1105 1161 11AF # 랄 => 랄 +B785 1105 1161 11B0 # 랅 => 랅 +B786 1105 1161 11B1 # 랆 => 랆 +B787 1105 1161 11B2 # 랇 => 랇 +B788 1105 1161 11B3 # 랈 => 랈 +B789 1105 1161 11B4 # 랉 => 랉 +B78A 1105 1161 11B5 # 랊 => 랊 +B78B 1105 1161 11B6 # 랋 => 랋 +B78C 1105 1161 11B7 # 람 => 람 +B78D 1105 1161 11B8 # 랍 => 랍 +B78E 1105 1161 11B9 # 랎 => 랎 +B78F 1105 1161 11BA # 랏 => 랏 +B790 1105 1161 11BB # 랐 => 랐 
+B791 1105 1161 11BC # 랑 => 랑 +B792 1105 1161 11BD # 랒 => 랒 +B793 1105 1161 11BE # 랓 => 랓 +B794 1105 1161 11BF # 랔 => 랔 +B795 1105 1161 11C0 # 랕 => 랕 +B796 1105 1161 11C1 # 랖 => 랖 +B797 1105 1161 11C2 # 랗 => 랗 +B798 1105 1162 # 래 => 래 +B799 1105 1162 11A8 # 랙 => 랙 +B79A 1105 1162 11A9 # 랚 => 랚 +B79B 1105 1162 11AA # 랛 => 랛 +B79C 1105 1162 11AB # 랜 => 랜 +B79D 1105 1162 11AC # 랝 => 랝 +B79E 1105 1162 11AD # 랞 => 랞 +B79F 1105 1162 11AE # 랟 => 랟 +B7A0 1105 1162 11AF # 랠 => 랠 +B7A1 1105 1162 11B0 # 랡 => 랡 +B7A2 1105 1162 11B1 # 랢 => 랢 +B7A3 1105 1162 11B2 # 랣 => 랣 +B7A4 1105 1162 11B3 # 랤 => 랤 +B7A5 1105 1162 11B4 # 랥 => 랥 +B7A6 1105 1162 11B5 # 랦 => 랦 +B7A7 1105 1162 11B6 # 랧 => 랧 +B7A8 1105 1162 11B7 # 램 => 램 +B7A9 1105 1162 11B8 # 랩 => 랩 +B7AA 1105 1162 11B9 # 랪 => 랪 +B7AB 1105 1162 11BA # 랫 => 랫 +B7AC 1105 1162 11BB # 랬 => 랬 +B7AD 1105 1162 11BC # 랭 => 랭 +B7AE 1105 1162 11BD # 랮 => 랮 +B7AF 1105 1162 11BE # 랯 => 랯 +B7B0 1105 1162 11BF # 랰 => 랰 +B7B1 1105 1162 11C0 # 랱 => 랱 +B7B2 1105 1162 11C1 # 랲 => 랲 +B7B3 1105 1162 11C2 # 랳 => 랳 +B7B4 1105 1163 # 랴 => 랴 +B7B5 1105 1163 11A8 # 략 => 략 +B7B6 1105 1163 11A9 # 랶 => 랶 +B7B7 1105 1163 11AA # 랷 => 랷 +B7B8 1105 1163 11AB # 랸 => 랸 +B7B9 1105 1163 11AC # 랹 => 랹 +B7BA 1105 1163 11AD # 랺 => 랺 +B7BB 1105 1163 11AE # 랻 => 랻 +B7BC 1105 1163 11AF # 랼 => 랼 +B7BD 1105 1163 11B0 # 랽 => 랽 +B7BE 1105 1163 11B1 # 랾 => 랾 +B7BF 1105 1163 11B2 # 랿 => 랿 +B7C0 1105 1163 11B3 # 럀 => 럀 +B7C1 1105 1163 11B4 # 럁 => 럁 +B7C2 1105 1163 11B5 # 럂 => 럂 +B7C3 1105 1163 11B6 # 럃 => 럃 +B7C4 1105 1163 11B7 # 럄 => 럄 +B7C5 1105 1163 11B8 # 럅 => 럅 +B7C6 1105 1163 11B9 # 럆 => 럆 +B7C7 1105 1163 11BA # 럇 => 럇 +B7C8 1105 1163 11BB # 럈 => 럈 +B7C9 1105 1163 11BC # 량 => 량 +B7CA 1105 1163 11BD # 럊 => 럊 +B7CB 1105 1163 11BE # 럋 => 럋 +B7CC 1105 1163 11BF # 럌 => 럌 +B7CD 1105 1163 11C0 # 럍 => 럍 +B7CE 1105 1163 11C1 # 럎 => 럎 +B7CF 1105 1163 11C2 # 럏 => 럏 +B7D0 1105 1164 # 럐 => 럐 +B7D1 1105 1164 11A8 # 럑 => 럑 +B7D2 1105 1164 11A9 # 럒 => 럒 +B7D3 1105 1164 11AA # 럓 => 럓 
+B7D4 1105 1164 11AB # 럔 => 럔 +B7D5 1105 1164 11AC # 럕 => 럕 +B7D6 1105 1164 11AD # 럖 => 럖 +B7D7 1105 1164 11AE # 럗 => 럗 +B7D8 1105 1164 11AF # 럘 => 럘 +B7D9 1105 1164 11B0 # 럙 => 럙 +B7DA 1105 1164 11B1 # 럚 => 럚 +B7DB 1105 1164 11B2 # 럛 => 럛 +B7DC 1105 1164 11B3 # 럜 => 럜 +B7DD 1105 1164 11B4 # 럝 => 럝 +B7DE 1105 1164 11B5 # 럞 => 럞 +B7DF 1105 1164 11B6 # 럟 => 럟 +B7E0 1105 1164 11B7 # 럠 => 럠 +B7E1 1105 1164 11B8 # 럡 => 럡 +B7E2 1105 1164 11B9 # 럢 => 럢 +B7E3 1105 1164 11BA # 럣 => 럣 +B7E4 1105 1164 11BB # 럤 => 럤 +B7E5 1105 1164 11BC # 럥 => 럥 +B7E6 1105 1164 11BD # 럦 => 럦 +B7E7 1105 1164 11BE # 럧 => 럧 +B7E8 1105 1164 11BF # 럨 => 럨 +B7E9 1105 1164 11C0 # 럩 => 럩 +B7EA 1105 1164 11C1 # 럪 => 럪 +B7EB 1105 1164 11C2 # 럫 => 럫 +B7EC 1105 1165 # 러 => 러 +B7ED 1105 1165 11A8 # 럭 => 럭 +B7EE 1105 1165 11A9 # 럮 => 럮 +B7EF 1105 1165 11AA # 럯 => 럯 +B7F0 1105 1165 11AB # 런 => 런 +B7F1 1105 1165 11AC # 럱 => 럱 +B7F2 1105 1165 11AD # 럲 => 럲 +B7F3 1105 1165 11AE # 럳 => 럳 +B7F4 1105 1165 11AF # 럴 => 럴 +B7F5 1105 1165 11B0 # 럵 => 럵 +B7F6 1105 1165 11B1 # 럶 => 럶 +B7F7 1105 1165 11B2 # 럷 => 럷 +B7F8 1105 1165 11B3 # 럸 => 럸 +B7F9 1105 1165 11B4 # 럹 => 럹 +B7FA 1105 1165 11B5 # 럺 => 럺 +B7FB 1105 1165 11B6 # 럻 => 럻 +B7FC 1105 1165 11B7 # 럼 => 럼 +B7FD 1105 1165 11B8 # 럽 => 럽 +B7FE 1105 1165 11B9 # 럾 => 럾 +B7FF 1105 1165 11BA # 럿 => 럿 +B800 1105 1165 11BB # 렀 => 렀 +B801 1105 1165 11BC # 렁 => 렁 +B802 1105 1165 11BD # 렂 => 렂 +B803 1105 1165 11BE # 렃 => 렃 +B804 1105 1165 11BF # 렄 => 렄 +B805 1105 1165 11C0 # 렅 => 렅 +B806 1105 1165 11C1 # 렆 => 렆 +B807 1105 1165 11C2 # 렇 => 렇 +B808 1105 1166 # 레 => 레 +B809 1105 1166 11A8 # 렉 => 렉 +B80A 1105 1166 11A9 # 렊 => 렊 +B80B 1105 1166 11AA # 렋 => 렋 +B80C 1105 1166 11AB # 렌 => 렌 +B80D 1105 1166 11AC # 렍 => 렍 +B80E 1105 1166 11AD # 렎 => 렎 +B80F 1105 1166 11AE # 렏 => 렏 +B810 1105 1166 11AF # 렐 => 렐 +B811 1105 1166 11B0 # 렑 => 렑 +B812 1105 1166 11B1 # 렒 => 렒 +B813 1105 1166 11B2 # 렓 => 렓 +B814 1105 1166 11B3 # 렔 => 렔 +B815 1105 1166 11B4 # 렕 => 렕 +B816 1105 1166 11B5 # 렖 => 렖 
+B817 1105 1166 11B6 # 렗 => 렗 +B818 1105 1166 11B7 # 렘 => 렘 +B819 1105 1166 11B8 # 렙 => 렙 +B81A 1105 1166 11B9 # 렚 => 렚 +B81B 1105 1166 11BA # 렛 => 렛 +B81C 1105 1166 11BB # 렜 => 렜 +B81D 1105 1166 11BC # 렝 => 렝 +B81E 1105 1166 11BD # 렞 => 렞 +B81F 1105 1166 11BE # 렟 => 렟 +B820 1105 1166 11BF # 렠 => 렠 +B821 1105 1166 11C0 # 렡 => 렡 +B822 1105 1166 11C1 # 렢 => 렢 +B823 1105 1166 11C2 # 렣 => 렣 +B824 1105 1167 # 려 => 려 +B825 1105 1167 11A8 # 력 => 력 +B826 1105 1167 11A9 # 렦 => 렦 +B827 1105 1167 11AA # 렧 => 렧 +B828 1105 1167 11AB # 련 => 련 +B829 1105 1167 11AC # 렩 => 렩 +B82A 1105 1167 11AD # 렪 => 렪 +B82B 1105 1167 11AE # 렫 => 렫 +B82C 1105 1167 11AF # 렬 => 렬 +B82D 1105 1167 11B0 # 렭 => 렭 +B82E 1105 1167 11B1 # 렮 => 렮 +B82F 1105 1167 11B2 # 렯 => 렯 +B830 1105 1167 11B3 # 렰 => 렰 +B831 1105 1167 11B4 # 렱 => 렱 +B832 1105 1167 11B5 # 렲 => 렲 +B833 1105 1167 11B6 # 렳 => 렳 +B834 1105 1167 11B7 # 렴 => 렴 +B835 1105 1167 11B8 # 렵 => 렵 +B836 1105 1167 11B9 # 렶 => 렶 +B837 1105 1167 11BA # 렷 => 렷 +B838 1105 1167 11BB # 렸 => 렸 +B839 1105 1167 11BC # 령 => 령 +B83A 1105 1167 11BD # 렺 => 렺 +B83B 1105 1167 11BE # 렻 => 렻 +B83C 1105 1167 11BF # 렼 => 렼 +B83D 1105 1167 11C0 # 렽 => 렽 +B83E 1105 1167 11C1 # 렾 => 렾 +B83F 1105 1167 11C2 # 렿 => 렿 +B840 1105 1168 # 례 => 례 +B841 1105 1168 11A8 # 롁 => 롁 +B842 1105 1168 11A9 # 롂 => 롂 +B843 1105 1168 11AA # 롃 => 롃 +B844 1105 1168 11AB # 롄 => 롄 +B845 1105 1168 11AC # 롅 => 롅 +B846 1105 1168 11AD # 롆 => 롆 +B847 1105 1168 11AE # 롇 => 롇 +B848 1105 1168 11AF # 롈 => 롈 +B849 1105 1168 11B0 # 롉 => 롉 +B84A 1105 1168 11B1 # 롊 => 롊 +B84B 1105 1168 11B2 # 롋 => 롋 +B84C 1105 1168 11B3 # 롌 => 롌 +B84D 1105 1168 11B4 # 롍 => 롍 +B84E 1105 1168 11B5 # 롎 => 롎 +B84F 1105 1168 11B6 # 롏 => 롏 +B850 1105 1168 11B7 # 롐 => 롐 +B851 1105 1168 11B8 # 롑 => 롑 +B852 1105 1168 11B9 # 롒 => 롒 +B853 1105 1168 11BA # 롓 => 롓 +B854 1105 1168 11BB # 롔 => 롔 +B855 1105 1168 11BC # 롕 => 롕 +B856 1105 1168 11BD # 롖 => 롖 +B857 1105 1168 11BE # 롗 => 롗 +B858 1105 1168 11BF # 롘 => 롘 +B859 1105 1168 11C0 # 롙 => 롙 
+B85A 1105 1168 11C1 # 롚 => 롚 +B85B 1105 1168 11C2 # 롛 => 롛 +B85C 1105 1169 # 로 => 로 +B85D 1105 1169 11A8 # 록 => 록 +B85E 1105 1169 11A9 # 롞 => 롞 +B85F 1105 1169 11AA # 롟 => 롟 +B860 1105 1169 11AB # 론 => 론 +B861 1105 1169 11AC # 롡 => 롡 +B862 1105 1169 11AD # 롢 => 롢 +B863 1105 1169 11AE # 롣 => 롣 +B864 1105 1169 11AF # 롤 => 롤 +B865 1105 1169 11B0 # 롥 => 롥 +B866 1105 1169 11B1 # 롦 => 롦 +B867 1105 1169 11B2 # 롧 => 롧 +B868 1105 1169 11B3 # 롨 => 롨 +B869 1105 1169 11B4 # 롩 => 롩 +B86A 1105 1169 11B5 # 롪 => 롪 +B86B 1105 1169 11B6 # 롫 => 롫 +B86C 1105 1169 11B7 # 롬 => 롬 +B86D 1105 1169 11B8 # 롭 => 롭 +B86E 1105 1169 11B9 # 롮 => 롮 +B86F 1105 1169 11BA # 롯 => 롯 +B870 1105 1169 11BB # 롰 => 롰 +B871 1105 1169 11BC # 롱 => 롱 +B872 1105 1169 11BD # 롲 => 롲 +B873 1105 1169 11BE # 롳 => 롳 +B874 1105 1169 11BF # 롴 => 롴 +B875 1105 1169 11C0 # 롵 => 롵 +B876 1105 1169 11C1 # 롶 => 롶 +B877 1105 1169 11C2 # 롷 => 롷 +B878 1105 116A # 롸 => 롸 +B879 1105 116A 11A8 # 롹 => 롹 +B87A 1105 116A 11A9 # 롺 => 롺 +B87B 1105 116A 11AA # 롻 => 롻 +B87C 1105 116A 11AB # 롼 => 롼 +B87D 1105 116A 11AC # 롽 => 롽 +B87E 1105 116A 11AD # 롾 => 롾 +B87F 1105 116A 11AE # 롿 => 롿 +B880 1105 116A 11AF # 뢀 => 뢀 +B881 1105 116A 11B0 # 뢁 => 뢁 +B882 1105 116A 11B1 # 뢂 => 뢂 +B883 1105 116A 11B2 # 뢃 => 뢃 +B884 1105 116A 11B3 # 뢄 => 뢄 +B885 1105 116A 11B4 # 뢅 => 뢅 +B886 1105 116A 11B5 # 뢆 => 뢆 +B887 1105 116A 11B6 # 뢇 => 뢇 +B888 1105 116A 11B7 # 뢈 => 뢈 +B889 1105 116A 11B8 # 뢉 => 뢉 +B88A 1105 116A 11B9 # 뢊 => 뢊 +B88B 1105 116A 11BA # 뢋 => 뢋 +B88C 1105 116A 11BB # 뢌 => 뢌 +B88D 1105 116A 11BC # 뢍 => 뢍 +B88E 1105 116A 11BD # 뢎 => 뢎 +B88F 1105 116A 11BE # 뢏 => 뢏 +B890 1105 116A 11BF # 뢐 => 뢐 +B891 1105 116A 11C0 # 뢑 => 뢑 +B892 1105 116A 11C1 # 뢒 => 뢒 +B893 1105 116A 11C2 # 뢓 => 뢓 +B894 1105 116B # 뢔 => 뢔 +B895 1105 116B 11A8 # 뢕 => 뢕 +B896 1105 116B 11A9 # 뢖 => 뢖 +B897 1105 116B 11AA # 뢗 => 뢗 +B898 1105 116B 11AB # 뢘 => 뢘 +B899 1105 116B 11AC # 뢙 => 뢙 +B89A 1105 116B 11AD # 뢚 => 뢚 +B89B 1105 116B 11AE # 뢛 => 뢛 +B89C 1105 116B 11AF # 뢜 => 뢜 
+B89D 1105 116B 11B0 # 뢝 => 뢝 +B89E 1105 116B 11B1 # 뢞 => 뢞 +B89F 1105 116B 11B2 # 뢟 => 뢟 +B8A0 1105 116B 11B3 # 뢠 => 뢠 +B8A1 1105 116B 11B4 # 뢡 => 뢡 +B8A2 1105 116B 11B5 # 뢢 => 뢢 +B8A3 1105 116B 11B6 # 뢣 => 뢣 +B8A4 1105 116B 11B7 # 뢤 => 뢤 +B8A5 1105 116B 11B8 # 뢥 => 뢥 +B8A6 1105 116B 11B9 # 뢦 => 뢦 +B8A7 1105 116B 11BA # 뢧 => 뢧 +B8A8 1105 116B 11BB # 뢨 => 뢨 +B8A9 1105 116B 11BC # 뢩 => 뢩 +B8AA 1105 116B 11BD # 뢪 => 뢪 +B8AB 1105 116B 11BE # 뢫 => 뢫 +B8AC 1105 116B 11BF # 뢬 => 뢬 +B8AD 1105 116B 11C0 # 뢭 => 뢭 +B8AE 1105 116B 11C1 # 뢮 => 뢮 +B8AF 1105 116B 11C2 # 뢯 => 뢯 +B8B0 1105 116C # 뢰 => 뢰 +B8B1 1105 116C 11A8 # 뢱 => 뢱 +B8B2 1105 116C 11A9 # 뢲 => 뢲 +B8B3 1105 116C 11AA # 뢳 => 뢳 +B8B4 1105 116C 11AB # 뢴 => 뢴 +B8B5 1105 116C 11AC # 뢵 => 뢵 +B8B6 1105 116C 11AD # 뢶 => 뢶 +B8B7 1105 116C 11AE # 뢷 => 뢷 +B8B8 1105 116C 11AF # 뢸 => 뢸 +B8B9 1105 116C 11B0 # 뢹 => 뢹 +B8BA 1105 116C 11B1 # 뢺 => 뢺 +B8BB 1105 116C 11B2 # 뢻 => 뢻 +B8BC 1105 116C 11B3 # 뢼 => 뢼 +B8BD 1105 116C 11B4 # 뢽 => 뢽 +B8BE 1105 116C 11B5 # 뢾 => 뢾 +B8BF 1105 116C 11B6 # 뢿 => 뢿 +B8C0 1105 116C 11B7 # 룀 => 룀 +B8C1 1105 116C 11B8 # 룁 => 룁 +B8C2 1105 116C 11B9 # 룂 => 룂 +B8C3 1105 116C 11BA # 룃 => 룃 +B8C4 1105 116C 11BB # 룄 => 룄 +B8C5 1105 116C 11BC # 룅 => 룅 +B8C6 1105 116C 11BD # 룆 => 룆 +B8C7 1105 116C 11BE # 룇 => 룇 +B8C8 1105 116C 11BF # 룈 => 룈 +B8C9 1105 116C 11C0 # 룉 => 룉 +B8CA 1105 116C 11C1 # 룊 => 룊 +B8CB 1105 116C 11C2 # 룋 => 룋 +B8CC 1105 116D # 료 => 료 +B8CD 1105 116D 11A8 # 룍 => 룍 +B8CE 1105 116D 11A9 # 룎 => 룎 +B8CF 1105 116D 11AA # 룏 => 룏 +B8D0 1105 116D 11AB # 룐 => 룐 +B8D1 1105 116D 11AC # 룑 => 룑 +B8D2 1105 116D 11AD # 룒 => 룒 +B8D3 1105 116D 11AE # 룓 => 룓 +B8D4 1105 116D 11AF # 룔 => 룔 +B8D5 1105 116D 11B0 # 룕 => 룕 +B8D6 1105 116D 11B1 # 룖 => 룖 +B8D7 1105 116D 11B2 # 룗 => 룗 +B8D8 1105 116D 11B3 # 룘 => 룘 +B8D9 1105 116D 11B4 # 룙 => 룙 +B8DA 1105 116D 11B5 # 룚 => 룚 +B8DB 1105 116D 11B6 # 룛 => 룛 +B8DC 1105 116D 11B7 # 룜 => 룜 +B8DD 1105 116D 11B8 # 룝 => 룝 +B8DE 1105 116D 11B9 # 룞 => 룞 +B8DF 1105 116D 11BA # 룟 => 룟 
+B8E0 1105 116D 11BB # 룠 => 룠 +B8E1 1105 116D 11BC # 룡 => 룡 +B8E2 1105 116D 11BD # 룢 => 룢 +B8E3 1105 116D 11BE # 룣 => 룣 +B8E4 1105 116D 11BF # 룤 => 룤 +B8E5 1105 116D 11C0 # 룥 => 룥 +B8E6 1105 116D 11C1 # 룦 => 룦 +B8E7 1105 116D 11C2 # 룧 => 룧 +B8E8 1105 116E # 루 => 루 +B8E9 1105 116E 11A8 # 룩 => 룩 +B8EA 1105 116E 11A9 # 룪 => 룪 +B8EB 1105 116E 11AA # 룫 => 룫 +B8EC 1105 116E 11AB # 룬 => 룬 +B8ED 1105 116E 11AC # 룭 => 룭 +B8EE 1105 116E 11AD # 룮 => 룮 +B8EF 1105 116E 11AE # 룯 => 룯 +B8F0 1105 116E 11AF # 룰 => 룰 +B8F1 1105 116E 11B0 # 룱 => 룱 +B8F2 1105 116E 11B1 # 룲 => 룲 +B8F3 1105 116E 11B2 # 룳 => 룳 +B8F4 1105 116E 11B3 # 룴 => 룴 +B8F5 1105 116E 11B4 # 룵 => 룵 +B8F6 1105 116E 11B5 # 룶 => 룶 +B8F7 1105 116E 11B6 # 룷 => 룷 +B8F8 1105 116E 11B7 # 룸 => 룸 +B8F9 1105 116E 11B8 # 룹 => 룹 +B8FA 1105 116E 11B9 # 룺 => 룺 +B8FB 1105 116E 11BA # 룻 => 룻 +B8FC 1105 116E 11BB # 룼 => 룼 +B8FD 1105 116E 11BC # 룽 => 룽 +B8FE 1105 116E 11BD # 룾 => 룾 +B8FF 1105 116E 11BE # 룿 => 룿 +B900 1105 116E 11BF # 뤀 => 뤀 +B901 1105 116E 11C0 # 뤁 => 뤁 +B902 1105 116E 11C1 # 뤂 => 뤂 +B903 1105 116E 11C2 # 뤃 => 뤃 +B904 1105 116F # 뤄 => 뤄 +B905 1105 116F 11A8 # 뤅 => 뤅 +B906 1105 116F 11A9 # 뤆 => 뤆 +B907 1105 116F 11AA # 뤇 => 뤇 +B908 1105 116F 11AB # 뤈 => 뤈 +B909 1105 116F 11AC # 뤉 => 뤉 +B90A 1105 116F 11AD # 뤊 => 뤊 +B90B 1105 116F 11AE # 뤋 => 뤋 +B90C 1105 116F 11AF # 뤌 => 뤌 +B90D 1105 116F 11B0 # 뤍 => 뤍 +B90E 1105 116F 11B1 # 뤎 => 뤎 +B90F 1105 116F 11B2 # 뤏 => 뤏 +B910 1105 116F 11B3 # 뤐 => 뤐 +B911 1105 116F 11B4 # 뤑 => 뤑 +B912 1105 116F 11B5 # 뤒 => 뤒 +B913 1105 116F 11B6 # 뤓 => 뤓 +B914 1105 116F 11B7 # 뤔 => 뤔 +B915 1105 116F 11B8 # 뤕 => 뤕 +B916 1105 116F 11B9 # 뤖 => 뤖 +B917 1105 116F 11BA # 뤗 => 뤗 +B918 1105 116F 11BB # 뤘 => 뤘 +B919 1105 116F 11BC # 뤙 => 뤙 +B91A 1105 116F 11BD # 뤚 => 뤚 +B91B 1105 116F 11BE # 뤛 => 뤛 +B91C 1105 116F 11BF # 뤜 => 뤜 +B91D 1105 116F 11C0 # 뤝 => 뤝 +B91E 1105 116F 11C1 # 뤞 => 뤞 +B91F 1105 116F 11C2 # 뤟 => 뤟 +B920 1105 1170 # 뤠 => 뤠 +B921 1105 1170 11A8 # 뤡 => 뤡 +B922 1105 1170 11A9 # 뤢 => 뤢 
+B923 1105 1170 11AA # 뤣 => 뤣 +B924 1105 1170 11AB # 뤤 => 뤤 +B925 1105 1170 11AC # 뤥 => 뤥 +B926 1105 1170 11AD # 뤦 => 뤦 +B927 1105 1170 11AE # 뤧 => 뤧 +B928 1105 1170 11AF # 뤨 => 뤨 +B929 1105 1170 11B0 # 뤩 => 뤩 +B92A 1105 1170 11B1 # 뤪 => 뤪 +B92B 1105 1170 11B2 # 뤫 => 뤫 +B92C 1105 1170 11B3 # 뤬 => 뤬 +B92D 1105 1170 11B4 # 뤭 => 뤭 +B92E 1105 1170 11B5 # 뤮 => 뤮 +B92F 1105 1170 11B6 # 뤯 => 뤯 +B930 1105 1170 11B7 # 뤰 => 뤰 +B931 1105 1170 11B8 # 뤱 => 뤱 +B932 1105 1170 11B9 # 뤲 => 뤲 +B933 1105 1170 11BA # 뤳 => 뤳 +B934 1105 1170 11BB # 뤴 => 뤴 +B935 1105 1170 11BC # 뤵 => 뤵 +B936 1105 1170 11BD # 뤶 => 뤶 +B937 1105 1170 11BE # 뤷 => 뤷 +B938 1105 1170 11BF # 뤸 => 뤸 +B939 1105 1170 11C0 # 뤹 => 뤹 +B93A 1105 1170 11C1 # 뤺 => 뤺 +B93B 1105 1170 11C2 # 뤻 => 뤻 +B93C 1105 1171 # 뤼 => 뤼 +B93D 1105 1171 11A8 # 뤽 => 뤽 +B93E 1105 1171 11A9 # 뤾 => 뤾 +B93F 1105 1171 11AA # 뤿 => 뤿 +B940 1105 1171 11AB # 륀 => 륀 +B941 1105 1171 11AC # 륁 => 륁 +B942 1105 1171 11AD # 륂 => 륂 +B943 1105 1171 11AE # 륃 => 륃 +B944 1105 1171 11AF # 륄 => 륄 +B945 1105 1171 11B0 # 륅 => 륅 +B946 1105 1171 11B1 # 륆 => 륆 +B947 1105 1171 11B2 # 륇 => 륇 +B948 1105 1171 11B3 # 륈 => 륈 +B949 1105 1171 11B4 # 륉 => 륉 +B94A 1105 1171 11B5 # 륊 => 륊 +B94B 1105 1171 11B6 # 륋 => 륋 +B94C 1105 1171 11B7 # 륌 => 륌 +B94D 1105 1171 11B8 # 륍 => 륍 +B94E 1105 1171 11B9 # 륎 => 륎 +B94F 1105 1171 11BA # 륏 => 륏 +B950 1105 1171 11BB # 륐 => 륐 +B951 1105 1171 11BC # 륑 => 륑 +B952 1105 1171 11BD # 륒 => 륒 +B953 1105 1171 11BE # 륓 => 륓 +B954 1105 1171 11BF # 륔 => 륔 +B955 1105 1171 11C0 # 륕 => 륕 +B956 1105 1171 11C1 # 륖 => 륖 +B957 1105 1171 11C2 # 륗 => 륗 +B958 1105 1172 # 류 => 류 +B959 1105 1172 11A8 # 륙 => 륙 +B95A 1105 1172 11A9 # 륚 => 륚 +B95B 1105 1172 11AA # 륛 => 륛 +B95C 1105 1172 11AB # 륜 => 륜 +B95D 1105 1172 11AC # 륝 => 륝 +B95E 1105 1172 11AD # 륞 => 륞 +B95F 1105 1172 11AE # 륟 => 륟 +B960 1105 1172 11AF # 률 => 률 +B961 1105 1172 11B0 # 륡 => 륡 +B962 1105 1172 11B1 # 륢 => 륢 +B963 1105 1172 11B2 # 륣 => 륣 +B964 1105 1172 11B3 # 륤 => 륤 +B965 1105 1172 11B4 # 륥 => 륥 
+B966 1105 1172 11B5 # 륦 => 륦 +B967 1105 1172 11B6 # 륧 => 륧 +B968 1105 1172 11B7 # 륨 => 륨 +B969 1105 1172 11B8 # 륩 => 륩 +B96A 1105 1172 11B9 # 륪 => 륪 +B96B 1105 1172 11BA # 륫 => 륫 +B96C 1105 1172 11BB # 륬 => 륬 +B96D 1105 1172 11BC # 륭 => 륭 +B96E 1105 1172 11BD # 륮 => 륮 +B96F 1105 1172 11BE # 륯 => 륯 +B970 1105 1172 11BF # 륰 => 륰 +B971 1105 1172 11C0 # 륱 => 륱 +B972 1105 1172 11C1 # 륲 => 륲 +B973 1105 1172 11C2 # 륳 => 륳 +B974 1105 1173 # 르 => 르 +B975 1105 1173 11A8 # 륵 => 륵 +B976 1105 1173 11A9 # 륶 => 륶 +B977 1105 1173 11AA # 륷 => 륷 +B978 1105 1173 11AB # 른 => 른 +B979 1105 1173 11AC # 륹 => 륹 +B97A 1105 1173 11AD # 륺 => 륺 +B97B 1105 1173 11AE # 륻 => 륻 +B97C 1105 1173 11AF # 를 => 를 +B97D 1105 1173 11B0 # 륽 => 륽 +B97E 1105 1173 11B1 # 륾 => 륾 +B97F 1105 1173 11B2 # 륿 => 륿 +B980 1105 1173 11B3 # 릀 => 릀 +B981 1105 1173 11B4 # 릁 => 릁 +B982 1105 1173 11B5 # 릂 => 릂 +B983 1105 1173 11B6 # 릃 => 릃 +B984 1105 1173 11B7 # 름 => 름 +B985 1105 1173 11B8 # 릅 => 릅 +B986 1105 1173 11B9 # 릆 => 릆 +B987 1105 1173 11BA # 릇 => 릇 +B988 1105 1173 11BB # 릈 => 릈 +B989 1105 1173 11BC # 릉 => 릉 +B98A 1105 1173 11BD # 릊 => 릊 +B98B 1105 1173 11BE # 릋 => 릋 +B98C 1105 1173 11BF # 릌 => 릌 +B98D 1105 1173 11C0 # 릍 => 릍 +B98E 1105 1173 11C1 # 릎 => 릎 +B98F 1105 1173 11C2 # 릏 => 릏 +B990 1105 1174 # 릐 => 릐 +B991 1105 1174 11A8 # 릑 => 릑 +B992 1105 1174 11A9 # 릒 => 릒 +B993 1105 1174 11AA # 릓 => 릓 +B994 1105 1174 11AB # 릔 => 릔 +B995 1105 1174 11AC # 릕 => 릕 +B996 1105 1174 11AD # 릖 => 릖 +B997 1105 1174 11AE # 릗 => 릗 +B998 1105 1174 11AF # 릘 => 릘 +B999 1105 1174 11B0 # 릙 => 릙 +B99A 1105 1174 11B1 # 릚 => 릚 +B99B 1105 1174 11B2 # 릛 => 릛 +B99C 1105 1174 11B3 # 릜 => 릜 +B99D 1105 1174 11B4 # 릝 => 릝 +B99E 1105 1174 11B5 # 릞 => 릞 +B99F 1105 1174 11B6 # 릟 => 릟 +B9A0 1105 1174 11B7 # 릠 => 릠 +B9A1 1105 1174 11B8 # 릡 => 릡 +B9A2 1105 1174 11B9 # 릢 => 릢 +B9A3 1105 1174 11BA # 릣 => 릣 +B9A4 1105 1174 11BB # 릤 => 릤 +B9A5 1105 1174 11BC # 릥 => 릥 +B9A6 1105 1174 11BD # 릦 => 릦 +B9A7 1105 1174 11BE # 릧 => 릧 +B9A8 1105 1174 11BF # 릨 => 릨 
+B9A9 1105 1174 11C0 # 릩 => 릩 +B9AA 1105 1174 11C1 # 릪 => 릪 +B9AB 1105 1174 11C2 # 릫 => 릫 +B9AC 1105 1175 # 리 => 리 +B9AD 1105 1175 11A8 # 릭 => 릭 +B9AE 1105 1175 11A9 # 릮 => 릮 +B9AF 1105 1175 11AA # 릯 => 릯 +B9B0 1105 1175 11AB # 린 => 린 +B9B1 1105 1175 11AC # 릱 => 릱 +B9B2 1105 1175 11AD # 릲 => 릲 +B9B3 1105 1175 11AE # 릳 => 릳 +B9B4 1105 1175 11AF # 릴 => 릴 +B9B5 1105 1175 11B0 # 릵 => 릵 +B9B6 1105 1175 11B1 # 릶 => 릶 +B9B7 1105 1175 11B2 # 릷 => 릷 +B9B8 1105 1175 11B3 # 릸 => 릸 +B9B9 1105 1175 11B4 # 릹 => 릹 +B9BA 1105 1175 11B5 # 릺 => 릺 +B9BB 1105 1175 11B6 # 릻 => 릻 +B9BC 1105 1175 11B7 # 림 => 림 +B9BD 1105 1175 11B8 # 립 => 립 +B9BE 1105 1175 11B9 # 릾 => 릾 +B9BF 1105 1175 11BA # 릿 => 릿 +B9C0 1105 1175 11BB # 맀 => 맀 +B9C1 1105 1175 11BC # 링 => 링 +B9C2 1105 1175 11BD # 맂 => 맂 +B9C3 1105 1175 11BE # 맃 => 맃 +B9C4 1105 1175 11BF # 맄 => 맄 +B9C5 1105 1175 11C0 # 맅 => 맅 +B9C6 1105 1175 11C1 # 맆 => 맆 +B9C7 1105 1175 11C2 # 맇 => 맇 +B9C8 1106 1161 # 마 => 마 +B9C9 1106 1161 11A8 # 막 => 막 +B9CA 1106 1161 11A9 # 맊 => 맊 +B9CB 1106 1161 11AA # 맋 => 맋 +B9CC 1106 1161 11AB # 만 => 만 +B9CD 1106 1161 11AC # 맍 => 맍 +B9CE 1106 1161 11AD # 많 => 많 +B9CF 1106 1161 11AE # 맏 => 맏 +B9D0 1106 1161 11AF # 말 => 말 +B9D1 1106 1161 11B0 # 맑 => 맑 +B9D2 1106 1161 11B1 # 맒 => 맒 +B9D3 1106 1161 11B2 # 맓 => 맓 +B9D4 1106 1161 11B3 # 맔 => 맔 +B9D5 1106 1161 11B4 # 맕 => 맕 +B9D6 1106 1161 11B5 # 맖 => 맖 +B9D7 1106 1161 11B6 # 맗 => 맗 +B9D8 1106 1161 11B7 # 맘 => 맘 +B9D9 1106 1161 11B8 # 맙 => 맙 +B9DA 1106 1161 11B9 # 맚 => 맚 +B9DB 1106 1161 11BA # 맛 => 맛 +B9DC 1106 1161 11BB # 맜 => 맜 +B9DD 1106 1161 11BC # 망 => 망 +B9DE 1106 1161 11BD # 맞 => 맞 +B9DF 1106 1161 11BE # 맟 => 맟 +B9E0 1106 1161 11BF # 맠 => 맠 +B9E1 1106 1161 11C0 # 맡 => 맡 +B9E2 1106 1161 11C1 # 맢 => 맢 +B9E3 1106 1161 11C2 # 맣 => 맣 +B9E4 1106 1162 # 매 => 매 +B9E5 1106 1162 11A8 # 맥 => 맥 +B9E6 1106 1162 11A9 # 맦 => 맦 +B9E7 1106 1162 11AA # 맧 => 맧 +B9E8 1106 1162 11AB # 맨 => 맨 +B9E9 1106 1162 11AC # 맩 => 맩 +B9EA 1106 1162 11AD # 맪 => 맪 +B9EB 1106 1162 11AE # 맫 => 맫 
+B9EC 1106 1162 11AF # 맬 => 맬 +B9ED 1106 1162 11B0 # 맭 => 맭 +B9EE 1106 1162 11B1 # 맮 => 맮 +B9EF 1106 1162 11B2 # 맯 => 맯 +B9F0 1106 1162 11B3 # 맰 => 맰 +B9F1 1106 1162 11B4 # 맱 => 맱 +B9F2 1106 1162 11B5 # 맲 => 맲 +B9F3 1106 1162 11B6 # 맳 => 맳 +B9F4 1106 1162 11B7 # 맴 => 맴 +B9F5 1106 1162 11B8 # 맵 => 맵 +B9F6 1106 1162 11B9 # 맶 => 맶 +B9F7 1106 1162 11BA # 맷 => 맷 +B9F8 1106 1162 11BB # 맸 => 맸 +B9F9 1106 1162 11BC # 맹 => 맹 +B9FA 1106 1162 11BD # 맺 => 맺 +B9FB 1106 1162 11BE # 맻 => 맻 +B9FC 1106 1162 11BF # 맼 => 맼 +B9FD 1106 1162 11C0 # 맽 => 맽 +B9FE 1106 1162 11C1 # 맾 => 맾 +B9FF 1106 1162 11C2 # 맿 => 맿 +BA00 1106 1163 # 먀 => 먀 +BA01 1106 1163 11A8 # 먁 => 먁 +BA02 1106 1163 11A9 # 먂 => 먂 +BA03 1106 1163 11AA # 먃 => 먃 +BA04 1106 1163 11AB # 먄 => 먄 +BA05 1106 1163 11AC # 먅 => 먅 +BA06 1106 1163 11AD # 먆 => 먆 +BA07 1106 1163 11AE # 먇 => 먇 +BA08 1106 1163 11AF # 먈 => 먈 +BA09 1106 1163 11B0 # 먉 => 먉 +BA0A 1106 1163 11B1 # 먊 => 먊 +BA0B 1106 1163 11B2 # 먋 => 먋 +BA0C 1106 1163 11B3 # 먌 => 먌 +BA0D 1106 1163 11B4 # 먍 => 먍 +BA0E 1106 1163 11B5 # 먎 => 먎 +BA0F 1106 1163 11B6 # 먏 => 먏 +BA10 1106 1163 11B7 # 먐 => 먐 +BA11 1106 1163 11B8 # 먑 => 먑 +BA12 1106 1163 11B9 # 먒 => 먒 +BA13 1106 1163 11BA # 먓 => 먓 +BA14 1106 1163 11BB # 먔 => 먔 +BA15 1106 1163 11BC # 먕 => 먕 +BA16 1106 1163 11BD # 먖 => 먖 +BA17 1106 1163 11BE # 먗 => 먗 +BA18 1106 1163 11BF # 먘 => 먘 +BA19 1106 1163 11C0 # 먙 => 먙 +BA1A 1106 1163 11C1 # 먚 => 먚 +BA1B 1106 1163 11C2 # 먛 => 먛 +BA1C 1106 1164 # 먜 => 먜 +BA1D 1106 1164 11A8 # 먝 => 먝 +BA1E 1106 1164 11A9 # 먞 => 먞 +BA1F 1106 1164 11AA # 먟 => 먟 +BA20 1106 1164 11AB # 먠 => 먠 +BA21 1106 1164 11AC # 먡 => 먡 +BA22 1106 1164 11AD # 먢 => 먢 +BA23 1106 1164 11AE # 먣 => 먣 +BA24 1106 1164 11AF # 먤 => 먤 +BA25 1106 1164 11B0 # 먥 => 먥 +BA26 1106 1164 11B1 # 먦 => 먦 +BA27 1106 1164 11B2 # 먧 => 먧 +BA28 1106 1164 11B3 # 먨 => 먨 +BA29 1106 1164 11B4 # 먩 => 먩 +BA2A 1106 1164 11B5 # 먪 => 먪 +BA2B 1106 1164 11B6 # 먫 => 먫 +BA2C 1106 1164 11B7 # 먬 => 먬 +BA2D 1106 1164 11B8 # 먭 => 먭 +BA2E 1106 1164 11B9 # 먮 => 먮 
+BA2F 1106 1164 11BA # 먯 => 먯 +BA30 1106 1164 11BB # 먰 => 먰 +BA31 1106 1164 11BC # 먱 => 먱 +BA32 1106 1164 11BD # 먲 => 먲 +BA33 1106 1164 11BE # 먳 => 먳 +BA34 1106 1164 11BF # 먴 => 먴 +BA35 1106 1164 11C0 # 먵 => 먵 +BA36 1106 1164 11C1 # 먶 => 먶 +BA37 1106 1164 11C2 # 먷 => 먷 +BA38 1106 1165 # 머 => 머 +BA39 1106 1165 11A8 # 먹 => 먹 +BA3A 1106 1165 11A9 # 먺 => 먺 +BA3B 1106 1165 11AA # 먻 => 먻 +BA3C 1106 1165 11AB # 먼 => 먼 +BA3D 1106 1165 11AC # 먽 => 먽 +BA3E 1106 1165 11AD # 먾 => 먾 +BA3F 1106 1165 11AE # 먿 => 먿 +BA40 1106 1165 11AF # 멀 => 멀 +BA41 1106 1165 11B0 # 멁 => 멁 +BA42 1106 1165 11B1 # 멂 => 멂 +BA43 1106 1165 11B2 # 멃 => 멃 +BA44 1106 1165 11B3 # 멄 => 멄 +BA45 1106 1165 11B4 # 멅 => 멅 +BA46 1106 1165 11B5 # 멆 => 멆 +BA47 1106 1165 11B6 # 멇 => 멇 +BA48 1106 1165 11B7 # 멈 => 멈 +BA49 1106 1165 11B8 # 멉 => 멉 +BA4A 1106 1165 11B9 # 멊 => 멊 +BA4B 1106 1165 11BA # 멋 => 멋 +BA4C 1106 1165 11BB # 멌 => 멌 +BA4D 1106 1165 11BC # 멍 => 멍 +BA4E 1106 1165 11BD # 멎 => 멎 +BA4F 1106 1165 11BE # 멏 => 멏 +BA50 1106 1165 11BF # 멐 => 멐 +BA51 1106 1165 11C0 # 멑 => 멑 +BA52 1106 1165 11C1 # 멒 => 멒 +BA53 1106 1165 11C2 # 멓 => 멓 +BA54 1106 1166 # 메 => 메 +BA55 1106 1166 11A8 # 멕 => 멕 +BA56 1106 1166 11A9 # 멖 => 멖 +BA57 1106 1166 11AA # 멗 => 멗 +BA58 1106 1166 11AB # 멘 => 멘 +BA59 1106 1166 11AC # 멙 => 멙 +BA5A 1106 1166 11AD # 멚 => 멚 +BA5B 1106 1166 11AE # 멛 => 멛 +BA5C 1106 1166 11AF # 멜 => 멜 +BA5D 1106 1166 11B0 # 멝 => 멝 +BA5E 1106 1166 11B1 # 멞 => 멞 +BA5F 1106 1166 11B2 # 멟 => 멟 +BA60 1106 1166 11B3 # 멠 => 멠 +BA61 1106 1166 11B4 # 멡 => 멡 +BA62 1106 1166 11B5 # 멢 => 멢 +BA63 1106 1166 11B6 # 멣 => 멣 +BA64 1106 1166 11B7 # 멤 => 멤 +BA65 1106 1166 11B8 # 멥 => 멥 +BA66 1106 1166 11B9 # 멦 => 멦 +BA67 1106 1166 11BA # 멧 => 멧 +BA68 1106 1166 11BB # 멨 => 멨 +BA69 1106 1166 11BC # 멩 => 멩 +BA6A 1106 1166 11BD # 멪 => 멪 +BA6B 1106 1166 11BE # 멫 => 멫 +BA6C 1106 1166 11BF # 멬 => 멬 +BA6D 1106 1166 11C0 # 멭 => 멭 +BA6E 1106 1166 11C1 # 멮 => 멮 +BA6F 1106 1166 11C2 # 멯 => 멯 +BA70 1106 1167 # 며 => 며 +BA71 1106 1167 11A8 # 멱 => 멱 
+BA72 1106 1167 11A9 # 멲 => 멲 +BA73 1106 1167 11AA # 멳 => 멳 +BA74 1106 1167 11AB # 면 => 면 +BA75 1106 1167 11AC # 멵 => 멵 +BA76 1106 1167 11AD # 멶 => 멶 +BA77 1106 1167 11AE # 멷 => 멷 +BA78 1106 1167 11AF # 멸 => 멸 +BA79 1106 1167 11B0 # 멹 => 멹 +BA7A 1106 1167 11B1 # 멺 => 멺 +BA7B 1106 1167 11B2 # 멻 => 멻 +BA7C 1106 1167 11B3 # 멼 => 멼 +BA7D 1106 1167 11B4 # 멽 => 멽 +BA7E 1106 1167 11B5 # 멾 => 멾 +BA7F 1106 1167 11B6 # 멿 => 멿 +BA80 1106 1167 11B7 # 몀 => 몀 +BA81 1106 1167 11B8 # 몁 => 몁 +BA82 1106 1167 11B9 # 몂 => 몂 +BA83 1106 1167 11BA # 몃 => 몃 +BA84 1106 1167 11BB # 몄 => 몄 +BA85 1106 1167 11BC # 명 => 명 +BA86 1106 1167 11BD # 몆 => 몆 +BA87 1106 1167 11BE # 몇 => 몇 +BA88 1106 1167 11BF # 몈 => 몈 +BA89 1106 1167 11C0 # 몉 => 몉 +BA8A 1106 1167 11C1 # 몊 => 몊 +BA8B 1106 1167 11C2 # 몋 => 몋 +BA8C 1106 1168 # 몌 => 몌 +BA8D 1106 1168 11A8 # 몍 => 몍 +BA8E 1106 1168 11A9 # 몎 => 몎 +BA8F 1106 1168 11AA # 몏 => 몏 +BA90 1106 1168 11AB # 몐 => 몐 +BA91 1106 1168 11AC # 몑 => 몑 +BA92 1106 1168 11AD # 몒 => 몒 +BA93 1106 1168 11AE # 몓 => 몓 +BA94 1106 1168 11AF # 몔 => 몔 +BA95 1106 1168 11B0 # 몕 => 몕 +BA96 1106 1168 11B1 # 몖 => 몖 +BA97 1106 1168 11B2 # 몗 => 몗 +BA98 1106 1168 11B3 # 몘 => 몘 +BA99 1106 1168 11B4 # 몙 => 몙 +BA9A 1106 1168 11B5 # 몚 => 몚 +BA9B 1106 1168 11B6 # 몛 => 몛 +BA9C 1106 1168 11B7 # 몜 => 몜 +BA9D 1106 1168 11B8 # 몝 => 몝 +BA9E 1106 1168 11B9 # 몞 => 몞 +BA9F 1106 1168 11BA # 몟 => 몟 +BAA0 1106 1168 11BB # 몠 => 몠 +BAA1 1106 1168 11BC # 몡 => 몡 +BAA2 1106 1168 11BD # 몢 => 몢 +BAA3 1106 1168 11BE # 몣 => 몣 +BAA4 1106 1168 11BF # 몤 => 몤 +BAA5 1106 1168 11C0 # 몥 => 몥 +BAA6 1106 1168 11C1 # 몦 => 몦 +BAA7 1106 1168 11C2 # 몧 => 몧 +BAA8 1106 1169 # 모 => 모 +BAA9 1106 1169 11A8 # 목 => 목 +BAAA 1106 1169 11A9 # 몪 => 몪 +BAAB 1106 1169 11AA # 몫 => 몫 +BAAC 1106 1169 11AB # 몬 => 몬 +BAAD 1106 1169 11AC # 몭 => 몭 +BAAE 1106 1169 11AD # 몮 => 몮 +BAAF 1106 1169 11AE # 몯 => 몯 +BAB0 1106 1169 11AF # 몰 => 몰 +BAB1 1106 1169 11B0 # 몱 => 몱 +BAB2 1106 1169 11B1 # 몲 => 몲 +BAB3 1106 1169 11B2 # 몳 => 몳 +BAB4 1106 1169 11B3 # 몴 => 몴 
+BAB5 1106 1169 11B4 # 몵 => 몵 +BAB6 1106 1169 11B5 # 몶 => 몶 +BAB7 1106 1169 11B6 # 몷 => 몷 +BAB8 1106 1169 11B7 # 몸 => 몸 +BAB9 1106 1169 11B8 # 몹 => 몹 +BABA 1106 1169 11B9 # 몺 => 몺 +BABB 1106 1169 11BA # 못 => 못 +BABC 1106 1169 11BB # 몼 => 몼 +BABD 1106 1169 11BC # 몽 => 몽 +BABE 1106 1169 11BD # 몾 => 몾 +BABF 1106 1169 11BE # 몿 => 몿 +BAC0 1106 1169 11BF # 뫀 => 뫀 +BAC1 1106 1169 11C0 # 뫁 => 뫁 +BAC2 1106 1169 11C1 # 뫂 => 뫂 +BAC3 1106 1169 11C2 # 뫃 => 뫃 +BAC4 1106 116A # 뫄 => 뫄 +BAC5 1106 116A 11A8 # 뫅 => 뫅 +BAC6 1106 116A 11A9 # 뫆 => 뫆 +BAC7 1106 116A 11AA # 뫇 => 뫇 +BAC8 1106 116A 11AB # 뫈 => 뫈 +BAC9 1106 116A 11AC # 뫉 => 뫉 +BACA 1106 116A 11AD # 뫊 => 뫊 +BACB 1106 116A 11AE # 뫋 => 뫋 +BACC 1106 116A 11AF # 뫌 => 뫌 +BACD 1106 116A 11B0 # 뫍 => 뫍 +BACE 1106 116A 11B1 # 뫎 => 뫎 +BACF 1106 116A 11B2 # 뫏 => 뫏 +BAD0 1106 116A 11B3 # 뫐 => 뫐 +BAD1 1106 116A 11B4 # 뫑 => 뫑 +BAD2 1106 116A 11B5 # 뫒 => 뫒 +BAD3 1106 116A 11B6 # 뫓 => 뫓 +BAD4 1106 116A 11B7 # 뫔 => 뫔 +BAD5 1106 116A 11B8 # 뫕 => 뫕 +BAD6 1106 116A 11B9 # 뫖 => 뫖 +BAD7 1106 116A 11BA # 뫗 => 뫗 +BAD8 1106 116A 11BB # 뫘 => 뫘 +BAD9 1106 116A 11BC # 뫙 => 뫙 +BADA 1106 116A 11BD # 뫚 => 뫚 +BADB 1106 116A 11BE # 뫛 => 뫛 +BADC 1106 116A 11BF # 뫜 => 뫜 +BADD 1106 116A 11C0 # 뫝 => 뫝 +BADE 1106 116A 11C1 # 뫞 => 뫞 +BADF 1106 116A 11C2 # 뫟 => 뫟 +BAE0 1106 116B # 뫠 => 뫠 +BAE1 1106 116B 11A8 # 뫡 => 뫡 +BAE2 1106 116B 11A9 # 뫢 => 뫢 +BAE3 1106 116B 11AA # 뫣 => 뫣 +BAE4 1106 116B 11AB # 뫤 => 뫤 +BAE5 1106 116B 11AC # 뫥 => 뫥 +BAE6 1106 116B 11AD # 뫦 => 뫦 +BAE7 1106 116B 11AE # 뫧 => 뫧 +BAE8 1106 116B 11AF # 뫨 => 뫨 +BAE9 1106 116B 11B0 # 뫩 => 뫩 +BAEA 1106 116B 11B1 # 뫪 => 뫪 +BAEB 1106 116B 11B2 # 뫫 => 뫫 +BAEC 1106 116B 11B3 # 뫬 => 뫬 +BAED 1106 116B 11B4 # 뫭 => 뫭 +BAEE 1106 116B 11B5 # 뫮 => 뫮 +BAEF 1106 116B 11B6 # 뫯 => 뫯 +BAF0 1106 116B 11B7 # 뫰 => 뫰 +BAF1 1106 116B 11B8 # 뫱 => 뫱 +BAF2 1106 116B 11B9 # 뫲 => 뫲 +BAF3 1106 116B 11BA # 뫳 => 뫳 +BAF4 1106 116B 11BB # 뫴 => 뫴 +BAF5 1106 116B 11BC # 뫵 => 뫵 +BAF6 1106 116B 11BD # 뫶 => 뫶 +BAF7 1106 116B 11BE # 뫷 => 뫷 
+BAF8 1106 116B 11BF # 뫸 => 뫸 +BAF9 1106 116B 11C0 # 뫹 => 뫹 +BAFA 1106 116B 11C1 # 뫺 => 뫺 +BAFB 1106 116B 11C2 # 뫻 => 뫻 +BAFC 1106 116C # 뫼 => 뫼 +BAFD 1106 116C 11A8 # 뫽 => 뫽 +BAFE 1106 116C 11A9 # 뫾 => 뫾 +BAFF 1106 116C 11AA # 뫿 => 뫿 +BB00 1106 116C 11AB # 묀 => 묀 +BB01 1106 116C 11AC # 묁 => 묁 +BB02 1106 116C 11AD # 묂 => 묂 +BB03 1106 116C 11AE # 묃 => 묃 +BB04 1106 116C 11AF # 묄 => 묄 +BB05 1106 116C 11B0 # 묅 => 묅 +BB06 1106 116C 11B1 # 묆 => 묆 +BB07 1106 116C 11B2 # 묇 => 묇 +BB08 1106 116C 11B3 # 묈 => 묈 +BB09 1106 116C 11B4 # 묉 => 묉 +BB0A 1106 116C 11B5 # 묊 => 묊 +BB0B 1106 116C 11B6 # 묋 => 묋 +BB0C 1106 116C 11B7 # 묌 => 묌 +BB0D 1106 116C 11B8 # 묍 => 묍 +BB0E 1106 116C 11B9 # 묎 => 묎 +BB0F 1106 116C 11BA # 묏 => 묏 +BB10 1106 116C 11BB # 묐 => 묐 +BB11 1106 116C 11BC # 묑 => 묑 +BB12 1106 116C 11BD # 묒 => 묒 +BB13 1106 116C 11BE # 묓 => 묓 +BB14 1106 116C 11BF # 묔 => 묔 +BB15 1106 116C 11C0 # 묕 => 묕 +BB16 1106 116C 11C1 # 묖 => 묖 +BB17 1106 116C 11C2 # 묗 => 묗 +BB18 1106 116D # 묘 => 묘 +BB19 1106 116D 11A8 # 묙 => 묙 +BB1A 1106 116D 11A9 # 묚 => 묚 +BB1B 1106 116D 11AA # 묛 => 묛 +BB1C 1106 116D 11AB # 묜 => 묜 +BB1D 1106 116D 11AC # 묝 => 묝 +BB1E 1106 116D 11AD # 묞 => 묞 +BB1F 1106 116D 11AE # 묟 => 묟 +BB20 1106 116D 11AF # 묠 => 묠 +BB21 1106 116D 11B0 # 묡 => 묡 +BB22 1106 116D 11B1 # 묢 => 묢 +BB23 1106 116D 11B2 # 묣 => 묣 +BB24 1106 116D 11B3 # 묤 => 묤 +BB25 1106 116D 11B4 # 묥 => 묥 +BB26 1106 116D 11B5 # 묦 => 묦 +BB27 1106 116D 11B6 # 묧 => 묧 +BB28 1106 116D 11B7 # 묨 => 묨 +BB29 1106 116D 11B8 # 묩 => 묩 +BB2A 1106 116D 11B9 # 묪 => 묪 +BB2B 1106 116D 11BA # 묫 => 묫 +BB2C 1106 116D 11BB # 묬 => 묬 +BB2D 1106 116D 11BC # 묭 => 묭 +BB2E 1106 116D 11BD # 묮 => 묮 +BB2F 1106 116D 11BE # 묯 => 묯 +BB30 1106 116D 11BF # 묰 => 묰 +BB31 1106 116D 11C0 # 묱 => 묱 +BB32 1106 116D 11C1 # 묲 => 묲 +BB33 1106 116D 11C2 # 묳 => 묳 +BB34 1106 116E # 무 => 무 +BB35 1106 116E 11A8 # 묵 => 묵 +BB36 1106 116E 11A9 # 묶 => 묶 +BB37 1106 116E 11AA # 묷 => 묷 +BB38 1106 116E 11AB # 문 => 문 +BB39 1106 116E 11AC # 묹 => 묹 +BB3A 1106 116E 11AD # 묺 => 묺 
+BB3B 1106 116E 11AE # 묻 => 묻 +BB3C 1106 116E 11AF # 물 => 물 +BB3D 1106 116E 11B0 # 묽 => 묽 +BB3E 1106 116E 11B1 # 묾 => 묾 +BB3F 1106 116E 11B2 # 묿 => 묿 +BB40 1106 116E 11B3 # 뭀 => 뭀 +BB41 1106 116E 11B4 # 뭁 => 뭁 +BB42 1106 116E 11B5 # 뭂 => 뭂 +BB43 1106 116E 11B6 # 뭃 => 뭃 +BB44 1106 116E 11B7 # 뭄 => 뭄 +BB45 1106 116E 11B8 # 뭅 => 뭅 +BB46 1106 116E 11B9 # 뭆 => 뭆 +BB47 1106 116E 11BA # 뭇 => 뭇 +BB48 1106 116E 11BB # 뭈 => 뭈 +BB49 1106 116E 11BC # 뭉 => 뭉 +BB4A 1106 116E 11BD # 뭊 => 뭊 +BB4B 1106 116E 11BE # 뭋 => 뭋 +BB4C 1106 116E 11BF # 뭌 => 뭌 +BB4D 1106 116E 11C0 # 뭍 => 뭍 +BB4E 1106 116E 11C1 # 뭎 => 뭎 +BB4F 1106 116E 11C2 # 뭏 => 뭏 +BB50 1106 116F # 뭐 => 뭐 +BB51 1106 116F 11A8 # 뭑 => 뭑 +BB52 1106 116F 11A9 # 뭒 => 뭒 +BB53 1106 116F 11AA # 뭓 => 뭓 +BB54 1106 116F 11AB # 뭔 => 뭔 +BB55 1106 116F 11AC # 뭕 => 뭕 +BB56 1106 116F 11AD # 뭖 => 뭖 +BB57 1106 116F 11AE # 뭗 => 뭗 +BB58 1106 116F 11AF # 뭘 => 뭘 +BB59 1106 116F 11B0 # 뭙 => 뭙 +BB5A 1106 116F 11B1 # 뭚 => 뭚 +BB5B 1106 116F 11B2 # 뭛 => 뭛 +BB5C 1106 116F 11B3 # 뭜 => 뭜 +BB5D 1106 116F 11B4 # 뭝 => 뭝 +BB5E 1106 116F 11B5 # 뭞 => 뭞 +BB5F 1106 116F 11B6 # 뭟 => 뭟 +BB60 1106 116F 11B7 # 뭠 => 뭠 +BB61 1106 116F 11B8 # 뭡 => 뭡 +BB62 1106 116F 11B9 # 뭢 => 뭢 +BB63 1106 116F 11BA # 뭣 => 뭣 +BB64 1106 116F 11BB # 뭤 => 뭤 +BB65 1106 116F 11BC # 뭥 => 뭥 +BB66 1106 116F 11BD # 뭦 => 뭦 +BB67 1106 116F 11BE # 뭧 => 뭧 +BB68 1106 116F 11BF # 뭨 => 뭨 +BB69 1106 116F 11C0 # 뭩 => 뭩 +BB6A 1106 116F 11C1 # 뭪 => 뭪 +BB6B 1106 116F 11C2 # 뭫 => 뭫 +BB6C 1106 1170 # 뭬 => 뭬 +BB6D 1106 1170 11A8 # 뭭 => 뭭 +BB6E 1106 1170 11A9 # 뭮 => 뭮 +BB6F 1106 1170 11AA # 뭯 => 뭯 +BB70 1106 1170 11AB # 뭰 => 뭰 +BB71 1106 1170 11AC # 뭱 => 뭱 +BB72 1106 1170 11AD # 뭲 => 뭲 +BB73 1106 1170 11AE # 뭳 => 뭳 +BB74 1106 1170 11AF # 뭴 => 뭴 +BB75 1106 1170 11B0 # 뭵 => 뭵 +BB76 1106 1170 11B1 # 뭶 => 뭶 +BB77 1106 1170 11B2 # 뭷 => 뭷 +BB78 1106 1170 11B3 # 뭸 => 뭸 +BB79 1106 1170 11B4 # 뭹 => 뭹 +BB7A 1106 1170 11B5 # 뭺 => 뭺 +BB7B 1106 1170 11B6 # 뭻 => 뭻 +BB7C 1106 1170 11B7 # 뭼 => 뭼 +BB7D 1106 1170 11B8 # 뭽 => 뭽 
+BB7E 1106 1170 11B9 # 뭾 => 뭾 +BB7F 1106 1170 11BA # 뭿 => 뭿 +BB80 1106 1170 11BB # 뮀 => 뮀 +BB81 1106 1170 11BC # 뮁 => 뮁 +BB82 1106 1170 11BD # 뮂 => 뮂 +BB83 1106 1170 11BE # 뮃 => 뮃 +BB84 1106 1170 11BF # 뮄 => 뮄 +BB85 1106 1170 11C0 # 뮅 => 뮅 +BB86 1106 1170 11C1 # 뮆 => 뮆 +BB87 1106 1170 11C2 # 뮇 => 뮇 +BB88 1106 1171 # 뮈 => 뮈 +BB89 1106 1171 11A8 # 뮉 => 뮉 +BB8A 1106 1171 11A9 # 뮊 => 뮊 +BB8B 1106 1171 11AA # 뮋 => 뮋 +BB8C 1106 1171 11AB # 뮌 => 뮌 +BB8D 1106 1171 11AC # 뮍 => 뮍 +BB8E 1106 1171 11AD # 뮎 => 뮎 +BB8F 1106 1171 11AE # 뮏 => 뮏 +BB90 1106 1171 11AF # 뮐 => 뮐 +BB91 1106 1171 11B0 # 뮑 => 뮑 +BB92 1106 1171 11B1 # 뮒 => 뮒 +BB93 1106 1171 11B2 # 뮓 => 뮓 +BB94 1106 1171 11B3 # 뮔 => 뮔 +BB95 1106 1171 11B4 # 뮕 => 뮕 +BB96 1106 1171 11B5 # 뮖 => 뮖 +BB97 1106 1171 11B6 # 뮗 => 뮗 +BB98 1106 1171 11B7 # 뮘 => 뮘 +BB99 1106 1171 11B8 # 뮙 => 뮙 +BB9A 1106 1171 11B9 # 뮚 => 뮚 +BB9B 1106 1171 11BA # 뮛 => 뮛 +BB9C 1106 1171 11BB # 뮜 => 뮜 +BB9D 1106 1171 11BC # 뮝 => 뮝 +BB9E 1106 1171 11BD # 뮞 => 뮞 +BB9F 1106 1171 11BE # 뮟 => 뮟 +BBA0 1106 1171 11BF # 뮠 => 뮠 +BBA1 1106 1171 11C0 # 뮡 => 뮡 +BBA2 1106 1171 11C1 # 뮢 => 뮢 +BBA3 1106 1171 11C2 # 뮣 => 뮣 +BBA4 1106 1172 # 뮤 => 뮤 +BBA5 1106 1172 11A8 # 뮥 => 뮥 +BBA6 1106 1172 11A9 # 뮦 => 뮦 +BBA7 1106 1172 11AA # 뮧 => 뮧 +BBA8 1106 1172 11AB # 뮨 => 뮨 +BBA9 1106 1172 11AC # 뮩 => 뮩 +BBAA 1106 1172 11AD # 뮪 => 뮪 +BBAB 1106 1172 11AE # 뮫 => 뮫 +BBAC 1106 1172 11AF # 뮬 => 뮬 +BBAD 1106 1172 11B0 # 뮭 => 뮭 +BBAE 1106 1172 11B1 # 뮮 => 뮮 +BBAF 1106 1172 11B2 # 뮯 => 뮯 +BBB0 1106 1172 11B3 # 뮰 => 뮰 +BBB1 1106 1172 11B4 # 뮱 => 뮱 +BBB2 1106 1172 11B5 # 뮲 => 뮲 +BBB3 1106 1172 11B6 # 뮳 => 뮳 +BBB4 1106 1172 11B7 # 뮴 => 뮴 +BBB5 1106 1172 11B8 # 뮵 => 뮵 +BBB6 1106 1172 11B9 # 뮶 => 뮶 +BBB7 1106 1172 11BA # 뮷 => 뮷 +BBB8 1106 1172 11BB # 뮸 => 뮸 +BBB9 1106 1172 11BC # 뮹 => 뮹 +BBBA 1106 1172 11BD # 뮺 => 뮺 +BBBB 1106 1172 11BE # 뮻 => 뮻 +BBBC 1106 1172 11BF # 뮼 => 뮼 +BBBD 1106 1172 11C0 # 뮽 => 뮽 +BBBE 1106 1172 11C1 # 뮾 => 뮾 +BBBF 1106 1172 11C2 # 뮿 => 뮿 +BBC0 1106 1173 # 므 => 므 
+BBC1 1106 1173 11A8 # 믁 => 믁 +BBC2 1106 1173 11A9 # 믂 => 믂 +BBC3 1106 1173 11AA # 믃 => 믃 +BBC4 1106 1173 11AB # 믄 => 믄 +BBC5 1106 1173 11AC # 믅 => 믅 +BBC6 1106 1173 11AD # 믆 => 믆 +BBC7 1106 1173 11AE # 믇 => 믇 +BBC8 1106 1173 11AF # 믈 => 믈 +BBC9 1106 1173 11B0 # 믉 => 믉 +BBCA 1106 1173 11B1 # 믊 => 믊 +BBCB 1106 1173 11B2 # 믋 => 믋 +BBCC 1106 1173 11B3 # 믌 => 믌 +BBCD 1106 1173 11B4 # 믍 => 믍 +BBCE 1106 1173 11B5 # 믎 => 믎 +BBCF 1106 1173 11B6 # 믏 => 믏 +BBD0 1106 1173 11B7 # 믐 => 믐 +BBD1 1106 1173 11B8 # 믑 => 믑 +BBD2 1106 1173 11B9 # 믒 => 믒 +BBD3 1106 1173 11BA # 믓 => 믓 +BBD4 1106 1173 11BB # 믔 => 믔 +BBD5 1106 1173 11BC # 믕 => 믕 +BBD6 1106 1173 11BD # 믖 => 믖 +BBD7 1106 1173 11BE # 믗 => 믗 +BBD8 1106 1173 11BF # 믘 => 믘 +BBD9 1106 1173 11C0 # 믙 => 믙 +BBDA 1106 1173 11C1 # 믚 => 믚 +BBDB 1106 1173 11C2 # 믛 => 믛 +BBDC 1106 1174 # 믜 => 믜 +BBDD 1106 1174 11A8 # 믝 => 믝 +BBDE 1106 1174 11A9 # 믞 => 믞 +BBDF 1106 1174 11AA # 믟 => 믟 +BBE0 1106 1174 11AB # 믠 => 믠 +BBE1 1106 1174 11AC # 믡 => 믡 +BBE2 1106 1174 11AD # 믢 => 믢 +BBE3 1106 1174 11AE # 믣 => 믣 +BBE4 1106 1174 11AF # 믤 => 믤 +BBE5 1106 1174 11B0 # 믥 => 믥 +BBE6 1106 1174 11B1 # 믦 => 믦 +BBE7 1106 1174 11B2 # 믧 => 믧 +BBE8 1106 1174 11B3 # 믨 => 믨 +BBE9 1106 1174 11B4 # 믩 => 믩 +BBEA 1106 1174 11B5 # 믪 => 믪 +BBEB 1106 1174 11B6 # 믫 => 믫 +BBEC 1106 1174 11B7 # 믬 => 믬 +BBED 1106 1174 11B8 # 믭 => 믭 +BBEE 1106 1174 11B9 # 믮 => 믮 +BBEF 1106 1174 11BA # 믯 => 믯 +BBF0 1106 1174 11BB # 믰 => 믰 +BBF1 1106 1174 11BC # 믱 => 믱 +BBF2 1106 1174 11BD # 믲 => 믲 +BBF3 1106 1174 11BE # 믳 => 믳 +BBF4 1106 1174 11BF # 믴 => 믴 +BBF5 1106 1174 11C0 # 믵 => 믵 +BBF6 1106 1174 11C1 # 믶 => 믶 +BBF7 1106 1174 11C2 # 믷 => 믷 +BBF8 1106 1175 # 미 => 미 +BBF9 1106 1175 11A8 # 믹 => 믹 +BBFA 1106 1175 11A9 # 믺 => 믺 +BBFB 1106 1175 11AA # 믻 => 믻 +BBFC 1106 1175 11AB # 민 => 민 +BBFD 1106 1175 11AC # 믽 => 믽 +BBFE 1106 1175 11AD # 믾 => 믾 +BBFF 1106 1175 11AE # 믿 => 믿 +BC00 1106 1175 11AF # 밀 => 밀 +BC01 1106 1175 11B0 # 밁 => 밁 +BC02 1106 1175 11B1 # 밂 => 밂 +BC03 1106 1175 11B2 # 밃 => 밃 
+BC04 1106 1175 11B3 # 밄 => 밄 +BC05 1106 1175 11B4 # 밅 => 밅 +BC06 1106 1175 11B5 # 밆 => 밆 +BC07 1106 1175 11B6 # 밇 => 밇 +BC08 1106 1175 11B7 # 밈 => 밈 +BC09 1106 1175 11B8 # 밉 => 밉 +BC0A 1106 1175 11B9 # 밊 => 밊 +BC0B 1106 1175 11BA # 밋 => 밋 +BC0C 1106 1175 11BB # 밌 => 밌 +BC0D 1106 1175 11BC # 밍 => 밍 +BC0E 1106 1175 11BD # 밎 => 밎 +BC0F 1106 1175 11BE # 및 => 및 +BC10 1106 1175 11BF # 밐 => 밐 +BC11 1106 1175 11C0 # 밑 => 밑 +BC12 1106 1175 11C1 # 밒 => 밒 +BC13 1106 1175 11C2 # 밓 => 밓 +BC14 1107 1161 # 바 => 바 +BC15 1107 1161 11A8 # 박 => 박 +BC16 1107 1161 11A9 # 밖 => 밖 +BC17 1107 1161 11AA # 밗 => 밗 +BC18 1107 1161 11AB # 반 => 반 +BC19 1107 1161 11AC # 밙 => 밙 +BC1A 1107 1161 11AD # 밚 => 밚 +BC1B 1107 1161 11AE # 받 => 받 +BC1C 1107 1161 11AF # 발 => 발 +BC1D 1107 1161 11B0 # 밝 => 밝 +BC1E 1107 1161 11B1 # 밞 => 밞 +BC1F 1107 1161 11B2 # 밟 => 밟 +BC20 1107 1161 11B3 # 밠 => 밠 +BC21 1107 1161 11B4 # 밡 => 밡 +BC22 1107 1161 11B5 # 밢 => 밢 +BC23 1107 1161 11B6 # 밣 => 밣 +BC24 1107 1161 11B7 # 밤 => 밤 +BC25 1107 1161 11B8 # 밥 => 밥 +BC26 1107 1161 11B9 # 밦 => 밦 +BC27 1107 1161 11BA # 밧 => 밧 +BC28 1107 1161 11BB # 밨 => 밨 +BC29 1107 1161 11BC # 방 => 방 +BC2A 1107 1161 11BD # 밪 => 밪 +BC2B 1107 1161 11BE # 밫 => 밫 +BC2C 1107 1161 11BF # 밬 => 밬 +BC2D 1107 1161 11C0 # 밭 => 밭 +BC2E 1107 1161 11C1 # 밮 => 밮 +BC2F 1107 1161 11C2 # 밯 => 밯 +BC30 1107 1162 # 배 => 배 +BC31 1107 1162 11A8 # 백 => 백 +BC32 1107 1162 11A9 # 밲 => 밲 +BC33 1107 1162 11AA # 밳 => 밳 +BC34 1107 1162 11AB # 밴 => 밴 +BC35 1107 1162 11AC # 밵 => 밵 +BC36 1107 1162 11AD # 밶 => 밶 +BC37 1107 1162 11AE # 밷 => 밷 +BC38 1107 1162 11AF # 밸 => 밸 +BC39 1107 1162 11B0 # 밹 => 밹 +BC3A 1107 1162 11B1 # 밺 => 밺 +BC3B 1107 1162 11B2 # 밻 => 밻 +BC3C 1107 1162 11B3 # 밼 => 밼 +BC3D 1107 1162 11B4 # 밽 => 밽 +BC3E 1107 1162 11B5 # 밾 => 밾 +BC3F 1107 1162 11B6 # 밿 => 밿 +BC40 1107 1162 11B7 # 뱀 => 뱀 +BC41 1107 1162 11B8 # 뱁 => 뱁 +BC42 1107 1162 11B9 # 뱂 => 뱂 +BC43 1107 1162 11BA # 뱃 => 뱃 +BC44 1107 1162 11BB # 뱄 => 뱄 +BC45 1107 1162 11BC # 뱅 => 뱅 +BC46 1107 1162 11BD # 뱆 => 뱆 
+BC47 1107 1162 11BE # 뱇 => 뱇 +BC48 1107 1162 11BF # 뱈 => 뱈 +BC49 1107 1162 11C0 # 뱉 => 뱉 +BC4A 1107 1162 11C1 # 뱊 => 뱊 +BC4B 1107 1162 11C2 # 뱋 => 뱋 +BC4C 1107 1163 # 뱌 => 뱌 +BC4D 1107 1163 11A8 # 뱍 => 뱍 +BC4E 1107 1163 11A9 # 뱎 => 뱎 +BC4F 1107 1163 11AA # 뱏 => 뱏 +BC50 1107 1163 11AB # 뱐 => 뱐 +BC51 1107 1163 11AC # 뱑 => 뱑 +BC52 1107 1163 11AD # 뱒 => 뱒 +BC53 1107 1163 11AE # 뱓 => 뱓 +BC54 1107 1163 11AF # 뱔 => 뱔 +BC55 1107 1163 11B0 # 뱕 => 뱕 +BC56 1107 1163 11B1 # 뱖 => 뱖 +BC57 1107 1163 11B2 # 뱗 => 뱗 +BC58 1107 1163 11B3 # 뱘 => 뱘 +BC59 1107 1163 11B4 # 뱙 => 뱙 +BC5A 1107 1163 11B5 # 뱚 => 뱚 +BC5B 1107 1163 11B6 # 뱛 => 뱛 +BC5C 1107 1163 11B7 # 뱜 => 뱜 +BC5D 1107 1163 11B8 # 뱝 => 뱝 +BC5E 1107 1163 11B9 # 뱞 => 뱞 +BC5F 1107 1163 11BA # 뱟 => 뱟 +BC60 1107 1163 11BB # 뱠 => 뱠 +BC61 1107 1163 11BC # 뱡 => 뱡 +BC62 1107 1163 11BD # 뱢 => 뱢 +BC63 1107 1163 11BE # 뱣 => 뱣 +BC64 1107 1163 11BF # 뱤 => 뱤 +BC65 1107 1163 11C0 # 뱥 => 뱥 +BC66 1107 1163 11C1 # 뱦 => 뱦 +BC67 1107 1163 11C2 # 뱧 => 뱧 +BC68 1107 1164 # 뱨 => 뱨 +BC69 1107 1164 11A8 # 뱩 => 뱩 +BC6A 1107 1164 11A9 # 뱪 => 뱪 +BC6B 1107 1164 11AA # 뱫 => 뱫 +BC6C 1107 1164 11AB # 뱬 => 뱬 +BC6D 1107 1164 11AC # 뱭 => 뱭 +BC6E 1107 1164 11AD # 뱮 => 뱮 +BC6F 1107 1164 11AE # 뱯 => 뱯 +BC70 1107 1164 11AF # 뱰 => 뱰 +BC71 1107 1164 11B0 # 뱱 => 뱱 +BC72 1107 1164 11B1 # 뱲 => 뱲 +BC73 1107 1164 11B2 # 뱳 => 뱳 +BC74 1107 1164 11B3 # 뱴 => 뱴 +BC75 1107 1164 11B4 # 뱵 => 뱵 +BC76 1107 1164 11B5 # 뱶 => 뱶 +BC77 1107 1164 11B6 # 뱷 => 뱷 +BC78 1107 1164 11B7 # 뱸 => 뱸 +BC79 1107 1164 11B8 # 뱹 => 뱹 +BC7A 1107 1164 11B9 # 뱺 => 뱺 +BC7B 1107 1164 11BA # 뱻 => 뱻 +BC7C 1107 1164 11BB # 뱼 => 뱼 +BC7D 1107 1164 11BC # 뱽 => 뱽 +BC7E 1107 1164 11BD # 뱾 => 뱾 +BC7F 1107 1164 11BE # 뱿 => 뱿 +BC80 1107 1164 11BF # 벀 => 벀 +BC81 1107 1164 11C0 # 벁 => 벁 +BC82 1107 1164 11C1 # 벂 => 벂 +BC83 1107 1164 11C2 # 벃 => 벃 +BC84 1107 1165 # 버 => 버 +BC85 1107 1165 11A8 # 벅 => 벅 +BC86 1107 1165 11A9 # 벆 => 벆 +BC87 1107 1165 11AA # 벇 => 벇 +BC88 1107 1165 11AB # 번 => 번 +BC89 1107 1165 11AC # 벉 => 벉 
+BC8A 1107 1165 11AD # 벊 => 벊 +BC8B 1107 1165 11AE # 벋 => 벋 +BC8C 1107 1165 11AF # 벌 => 벌 +BC8D 1107 1165 11B0 # 벍 => 벍 +BC8E 1107 1165 11B1 # 벎 => 벎 +BC8F 1107 1165 11B2 # 벏 => 벏 +BC90 1107 1165 11B3 # 벐 => 벐 +BC91 1107 1165 11B4 # 벑 => 벑 +BC92 1107 1165 11B5 # 벒 => 벒 +BC93 1107 1165 11B6 # 벓 => 벓 +BC94 1107 1165 11B7 # 범 => 범 +BC95 1107 1165 11B8 # 법 => 법 +BC96 1107 1165 11B9 # 벖 => 벖 +BC97 1107 1165 11BA # 벗 => 벗 +BC98 1107 1165 11BB # 벘 => 벘 +BC99 1107 1165 11BC # 벙 => 벙 +BC9A 1107 1165 11BD # 벚 => 벚 +BC9B 1107 1165 11BE # 벛 => 벛 +BC9C 1107 1165 11BF # 벜 => 벜 +BC9D 1107 1165 11C0 # 벝 => 벝 +BC9E 1107 1165 11C1 # 벞 => 벞 +BC9F 1107 1165 11C2 # 벟 => 벟 +BCA0 1107 1166 # 베 => 베 +BCA1 1107 1166 11A8 # 벡 => 벡 +BCA2 1107 1166 11A9 # 벢 => 벢 +BCA3 1107 1166 11AA # 벣 => 벣 +BCA4 1107 1166 11AB # 벤 => 벤 +BCA5 1107 1166 11AC # 벥 => 벥 +BCA6 1107 1166 11AD # 벦 => 벦 +BCA7 1107 1166 11AE # 벧 => 벧 +BCA8 1107 1166 11AF # 벨 => 벨 +BCA9 1107 1166 11B0 # 벩 => 벩 +BCAA 1107 1166 11B1 # 벪 => 벪 +BCAB 1107 1166 11B2 # 벫 => 벫 +BCAC 1107 1166 11B3 # 벬 => 벬 +BCAD 1107 1166 11B4 # 벭 => 벭 +BCAE 1107 1166 11B5 # 벮 => 벮 +BCAF 1107 1166 11B6 # 벯 => 벯 +BCB0 1107 1166 11B7 # 벰 => 벰 +BCB1 1107 1166 11B8 # 벱 => 벱 +BCB2 1107 1166 11B9 # 벲 => 벲 +BCB3 1107 1166 11BA # 벳 => 벳 +BCB4 1107 1166 11BB # 벴 => 벴 +BCB5 1107 1166 11BC # 벵 => 벵 +BCB6 1107 1166 11BD # 벶 => 벶 +BCB7 1107 1166 11BE # 벷 => 벷 +BCB8 1107 1166 11BF # 벸 => 벸 +BCB9 1107 1166 11C0 # 벹 => 벹 +BCBA 1107 1166 11C1 # 벺 => 벺 +BCBB 1107 1166 11C2 # 벻 => 벻 +BCBC 1107 1167 # 벼 => 벼 +BCBD 1107 1167 11A8 # 벽 => 벽 +BCBE 1107 1167 11A9 # 벾 => 벾 +BCBF 1107 1167 11AA # 벿 => 벿 +BCC0 1107 1167 11AB # 변 => 변 +BCC1 1107 1167 11AC # 볁 => 볁 +BCC2 1107 1167 11AD # 볂 => 볂 +BCC3 1107 1167 11AE # 볃 => 볃 +BCC4 1107 1167 11AF # 별 => 별 +BCC5 1107 1167 11B0 # 볅 => 볅 +BCC6 1107 1167 11B1 # 볆 => 볆 +BCC7 1107 1167 11B2 # 볇 => 볇 +BCC8 1107 1167 11B3 # 볈 => 볈 +BCC9 1107 1167 11B4 # 볉 => 볉 +BCCA 1107 1167 11B5 # 볊 => 볊 +BCCB 1107 1167 11B6 # 볋 => 볋 +BCCC 1107 1167 11B7 # 볌 => 볌 
+BCCD 1107 1167 11B8 # 볍 => 볍 +BCCE 1107 1167 11B9 # 볎 => 볎 +BCCF 1107 1167 11BA # 볏 => 볏 +BCD0 1107 1167 11BB # 볐 => 볐 +BCD1 1107 1167 11BC # 병 => 병 +BCD2 1107 1167 11BD # 볒 => 볒 +BCD3 1107 1167 11BE # 볓 => 볓 +BCD4 1107 1167 11BF # 볔 => 볔 +BCD5 1107 1167 11C0 # 볕 => 볕 +BCD6 1107 1167 11C1 # 볖 => 볖 +BCD7 1107 1167 11C2 # 볗 => 볗 +BCD8 1107 1168 # 볘 => 볘 +BCD9 1107 1168 11A8 # 볙 => 볙 +BCDA 1107 1168 11A9 # 볚 => 볚 +BCDB 1107 1168 11AA # 볛 => 볛 +BCDC 1107 1168 11AB # 볜 => 볜 +BCDD 1107 1168 11AC # 볝 => 볝 +BCDE 1107 1168 11AD # 볞 => 볞 +BCDF 1107 1168 11AE # 볟 => 볟 +BCE0 1107 1168 11AF # 볠 => 볠 +BCE1 1107 1168 11B0 # 볡 => 볡 +BCE2 1107 1168 11B1 # 볢 => 볢 +BCE3 1107 1168 11B2 # 볣 => 볣 +BCE4 1107 1168 11B3 # 볤 => 볤 +BCE5 1107 1168 11B4 # 볥 => 볥 +BCE6 1107 1168 11B5 # 볦 => 볦 +BCE7 1107 1168 11B6 # 볧 => 볧 +BCE8 1107 1168 11B7 # 볨 => 볨 +BCE9 1107 1168 11B8 # 볩 => 볩 +BCEA 1107 1168 11B9 # 볪 => 볪 +BCEB 1107 1168 11BA # 볫 => 볫 +BCEC 1107 1168 11BB # 볬 => 볬 +BCED 1107 1168 11BC # 볭 => 볭 +BCEE 1107 1168 11BD # 볮 => 볮 +BCEF 1107 1168 11BE # 볯 => 볯 +BCF0 1107 1168 11BF # 볰 => 볰 +BCF1 1107 1168 11C0 # 볱 => 볱 +BCF2 1107 1168 11C1 # 볲 => 볲 +BCF3 1107 1168 11C2 # 볳 => 볳 +BCF4 1107 1169 # 보 => 보 +BCF5 1107 1169 11A8 # 복 => 복 +BCF6 1107 1169 11A9 # 볶 => 볶 +BCF7 1107 1169 11AA # 볷 => 볷 +BCF8 1107 1169 11AB # 본 => 본 +BCF9 1107 1169 11AC # 볹 => 볹 +BCFA 1107 1169 11AD # 볺 => 볺 +BCFB 1107 1169 11AE # 볻 => 볻 +BCFC 1107 1169 11AF # 볼 => 볼 +BCFD 1107 1169 11B0 # 볽 => 볽 +BCFE 1107 1169 11B1 # 볾 => 볾 +BCFF 1107 1169 11B2 # 볿 => 볿 +BD00 1107 1169 11B3 # 봀 => 봀 +BD01 1107 1169 11B4 # 봁 => 봁 +BD02 1107 1169 11B5 # 봂 => 봂 +BD03 1107 1169 11B6 # 봃 => 봃 +BD04 1107 1169 11B7 # 봄 => 봄 +BD05 1107 1169 11B8 # 봅 => 봅 +BD06 1107 1169 11B9 # 봆 => 봆 +BD07 1107 1169 11BA # 봇 => 봇 +BD08 1107 1169 11BB # 봈 => 봈 +BD09 1107 1169 11BC # 봉 => 봉 +BD0A 1107 1169 11BD # 봊 => 봊 +BD0B 1107 1169 11BE # 봋 => 봋 +BD0C 1107 1169 11BF # 봌 => 봌 +BD0D 1107 1169 11C0 # 봍 => 봍 +BD0E 1107 1169 11C1 # 봎 => 봎 +BD0F 1107 1169 11C2 # 봏 => 봏 
+BD10 1107 116A # 봐 => 봐 +BD11 1107 116A 11A8 # 봑 => 봑 +BD12 1107 116A 11A9 # 봒 => 봒 +BD13 1107 116A 11AA # 봓 => 봓 +BD14 1107 116A 11AB # 봔 => 봔 +BD15 1107 116A 11AC # 봕 => 봕 +BD16 1107 116A 11AD # 봖 => 봖 +BD17 1107 116A 11AE # 봗 => 봗 +BD18 1107 116A 11AF # 봘 => 봘 +BD19 1107 116A 11B0 # 봙 => 봙 +BD1A 1107 116A 11B1 # 봚 => 봚 +BD1B 1107 116A 11B2 # 봛 => 봛 +BD1C 1107 116A 11B3 # 봜 => 봜 +BD1D 1107 116A 11B4 # 봝 => 봝 +BD1E 1107 116A 11B5 # 봞 => 봞 +BD1F 1107 116A 11B6 # 봟 => 봟 +BD20 1107 116A 11B7 # 봠 => 봠 +BD21 1107 116A 11B8 # 봡 => 봡 +BD22 1107 116A 11B9 # 봢 => 봢 +BD23 1107 116A 11BA # 봣 => 봣 +BD24 1107 116A 11BB # 봤 => 봤 +BD25 1107 116A 11BC # 봥 => 봥 +BD26 1107 116A 11BD # 봦 => 봦 +BD27 1107 116A 11BE # 봧 => 봧 +BD28 1107 116A 11BF # 봨 => 봨 +BD29 1107 116A 11C0 # 봩 => 봩 +BD2A 1107 116A 11C1 # 봪 => 봪 +BD2B 1107 116A 11C2 # 봫 => 봫 +BD2C 1107 116B # 봬 => 봬 +BD2D 1107 116B 11A8 # 봭 => 봭 +BD2E 1107 116B 11A9 # 봮 => 봮 +BD2F 1107 116B 11AA # 봯 => 봯 +BD30 1107 116B 11AB # 봰 => 봰 +BD31 1107 116B 11AC # 봱 => 봱 +BD32 1107 116B 11AD # 봲 => 봲 +BD33 1107 116B 11AE # 봳 => 봳 +BD34 1107 116B 11AF # 봴 => 봴 +BD35 1107 116B 11B0 # 봵 => 봵 +BD36 1107 116B 11B1 # 봶 => 봶 +BD37 1107 116B 11B2 # 봷 => 봷 +BD38 1107 116B 11B3 # 봸 => 봸 +BD39 1107 116B 11B4 # 봹 => 봹 +BD3A 1107 116B 11B5 # 봺 => 봺 +BD3B 1107 116B 11B6 # 봻 => 봻 +BD3C 1107 116B 11B7 # 봼 => 봼 +BD3D 1107 116B 11B8 # 봽 => 봽 +BD3E 1107 116B 11B9 # 봾 => 봾 +BD3F 1107 116B 11BA # 봿 => 봿 +BD40 1107 116B 11BB # 뵀 => 뵀 +BD41 1107 116B 11BC # 뵁 => 뵁 +BD42 1107 116B 11BD # 뵂 => 뵂 +BD43 1107 116B 11BE # 뵃 => 뵃 +BD44 1107 116B 11BF # 뵄 => 뵄 +BD45 1107 116B 11C0 # 뵅 => 뵅 +BD46 1107 116B 11C1 # 뵆 => 뵆 +BD47 1107 116B 11C2 # 뵇 => 뵇 +BD48 1107 116C # 뵈 => 뵈 +BD49 1107 116C 11A8 # 뵉 => 뵉 +BD4A 1107 116C 11A9 # 뵊 => 뵊 +BD4B 1107 116C 11AA # 뵋 => 뵋 +BD4C 1107 116C 11AB # 뵌 => 뵌 +BD4D 1107 116C 11AC # 뵍 => 뵍 +BD4E 1107 116C 11AD # 뵎 => 뵎 +BD4F 1107 116C 11AE # 뵏 => 뵏 +BD50 1107 116C 11AF # 뵐 => 뵐 +BD51 1107 116C 11B0 # 뵑 => 뵑 +BD52 1107 116C 11B1 # 뵒 => 뵒 
+BD53 1107 116C 11B2 # 뵓 => 뵓 +BD54 1107 116C 11B3 # 뵔 => 뵔 +BD55 1107 116C 11B4 # 뵕 => 뵕 +BD56 1107 116C 11B5 # 뵖 => 뵖 +BD57 1107 116C 11B6 # 뵗 => 뵗 +BD58 1107 116C 11B7 # 뵘 => 뵘 +BD59 1107 116C 11B8 # 뵙 => 뵙 +BD5A 1107 116C 11B9 # 뵚 => 뵚 +BD5B 1107 116C 11BA # 뵛 => 뵛 +BD5C 1107 116C 11BB # 뵜 => 뵜 +BD5D 1107 116C 11BC # 뵝 => 뵝 +BD5E 1107 116C 11BD # 뵞 => 뵞 +BD5F 1107 116C 11BE # 뵟 => 뵟 +BD60 1107 116C 11BF # 뵠 => 뵠 +BD61 1107 116C 11C0 # 뵡 => 뵡 +BD62 1107 116C 11C1 # 뵢 => 뵢 +BD63 1107 116C 11C2 # 뵣 => 뵣 +BD64 1107 116D # 뵤 => 뵤 +BD65 1107 116D 11A8 # 뵥 => 뵥 +BD66 1107 116D 11A9 # 뵦 => 뵦 +BD67 1107 116D 11AA # 뵧 => 뵧 +BD68 1107 116D 11AB # 뵨 => 뵨 +BD69 1107 116D 11AC # 뵩 => 뵩 +BD6A 1107 116D 11AD # 뵪 => 뵪 +BD6B 1107 116D 11AE # 뵫 => 뵫 +BD6C 1107 116D 11AF # 뵬 => 뵬 +BD6D 1107 116D 11B0 # 뵭 => 뵭 +BD6E 1107 116D 11B1 # 뵮 => 뵮 +BD6F 1107 116D 11B2 # 뵯 => 뵯 +BD70 1107 116D 11B3 # 뵰 => 뵰 +BD71 1107 116D 11B4 # 뵱 => 뵱 +BD72 1107 116D 11B5 # 뵲 => 뵲 +BD73 1107 116D 11B6 # 뵳 => 뵳 +BD74 1107 116D 11B7 # 뵴 => 뵴 +BD75 1107 116D 11B8 # 뵵 => 뵵 +BD76 1107 116D 11B9 # 뵶 => 뵶 +BD77 1107 116D 11BA # 뵷 => 뵷 +BD78 1107 116D 11BB # 뵸 => 뵸 +BD79 1107 116D 11BC # 뵹 => 뵹 +BD7A 1107 116D 11BD # 뵺 => 뵺 +BD7B 1107 116D 11BE # 뵻 => 뵻 +BD7C 1107 116D 11BF # 뵼 => 뵼 +BD7D 1107 116D 11C0 # 뵽 => 뵽 +BD7E 1107 116D 11C1 # 뵾 => 뵾 +BD7F 1107 116D 11C2 # 뵿 => 뵿 +BD80 1107 116E # 부 => 부 +BD81 1107 116E 11A8 # 북 => 북 +BD82 1107 116E 11A9 # 붂 => 붂 +BD83 1107 116E 11AA # 붃 => 붃 +BD84 1107 116E 11AB # 분 => 분 +BD85 1107 116E 11AC # 붅 => 붅 +BD86 1107 116E 11AD # 붆 => 붆 +BD87 1107 116E 11AE # 붇 => 붇 +BD88 1107 116E 11AF # 불 => 불 +BD89 1107 116E 11B0 # 붉 => 붉 +BD8A 1107 116E 11B1 # 붊 => 붊 +BD8B 1107 116E 11B2 # 붋 => 붋 +BD8C 1107 116E 11B3 # 붌 => 붌 +BD8D 1107 116E 11B4 # 붍 => 붍 +BD8E 1107 116E 11B5 # 붎 => 붎 +BD8F 1107 116E 11B6 # 붏 => 붏 +BD90 1107 116E 11B7 # 붐 => 붐 +BD91 1107 116E 11B8 # 붑 => 붑 +BD92 1107 116E 11B9 # 붒 => 붒 +BD93 1107 116E 11BA # 붓 => 붓 +BD94 1107 116E 11BB # 붔 => 붔 +BD95 1107 116E 11BC # 붕 => 붕 
+BD96 1107 116E 11BD # 붖 => 붖 +BD97 1107 116E 11BE # 붗 => 붗 +BD98 1107 116E 11BF # 붘 => 붘 +BD99 1107 116E 11C0 # 붙 => 붙 +BD9A 1107 116E 11C1 # 붚 => 붚 +BD9B 1107 116E 11C2 # 붛 => 붛 +BD9C 1107 116F # 붜 => 붜 +BD9D 1107 116F 11A8 # 붝 => 붝 +BD9E 1107 116F 11A9 # 붞 => 붞 +BD9F 1107 116F 11AA # 붟 => 붟 +BDA0 1107 116F 11AB # 붠 => 붠 +BDA1 1107 116F 11AC # 붡 => 붡 +BDA2 1107 116F 11AD # 붢 => 붢 +BDA3 1107 116F 11AE # 붣 => 붣 +BDA4 1107 116F 11AF # 붤 => 붤 +BDA5 1107 116F 11B0 # 붥 => 붥 +BDA6 1107 116F 11B1 # 붦 => 붦 +BDA7 1107 116F 11B2 # 붧 => 붧 +BDA8 1107 116F 11B3 # 붨 => 붨 +BDA9 1107 116F 11B4 # 붩 => 붩 +BDAA 1107 116F 11B5 # 붪 => 붪 +BDAB 1107 116F 11B6 # 붫 => 붫 +BDAC 1107 116F 11B7 # 붬 => 붬 +BDAD 1107 116F 11B8 # 붭 => 붭 +BDAE 1107 116F 11B9 # 붮 => 붮 +BDAF 1107 116F 11BA # 붯 => 붯 +BDB0 1107 116F 11BB # 붰 => 붰 +BDB1 1107 116F 11BC # 붱 => 붱 +BDB2 1107 116F 11BD # 붲 => 붲 +BDB3 1107 116F 11BE # 붳 => 붳 +BDB4 1107 116F 11BF # 붴 => 붴 +BDB5 1107 116F 11C0 # 붵 => 붵 +BDB6 1107 116F 11C1 # 붶 => 붶 +BDB7 1107 116F 11C2 # 붷 => 붷 +BDB8 1107 1170 # 붸 => 붸 +BDB9 1107 1170 11A8 # 붹 => 붹 +BDBA 1107 1170 11A9 # 붺 => 붺 +BDBB 1107 1170 11AA # 붻 => 붻 +BDBC 1107 1170 11AB # 붼 => 붼 +BDBD 1107 1170 11AC # 붽 => 붽 +BDBE 1107 1170 11AD # 붾 => 붾 +BDBF 1107 1170 11AE # 붿 => 붿 +BDC0 1107 1170 11AF # 뷀 => 뷀 +BDC1 1107 1170 11B0 # 뷁 => 뷁 +BDC2 1107 1170 11B1 # 뷂 => 뷂 +BDC3 1107 1170 11B2 # 뷃 => 뷃 +BDC4 1107 1170 11B3 # 뷄 => 뷄 +BDC5 1107 1170 11B4 # 뷅 => 뷅 +BDC6 1107 1170 11B5 # 뷆 => 뷆 +BDC7 1107 1170 11B6 # 뷇 => 뷇 +BDC8 1107 1170 11B7 # 뷈 => 뷈 +BDC9 1107 1170 11B8 # 뷉 => 뷉 +BDCA 1107 1170 11B9 # 뷊 => 뷊 +BDCB 1107 1170 11BA # 뷋 => 뷋 +BDCC 1107 1170 11BB # 뷌 => 뷌 +BDCD 1107 1170 11BC # 뷍 => 뷍 +BDCE 1107 1170 11BD # 뷎 => 뷎 +BDCF 1107 1170 11BE # 뷏 => 뷏 +BDD0 1107 1170 11BF # 뷐 => 뷐 +BDD1 1107 1170 11C0 # 뷑 => 뷑 +BDD2 1107 1170 11C1 # 뷒 => 뷒 +BDD3 1107 1170 11C2 # 뷓 => 뷓 +BDD4 1107 1171 # 뷔 => 뷔 +BDD5 1107 1171 11A8 # 뷕 => 뷕 +BDD6 1107 1171 11A9 # 뷖 => 뷖 +BDD7 1107 1171 11AA # 뷗 => 뷗 +BDD8 1107 1171 11AB # 뷘 => 뷘 
+BDD9 1107 1171 11AC # 뷙 => 뷙 +BDDA 1107 1171 11AD # 뷚 => 뷚 +BDDB 1107 1171 11AE # 뷛 => 뷛 +BDDC 1107 1171 11AF # 뷜 => 뷜 +BDDD 1107 1171 11B0 # 뷝 => 뷝 +BDDE 1107 1171 11B1 # 뷞 => 뷞 +BDDF 1107 1171 11B2 # 뷟 => 뷟 +BDE0 1107 1171 11B3 # 뷠 => 뷠 +BDE1 1107 1171 11B4 # 뷡 => 뷡 +BDE2 1107 1171 11B5 # 뷢 => 뷢 +BDE3 1107 1171 11B6 # 뷣 => 뷣 +BDE4 1107 1171 11B7 # 뷤 => 뷤 +BDE5 1107 1171 11B8 # 뷥 => 뷥 +BDE6 1107 1171 11B9 # 뷦 => 뷦 +BDE7 1107 1171 11BA # 뷧 => 뷧 +BDE8 1107 1171 11BB # 뷨 => 뷨 +BDE9 1107 1171 11BC # 뷩 => 뷩 +BDEA 1107 1171 11BD # 뷪 => 뷪 +BDEB 1107 1171 11BE # 뷫 => 뷫 +BDEC 1107 1171 11BF # 뷬 => 뷬 +BDED 1107 1171 11C0 # 뷭 => 뷭 +BDEE 1107 1171 11C1 # 뷮 => 뷮 +BDEF 1107 1171 11C2 # 뷯 => 뷯 +BDF0 1107 1172 # 뷰 => 뷰 +BDF1 1107 1172 11A8 # 뷱 => 뷱 +BDF2 1107 1172 11A9 # 뷲 => 뷲 +BDF3 1107 1172 11AA # 뷳 => 뷳 +BDF4 1107 1172 11AB # 뷴 => 뷴 +BDF5 1107 1172 11AC # 뷵 => 뷵 +BDF6 1107 1172 11AD # 뷶 => 뷶 +BDF7 1107 1172 11AE # 뷷 => 뷷 +BDF8 1107 1172 11AF # 뷸 => 뷸 +BDF9 1107 1172 11B0 # 뷹 => 뷹 +BDFA 1107 1172 11B1 # 뷺 => 뷺 +BDFB 1107 1172 11B2 # 뷻 => 뷻 +BDFC 1107 1172 11B3 # 뷼 => 뷼 +BDFD 1107 1172 11B4 # 뷽 => 뷽 +BDFE 1107 1172 11B5 # 뷾 => 뷾 +BDFF 1107 1172 11B6 # 뷿 => 뷿 +BE00 1107 1172 11B7 # 븀 => 븀 +BE01 1107 1172 11B8 # 븁 => 븁 +BE02 1107 1172 11B9 # 븂 => 븂 +BE03 1107 1172 11BA # 븃 => 븃 +BE04 1107 1172 11BB # 븄 => 븄 +BE05 1107 1172 11BC # 븅 => 븅 +BE06 1107 1172 11BD # 븆 => 븆 +BE07 1107 1172 11BE # 븇 => 븇 +BE08 1107 1172 11BF # 븈 => 븈 +BE09 1107 1172 11C0 # 븉 => 븉 +BE0A 1107 1172 11C1 # 븊 => 븊 +BE0B 1107 1172 11C2 # 븋 => 븋 +BE0C 1107 1173 # 브 => 브 +BE0D 1107 1173 11A8 # 븍 => 븍 +BE0E 1107 1173 11A9 # 븎 => 븎 +BE0F 1107 1173 11AA # 븏 => 븏 +BE10 1107 1173 11AB # 븐 => 븐 +BE11 1107 1173 11AC # 븑 => 븑 +BE12 1107 1173 11AD # 븒 => 븒 +BE13 1107 1173 11AE # 븓 => 븓 +BE14 1107 1173 11AF # 블 => 블 +BE15 1107 1173 11B0 # 븕 => 븕 +BE16 1107 1173 11B1 # 븖 => 븖 +BE17 1107 1173 11B2 # 븗 => 븗 +BE18 1107 1173 11B3 # 븘 => 븘 +BE19 1107 1173 11B4 # 븙 => 븙 +BE1A 1107 1173 11B5 # 븚 => 븚 +BE1B 1107 1173 11B6 # 븛 => 븛 
+BE1C 1107 1173 11B7 # 븜 => 븜 +BE1D 1107 1173 11B8 # 븝 => 븝 +BE1E 1107 1173 11B9 # 븞 => 븞 +BE1F 1107 1173 11BA # 븟 => 븟 +BE20 1107 1173 11BB # 븠 => 븠 +BE21 1107 1173 11BC # 븡 => 븡 +BE22 1107 1173 11BD # 븢 => 븢 +BE23 1107 1173 11BE # 븣 => 븣 +BE24 1107 1173 11BF # 븤 => 븤 +BE25 1107 1173 11C0 # 븥 => 븥 +BE26 1107 1173 11C1 # 븦 => 븦 +BE27 1107 1173 11C2 # 븧 => 븧 +BE28 1107 1174 # 븨 => 븨 +BE29 1107 1174 11A8 # 븩 => 븩 +BE2A 1107 1174 11A9 # 븪 => 븪 +BE2B 1107 1174 11AA # 븫 => 븫 +BE2C 1107 1174 11AB # 븬 => 븬 +BE2D 1107 1174 11AC # 븭 => 븭 +BE2E 1107 1174 11AD # 븮 => 븮 +BE2F 1107 1174 11AE # 븯 => 븯 +BE30 1107 1174 11AF # 븰 => 븰 +BE31 1107 1174 11B0 # 븱 => 븱 +BE32 1107 1174 11B1 # 븲 => 븲 +BE33 1107 1174 11B2 # 븳 => 븳 +BE34 1107 1174 11B3 # 븴 => 븴 +BE35 1107 1174 11B4 # 븵 => 븵 +BE36 1107 1174 11B5 # 븶 => 븶 +BE37 1107 1174 11B6 # 븷 => 븷 +BE38 1107 1174 11B7 # 븸 => 븸 +BE39 1107 1174 11B8 # 븹 => 븹 +BE3A 1107 1174 11B9 # 븺 => 븺 +BE3B 1107 1174 11BA # 븻 => 븻 +BE3C 1107 1174 11BB # 븼 => 븼 +BE3D 1107 1174 11BC # 븽 => 븽 +BE3E 1107 1174 11BD # 븾 => 븾 +BE3F 1107 1174 11BE # 븿 => 븿 +BE40 1107 1174 11BF # 빀 => 빀 +BE41 1107 1174 11C0 # 빁 => 빁 +BE42 1107 1174 11C1 # 빂 => 빂 +BE43 1107 1174 11C2 # 빃 => 빃 +BE44 1107 1175 # 비 => 비 +BE45 1107 1175 11A8 # 빅 => 빅 +BE46 1107 1175 11A9 # 빆 => 빆 +BE47 1107 1175 11AA # 빇 => 빇 +BE48 1107 1175 11AB # 빈 => 빈 +BE49 1107 1175 11AC # 빉 => 빉 +BE4A 1107 1175 11AD # 빊 => 빊 +BE4B 1107 1175 11AE # 빋 => 빋 +BE4C 1107 1175 11AF # 빌 => 빌 +BE4D 1107 1175 11B0 # 빍 => 빍 +BE4E 1107 1175 11B1 # 빎 => 빎 +BE4F 1107 1175 11B2 # 빏 => 빏 +BE50 1107 1175 11B3 # 빐 => 빐 +BE51 1107 1175 11B4 # 빑 => 빑 +BE52 1107 1175 11B5 # 빒 => 빒 +BE53 1107 1175 11B6 # 빓 => 빓 +BE54 1107 1175 11B7 # 빔 => 빔 +BE55 1107 1175 11B8 # 빕 => 빕 +BE56 1107 1175 11B9 # 빖 => 빖 +BE57 1107 1175 11BA # 빗 => 빗 +BE58 1107 1175 11BB # 빘 => 빘 +BE59 1107 1175 11BC # 빙 => 빙 +BE5A 1107 1175 11BD # 빚 => 빚 +BE5B 1107 1175 11BE # 빛 => 빛 +BE5C 1107 1175 11BF # 빜 => 빜 +BE5D 1107 1175 11C0 # 빝 => 빝 +BE5E 1107 1175 11C1 # 빞 => 빞 
+BE5F 1107 1175 11C2 # 빟 => 빟 +BE60 1108 1161 # 빠 => 빠 +BE61 1108 1161 11A8 # 빡 => 빡 +BE62 1108 1161 11A9 # 빢 => 빢 +BE63 1108 1161 11AA # 빣 => 빣 +BE64 1108 1161 11AB # 빤 => 빤 +BE65 1108 1161 11AC # 빥 => 빥 +BE66 1108 1161 11AD # 빦 => 빦 +BE67 1108 1161 11AE # 빧 => 빧 +BE68 1108 1161 11AF # 빨 => 빨 +BE69 1108 1161 11B0 # 빩 => 빩 +BE6A 1108 1161 11B1 # 빪 => 빪 +BE6B 1108 1161 11B2 # 빫 => 빫 +BE6C 1108 1161 11B3 # 빬 => 빬 +BE6D 1108 1161 11B4 # 빭 => 빭 +BE6E 1108 1161 11B5 # 빮 => 빮 +BE6F 1108 1161 11B6 # 빯 => 빯 +BE70 1108 1161 11B7 # 빰 => 빰 +BE71 1108 1161 11B8 # 빱 => 빱 +BE72 1108 1161 11B9 # 빲 => 빲 +BE73 1108 1161 11BA # 빳 => 빳 +BE74 1108 1161 11BB # 빴 => 빴 +BE75 1108 1161 11BC # 빵 => 빵 +BE76 1108 1161 11BD # 빶 => 빶 +BE77 1108 1161 11BE # 빷 => 빷 +BE78 1108 1161 11BF # 빸 => 빸 +BE79 1108 1161 11C0 # 빹 => 빹 +BE7A 1108 1161 11C1 # 빺 => 빺 +BE7B 1108 1161 11C2 # 빻 => 빻 +BE7C 1108 1162 # 빼 => 빼 +BE7D 1108 1162 11A8 # 빽 => 빽 +BE7E 1108 1162 11A9 # 빾 => 빾 +BE7F 1108 1162 11AA # 빿 => 빿 +BE80 1108 1162 11AB # 뺀 => 뺀 +BE81 1108 1162 11AC # 뺁 => 뺁 +BE82 1108 1162 11AD # 뺂 => 뺂 +BE83 1108 1162 11AE # 뺃 => 뺃 +BE84 1108 1162 11AF # 뺄 => 뺄 +BE85 1108 1162 11B0 # 뺅 => 뺅 +BE86 1108 1162 11B1 # 뺆 => 뺆 +BE87 1108 1162 11B2 # 뺇 => 뺇 +BE88 1108 1162 11B3 # 뺈 => 뺈 +BE89 1108 1162 11B4 # 뺉 => 뺉 +BE8A 1108 1162 11B5 # 뺊 => 뺊 +BE8B 1108 1162 11B6 # 뺋 => 뺋 +BE8C 1108 1162 11B7 # 뺌 => 뺌 +BE8D 1108 1162 11B8 # 뺍 => 뺍 +BE8E 1108 1162 11B9 # 뺎 => 뺎 +BE8F 1108 1162 11BA # 뺏 => 뺏 +BE90 1108 1162 11BB # 뺐 => 뺐 +BE91 1108 1162 11BC # 뺑 => 뺑 +BE92 1108 1162 11BD # 뺒 => 뺒 +BE93 1108 1162 11BE # 뺓 => 뺓 +BE94 1108 1162 11BF # 뺔 => 뺔 +BE95 1108 1162 11C0 # 뺕 => 뺕 +BE96 1108 1162 11C1 # 뺖 => 뺖 +BE97 1108 1162 11C2 # 뺗 => 뺗 +BE98 1108 1163 # 뺘 => 뺘 +BE99 1108 1163 11A8 # 뺙 => 뺙 +BE9A 1108 1163 11A9 # 뺚 => 뺚 +BE9B 1108 1163 11AA # 뺛 => 뺛 +BE9C 1108 1163 11AB # 뺜 => 뺜 +BE9D 1108 1163 11AC # 뺝 => 뺝 +BE9E 1108 1163 11AD # 뺞 => 뺞 +BE9F 1108 1163 11AE # 뺟 => 뺟 +BEA0 1108 1163 11AF # 뺠 => 뺠 +BEA1 1108 1163 11B0 # 뺡 => 뺡 
+BEA2 1108 1163 11B1 # 뺢 => 뺢 +BEA3 1108 1163 11B2 # 뺣 => 뺣 +BEA4 1108 1163 11B3 # 뺤 => 뺤 +BEA5 1108 1163 11B4 # 뺥 => 뺥 +BEA6 1108 1163 11B5 # 뺦 => 뺦 +BEA7 1108 1163 11B6 # 뺧 => 뺧 +BEA8 1108 1163 11B7 # 뺨 => 뺨 +BEA9 1108 1163 11B8 # 뺩 => 뺩 +BEAA 1108 1163 11B9 # 뺪 => 뺪 +BEAB 1108 1163 11BA # 뺫 => 뺫 +BEAC 1108 1163 11BB # 뺬 => 뺬 +BEAD 1108 1163 11BC # 뺭 => 뺭 +BEAE 1108 1163 11BD # 뺮 => 뺮 +BEAF 1108 1163 11BE # 뺯 => 뺯 +BEB0 1108 1163 11BF # 뺰 => 뺰 +BEB1 1108 1163 11C0 # 뺱 => 뺱 +BEB2 1108 1163 11C1 # 뺲 => 뺲 +BEB3 1108 1163 11C2 # 뺳 => 뺳 +BEB4 1108 1164 # 뺴 => 뺴 +BEB5 1108 1164 11A8 # 뺵 => 뺵 +BEB6 1108 1164 11A9 # 뺶 => 뺶 +BEB7 1108 1164 11AA # 뺷 => 뺷 +BEB8 1108 1164 11AB # 뺸 => 뺸 +BEB9 1108 1164 11AC # 뺹 => 뺹 +BEBA 1108 1164 11AD # 뺺 => 뺺 +BEBB 1108 1164 11AE # 뺻 => 뺻 +BEBC 1108 1164 11AF # 뺼 => 뺼 +BEBD 1108 1164 11B0 # 뺽 => 뺽 +BEBE 1108 1164 11B1 # 뺾 => 뺾 +BEBF 1108 1164 11B2 # 뺿 => 뺿 +BEC0 1108 1164 11B3 # 뻀 => 뻀 +BEC1 1108 1164 11B4 # 뻁 => 뻁 +BEC2 1108 1164 11B5 # 뻂 => 뻂 +BEC3 1108 1164 11B6 # 뻃 => 뻃 +BEC4 1108 1164 11B7 # 뻄 => 뻄 +BEC5 1108 1164 11B8 # 뻅 => 뻅 +BEC6 1108 1164 11B9 # 뻆 => 뻆 +BEC7 1108 1164 11BA # 뻇 => 뻇 +BEC8 1108 1164 11BB # 뻈 => 뻈 +BEC9 1108 1164 11BC # 뻉 => 뻉 +BECA 1108 1164 11BD # 뻊 => 뻊 +BECB 1108 1164 11BE # 뻋 => 뻋 +BECC 1108 1164 11BF # 뻌 => 뻌 +BECD 1108 1164 11C0 # 뻍 => 뻍 +BECE 1108 1164 11C1 # 뻎 => 뻎 +BECF 1108 1164 11C2 # 뻏 => 뻏 +BED0 1108 1165 # 뻐 => 뻐 +BED1 1108 1165 11A8 # 뻑 => 뻑 +BED2 1108 1165 11A9 # 뻒 => 뻒 +BED3 1108 1165 11AA # 뻓 => 뻓 +BED4 1108 1165 11AB # 뻔 => 뻔 +BED5 1108 1165 11AC # 뻕 => 뻕 +BED6 1108 1165 11AD # 뻖 => 뻖 +BED7 1108 1165 11AE # 뻗 => 뻗 +BED8 1108 1165 11AF # 뻘 => 뻘 +BED9 1108 1165 11B0 # 뻙 => 뻙 +BEDA 1108 1165 11B1 # 뻚 => 뻚 +BEDB 1108 1165 11B2 # 뻛 => 뻛 +BEDC 1108 1165 11B3 # 뻜 => 뻜 +BEDD 1108 1165 11B4 # 뻝 => 뻝 +BEDE 1108 1165 11B5 # 뻞 => 뻞 +BEDF 1108 1165 11B6 # 뻟 => 뻟 +BEE0 1108 1165 11B7 # 뻠 => 뻠 +BEE1 1108 1165 11B8 # 뻡 => 뻡 +BEE2 1108 1165 11B9 # 뻢 => 뻢 +BEE3 1108 1165 11BA # 뻣 => 뻣 +BEE4 1108 1165 11BB # 뻤 => 뻤 
+BEE5 1108 1165 11BC # 뻥 => 뻥 +BEE6 1108 1165 11BD # 뻦 => 뻦 +BEE7 1108 1165 11BE # 뻧 => 뻧 +BEE8 1108 1165 11BF # 뻨 => 뻨 +BEE9 1108 1165 11C0 # 뻩 => 뻩 +BEEA 1108 1165 11C1 # 뻪 => 뻪 +BEEB 1108 1165 11C2 # 뻫 => 뻫 +BEEC 1108 1166 # 뻬 => 뻬 +BEED 1108 1166 11A8 # 뻭 => 뻭 +BEEE 1108 1166 11A9 # 뻮 => 뻮 +BEEF 1108 1166 11AA # 뻯 => 뻯 +BEF0 1108 1166 11AB # 뻰 => 뻰 +BEF1 1108 1166 11AC # 뻱 => 뻱 +BEF2 1108 1166 11AD # 뻲 => 뻲 +BEF3 1108 1166 11AE # 뻳 => 뻳 +BEF4 1108 1166 11AF # 뻴 => 뻴 +BEF5 1108 1166 11B0 # 뻵 => 뻵 +BEF6 1108 1166 11B1 # 뻶 => 뻶 +BEF7 1108 1166 11B2 # 뻷 => 뻷 +BEF8 1108 1166 11B3 # 뻸 => 뻸 +BEF9 1108 1166 11B4 # 뻹 => 뻹 +BEFA 1108 1166 11B5 # 뻺 => 뻺 +BEFB 1108 1166 11B6 # 뻻 => 뻻 +BEFC 1108 1166 11B7 # 뻼 => 뻼 +BEFD 1108 1166 11B8 # 뻽 => 뻽 +BEFE 1108 1166 11B9 # 뻾 => 뻾 +BEFF 1108 1166 11BA # 뻿 => 뻿 +BF00 1108 1166 11BB # 뼀 => 뼀 +BF01 1108 1166 11BC # 뼁 => 뼁 +BF02 1108 1166 11BD # 뼂 => 뼂 +BF03 1108 1166 11BE # 뼃 => 뼃 +BF04 1108 1166 11BF # 뼄 => 뼄 +BF05 1108 1166 11C0 # 뼅 => 뼅 +BF06 1108 1166 11C1 # 뼆 => 뼆 +BF07 1108 1166 11C2 # 뼇 => 뼇 +BF08 1108 1167 # 뼈 => 뼈 +BF09 1108 1167 11A8 # 뼉 => 뼉 +BF0A 1108 1167 11A9 # 뼊 => 뼊 +BF0B 1108 1167 11AA # 뼋 => 뼋 +BF0C 1108 1167 11AB # 뼌 => 뼌 +BF0D 1108 1167 11AC # 뼍 => 뼍 +BF0E 1108 1167 11AD # 뼎 => 뼎 +BF0F 1108 1167 11AE # 뼏 => 뼏 +BF10 1108 1167 11AF # 뼐 => 뼐 +BF11 1108 1167 11B0 # 뼑 => 뼑 +BF12 1108 1167 11B1 # 뼒 => 뼒 +BF13 1108 1167 11B2 # 뼓 => 뼓 +BF14 1108 1167 11B3 # 뼔 => 뼔 +BF15 1108 1167 11B4 # 뼕 => 뼕 +BF16 1108 1167 11B5 # 뼖 => 뼖 +BF17 1108 1167 11B6 # 뼗 => 뼗 +BF18 1108 1167 11B7 # 뼘 => 뼘 +BF19 1108 1167 11B8 # 뼙 => 뼙 +BF1A 1108 1167 11B9 # 뼚 => 뼚 +BF1B 1108 1167 11BA # 뼛 => 뼛 +BF1C 1108 1167 11BB # 뼜 => 뼜 +BF1D 1108 1167 11BC # 뼝 => 뼝 +BF1E 1108 1167 11BD # 뼞 => 뼞 +BF1F 1108 1167 11BE # 뼟 => 뼟 +BF20 1108 1167 11BF # 뼠 => 뼠 +BF21 1108 1167 11C0 # 뼡 => 뼡 +BF22 1108 1167 11C1 # 뼢 => 뼢 +BF23 1108 1167 11C2 # 뼣 => 뼣 +BF24 1108 1168 # 뼤 => 뼤 +BF25 1108 1168 11A8 # 뼥 => 뼥 +BF26 1108 1168 11A9 # 뼦 => 뼦 +BF27 1108 1168 11AA # 뼧 => 뼧 
+BF28 1108 1168 11AB # 뼨 => 뼨 +BF29 1108 1168 11AC # 뼩 => 뼩 +BF2A 1108 1168 11AD # 뼪 => 뼪 +BF2B 1108 1168 11AE # 뼫 => 뼫 +BF2C 1108 1168 11AF # 뼬 => 뼬 +BF2D 1108 1168 11B0 # 뼭 => 뼭 +BF2E 1108 1168 11B1 # 뼮 => 뼮 +BF2F 1108 1168 11B2 # 뼯 => 뼯 +BF30 1108 1168 11B3 # 뼰 => 뼰 +BF31 1108 1168 11B4 # 뼱 => 뼱 +BF32 1108 1168 11B5 # 뼲 => 뼲 +BF33 1108 1168 11B6 # 뼳 => 뼳 +BF34 1108 1168 11B7 # 뼴 => 뼴 +BF35 1108 1168 11B8 # 뼵 => 뼵 +BF36 1108 1168 11B9 # 뼶 => 뼶 +BF37 1108 1168 11BA # 뼷 => 뼷 +BF38 1108 1168 11BB # 뼸 => 뼸 +BF39 1108 1168 11BC # 뼹 => 뼹 +BF3A 1108 1168 11BD # 뼺 => 뼺 +BF3B 1108 1168 11BE # 뼻 => 뼻 +BF3C 1108 1168 11BF # 뼼 => 뼼 +BF3D 1108 1168 11C0 # 뼽 => 뼽 +BF3E 1108 1168 11C1 # 뼾 => 뼾 +BF3F 1108 1168 11C2 # 뼿 => 뼿 +BF40 1108 1169 # 뽀 => 뽀 +BF41 1108 1169 11A8 # 뽁 => 뽁 +BF42 1108 1169 11A9 # 뽂 => 뽂 +BF43 1108 1169 11AA # 뽃 => 뽃 +BF44 1108 1169 11AB # 뽄 => 뽄 +BF45 1108 1169 11AC # 뽅 => 뽅 +BF46 1108 1169 11AD # 뽆 => 뽆 +BF47 1108 1169 11AE # 뽇 => 뽇 +BF48 1108 1169 11AF # 뽈 => 뽈 +BF49 1108 1169 11B0 # 뽉 => 뽉 +BF4A 1108 1169 11B1 # 뽊 => 뽊 +BF4B 1108 1169 11B2 # 뽋 => 뽋 +BF4C 1108 1169 11B3 # 뽌 => 뽌 +BF4D 1108 1169 11B4 # 뽍 => 뽍 +BF4E 1108 1169 11B5 # 뽎 => 뽎 +BF4F 1108 1169 11B6 # 뽏 => 뽏 +BF50 1108 1169 11B7 # 뽐 => 뽐 +BF51 1108 1169 11B8 # 뽑 => 뽑 +BF52 1108 1169 11B9 # 뽒 => 뽒 +BF53 1108 1169 11BA # 뽓 => 뽓 +BF54 1108 1169 11BB # 뽔 => 뽔 +BF55 1108 1169 11BC # 뽕 => 뽕 +BF56 1108 1169 11BD # 뽖 => 뽖 +BF57 1108 1169 11BE # 뽗 => 뽗 +BF58 1108 1169 11BF # 뽘 => 뽘 +BF59 1108 1169 11C0 # 뽙 => 뽙 +BF5A 1108 1169 11C1 # 뽚 => 뽚 +BF5B 1108 1169 11C2 # 뽛 => 뽛 +BF5C 1108 116A # 뽜 => 뽜 +BF5D 1108 116A 11A8 # 뽝 => 뽝 +BF5E 1108 116A 11A9 # 뽞 => 뽞 +BF5F 1108 116A 11AA # 뽟 => 뽟 +BF60 1108 116A 11AB # 뽠 => 뽠 +BF61 1108 116A 11AC # 뽡 => 뽡 +BF62 1108 116A 11AD # 뽢 => 뽢 +BF63 1108 116A 11AE # 뽣 => 뽣 +BF64 1108 116A 11AF # 뽤 => 뽤 +BF65 1108 116A 11B0 # 뽥 => 뽥 +BF66 1108 116A 11B1 # 뽦 => 뽦 +BF67 1108 116A 11B2 # 뽧 => 뽧 +BF68 1108 116A 11B3 # 뽨 => 뽨 +BF69 1108 116A 11B4 # 뽩 => 뽩 +BF6A 1108 116A 11B5 # 뽪 => 뽪 
+BF6B 1108 116A 11B6 # 뽫 => 뽫 +BF6C 1108 116A 11B7 # 뽬 => 뽬 +BF6D 1108 116A 11B8 # 뽭 => 뽭 +BF6E 1108 116A 11B9 # 뽮 => 뽮 +BF6F 1108 116A 11BA # 뽯 => 뽯 +BF70 1108 116A 11BB # 뽰 => 뽰 +BF71 1108 116A 11BC # 뽱 => 뽱 +BF72 1108 116A 11BD # 뽲 => 뽲 +BF73 1108 116A 11BE # 뽳 => 뽳 +BF74 1108 116A 11BF # 뽴 => 뽴 +BF75 1108 116A 11C0 # 뽵 => 뽵 +BF76 1108 116A 11C1 # 뽶 => 뽶 +BF77 1108 116A 11C2 # 뽷 => 뽷 +BF78 1108 116B # 뽸 => 뽸 +BF79 1108 116B 11A8 # 뽹 => 뽹 +BF7A 1108 116B 11A9 # 뽺 => 뽺 +BF7B 1108 116B 11AA # 뽻 => 뽻 +BF7C 1108 116B 11AB # 뽼 => 뽼 +BF7D 1108 116B 11AC # 뽽 => 뽽 +BF7E 1108 116B 11AD # 뽾 => 뽾 +BF7F 1108 116B 11AE # 뽿 => 뽿 +BF80 1108 116B 11AF # 뾀 => 뾀 +BF81 1108 116B 11B0 # 뾁 => 뾁 +BF82 1108 116B 11B1 # 뾂 => 뾂 +BF83 1108 116B 11B2 # 뾃 => 뾃 +BF84 1108 116B 11B3 # 뾄 => 뾄 +BF85 1108 116B 11B4 # 뾅 => 뾅 +BF86 1108 116B 11B5 # 뾆 => 뾆 +BF87 1108 116B 11B6 # 뾇 => 뾇 +BF88 1108 116B 11B7 # 뾈 => 뾈 +BF89 1108 116B 11B8 # 뾉 => 뾉 +BF8A 1108 116B 11B9 # 뾊 => 뾊 +BF8B 1108 116B 11BA # 뾋 => 뾋 +BF8C 1108 116B 11BB # 뾌 => 뾌 +BF8D 1108 116B 11BC # 뾍 => 뾍 +BF8E 1108 116B 11BD # 뾎 => 뾎 +BF8F 1108 116B 11BE # 뾏 => 뾏 +BF90 1108 116B 11BF # 뾐 => 뾐 +BF91 1108 116B 11C0 # 뾑 => 뾑 +BF92 1108 116B 11C1 # 뾒 => 뾒 +BF93 1108 116B 11C2 # 뾓 => 뾓 +BF94 1108 116C # 뾔 => 뾔 +BF95 1108 116C 11A8 # 뾕 => 뾕 +BF96 1108 116C 11A9 # 뾖 => 뾖 +BF97 1108 116C 11AA # 뾗 => 뾗 +BF98 1108 116C 11AB # 뾘 => 뾘 +BF99 1108 116C 11AC # 뾙 => 뾙 +BF9A 1108 116C 11AD # 뾚 => 뾚 +BF9B 1108 116C 11AE # 뾛 => 뾛 +BF9C 1108 116C 11AF # 뾜 => 뾜 +BF9D 1108 116C 11B0 # 뾝 => 뾝 +BF9E 1108 116C 11B1 # 뾞 => 뾞 +BF9F 1108 116C 11B2 # 뾟 => 뾟 +BFA0 1108 116C 11B3 # 뾠 => 뾠 +BFA1 1108 116C 11B4 # 뾡 => 뾡 +BFA2 1108 116C 11B5 # 뾢 => 뾢 +BFA3 1108 116C 11B6 # 뾣 => 뾣 +BFA4 1108 116C 11B7 # 뾤 => 뾤 +BFA5 1108 116C 11B8 # 뾥 => 뾥 +BFA6 1108 116C 11B9 # 뾦 => 뾦 +BFA7 1108 116C 11BA # 뾧 => 뾧 +BFA8 1108 116C 11BB # 뾨 => 뾨 +BFA9 1108 116C 11BC # 뾩 => 뾩 +BFAA 1108 116C 11BD # 뾪 => 뾪 +BFAB 1108 116C 11BE # 뾫 => 뾫 +BFAC 1108 116C 11BF # 뾬 => 뾬 +BFAD 1108 116C 11C0 # 뾭 => 뾭 
+BFAE 1108 116C 11C1 # 뾮 => 뾮 +BFAF 1108 116C 11C2 # 뾯 => 뾯 +BFB0 1108 116D # 뾰 => 뾰 +BFB1 1108 116D 11A8 # 뾱 => 뾱 +BFB2 1108 116D 11A9 # 뾲 => 뾲 +BFB3 1108 116D 11AA # 뾳 => 뾳 +BFB4 1108 116D 11AB # 뾴 => 뾴 +BFB5 1108 116D 11AC # 뾵 => 뾵 +BFB6 1108 116D 11AD # 뾶 => 뾶 +BFB7 1108 116D 11AE # 뾷 => 뾷 +BFB8 1108 116D 11AF # 뾸 => 뾸 +BFB9 1108 116D 11B0 # 뾹 => 뾹 +BFBA 1108 116D 11B1 # 뾺 => 뾺 +BFBB 1108 116D 11B2 # 뾻 => 뾻 +BFBC 1108 116D 11B3 # 뾼 => 뾼 +BFBD 1108 116D 11B4 # 뾽 => 뾽 +BFBE 1108 116D 11B5 # 뾾 => 뾾 +BFBF 1108 116D 11B6 # 뾿 => 뾿 +BFC0 1108 116D 11B7 # 뿀 => 뿀 +BFC1 1108 116D 11B8 # 뿁 => 뿁 +BFC2 1108 116D 11B9 # 뿂 => 뿂 +BFC3 1108 116D 11BA # 뿃 => 뿃 +BFC4 1108 116D 11BB # 뿄 => 뿄 +BFC5 1108 116D 11BC # 뿅 => 뿅 +BFC6 1108 116D 11BD # 뿆 => 뿆 +BFC7 1108 116D 11BE # 뿇 => 뿇 +BFC8 1108 116D 11BF # 뿈 => 뿈 +BFC9 1108 116D 11C0 # 뿉 => 뿉 +BFCA 1108 116D 11C1 # 뿊 => 뿊 +BFCB 1108 116D 11C2 # 뿋 => 뿋 +BFCC 1108 116E # 뿌 => 뿌 +BFCD 1108 116E 11A8 # 뿍 => 뿍 +BFCE 1108 116E 11A9 # 뿎 => 뿎 +BFCF 1108 116E 11AA # 뿏 => 뿏 +BFD0 1108 116E 11AB # 뿐 => 뿐 +BFD1 1108 116E 11AC # 뿑 => 뿑 +BFD2 1108 116E 11AD # 뿒 => 뿒 +BFD3 1108 116E 11AE # 뿓 => 뿓 +BFD4 1108 116E 11AF # 뿔 => 뿔 +BFD5 1108 116E 11B0 # 뿕 => 뿕 +BFD6 1108 116E 11B1 # 뿖 => 뿖 +BFD7 1108 116E 11B2 # 뿗 => 뿗 +BFD8 1108 116E 11B3 # 뿘 => 뿘 +BFD9 1108 116E 11B4 # 뿙 => 뿙 +BFDA 1108 116E 11B5 # 뿚 => 뿚 +BFDB 1108 116E 11B6 # 뿛 => 뿛 +BFDC 1108 116E 11B7 # 뿜 => 뿜 +BFDD 1108 116E 11B8 # 뿝 => 뿝 +BFDE 1108 116E 11B9 # 뿞 => 뿞 +BFDF 1108 116E 11BA # 뿟 => 뿟 +BFE0 1108 116E 11BB # 뿠 => 뿠 +BFE1 1108 116E 11BC # 뿡 => 뿡 +BFE2 1108 116E 11BD # 뿢 => 뿢 +BFE3 1108 116E 11BE # 뿣 => 뿣 +BFE4 1108 116E 11BF # 뿤 => 뿤 +BFE5 1108 116E 11C0 # 뿥 => 뿥 +BFE6 1108 116E 11C1 # 뿦 => 뿦 +BFE7 1108 116E 11C2 # 뿧 => 뿧 +BFE8 1108 116F # 뿨 => 뿨 +BFE9 1108 116F 11A8 # 뿩 => 뿩 +BFEA 1108 116F 11A9 # 뿪 => 뿪 +BFEB 1108 116F 11AA # 뿫 => 뿫 +BFEC 1108 116F 11AB # 뿬 => 뿬 +BFED 1108 116F 11AC # 뿭 => 뿭 +BFEE 1108 116F 11AD # 뿮 => 뿮 +BFEF 1108 116F 11AE # 뿯 => 뿯 +BFF0 1108 116F 11AF # 뿰 => 뿰 
+BFF1 1108 116F 11B0 # 뿱 => 뿱 +BFF2 1108 116F 11B1 # 뿲 => 뿲 +BFF3 1108 116F 11B2 # 뿳 => 뿳 +BFF4 1108 116F 11B3 # 뿴 => 뿴 +BFF5 1108 116F 11B4 # 뿵 => 뿵 +BFF6 1108 116F 11B5 # 뿶 => 뿶 +BFF7 1108 116F 11B6 # 뿷 => 뿷 +BFF8 1108 116F 11B7 # 뿸 => 뿸 +BFF9 1108 116F 11B8 # 뿹 => 뿹 +BFFA 1108 116F 11B9 # 뿺 => 뿺 +BFFB 1108 116F 11BA # 뿻 => 뿻 +BFFC 1108 116F 11BB # 뿼 => 뿼 +BFFD 1108 116F 11BC # 뿽 => 뿽 +BFFE 1108 116F 11BD # 뿾 => 뿾 +BFFF 1108 116F 11BE # 뿿 => 뿿 +C000 1108 116F 11BF # 쀀 => 쀀 +C001 1108 116F 11C0 # 쀁 => 쀁 +C002 1108 116F 11C1 # 쀂 => 쀂 +C003 1108 116F 11C2 # 쀃 => 쀃 +C004 1108 1170 # 쀄 => 쀄 +C005 1108 1170 11A8 # 쀅 => 쀅 +C006 1108 1170 11A9 # 쀆 => 쀆 +C007 1108 1170 11AA # 쀇 => 쀇 +C008 1108 1170 11AB # 쀈 => 쀈 +C009 1108 1170 11AC # 쀉 => 쀉 +C00A 1108 1170 11AD # 쀊 => 쀊 +C00B 1108 1170 11AE # 쀋 => 쀋 +C00C 1108 1170 11AF # 쀌 => 쀌 +C00D 1108 1170 11B0 # 쀍 => 쀍 +C00E 1108 1170 11B1 # 쀎 => 쀎 +C00F 1108 1170 11B2 # 쀏 => 쀏 +C010 1108 1170 11B3 # 쀐 => 쀐 +C011 1108 1170 11B4 # 쀑 => 쀑 +C012 1108 1170 11B5 # 쀒 => 쀒 +C013 1108 1170 11B6 # 쀓 => 쀓 +C014 1108 1170 11B7 # 쀔 => 쀔 +C015 1108 1170 11B8 # 쀕 => 쀕 +C016 1108 1170 11B9 # 쀖 => 쀖 +C017 1108 1170 11BA # 쀗 => 쀗 +C018 1108 1170 11BB # 쀘 => 쀘 +C019 1108 1170 11BC # 쀙 => 쀙 +C01A 1108 1170 11BD # 쀚 => 쀚 +C01B 1108 1170 11BE # 쀛 => 쀛 +C01C 1108 1170 11BF # 쀜 => 쀜 +C01D 1108 1170 11C0 # 쀝 => 쀝 +C01E 1108 1170 11C1 # 쀞 => 쀞 +C01F 1108 1170 11C2 # 쀟 => 쀟 +C020 1108 1171 # 쀠 => 쀠 +C021 1108 1171 11A8 # 쀡 => 쀡 +C022 1108 1171 11A9 # 쀢 => 쀢 +C023 1108 1171 11AA # 쀣 => 쀣 +C024 1108 1171 11AB # 쀤 => 쀤 +C025 1108 1171 11AC # 쀥 => 쀥 +C026 1108 1171 11AD # 쀦 => 쀦 +C027 1108 1171 11AE # 쀧 => 쀧 +C028 1108 1171 11AF # 쀨 => 쀨 +C029 1108 1171 11B0 # 쀩 => 쀩 +C02A 1108 1171 11B1 # 쀪 => 쀪 +C02B 1108 1171 11B2 # 쀫 => 쀫 +C02C 1108 1171 11B3 # 쀬 => 쀬 +C02D 1108 1171 11B4 # 쀭 => 쀭 +C02E 1108 1171 11B5 # 쀮 => 쀮 +C02F 1108 1171 11B6 # 쀯 => 쀯 +C030 1108 1171 11B7 # 쀰 => 쀰 +C031 1108 1171 11B8 # 쀱 => 쀱 +C032 1108 1171 11B9 # 쀲 => 쀲 +C033 1108 1171 11BA # 쀳 => 쀳 
+C034 1108 1171 11BB # 쀴 => 쀴 +C035 1108 1171 11BC # 쀵 => 쀵 +C036 1108 1171 11BD # 쀶 => 쀶 +C037 1108 1171 11BE # 쀷 => 쀷 +C038 1108 1171 11BF # 쀸 => 쀸 +C039 1108 1171 11C0 # 쀹 => 쀹 +C03A 1108 1171 11C1 # 쀺 => 쀺 +C03B 1108 1171 11C2 # 쀻 => 쀻 +C03C 1108 1172 # 쀼 => 쀼 +C03D 1108 1172 11A8 # 쀽 => 쀽 +C03E 1108 1172 11A9 # 쀾 => 쀾 +C03F 1108 1172 11AA # 쀿 => 쀿 +C040 1108 1172 11AB # 쁀 => 쁀 +C041 1108 1172 11AC # 쁁 => 쁁 +C042 1108 1172 11AD # 쁂 => 쁂 +C043 1108 1172 11AE # 쁃 => 쁃 +C044 1108 1172 11AF # 쁄 => 쁄 +C045 1108 1172 11B0 # 쁅 => 쁅 +C046 1108 1172 11B1 # 쁆 => 쁆 +C047 1108 1172 11B2 # 쁇 => 쁇 +C048 1108 1172 11B3 # 쁈 => 쁈 +C049 1108 1172 11B4 # 쁉 => 쁉 +C04A 1108 1172 11B5 # 쁊 => 쁊 +C04B 1108 1172 11B6 # 쁋 => 쁋 +C04C 1108 1172 11B7 # 쁌 => 쁌 +C04D 1108 1172 11B8 # 쁍 => 쁍 +C04E 1108 1172 11B9 # 쁎 => 쁎 +C04F 1108 1172 11BA # 쁏 => 쁏 +C050 1108 1172 11BB # 쁐 => 쁐 +C051 1108 1172 11BC # 쁑 => 쁑 +C052 1108 1172 11BD # 쁒 => 쁒 +C053 1108 1172 11BE # 쁓 => 쁓 +C054 1108 1172 11BF # 쁔 => 쁔 +C055 1108 1172 11C0 # 쁕 => 쁕 +C056 1108 1172 11C1 # 쁖 => 쁖 +C057 1108 1172 11C2 # 쁗 => 쁗 +C058 1108 1173 # 쁘 => 쁘 +C059 1108 1173 11A8 # 쁙 => 쁙 +C05A 1108 1173 11A9 # 쁚 => 쁚 +C05B 1108 1173 11AA # 쁛 => 쁛 +C05C 1108 1173 11AB # 쁜 => 쁜 +C05D 1108 1173 11AC # 쁝 => 쁝 +C05E 1108 1173 11AD # 쁞 => 쁞 +C05F 1108 1173 11AE # 쁟 => 쁟 +C060 1108 1173 11AF # 쁠 => 쁠 +C061 1108 1173 11B0 # 쁡 => 쁡 +C062 1108 1173 11B1 # 쁢 => 쁢 +C063 1108 1173 11B2 # 쁣 => 쁣 +C064 1108 1173 11B3 # 쁤 => 쁤 +C065 1108 1173 11B4 # 쁥 => 쁥 +C066 1108 1173 11B5 # 쁦 => 쁦 +C067 1108 1173 11B6 # 쁧 => 쁧 +C068 1108 1173 11B7 # 쁨 => 쁨 +C069 1108 1173 11B8 # 쁩 => 쁩 +C06A 1108 1173 11B9 # 쁪 => 쁪 +C06B 1108 1173 11BA # 쁫 => 쁫 +C06C 1108 1173 11BB # 쁬 => 쁬 +C06D 1108 1173 11BC # 쁭 => 쁭 +C06E 1108 1173 11BD # 쁮 => 쁮 +C06F 1108 1173 11BE # 쁯 => 쁯 +C070 1108 1173 11BF # 쁰 => 쁰 +C071 1108 1173 11C0 # 쁱 => 쁱 +C072 1108 1173 11C1 # 쁲 => 쁲 +C073 1108 1173 11C2 # 쁳 => 쁳 +C074 1108 1174 # 쁴 => 쁴 +C075 1108 1174 11A8 # 쁵 => 쁵 +C076 1108 1174 11A9 # 쁶 => 쁶 
+C077 1108 1174 11AA # 쁷 => 쁷 +C078 1108 1174 11AB # 쁸 => 쁸 +C079 1108 1174 11AC # 쁹 => 쁹 +C07A 1108 1174 11AD # 쁺 => 쁺 +C07B 1108 1174 11AE # 쁻 => 쁻 +C07C 1108 1174 11AF # 쁼 => 쁼 +C07D 1108 1174 11B0 # 쁽 => 쁽 +C07E 1108 1174 11B1 # 쁾 => 쁾 +C07F 1108 1174 11B2 # 쁿 => 쁿 +C080 1108 1174 11B3 # 삀 => 삀 +C081 1108 1174 11B4 # 삁 => 삁 +C082 1108 1174 11B5 # 삂 => 삂 +C083 1108 1174 11B6 # 삃 => 삃 +C084 1108 1174 11B7 # 삄 => 삄 +C085 1108 1174 11B8 # 삅 => 삅 +C086 1108 1174 11B9 # 삆 => 삆 +C087 1108 1174 11BA # 삇 => 삇 +C088 1108 1174 11BB # 삈 => 삈 +C089 1108 1174 11BC # 삉 => 삉 +C08A 1108 1174 11BD # 삊 => 삊 +C08B 1108 1174 11BE # 삋 => 삋 +C08C 1108 1174 11BF # 삌 => 삌 +C08D 1108 1174 11C0 # 삍 => 삍 +C08E 1108 1174 11C1 # 삎 => 삎 +C08F 1108 1174 11C2 # 삏 => 삏 +C090 1108 1175 # 삐 => 삐 +C091 1108 1175 11A8 # 삑 => 삑 +C092 1108 1175 11A9 # 삒 => 삒 +C093 1108 1175 11AA # 삓 => 삓 +C094 1108 1175 11AB # 삔 => 삔 +C095 1108 1175 11AC # 삕 => 삕 +C096 1108 1175 11AD # 삖 => 삖 +C097 1108 1175 11AE # 삗 => 삗 +C098 1108 1175 11AF # 삘 => 삘 +C099 1108 1175 11B0 # 삙 => 삙 +C09A 1108 1175 11B1 # 삚 => 삚 +C09B 1108 1175 11B2 # 삛 => 삛 +C09C 1108 1175 11B3 # 삜 => 삜 +C09D 1108 1175 11B4 # 삝 => 삝 +C09E 1108 1175 11B5 # 삞 => 삞 +C09F 1108 1175 11B6 # 삟 => 삟 +C0A0 1108 1175 11B7 # 삠 => 삠 +C0A1 1108 1175 11B8 # 삡 => 삡 +C0A2 1108 1175 11B9 # 삢 => 삢 +C0A3 1108 1175 11BA # 삣 => 삣 +C0A4 1108 1175 11BB # 삤 => 삤 +C0A5 1108 1175 11BC # 삥 => 삥 +C0A6 1108 1175 11BD # 삦 => 삦 +C0A7 1108 1175 11BE # 삧 => 삧 +C0A8 1108 1175 11BF # 삨 => 삨 +C0A9 1108 1175 11C0 # 삩 => 삩 +C0AA 1108 1175 11C1 # 삪 => 삪 +C0AB 1108 1175 11C2 # 삫 => 삫 +C0AC 1109 1161 # 사 => 사 +C0AD 1109 1161 11A8 # 삭 => 삭 +C0AE 1109 1161 11A9 # 삮 => 삮 +C0AF 1109 1161 11AA # 삯 => 삯 +C0B0 1109 1161 11AB # 산 => 산 +C0B1 1109 1161 11AC # 삱 => 삱 +C0B2 1109 1161 11AD # 삲 => 삲 +C0B3 1109 1161 11AE # 삳 => 삳 +C0B4 1109 1161 11AF # 살 => 살 +C0B5 1109 1161 11B0 # 삵 => 삵 +C0B6 1109 1161 11B1 # 삶 => 삶 +C0B7 1109 1161 11B2 # 삷 => 삷 +C0B8 1109 1161 11B3 # 삸 => 삸 +C0B9 1109 1161 11B4 # 삹 => 삹 
+C0BA 1109 1161 11B5 # 삺 => 삺 +C0BB 1109 1161 11B6 # 삻 => 삻 +C0BC 1109 1161 11B7 # 삼 => 삼 +C0BD 1109 1161 11B8 # 삽 => 삽 +C0BE 1109 1161 11B9 # 삾 => 삾 +C0BF 1109 1161 11BA # 삿 => 삿 +C0C0 1109 1161 11BB # 샀 => 샀 +C0C1 1109 1161 11BC # 상 => 상 +C0C2 1109 1161 11BD # 샂 => 샂 +C0C3 1109 1161 11BE # 샃 => 샃 +C0C4 1109 1161 11BF # 샄 => 샄 +C0C5 1109 1161 11C0 # 샅 => 샅 +C0C6 1109 1161 11C1 # 샆 => 샆 +C0C7 1109 1161 11C2 # 샇 => 샇 +C0C8 1109 1162 # 새 => 새 +C0C9 1109 1162 11A8 # 색 => 색 +C0CA 1109 1162 11A9 # 샊 => 샊 +C0CB 1109 1162 11AA # 샋 => 샋 +C0CC 1109 1162 11AB # 샌 => 샌 +C0CD 1109 1162 11AC # 샍 => 샍 +C0CE 1109 1162 11AD # 샎 => 샎 +C0CF 1109 1162 11AE # 샏 => 샏 +C0D0 1109 1162 11AF # 샐 => 샐 +C0D1 1109 1162 11B0 # 샑 => 샑 +C0D2 1109 1162 11B1 # 샒 => 샒 +C0D3 1109 1162 11B2 # 샓 => 샓 +C0D4 1109 1162 11B3 # 샔 => 샔 +C0D5 1109 1162 11B4 # 샕 => 샕 +C0D6 1109 1162 11B5 # 샖 => 샖 +C0D7 1109 1162 11B6 # 샗 => 샗 +C0D8 1109 1162 11B7 # 샘 => 샘 +C0D9 1109 1162 11B8 # 샙 => 샙 +C0DA 1109 1162 11B9 # 샚 => 샚 +C0DB 1109 1162 11BA # 샛 => 샛 +C0DC 1109 1162 11BB # 샜 => 샜 +C0DD 1109 1162 11BC # 생 => 생 +C0DE 1109 1162 11BD # 샞 => 샞 +C0DF 1109 1162 11BE # 샟 => 샟 +C0E0 1109 1162 11BF # 샠 => 샠 +C0E1 1109 1162 11C0 # 샡 => 샡 +C0E2 1109 1162 11C1 # 샢 => 샢 +C0E3 1109 1162 11C2 # 샣 => 샣 +C0E4 1109 1163 # 샤 => 샤 +C0E5 1109 1163 11A8 # 샥 => 샥 +C0E6 1109 1163 11A9 # 샦 => 샦 +C0E7 1109 1163 11AA # 샧 => 샧 +C0E8 1109 1163 11AB # 샨 => 샨 +C0E9 1109 1163 11AC # 샩 => 샩 +C0EA 1109 1163 11AD # 샪 => 샪 +C0EB 1109 1163 11AE # 샫 => 샫 +C0EC 1109 1163 11AF # 샬 => 샬 +C0ED 1109 1163 11B0 # 샭 => 샭 +C0EE 1109 1163 11B1 # 샮 => 샮 +C0EF 1109 1163 11B2 # 샯 => 샯 +C0F0 1109 1163 11B3 # 샰 => 샰 +C0F1 1109 1163 11B4 # 샱 => 샱 +C0F2 1109 1163 11B5 # 샲 => 샲 +C0F3 1109 1163 11B6 # 샳 => 샳 +C0F4 1109 1163 11B7 # 샴 => 샴 +C0F5 1109 1163 11B8 # 샵 => 샵 +C0F6 1109 1163 11B9 # 샶 => 샶 +C0F7 1109 1163 11BA # 샷 => 샷 +C0F8 1109 1163 11BB # 샸 => 샸 +C0F9 1109 1163 11BC # 샹 => 샹 +C0FA 1109 1163 11BD # 샺 => 샺 +C0FB 1109 1163 11BE # 샻 => 샻 +C0FC 1109 1163 11BF # 샼 => 샼 
+C0FD 1109 1163 11C0 # 샽 => 샽 +C0FE 1109 1163 11C1 # 샾 => 샾 +C0FF 1109 1163 11C2 # 샿 => 샿 +C100 1109 1164 # 섀 => 섀 +C101 1109 1164 11A8 # 섁 => 섁 +C102 1109 1164 11A9 # 섂 => 섂 +C103 1109 1164 11AA # 섃 => 섃 +C104 1109 1164 11AB # 섄 => 섄 +C105 1109 1164 11AC # 섅 => 섅 +C106 1109 1164 11AD # 섆 => 섆 +C107 1109 1164 11AE # 섇 => 섇 +C108 1109 1164 11AF # 섈 => 섈 +C109 1109 1164 11B0 # 섉 => 섉 +C10A 1109 1164 11B1 # 섊 => 섊 +C10B 1109 1164 11B2 # 섋 => 섋 +C10C 1109 1164 11B3 # 섌 => 섌 +C10D 1109 1164 11B4 # 섍 => 섍 +C10E 1109 1164 11B5 # 섎 => 섎 +C10F 1109 1164 11B6 # 섏 => 섏 +C110 1109 1164 11B7 # 섐 => 섐 +C111 1109 1164 11B8 # 섑 => 섑 +C112 1109 1164 11B9 # 섒 => 섒 +C113 1109 1164 11BA # 섓 => 섓 +C114 1109 1164 11BB # 섔 => 섔 +C115 1109 1164 11BC # 섕 => 섕 +C116 1109 1164 11BD # 섖 => 섖 +C117 1109 1164 11BE # 섗 => 섗 +C118 1109 1164 11BF # 섘 => 섘 +C119 1109 1164 11C0 # 섙 => 섙 +C11A 1109 1164 11C1 # 섚 => 섚 +C11B 1109 1164 11C2 # 섛 => 섛 +C11C 1109 1165 # 서 => 서 +C11D 1109 1165 11A8 # 석 => 석 +C11E 1109 1165 11A9 # 섞 => 섞 +C11F 1109 1165 11AA # 섟 => 섟 +C120 1109 1165 11AB # 선 => 선 +C121 1109 1165 11AC # 섡 => 섡 +C122 1109 1165 11AD # 섢 => 섢 +C123 1109 1165 11AE # 섣 => 섣 +C124 1109 1165 11AF # 설 => 설 +C125 1109 1165 11B0 # 섥 => 섥 +C126 1109 1165 11B1 # 섦 => 섦 +C127 1109 1165 11B2 # 섧 => 섧 +C128 1109 1165 11B3 # 섨 => 섨 +C129 1109 1165 11B4 # 섩 => 섩 +C12A 1109 1165 11B5 # 섪 => 섪 +C12B 1109 1165 11B6 # 섫 => 섫 +C12C 1109 1165 11B7 # 섬 => 섬 +C12D 1109 1165 11B8 # 섭 => 섭 +C12E 1109 1165 11B9 # 섮 => 섮 +C12F 1109 1165 11BA # 섯 => 섯 +C130 1109 1165 11BB # 섰 => 섰 +C131 1109 1165 11BC # 성 => 성 +C132 1109 1165 11BD # 섲 => 섲 +C133 1109 1165 11BE # 섳 => 섳 +C134 1109 1165 11BF # 섴 => 섴 +C135 1109 1165 11C0 # 섵 => 섵 +C136 1109 1165 11C1 # 섶 => 섶 +C137 1109 1165 11C2 # 섷 => 섷 +C138 1109 1166 # 세 => 세 +C139 1109 1166 11A8 # 섹 => 섹 +C13A 1109 1166 11A9 # 섺 => 섺 +C13B 1109 1166 11AA # 섻 => 섻 +C13C 1109 1166 11AB # 센 => 센 +C13D 1109 1166 11AC # 섽 => 섽 +C13E 1109 1166 11AD # 섾 => 섾 +C13F 1109 1166 11AE # 섿 => 섿 
+C140 1109 1166 11AF # 셀 => 셀 +C141 1109 1166 11B0 # 셁 => 셁 +C142 1109 1166 11B1 # 셂 => 셂 +C143 1109 1166 11B2 # 셃 => 셃 +C144 1109 1166 11B3 # 셄 => 셄 +C145 1109 1166 11B4 # 셅 => 셅 +C146 1109 1166 11B5 # 셆 => 셆 +C147 1109 1166 11B6 # 셇 => 셇 +C148 1109 1166 11B7 # 셈 => 셈 +C149 1109 1166 11B8 # 셉 => 셉 +C14A 1109 1166 11B9 # 셊 => 셊 +C14B 1109 1166 11BA # 셋 => 셋 +C14C 1109 1166 11BB # 셌 => 셌 +C14D 1109 1166 11BC # 셍 => 셍 +C14E 1109 1166 11BD # 셎 => 셎 +C14F 1109 1166 11BE # 셏 => 셏 +C150 1109 1166 11BF # 셐 => 셐 +C151 1109 1166 11C0 # 셑 => 셑 +C152 1109 1166 11C1 # 셒 => 셒 +C153 1109 1166 11C2 # 셓 => 셓 +C154 1109 1167 # 셔 => 셔 +C155 1109 1167 11A8 # 셕 => 셕 +C156 1109 1167 11A9 # 셖 => 셖 +C157 1109 1167 11AA # 셗 => 셗 +C158 1109 1167 11AB # 션 => 션 +C159 1109 1167 11AC # 셙 => 셙 +C15A 1109 1167 11AD # 셚 => 셚 +C15B 1109 1167 11AE # 셛 => 셛 +C15C 1109 1167 11AF # 셜 => 셜 +C15D 1109 1167 11B0 # 셝 => 셝 +C15E 1109 1167 11B1 # 셞 => 셞 +C15F 1109 1167 11B2 # 셟 => 셟 +C160 1109 1167 11B3 # 셠 => 셠 +C161 1109 1167 11B4 # 셡 => 셡 +C162 1109 1167 11B5 # 셢 => 셢 +C163 1109 1167 11B6 # 셣 => 셣 +C164 1109 1167 11B7 # 셤 => 셤 +C165 1109 1167 11B8 # 셥 => 셥 +C166 1109 1167 11B9 # 셦 => 셦 +C167 1109 1167 11BA # 셧 => 셧 +C168 1109 1167 11BB # 셨 => 셨 +C169 1109 1167 11BC # 셩 => 셩 +C16A 1109 1167 11BD # 셪 => 셪 +C16B 1109 1167 11BE # 셫 => 셫 +C16C 1109 1167 11BF # 셬 => 셬 +C16D 1109 1167 11C0 # 셭 => 셭 +C16E 1109 1167 11C1 # 셮 => 셮 +C16F 1109 1167 11C2 # 셯 => 셯 +C170 1109 1168 # 셰 => 셰 +C171 1109 1168 11A8 # 셱 => 셱 +C172 1109 1168 11A9 # 셲 => 셲 +C173 1109 1168 11AA # 셳 => 셳 +C174 1109 1168 11AB # 셴 => 셴 +C175 1109 1168 11AC # 셵 => 셵 +C176 1109 1168 11AD # 셶 => 셶 +C177 1109 1168 11AE # 셷 => 셷 +C178 1109 1168 11AF # 셸 => 셸 +C179 1109 1168 11B0 # 셹 => 셹 +C17A 1109 1168 11B1 # 셺 => 셺 +C17B 1109 1168 11B2 # 셻 => 셻 +C17C 1109 1168 11B3 # 셼 => 셼 +C17D 1109 1168 11B4 # 셽 => 셽 +C17E 1109 1168 11B5 # 셾 => 셾 +C17F 1109 1168 11B6 # 셿 => 셿 +C180 1109 1168 11B7 # 솀 => 솀 +C181 1109 1168 11B8 # 솁 => 솁 +C182 1109 1168 11B9 # 솂 => 솂 
+C183 1109 1168 11BA # 솃 => 솃 +C184 1109 1168 11BB # 솄 => 솄 +C185 1109 1168 11BC # 솅 => 솅 +C186 1109 1168 11BD # 솆 => 솆 +C187 1109 1168 11BE # 솇 => 솇 +C188 1109 1168 11BF # 솈 => 솈 +C189 1109 1168 11C0 # 솉 => 솉 +C18A 1109 1168 11C1 # 솊 => 솊 +C18B 1109 1168 11C2 # 솋 => 솋 +C18C 1109 1169 # 소 => 소 +C18D 1109 1169 11A8 # 속 => 속 +C18E 1109 1169 11A9 # 솎 => 솎 +C18F 1109 1169 11AA # 솏 => 솏 +C190 1109 1169 11AB # 손 => 손 +C191 1109 1169 11AC # 솑 => 솑 +C192 1109 1169 11AD # 솒 => 솒 +C193 1109 1169 11AE # 솓 => 솓 +C194 1109 1169 11AF # 솔 => 솔 +C195 1109 1169 11B0 # 솕 => 솕 +C196 1109 1169 11B1 # 솖 => 솖 +C197 1109 1169 11B2 # 솗 => 솗 +C198 1109 1169 11B3 # 솘 => 솘 +C199 1109 1169 11B4 # 솙 => 솙 +C19A 1109 1169 11B5 # 솚 => 솚 +C19B 1109 1169 11B6 # 솛 => 솛 +C19C 1109 1169 11B7 # 솜 => 솜 +C19D 1109 1169 11B8 # 솝 => 솝 +C19E 1109 1169 11B9 # 솞 => 솞 +C19F 1109 1169 11BA # 솟 => 솟 +C1A0 1109 1169 11BB # 솠 => 솠 +C1A1 1109 1169 11BC # 송 => 송 +C1A2 1109 1169 11BD # 솢 => 솢 +C1A3 1109 1169 11BE # 솣 => 솣 +C1A4 1109 1169 11BF # 솤 => 솤 +C1A5 1109 1169 11C0 # 솥 => 솥 +C1A6 1109 1169 11C1 # 솦 => 솦 +C1A7 1109 1169 11C2 # 솧 => 솧 +C1A8 1109 116A # 솨 => 솨 +C1A9 1109 116A 11A8 # 솩 => 솩 +C1AA 1109 116A 11A9 # 솪 => 솪 +C1AB 1109 116A 11AA # 솫 => 솫 +C1AC 1109 116A 11AB # 솬 => 솬 +C1AD 1109 116A 11AC # 솭 => 솭 +C1AE 1109 116A 11AD # 솮 => 솮 +C1AF 1109 116A 11AE # 솯 => 솯 +C1B0 1109 116A 11AF # 솰 => 솰 +C1B1 1109 116A 11B0 # 솱 => 솱 +C1B2 1109 116A 11B1 # 솲 => 솲 +C1B3 1109 116A 11B2 # 솳 => 솳 +C1B4 1109 116A 11B3 # 솴 => 솴 +C1B5 1109 116A 11B4 # 솵 => 솵 +C1B6 1109 116A 11B5 # 솶 => 솶 +C1B7 1109 116A 11B6 # 솷 => 솷 +C1B8 1109 116A 11B7 # 솸 => 솸 +C1B9 1109 116A 11B8 # 솹 => 솹 +C1BA 1109 116A 11B9 # 솺 => 솺 +C1BB 1109 116A 11BA # 솻 => 솻 +C1BC 1109 116A 11BB # 솼 => 솼 +C1BD 1109 116A 11BC # 솽 => 솽 +C1BE 1109 116A 11BD # 솾 => 솾 +C1BF 1109 116A 11BE # 솿 => 솿 +C1C0 1109 116A 11BF # 쇀 => 쇀 +C1C1 1109 116A 11C0 # 쇁 => 쇁 +C1C2 1109 116A 11C1 # 쇂 => 쇂 +C1C3 1109 116A 11C2 # 쇃 => 쇃 +C1C4 1109 116B # 쇄 => 쇄 +C1C5 1109 116B 11A8 # 쇅 => 쇅 
+C1C6 1109 116B 11A9 # 쇆 => 쇆 +C1C7 1109 116B 11AA # 쇇 => 쇇 +C1C8 1109 116B 11AB # 쇈 => 쇈 +C1C9 1109 116B 11AC # 쇉 => 쇉 +C1CA 1109 116B 11AD # 쇊 => 쇊 +C1CB 1109 116B 11AE # 쇋 => 쇋 +C1CC 1109 116B 11AF # 쇌 => 쇌 +C1CD 1109 116B 11B0 # 쇍 => 쇍 +C1CE 1109 116B 11B1 # 쇎 => 쇎 +C1CF 1109 116B 11B2 # 쇏 => 쇏 +C1D0 1109 116B 11B3 # 쇐 => 쇐 +C1D1 1109 116B 11B4 # 쇑 => 쇑 +C1D2 1109 116B 11B5 # 쇒 => 쇒 +C1D3 1109 116B 11B6 # 쇓 => 쇓 +C1D4 1109 116B 11B7 # 쇔 => 쇔 +C1D5 1109 116B 11B8 # 쇕 => 쇕 +C1D6 1109 116B 11B9 # 쇖 => 쇖 +C1D7 1109 116B 11BA # 쇗 => 쇗 +C1D8 1109 116B 11BB # 쇘 => 쇘 +C1D9 1109 116B 11BC # 쇙 => 쇙 +C1DA 1109 116B 11BD # 쇚 => 쇚 +C1DB 1109 116B 11BE # 쇛 => 쇛 +C1DC 1109 116B 11BF # 쇜 => 쇜 +C1DD 1109 116B 11C0 # 쇝 => 쇝 +C1DE 1109 116B 11C1 # 쇞 => 쇞 +C1DF 1109 116B 11C2 # 쇟 => 쇟 +C1E0 1109 116C # 쇠 => 쇠 +C1E1 1109 116C 11A8 # 쇡 => 쇡 +C1E2 1109 116C 11A9 # 쇢 => 쇢 +C1E3 1109 116C 11AA # 쇣 => 쇣 +C1E4 1109 116C 11AB # 쇤 => 쇤 +C1E5 1109 116C 11AC # 쇥 => 쇥 +C1E6 1109 116C 11AD # 쇦 => 쇦 +C1E7 1109 116C 11AE # 쇧 => 쇧 +C1E8 1109 116C 11AF # 쇨 => 쇨 +C1E9 1109 116C 11B0 # 쇩 => 쇩 +C1EA 1109 116C 11B1 # 쇪 => 쇪 +C1EB 1109 116C 11B2 # 쇫 => 쇫 +C1EC 1109 116C 11B3 # 쇬 => 쇬 +C1ED 1109 116C 11B4 # 쇭 => 쇭 +C1EE 1109 116C 11B5 # 쇮 => 쇮 +C1EF 1109 116C 11B6 # 쇯 => 쇯 +C1F0 1109 116C 11B7 # 쇰 => 쇰 +C1F1 1109 116C 11B8 # 쇱 => 쇱 +C1F2 1109 116C 11B9 # 쇲 => 쇲 +C1F3 1109 116C 11BA # 쇳 => 쇳 +C1F4 1109 116C 11BB # 쇴 => 쇴 +C1F5 1109 116C 11BC # 쇵 => 쇵 +C1F6 1109 116C 11BD # 쇶 => 쇶 +C1F7 1109 116C 11BE # 쇷 => 쇷 +C1F8 1109 116C 11BF # 쇸 => 쇸 +C1F9 1109 116C 11C0 # 쇹 => 쇹 +C1FA 1109 116C 11C1 # 쇺 => 쇺 +C1FB 1109 116C 11C2 # 쇻 => 쇻 +C1FC 1109 116D # 쇼 => 쇼 +C1FD 1109 116D 11A8 # 쇽 => 쇽 +C1FE 1109 116D 11A9 # 쇾 => 쇾 +C1FF 1109 116D 11AA # 쇿 => 쇿 +C200 1109 116D 11AB # 숀 => 숀 +C201 1109 116D 11AC # 숁 => 숁 +C202 1109 116D 11AD # 숂 => 숂 +C203 1109 116D 11AE # 숃 => 숃 +C204 1109 116D 11AF # 숄 => 숄 +C205 1109 116D 11B0 # 숅 => 숅 +C206 1109 116D 11B1 # 숆 => 숆 +C207 1109 116D 11B2 # 숇 => 숇 +C208 1109 116D 11B3 # 숈 => 숈 
+C209 1109 116D 11B4 # 숉 => 숉 +C20A 1109 116D 11B5 # 숊 => 숊 +C20B 1109 116D 11B6 # 숋 => 숋 +C20C 1109 116D 11B7 # 숌 => 숌 +C20D 1109 116D 11B8 # 숍 => 숍 +C20E 1109 116D 11B9 # 숎 => 숎 +C20F 1109 116D 11BA # 숏 => 숏 +C210 1109 116D 11BB # 숐 => 숐 +C211 1109 116D 11BC # 숑 => 숑 +C212 1109 116D 11BD # 숒 => 숒 +C213 1109 116D 11BE # 숓 => 숓 +C214 1109 116D 11BF # 숔 => 숔 +C215 1109 116D 11C0 # 숕 => 숕 +C216 1109 116D 11C1 # 숖 => 숖 +C217 1109 116D 11C2 # 숗 => 숗 +C218 1109 116E # 수 => 수 +C219 1109 116E 11A8 # 숙 => 숙 +C21A 1109 116E 11A9 # 숚 => 숚 +C21B 1109 116E 11AA # 숛 => 숛 +C21C 1109 116E 11AB # 순 => 순 +C21D 1109 116E 11AC # 숝 => 숝 +C21E 1109 116E 11AD # 숞 => 숞 +C21F 1109 116E 11AE # 숟 => 숟 +C220 1109 116E 11AF # 술 => 술 +C221 1109 116E 11B0 # 숡 => 숡 +C222 1109 116E 11B1 # 숢 => 숢 +C223 1109 116E 11B2 # 숣 => 숣 +C224 1109 116E 11B3 # 숤 => 숤 +C225 1109 116E 11B4 # 숥 => 숥 +C226 1109 116E 11B5 # 숦 => 숦 +C227 1109 116E 11B6 # 숧 => 숧 +C228 1109 116E 11B7 # 숨 => 숨 +C229 1109 116E 11B8 # 숩 => 숩 +C22A 1109 116E 11B9 # 숪 => 숪 +C22B 1109 116E 11BA # 숫 => 숫 +C22C 1109 116E 11BB # 숬 => 숬 +C22D 1109 116E 11BC # 숭 => 숭 +C22E 1109 116E 11BD # 숮 => 숮 +C22F 1109 116E 11BE # 숯 => 숯 +C230 1109 116E 11BF # 숰 => 숰 +C231 1109 116E 11C0 # 숱 => 숱 +C232 1109 116E 11C1 # 숲 => 숲 +C233 1109 116E 11C2 # 숳 => 숳 +C234 1109 116F # 숴 => 숴 +C235 1109 116F 11A8 # 숵 => 숵 +C236 1109 116F 11A9 # 숶 => 숶 +C237 1109 116F 11AA # 숷 => 숷 +C238 1109 116F 11AB # 숸 => 숸 +C239 1109 116F 11AC # 숹 => 숹 +C23A 1109 116F 11AD # 숺 => 숺 +C23B 1109 116F 11AE # 숻 => 숻 +C23C 1109 116F 11AF # 숼 => 숼 +C23D 1109 116F 11B0 # 숽 => 숽 +C23E 1109 116F 11B1 # 숾 => 숾 +C23F 1109 116F 11B2 # 숿 => 숿 +C240 1109 116F 11B3 # 쉀 => 쉀 +C241 1109 116F 11B4 # 쉁 => 쉁 +C242 1109 116F 11B5 # 쉂 => 쉂 +C243 1109 116F 11B6 # 쉃 => 쉃 +C244 1109 116F 11B7 # 쉄 => 쉄 +C245 1109 116F 11B8 # 쉅 => 쉅 +C246 1109 116F 11B9 # 쉆 => 쉆 +C247 1109 116F 11BA # 쉇 => 쉇 +C248 1109 116F 11BB # 쉈 => 쉈 +C249 1109 116F 11BC # 쉉 => 쉉 +C24A 1109 116F 11BD # 쉊 => 쉊 +C24B 1109 116F 11BE # 쉋 => 쉋 
+C24C 1109 116F 11BF # 쉌 => 쉌 +C24D 1109 116F 11C0 # 쉍 => 쉍 +C24E 1109 116F 11C1 # 쉎 => 쉎 +C24F 1109 116F 11C2 # 쉏 => 쉏 +C250 1109 1170 # 쉐 => 쉐 +C251 1109 1170 11A8 # 쉑 => 쉑 +C252 1109 1170 11A9 # 쉒 => 쉒 +C253 1109 1170 11AA # 쉓 => 쉓 +C254 1109 1170 11AB # 쉔 => 쉔 +C255 1109 1170 11AC # 쉕 => 쉕 +C256 1109 1170 11AD # 쉖 => 쉖 +C257 1109 1170 11AE # 쉗 => 쉗 +C258 1109 1170 11AF # 쉘 => 쉘 +C259 1109 1170 11B0 # 쉙 => 쉙 +C25A 1109 1170 11B1 # 쉚 => 쉚 +C25B 1109 1170 11B2 # 쉛 => 쉛 +C25C 1109 1170 11B3 # 쉜 => 쉜 +C25D 1109 1170 11B4 # 쉝 => 쉝 +C25E 1109 1170 11B5 # 쉞 => 쉞 +C25F 1109 1170 11B6 # 쉟 => 쉟 +C260 1109 1170 11B7 # 쉠 => 쉠 +C261 1109 1170 11B8 # 쉡 => 쉡 +C262 1109 1170 11B9 # 쉢 => 쉢 +C263 1109 1170 11BA # 쉣 => 쉣 +C264 1109 1170 11BB # 쉤 => 쉤 +C265 1109 1170 11BC # 쉥 => 쉥 +C266 1109 1170 11BD # 쉦 => 쉦 +C267 1109 1170 11BE # 쉧 => 쉧 +C268 1109 1170 11BF # 쉨 => 쉨 +C269 1109 1170 11C0 # 쉩 => 쉩 +C26A 1109 1170 11C1 # 쉪 => 쉪 +C26B 1109 1170 11C2 # 쉫 => 쉫 +C26C 1109 1171 # 쉬 => 쉬 +C26D 1109 1171 11A8 # 쉭 => 쉭 +C26E 1109 1171 11A9 # 쉮 => 쉮 +C26F 1109 1171 11AA # 쉯 => 쉯 +C270 1109 1171 11AB # 쉰 => 쉰 +C271 1109 1171 11AC # 쉱 => 쉱 +C272 1109 1171 11AD # 쉲 => 쉲 +C273 1109 1171 11AE # 쉳 => 쉳 +C274 1109 1171 11AF # 쉴 => 쉴 +C275 1109 1171 11B0 # 쉵 => 쉵 +C276 1109 1171 11B1 # 쉶 => 쉶 +C277 1109 1171 11B2 # 쉷 => 쉷 +C278 1109 1171 11B3 # 쉸 => 쉸 +C279 1109 1171 11B4 # 쉹 => 쉹 +C27A 1109 1171 11B5 # 쉺 => 쉺 +C27B 1109 1171 11B6 # 쉻 => 쉻 +C27C 1109 1171 11B7 # 쉼 => 쉼 +C27D 1109 1171 11B8 # 쉽 => 쉽 +C27E 1109 1171 11B9 # 쉾 => 쉾 +C27F 1109 1171 11BA # 쉿 => 쉿 +C280 1109 1171 11BB # 슀 => 슀 +C281 1109 1171 11BC # 슁 => 슁 +C282 1109 1171 11BD # 슂 => 슂 +C283 1109 1171 11BE # 슃 => 슃 +C284 1109 1171 11BF # 슄 => 슄 +C285 1109 1171 11C0 # 슅 => 슅 +C286 1109 1171 11C1 # 슆 => 슆 +C287 1109 1171 11C2 # 슇 => 슇 +C288 1109 1172 # 슈 => 슈 +C289 1109 1172 11A8 # 슉 => 슉 +C28A 1109 1172 11A9 # 슊 => 슊 +C28B 1109 1172 11AA # 슋 => 슋 +C28C 1109 1172 11AB # 슌 => 슌 +C28D 1109 1172 11AC # 슍 => 슍 +C28E 1109 1172 11AD # 슎 => 슎 
+C28F 1109 1172 11AE # 슏 => 슏 +C290 1109 1172 11AF # 슐 => 슐 +C291 1109 1172 11B0 # 슑 => 슑 +C292 1109 1172 11B1 # 슒 => 슒 +C293 1109 1172 11B2 # 슓 => 슓 +C294 1109 1172 11B3 # 슔 => 슔 +C295 1109 1172 11B4 # 슕 => 슕 +C296 1109 1172 11B5 # 슖 => 슖 +C297 1109 1172 11B6 # 슗 => 슗 +C298 1109 1172 11B7 # 슘 => 슘 +C299 1109 1172 11B8 # 슙 => 슙 +C29A 1109 1172 11B9 # 슚 => 슚 +C29B 1109 1172 11BA # 슛 => 슛 +C29C 1109 1172 11BB # 슜 => 슜 +C29D 1109 1172 11BC # 슝 => 슝 +C29E 1109 1172 11BD # 슞 => 슞 +C29F 1109 1172 11BE # 슟 => 슟 +C2A0 1109 1172 11BF # 슠 => 슠 +C2A1 1109 1172 11C0 # 슡 => 슡 +C2A2 1109 1172 11C1 # 슢 => 슢 +C2A3 1109 1172 11C2 # 슣 => 슣 +C2A4 1109 1173 # 스 => 스 +C2A5 1109 1173 11A8 # 슥 => 슥 +C2A6 1109 1173 11A9 # 슦 => 슦 +C2A7 1109 1173 11AA # 슧 => 슧 +C2A8 1109 1173 11AB # 슨 => 슨 +C2A9 1109 1173 11AC # 슩 => 슩 +C2AA 1109 1173 11AD # 슪 => 슪 +C2AB 1109 1173 11AE # 슫 => 슫 +C2AC 1109 1173 11AF # 슬 => 슬 +C2AD 1109 1173 11B0 # 슭 => 슭 +C2AE 1109 1173 11B1 # 슮 => 슮 +C2AF 1109 1173 11B2 # 슯 => 슯 +C2B0 1109 1173 11B3 # 슰 => 슰 +C2B1 1109 1173 11B4 # 슱 => 슱 +C2B2 1109 1173 11B5 # 슲 => 슲 +C2B3 1109 1173 11B6 # 슳 => 슳 +C2B4 1109 1173 11B7 # 슴 => 슴 +C2B5 1109 1173 11B8 # 습 => 습 +C2B6 1109 1173 11B9 # 슶 => 슶 +C2B7 1109 1173 11BA # 슷 => 슷 +C2B8 1109 1173 11BB # 슸 => 슸 +C2B9 1109 1173 11BC # 승 => 승 +C2BA 1109 1173 11BD # 슺 => 슺 +C2BB 1109 1173 11BE # 슻 => 슻 +C2BC 1109 1173 11BF # 슼 => 슼 +C2BD 1109 1173 11C0 # 슽 => 슽 +C2BE 1109 1173 11C1 # 슾 => 슾 +C2BF 1109 1173 11C2 # 슿 => 슿 +C2C0 1109 1174 # 싀 => 싀 +C2C1 1109 1174 11A8 # 싁 => 싁 +C2C2 1109 1174 11A9 # 싂 => 싂 +C2C3 1109 1174 11AA # 싃 => 싃 +C2C4 1109 1174 11AB # 싄 => 싄 +C2C5 1109 1174 11AC # 싅 => 싅 +C2C6 1109 1174 11AD # 싆 => 싆 +C2C7 1109 1174 11AE # 싇 => 싇 +C2C8 1109 1174 11AF # 싈 => 싈 +C2C9 1109 1174 11B0 # 싉 => 싉 +C2CA 1109 1174 11B1 # 싊 => 싊 +C2CB 1109 1174 11B2 # 싋 => 싋 +C2CC 1109 1174 11B3 # 싌 => 싌 +C2CD 1109 1174 11B4 # 싍 => 싍 +C2CE 1109 1174 11B5 # 싎 => 싎 +C2CF 1109 1174 11B6 # 싏 => 싏 +C2D0 1109 1174 11B7 # 싐 => 싐 +C2D1 1109 1174 11B8 # 싑 => 싑 
+C2D2 1109 1174 11B9 # 싒 => 싒 +C2D3 1109 1174 11BA # 싓 => 싓 +C2D4 1109 1174 11BB # 싔 => 싔 +C2D5 1109 1174 11BC # 싕 => 싕 +C2D6 1109 1174 11BD # 싖 => 싖 +C2D7 1109 1174 11BE # 싗 => 싗 +C2D8 1109 1174 11BF # 싘 => 싘 +C2D9 1109 1174 11C0 # 싙 => 싙 +C2DA 1109 1174 11C1 # 싚 => 싚 +C2DB 1109 1174 11C2 # 싛 => 싛 +C2DC 1109 1175 # 시 => 시 +C2DD 1109 1175 11A8 # 식 => 식 +C2DE 1109 1175 11A9 # 싞 => 싞 +C2DF 1109 1175 11AA # 싟 => 싟 +C2E0 1109 1175 11AB # 신 => 신 +C2E1 1109 1175 11AC # 싡 => 싡 +C2E2 1109 1175 11AD # 싢 => 싢 +C2E3 1109 1175 11AE # 싣 => 싣 +C2E4 1109 1175 11AF # 실 => 실 +C2E5 1109 1175 11B0 # 싥 => 싥 +C2E6 1109 1175 11B1 # 싦 => 싦 +C2E7 1109 1175 11B2 # 싧 => 싧 +C2E8 1109 1175 11B3 # 싨 => 싨 +C2E9 1109 1175 11B4 # 싩 => 싩 +C2EA 1109 1175 11B5 # 싪 => 싪 +C2EB 1109 1175 11B6 # 싫 => 싫 +C2EC 1109 1175 11B7 # 심 => 심 +C2ED 1109 1175 11B8 # 십 => 십 +C2EE 1109 1175 11B9 # 싮 => 싮 +C2EF 1109 1175 11BA # 싯 => 싯 +C2F0 1109 1175 11BB # 싰 => 싰 +C2F1 1109 1175 11BC # 싱 => 싱 +C2F2 1109 1175 11BD # 싲 => 싲 +C2F3 1109 1175 11BE # 싳 => 싳 +C2F4 1109 1175 11BF # 싴 => 싴 +C2F5 1109 1175 11C0 # 싵 => 싵 +C2F6 1109 1175 11C1 # 싶 => 싶 +C2F7 1109 1175 11C2 # 싷 => 싷 +C2F8 110A 1161 # 싸 => 싸 +C2F9 110A 1161 11A8 # 싹 => 싹 +C2FA 110A 1161 11A9 # 싺 => 싺 +C2FB 110A 1161 11AA # 싻 => 싻 +C2FC 110A 1161 11AB # 싼 => 싼 +C2FD 110A 1161 11AC # 싽 => 싽 +C2FE 110A 1161 11AD # 싾 => 싾 +C2FF 110A 1161 11AE # 싿 => 싿 +C300 110A 1161 11AF # 쌀 => 쌀 +C301 110A 1161 11B0 # 쌁 => 쌁 +C302 110A 1161 11B1 # 쌂 => 쌂 +C303 110A 1161 11B2 # 쌃 => 쌃 +C304 110A 1161 11B3 # 쌄 => 쌄 +C305 110A 1161 11B4 # 쌅 => 쌅 +C306 110A 1161 11B5 # 쌆 => 쌆 +C307 110A 1161 11B6 # 쌇 => 쌇 +C308 110A 1161 11B7 # 쌈 => 쌈 +C309 110A 1161 11B8 # 쌉 => 쌉 +C30A 110A 1161 11B9 # 쌊 => 쌊 +C30B 110A 1161 11BA # 쌋 => 쌋 +C30C 110A 1161 11BB # 쌌 => 쌌 +C30D 110A 1161 11BC # 쌍 => 쌍 +C30E 110A 1161 11BD # 쌎 => 쌎 +C30F 110A 1161 11BE # 쌏 => 쌏 +C310 110A 1161 11BF # 쌐 => 쌐 +C311 110A 1161 11C0 # 쌑 => 쌑 +C312 110A 1161 11C1 # 쌒 => 쌒 +C313 110A 1161 11C2 # 쌓 => 쌓 +C314 110A 1162 # 쌔 => 쌔 
+C315 110A 1162 11A8 # 쌕 => 쌕 +C316 110A 1162 11A9 # 쌖 => 쌖 +C317 110A 1162 11AA # 쌗 => 쌗 +C318 110A 1162 11AB # 쌘 => 쌘 +C319 110A 1162 11AC # 쌙 => 쌙 +C31A 110A 1162 11AD # 쌚 => 쌚 +C31B 110A 1162 11AE # 쌛 => 쌛 +C31C 110A 1162 11AF # 쌜 => 쌜 +C31D 110A 1162 11B0 # 쌝 => 쌝 +C31E 110A 1162 11B1 # 쌞 => 쌞 +C31F 110A 1162 11B2 # 쌟 => 쌟 +C320 110A 1162 11B3 # 쌠 => 쌠 +C321 110A 1162 11B4 # 쌡 => 쌡 +C322 110A 1162 11B5 # 쌢 => 쌢 +C323 110A 1162 11B6 # 쌣 => 쌣 +C324 110A 1162 11B7 # 쌤 => 쌤 +C325 110A 1162 11B8 # 쌥 => 쌥 +C326 110A 1162 11B9 # 쌦 => 쌦 +C327 110A 1162 11BA # 쌧 => 쌧 +C328 110A 1162 11BB # 쌨 => 쌨 +C329 110A 1162 11BC # 쌩 => 쌩 +C32A 110A 1162 11BD # 쌪 => 쌪 +C32B 110A 1162 11BE # 쌫 => 쌫 +C32C 110A 1162 11BF # 쌬 => 쌬 +C32D 110A 1162 11C0 # 쌭 => 쌭 +C32E 110A 1162 11C1 # 쌮 => 쌮 +C32F 110A 1162 11C2 # 쌯 => 쌯 +C330 110A 1163 # 쌰 => 쌰 +C331 110A 1163 11A8 # 쌱 => 쌱 +C332 110A 1163 11A9 # 쌲 => 쌲 +C333 110A 1163 11AA # 쌳 => 쌳 +C334 110A 1163 11AB # 쌴 => 쌴 +C335 110A 1163 11AC # 쌵 => 쌵 +C336 110A 1163 11AD # 쌶 => 쌶 +C337 110A 1163 11AE # 쌷 => 쌷 +C338 110A 1163 11AF # 쌸 => 쌸 +C339 110A 1163 11B0 # 쌹 => 쌹 +C33A 110A 1163 11B1 # 쌺 => 쌺 +C33B 110A 1163 11B2 # 쌻 => 쌻 +C33C 110A 1163 11B3 # 쌼 => 쌼 +C33D 110A 1163 11B4 # 쌽 => 쌽 +C33E 110A 1163 11B5 # 쌾 => 쌾 +C33F 110A 1163 11B6 # 쌿 => 쌿 +C340 110A 1163 11B7 # 썀 => 썀 +C341 110A 1163 11B8 # 썁 => 썁 +C342 110A 1163 11B9 # 썂 => 썂 +C343 110A 1163 11BA # 썃 => 썃 +C344 110A 1163 11BB # 썄 => 썄 +C345 110A 1163 11BC # 썅 => 썅 +C346 110A 1163 11BD # 썆 => 썆 +C347 110A 1163 11BE # 썇 => 썇 +C348 110A 1163 11BF # 썈 => 썈 +C349 110A 1163 11C0 # 썉 => 썉 +C34A 110A 1163 11C1 # 썊 => 썊 +C34B 110A 1163 11C2 # 썋 => 썋 +C34C 110A 1164 # 썌 => 썌 +C34D 110A 1164 11A8 # 썍 => 썍 +C34E 110A 1164 11A9 # 썎 => 썎 +C34F 110A 1164 11AA # 썏 => 썏 +C350 110A 1164 11AB # 썐 => 썐 +C351 110A 1164 11AC # 썑 => 썑 +C352 110A 1164 11AD # 썒 => 썒 +C353 110A 1164 11AE # 썓 => 썓 +C354 110A 1164 11AF # 썔 => 썔 +C355 110A 1164 11B0 # 썕 => 썕 +C356 110A 1164 11B1 # 썖 => 썖 +C357 110A 1164 11B2 # 썗 => 썗 
+C358 110A 1164 11B3 # 썘 => 썘 +C359 110A 1164 11B4 # 썙 => 썙 +C35A 110A 1164 11B5 # 썚 => 썚 +C35B 110A 1164 11B6 # 썛 => 썛 +C35C 110A 1164 11B7 # 썜 => 썜 +C35D 110A 1164 11B8 # 썝 => 썝 +C35E 110A 1164 11B9 # 썞 => 썞 +C35F 110A 1164 11BA # 썟 => 썟 +C360 110A 1164 11BB # 썠 => 썠 +C361 110A 1164 11BC # 썡 => 썡 +C362 110A 1164 11BD # 썢 => 썢 +C363 110A 1164 11BE # 썣 => 썣 +C364 110A 1164 11BF # 썤 => 썤 +C365 110A 1164 11C0 # 썥 => 썥 +C366 110A 1164 11C1 # 썦 => 썦 +C367 110A 1164 11C2 # 썧 => 썧 +C368 110A 1165 # 써 => 써 +C369 110A 1165 11A8 # 썩 => 썩 +C36A 110A 1165 11A9 # 썪 => 썪 +C36B 110A 1165 11AA # 썫 => 썫 +C36C 110A 1165 11AB # 썬 => 썬 +C36D 110A 1165 11AC # 썭 => 썭 +C36E 110A 1165 11AD # 썮 => 썮 +C36F 110A 1165 11AE # 썯 => 썯 +C370 110A 1165 11AF # 썰 => 썰 +C371 110A 1165 11B0 # 썱 => 썱 +C372 110A 1165 11B1 # 썲 => 썲 +C373 110A 1165 11B2 # 썳 => 썳 +C374 110A 1165 11B3 # 썴 => 썴 +C375 110A 1165 11B4 # 썵 => 썵 +C376 110A 1165 11B5 # 썶 => 썶 +C377 110A 1165 11B6 # 썷 => 썷 +C378 110A 1165 11B7 # 썸 => 썸 +C379 110A 1165 11B8 # 썹 => 썹 +C37A 110A 1165 11B9 # 썺 => 썺 +C37B 110A 1165 11BA # 썻 => 썻 +C37C 110A 1165 11BB # 썼 => 썼 +C37D 110A 1165 11BC # 썽 => 썽 +C37E 110A 1165 11BD # 썾 => 썾 +C37F 110A 1165 11BE # 썿 => 썿 +C380 110A 1165 11BF # 쎀 => 쎀 +C381 110A 1165 11C0 # 쎁 => 쎁 +C382 110A 1165 11C1 # 쎂 => 쎂 +C383 110A 1165 11C2 # 쎃 => 쎃 +C384 110A 1166 # 쎄 => 쎄 +C385 110A 1166 11A8 # 쎅 => 쎅 +C386 110A 1166 11A9 # 쎆 => 쎆 +C387 110A 1166 11AA # 쎇 => 쎇 +C388 110A 1166 11AB # 쎈 => 쎈 +C389 110A 1166 11AC # 쎉 => 쎉 +C38A 110A 1166 11AD # 쎊 => 쎊 +C38B 110A 1166 11AE # 쎋 => 쎋 +C38C 110A 1166 11AF # 쎌 => 쎌 +C38D 110A 1166 11B0 # 쎍 => 쎍 +C38E 110A 1166 11B1 # 쎎 => 쎎 +C38F 110A 1166 11B2 # 쎏 => 쎏 +C390 110A 1166 11B3 # 쎐 => 쎐 +C391 110A 1166 11B4 # 쎑 => 쎑 +C392 110A 1166 11B5 # 쎒 => 쎒 +C393 110A 1166 11B6 # 쎓 => 쎓 +C394 110A 1166 11B7 # 쎔 => 쎔 +C395 110A 1166 11B8 # 쎕 => 쎕 +C396 110A 1166 11B9 # 쎖 => 쎖 +C397 110A 1166 11BA # 쎗 => 쎗 +C398 110A 1166 11BB # 쎘 => 쎘 +C399 110A 1166 11BC # 쎙 => 쎙 +C39A 110A 1166 11BD # 쎚 => 쎚 
+C39B 110A 1166 11BE # 쎛 => 쎛 +C39C 110A 1166 11BF # 쎜 => 쎜 +C39D 110A 1166 11C0 # 쎝 => 쎝 +C39E 110A 1166 11C1 # 쎞 => 쎞 +C39F 110A 1166 11C2 # 쎟 => 쎟 +C3A0 110A 1167 # 쎠 => 쎠 +C3A1 110A 1167 11A8 # 쎡 => 쎡 +C3A2 110A 1167 11A9 # 쎢 => 쎢 +C3A3 110A 1167 11AA # 쎣 => 쎣 +C3A4 110A 1167 11AB # 쎤 => 쎤 +C3A5 110A 1167 11AC # 쎥 => 쎥 +C3A6 110A 1167 11AD # 쎦 => 쎦 +C3A7 110A 1167 11AE # 쎧 => 쎧 +C3A8 110A 1167 11AF # 쎨 => 쎨 +C3A9 110A 1167 11B0 # 쎩 => 쎩 +C3AA 110A 1167 11B1 # 쎪 => 쎪 +C3AB 110A 1167 11B2 # 쎫 => 쎫 +C3AC 110A 1167 11B3 # 쎬 => 쎬 +C3AD 110A 1167 11B4 # 쎭 => 쎭 +C3AE 110A 1167 11B5 # 쎮 => 쎮 +C3AF 110A 1167 11B6 # 쎯 => 쎯 +C3B0 110A 1167 11B7 # 쎰 => 쎰 +C3B1 110A 1167 11B8 # 쎱 => 쎱 +C3B2 110A 1167 11B9 # 쎲 => 쎲 +C3B3 110A 1167 11BA # 쎳 => 쎳 +C3B4 110A 1167 11BB # 쎴 => 쎴 +C3B5 110A 1167 11BC # 쎵 => 쎵 +C3B6 110A 1167 11BD # 쎶 => 쎶 +C3B7 110A 1167 11BE # 쎷 => 쎷 +C3B8 110A 1167 11BF # 쎸 => 쎸 +C3B9 110A 1167 11C0 # 쎹 => 쎹 +C3BA 110A 1167 11C1 # 쎺 => 쎺 +C3BB 110A 1167 11C2 # 쎻 => 쎻 +C3BC 110A 1168 # 쎼 => 쎼 +C3BD 110A 1168 11A8 # 쎽 => 쎽 +C3BE 110A 1168 11A9 # 쎾 => 쎾 +C3BF 110A 1168 11AA # 쎿 => 쎿 +C3C0 110A 1168 11AB # 쏀 => 쏀 +C3C1 110A 1168 11AC # 쏁 => 쏁 +C3C2 110A 1168 11AD # 쏂 => 쏂 +C3C3 110A 1168 11AE # 쏃 => 쏃 +C3C4 110A 1168 11AF # 쏄 => 쏄 +C3C5 110A 1168 11B0 # 쏅 => 쏅 +C3C6 110A 1168 11B1 # 쏆 => 쏆 +C3C7 110A 1168 11B2 # 쏇 => 쏇 +C3C8 110A 1168 11B3 # 쏈 => 쏈 +C3C9 110A 1168 11B4 # 쏉 => 쏉 +C3CA 110A 1168 11B5 # 쏊 => 쏊 +C3CB 110A 1168 11B6 # 쏋 => 쏋 +C3CC 110A 1168 11B7 # 쏌 => 쏌 +C3CD 110A 1168 11B8 # 쏍 => 쏍 +C3CE 110A 1168 11B9 # 쏎 => 쏎 +C3CF 110A 1168 11BA # 쏏 => 쏏 +C3D0 110A 1168 11BB # 쏐 => 쏐 +C3D1 110A 1168 11BC # 쏑 => 쏑 +C3D2 110A 1168 11BD # 쏒 => 쏒 +C3D3 110A 1168 11BE # 쏓 => 쏓 +C3D4 110A 1168 11BF # 쏔 => 쏔 +C3D5 110A 1168 11C0 # 쏕 => 쏕 +C3D6 110A 1168 11C1 # 쏖 => 쏖 +C3D7 110A 1168 11C2 # 쏗 => 쏗 +C3D8 110A 1169 # 쏘 => 쏘 +C3D9 110A 1169 11A8 # 쏙 => 쏙 +C3DA 110A 1169 11A9 # 쏚 => 쏚 +C3DB 110A 1169 11AA # 쏛 => 쏛 +C3DC 110A 1169 11AB # 쏜 => 쏜 +C3DD 110A 1169 11AC # 쏝 => 쏝 
+C3DE 110A 1169 11AD # 쏞 => 쏞 +C3DF 110A 1169 11AE # 쏟 => 쏟 +C3E0 110A 1169 11AF # 쏠 => 쏠 +C3E1 110A 1169 11B0 # 쏡 => 쏡 +C3E2 110A 1169 11B1 # 쏢 => 쏢 +C3E3 110A 1169 11B2 # 쏣 => 쏣 +C3E4 110A 1169 11B3 # 쏤 => 쏤 +C3E5 110A 1169 11B4 # 쏥 => 쏥 +C3E6 110A 1169 11B5 # 쏦 => 쏦 +C3E7 110A 1169 11B6 # 쏧 => 쏧 +C3E8 110A 1169 11B7 # 쏨 => 쏨 +C3E9 110A 1169 11B8 # 쏩 => 쏩 +C3EA 110A 1169 11B9 # 쏪 => 쏪 +C3EB 110A 1169 11BA # 쏫 => 쏫 +C3EC 110A 1169 11BB # 쏬 => 쏬 +C3ED 110A 1169 11BC # 쏭 => 쏭 +C3EE 110A 1169 11BD # 쏮 => 쏮 +C3EF 110A 1169 11BE # 쏯 => 쏯 +C3F0 110A 1169 11BF # 쏰 => 쏰 +C3F1 110A 1169 11C0 # 쏱 => 쏱 +C3F2 110A 1169 11C1 # 쏲 => 쏲 +C3F3 110A 1169 11C2 # 쏳 => 쏳 +C3F4 110A 116A # 쏴 => 쏴 +C3F5 110A 116A 11A8 # 쏵 => 쏵 +C3F6 110A 116A 11A9 # 쏶 => 쏶 +C3F7 110A 116A 11AA # 쏷 => 쏷 +C3F8 110A 116A 11AB # 쏸 => 쏸 +C3F9 110A 116A 11AC # 쏹 => 쏹 +C3FA 110A 116A 11AD # 쏺 => 쏺 +C3FB 110A 116A 11AE # 쏻 => 쏻 +C3FC 110A 116A 11AF # 쏼 => 쏼 +C3FD 110A 116A 11B0 # 쏽 => 쏽 +C3FE 110A 116A 11B1 # 쏾 => 쏾 +C3FF 110A 116A 11B2 # 쏿 => 쏿 +C400 110A 116A 11B3 # 쐀 => 쐀 +C401 110A 116A 11B4 # 쐁 => 쐁 +C402 110A 116A 11B5 # 쐂 => 쐂 +C403 110A 116A 11B6 # 쐃 => 쐃 +C404 110A 116A 11B7 # 쐄 => 쐄 +C405 110A 116A 11B8 # 쐅 => 쐅 +C406 110A 116A 11B9 # 쐆 => 쐆 +C407 110A 116A 11BA # 쐇 => 쐇 +C408 110A 116A 11BB # 쐈 => 쐈 +C409 110A 116A 11BC # 쐉 => 쐉 +C40A 110A 116A 11BD # 쐊 => 쐊 +C40B 110A 116A 11BE # 쐋 => 쐋 +C40C 110A 116A 11BF # 쐌 => 쐌 +C40D 110A 116A 11C0 # 쐍 => 쐍 +C40E 110A 116A 11C1 # 쐎 => 쐎 +C40F 110A 116A 11C2 # 쐏 => 쐏 +C410 110A 116B # 쐐 => 쐐 +C411 110A 116B 11A8 # 쐑 => 쐑 +C412 110A 116B 11A9 # 쐒 => 쐒 +C413 110A 116B 11AA # 쐓 => 쐓 +C414 110A 116B 11AB # 쐔 => 쐔 +C415 110A 116B 11AC # 쐕 => 쐕 +C416 110A 116B 11AD # 쐖 => 쐖 +C417 110A 116B 11AE # 쐗 => 쐗 +C418 110A 116B 11AF # 쐘 => 쐘 +C419 110A 116B 11B0 # 쐙 => 쐙 +C41A 110A 116B 11B1 # 쐚 => 쐚 +C41B 110A 116B 11B2 # 쐛 => 쐛 +C41C 110A 116B 11B3 # 쐜 => 쐜 +C41D 110A 116B 11B4 # 쐝 => 쐝 +C41E 110A 116B 11B5 # 쐞 => 쐞 +C41F 110A 116B 11B6 # 쐟 => 쐟 +C420 110A 116B 11B7 # 쐠 => 쐠 
+C421 110A 116B 11B8 # 쐡 => 쐡 +C422 110A 116B 11B9 # 쐢 => 쐢 +C423 110A 116B 11BA # 쐣 => 쐣 +C424 110A 116B 11BB # 쐤 => 쐤 +C425 110A 116B 11BC # 쐥 => 쐥 +C426 110A 116B 11BD # 쐦 => 쐦 +C427 110A 116B 11BE # 쐧 => 쐧 +C428 110A 116B 11BF # 쐨 => 쐨 +C429 110A 116B 11C0 # 쐩 => 쐩 +C42A 110A 116B 11C1 # 쐪 => 쐪 +C42B 110A 116B 11C2 # 쐫 => 쐫 +C42C 110A 116C # 쐬 => 쐬 +C42D 110A 116C 11A8 # 쐭 => 쐭 +C42E 110A 116C 11A9 # 쐮 => 쐮 +C42F 110A 116C 11AA # 쐯 => 쐯 +C430 110A 116C 11AB # 쐰 => 쐰 +C431 110A 116C 11AC # 쐱 => 쐱 +C432 110A 116C 11AD # 쐲 => 쐲 +C433 110A 116C 11AE # 쐳 => 쐳 +C434 110A 116C 11AF # 쐴 => 쐴 +C435 110A 116C 11B0 # 쐵 => 쐵 +C436 110A 116C 11B1 # 쐶 => 쐶 +C437 110A 116C 11B2 # 쐷 => 쐷 +C438 110A 116C 11B3 # 쐸 => 쐸 +C439 110A 116C 11B4 # 쐹 => 쐹 +C43A 110A 116C 11B5 # 쐺 => 쐺 +C43B 110A 116C 11B6 # 쐻 => 쐻 +C43C 110A 116C 11B7 # 쐼 => 쐼 +C43D 110A 116C 11B8 # 쐽 => 쐽 +C43E 110A 116C 11B9 # 쐾 => 쐾 +C43F 110A 116C 11BA # 쐿 => 쐿 +C440 110A 116C 11BB # 쑀 => 쑀 +C441 110A 116C 11BC # 쑁 => 쑁 +C442 110A 116C 11BD # 쑂 => 쑂 +C443 110A 116C 11BE # 쑃 => 쑃 +C444 110A 116C 11BF # 쑄 => 쑄 +C445 110A 116C 11C0 # 쑅 => 쑅 +C446 110A 116C 11C1 # 쑆 => 쑆 +C447 110A 116C 11C2 # 쑇 => 쑇 +C448 110A 116D # 쑈 => 쑈 +C449 110A 116D 11A8 # 쑉 => 쑉 +C44A 110A 116D 11A9 # 쑊 => 쑊 +C44B 110A 116D 11AA # 쑋 => 쑋 +C44C 110A 116D 11AB # 쑌 => 쑌 +C44D 110A 116D 11AC # 쑍 => 쑍 +C44E 110A 116D 11AD # 쑎 => 쑎 +C44F 110A 116D 11AE # 쑏 => 쑏 +C450 110A 116D 11AF # 쑐 => 쑐 +C451 110A 116D 11B0 # 쑑 => 쑑 +C452 110A 116D 11B1 # 쑒 => 쑒 +C453 110A 116D 11B2 # 쑓 => 쑓 +C454 110A 116D 11B3 # 쑔 => 쑔 +C455 110A 116D 11B4 # 쑕 => 쑕 +C456 110A 116D 11B5 # 쑖 => 쑖 +C457 110A 116D 11B6 # 쑗 => 쑗 +C458 110A 116D 11B7 # 쑘 => 쑘 +C459 110A 116D 11B8 # 쑙 => 쑙 +C45A 110A 116D 11B9 # 쑚 => 쑚 +C45B 110A 116D 11BA # 쑛 => 쑛 +C45C 110A 116D 11BB # 쑜 => 쑜 +C45D 110A 116D 11BC # 쑝 => 쑝 +C45E 110A 116D 11BD # 쑞 => 쑞 +C45F 110A 116D 11BE # 쑟 => 쑟 +C460 110A 116D 11BF # 쑠 => 쑠 +C461 110A 116D 11C0 # 쑡 => 쑡 +C462 110A 116D 11C1 # 쑢 => 쑢 +C463 110A 116D 11C2 # 쑣 => 쑣 
+C464 110A 116E # 쑤 => 쑤 +C465 110A 116E 11A8 # 쑥 => 쑥 +C466 110A 116E 11A9 # 쑦 => 쑦 +C467 110A 116E 11AA # 쑧 => 쑧 +C468 110A 116E 11AB # 쑨 => 쑨 +C469 110A 116E 11AC # 쑩 => 쑩 +C46A 110A 116E 11AD # 쑪 => 쑪 +C46B 110A 116E 11AE # 쑫 => 쑫 +C46C 110A 116E 11AF # 쑬 => 쑬 +C46D 110A 116E 11B0 # 쑭 => 쑭 +C46E 110A 116E 11B1 # 쑮 => 쑮 +C46F 110A 116E 11B2 # 쑯 => 쑯 +C470 110A 116E 11B3 # 쑰 => 쑰 +C471 110A 116E 11B4 # 쑱 => 쑱 +C472 110A 116E 11B5 # 쑲 => 쑲 +C473 110A 116E 11B6 # 쑳 => 쑳 +C474 110A 116E 11B7 # 쑴 => 쑴 +C475 110A 116E 11B8 # 쑵 => 쑵 +C476 110A 116E 11B9 # 쑶 => 쑶 +C477 110A 116E 11BA # 쑷 => 쑷 +C478 110A 116E 11BB # 쑸 => 쑸 +C479 110A 116E 11BC # 쑹 => 쑹 +C47A 110A 116E 11BD # 쑺 => 쑺 +C47B 110A 116E 11BE # 쑻 => 쑻 +C47C 110A 116E 11BF # 쑼 => 쑼 +C47D 110A 116E 11C0 # 쑽 => 쑽 +C47E 110A 116E 11C1 # 쑾 => 쑾 +C47F 110A 116E 11C2 # 쑿 => 쑿 +C480 110A 116F # 쒀 => 쒀 +C481 110A 116F 11A8 # 쒁 => 쒁 +C482 110A 116F 11A9 # 쒂 => 쒂 +C483 110A 116F 11AA # 쒃 => 쒃 +C484 110A 116F 11AB # 쒄 => 쒄 +C485 110A 116F 11AC # 쒅 => 쒅 +C486 110A 116F 11AD # 쒆 => 쒆 +C487 110A 116F 11AE # 쒇 => 쒇 +C488 110A 116F 11AF # 쒈 => 쒈 +C489 110A 116F 11B0 # 쒉 => 쒉 +C48A 110A 116F 11B1 # 쒊 => 쒊 +C48B 110A 116F 11B2 # 쒋 => 쒋 +C48C 110A 116F 11B3 # 쒌 => 쒌 +C48D 110A 116F 11B4 # 쒍 => 쒍 +C48E 110A 116F 11B5 # 쒎 => 쒎 +C48F 110A 116F 11B6 # 쒏 => 쒏 +C490 110A 116F 11B7 # 쒐 => 쒐 +C491 110A 116F 11B8 # 쒑 => 쒑 +C492 110A 116F 11B9 # 쒒 => 쒒 +C493 110A 116F 11BA # 쒓 => 쒓 +C494 110A 116F 11BB # 쒔 => 쒔 +C495 110A 116F 11BC # 쒕 => 쒕 +C496 110A 116F 11BD # 쒖 => 쒖 +C497 110A 116F 11BE # 쒗 => 쒗 +C498 110A 116F 11BF # 쒘 => 쒘 +C499 110A 116F 11C0 # 쒙 => 쒙 +C49A 110A 116F 11C1 # 쒚 => 쒚 +C49B 110A 116F 11C2 # 쒛 => 쒛 +C49C 110A 1170 # 쒜 => 쒜 +C49D 110A 1170 11A8 # 쒝 => 쒝 +C49E 110A 1170 11A9 # 쒞 => 쒞 +C49F 110A 1170 11AA # 쒟 => 쒟 +C4A0 110A 1170 11AB # 쒠 => 쒠 +C4A1 110A 1170 11AC # 쒡 => 쒡 +C4A2 110A 1170 11AD # 쒢 => 쒢 +C4A3 110A 1170 11AE # 쒣 => 쒣 +C4A4 110A 1170 11AF # 쒤 => 쒤 +C4A5 110A 1170 11B0 # 쒥 => 쒥 +C4A6 110A 1170 11B1 # 쒦 => 쒦 
+C4A7 110A 1170 11B2 # 쒧 => 쒧 +C4A8 110A 1170 11B3 # 쒨 => 쒨 +C4A9 110A 1170 11B4 # 쒩 => 쒩 +C4AA 110A 1170 11B5 # 쒪 => 쒪 +C4AB 110A 1170 11B6 # 쒫 => 쒫 +C4AC 110A 1170 11B7 # 쒬 => 쒬 +C4AD 110A 1170 11B8 # 쒭 => 쒭 +C4AE 110A 1170 11B9 # 쒮 => 쒮 +C4AF 110A 1170 11BA # 쒯 => 쒯 +C4B0 110A 1170 11BB # 쒰 => 쒰 +C4B1 110A 1170 11BC # 쒱 => 쒱 +C4B2 110A 1170 11BD # 쒲 => 쒲 +C4B3 110A 1170 11BE # 쒳 => 쒳 +C4B4 110A 1170 11BF # 쒴 => 쒴 +C4B5 110A 1170 11C0 # 쒵 => 쒵 +C4B6 110A 1170 11C1 # 쒶 => 쒶 +C4B7 110A 1170 11C2 # 쒷 => 쒷 +C4B8 110A 1171 # 쒸 => 쒸 +C4B9 110A 1171 11A8 # 쒹 => 쒹 +C4BA 110A 1171 11A9 # 쒺 => 쒺 +C4BB 110A 1171 11AA # 쒻 => 쒻 +C4BC 110A 1171 11AB # 쒼 => 쒼 +C4BD 110A 1171 11AC # 쒽 => 쒽 +C4BE 110A 1171 11AD # 쒾 => 쒾 +C4BF 110A 1171 11AE # 쒿 => 쒿 +C4C0 110A 1171 11AF # 쓀 => 쓀 +C4C1 110A 1171 11B0 # 쓁 => 쓁 +C4C2 110A 1171 11B1 # 쓂 => 쓂 +C4C3 110A 1171 11B2 # 쓃 => 쓃 +C4C4 110A 1171 11B3 # 쓄 => 쓄 +C4C5 110A 1171 11B4 # 쓅 => 쓅 +C4C6 110A 1171 11B5 # 쓆 => 쓆 +C4C7 110A 1171 11B6 # 쓇 => 쓇 +C4C8 110A 1171 11B7 # 쓈 => 쓈 +C4C9 110A 1171 11B8 # 쓉 => 쓉 +C4CA 110A 1171 11B9 # 쓊 => 쓊 +C4CB 110A 1171 11BA # 쓋 => 쓋 +C4CC 110A 1171 11BB # 쓌 => 쓌 +C4CD 110A 1171 11BC # 쓍 => 쓍 +C4CE 110A 1171 11BD # 쓎 => 쓎 +C4CF 110A 1171 11BE # 쓏 => 쓏 +C4D0 110A 1171 11BF # 쓐 => 쓐 +C4D1 110A 1171 11C0 # 쓑 => 쓑 +C4D2 110A 1171 11C1 # 쓒 => 쓒 +C4D3 110A 1171 11C2 # 쓓 => 쓓 +C4D4 110A 1172 # 쓔 => 쓔 +C4D5 110A 1172 11A8 # 쓕 => 쓕 +C4D6 110A 1172 11A9 # 쓖 => 쓖 +C4D7 110A 1172 11AA # 쓗 => 쓗 +C4D8 110A 1172 11AB # 쓘 => 쓘 +C4D9 110A 1172 11AC # 쓙 => 쓙 +C4DA 110A 1172 11AD # 쓚 => 쓚 +C4DB 110A 1172 11AE # 쓛 => 쓛 +C4DC 110A 1172 11AF # 쓜 => 쓜 +C4DD 110A 1172 11B0 # 쓝 => 쓝 +C4DE 110A 1172 11B1 # 쓞 => 쓞 +C4DF 110A 1172 11B2 # 쓟 => 쓟 +C4E0 110A 1172 11B3 # 쓠 => 쓠 +C4E1 110A 1172 11B4 # 쓡 => 쓡 +C4E2 110A 1172 11B5 # 쓢 => 쓢 +C4E3 110A 1172 11B6 # 쓣 => 쓣 +C4E4 110A 1172 11B7 # 쓤 => 쓤 +C4E5 110A 1172 11B8 # 쓥 => 쓥 +C4E6 110A 1172 11B9 # 쓦 => 쓦 +C4E7 110A 1172 11BA # 쓧 => 쓧 +C4E8 110A 1172 11BB # 쓨 => 쓨 +C4E9 110A 1172 11BC # 쓩 => 쓩 
+C4EA 110A 1172 11BD # 쓪 => 쓪 +C4EB 110A 1172 11BE # 쓫 => 쓫 +C4EC 110A 1172 11BF # 쓬 => 쓬 +C4ED 110A 1172 11C0 # 쓭 => 쓭 +C4EE 110A 1172 11C1 # 쓮 => 쓮 +C4EF 110A 1172 11C2 # 쓯 => 쓯 +C4F0 110A 1173 # 쓰 => 쓰 +C4F1 110A 1173 11A8 # 쓱 => 쓱 +C4F2 110A 1173 11A9 # 쓲 => 쓲 +C4F3 110A 1173 11AA # 쓳 => 쓳 +C4F4 110A 1173 11AB # 쓴 => 쓴 +C4F5 110A 1173 11AC # 쓵 => 쓵 +C4F6 110A 1173 11AD # 쓶 => 쓶 +C4F7 110A 1173 11AE # 쓷 => 쓷 +C4F8 110A 1173 11AF # 쓸 => 쓸 +C4F9 110A 1173 11B0 # 쓹 => 쓹 +C4FA 110A 1173 11B1 # 쓺 => 쓺 +C4FB 110A 1173 11B2 # 쓻 => 쓻 +C4FC 110A 1173 11B3 # 쓼 => 쓼 +C4FD 110A 1173 11B4 # 쓽 => 쓽 +C4FE 110A 1173 11B5 # 쓾 => 쓾 +C4FF 110A 1173 11B6 # 쓿 => 쓿 +C500 110A 1173 11B7 # 씀 => 씀 +C501 110A 1173 11B8 # 씁 => 씁 +C502 110A 1173 11B9 # 씂 => 씂 +C503 110A 1173 11BA # 씃 => 씃 +C504 110A 1173 11BB # 씄 => 씄 +C505 110A 1173 11BC # 씅 => 씅 +C506 110A 1173 11BD # 씆 => 씆 +C507 110A 1173 11BE # 씇 => 씇 +C508 110A 1173 11BF # 씈 => 씈 +C509 110A 1173 11C0 # 씉 => 씉 +C50A 110A 1173 11C1 # 씊 => 씊 +C50B 110A 1173 11C2 # 씋 => 씋 +C50C 110A 1174 # 씌 => 씌 +C50D 110A 1174 11A8 # 씍 => 씍 +C50E 110A 1174 11A9 # 씎 => 씎 +C50F 110A 1174 11AA # 씏 => 씏 +C510 110A 1174 11AB # 씐 => 씐 +C511 110A 1174 11AC # 씑 => 씑 +C512 110A 1174 11AD # 씒 => 씒 +C513 110A 1174 11AE # 씓 => 씓 +C514 110A 1174 11AF # 씔 => 씔 +C515 110A 1174 11B0 # 씕 => 씕 +C516 110A 1174 11B1 # 씖 => 씖 +C517 110A 1174 11B2 # 씗 => 씗 +C518 110A 1174 11B3 # 씘 => 씘 +C519 110A 1174 11B4 # 씙 => 씙 +C51A 110A 1174 11B5 # 씚 => 씚 +C51B 110A 1174 11B6 # 씛 => 씛 +C51C 110A 1174 11B7 # 씜 => 씜 +C51D 110A 1174 11B8 # 씝 => 씝 +C51E 110A 1174 11B9 # 씞 => 씞 +C51F 110A 1174 11BA # 씟 => 씟 +C520 110A 1174 11BB # 씠 => 씠 +C521 110A 1174 11BC # 씡 => 씡 +C522 110A 1174 11BD # 씢 => 씢 +C523 110A 1174 11BE # 씣 => 씣 +C524 110A 1174 11BF # 씤 => 씤 +C525 110A 1174 11C0 # 씥 => 씥 +C526 110A 1174 11C1 # 씦 => 씦 +C527 110A 1174 11C2 # 씧 => 씧 +C528 110A 1175 # 씨 => 씨 +C529 110A 1175 11A8 # 씩 => 씩 +C52A 110A 1175 11A9 # 씪 => 씪 +C52B 110A 1175 11AA # 씫 => 씫 +C52C 110A 1175 11AB # 씬 => 씬 
+C52D 110A 1175 11AC # 씭 => 씭 +C52E 110A 1175 11AD # 씮 => 씮 +C52F 110A 1175 11AE # 씯 => 씯 +C530 110A 1175 11AF # 씰 => 씰 +C531 110A 1175 11B0 # 씱 => 씱 +C532 110A 1175 11B1 # 씲 => 씲 +C533 110A 1175 11B2 # 씳 => 씳 +C534 110A 1175 11B3 # 씴 => 씴 +C535 110A 1175 11B4 # 씵 => 씵 +C536 110A 1175 11B5 # 씶 => 씶 +C537 110A 1175 11B6 # 씷 => 씷 +C538 110A 1175 11B7 # 씸 => 씸 +C539 110A 1175 11B8 # 씹 => 씹 +C53A 110A 1175 11B9 # 씺 => 씺 +C53B 110A 1175 11BA # 씻 => 씻 +C53C 110A 1175 11BB # 씼 => 씼 +C53D 110A 1175 11BC # 씽 => 씽 +C53E 110A 1175 11BD # 씾 => 씾 +C53F 110A 1175 11BE # 씿 => 씿 +C540 110A 1175 11BF # 앀 => 앀 +C541 110A 1175 11C0 # 앁 => 앁 +C542 110A 1175 11C1 # 앂 => 앂 +C543 110A 1175 11C2 # 앃 => 앃 +C544 110B 1161 # 아 => 아 +C545 110B 1161 11A8 # 악 => 악 +C546 110B 1161 11A9 # 앆 => 앆 +C547 110B 1161 11AA # 앇 => 앇 +C548 110B 1161 11AB # 안 => 안 +C549 110B 1161 11AC # 앉 => 앉 +C54A 110B 1161 11AD # 않 => 않 +C54B 110B 1161 11AE # 앋 => 앋 +C54C 110B 1161 11AF # 알 => 알 +C54D 110B 1161 11B0 # 앍 => 앍 +C54E 110B 1161 11B1 # 앎 => 앎 +C54F 110B 1161 11B2 # 앏 => 앏 +C550 110B 1161 11B3 # 앐 => 앐 +C551 110B 1161 11B4 # 앑 => 앑 +C552 110B 1161 11B5 # 앒 => 앒 +C553 110B 1161 11B6 # 앓 => 앓 +C554 110B 1161 11B7 # 암 => 암 +C555 110B 1161 11B8 # 압 => 압 +C556 110B 1161 11B9 # 앖 => 앖 +C557 110B 1161 11BA # 앗 => 앗 +C558 110B 1161 11BB # 았 => 았 +C559 110B 1161 11BC # 앙 => 앙 +C55A 110B 1161 11BD # 앚 => 앚 +C55B 110B 1161 11BE # 앛 => 앛 +C55C 110B 1161 11BF # 앜 => 앜 +C55D 110B 1161 11C0 # 앝 => 앝 +C55E 110B 1161 11C1 # 앞 => 앞 +C55F 110B 1161 11C2 # 앟 => 앟 +C560 110B 1162 # 애 => 애 +C561 110B 1162 11A8 # 액 => 액 +C562 110B 1162 11A9 # 앢 => 앢 +C563 110B 1162 11AA # 앣 => 앣 +C564 110B 1162 11AB # 앤 => 앤 +C565 110B 1162 11AC # 앥 => 앥 +C566 110B 1162 11AD # 앦 => 앦 +C567 110B 1162 11AE # 앧 => 앧 +C568 110B 1162 11AF # 앨 => 앨 +C569 110B 1162 11B0 # 앩 => 앩 +C56A 110B 1162 11B1 # 앪 => 앪 +C56B 110B 1162 11B2 # 앫 => 앫 +C56C 110B 1162 11B3 # 앬 => 앬 +C56D 110B 1162 11B4 # 앭 => 앭 +C56E 110B 1162 11B5 # 앮 => 앮 +C56F 110B 1162 11B6 # 앯 => 앯 
+C570 110B 1162 11B7 # 앰 => 앰 +C571 110B 1162 11B8 # 앱 => 앱 +C572 110B 1162 11B9 # 앲 => 앲 +C573 110B 1162 11BA # 앳 => 앳 +C574 110B 1162 11BB # 앴 => 앴 +C575 110B 1162 11BC # 앵 => 앵 +C576 110B 1162 11BD # 앶 => 앶 +C577 110B 1162 11BE # 앷 => 앷 +C578 110B 1162 11BF # 앸 => 앸 +C579 110B 1162 11C0 # 앹 => 앹 +C57A 110B 1162 11C1 # 앺 => 앺 +C57B 110B 1162 11C2 # 앻 => 앻 +C57C 110B 1163 # 야 => 야 +C57D 110B 1163 11A8 # 약 => 약 +C57E 110B 1163 11A9 # 앾 => 앾 +C57F 110B 1163 11AA # 앿 => 앿 +C580 110B 1163 11AB # 얀 => 얀 +C581 110B 1163 11AC # 얁 => 얁 +C582 110B 1163 11AD # 얂 => 얂 +C583 110B 1163 11AE # 얃 => 얃 +C584 110B 1163 11AF # 얄 => 얄 +C585 110B 1163 11B0 # 얅 => 얅 +C586 110B 1163 11B1 # 얆 => 얆 +C587 110B 1163 11B2 # 얇 => 얇 +C588 110B 1163 11B3 # 얈 => 얈 +C589 110B 1163 11B4 # 얉 => 얉 +C58A 110B 1163 11B5 # 얊 => 얊 +C58B 110B 1163 11B6 # 얋 => 얋 +C58C 110B 1163 11B7 # 얌 => 얌 +C58D 110B 1163 11B8 # 얍 => 얍 +C58E 110B 1163 11B9 # 얎 => 얎 +C58F 110B 1163 11BA # 얏 => 얏 +C590 110B 1163 11BB # 얐 => 얐 +C591 110B 1163 11BC # 양 => 양 +C592 110B 1163 11BD # 얒 => 얒 +C593 110B 1163 11BE # 얓 => 얓 +C594 110B 1163 11BF # 얔 => 얔 +C595 110B 1163 11C0 # 얕 => 얕 +C596 110B 1163 11C1 # 얖 => 얖 +C597 110B 1163 11C2 # 얗 => 얗 +C598 110B 1164 # 얘 => 얘 +C599 110B 1164 11A8 # 얙 => 얙 +C59A 110B 1164 11A9 # 얚 => 얚 +C59B 110B 1164 11AA # 얛 => 얛 +C59C 110B 1164 11AB # 얜 => 얜 +C59D 110B 1164 11AC # 얝 => 얝 +C59E 110B 1164 11AD # 얞 => 얞 +C59F 110B 1164 11AE # 얟 => 얟 +C5A0 110B 1164 11AF # 얠 => 얠 +C5A1 110B 1164 11B0 # 얡 => 얡 +C5A2 110B 1164 11B1 # 얢 => 얢 +C5A3 110B 1164 11B2 # 얣 => 얣 +C5A4 110B 1164 11B3 # 얤 => 얤 +C5A5 110B 1164 11B4 # 얥 => 얥 +C5A6 110B 1164 11B5 # 얦 => 얦 +C5A7 110B 1164 11B6 # 얧 => 얧 +C5A8 110B 1164 11B7 # 얨 => 얨 +C5A9 110B 1164 11B8 # 얩 => 얩 +C5AA 110B 1164 11B9 # 얪 => 얪 +C5AB 110B 1164 11BA # 얫 => 얫 +C5AC 110B 1164 11BB # 얬 => 얬 +C5AD 110B 1164 11BC # 얭 => 얭 +C5AE 110B 1164 11BD # 얮 => 얮 +C5AF 110B 1164 11BE # 얯 => 얯 +C5B0 110B 1164 11BF # 얰 => 얰 +C5B1 110B 1164 11C0 # 얱 => 얱 +C5B2 110B 1164 11C1 # 얲 => 얲 
+C5B3 110B 1164 11C2 # 얳 => 얳 +C5B4 110B 1165 # 어 => 어 +C5B5 110B 1165 11A8 # 억 => 억 +C5B6 110B 1165 11A9 # 얶 => 얶 +C5B7 110B 1165 11AA # 얷 => 얷 +C5B8 110B 1165 11AB # 언 => 언 +C5B9 110B 1165 11AC # 얹 => 얹 +C5BA 110B 1165 11AD # 얺 => 얺 +C5BB 110B 1165 11AE # 얻 => 얻 +C5BC 110B 1165 11AF # 얼 => 얼 +C5BD 110B 1165 11B0 # 얽 => 얽 +C5BE 110B 1165 11B1 # 얾 => 얾 +C5BF 110B 1165 11B2 # 얿 => 얿 +C5C0 110B 1165 11B3 # 엀 => 엀 +C5C1 110B 1165 11B4 # 엁 => 엁 +C5C2 110B 1165 11B5 # 엂 => 엂 +C5C3 110B 1165 11B6 # 엃 => 엃 +C5C4 110B 1165 11B7 # 엄 => 엄 +C5C5 110B 1165 11B8 # 업 => 업 +C5C6 110B 1165 11B9 # 없 => 없 +C5C7 110B 1165 11BA # 엇 => 엇 +C5C8 110B 1165 11BB # 었 => 었 +C5C9 110B 1165 11BC # 엉 => 엉 +C5CA 110B 1165 11BD # 엊 => 엊 +C5CB 110B 1165 11BE # 엋 => 엋 +C5CC 110B 1165 11BF # 엌 => 엌 +C5CD 110B 1165 11C0 # 엍 => 엍 +C5CE 110B 1165 11C1 # 엎 => 엎 +C5CF 110B 1165 11C2 # 엏 => 엏 +C5D0 110B 1166 # 에 => 에 +C5D1 110B 1166 11A8 # 엑 => 엑 +C5D2 110B 1166 11A9 # 엒 => 엒 +C5D3 110B 1166 11AA # 엓 => 엓 +C5D4 110B 1166 11AB # 엔 => 엔 +C5D5 110B 1166 11AC # 엕 => 엕 +C5D6 110B 1166 11AD # 엖 => 엖 +C5D7 110B 1166 11AE # 엗 => 엗 +C5D8 110B 1166 11AF # 엘 => 엘 +C5D9 110B 1166 11B0 # 엙 => 엙 +C5DA 110B 1166 11B1 # 엚 => 엚 +C5DB 110B 1166 11B2 # 엛 => 엛 +C5DC 110B 1166 11B3 # 엜 => 엜 +C5DD 110B 1166 11B4 # 엝 => 엝 +C5DE 110B 1166 11B5 # 엞 => 엞 +C5DF 110B 1166 11B6 # 엟 => 엟 +C5E0 110B 1166 11B7 # 엠 => 엠 +C5E1 110B 1166 11B8 # 엡 => 엡 +C5E2 110B 1166 11B9 # 엢 => 엢 +C5E3 110B 1166 11BA # 엣 => 엣 +C5E4 110B 1166 11BB # 엤 => 엤 +C5E5 110B 1166 11BC # 엥 => 엥 +C5E6 110B 1166 11BD # 엦 => 엦 +C5E7 110B 1166 11BE # 엧 => 엧 +C5E8 110B 1166 11BF # 엨 => 엨 +C5E9 110B 1166 11C0 # 엩 => 엩 +C5EA 110B 1166 11C1 # 엪 => 엪 +C5EB 110B 1166 11C2 # 엫 => 엫 +C5EC 110B 1167 # 여 => 여 +C5ED 110B 1167 11A8 # 역 => 역 +C5EE 110B 1167 11A9 # 엮 => 엮 +C5EF 110B 1167 11AA # 엯 => 엯 +C5F0 110B 1167 11AB # 연 => 연 +C5F1 110B 1167 11AC # 엱 => 엱 +C5F2 110B 1167 11AD # 엲 => 엲 +C5F3 110B 1167 11AE # 엳 => 엳 +C5F4 110B 1167 11AF # 열 => 열 +C5F5 110B 1167 11B0 # 엵 => 엵 
+C5F6 110B 1167 11B1 # 엶 => 엶 +C5F7 110B 1167 11B2 # 엷 => 엷 +C5F8 110B 1167 11B3 # 엸 => 엸 +C5F9 110B 1167 11B4 # 엹 => 엹 +C5FA 110B 1167 11B5 # 엺 => 엺 +C5FB 110B 1167 11B6 # 엻 => 엻 +C5FC 110B 1167 11B7 # 염 => 염 +C5FD 110B 1167 11B8 # 엽 => 엽 +C5FE 110B 1167 11B9 # 엾 => 엾 +C5FF 110B 1167 11BA # 엿 => 엿 +C600 110B 1167 11BB # 였 => 였 +C601 110B 1167 11BC # 영 => 영 +C602 110B 1167 11BD # 옂 => 옂 +C603 110B 1167 11BE # 옃 => 옃 +C604 110B 1167 11BF # 옄 => 옄 +C605 110B 1167 11C0 # 옅 => 옅 +C606 110B 1167 11C1 # 옆 => 옆 +C607 110B 1167 11C2 # 옇 => 옇 +C608 110B 1168 # 예 => 예 +C609 110B 1168 11A8 # 옉 => 옉 +C60A 110B 1168 11A9 # 옊 => 옊 +C60B 110B 1168 11AA # 옋 => 옋 +C60C 110B 1168 11AB # 옌 => 옌 +C60D 110B 1168 11AC # 옍 => 옍 +C60E 110B 1168 11AD # 옎 => 옎 +C60F 110B 1168 11AE # 옏 => 옏 +C610 110B 1168 11AF # 옐 => 옐 +C611 110B 1168 11B0 # 옑 => 옑 +C612 110B 1168 11B1 # 옒 => 옒 +C613 110B 1168 11B2 # 옓 => 옓 +C614 110B 1168 11B3 # 옔 => 옔 +C615 110B 1168 11B4 # 옕 => 옕 +C616 110B 1168 11B5 # 옖 => 옖 +C617 110B 1168 11B6 # 옗 => 옗 +C618 110B 1168 11B7 # 옘 => 옘 +C619 110B 1168 11B8 # 옙 => 옙 +C61A 110B 1168 11B9 # 옚 => 옚 +C61B 110B 1168 11BA # 옛 => 옛 +C61C 110B 1168 11BB # 옜 => 옜 +C61D 110B 1168 11BC # 옝 => 옝 +C61E 110B 1168 11BD # 옞 => 옞 +C61F 110B 1168 11BE # 옟 => 옟 +C620 110B 1168 11BF # 옠 => 옠 +C621 110B 1168 11C0 # 옡 => 옡 +C622 110B 1168 11C1 # 옢 => 옢 +C623 110B 1168 11C2 # 옣 => 옣 +C624 110B 1169 # 오 => 오 +C625 110B 1169 11A8 # 옥 => 옥 +C626 110B 1169 11A9 # 옦 => 옦 +C627 110B 1169 11AA # 옧 => 옧 +C628 110B 1169 11AB # 온 => 온 +C629 110B 1169 11AC # 옩 => 옩 +C62A 110B 1169 11AD # 옪 => 옪 +C62B 110B 1169 11AE # 옫 => 옫 +C62C 110B 1169 11AF # 올 => 올 +C62D 110B 1169 11B0 # 옭 => 옭 +C62E 110B 1169 11B1 # 옮 => 옮 +C62F 110B 1169 11B2 # 옯 => 옯 +C630 110B 1169 11B3 # 옰 => 옰 +C631 110B 1169 11B4 # 옱 => 옱 +C632 110B 1169 11B5 # 옲 => 옲 +C633 110B 1169 11B6 # 옳 => 옳 +C634 110B 1169 11B7 # 옴 => 옴 +C635 110B 1169 11B8 # 옵 => 옵 +C636 110B 1169 11B9 # 옶 => 옶 +C637 110B 1169 11BA # 옷 => 옷 +C638 110B 1169 11BB # 옸 => 옸 
+C639 110B 1169 11BC # 옹 => 옹 +C63A 110B 1169 11BD # 옺 => 옺 +C63B 110B 1169 11BE # 옻 => 옻 +C63C 110B 1169 11BF # 옼 => 옼 +C63D 110B 1169 11C0 # 옽 => 옽 +C63E 110B 1169 11C1 # 옾 => 옾 +C63F 110B 1169 11C2 # 옿 => 옿 +C640 110B 116A # 와 => 와 +C641 110B 116A 11A8 # 왁 => 왁 +C642 110B 116A 11A9 # 왂 => 왂 +C643 110B 116A 11AA # 왃 => 왃 +C644 110B 116A 11AB # 완 => 완 +C645 110B 116A 11AC # 왅 => 왅 +C646 110B 116A 11AD # 왆 => 왆 +C647 110B 116A 11AE # 왇 => 왇 +C648 110B 116A 11AF # 왈 => 왈 +C649 110B 116A 11B0 # 왉 => 왉 +C64A 110B 116A 11B1 # 왊 => 왊 +C64B 110B 116A 11B2 # 왋 => 왋 +C64C 110B 116A 11B3 # 왌 => 왌 +C64D 110B 116A 11B4 # 왍 => 왍 +C64E 110B 116A 11B5 # 왎 => 왎 +C64F 110B 116A 11B6 # 왏 => 왏 +C650 110B 116A 11B7 # 왐 => 왐 +C651 110B 116A 11B8 # 왑 => 왑 +C652 110B 116A 11B9 # 왒 => 왒 +C653 110B 116A 11BA # 왓 => 왓 +C654 110B 116A 11BB # 왔 => 왔 +C655 110B 116A 11BC # 왕 => 왕 +C656 110B 116A 11BD # 왖 => 왖 +C657 110B 116A 11BE # 왗 => 왗 +C658 110B 116A 11BF # 왘 => 왘 +C659 110B 116A 11C0 # 왙 => 왙 +C65A 110B 116A 11C1 # 왚 => 왚 +C65B 110B 116A 11C2 # 왛 => 왛 +C65C 110B 116B # 왜 => 왜 +C65D 110B 116B 11A8 # 왝 => 왝 +C65E 110B 116B 11A9 # 왞 => 왞 +C65F 110B 116B 11AA # 왟 => 왟 +C660 110B 116B 11AB # 왠 => 왠 +C661 110B 116B 11AC # 왡 => 왡 +C662 110B 116B 11AD # 왢 => 왢 +C663 110B 116B 11AE # 왣 => 왣 +C664 110B 116B 11AF # 왤 => 왤 +C665 110B 116B 11B0 # 왥 => 왥 +C666 110B 116B 11B1 # 왦 => 왦 +C667 110B 116B 11B2 # 왧 => 왧 +C668 110B 116B 11B3 # 왨 => 왨 +C669 110B 116B 11B4 # 왩 => 왩 +C66A 110B 116B 11B5 # 왪 => 왪 +C66B 110B 116B 11B6 # 왫 => 왫 +C66C 110B 116B 11B7 # 왬 => 왬 +C66D 110B 116B 11B8 # 왭 => 왭 +C66E 110B 116B 11B9 # 왮 => 왮 +C66F 110B 116B 11BA # 왯 => 왯 +C670 110B 116B 11BB # 왰 => 왰 +C671 110B 116B 11BC # 왱 => 왱 +C672 110B 116B 11BD # 왲 => 왲 +C673 110B 116B 11BE # 왳 => 왳 +C674 110B 116B 11BF # 왴 => 왴 +C675 110B 116B 11C0 # 왵 => 왵 +C676 110B 116B 11C1 # 왶 => 왶 +C677 110B 116B 11C2 # 왷 => 왷 +C678 110B 116C # 외 => 외 +C679 110B 116C 11A8 # 왹 => 왹 +C67A 110B 116C 11A9 # 왺 => 왺 +C67B 110B 116C 11AA # 왻 => 왻 
+C67C 110B 116C 11AB # 왼 => 왼 +C67D 110B 116C 11AC # 왽 => 왽 +C67E 110B 116C 11AD # 왾 => 왾 +C67F 110B 116C 11AE # 왿 => 왿 +C680 110B 116C 11AF # 욀 => 욀 +C681 110B 116C 11B0 # 욁 => 욁 +C682 110B 116C 11B1 # 욂 => 욂 +C683 110B 116C 11B2 # 욃 => 욃 +C684 110B 116C 11B3 # 욄 => 욄 +C685 110B 116C 11B4 # 욅 => 욅 +C686 110B 116C 11B5 # 욆 => 욆 +C687 110B 116C 11B6 # 욇 => 욇 +C688 110B 116C 11B7 # 욈 => 욈 +C689 110B 116C 11B8 # 욉 => 욉 +C68A 110B 116C 11B9 # 욊 => 욊 +C68B 110B 116C 11BA # 욋 => 욋 +C68C 110B 116C 11BB # 욌 => 욌 +C68D 110B 116C 11BC # 욍 => 욍 +C68E 110B 116C 11BD # 욎 => 욎 +C68F 110B 116C 11BE # 욏 => 욏 +C690 110B 116C 11BF # 욐 => 욐 +C691 110B 116C 11C0 # 욑 => 욑 +C692 110B 116C 11C1 # 욒 => 욒 +C693 110B 116C 11C2 # 욓 => 욓 +C694 110B 116D # 요 => 요 +C695 110B 116D 11A8 # 욕 => 욕 +C696 110B 116D 11A9 # 욖 => 욖 +C697 110B 116D 11AA # 욗 => 욗 +C698 110B 116D 11AB # 욘 => 욘 +C699 110B 116D 11AC # 욙 => 욙 +C69A 110B 116D 11AD # 욚 => 욚 +C69B 110B 116D 11AE # 욛 => 욛 +C69C 110B 116D 11AF # 욜 => 욜 +C69D 110B 116D 11B0 # 욝 => 욝 +C69E 110B 116D 11B1 # 욞 => 욞 +C69F 110B 116D 11B2 # 욟 => 욟 +C6A0 110B 116D 11B3 # 욠 => 욠 +C6A1 110B 116D 11B4 # 욡 => 욡 +C6A2 110B 116D 11B5 # 욢 => 욢 +C6A3 110B 116D 11B6 # 욣 => 욣 +C6A4 110B 116D 11B7 # 욤 => 욤 +C6A5 110B 116D 11B8 # 욥 => 욥 +C6A6 110B 116D 11B9 # 욦 => 욦 +C6A7 110B 116D 11BA # 욧 => 욧 +C6A8 110B 116D 11BB # 욨 => 욨 +C6A9 110B 116D 11BC # 용 => 용 +C6AA 110B 116D 11BD # 욪 => 욪 +C6AB 110B 116D 11BE # 욫 => 욫 +C6AC 110B 116D 11BF # 욬 => 욬 +C6AD 110B 116D 11C0 # 욭 => 욭 +C6AE 110B 116D 11C1 # 욮 => 욮 +C6AF 110B 116D 11C2 # 욯 => 욯 +C6B0 110B 116E # 우 => 우 +C6B1 110B 116E 11A8 # 욱 => 욱 +C6B2 110B 116E 11A9 # 욲 => 욲 +C6B3 110B 116E 11AA # 욳 => 욳 +C6B4 110B 116E 11AB # 운 => 운 +C6B5 110B 116E 11AC # 욵 => 욵 +C6B6 110B 116E 11AD # 욶 => 욶 +C6B7 110B 116E 11AE # 욷 => 욷 +C6B8 110B 116E 11AF # 울 => 울 +C6B9 110B 116E 11B0 # 욹 => 욹 +C6BA 110B 116E 11B1 # 욺 => 욺 +C6BB 110B 116E 11B2 # 욻 => 욻 +C6BC 110B 116E 11B3 # 욼 => 욼 +C6BD 110B 116E 11B4 # 욽 => 욽 +C6BE 110B 116E 11B5 # 욾 => 욾 
+C6BF 110B 116E 11B6 # 욿 => 욿 +C6C0 110B 116E 11B7 # 움 => 움 +C6C1 110B 116E 11B8 # 웁 => 웁 +C6C2 110B 116E 11B9 # 웂 => 웂 +C6C3 110B 116E 11BA # 웃 => 웃 +C6C4 110B 116E 11BB # 웄 => 웄 +C6C5 110B 116E 11BC # 웅 => 웅 +C6C6 110B 116E 11BD # 웆 => 웆 +C6C7 110B 116E 11BE # 웇 => 웇 +C6C8 110B 116E 11BF # 웈 => 웈 +C6C9 110B 116E 11C0 # 웉 => 웉 +C6CA 110B 116E 11C1 # 웊 => 웊 +C6CB 110B 116E 11C2 # 웋 => 웋 +C6CC 110B 116F # 워 => 워 +C6CD 110B 116F 11A8 # 웍 => 웍 +C6CE 110B 116F 11A9 # 웎 => 웎 +C6CF 110B 116F 11AA # 웏 => 웏 +C6D0 110B 116F 11AB # 원 => 원 +C6D1 110B 116F 11AC # 웑 => 웑 +C6D2 110B 116F 11AD # 웒 => 웒 +C6D3 110B 116F 11AE # 웓 => 웓 +C6D4 110B 116F 11AF # 월 => 월 +C6D5 110B 116F 11B0 # 웕 => 웕 +C6D6 110B 116F 11B1 # 웖 => 웖 +C6D7 110B 116F 11B2 # 웗 => 웗 +C6D8 110B 116F 11B3 # 웘 => 웘 +C6D9 110B 116F 11B4 # 웙 => 웙 +C6DA 110B 116F 11B5 # 웚 => 웚 +C6DB 110B 116F 11B6 # 웛 => 웛 +C6DC 110B 116F 11B7 # 웜 => 웜 +C6DD 110B 116F 11B8 # 웝 => 웝 +C6DE 110B 116F 11B9 # 웞 => 웞 +C6DF 110B 116F 11BA # 웟 => 웟 +C6E0 110B 116F 11BB # 웠 => 웠 +C6E1 110B 116F 11BC # 웡 => 웡 +C6E2 110B 116F 11BD # 웢 => 웢 +C6E3 110B 116F 11BE # 웣 => 웣 +C6E4 110B 116F 11BF # 웤 => 웤 +C6E5 110B 116F 11C0 # 웥 => 웥 +C6E6 110B 116F 11C1 # 웦 => 웦 +C6E7 110B 116F 11C2 # 웧 => 웧 +C6E8 110B 1170 # 웨 => 웨 +C6E9 110B 1170 11A8 # 웩 => 웩 +C6EA 110B 1170 11A9 # 웪 => 웪 +C6EB 110B 1170 11AA # 웫 => 웫 +C6EC 110B 1170 11AB # 웬 => 웬 +C6ED 110B 1170 11AC # 웭 => 웭 +C6EE 110B 1170 11AD # 웮 => 웮 +C6EF 110B 1170 11AE # 웯 => 웯 +C6F0 110B 1170 11AF # 웰 => 웰 +C6F1 110B 1170 11B0 # 웱 => 웱 +C6F2 110B 1170 11B1 # 웲 => 웲 +C6F3 110B 1170 11B2 # 웳 => 웳 +C6F4 110B 1170 11B3 # 웴 => 웴 +C6F5 110B 1170 11B4 # 웵 => 웵 +C6F6 110B 1170 11B5 # 웶 => 웶 +C6F7 110B 1170 11B6 # 웷 => 웷 +C6F8 110B 1170 11B7 # 웸 => 웸 +C6F9 110B 1170 11B8 # 웹 => 웹 +C6FA 110B 1170 11B9 # 웺 => 웺 +C6FB 110B 1170 11BA # 웻 => 웻 +C6FC 110B 1170 11BB # 웼 => 웼 +C6FD 110B 1170 11BC # 웽 => 웽 +C6FE 110B 1170 11BD # 웾 => 웾 +C6FF 110B 1170 11BE # 웿 => 웿 +C700 110B 1170 11BF # 윀 => 윀 +C701 110B 1170 11C0 # 윁 => 윁 
+C702 110B 1170 11C1 # 윂 => 윂 +C703 110B 1170 11C2 # 윃 => 윃 +C704 110B 1171 # 위 => 위 +C705 110B 1171 11A8 # 윅 => 윅 +C706 110B 1171 11A9 # 윆 => 윆 +C707 110B 1171 11AA # 윇 => 윇 +C708 110B 1171 11AB # 윈 => 윈 +C709 110B 1171 11AC # 윉 => 윉 +C70A 110B 1171 11AD # 윊 => 윊 +C70B 110B 1171 11AE # 윋 => 윋 +C70C 110B 1171 11AF # 윌 => 윌 +C70D 110B 1171 11B0 # 윍 => 윍 +C70E 110B 1171 11B1 # 윎 => 윎 +C70F 110B 1171 11B2 # 윏 => 윏 +C710 110B 1171 11B3 # 윐 => 윐 +C711 110B 1171 11B4 # 윑 => 윑 +C712 110B 1171 11B5 # 윒 => 윒 +C713 110B 1171 11B6 # 윓 => 윓 +C714 110B 1171 11B7 # 윔 => 윔 +C715 110B 1171 11B8 # 윕 => 윕 +C716 110B 1171 11B9 # 윖 => 윖 +C717 110B 1171 11BA # 윗 => 윗 +C718 110B 1171 11BB # 윘 => 윘 +C719 110B 1171 11BC # 윙 => 윙 +C71A 110B 1171 11BD # 윚 => 윚 +C71B 110B 1171 11BE # 윛 => 윛 +C71C 110B 1171 11BF # 윜 => 윜 +C71D 110B 1171 11C0 # 윝 => 윝 +C71E 110B 1171 11C1 # 윞 => 윞 +C71F 110B 1171 11C2 # 윟 => 윟 +C720 110B 1172 # 유 => 유 +C721 110B 1172 11A8 # 육 => 육 +C722 110B 1172 11A9 # 윢 => 윢 +C723 110B 1172 11AA # 윣 => 윣 +C724 110B 1172 11AB # 윤 => 윤 +C725 110B 1172 11AC # 윥 => 윥 +C726 110B 1172 11AD # 윦 => 윦 +C727 110B 1172 11AE # 윧 => 윧 +C728 110B 1172 11AF # 율 => 율 +C729 110B 1172 11B0 # 윩 => 윩 +C72A 110B 1172 11B1 # 윪 => 윪 +C72B 110B 1172 11B2 # 윫 => 윫 +C72C 110B 1172 11B3 # 윬 => 윬 +C72D 110B 1172 11B4 # 윭 => 윭 +C72E 110B 1172 11B5 # 윮 => 윮 +C72F 110B 1172 11B6 # 윯 => 윯 +C730 110B 1172 11B7 # 윰 => 윰 +C731 110B 1172 11B8 # 윱 => 윱 +C732 110B 1172 11B9 # 윲 => 윲 +C733 110B 1172 11BA # 윳 => 윳 +C734 110B 1172 11BB # 윴 => 윴 +C735 110B 1172 11BC # 융 => 융 +C736 110B 1172 11BD # 윶 => 윶 +C737 110B 1172 11BE # 윷 => 윷 +C738 110B 1172 11BF # 윸 => 윸 +C739 110B 1172 11C0 # 윹 => 윹 +C73A 110B 1172 11C1 # 윺 => 윺 +C73B 110B 1172 11C2 # 윻 => 윻 +C73C 110B 1173 # 으 => 으 +C73D 110B 1173 11A8 # 윽 => 윽 +C73E 110B 1173 11A9 # 윾 => 윾 +C73F 110B 1173 11AA # 윿 => 윿 +C740 110B 1173 11AB # 은 => 은 +C741 110B 1173 11AC # 읁 => 읁 +C742 110B 1173 11AD # 읂 => 읂 +C743 110B 1173 11AE # 읃 => 읃 +C744 110B 1173 11AF # 을 => 을 
+C745 110B 1173 11B0 # 읅 => 읅 +C746 110B 1173 11B1 # 읆 => 읆 +C747 110B 1173 11B2 # 읇 => 읇 +C748 110B 1173 11B3 # 읈 => 읈 +C749 110B 1173 11B4 # 읉 => 읉 +C74A 110B 1173 11B5 # 읊 => 읊 +C74B 110B 1173 11B6 # 읋 => 읋 +C74C 110B 1173 11B7 # 음 => 음 +C74D 110B 1173 11B8 # 읍 => 읍 +C74E 110B 1173 11B9 # 읎 => 읎 +C74F 110B 1173 11BA # 읏 => 읏 +C750 110B 1173 11BB # 읐 => 읐 +C751 110B 1173 11BC # 응 => 응 +C752 110B 1173 11BD # 읒 => 읒 +C753 110B 1173 11BE # 읓 => 읓 +C754 110B 1173 11BF # 읔 => 읔 +C755 110B 1173 11C0 # 읕 => 읕 +C756 110B 1173 11C1 # 읖 => 읖 +C757 110B 1173 11C2 # 읗 => 읗 +C758 110B 1174 # 의 => 의 +C759 110B 1174 11A8 # 읙 => 읙 +C75A 110B 1174 11A9 # 읚 => 읚 +C75B 110B 1174 11AA # 읛 => 읛 +C75C 110B 1174 11AB # 읜 => 읜 +C75D 110B 1174 11AC # 읝 => 읝 +C75E 110B 1174 11AD # 읞 => 읞 +C75F 110B 1174 11AE # 읟 => 읟 +C760 110B 1174 11AF # 읠 => 읠 +C761 110B 1174 11B0 # 읡 => 읡 +C762 110B 1174 11B1 # 읢 => 읢 +C763 110B 1174 11B2 # 읣 => 읣 +C764 110B 1174 11B3 # 읤 => 읤 +C765 110B 1174 11B4 # 읥 => 읥 +C766 110B 1174 11B5 # 읦 => 읦 +C767 110B 1174 11B6 # 읧 => 읧 +C768 110B 1174 11B7 # 읨 => 읨 +C769 110B 1174 11B8 # 읩 => 읩 +C76A 110B 1174 11B9 # 읪 => 읪 +C76B 110B 1174 11BA # 읫 => 읫 +C76C 110B 1174 11BB # 읬 => 읬 +C76D 110B 1174 11BC # 읭 => 읭 +C76E 110B 1174 11BD # 읮 => 읮 +C76F 110B 1174 11BE # 읯 => 읯 +C770 110B 1174 11BF # 읰 => 읰 +C771 110B 1174 11C0 # 읱 => 읱 +C772 110B 1174 11C1 # 읲 => 읲 +C773 110B 1174 11C2 # 읳 => 읳 +C774 110B 1175 # 이 => 이 +C775 110B 1175 11A8 # 익 => 익 +C776 110B 1175 11A9 # 읶 => 읶 +C777 110B 1175 11AA # 읷 => 읷 +C778 110B 1175 11AB # 인 => 인 +C779 110B 1175 11AC # 읹 => 읹 +C77A 110B 1175 11AD # 읺 => 읺 +C77B 110B 1175 11AE # 읻 => 읻 +C77C 110B 1175 11AF # 일 => 일 +C77D 110B 1175 11B0 # 읽 => 읽 +C77E 110B 1175 11B1 # 읾 => 읾 +C77F 110B 1175 11B2 # 읿 => 읿 +C780 110B 1175 11B3 # 잀 => 잀 +C781 110B 1175 11B4 # 잁 => 잁 +C782 110B 1175 11B5 # 잂 => 잂 +C783 110B 1175 11B6 # 잃 => 잃 +C784 110B 1175 11B7 # 임 => 임 +C785 110B 1175 11B8 # 입 => 입 +C786 110B 1175 11B9 # 잆 => 잆 +C787 110B 1175 11BA # 잇 => 잇 
+C788 110B 1175 11BB # 있 => 있 +C789 110B 1175 11BC # 잉 => 잉 +C78A 110B 1175 11BD # 잊 => 잊 +C78B 110B 1175 11BE # 잋 => 잋 +C78C 110B 1175 11BF # 잌 => 잌 +C78D 110B 1175 11C0 # 잍 => 잍 +C78E 110B 1175 11C1 # 잎 => 잎 +C78F 110B 1175 11C2 # 잏 => 잏 +C790 110C 1161 # 자 => 자 +C791 110C 1161 11A8 # 작 => 작 +C792 110C 1161 11A9 # 잒 => 잒 +C793 110C 1161 11AA # 잓 => 잓 +C794 110C 1161 11AB # 잔 => 잔 +C795 110C 1161 11AC # 잕 => 잕 +C796 110C 1161 11AD # 잖 => 잖 +C797 110C 1161 11AE # 잗 => 잗 +C798 110C 1161 11AF # 잘 => 잘 +C799 110C 1161 11B0 # 잙 => 잙 +C79A 110C 1161 11B1 # 잚 => 잚 +C79B 110C 1161 11B2 # 잛 => 잛 +C79C 110C 1161 11B3 # 잜 => 잜 +C79D 110C 1161 11B4 # 잝 => 잝 +C79E 110C 1161 11B5 # 잞 => 잞 +C79F 110C 1161 11B6 # 잟 => 잟 +C7A0 110C 1161 11B7 # 잠 => 잠 +C7A1 110C 1161 11B8 # 잡 => 잡 +C7A2 110C 1161 11B9 # 잢 => 잢 +C7A3 110C 1161 11BA # 잣 => 잣 +C7A4 110C 1161 11BB # 잤 => 잤 +C7A5 110C 1161 11BC # 장 => 장 +C7A6 110C 1161 11BD # 잦 => 잦 +C7A7 110C 1161 11BE # 잧 => 잧 +C7A8 110C 1161 11BF # 잨 => 잨 +C7A9 110C 1161 11C0 # 잩 => 잩 +C7AA 110C 1161 11C1 # 잪 => 잪 +C7AB 110C 1161 11C2 # 잫 => 잫 +C7AC 110C 1162 # 재 => 재 +C7AD 110C 1162 11A8 # 잭 => 잭 +C7AE 110C 1162 11A9 # 잮 => 잮 +C7AF 110C 1162 11AA # 잯 => 잯 +C7B0 110C 1162 11AB # 잰 => 잰 +C7B1 110C 1162 11AC # 잱 => 잱 +C7B2 110C 1162 11AD # 잲 => 잲 +C7B3 110C 1162 11AE # 잳 => 잳 +C7B4 110C 1162 11AF # 잴 => 잴 +C7B5 110C 1162 11B0 # 잵 => 잵 +C7B6 110C 1162 11B1 # 잶 => 잶 +C7B7 110C 1162 11B2 # 잷 => 잷 +C7B8 110C 1162 11B3 # 잸 => 잸 +C7B9 110C 1162 11B4 # 잹 => 잹 +C7BA 110C 1162 11B5 # 잺 => 잺 +C7BB 110C 1162 11B6 # 잻 => 잻 +C7BC 110C 1162 11B7 # 잼 => 잼 +C7BD 110C 1162 11B8 # 잽 => 잽 +C7BE 110C 1162 11B9 # 잾 => 잾 +C7BF 110C 1162 11BA # 잿 => 잿 +C7C0 110C 1162 11BB # 쟀 => 쟀 +C7C1 110C 1162 11BC # 쟁 => 쟁 +C7C2 110C 1162 11BD # 쟂 => 쟂 +C7C3 110C 1162 11BE # 쟃 => 쟃 +C7C4 110C 1162 11BF # 쟄 => 쟄 +C7C5 110C 1162 11C0 # 쟅 => 쟅 +C7C6 110C 1162 11C1 # 쟆 => 쟆 +C7C7 110C 1162 11C2 # 쟇 => 쟇 +C7C8 110C 1163 # 쟈 => 쟈 +C7C9 110C 1163 11A8 # 쟉 => 쟉 +C7CA 110C 1163 11A9 # 쟊 => 쟊 
+C7CB 110C 1163 11AA # 쟋 => 쟋 +C7CC 110C 1163 11AB # 쟌 => 쟌 +C7CD 110C 1163 11AC # 쟍 => 쟍 +C7CE 110C 1163 11AD # 쟎 => 쟎 +C7CF 110C 1163 11AE # 쟏 => 쟏 +C7D0 110C 1163 11AF # 쟐 => 쟐 +C7D1 110C 1163 11B0 # 쟑 => 쟑 +C7D2 110C 1163 11B1 # 쟒 => 쟒 +C7D3 110C 1163 11B2 # 쟓 => 쟓 +C7D4 110C 1163 11B3 # 쟔 => 쟔 +C7D5 110C 1163 11B4 # 쟕 => 쟕 +C7D6 110C 1163 11B5 # 쟖 => 쟖 +C7D7 110C 1163 11B6 # 쟗 => 쟗 +C7D8 110C 1163 11B7 # 쟘 => 쟘 +C7D9 110C 1163 11B8 # 쟙 => 쟙 +C7DA 110C 1163 11B9 # 쟚 => 쟚 +C7DB 110C 1163 11BA # 쟛 => 쟛 +C7DC 110C 1163 11BB # 쟜 => 쟜 +C7DD 110C 1163 11BC # 쟝 => 쟝 +C7DE 110C 1163 11BD # 쟞 => 쟞 +C7DF 110C 1163 11BE # 쟟 => 쟟 +C7E0 110C 1163 11BF # 쟠 => 쟠 +C7E1 110C 1163 11C0 # 쟡 => 쟡 +C7E2 110C 1163 11C1 # 쟢 => 쟢 +C7E3 110C 1163 11C2 # 쟣 => 쟣 +C7E4 110C 1164 # 쟤 => 쟤 +C7E5 110C 1164 11A8 # 쟥 => 쟥 +C7E6 110C 1164 11A9 # 쟦 => 쟦 +C7E7 110C 1164 11AA # 쟧 => 쟧 +C7E8 110C 1164 11AB # 쟨 => 쟨 +C7E9 110C 1164 11AC # 쟩 => 쟩 +C7EA 110C 1164 11AD # 쟪 => 쟪 +C7EB 110C 1164 11AE # 쟫 => 쟫 +C7EC 110C 1164 11AF # 쟬 => 쟬 +C7ED 110C 1164 11B0 # 쟭 => 쟭 +C7EE 110C 1164 11B1 # 쟮 => 쟮 +C7EF 110C 1164 11B2 # 쟯 => 쟯 +C7F0 110C 1164 11B3 # 쟰 => 쟰 +C7F1 110C 1164 11B4 # 쟱 => 쟱 +C7F2 110C 1164 11B5 # 쟲 => 쟲 +C7F3 110C 1164 11B6 # 쟳 => 쟳 +C7F4 110C 1164 11B7 # 쟴 => 쟴 +C7F5 110C 1164 11B8 # 쟵 => 쟵 +C7F6 110C 1164 11B9 # 쟶 => 쟶 +C7F7 110C 1164 11BA # 쟷 => 쟷 +C7F8 110C 1164 11BB # 쟸 => 쟸 +C7F9 110C 1164 11BC # 쟹 => 쟹 +C7FA 110C 1164 11BD # 쟺 => 쟺 +C7FB 110C 1164 11BE # 쟻 => 쟻 +C7FC 110C 1164 11BF # 쟼 => 쟼 +C7FD 110C 1164 11C0 # 쟽 => 쟽 +C7FE 110C 1164 11C1 # 쟾 => 쟾 +C7FF 110C 1164 11C2 # 쟿 => 쟿 +C800 110C 1165 # 저 => 저 +C801 110C 1165 11A8 # 적 => 적 +C802 110C 1165 11A9 # 젂 => 젂 +C803 110C 1165 11AA # 젃 => 젃 +C804 110C 1165 11AB # 전 => 전 +C805 110C 1165 11AC # 젅 => 젅 +C806 110C 1165 11AD # 젆 => 젆 +C807 110C 1165 11AE # 젇 => 젇 +C808 110C 1165 11AF # 절 => 절 +C809 110C 1165 11B0 # 젉 => 젉 +C80A 110C 1165 11B1 # 젊 => 젊 +C80B 110C 1165 11B2 # 젋 => 젋 +C80C 110C 1165 11B3 # 젌 => 젌 +C80D 110C 1165 11B4 # 젍 => 젍 
+C80E 110C 1165 11B5 # 젎 => 젎 +C80F 110C 1165 11B6 # 젏 => 젏 +C810 110C 1165 11B7 # 점 => 점 +C811 110C 1165 11B8 # 접 => 접 +C812 110C 1165 11B9 # 젒 => 젒 +C813 110C 1165 11BA # 젓 => 젓 +C814 110C 1165 11BB # 젔 => 젔 +C815 110C 1165 11BC # 정 => 정 +C816 110C 1165 11BD # 젖 => 젖 +C817 110C 1165 11BE # 젗 => 젗 +C818 110C 1165 11BF # 젘 => 젘 +C819 110C 1165 11C0 # 젙 => 젙 +C81A 110C 1165 11C1 # 젚 => 젚 +C81B 110C 1165 11C2 # 젛 => 젛 +C81C 110C 1166 # 제 => 제 +C81D 110C 1166 11A8 # 젝 => 젝 +C81E 110C 1166 11A9 # 젞 => 젞 +C81F 110C 1166 11AA # 젟 => 젟 +C820 110C 1166 11AB # 젠 => 젠 +C821 110C 1166 11AC # 젡 => 젡 +C822 110C 1166 11AD # 젢 => 젢 +C823 110C 1166 11AE # 젣 => 젣 +C824 110C 1166 11AF # 젤 => 젤 +C825 110C 1166 11B0 # 젥 => 젥 +C826 110C 1166 11B1 # 젦 => 젦 +C827 110C 1166 11B2 # 젧 => 젧 +C828 110C 1166 11B3 # 젨 => 젨 +C829 110C 1166 11B4 # 젩 => 젩 +C82A 110C 1166 11B5 # 젪 => 젪 +C82B 110C 1166 11B6 # 젫 => 젫 +C82C 110C 1166 11B7 # 젬 => 젬 +C82D 110C 1166 11B8 # 젭 => 젭 +C82E 110C 1166 11B9 # 젮 => 젮 +C82F 110C 1166 11BA # 젯 => 젯 +C830 110C 1166 11BB # 젰 => 젰 +C831 110C 1166 11BC # 젱 => 젱 +C832 110C 1166 11BD # 젲 => 젲 +C833 110C 1166 11BE # 젳 => 젳 +C834 110C 1166 11BF # 젴 => 젴 +C835 110C 1166 11C0 # 젵 => 젵 +C836 110C 1166 11C1 # 젶 => 젶 +C837 110C 1166 11C2 # 젷 => 젷 +C838 110C 1167 # 져 => 져 +C839 110C 1167 11A8 # 젹 => 젹 +C83A 110C 1167 11A9 # 젺 => 젺 +C83B 110C 1167 11AA # 젻 => 젻 +C83C 110C 1167 11AB # 젼 => 젼 +C83D 110C 1167 11AC # 젽 => 젽 +C83E 110C 1167 11AD # 젾 => 젾 +C83F 110C 1167 11AE # 젿 => 젿 +C840 110C 1167 11AF # 졀 => 졀 +C841 110C 1167 11B0 # 졁 => 졁 +C842 110C 1167 11B1 # 졂 => 졂 +C843 110C 1167 11B2 # 졃 => 졃 +C844 110C 1167 11B3 # 졄 => 졄 +C845 110C 1167 11B4 # 졅 => 졅 +C846 110C 1167 11B5 # 졆 => 졆 +C847 110C 1167 11B6 # 졇 => 졇 +C848 110C 1167 11B7 # 졈 => 졈 +C849 110C 1167 11B8 # 졉 => 졉 +C84A 110C 1167 11B9 # 졊 => 졊 +C84B 110C 1167 11BA # 졋 => 졋 +C84C 110C 1167 11BB # 졌 => 졌 +C84D 110C 1167 11BC # 졍 => 졍 +C84E 110C 1167 11BD # 졎 => 졎 +C84F 110C 1167 11BE # 졏 => 졏 +C850 110C 1167 11BF # 졐 => 졐 
+C851 110C 1167 11C0 # 졑 => 졑 +C852 110C 1167 11C1 # 졒 => 졒 +C853 110C 1167 11C2 # 졓 => 졓 +C854 110C 1168 # 졔 => 졔 +C855 110C 1168 11A8 # 졕 => 졕 +C856 110C 1168 11A9 # 졖 => 졖 +C857 110C 1168 11AA # 졗 => 졗 +C858 110C 1168 11AB # 졘 => 졘 +C859 110C 1168 11AC # 졙 => 졙 +C85A 110C 1168 11AD # 졚 => 졚 +C85B 110C 1168 11AE # 졛 => 졛 +C85C 110C 1168 11AF # 졜 => 졜 +C85D 110C 1168 11B0 # 졝 => 졝 +C85E 110C 1168 11B1 # 졞 => 졞 +C85F 110C 1168 11B2 # 졟 => 졟 +C860 110C 1168 11B3 # 졠 => 졠 +C861 110C 1168 11B4 # 졡 => 졡 +C862 110C 1168 11B5 # 졢 => 졢 +C863 110C 1168 11B6 # 졣 => 졣 +C864 110C 1168 11B7 # 졤 => 졤 +C865 110C 1168 11B8 # 졥 => 졥 +C866 110C 1168 11B9 # 졦 => 졦 +C867 110C 1168 11BA # 졧 => 졧 +C868 110C 1168 11BB # 졨 => 졨 +C869 110C 1168 11BC # 졩 => 졩 +C86A 110C 1168 11BD # 졪 => 졪 +C86B 110C 1168 11BE # 졫 => 졫 +C86C 110C 1168 11BF # 졬 => 졬 +C86D 110C 1168 11C0 # 졭 => 졭 +C86E 110C 1168 11C1 # 졮 => 졮 +C86F 110C 1168 11C2 # 졯 => 졯 +C870 110C 1169 # 조 => 조 +C871 110C 1169 11A8 # 족 => 족 +C872 110C 1169 11A9 # 졲 => 졲 +C873 110C 1169 11AA # 졳 => 졳 +C874 110C 1169 11AB # 존 => 존 +C875 110C 1169 11AC # 졵 => 졵 +C876 110C 1169 11AD # 졶 => 졶 +C877 110C 1169 11AE # 졷 => 졷 +C878 110C 1169 11AF # 졸 => 졸 +C879 110C 1169 11B0 # 졹 => 졹 +C87A 110C 1169 11B1 # 졺 => 졺 +C87B 110C 1169 11B2 # 졻 => 졻 +C87C 110C 1169 11B3 # 졼 => 졼 +C87D 110C 1169 11B4 # 졽 => 졽 +C87E 110C 1169 11B5 # 졾 => 졾 +C87F 110C 1169 11B6 # 졿 => 졿 +C880 110C 1169 11B7 # 좀 => 좀 +C881 110C 1169 11B8 # 좁 => 좁 +C882 110C 1169 11B9 # 좂 => 좂 +C883 110C 1169 11BA # 좃 => 좃 +C884 110C 1169 11BB # 좄 => 좄 +C885 110C 1169 11BC # 종 => 종 +C886 110C 1169 11BD # 좆 => 좆 +C887 110C 1169 11BE # 좇 => 좇 +C888 110C 1169 11BF # 좈 => 좈 +C889 110C 1169 11C0 # 좉 => 좉 +C88A 110C 1169 11C1 # 좊 => 좊 +C88B 110C 1169 11C2 # 좋 => 좋 +C88C 110C 116A # 좌 => 좌 +C88D 110C 116A 11A8 # 좍 => 좍 +C88E 110C 116A 11A9 # 좎 => 좎 +C88F 110C 116A 11AA # 좏 => 좏 +C890 110C 116A 11AB # 좐 => 좐 +C891 110C 116A 11AC # 좑 => 좑 +C892 110C 116A 11AD # 좒 => 좒 +C893 110C 116A 11AE # 좓 => 좓 
+C894 110C 116A 11AF # 좔 => 좔 +C895 110C 116A 11B0 # 좕 => 좕 +C896 110C 116A 11B1 # 좖 => 좖 +C897 110C 116A 11B2 # 좗 => 좗 +C898 110C 116A 11B3 # 좘 => 좘 +C899 110C 116A 11B4 # 좙 => 좙 +C89A 110C 116A 11B5 # 좚 => 좚 +C89B 110C 116A 11B6 # 좛 => 좛 +C89C 110C 116A 11B7 # 좜 => 좜 +C89D 110C 116A 11B8 # 좝 => 좝 +C89E 110C 116A 11B9 # 좞 => 좞 +C89F 110C 116A 11BA # 좟 => 좟 +C8A0 110C 116A 11BB # 좠 => 좠 +C8A1 110C 116A 11BC # 좡 => 좡 +C8A2 110C 116A 11BD # 좢 => 좢 +C8A3 110C 116A 11BE # 좣 => 좣 +C8A4 110C 116A 11BF # 좤 => 좤 +C8A5 110C 116A 11C0 # 좥 => 좥 +C8A6 110C 116A 11C1 # 좦 => 좦 +C8A7 110C 116A 11C2 # 좧 => 좧 +C8A8 110C 116B # 좨 => 좨 +C8A9 110C 116B 11A8 # 좩 => 좩 +C8AA 110C 116B 11A9 # 좪 => 좪 +C8AB 110C 116B 11AA # 좫 => 좫 +C8AC 110C 116B 11AB # 좬 => 좬 +C8AD 110C 116B 11AC # 좭 => 좭 +C8AE 110C 116B 11AD # 좮 => 좮 +C8AF 110C 116B 11AE # 좯 => 좯 +C8B0 110C 116B 11AF # 좰 => 좰 +C8B1 110C 116B 11B0 # 좱 => 좱 +C8B2 110C 116B 11B1 # 좲 => 좲 +C8B3 110C 116B 11B2 # 좳 => 좳 +C8B4 110C 116B 11B3 # 좴 => 좴 +C8B5 110C 116B 11B4 # 좵 => 좵 +C8B6 110C 116B 11B5 # 좶 => 좶 +C8B7 110C 116B 11B6 # 좷 => 좷 +C8B8 110C 116B 11B7 # 좸 => 좸 +C8B9 110C 116B 11B8 # 좹 => 좹 +C8BA 110C 116B 11B9 # 좺 => 좺 +C8BB 110C 116B 11BA # 좻 => 좻 +C8BC 110C 116B 11BB # 좼 => 좼 +C8BD 110C 116B 11BC # 좽 => 좽 +C8BE 110C 116B 11BD # 좾 => 좾 +C8BF 110C 116B 11BE # 좿 => 좿 +C8C0 110C 116B 11BF # 죀 => 죀 +C8C1 110C 116B 11C0 # 죁 => 죁 +C8C2 110C 116B 11C1 # 죂 => 죂 +C8C3 110C 116B 11C2 # 죃 => 죃 +C8C4 110C 116C # 죄 => 죄 +C8C5 110C 116C 11A8 # 죅 => 죅 +C8C6 110C 116C 11A9 # 죆 => 죆 +C8C7 110C 116C 11AA # 죇 => 죇 +C8C8 110C 116C 11AB # 죈 => 죈 +C8C9 110C 116C 11AC # 죉 => 죉 +C8CA 110C 116C 11AD # 죊 => 죊 +C8CB 110C 116C 11AE # 죋 => 죋 +C8CC 110C 116C 11AF # 죌 => 죌 +C8CD 110C 116C 11B0 # 죍 => 죍 +C8CE 110C 116C 11B1 # 죎 => 죎 +C8CF 110C 116C 11B2 # 죏 => 죏 +C8D0 110C 116C 11B3 # 죐 => 죐 +C8D1 110C 116C 11B4 # 죑 => 죑 +C8D2 110C 116C 11B5 # 죒 => 죒 +C8D3 110C 116C 11B6 # 죓 => 죓 +C8D4 110C 116C 11B7 # 죔 => 죔 +C8D5 110C 116C 11B8 # 죕 => 죕 +C8D6 110C 116C 11B9 # 죖 => 죖 
+C8D7 110C 116C 11BA # 죗 => 죗 +C8D8 110C 116C 11BB # 죘 => 죘 +C8D9 110C 116C 11BC # 죙 => 죙 +C8DA 110C 116C 11BD # 죚 => 죚 +C8DB 110C 116C 11BE # 죛 => 죛 +C8DC 110C 116C 11BF # 죜 => 죜 +C8DD 110C 116C 11C0 # 죝 => 죝 +C8DE 110C 116C 11C1 # 죞 => 죞 +C8DF 110C 116C 11C2 # 죟 => 죟 +C8E0 110C 116D # 죠 => 죠 +C8E1 110C 116D 11A8 # 죡 => 죡 +C8E2 110C 116D 11A9 # 죢 => 죢 +C8E3 110C 116D 11AA # 죣 => 죣 +C8E4 110C 116D 11AB # 죤 => 죤 +C8E5 110C 116D 11AC # 죥 => 죥 +C8E6 110C 116D 11AD # 죦 => 죦 +C8E7 110C 116D 11AE # 죧 => 죧 +C8E8 110C 116D 11AF # 죨 => 죨 +C8E9 110C 116D 11B0 # 죩 => 죩 +C8EA 110C 116D 11B1 # 죪 => 죪 +C8EB 110C 116D 11B2 # 죫 => 죫 +C8EC 110C 116D 11B3 # 죬 => 죬 +C8ED 110C 116D 11B4 # 죭 => 죭 +C8EE 110C 116D 11B5 # 죮 => 죮 +C8EF 110C 116D 11B6 # 죯 => 죯 +C8F0 110C 116D 11B7 # 죰 => 죰 +C8F1 110C 116D 11B8 # 죱 => 죱 +C8F2 110C 116D 11B9 # 죲 => 죲 +C8F3 110C 116D 11BA # 죳 => 죳 +C8F4 110C 116D 11BB # 죴 => 죴 +C8F5 110C 116D 11BC # 죵 => 죵 +C8F6 110C 116D 11BD # 죶 => 죶 +C8F7 110C 116D 11BE # 죷 => 죷 +C8F8 110C 116D 11BF # 죸 => 죸 +C8F9 110C 116D 11C0 # 죹 => 죹 +C8FA 110C 116D 11C1 # 죺 => 죺 +C8FB 110C 116D 11C2 # 죻 => 죻 +C8FC 110C 116E # 주 => 주 +C8FD 110C 116E 11A8 # 죽 => 죽 +C8FE 110C 116E 11A9 # 죾 => 죾 +C8FF 110C 116E 11AA # 죿 => 죿 +C900 110C 116E 11AB # 준 => 준 +C901 110C 116E 11AC # 줁 => 줁 +C902 110C 116E 11AD # 줂 => 줂 +C903 110C 116E 11AE # 줃 => 줃 +C904 110C 116E 11AF # 줄 => 줄 +C905 110C 116E 11B0 # 줅 => 줅 +C906 110C 116E 11B1 # 줆 => 줆 +C907 110C 116E 11B2 # 줇 => 줇 +C908 110C 116E 11B3 # 줈 => 줈 +C909 110C 116E 11B4 # 줉 => 줉 +C90A 110C 116E 11B5 # 줊 => 줊 +C90B 110C 116E 11B6 # 줋 => 줋 +C90C 110C 116E 11B7 # 줌 => 줌 +C90D 110C 116E 11B8 # 줍 => 줍 +C90E 110C 116E 11B9 # 줎 => 줎 +C90F 110C 116E 11BA # 줏 => 줏 +C910 110C 116E 11BB # 줐 => 줐 +C911 110C 116E 11BC # 중 => 중 +C912 110C 116E 11BD # 줒 => 줒 +C913 110C 116E 11BE # 줓 => 줓 +C914 110C 116E 11BF # 줔 => 줔 +C915 110C 116E 11C0 # 줕 => 줕 +C916 110C 116E 11C1 # 줖 => 줖 +C917 110C 116E 11C2 # 줗 => 줗 +C918 110C 116F # 줘 => 줘 +C919 110C 116F 11A8 # 줙 => 줙 
+C91A 110C 116F 11A9 # 줚 => 줚 +C91B 110C 116F 11AA # 줛 => 줛 +C91C 110C 116F 11AB # 줜 => 줜 +C91D 110C 116F 11AC # 줝 => 줝 +C91E 110C 116F 11AD # 줞 => 줞 +C91F 110C 116F 11AE # 줟 => 줟 +C920 110C 116F 11AF # 줠 => 줠 +C921 110C 116F 11B0 # 줡 => 줡 +C922 110C 116F 11B1 # 줢 => 줢 +C923 110C 116F 11B2 # 줣 => 줣 +C924 110C 116F 11B3 # 줤 => 줤 +C925 110C 116F 11B4 # 줥 => 줥 +C926 110C 116F 11B5 # 줦 => 줦 +C927 110C 116F 11B6 # 줧 => 줧 +C928 110C 116F 11B7 # 줨 => 줨 +C929 110C 116F 11B8 # 줩 => 줩 +C92A 110C 116F 11B9 # 줪 => 줪 +C92B 110C 116F 11BA # 줫 => 줫 +C92C 110C 116F 11BB # 줬 => 줬 +C92D 110C 116F 11BC # 줭 => 줭 +C92E 110C 116F 11BD # 줮 => 줮 +C92F 110C 116F 11BE # 줯 => 줯 +C930 110C 116F 11BF # 줰 => 줰 +C931 110C 116F 11C0 # 줱 => 줱 +C932 110C 116F 11C1 # 줲 => 줲 +C933 110C 116F 11C2 # 줳 => 줳 +C934 110C 1170 # 줴 => 줴 +C935 110C 1170 11A8 # 줵 => 줵 +C936 110C 1170 11A9 # 줶 => 줶 +C937 110C 1170 11AA # 줷 => 줷 +C938 110C 1170 11AB # 줸 => 줸 +C939 110C 1170 11AC # 줹 => 줹 +C93A 110C 1170 11AD # 줺 => 줺 +C93B 110C 1170 11AE # 줻 => 줻 +C93C 110C 1170 11AF # 줼 => 줼 +C93D 110C 1170 11B0 # 줽 => 줽 +C93E 110C 1170 11B1 # 줾 => 줾 +C93F 110C 1170 11B2 # 줿 => 줿 +C940 110C 1170 11B3 # 쥀 => 쥀 +C941 110C 1170 11B4 # 쥁 => 쥁 +C942 110C 1170 11B5 # 쥂 => 쥂 +C943 110C 1170 11B6 # 쥃 => 쥃 +C944 110C 1170 11B7 # 쥄 => 쥄 +C945 110C 1170 11B8 # 쥅 => 쥅 +C946 110C 1170 11B9 # 쥆 => 쥆 +C947 110C 1170 11BA # 쥇 => 쥇 +C948 110C 1170 11BB # 쥈 => 쥈 +C949 110C 1170 11BC # 쥉 => 쥉 +C94A 110C 1170 11BD # 쥊 => 쥊 +C94B 110C 1170 11BE # 쥋 => 쥋 +C94C 110C 1170 11BF # 쥌 => 쥌 +C94D 110C 1170 11C0 # 쥍 => 쥍 +C94E 110C 1170 11C1 # 쥎 => 쥎 +C94F 110C 1170 11C2 # 쥏 => 쥏 +C950 110C 1171 # 쥐 => 쥐 +C951 110C 1171 11A8 # 쥑 => 쥑 +C952 110C 1171 11A9 # 쥒 => 쥒 +C953 110C 1171 11AA # 쥓 => 쥓 +C954 110C 1171 11AB # 쥔 => 쥔 +C955 110C 1171 11AC # 쥕 => 쥕 +C956 110C 1171 11AD # 쥖 => 쥖 +C957 110C 1171 11AE # 쥗 => 쥗 +C958 110C 1171 11AF # 쥘 => 쥘 +C959 110C 1171 11B0 # 쥙 => 쥙 +C95A 110C 1171 11B1 # 쥚 => 쥚 +C95B 110C 1171 11B2 # 쥛 => 쥛 +C95C 110C 1171 11B3 # 쥜 => 쥜 
+C95D 110C 1171 11B4 # 쥝 => 쥝 +C95E 110C 1171 11B5 # 쥞 => 쥞 +C95F 110C 1171 11B6 # 쥟 => 쥟 +C960 110C 1171 11B7 # 쥠 => 쥠 +C961 110C 1171 11B8 # 쥡 => 쥡 +C962 110C 1171 11B9 # 쥢 => 쥢 +C963 110C 1171 11BA # 쥣 => 쥣 +C964 110C 1171 11BB # 쥤 => 쥤 +C965 110C 1171 11BC # 쥥 => 쥥 +C966 110C 1171 11BD # 쥦 => 쥦 +C967 110C 1171 11BE # 쥧 => 쥧 +C968 110C 1171 11BF # 쥨 => 쥨 +C969 110C 1171 11C0 # 쥩 => 쥩 +C96A 110C 1171 11C1 # 쥪 => 쥪 +C96B 110C 1171 11C2 # 쥫 => 쥫 +C96C 110C 1172 # 쥬 => 쥬 +C96D 110C 1172 11A8 # 쥭 => 쥭 +C96E 110C 1172 11A9 # 쥮 => 쥮 +C96F 110C 1172 11AA # 쥯 => 쥯 +C970 110C 1172 11AB # 쥰 => 쥰 +C971 110C 1172 11AC # 쥱 => 쥱 +C972 110C 1172 11AD # 쥲 => 쥲 +C973 110C 1172 11AE # 쥳 => 쥳 +C974 110C 1172 11AF # 쥴 => 쥴 +C975 110C 1172 11B0 # 쥵 => 쥵 +C976 110C 1172 11B1 # 쥶 => 쥶 +C977 110C 1172 11B2 # 쥷 => 쥷 +C978 110C 1172 11B3 # 쥸 => 쥸 +C979 110C 1172 11B4 # 쥹 => 쥹 +C97A 110C 1172 11B5 # 쥺 => 쥺 +C97B 110C 1172 11B6 # 쥻 => 쥻 +C97C 110C 1172 11B7 # 쥼 => 쥼 +C97D 110C 1172 11B8 # 쥽 => 쥽 +C97E 110C 1172 11B9 # 쥾 => 쥾 +C97F 110C 1172 11BA # 쥿 => 쥿 +C980 110C 1172 11BB # 즀 => 즀 +C981 110C 1172 11BC # 즁 => 즁 +C982 110C 1172 11BD # 즂 => 즂 +C983 110C 1172 11BE # 즃 => 즃 +C984 110C 1172 11BF # 즄 => 즄 +C985 110C 1172 11C0 # 즅 => 즅 +C986 110C 1172 11C1 # 즆 => 즆 +C987 110C 1172 11C2 # 즇 => 즇 +C988 110C 1173 # 즈 => 즈 +C989 110C 1173 11A8 # 즉 => 즉 +C98A 110C 1173 11A9 # 즊 => 즊 +C98B 110C 1173 11AA # 즋 => 즋 +C98C 110C 1173 11AB # 즌 => 즌 +C98D 110C 1173 11AC # 즍 => 즍 +C98E 110C 1173 11AD # 즎 => 즎 +C98F 110C 1173 11AE # 즏 => 즏 +C990 110C 1173 11AF # 즐 => 즐 +C991 110C 1173 11B0 # 즑 => 즑 +C992 110C 1173 11B1 # 즒 => 즒 +C993 110C 1173 11B2 # 즓 => 즓 +C994 110C 1173 11B3 # 즔 => 즔 +C995 110C 1173 11B4 # 즕 => 즕 +C996 110C 1173 11B5 # 즖 => 즖 +C997 110C 1173 11B6 # 즗 => 즗 +C998 110C 1173 11B7 # 즘 => 즘 +C999 110C 1173 11B8 # 즙 => 즙 +C99A 110C 1173 11B9 # 즚 => 즚 +C99B 110C 1173 11BA # 즛 => 즛 +C99C 110C 1173 11BB # 즜 => 즜 +C99D 110C 1173 11BC # 증 => 증 +C99E 110C 1173 11BD # 즞 => 즞 +C99F 110C 1173 11BE # 즟 => 즟 
+C9A0 110C 1173 11BF # 즠 => 즠 +C9A1 110C 1173 11C0 # 즡 => 즡 +C9A2 110C 1173 11C1 # 즢 => 즢 +C9A3 110C 1173 11C2 # 즣 => 즣 +C9A4 110C 1174 # 즤 => 즤 +C9A5 110C 1174 11A8 # 즥 => 즥 +C9A6 110C 1174 11A9 # 즦 => 즦 +C9A7 110C 1174 11AA # 즧 => 즧 +C9A8 110C 1174 11AB # 즨 => 즨 +C9A9 110C 1174 11AC # 즩 => 즩 +C9AA 110C 1174 11AD # 즪 => 즪 +C9AB 110C 1174 11AE # 즫 => 즫 +C9AC 110C 1174 11AF # 즬 => 즬 +C9AD 110C 1174 11B0 # 즭 => 즭 +C9AE 110C 1174 11B1 # 즮 => 즮 +C9AF 110C 1174 11B2 # 즯 => 즯 +C9B0 110C 1174 11B3 # 즰 => 즰 +C9B1 110C 1174 11B4 # 즱 => 즱 +C9B2 110C 1174 11B5 # 즲 => 즲 +C9B3 110C 1174 11B6 # 즳 => 즳 +C9B4 110C 1174 11B7 # 즴 => 즴 +C9B5 110C 1174 11B8 # 즵 => 즵 +C9B6 110C 1174 11B9 # 즶 => 즶 +C9B7 110C 1174 11BA # 즷 => 즷 +C9B8 110C 1174 11BB # 즸 => 즸 +C9B9 110C 1174 11BC # 즹 => 즹 +C9BA 110C 1174 11BD # 즺 => 즺 +C9BB 110C 1174 11BE # 즻 => 즻 +C9BC 110C 1174 11BF # 즼 => 즼 +C9BD 110C 1174 11C0 # 즽 => 즽 +C9BE 110C 1174 11C1 # 즾 => 즾 +C9BF 110C 1174 11C2 # 즿 => 즿 +C9C0 110C 1175 # 지 => 지 +C9C1 110C 1175 11A8 # 직 => 직 +C9C2 110C 1175 11A9 # 짂 => 짂 +C9C3 110C 1175 11AA # 짃 => 짃 +C9C4 110C 1175 11AB # 진 => 진 +C9C5 110C 1175 11AC # 짅 => 짅 +C9C6 110C 1175 11AD # 짆 => 짆 +C9C7 110C 1175 11AE # 짇 => 짇 +C9C8 110C 1175 11AF # 질 => 질 +C9C9 110C 1175 11B0 # 짉 => 짉 +C9CA 110C 1175 11B1 # 짊 => 짊 +C9CB 110C 1175 11B2 # 짋 => 짋 +C9CC 110C 1175 11B3 # 짌 => 짌 +C9CD 110C 1175 11B4 # 짍 => 짍 +C9CE 110C 1175 11B5 # 짎 => 짎 +C9CF 110C 1175 11B6 # 짏 => 짏 +C9D0 110C 1175 11B7 # 짐 => 짐 +C9D1 110C 1175 11B8 # 집 => 집 +C9D2 110C 1175 11B9 # 짒 => 짒 +C9D3 110C 1175 11BA # 짓 => 짓 +C9D4 110C 1175 11BB # 짔 => 짔 +C9D5 110C 1175 11BC # 징 => 징 +C9D6 110C 1175 11BD # 짖 => 짖 +C9D7 110C 1175 11BE # 짗 => 짗 +C9D8 110C 1175 11BF # 짘 => 짘 +C9D9 110C 1175 11C0 # 짙 => 짙 +C9DA 110C 1175 11C1 # 짚 => 짚 +C9DB 110C 1175 11C2 # 짛 => 짛 +C9DC 110D 1161 # 짜 => 짜 +C9DD 110D 1161 11A8 # 짝 => 짝 +C9DE 110D 1161 11A9 # 짞 => 짞 +C9DF 110D 1161 11AA # 짟 => 짟 +C9E0 110D 1161 11AB # 짠 => 짠 +C9E1 110D 1161 11AC # 짡 => 짡 +C9E2 110D 1161 11AD # 짢 => 짢 
+C9E3 110D 1161 11AE # 짣 => 짣 +C9E4 110D 1161 11AF # 짤 => 짤 +C9E5 110D 1161 11B0 # 짥 => 짥 +C9E6 110D 1161 11B1 # 짦 => 짦 +C9E7 110D 1161 11B2 # 짧 => 짧 +C9E8 110D 1161 11B3 # 짨 => 짨 +C9E9 110D 1161 11B4 # 짩 => 짩 +C9EA 110D 1161 11B5 # 짪 => 짪 +C9EB 110D 1161 11B6 # 짫 => 짫 +C9EC 110D 1161 11B7 # 짬 => 짬 +C9ED 110D 1161 11B8 # 짭 => 짭 +C9EE 110D 1161 11B9 # 짮 => 짮 +C9EF 110D 1161 11BA # 짯 => 짯 +C9F0 110D 1161 11BB # 짰 => 짰 +C9F1 110D 1161 11BC # 짱 => 짱 +C9F2 110D 1161 11BD # 짲 => 짲 +C9F3 110D 1161 11BE # 짳 => 짳 +C9F4 110D 1161 11BF # 짴 => 짴 +C9F5 110D 1161 11C0 # 짵 => 짵 +C9F6 110D 1161 11C1 # 짶 => 짶 +C9F7 110D 1161 11C2 # 짷 => 짷 +C9F8 110D 1162 # 째 => 째 +C9F9 110D 1162 11A8 # 짹 => 짹 +C9FA 110D 1162 11A9 # 짺 => 짺 +C9FB 110D 1162 11AA # 짻 => 짻 +C9FC 110D 1162 11AB # 짼 => 짼 +C9FD 110D 1162 11AC # 짽 => 짽 +C9FE 110D 1162 11AD # 짾 => 짾 +C9FF 110D 1162 11AE # 짿 => 짿 +CA00 110D 1162 11AF # 쨀 => 쨀 +CA01 110D 1162 11B0 # 쨁 => 쨁 +CA02 110D 1162 11B1 # 쨂 => 쨂 +CA03 110D 1162 11B2 # 쨃 => 쨃 +CA04 110D 1162 11B3 # 쨄 => 쨄 +CA05 110D 1162 11B4 # 쨅 => 쨅 +CA06 110D 1162 11B5 # 쨆 => 쨆 +CA07 110D 1162 11B6 # 쨇 => 쨇 +CA08 110D 1162 11B7 # 쨈 => 쨈 +CA09 110D 1162 11B8 # 쨉 => 쨉 +CA0A 110D 1162 11B9 # 쨊 => 쨊 +CA0B 110D 1162 11BA # 쨋 => 쨋 +CA0C 110D 1162 11BB # 쨌 => 쨌 +CA0D 110D 1162 11BC # 쨍 => 쨍 +CA0E 110D 1162 11BD # 쨎 => 쨎 +CA0F 110D 1162 11BE # 쨏 => 쨏 +CA10 110D 1162 11BF # 쨐 => 쨐 +CA11 110D 1162 11C0 # 쨑 => 쨑 +CA12 110D 1162 11C1 # 쨒 => 쨒 +CA13 110D 1162 11C2 # 쨓 => 쨓 +CA14 110D 1163 # 쨔 => 쨔 +CA15 110D 1163 11A8 # 쨕 => 쨕 +CA16 110D 1163 11A9 # 쨖 => 쨖 +CA17 110D 1163 11AA # 쨗 => 쨗 +CA18 110D 1163 11AB # 쨘 => 쨘 +CA19 110D 1163 11AC # 쨙 => 쨙 +CA1A 110D 1163 11AD # 쨚 => 쨚 +CA1B 110D 1163 11AE # 쨛 => 쨛 +CA1C 110D 1163 11AF # 쨜 => 쨜 +CA1D 110D 1163 11B0 # 쨝 => 쨝 +CA1E 110D 1163 11B1 # 쨞 => 쨞 +CA1F 110D 1163 11B2 # 쨟 => 쨟 +CA20 110D 1163 11B3 # 쨠 => 쨠 +CA21 110D 1163 11B4 # 쨡 => 쨡 +CA22 110D 1163 11B5 # 쨢 => 쨢 +CA23 110D 1163 11B6 # 쨣 => 쨣 +CA24 110D 1163 11B7 # 쨤 => 쨤 +CA25 110D 1163 11B8 # 쨥 => 쨥 
+CA26 110D 1163 11B9 # 쨦 => 쨦 +CA27 110D 1163 11BA # 쨧 => 쨧 +CA28 110D 1163 11BB # 쨨 => 쨨 +CA29 110D 1163 11BC # 쨩 => 쨩 +CA2A 110D 1163 11BD # 쨪 => 쨪 +CA2B 110D 1163 11BE # 쨫 => 쨫 +CA2C 110D 1163 11BF # 쨬 => 쨬 +CA2D 110D 1163 11C0 # 쨭 => 쨭 +CA2E 110D 1163 11C1 # 쨮 => 쨮 +CA2F 110D 1163 11C2 # 쨯 => 쨯 +CA30 110D 1164 # 쨰 => 쨰 +CA31 110D 1164 11A8 # 쨱 => 쨱 +CA32 110D 1164 11A9 # 쨲 => 쨲 +CA33 110D 1164 11AA # 쨳 => 쨳 +CA34 110D 1164 11AB # 쨴 => 쨴 +CA35 110D 1164 11AC # 쨵 => 쨵 +CA36 110D 1164 11AD # 쨶 => 쨶 +CA37 110D 1164 11AE # 쨷 => 쨷 +CA38 110D 1164 11AF # 쨸 => 쨸 +CA39 110D 1164 11B0 # 쨹 => 쨹 +CA3A 110D 1164 11B1 # 쨺 => 쨺 +CA3B 110D 1164 11B2 # 쨻 => 쨻 +CA3C 110D 1164 11B3 # 쨼 => 쨼 +CA3D 110D 1164 11B4 # 쨽 => 쨽 +CA3E 110D 1164 11B5 # 쨾 => 쨾 +CA3F 110D 1164 11B6 # 쨿 => 쨿 +CA40 110D 1164 11B7 # 쩀 => 쩀 +CA41 110D 1164 11B8 # 쩁 => 쩁 +CA42 110D 1164 11B9 # 쩂 => 쩂 +CA43 110D 1164 11BA # 쩃 => 쩃 +CA44 110D 1164 11BB # 쩄 => 쩄 +CA45 110D 1164 11BC # 쩅 => 쩅 +CA46 110D 1164 11BD # 쩆 => 쩆 +CA47 110D 1164 11BE # 쩇 => 쩇 +CA48 110D 1164 11BF # 쩈 => 쩈 +CA49 110D 1164 11C0 # 쩉 => 쩉 +CA4A 110D 1164 11C1 # 쩊 => 쩊 +CA4B 110D 1164 11C2 # 쩋 => 쩋 +CA4C 110D 1165 # 쩌 => 쩌 +CA4D 110D 1165 11A8 # 쩍 => 쩍 +CA4E 110D 1165 11A9 # 쩎 => 쩎 +CA4F 110D 1165 11AA # 쩏 => 쩏 +CA50 110D 1165 11AB # 쩐 => 쩐 +CA51 110D 1165 11AC # 쩑 => 쩑 +CA52 110D 1165 11AD # 쩒 => 쩒 +CA53 110D 1165 11AE # 쩓 => 쩓 +CA54 110D 1165 11AF # 쩔 => 쩔 +CA55 110D 1165 11B0 # 쩕 => 쩕 +CA56 110D 1165 11B1 # 쩖 => 쩖 +CA57 110D 1165 11B2 # 쩗 => 쩗 +CA58 110D 1165 11B3 # 쩘 => 쩘 +CA59 110D 1165 11B4 # 쩙 => 쩙 +CA5A 110D 1165 11B5 # 쩚 => 쩚 +CA5B 110D 1165 11B6 # 쩛 => 쩛 +CA5C 110D 1165 11B7 # 쩜 => 쩜 +CA5D 110D 1165 11B8 # 쩝 => 쩝 +CA5E 110D 1165 11B9 # 쩞 => 쩞 +CA5F 110D 1165 11BA # 쩟 => 쩟 +CA60 110D 1165 11BB # 쩠 => 쩠 +CA61 110D 1165 11BC # 쩡 => 쩡 +CA62 110D 1165 11BD # 쩢 => 쩢 +CA63 110D 1165 11BE # 쩣 => 쩣 +CA64 110D 1165 11BF # 쩤 => 쩤 +CA65 110D 1165 11C0 # 쩥 => 쩥 +CA66 110D 1165 11C1 # 쩦 => 쩦 +CA67 110D 1165 11C2 # 쩧 => 쩧 +CA68 110D 1166 # 쩨 => 쩨 
+CA69 110D 1166 11A8 # 쩩 => 쩩 +CA6A 110D 1166 11A9 # 쩪 => 쩪 +CA6B 110D 1166 11AA # 쩫 => 쩫 +CA6C 110D 1166 11AB # 쩬 => 쩬 +CA6D 110D 1166 11AC # 쩭 => 쩭 +CA6E 110D 1166 11AD # 쩮 => 쩮 +CA6F 110D 1166 11AE # 쩯 => 쩯 +CA70 110D 1166 11AF # 쩰 => 쩰 +CA71 110D 1166 11B0 # 쩱 => 쩱 +CA72 110D 1166 11B1 # 쩲 => 쩲 +CA73 110D 1166 11B2 # 쩳 => 쩳 +CA74 110D 1166 11B3 # 쩴 => 쩴 +CA75 110D 1166 11B4 # 쩵 => 쩵 +CA76 110D 1166 11B5 # 쩶 => 쩶 +CA77 110D 1166 11B6 # 쩷 => 쩷 +CA78 110D 1166 11B7 # 쩸 => 쩸 +CA79 110D 1166 11B8 # 쩹 => 쩹 +CA7A 110D 1166 11B9 # 쩺 => 쩺 +CA7B 110D 1166 11BA # 쩻 => 쩻 +CA7C 110D 1166 11BB # 쩼 => 쩼 +CA7D 110D 1166 11BC # 쩽 => 쩽 +CA7E 110D 1166 11BD # 쩾 => 쩾 +CA7F 110D 1166 11BE # 쩿 => 쩿 +CA80 110D 1166 11BF # 쪀 => 쪀 +CA81 110D 1166 11C0 # 쪁 => 쪁 +CA82 110D 1166 11C1 # 쪂 => 쪂 +CA83 110D 1166 11C2 # 쪃 => 쪃 +CA84 110D 1167 # 쪄 => 쪄 +CA85 110D 1167 11A8 # 쪅 => 쪅 +CA86 110D 1167 11A9 # 쪆 => 쪆 +CA87 110D 1167 11AA # 쪇 => 쪇 +CA88 110D 1167 11AB # 쪈 => 쪈 +CA89 110D 1167 11AC # 쪉 => 쪉 +CA8A 110D 1167 11AD # 쪊 => 쪊 +CA8B 110D 1167 11AE # 쪋 => 쪋 +CA8C 110D 1167 11AF # 쪌 => 쪌 +CA8D 110D 1167 11B0 # 쪍 => 쪍 +CA8E 110D 1167 11B1 # 쪎 => 쪎 +CA8F 110D 1167 11B2 # 쪏 => 쪏 +CA90 110D 1167 11B3 # 쪐 => 쪐 +CA91 110D 1167 11B4 # 쪑 => 쪑 +CA92 110D 1167 11B5 # 쪒 => 쪒 +CA93 110D 1167 11B6 # 쪓 => 쪓 +CA94 110D 1167 11B7 # 쪔 => 쪔 +CA95 110D 1167 11B8 # 쪕 => 쪕 +CA96 110D 1167 11B9 # 쪖 => 쪖 +CA97 110D 1167 11BA # 쪗 => 쪗 +CA98 110D 1167 11BB # 쪘 => 쪘 +CA99 110D 1167 11BC # 쪙 => 쪙 +CA9A 110D 1167 11BD # 쪚 => 쪚 +CA9B 110D 1167 11BE # 쪛 => 쪛 +CA9C 110D 1167 11BF # 쪜 => 쪜 +CA9D 110D 1167 11C0 # 쪝 => 쪝 +CA9E 110D 1167 11C1 # 쪞 => 쪞 +CA9F 110D 1167 11C2 # 쪟 => 쪟 +CAA0 110D 1168 # 쪠 => 쪠 +CAA1 110D 1168 11A8 # 쪡 => 쪡 +CAA2 110D 1168 11A9 # 쪢 => 쪢 +CAA3 110D 1168 11AA # 쪣 => 쪣 +CAA4 110D 1168 11AB # 쪤 => 쪤 +CAA5 110D 1168 11AC # 쪥 => 쪥 +CAA6 110D 1168 11AD # 쪦 => 쪦 +CAA7 110D 1168 11AE # 쪧 => 쪧 +CAA8 110D 1168 11AF # 쪨 => 쪨 +CAA9 110D 1168 11B0 # 쪩 => 쪩 +CAAA 110D 1168 11B1 # 쪪 => 쪪 +CAAB 110D 1168 11B2 # 쪫 => 쪫 
+CAAC 110D 1168 11B3 # 쪬 => 쪬 +CAAD 110D 1168 11B4 # 쪭 => 쪭 +CAAE 110D 1168 11B5 # 쪮 => 쪮 +CAAF 110D 1168 11B6 # 쪯 => 쪯 +CAB0 110D 1168 11B7 # 쪰 => 쪰 +CAB1 110D 1168 11B8 # 쪱 => 쪱 +CAB2 110D 1168 11B9 # 쪲 => 쪲 +CAB3 110D 1168 11BA # 쪳 => 쪳 +CAB4 110D 1168 11BB # 쪴 => 쪴 +CAB5 110D 1168 11BC # 쪵 => 쪵 +CAB6 110D 1168 11BD # 쪶 => 쪶 +CAB7 110D 1168 11BE # 쪷 => 쪷 +CAB8 110D 1168 11BF # 쪸 => 쪸 +CAB9 110D 1168 11C0 # 쪹 => 쪹 +CABA 110D 1168 11C1 # 쪺 => 쪺 +CABB 110D 1168 11C2 # 쪻 => 쪻 +CABC 110D 1169 # 쪼 => 쪼 +CABD 110D 1169 11A8 # 쪽 => 쪽 +CABE 110D 1169 11A9 # 쪾 => 쪾 +CABF 110D 1169 11AA # 쪿 => 쪿 +CAC0 110D 1169 11AB # 쫀 => 쫀 +CAC1 110D 1169 11AC # 쫁 => 쫁 +CAC2 110D 1169 11AD # 쫂 => 쫂 +CAC3 110D 1169 11AE # 쫃 => 쫃 +CAC4 110D 1169 11AF # 쫄 => 쫄 +CAC5 110D 1169 11B0 # 쫅 => 쫅 +CAC6 110D 1169 11B1 # 쫆 => 쫆 +CAC7 110D 1169 11B2 # 쫇 => 쫇 +CAC8 110D 1169 11B3 # 쫈 => 쫈 +CAC9 110D 1169 11B4 # 쫉 => 쫉 +CACA 110D 1169 11B5 # 쫊 => 쫊 +CACB 110D 1169 11B6 # 쫋 => 쫋 +CACC 110D 1169 11B7 # 쫌 => 쫌 +CACD 110D 1169 11B8 # 쫍 => 쫍 +CACE 110D 1169 11B9 # 쫎 => 쫎 +CACF 110D 1169 11BA # 쫏 => 쫏 +CAD0 110D 1169 11BB # 쫐 => 쫐 +CAD1 110D 1169 11BC # 쫑 => 쫑 +CAD2 110D 1169 11BD # 쫒 => 쫒 +CAD3 110D 1169 11BE # 쫓 => 쫓 +CAD4 110D 1169 11BF # 쫔 => 쫔 +CAD5 110D 1169 11C0 # 쫕 => 쫕 +CAD6 110D 1169 11C1 # 쫖 => 쫖 +CAD7 110D 1169 11C2 # 쫗 => 쫗 +CAD8 110D 116A # 쫘 => 쫘 +CAD9 110D 116A 11A8 # 쫙 => 쫙 +CADA 110D 116A 11A9 # 쫚 => 쫚 +CADB 110D 116A 11AA # 쫛 => 쫛 +CADC 110D 116A 11AB # 쫜 => 쫜 +CADD 110D 116A 11AC # 쫝 => 쫝 +CADE 110D 116A 11AD # 쫞 => 쫞 +CADF 110D 116A 11AE # 쫟 => 쫟 +CAE0 110D 116A 11AF # 쫠 => 쫠 +CAE1 110D 116A 11B0 # 쫡 => 쫡 +CAE2 110D 116A 11B1 # 쫢 => 쫢 +CAE3 110D 116A 11B2 # 쫣 => 쫣 +CAE4 110D 116A 11B3 # 쫤 => 쫤 +CAE5 110D 116A 11B4 # 쫥 => 쫥 +CAE6 110D 116A 11B5 # 쫦 => 쫦 +CAE7 110D 116A 11B6 # 쫧 => 쫧 +CAE8 110D 116A 11B7 # 쫨 => 쫨 +CAE9 110D 116A 11B8 # 쫩 => 쫩 +CAEA 110D 116A 11B9 # 쫪 => 쫪 +CAEB 110D 116A 11BA # 쫫 => 쫫 +CAEC 110D 116A 11BB # 쫬 => 쫬 +CAED 110D 116A 11BC # 쫭 => 쫭 +CAEE 110D 116A 11BD # 쫮 => 쫮 
+CAEF 110D 116A 11BE # 쫯 => 쫯 +CAF0 110D 116A 11BF # 쫰 => 쫰 +CAF1 110D 116A 11C0 # 쫱 => 쫱 +CAF2 110D 116A 11C1 # 쫲 => 쫲 +CAF3 110D 116A 11C2 # 쫳 => 쫳 +CAF4 110D 116B # 쫴 => 쫴 +CAF5 110D 116B 11A8 # 쫵 => 쫵 +CAF6 110D 116B 11A9 # 쫶 => 쫶 +CAF7 110D 116B 11AA # 쫷 => 쫷 +CAF8 110D 116B 11AB # 쫸 => 쫸 +CAF9 110D 116B 11AC # 쫹 => 쫹 +CAFA 110D 116B 11AD # 쫺 => 쫺 +CAFB 110D 116B 11AE # 쫻 => 쫻 +CAFC 110D 116B 11AF # 쫼 => 쫼 +CAFD 110D 116B 11B0 # 쫽 => 쫽 +CAFE 110D 116B 11B1 # 쫾 => 쫾 +CAFF 110D 116B 11B2 # 쫿 => 쫿 +CB00 110D 116B 11B3 # 쬀 => 쬀 +CB01 110D 116B 11B4 # 쬁 => 쬁 +CB02 110D 116B 11B5 # 쬂 => 쬂 +CB03 110D 116B 11B6 # 쬃 => 쬃 +CB04 110D 116B 11B7 # 쬄 => 쬄 +CB05 110D 116B 11B8 # 쬅 => 쬅 +CB06 110D 116B 11B9 # 쬆 => 쬆 +CB07 110D 116B 11BA # 쬇 => 쬇 +CB08 110D 116B 11BB # 쬈 => 쬈 +CB09 110D 116B 11BC # 쬉 => 쬉 +CB0A 110D 116B 11BD # 쬊 => 쬊 +CB0B 110D 116B 11BE # 쬋 => 쬋 +CB0C 110D 116B 11BF # 쬌 => 쬌 +CB0D 110D 116B 11C0 # 쬍 => 쬍 +CB0E 110D 116B 11C1 # 쬎 => 쬎 +CB0F 110D 116B 11C2 # 쬏 => 쬏 +CB10 110D 116C # 쬐 => 쬐 +CB11 110D 116C 11A8 # 쬑 => 쬑 +CB12 110D 116C 11A9 # 쬒 => 쬒 +CB13 110D 116C 11AA # 쬓 => 쬓 +CB14 110D 116C 11AB # 쬔 => 쬔 +CB15 110D 116C 11AC # 쬕 => 쬕 +CB16 110D 116C 11AD # 쬖 => 쬖 +CB17 110D 116C 11AE # 쬗 => 쬗 +CB18 110D 116C 11AF # 쬘 => 쬘 +CB19 110D 116C 11B0 # 쬙 => 쬙 +CB1A 110D 116C 11B1 # 쬚 => 쬚 +CB1B 110D 116C 11B2 # 쬛 => 쬛 +CB1C 110D 116C 11B3 # 쬜 => 쬜 +CB1D 110D 116C 11B4 # 쬝 => 쬝 +CB1E 110D 116C 11B5 # 쬞 => 쬞 +CB1F 110D 116C 11B6 # 쬟 => 쬟 +CB20 110D 116C 11B7 # 쬠 => 쬠 +CB21 110D 116C 11B8 # 쬡 => 쬡 +CB22 110D 116C 11B9 # 쬢 => 쬢 +CB23 110D 116C 11BA # 쬣 => 쬣 +CB24 110D 116C 11BB # 쬤 => 쬤 +CB25 110D 116C 11BC # 쬥 => 쬥 +CB26 110D 116C 11BD # 쬦 => 쬦 +CB27 110D 116C 11BE # 쬧 => 쬧 +CB28 110D 116C 11BF # 쬨 => 쬨 +CB29 110D 116C 11C0 # 쬩 => 쬩 +CB2A 110D 116C 11C1 # 쬪 => 쬪 +CB2B 110D 116C 11C2 # 쬫 => 쬫 +CB2C 110D 116D # 쬬 => 쬬 +CB2D 110D 116D 11A8 # 쬭 => 쬭 +CB2E 110D 116D 11A9 # 쬮 => 쬮 +CB2F 110D 116D 11AA # 쬯 => 쬯 +CB30 110D 116D 11AB # 쬰 => 쬰 +CB31 110D 116D 11AC # 쬱 => 쬱 
+CB32 110D 116D 11AD # 쬲 => 쬲 +CB33 110D 116D 11AE # 쬳 => 쬳 +CB34 110D 116D 11AF # 쬴 => 쬴 +CB35 110D 116D 11B0 # 쬵 => 쬵 +CB36 110D 116D 11B1 # 쬶 => 쬶 +CB37 110D 116D 11B2 # 쬷 => 쬷 +CB38 110D 116D 11B3 # 쬸 => 쬸 +CB39 110D 116D 11B4 # 쬹 => 쬹 +CB3A 110D 116D 11B5 # 쬺 => 쬺 +CB3B 110D 116D 11B6 # 쬻 => 쬻 +CB3C 110D 116D 11B7 # 쬼 => 쬼 +CB3D 110D 116D 11B8 # 쬽 => 쬽 +CB3E 110D 116D 11B9 # 쬾 => 쬾 +CB3F 110D 116D 11BA # 쬿 => 쬿 +CB40 110D 116D 11BB # 쭀 => 쭀 +CB41 110D 116D 11BC # 쭁 => 쭁 +CB42 110D 116D 11BD # 쭂 => 쭂 +CB43 110D 116D 11BE # 쭃 => 쭃 +CB44 110D 116D 11BF # 쭄 => 쭄 +CB45 110D 116D 11C0 # 쭅 => 쭅 +CB46 110D 116D 11C1 # 쭆 => 쭆 +CB47 110D 116D 11C2 # 쭇 => 쭇 +CB48 110D 116E # 쭈 => 쭈 +CB49 110D 116E 11A8 # 쭉 => 쭉 +CB4A 110D 116E 11A9 # 쭊 => 쭊 +CB4B 110D 116E 11AA # 쭋 => 쭋 +CB4C 110D 116E 11AB # 쭌 => 쭌 +CB4D 110D 116E 11AC # 쭍 => 쭍 +CB4E 110D 116E 11AD # 쭎 => 쭎 +CB4F 110D 116E 11AE # 쭏 => 쭏 +CB50 110D 116E 11AF # 쭐 => 쭐 +CB51 110D 116E 11B0 # 쭑 => 쭑 +CB52 110D 116E 11B1 # 쭒 => 쭒 +CB53 110D 116E 11B2 # 쭓 => 쭓 +CB54 110D 116E 11B3 # 쭔 => 쭔 +CB55 110D 116E 11B4 # 쭕 => 쭕 +CB56 110D 116E 11B5 # 쭖 => 쭖 +CB57 110D 116E 11B6 # 쭗 => 쭗 +CB58 110D 116E 11B7 # 쭘 => 쭘 +CB59 110D 116E 11B8 # 쭙 => 쭙 +CB5A 110D 116E 11B9 # 쭚 => 쭚 +CB5B 110D 116E 11BA # 쭛 => 쭛 +CB5C 110D 116E 11BB # 쭜 => 쭜 +CB5D 110D 116E 11BC # 쭝 => 쭝 +CB5E 110D 116E 11BD # 쭞 => 쭞 +CB5F 110D 116E 11BE # 쭟 => 쭟 +CB60 110D 116E 11BF # 쭠 => 쭠 +CB61 110D 116E 11C0 # 쭡 => 쭡 +CB62 110D 116E 11C1 # 쭢 => 쭢 +CB63 110D 116E 11C2 # 쭣 => 쭣 +CB64 110D 116F # 쭤 => 쭤 +CB65 110D 116F 11A8 # 쭥 => 쭥 +CB66 110D 116F 11A9 # 쭦 => 쭦 +CB67 110D 116F 11AA # 쭧 => 쭧 +CB68 110D 116F 11AB # 쭨 => 쭨 +CB69 110D 116F 11AC # 쭩 => 쭩 +CB6A 110D 116F 11AD # 쭪 => 쭪 +CB6B 110D 116F 11AE # 쭫 => 쭫 +CB6C 110D 116F 11AF # 쭬 => 쭬 +CB6D 110D 116F 11B0 # 쭭 => 쭭 +CB6E 110D 116F 11B1 # 쭮 => 쭮 +CB6F 110D 116F 11B2 # 쭯 => 쭯 +CB70 110D 116F 11B3 # 쭰 => 쭰 +CB71 110D 116F 11B4 # 쭱 => 쭱 +CB72 110D 116F 11B5 # 쭲 => 쭲 +CB73 110D 116F 11B6 # 쭳 => 쭳 +CB74 110D 116F 11B7 # 쭴 => 쭴 
+CB75 110D 116F 11B8 # 쭵 => 쭵 +CB76 110D 116F 11B9 # 쭶 => 쭶 +CB77 110D 116F 11BA # 쭷 => 쭷 +CB78 110D 116F 11BB # 쭸 => 쭸 +CB79 110D 116F 11BC # 쭹 => 쭹 +CB7A 110D 116F 11BD # 쭺 => 쭺 +CB7B 110D 116F 11BE # 쭻 => 쭻 +CB7C 110D 116F 11BF # 쭼 => 쭼 +CB7D 110D 116F 11C0 # 쭽 => 쭽 +CB7E 110D 116F 11C1 # 쭾 => 쭾 +CB7F 110D 116F 11C2 # 쭿 => 쭿 +CB80 110D 1170 # 쮀 => 쮀 +CB81 110D 1170 11A8 # 쮁 => 쮁 +CB82 110D 1170 11A9 # 쮂 => 쮂 +CB83 110D 1170 11AA # 쮃 => 쮃 +CB84 110D 1170 11AB # 쮄 => 쮄 +CB85 110D 1170 11AC # 쮅 => 쮅 +CB86 110D 1170 11AD # 쮆 => 쮆 +CB87 110D 1170 11AE # 쮇 => 쮇 +CB88 110D 1170 11AF # 쮈 => 쮈 +CB89 110D 1170 11B0 # 쮉 => 쮉 +CB8A 110D 1170 11B1 # 쮊 => 쮊 +CB8B 110D 1170 11B2 # 쮋 => 쮋 +CB8C 110D 1170 11B3 # 쮌 => 쮌 +CB8D 110D 1170 11B4 # 쮍 => 쮍 +CB8E 110D 1170 11B5 # 쮎 => 쮎 +CB8F 110D 1170 11B6 # 쮏 => 쮏 +CB90 110D 1170 11B7 # 쮐 => 쮐 +CB91 110D 1170 11B8 # 쮑 => 쮑 +CB92 110D 1170 11B9 # 쮒 => 쮒 +CB93 110D 1170 11BA # 쮓 => 쮓 +CB94 110D 1170 11BB # 쮔 => 쮔 +CB95 110D 1170 11BC # 쮕 => 쮕 +CB96 110D 1170 11BD # 쮖 => 쮖 +CB97 110D 1170 11BE # 쮗 => 쮗 +CB98 110D 1170 11BF # 쮘 => 쮘 +CB99 110D 1170 11C0 # 쮙 => 쮙 +CB9A 110D 1170 11C1 # 쮚 => 쮚 +CB9B 110D 1170 11C2 # 쮛 => 쮛 +CB9C 110D 1171 # 쮜 => 쮜 +CB9D 110D 1171 11A8 # 쮝 => 쮝 +CB9E 110D 1171 11A9 # 쮞 => 쮞 +CB9F 110D 1171 11AA # 쮟 => 쮟 +CBA0 110D 1171 11AB # 쮠 => 쮠 +CBA1 110D 1171 11AC # 쮡 => 쮡 +CBA2 110D 1171 11AD # 쮢 => 쮢 +CBA3 110D 1171 11AE # 쮣 => 쮣 +CBA4 110D 1171 11AF # 쮤 => 쮤 +CBA5 110D 1171 11B0 # 쮥 => 쮥 +CBA6 110D 1171 11B1 # 쮦 => 쮦 +CBA7 110D 1171 11B2 # 쮧 => 쮧 +CBA8 110D 1171 11B3 # 쮨 => 쮨 +CBA9 110D 1171 11B4 # 쮩 => 쮩 +CBAA 110D 1171 11B5 # 쮪 => 쮪 +CBAB 110D 1171 11B6 # 쮫 => 쮫 +CBAC 110D 1171 11B7 # 쮬 => 쮬 +CBAD 110D 1171 11B8 # 쮭 => 쮭 +CBAE 110D 1171 11B9 # 쮮 => 쮮 +CBAF 110D 1171 11BA # 쮯 => 쮯 +CBB0 110D 1171 11BB # 쮰 => 쮰 +CBB1 110D 1171 11BC # 쮱 => 쮱 +CBB2 110D 1171 11BD # 쮲 => 쮲 +CBB3 110D 1171 11BE # 쮳 => 쮳 +CBB4 110D 1171 11BF # 쮴 => 쮴 +CBB5 110D 1171 11C0 # 쮵 => 쮵 +CBB6 110D 1171 11C1 # 쮶 => 쮶 +CBB7 110D 1171 11C2 # 쮷 => 쮷 
+CBB8 110D 1172 # 쮸 => 쮸 +CBB9 110D 1172 11A8 # 쮹 => 쮹 +CBBA 110D 1172 11A9 # 쮺 => 쮺 +CBBB 110D 1172 11AA # 쮻 => 쮻 +CBBC 110D 1172 11AB # 쮼 => 쮼 +CBBD 110D 1172 11AC # 쮽 => 쮽 +CBBE 110D 1172 11AD # 쮾 => 쮾 +CBBF 110D 1172 11AE # 쮿 => 쮿 +CBC0 110D 1172 11AF # 쯀 => 쯀 +CBC1 110D 1172 11B0 # 쯁 => 쯁 +CBC2 110D 1172 11B1 # 쯂 => 쯂 +CBC3 110D 1172 11B2 # 쯃 => 쯃 +CBC4 110D 1172 11B3 # 쯄 => 쯄 +CBC5 110D 1172 11B4 # 쯅 => 쯅 +CBC6 110D 1172 11B5 # 쯆 => 쯆 +CBC7 110D 1172 11B6 # 쯇 => 쯇 +CBC8 110D 1172 11B7 # 쯈 => 쯈 +CBC9 110D 1172 11B8 # 쯉 => 쯉 +CBCA 110D 1172 11B9 # 쯊 => 쯊 +CBCB 110D 1172 11BA # 쯋 => 쯋 +CBCC 110D 1172 11BB # 쯌 => 쯌 +CBCD 110D 1172 11BC # 쯍 => 쯍 +CBCE 110D 1172 11BD # 쯎 => 쯎 +CBCF 110D 1172 11BE # 쯏 => 쯏 +CBD0 110D 1172 11BF # 쯐 => 쯐 +CBD1 110D 1172 11C0 # 쯑 => 쯑 +CBD2 110D 1172 11C1 # 쯒 => 쯒 +CBD3 110D 1172 11C2 # 쯓 => 쯓 +CBD4 110D 1173 # 쯔 => 쯔 +CBD5 110D 1173 11A8 # 쯕 => 쯕 +CBD6 110D 1173 11A9 # 쯖 => 쯖 +CBD7 110D 1173 11AA # 쯗 => 쯗 +CBD8 110D 1173 11AB # 쯘 => 쯘 +CBD9 110D 1173 11AC # 쯙 => 쯙 +CBDA 110D 1173 11AD # 쯚 => 쯚 +CBDB 110D 1173 11AE # 쯛 => 쯛 +CBDC 110D 1173 11AF # 쯜 => 쯜 +CBDD 110D 1173 11B0 # 쯝 => 쯝 +CBDE 110D 1173 11B1 # 쯞 => 쯞 +CBDF 110D 1173 11B2 # 쯟 => 쯟 +CBE0 110D 1173 11B3 # 쯠 => 쯠 +CBE1 110D 1173 11B4 # 쯡 => 쯡 +CBE2 110D 1173 11B5 # 쯢 => 쯢 +CBE3 110D 1173 11B6 # 쯣 => 쯣 +CBE4 110D 1173 11B7 # 쯤 => 쯤 +CBE5 110D 1173 11B8 # 쯥 => 쯥 +CBE6 110D 1173 11B9 # 쯦 => 쯦 +CBE7 110D 1173 11BA # 쯧 => 쯧 +CBE8 110D 1173 11BB # 쯨 => 쯨 +CBE9 110D 1173 11BC # 쯩 => 쯩 +CBEA 110D 1173 11BD # 쯪 => 쯪 +CBEB 110D 1173 11BE # 쯫 => 쯫 +CBEC 110D 1173 11BF # 쯬 => 쯬 +CBED 110D 1173 11C0 # 쯭 => 쯭 +CBEE 110D 1173 11C1 # 쯮 => 쯮 +CBEF 110D 1173 11C2 # 쯯 => 쯯 +CBF0 110D 1174 # 쯰 => 쯰 +CBF1 110D 1174 11A8 # 쯱 => 쯱 +CBF2 110D 1174 11A9 # 쯲 => 쯲 +CBF3 110D 1174 11AA # 쯳 => 쯳 +CBF4 110D 1174 11AB # 쯴 => 쯴 +CBF5 110D 1174 11AC # 쯵 => 쯵 +CBF6 110D 1174 11AD # 쯶 => 쯶 +CBF7 110D 1174 11AE # 쯷 => 쯷 +CBF8 110D 1174 11AF # 쯸 => 쯸 +CBF9 110D 1174 11B0 # 쯹 => 쯹 +CBFA 110D 1174 11B1 # 쯺 => 쯺 
+CBFB 110D 1174 11B2 # 쯻 => 쯻 +CBFC 110D 1174 11B3 # 쯼 => 쯼 +CBFD 110D 1174 11B4 # 쯽 => 쯽 +CBFE 110D 1174 11B5 # 쯾 => 쯾 +CBFF 110D 1174 11B6 # 쯿 => 쯿 +CC00 110D 1174 11B7 # 찀 => 찀 +CC01 110D 1174 11B8 # 찁 => 찁 +CC02 110D 1174 11B9 # 찂 => 찂 +CC03 110D 1174 11BA # 찃 => 찃 +CC04 110D 1174 11BB # 찄 => 찄 +CC05 110D 1174 11BC # 찅 => 찅 +CC06 110D 1174 11BD # 찆 => 찆 +CC07 110D 1174 11BE # 찇 => 찇 +CC08 110D 1174 11BF # 찈 => 찈 +CC09 110D 1174 11C0 # 찉 => 찉 +CC0A 110D 1174 11C1 # 찊 => 찊 +CC0B 110D 1174 11C2 # 찋 => 찋 +CC0C 110D 1175 # 찌 => 찌 +CC0D 110D 1175 11A8 # 찍 => 찍 +CC0E 110D 1175 11A9 # 찎 => 찎 +CC0F 110D 1175 11AA # 찏 => 찏 +CC10 110D 1175 11AB # 찐 => 찐 +CC11 110D 1175 11AC # 찑 => 찑 +CC12 110D 1175 11AD # 찒 => 찒 +CC13 110D 1175 11AE # 찓 => 찓 +CC14 110D 1175 11AF # 찔 => 찔 +CC15 110D 1175 11B0 # 찕 => 찕 +CC16 110D 1175 11B1 # 찖 => 찖 +CC17 110D 1175 11B2 # 찗 => 찗 +CC18 110D 1175 11B3 # 찘 => 찘 +CC19 110D 1175 11B4 # 찙 => 찙 +CC1A 110D 1175 11B5 # 찚 => 찚 +CC1B 110D 1175 11B6 # 찛 => 찛 +CC1C 110D 1175 11B7 # 찜 => 찜 +CC1D 110D 1175 11B8 # 찝 => 찝 +CC1E 110D 1175 11B9 # 찞 => 찞 +CC1F 110D 1175 11BA # 찟 => 찟 +CC20 110D 1175 11BB # 찠 => 찠 +CC21 110D 1175 11BC # 찡 => 찡 +CC22 110D 1175 11BD # 찢 => 찢 +CC23 110D 1175 11BE # 찣 => 찣 +CC24 110D 1175 11BF # 찤 => 찤 +CC25 110D 1175 11C0 # 찥 => 찥 +CC26 110D 1175 11C1 # 찦 => 찦 +CC27 110D 1175 11C2 # 찧 => 찧 +CC28 110E 1161 # 차 => 차 +CC29 110E 1161 11A8 # 착 => 착 +CC2A 110E 1161 11A9 # 찪 => 찪 +CC2B 110E 1161 11AA # 찫 => 찫 +CC2C 110E 1161 11AB # 찬 => 찬 +CC2D 110E 1161 11AC # 찭 => 찭 +CC2E 110E 1161 11AD # 찮 => 찮 +CC2F 110E 1161 11AE # 찯 => 찯 +CC30 110E 1161 11AF # 찰 => 찰 +CC31 110E 1161 11B0 # 찱 => 찱 +CC32 110E 1161 11B1 # 찲 => 찲 +CC33 110E 1161 11B2 # 찳 => 찳 +CC34 110E 1161 11B3 # 찴 => 찴 +CC35 110E 1161 11B4 # 찵 => 찵 +CC36 110E 1161 11B5 # 찶 => 찶 +CC37 110E 1161 11B6 # 찷 => 찷 +CC38 110E 1161 11B7 # 참 => 참 +CC39 110E 1161 11B8 # 찹 => 찹 +CC3A 110E 1161 11B9 # 찺 => 찺 +CC3B 110E 1161 11BA # 찻 => 찻 +CC3C 110E 1161 11BB # 찼 => 찼 +CC3D 110E 1161 11BC # 창 => 창 
+CC3E 110E 1161 11BD # 찾 => 찾 +CC3F 110E 1161 11BE # 찿 => 찿 +CC40 110E 1161 11BF # 챀 => 챀 +CC41 110E 1161 11C0 # 챁 => 챁 +CC42 110E 1161 11C1 # 챂 => 챂 +CC43 110E 1161 11C2 # 챃 => 챃 +CC44 110E 1162 # 채 => 채 +CC45 110E 1162 11A8 # 책 => 책 +CC46 110E 1162 11A9 # 챆 => 챆 +CC47 110E 1162 11AA # 챇 => 챇 +CC48 110E 1162 11AB # 챈 => 챈 +CC49 110E 1162 11AC # 챉 => 챉 +CC4A 110E 1162 11AD # 챊 => 챊 +CC4B 110E 1162 11AE # 챋 => 챋 +CC4C 110E 1162 11AF # 챌 => 챌 +CC4D 110E 1162 11B0 # 챍 => 챍 +CC4E 110E 1162 11B1 # 챎 => 챎 +CC4F 110E 1162 11B2 # 챏 => 챏 +CC50 110E 1162 11B3 # 챐 => 챐 +CC51 110E 1162 11B4 # 챑 => 챑 +CC52 110E 1162 11B5 # 챒 => 챒 +CC53 110E 1162 11B6 # 챓 => 챓 +CC54 110E 1162 11B7 # 챔 => 챔 +CC55 110E 1162 11B8 # 챕 => 챕 +CC56 110E 1162 11B9 # 챖 => 챖 +CC57 110E 1162 11BA # 챗 => 챗 +CC58 110E 1162 11BB # 챘 => 챘 +CC59 110E 1162 11BC # 챙 => 챙 +CC5A 110E 1162 11BD # 챚 => 챚 +CC5B 110E 1162 11BE # 챛 => 챛 +CC5C 110E 1162 11BF # 챜 => 챜 +CC5D 110E 1162 11C0 # 챝 => 챝 +CC5E 110E 1162 11C1 # 챞 => 챞 +CC5F 110E 1162 11C2 # 챟 => 챟 +CC60 110E 1163 # 챠 => 챠 +CC61 110E 1163 11A8 # 챡 => 챡 +CC62 110E 1163 11A9 # 챢 => 챢 +CC63 110E 1163 11AA # 챣 => 챣 +CC64 110E 1163 11AB # 챤 => 챤 +CC65 110E 1163 11AC # 챥 => 챥 +CC66 110E 1163 11AD # 챦 => 챦 +CC67 110E 1163 11AE # 챧 => 챧 +CC68 110E 1163 11AF # 챨 => 챨 +CC69 110E 1163 11B0 # 챩 => 챩 +CC6A 110E 1163 11B1 # 챪 => 챪 +CC6B 110E 1163 11B2 # 챫 => 챫 +CC6C 110E 1163 11B3 # 챬 => 챬 +CC6D 110E 1163 11B4 # 챭 => 챭 +CC6E 110E 1163 11B5 # 챮 => 챮 +CC6F 110E 1163 11B6 # 챯 => 챯 +CC70 110E 1163 11B7 # 챰 => 챰 +CC71 110E 1163 11B8 # 챱 => 챱 +CC72 110E 1163 11B9 # 챲 => 챲 +CC73 110E 1163 11BA # 챳 => 챳 +CC74 110E 1163 11BB # 챴 => 챴 +CC75 110E 1163 11BC # 챵 => 챵 +CC76 110E 1163 11BD # 챶 => 챶 +CC77 110E 1163 11BE # 챷 => 챷 +CC78 110E 1163 11BF # 챸 => 챸 +CC79 110E 1163 11C0 # 챹 => 챹 +CC7A 110E 1163 11C1 # 챺 => 챺 +CC7B 110E 1163 11C2 # 챻 => 챻 +CC7C 110E 1164 # 챼 => 챼 +CC7D 110E 1164 11A8 # 챽 => 챽 +CC7E 110E 1164 11A9 # 챾 => 챾 +CC7F 110E 1164 11AA # 챿 => 챿 +CC80 110E 1164 11AB # 첀 => 첀 
+CC81 110E 1164 11AC # 첁 => 첁 +CC82 110E 1164 11AD # 첂 => 첂 +CC83 110E 1164 11AE # 첃 => 첃 +CC84 110E 1164 11AF # 첄 => 첄 +CC85 110E 1164 11B0 # 첅 => 첅 +CC86 110E 1164 11B1 # 첆 => 첆 +CC87 110E 1164 11B2 # 첇 => 첇 +CC88 110E 1164 11B3 # 첈 => 첈 +CC89 110E 1164 11B4 # 첉 => 첉 +CC8A 110E 1164 11B5 # 첊 => 첊 +CC8B 110E 1164 11B6 # 첋 => 첋 +CC8C 110E 1164 11B7 # 첌 => 첌 +CC8D 110E 1164 11B8 # 첍 => 첍 +CC8E 110E 1164 11B9 # 첎 => 첎 +CC8F 110E 1164 11BA # 첏 => 첏 +CC90 110E 1164 11BB # 첐 => 첐 +CC91 110E 1164 11BC # 첑 => 첑 +CC92 110E 1164 11BD # 첒 => 첒 +CC93 110E 1164 11BE # 첓 => 첓 +CC94 110E 1164 11BF # 첔 => 첔 +CC95 110E 1164 11C0 # 첕 => 첕 +CC96 110E 1164 11C1 # 첖 => 첖 +CC97 110E 1164 11C2 # 첗 => 첗 +CC98 110E 1165 # 처 => 처 +CC99 110E 1165 11A8 # 척 => 척 +CC9A 110E 1165 11A9 # 첚 => 첚 +CC9B 110E 1165 11AA # 첛 => 첛 +CC9C 110E 1165 11AB # 천 => 천 +CC9D 110E 1165 11AC # 첝 => 첝 +CC9E 110E 1165 11AD # 첞 => 첞 +CC9F 110E 1165 11AE # 첟 => 첟 +CCA0 110E 1165 11AF # 철 => 철 +CCA1 110E 1165 11B0 # 첡 => 첡 +CCA2 110E 1165 11B1 # 첢 => 첢 +CCA3 110E 1165 11B2 # 첣 => 첣 +CCA4 110E 1165 11B3 # 첤 => 첤 +CCA5 110E 1165 11B4 # 첥 => 첥 +CCA6 110E 1165 11B5 # 첦 => 첦 +CCA7 110E 1165 11B6 # 첧 => 첧 +CCA8 110E 1165 11B7 # 첨 => 첨 +CCA9 110E 1165 11B8 # 첩 => 첩 +CCAA 110E 1165 11B9 # 첪 => 첪 +CCAB 110E 1165 11BA # 첫 => 첫 +CCAC 110E 1165 11BB # 첬 => 첬 +CCAD 110E 1165 11BC # 청 => 청 +CCAE 110E 1165 11BD # 첮 => 첮 +CCAF 110E 1165 11BE # 첯 => 첯 +CCB0 110E 1165 11BF # 첰 => 첰 +CCB1 110E 1165 11C0 # 첱 => 첱 +CCB2 110E 1165 11C1 # 첲 => 첲 +CCB3 110E 1165 11C2 # 첳 => 첳 +CCB4 110E 1166 # 체 => 체 +CCB5 110E 1166 11A8 # 첵 => 첵 +CCB6 110E 1166 11A9 # 첶 => 첶 +CCB7 110E 1166 11AA # 첷 => 첷 +CCB8 110E 1166 11AB # 첸 => 첸 +CCB9 110E 1166 11AC # 첹 => 첹 +CCBA 110E 1166 11AD # 첺 => 첺 +CCBB 110E 1166 11AE # 첻 => 첻 +CCBC 110E 1166 11AF # 첼 => 첼 +CCBD 110E 1166 11B0 # 첽 => 첽 +CCBE 110E 1166 11B1 # 첾 => 첾 +CCBF 110E 1166 11B2 # 첿 => 첿 +CCC0 110E 1166 11B3 # 쳀 => 쳀 +CCC1 110E 1166 11B4 # 쳁 => 쳁 +CCC2 110E 1166 11B5 # 쳂 => 쳂 +CCC3 110E 1166 11B6 # 쳃 => 쳃 
+CCC4 110E 1166 11B7 # 쳄 => 쳄 +CCC5 110E 1166 11B8 # 쳅 => 쳅 +CCC6 110E 1166 11B9 # 쳆 => 쳆 +CCC7 110E 1166 11BA # 쳇 => 쳇 +CCC8 110E 1166 11BB # 쳈 => 쳈 +CCC9 110E 1166 11BC # 쳉 => 쳉 +CCCA 110E 1166 11BD # 쳊 => 쳊 +CCCB 110E 1166 11BE # 쳋 => 쳋 +CCCC 110E 1166 11BF # 쳌 => 쳌 +CCCD 110E 1166 11C0 # 쳍 => 쳍 +CCCE 110E 1166 11C1 # 쳎 => 쳎 +CCCF 110E 1166 11C2 # 쳏 => 쳏 +CCD0 110E 1167 # 쳐 => 쳐 +CCD1 110E 1167 11A8 # 쳑 => 쳑 +CCD2 110E 1167 11A9 # 쳒 => 쳒 +CCD3 110E 1167 11AA # 쳓 => 쳓 +CCD4 110E 1167 11AB # 쳔 => 쳔 +CCD5 110E 1167 11AC # 쳕 => 쳕 +CCD6 110E 1167 11AD # 쳖 => 쳖 +CCD7 110E 1167 11AE # 쳗 => 쳗 +CCD8 110E 1167 11AF # 쳘 => 쳘 +CCD9 110E 1167 11B0 # 쳙 => 쳙 +CCDA 110E 1167 11B1 # 쳚 => 쳚 +CCDB 110E 1167 11B2 # 쳛 => 쳛 +CCDC 110E 1167 11B3 # 쳜 => 쳜 +CCDD 110E 1167 11B4 # 쳝 => 쳝 +CCDE 110E 1167 11B5 # 쳞 => 쳞 +CCDF 110E 1167 11B6 # 쳟 => 쳟 +CCE0 110E 1167 11B7 # 쳠 => 쳠 +CCE1 110E 1167 11B8 # 쳡 => 쳡 +CCE2 110E 1167 11B9 # 쳢 => 쳢 +CCE3 110E 1167 11BA # 쳣 => 쳣 +CCE4 110E 1167 11BB # 쳤 => 쳤 +CCE5 110E 1167 11BC # 쳥 => 쳥 +CCE6 110E 1167 11BD # 쳦 => 쳦 +CCE7 110E 1167 11BE # 쳧 => 쳧 +CCE8 110E 1167 11BF # 쳨 => 쳨 +CCE9 110E 1167 11C0 # 쳩 => 쳩 +CCEA 110E 1167 11C1 # 쳪 => 쳪 +CCEB 110E 1167 11C2 # 쳫 => 쳫 +CCEC 110E 1168 # 쳬 => 쳬 +CCED 110E 1168 11A8 # 쳭 => 쳭 +CCEE 110E 1168 11A9 # 쳮 => 쳮 +CCEF 110E 1168 11AA # 쳯 => 쳯 +CCF0 110E 1168 11AB # 쳰 => 쳰 +CCF1 110E 1168 11AC # 쳱 => 쳱 +CCF2 110E 1168 11AD # 쳲 => 쳲 +CCF3 110E 1168 11AE # 쳳 => 쳳 +CCF4 110E 1168 11AF # 쳴 => 쳴 +CCF5 110E 1168 11B0 # 쳵 => 쳵 +CCF6 110E 1168 11B1 # 쳶 => 쳶 +CCF7 110E 1168 11B2 # 쳷 => 쳷 +CCF8 110E 1168 11B3 # 쳸 => 쳸 +CCF9 110E 1168 11B4 # 쳹 => 쳹 +CCFA 110E 1168 11B5 # 쳺 => 쳺 +CCFB 110E 1168 11B6 # 쳻 => 쳻 +CCFC 110E 1168 11B7 # 쳼 => 쳼 +CCFD 110E 1168 11B8 # 쳽 => 쳽 +CCFE 110E 1168 11B9 # 쳾 => 쳾 +CCFF 110E 1168 11BA # 쳿 => 쳿 +CD00 110E 1168 11BB # 촀 => 촀 +CD01 110E 1168 11BC # 촁 => 촁 +CD02 110E 1168 11BD # 촂 => 촂 +CD03 110E 1168 11BE # 촃 => 촃 +CD04 110E 1168 11BF # 촄 => 촄 +CD05 110E 1168 11C0 # 촅 => 촅 +CD06 110E 1168 11C1 # 촆 => 촆 
+CD07 110E 1168 11C2 # 촇 => 촇 +CD08 110E 1169 # 초 => 초 +CD09 110E 1169 11A8 # 촉 => 촉 +CD0A 110E 1169 11A9 # 촊 => 촊 +CD0B 110E 1169 11AA # 촋 => 촋 +CD0C 110E 1169 11AB # 촌 => 촌 +CD0D 110E 1169 11AC # 촍 => 촍 +CD0E 110E 1169 11AD # 촎 => 촎 +CD0F 110E 1169 11AE # 촏 => 촏 +CD10 110E 1169 11AF # 촐 => 촐 +CD11 110E 1169 11B0 # 촑 => 촑 +CD12 110E 1169 11B1 # 촒 => 촒 +CD13 110E 1169 11B2 # 촓 => 촓 +CD14 110E 1169 11B3 # 촔 => 촔 +CD15 110E 1169 11B4 # 촕 => 촕 +CD16 110E 1169 11B5 # 촖 => 촖 +CD17 110E 1169 11B6 # 촗 => 촗 +CD18 110E 1169 11B7 # 촘 => 촘 +CD19 110E 1169 11B8 # 촙 => 촙 +CD1A 110E 1169 11B9 # 촚 => 촚 +CD1B 110E 1169 11BA # 촛 => 촛 +CD1C 110E 1169 11BB # 촜 => 촜 +CD1D 110E 1169 11BC # 총 => 총 +CD1E 110E 1169 11BD # 촞 => 촞 +CD1F 110E 1169 11BE # 촟 => 촟 +CD20 110E 1169 11BF # 촠 => 촠 +CD21 110E 1169 11C0 # 촡 => 촡 +CD22 110E 1169 11C1 # 촢 => 촢 +CD23 110E 1169 11C2 # 촣 => 촣 +CD24 110E 116A # 촤 => 촤 +CD25 110E 116A 11A8 # 촥 => 촥 +CD26 110E 116A 11A9 # 촦 => 촦 +CD27 110E 116A 11AA # 촧 => 촧 +CD28 110E 116A 11AB # 촨 => 촨 +CD29 110E 116A 11AC # 촩 => 촩 +CD2A 110E 116A 11AD # 촪 => 촪 +CD2B 110E 116A 11AE # 촫 => 촫 +CD2C 110E 116A 11AF # 촬 => 촬 +CD2D 110E 116A 11B0 # 촭 => 촭 +CD2E 110E 116A 11B1 # 촮 => 촮 +CD2F 110E 116A 11B2 # 촯 => 촯 +CD30 110E 116A 11B3 # 촰 => 촰 +CD31 110E 116A 11B4 # 촱 => 촱 +CD32 110E 116A 11B5 # 촲 => 촲 +CD33 110E 116A 11B6 # 촳 => 촳 +CD34 110E 116A 11B7 # 촴 => 촴 +CD35 110E 116A 11B8 # 촵 => 촵 +CD36 110E 116A 11B9 # 촶 => 촶 +CD37 110E 116A 11BA # 촷 => 촷 +CD38 110E 116A 11BB # 촸 => 촸 +CD39 110E 116A 11BC # 촹 => 촹 +CD3A 110E 116A 11BD # 촺 => 촺 +CD3B 110E 116A 11BE # 촻 => 촻 +CD3C 110E 116A 11BF # 촼 => 촼 +CD3D 110E 116A 11C0 # 촽 => 촽 +CD3E 110E 116A 11C1 # 촾 => 촾 +CD3F 110E 116A 11C2 # 촿 => 촿 +CD40 110E 116B # 쵀 => 쵀 +CD41 110E 116B 11A8 # 쵁 => 쵁 +CD42 110E 116B 11A9 # 쵂 => 쵂 +CD43 110E 116B 11AA # 쵃 => 쵃 +CD44 110E 116B 11AB # 쵄 => 쵄 +CD45 110E 116B 11AC # 쵅 => 쵅 +CD46 110E 116B 11AD # 쵆 => 쵆 +CD47 110E 116B 11AE # 쵇 => 쵇 +CD48 110E 116B 11AF # 쵈 => 쵈 +CD49 110E 116B 11B0 # 쵉 => 쵉 
+CD4A 110E 116B 11B1 # 쵊 => 쵊 +CD4B 110E 116B 11B2 # 쵋 => 쵋 +CD4C 110E 116B 11B3 # 쵌 => 쵌 +CD4D 110E 116B 11B4 # 쵍 => 쵍 +CD4E 110E 116B 11B5 # 쵎 => 쵎 +CD4F 110E 116B 11B6 # 쵏 => 쵏 +CD50 110E 116B 11B7 # 쵐 => 쵐 +CD51 110E 116B 11B8 # 쵑 => 쵑 +CD52 110E 116B 11B9 # 쵒 => 쵒 +CD53 110E 116B 11BA # 쵓 => 쵓 +CD54 110E 116B 11BB # 쵔 => 쵔 +CD55 110E 116B 11BC # 쵕 => 쵕 +CD56 110E 116B 11BD # 쵖 => 쵖 +CD57 110E 116B 11BE # 쵗 => 쵗 +CD58 110E 116B 11BF # 쵘 => 쵘 +CD59 110E 116B 11C0 # 쵙 => 쵙 +CD5A 110E 116B 11C1 # 쵚 => 쵚 +CD5B 110E 116B 11C2 # 쵛 => 쵛 +CD5C 110E 116C # 최 => 최 +CD5D 110E 116C 11A8 # 쵝 => 쵝 +CD5E 110E 116C 11A9 # 쵞 => 쵞 +CD5F 110E 116C 11AA # 쵟 => 쵟 +CD60 110E 116C 11AB # 쵠 => 쵠 +CD61 110E 116C 11AC # 쵡 => 쵡 +CD62 110E 116C 11AD # 쵢 => 쵢 +CD63 110E 116C 11AE # 쵣 => 쵣 +CD64 110E 116C 11AF # 쵤 => 쵤 +CD65 110E 116C 11B0 # 쵥 => 쵥 +CD66 110E 116C 11B1 # 쵦 => 쵦 +CD67 110E 116C 11B2 # 쵧 => 쵧 +CD68 110E 116C 11B3 # 쵨 => 쵨 +CD69 110E 116C 11B4 # 쵩 => 쵩 +CD6A 110E 116C 11B5 # 쵪 => 쵪 +CD6B 110E 116C 11B6 # 쵫 => 쵫 +CD6C 110E 116C 11B7 # 쵬 => 쵬 +CD6D 110E 116C 11B8 # 쵭 => 쵭 +CD6E 110E 116C 11B9 # 쵮 => 쵮 +CD6F 110E 116C 11BA # 쵯 => 쵯 +CD70 110E 116C 11BB # 쵰 => 쵰 +CD71 110E 116C 11BC # 쵱 => 쵱 +CD72 110E 116C 11BD # 쵲 => 쵲 +CD73 110E 116C 11BE # 쵳 => 쵳 +CD74 110E 116C 11BF # 쵴 => 쵴 +CD75 110E 116C 11C0 # 쵵 => 쵵 +CD76 110E 116C 11C1 # 쵶 => 쵶 +CD77 110E 116C 11C2 # 쵷 => 쵷 +CD78 110E 116D # 쵸 => 쵸 +CD79 110E 116D 11A8 # 쵹 => 쵹 +CD7A 110E 116D 11A9 # 쵺 => 쵺 +CD7B 110E 116D 11AA # 쵻 => 쵻 +CD7C 110E 116D 11AB # 쵼 => 쵼 +CD7D 110E 116D 11AC # 쵽 => 쵽 +CD7E 110E 116D 11AD # 쵾 => 쵾 +CD7F 110E 116D 11AE # 쵿 => 쵿 +CD80 110E 116D 11AF # 춀 => 춀 +CD81 110E 116D 11B0 # 춁 => 춁 +CD82 110E 116D 11B1 # 춂 => 춂 +CD83 110E 116D 11B2 # 춃 => 춃 +CD84 110E 116D 11B3 # 춄 => 춄 +CD85 110E 116D 11B4 # 춅 => 춅 +CD86 110E 116D 11B5 # 춆 => 춆 +CD87 110E 116D 11B6 # 춇 => 춇 +CD88 110E 116D 11B7 # 춈 => 춈 +CD89 110E 116D 11B8 # 춉 => 춉 +CD8A 110E 116D 11B9 # 춊 => 춊 +CD8B 110E 116D 11BA # 춋 => 춋 +CD8C 110E 116D 11BB # 춌 => 춌 
+CD8D 110E 116D 11BC # 춍 => 춍 +CD8E 110E 116D 11BD # 춎 => 춎 +CD8F 110E 116D 11BE # 춏 => 춏 +CD90 110E 116D 11BF # 춐 => 춐 +CD91 110E 116D 11C0 # 춑 => 춑 +CD92 110E 116D 11C1 # 춒 => 춒 +CD93 110E 116D 11C2 # 춓 => 춓 +CD94 110E 116E # 추 => 추 +CD95 110E 116E 11A8 # 축 => 축 +CD96 110E 116E 11A9 # 춖 => 춖 +CD97 110E 116E 11AA # 춗 => 춗 +CD98 110E 116E 11AB # 춘 => 춘 +CD99 110E 116E 11AC # 춙 => 춙 +CD9A 110E 116E 11AD # 춚 => 춚 +CD9B 110E 116E 11AE # 춛 => 춛 +CD9C 110E 116E 11AF # 출 => 출 +CD9D 110E 116E 11B0 # 춝 => 춝 +CD9E 110E 116E 11B1 # 춞 => 춞 +CD9F 110E 116E 11B2 # 춟 => 춟 +CDA0 110E 116E 11B3 # 춠 => 춠 +CDA1 110E 116E 11B4 # 춡 => 춡 +CDA2 110E 116E 11B5 # 춢 => 춢 +CDA3 110E 116E 11B6 # 춣 => 춣 +CDA4 110E 116E 11B7 # 춤 => 춤 +CDA5 110E 116E 11B8 # 춥 => 춥 +CDA6 110E 116E 11B9 # 춦 => 춦 +CDA7 110E 116E 11BA # 춧 => 춧 +CDA8 110E 116E 11BB # 춨 => 춨 +CDA9 110E 116E 11BC # 충 => 충 +CDAA 110E 116E 11BD # 춪 => 춪 +CDAB 110E 116E 11BE # 춫 => 춫 +CDAC 110E 116E 11BF # 춬 => 춬 +CDAD 110E 116E 11C0 # 춭 => 춭 +CDAE 110E 116E 11C1 # 춮 => 춮 +CDAF 110E 116E 11C2 # 춯 => 춯 +CDB0 110E 116F # 춰 => 춰 +CDB1 110E 116F 11A8 # 춱 => 춱 +CDB2 110E 116F 11A9 # 춲 => 춲 +CDB3 110E 116F 11AA # 춳 => 춳 +CDB4 110E 116F 11AB # 춴 => 춴 +CDB5 110E 116F 11AC # 춵 => 춵 +CDB6 110E 116F 11AD # 춶 => 춶 +CDB7 110E 116F 11AE # 춷 => 춷 +CDB8 110E 116F 11AF # 춸 => 춸 +CDB9 110E 116F 11B0 # 춹 => 춹 +CDBA 110E 116F 11B1 # 춺 => 춺 +CDBB 110E 116F 11B2 # 춻 => 춻 +CDBC 110E 116F 11B3 # 춼 => 춼 +CDBD 110E 116F 11B4 # 춽 => 춽 +CDBE 110E 116F 11B5 # 춾 => 춾 +CDBF 110E 116F 11B6 # 춿 => 춿 +CDC0 110E 116F 11B7 # 췀 => 췀 +CDC1 110E 116F 11B8 # 췁 => 췁 +CDC2 110E 116F 11B9 # 췂 => 췂 +CDC3 110E 116F 11BA # 췃 => 췃 +CDC4 110E 116F 11BB # 췄 => 췄 +CDC5 110E 116F 11BC # 췅 => 췅 +CDC6 110E 116F 11BD # 췆 => 췆 +CDC7 110E 116F 11BE # 췇 => 췇 +CDC8 110E 116F 11BF # 췈 => 췈 +CDC9 110E 116F 11C0 # 췉 => 췉 +CDCA 110E 116F 11C1 # 췊 => 췊 +CDCB 110E 116F 11C2 # 췋 => 췋 +CDCC 110E 1170 # 췌 => 췌 +CDCD 110E 1170 11A8 # 췍 => 췍 +CDCE 110E 1170 11A9 # 췎 => 췎 +CDCF 110E 1170 11AA # 췏 => 췏 
+CDD0 110E 1170 11AB # 췐 => 췐 +CDD1 110E 1170 11AC # 췑 => 췑 +CDD2 110E 1170 11AD # 췒 => 췒 +CDD3 110E 1170 11AE # 췓 => 췓 +CDD4 110E 1170 11AF # 췔 => 췔 +CDD5 110E 1170 11B0 # 췕 => 췕 +CDD6 110E 1170 11B1 # 췖 => 췖 +CDD7 110E 1170 11B2 # 췗 => 췗 +CDD8 110E 1170 11B3 # 췘 => 췘 +CDD9 110E 1170 11B4 # 췙 => 췙 +CDDA 110E 1170 11B5 # 췚 => 췚 +CDDB 110E 1170 11B6 # 췛 => 췛 +CDDC 110E 1170 11B7 # 췜 => 췜 +CDDD 110E 1170 11B8 # 췝 => 췝 +CDDE 110E 1170 11B9 # 췞 => 췞 +CDDF 110E 1170 11BA # 췟 => 췟 +CDE0 110E 1170 11BB # 췠 => 췠 +CDE1 110E 1170 11BC # 췡 => 췡 +CDE2 110E 1170 11BD # 췢 => 췢 +CDE3 110E 1170 11BE # 췣 => 췣 +CDE4 110E 1170 11BF # 췤 => 췤 +CDE5 110E 1170 11C0 # 췥 => 췥 +CDE6 110E 1170 11C1 # 췦 => 췦 +CDE7 110E 1170 11C2 # 췧 => 췧 +CDE8 110E 1171 # 취 => 취 +CDE9 110E 1171 11A8 # 췩 => 췩 +CDEA 110E 1171 11A9 # 췪 => 췪 +CDEB 110E 1171 11AA # 췫 => 췫 +CDEC 110E 1171 11AB # 췬 => 췬 +CDED 110E 1171 11AC # 췭 => 췭 +CDEE 110E 1171 11AD # 췮 => 췮 +CDEF 110E 1171 11AE # 췯 => 췯 +CDF0 110E 1171 11AF # 췰 => 췰 +CDF1 110E 1171 11B0 # 췱 => 췱 +CDF2 110E 1171 11B1 # 췲 => 췲 +CDF3 110E 1171 11B2 # 췳 => 췳 +CDF4 110E 1171 11B3 # 췴 => 췴 +CDF5 110E 1171 11B4 # 췵 => 췵 +CDF6 110E 1171 11B5 # 췶 => 췶 +CDF7 110E 1171 11B6 # 췷 => 췷 +CDF8 110E 1171 11B7 # 췸 => 췸 +CDF9 110E 1171 11B8 # 췹 => 췹 +CDFA 110E 1171 11B9 # 췺 => 췺 +CDFB 110E 1171 11BA # 췻 => 췻 +CDFC 110E 1171 11BB # 췼 => 췼 +CDFD 110E 1171 11BC # 췽 => 췽 +CDFE 110E 1171 11BD # 췾 => 췾 +CDFF 110E 1171 11BE # 췿 => 췿 +CE00 110E 1171 11BF # 츀 => 츀 +CE01 110E 1171 11C0 # 츁 => 츁 +CE02 110E 1171 11C1 # 츂 => 츂 +CE03 110E 1171 11C2 # 츃 => 츃 +CE04 110E 1172 # 츄 => 츄 +CE05 110E 1172 11A8 # 츅 => 츅 +CE06 110E 1172 11A9 # 츆 => 츆 +CE07 110E 1172 11AA # 츇 => 츇 +CE08 110E 1172 11AB # 츈 => 츈 +CE09 110E 1172 11AC # 츉 => 츉 +CE0A 110E 1172 11AD # 츊 => 츊 +CE0B 110E 1172 11AE # 츋 => 츋 +CE0C 110E 1172 11AF # 츌 => 츌 +CE0D 110E 1172 11B0 # 츍 => 츍 +CE0E 110E 1172 11B1 # 츎 => 츎 +CE0F 110E 1172 11B2 # 츏 => 츏 +CE10 110E 1172 11B3 # 츐 => 츐 +CE11 110E 1172 11B4 # 츑 => 츑 +CE12 110E 1172 11B5 # 츒 => 츒 
+CE13 110E 1172 11B6 # 츓 => 츓 +CE14 110E 1172 11B7 # 츔 => 츔 +CE15 110E 1172 11B8 # 츕 => 츕 +CE16 110E 1172 11B9 # 츖 => 츖 +CE17 110E 1172 11BA # 츗 => 츗 +CE18 110E 1172 11BB # 츘 => 츘 +CE19 110E 1172 11BC # 츙 => 츙 +CE1A 110E 1172 11BD # 츚 => 츚 +CE1B 110E 1172 11BE # 츛 => 츛 +CE1C 110E 1172 11BF # 츜 => 츜 +CE1D 110E 1172 11C0 # 츝 => 츝 +CE1E 110E 1172 11C1 # 츞 => 츞 +CE1F 110E 1172 11C2 # 츟 => 츟 +CE20 110E 1173 # 츠 => 츠 +CE21 110E 1173 11A8 # 측 => 측 +CE22 110E 1173 11A9 # 츢 => 츢 +CE23 110E 1173 11AA # 츣 => 츣 +CE24 110E 1173 11AB # 츤 => 츤 +CE25 110E 1173 11AC # 츥 => 츥 +CE26 110E 1173 11AD # 츦 => 츦 +CE27 110E 1173 11AE # 츧 => 츧 +CE28 110E 1173 11AF # 츨 => 츨 +CE29 110E 1173 11B0 # 츩 => 츩 +CE2A 110E 1173 11B1 # 츪 => 츪 +CE2B 110E 1173 11B2 # 츫 => 츫 +CE2C 110E 1173 11B3 # 츬 => 츬 +CE2D 110E 1173 11B4 # 츭 => 츭 +CE2E 110E 1173 11B5 # 츮 => 츮 +CE2F 110E 1173 11B6 # 츯 => 츯 +CE30 110E 1173 11B7 # 츰 => 츰 +CE31 110E 1173 11B8 # 츱 => 츱 +CE32 110E 1173 11B9 # 츲 => 츲 +CE33 110E 1173 11BA # 츳 => 츳 +CE34 110E 1173 11BB # 츴 => 츴 +CE35 110E 1173 11BC # 층 => 층 +CE36 110E 1173 11BD # 츶 => 츶 +CE37 110E 1173 11BE # 츷 => 츷 +CE38 110E 1173 11BF # 츸 => 츸 +CE39 110E 1173 11C0 # 츹 => 츹 +CE3A 110E 1173 11C1 # 츺 => 츺 +CE3B 110E 1173 11C2 # 츻 => 츻 +CE3C 110E 1174 # 츼 => 츼 +CE3D 110E 1174 11A8 # 츽 => 츽 +CE3E 110E 1174 11A9 # 츾 => 츾 +CE3F 110E 1174 11AA # 츿 => 츿 +CE40 110E 1174 11AB # 칀 => 칀 +CE41 110E 1174 11AC # 칁 => 칁 +CE42 110E 1174 11AD # 칂 => 칂 +CE43 110E 1174 11AE # 칃 => 칃 +CE44 110E 1174 11AF # 칄 => 칄 +CE45 110E 1174 11B0 # 칅 => 칅 +CE46 110E 1174 11B1 # 칆 => 칆 +CE47 110E 1174 11B2 # 칇 => 칇 +CE48 110E 1174 11B3 # 칈 => 칈 +CE49 110E 1174 11B4 # 칉 => 칉 +CE4A 110E 1174 11B5 # 칊 => 칊 +CE4B 110E 1174 11B6 # 칋 => 칋 +CE4C 110E 1174 11B7 # 칌 => 칌 +CE4D 110E 1174 11B8 # 칍 => 칍 +CE4E 110E 1174 11B9 # 칎 => 칎 +CE4F 110E 1174 11BA # 칏 => 칏 +CE50 110E 1174 11BB # 칐 => 칐 +CE51 110E 1174 11BC # 칑 => 칑 +CE52 110E 1174 11BD # 칒 => 칒 +CE53 110E 1174 11BE # 칓 => 칓 +CE54 110E 1174 11BF # 칔 => 칔 +CE55 110E 1174 11C0 # 칕 => 칕 
+CE56 110E 1174 11C1 # 칖 => 칖 +CE57 110E 1174 11C2 # 칗 => 칗 +CE58 110E 1175 # 치 => 치 +CE59 110E 1175 11A8 # 칙 => 칙 +CE5A 110E 1175 11A9 # 칚 => 칚 +CE5B 110E 1175 11AA # 칛 => 칛 +CE5C 110E 1175 11AB # 친 => 친 +CE5D 110E 1175 11AC # 칝 => 칝 +CE5E 110E 1175 11AD # 칞 => 칞 +CE5F 110E 1175 11AE # 칟 => 칟 +CE60 110E 1175 11AF # 칠 => 칠 +CE61 110E 1175 11B0 # 칡 => 칡 +CE62 110E 1175 11B1 # 칢 => 칢 +CE63 110E 1175 11B2 # 칣 => 칣 +CE64 110E 1175 11B3 # 칤 => 칤 +CE65 110E 1175 11B4 # 칥 => 칥 +CE66 110E 1175 11B5 # 칦 => 칦 +CE67 110E 1175 11B6 # 칧 => 칧 +CE68 110E 1175 11B7 # 침 => 침 +CE69 110E 1175 11B8 # 칩 => 칩 +CE6A 110E 1175 11B9 # 칪 => 칪 +CE6B 110E 1175 11BA # 칫 => 칫 +CE6C 110E 1175 11BB # 칬 => 칬 +CE6D 110E 1175 11BC # 칭 => 칭 +CE6E 110E 1175 11BD # 칮 => 칮 +CE6F 110E 1175 11BE # 칯 => 칯 +CE70 110E 1175 11BF # 칰 => 칰 +CE71 110E 1175 11C0 # 칱 => 칱 +CE72 110E 1175 11C1 # 칲 => 칲 +CE73 110E 1175 11C2 # 칳 => 칳 +CE74 110F 1161 # 카 => 카 +CE75 110F 1161 11A8 # 칵 => 칵 +CE76 110F 1161 11A9 # 칶 => 칶 +CE77 110F 1161 11AA # 칷 => 칷 +CE78 110F 1161 11AB # 칸 => 칸 +CE79 110F 1161 11AC # 칹 => 칹 +CE7A 110F 1161 11AD # 칺 => 칺 +CE7B 110F 1161 11AE # 칻 => 칻 +CE7C 110F 1161 11AF # 칼 => 칼 +CE7D 110F 1161 11B0 # 칽 => 칽 +CE7E 110F 1161 11B1 # 칾 => 칾 +CE7F 110F 1161 11B2 # 칿 => 칿 +CE80 110F 1161 11B3 # 캀 => 캀 +CE81 110F 1161 11B4 # 캁 => 캁 +CE82 110F 1161 11B5 # 캂 => 캂 +CE83 110F 1161 11B6 # 캃 => 캃 +CE84 110F 1161 11B7 # 캄 => 캄 +CE85 110F 1161 11B8 # 캅 => 캅 +CE86 110F 1161 11B9 # 캆 => 캆 +CE87 110F 1161 11BA # 캇 => 캇 +CE88 110F 1161 11BB # 캈 => 캈 +CE89 110F 1161 11BC # 캉 => 캉 +CE8A 110F 1161 11BD # 캊 => 캊 +CE8B 110F 1161 11BE # 캋 => 캋 +CE8C 110F 1161 11BF # 캌 => 캌 +CE8D 110F 1161 11C0 # 캍 => 캍 +CE8E 110F 1161 11C1 # 캎 => 캎 +CE8F 110F 1161 11C2 # 캏 => 캏 +CE90 110F 1162 # 캐 => 캐 +CE91 110F 1162 11A8 # 캑 => 캑 +CE92 110F 1162 11A9 # 캒 => 캒 +CE93 110F 1162 11AA # 캓 => 캓 +CE94 110F 1162 11AB # 캔 => 캔 +CE95 110F 1162 11AC # 캕 => 캕 +CE96 110F 1162 11AD # 캖 => 캖 +CE97 110F 1162 11AE # 캗 => 캗 +CE98 110F 1162 11AF # 캘 => 캘 
+CE99 110F 1162 11B0 # 캙 => 캙 +CE9A 110F 1162 11B1 # 캚 => 캚 +CE9B 110F 1162 11B2 # 캛 => 캛 +CE9C 110F 1162 11B3 # 캜 => 캜 +CE9D 110F 1162 11B4 # 캝 => 캝 +CE9E 110F 1162 11B5 # 캞 => 캞 +CE9F 110F 1162 11B6 # 캟 => 캟 +CEA0 110F 1162 11B7 # 캠 => 캠 +CEA1 110F 1162 11B8 # 캡 => 캡 +CEA2 110F 1162 11B9 # 캢 => 캢 +CEA3 110F 1162 11BA # 캣 => 캣 +CEA4 110F 1162 11BB # 캤 => 캤 +CEA5 110F 1162 11BC # 캥 => 캥 +CEA6 110F 1162 11BD # 캦 => 캦 +CEA7 110F 1162 11BE # 캧 => 캧 +CEA8 110F 1162 11BF # 캨 => 캨 +CEA9 110F 1162 11C0 # 캩 => 캩 +CEAA 110F 1162 11C1 # 캪 => 캪 +CEAB 110F 1162 11C2 # 캫 => 캫 +CEAC 110F 1163 # 캬 => 캬 +CEAD 110F 1163 11A8 # 캭 => 캭 +CEAE 110F 1163 11A9 # 캮 => 캮 +CEAF 110F 1163 11AA # 캯 => 캯 +CEB0 110F 1163 11AB # 캰 => 캰 +CEB1 110F 1163 11AC # 캱 => 캱 +CEB2 110F 1163 11AD # 캲 => 캲 +CEB3 110F 1163 11AE # 캳 => 캳 +CEB4 110F 1163 11AF # 캴 => 캴 +CEB5 110F 1163 11B0 # 캵 => 캵 +CEB6 110F 1163 11B1 # 캶 => 캶 +CEB7 110F 1163 11B2 # 캷 => 캷 +CEB8 110F 1163 11B3 # 캸 => 캸 +CEB9 110F 1163 11B4 # 캹 => 캹 +CEBA 110F 1163 11B5 # 캺 => 캺 +CEBB 110F 1163 11B6 # 캻 => 캻 +CEBC 110F 1163 11B7 # 캼 => 캼 +CEBD 110F 1163 11B8 # 캽 => 캽 +CEBE 110F 1163 11B9 # 캾 => 캾 +CEBF 110F 1163 11BA # 캿 => 캿 +CEC0 110F 1163 11BB # 컀 => 컀 +CEC1 110F 1163 11BC # 컁 => 컁 +CEC2 110F 1163 11BD # 컂 => 컂 +CEC3 110F 1163 11BE # 컃 => 컃 +CEC4 110F 1163 11BF # 컄 => 컄 +CEC5 110F 1163 11C0 # 컅 => 컅 +CEC6 110F 1163 11C1 # 컆 => 컆 +CEC7 110F 1163 11C2 # 컇 => 컇 +CEC8 110F 1164 # 컈 => 컈 +CEC9 110F 1164 11A8 # 컉 => 컉 +CECA 110F 1164 11A9 # 컊 => 컊 +CECB 110F 1164 11AA # 컋 => 컋 +CECC 110F 1164 11AB # 컌 => 컌 +CECD 110F 1164 11AC # 컍 => 컍 +CECE 110F 1164 11AD # 컎 => 컎 +CECF 110F 1164 11AE # 컏 => 컏 +CED0 110F 1164 11AF # 컐 => 컐 +CED1 110F 1164 11B0 # 컑 => 컑 +CED2 110F 1164 11B1 # 컒 => 컒 +CED3 110F 1164 11B2 # 컓 => 컓 +CED4 110F 1164 11B3 # 컔 => 컔 +CED5 110F 1164 11B4 # 컕 => 컕 +CED6 110F 1164 11B5 # 컖 => 컖 +CED7 110F 1164 11B6 # 컗 => 컗 +CED8 110F 1164 11B7 # 컘 => 컘 +CED9 110F 1164 11B8 # 컙 => 컙 +CEDA 110F 1164 11B9 # 컚 => 컚 +CEDB 110F 1164 11BA # 컛 => 컛 
+CEDC 110F 1164 11BB # 컜 => 컜 +CEDD 110F 1164 11BC # 컝 => 컝 +CEDE 110F 1164 11BD # 컞 => 컞 +CEDF 110F 1164 11BE # 컟 => 컟 +CEE0 110F 1164 11BF # 컠 => 컠 +CEE1 110F 1164 11C0 # 컡 => 컡 +CEE2 110F 1164 11C1 # 컢 => 컢 +CEE3 110F 1164 11C2 # 컣 => 컣 +CEE4 110F 1165 # 커 => 커 +CEE5 110F 1165 11A8 # 컥 => 컥 +CEE6 110F 1165 11A9 # 컦 => 컦 +CEE7 110F 1165 11AA # 컧 => 컧 +CEE8 110F 1165 11AB # 컨 => 컨 +CEE9 110F 1165 11AC # 컩 => 컩 +CEEA 110F 1165 11AD # 컪 => 컪 +CEEB 110F 1165 11AE # 컫 => 컫 +CEEC 110F 1165 11AF # 컬 => 컬 +CEED 110F 1165 11B0 # 컭 => 컭 +CEEE 110F 1165 11B1 # 컮 => 컮 +CEEF 110F 1165 11B2 # 컯 => 컯 +CEF0 110F 1165 11B3 # 컰 => 컰 +CEF1 110F 1165 11B4 # 컱 => 컱 +CEF2 110F 1165 11B5 # 컲 => 컲 +CEF3 110F 1165 11B6 # 컳 => 컳 +CEF4 110F 1165 11B7 # 컴 => 컴 +CEF5 110F 1165 11B8 # 컵 => 컵 +CEF6 110F 1165 11B9 # 컶 => 컶 +CEF7 110F 1165 11BA # 컷 => 컷 +CEF8 110F 1165 11BB # 컸 => 컸 +CEF9 110F 1165 11BC # 컹 => 컹 +CEFA 110F 1165 11BD # 컺 => 컺 +CEFB 110F 1165 11BE # 컻 => 컻 +CEFC 110F 1165 11BF # 컼 => 컼 +CEFD 110F 1165 11C0 # 컽 => 컽 +CEFE 110F 1165 11C1 # 컾 => 컾 +CEFF 110F 1165 11C2 # 컿 => 컿 +CF00 110F 1166 # 케 => 케 +CF01 110F 1166 11A8 # 켁 => 켁 +CF02 110F 1166 11A9 # 켂 => 켂 +CF03 110F 1166 11AA # 켃 => 켃 +CF04 110F 1166 11AB # 켄 => 켄 +CF05 110F 1166 11AC # 켅 => 켅 +CF06 110F 1166 11AD # 켆 => 켆 +CF07 110F 1166 11AE # 켇 => 켇 +CF08 110F 1166 11AF # 켈 => 켈 +CF09 110F 1166 11B0 # 켉 => 켉 +CF0A 110F 1166 11B1 # 켊 => 켊 +CF0B 110F 1166 11B2 # 켋 => 켋 +CF0C 110F 1166 11B3 # 켌 => 켌 +CF0D 110F 1166 11B4 # 켍 => 켍 +CF0E 110F 1166 11B5 # 켎 => 켎 +CF0F 110F 1166 11B6 # 켏 => 켏 +CF10 110F 1166 11B7 # 켐 => 켐 +CF11 110F 1166 11B8 # 켑 => 켑 +CF12 110F 1166 11B9 # 켒 => 켒 +CF13 110F 1166 11BA # 켓 => 켓 +CF14 110F 1166 11BB # 켔 => 켔 +CF15 110F 1166 11BC # 켕 => 켕 +CF16 110F 1166 11BD # 켖 => 켖 +CF17 110F 1166 11BE # 켗 => 켗 +CF18 110F 1166 11BF # 켘 => 켘 +CF19 110F 1166 11C0 # 켙 => 켙 +CF1A 110F 1166 11C1 # 켚 => 켚 +CF1B 110F 1166 11C2 # 켛 => 켛 +CF1C 110F 1167 # 켜 => 켜 +CF1D 110F 1167 11A8 # 켝 => 켝 +CF1E 110F 1167 11A9 # 켞 => 켞 
+CF1F 110F 1167 11AA # 켟 => 켟 +CF20 110F 1167 11AB # 켠 => 켠 +CF21 110F 1167 11AC # 켡 => 켡 +CF22 110F 1167 11AD # 켢 => 켢 +CF23 110F 1167 11AE # 켣 => 켣 +CF24 110F 1167 11AF # 켤 => 켤 +CF25 110F 1167 11B0 # 켥 => 켥 +CF26 110F 1167 11B1 # 켦 => 켦 +CF27 110F 1167 11B2 # 켧 => 켧 +CF28 110F 1167 11B3 # 켨 => 켨 +CF29 110F 1167 11B4 # 켩 => 켩 +CF2A 110F 1167 11B5 # 켪 => 켪 +CF2B 110F 1167 11B6 # 켫 => 켫 +CF2C 110F 1167 11B7 # 켬 => 켬 +CF2D 110F 1167 11B8 # 켭 => 켭 +CF2E 110F 1167 11B9 # 켮 => 켮 +CF2F 110F 1167 11BA # 켯 => 켯 +CF30 110F 1167 11BB # 켰 => 켰 +CF31 110F 1167 11BC # 켱 => 켱 +CF32 110F 1167 11BD # 켲 => 켲 +CF33 110F 1167 11BE # 켳 => 켳 +CF34 110F 1167 11BF # 켴 => 켴 +CF35 110F 1167 11C0 # 켵 => 켵 +CF36 110F 1167 11C1 # 켶 => 켶 +CF37 110F 1167 11C2 # 켷 => 켷 +CF38 110F 1168 # 켸 => 켸 +CF39 110F 1168 11A8 # 켹 => 켹 +CF3A 110F 1168 11A9 # 켺 => 켺 +CF3B 110F 1168 11AA # 켻 => 켻 +CF3C 110F 1168 11AB # 켼 => 켼 +CF3D 110F 1168 11AC # 켽 => 켽 +CF3E 110F 1168 11AD # 켾 => 켾 +CF3F 110F 1168 11AE # 켿 => 켿 +CF40 110F 1168 11AF # 콀 => 콀 +CF41 110F 1168 11B0 # 콁 => 콁 +CF42 110F 1168 11B1 # 콂 => 콂 +CF43 110F 1168 11B2 # 콃 => 콃 +CF44 110F 1168 11B3 # 콄 => 콄 +CF45 110F 1168 11B4 # 콅 => 콅 +CF46 110F 1168 11B5 # 콆 => 콆 +CF47 110F 1168 11B6 # 콇 => 콇 +CF48 110F 1168 11B7 # 콈 => 콈 +CF49 110F 1168 11B8 # 콉 => 콉 +CF4A 110F 1168 11B9 # 콊 => 콊 +CF4B 110F 1168 11BA # 콋 => 콋 +CF4C 110F 1168 11BB # 콌 => 콌 +CF4D 110F 1168 11BC # 콍 => 콍 +CF4E 110F 1168 11BD # 콎 => 콎 +CF4F 110F 1168 11BE # 콏 => 콏 +CF50 110F 1168 11BF # 콐 => 콐 +CF51 110F 1168 11C0 # 콑 => 콑 +CF52 110F 1168 11C1 # 콒 => 콒 +CF53 110F 1168 11C2 # 콓 => 콓 +CF54 110F 1169 # 코 => 코 +CF55 110F 1169 11A8 # 콕 => 콕 +CF56 110F 1169 11A9 # 콖 => 콖 +CF57 110F 1169 11AA # 콗 => 콗 +CF58 110F 1169 11AB # 콘 => 콘 +CF59 110F 1169 11AC # 콙 => 콙 +CF5A 110F 1169 11AD # 콚 => 콚 +CF5B 110F 1169 11AE # 콛 => 콛 +CF5C 110F 1169 11AF # 콜 => 콜 +CF5D 110F 1169 11B0 # 콝 => 콝 +CF5E 110F 1169 11B1 # 콞 => 콞 +CF5F 110F 1169 11B2 # 콟 => 콟 +CF60 110F 1169 11B3 # 콠 => 콠 +CF61 110F 1169 11B4 # 콡 => 콡 
+CF62 110F 1169 11B5 # 콢 => 콢 +CF63 110F 1169 11B6 # 콣 => 콣 +CF64 110F 1169 11B7 # 콤 => 콤 +CF65 110F 1169 11B8 # 콥 => 콥 +CF66 110F 1169 11B9 # 콦 => 콦 +CF67 110F 1169 11BA # 콧 => 콧 +CF68 110F 1169 11BB # 콨 => 콨 +CF69 110F 1169 11BC # 콩 => 콩 +CF6A 110F 1169 11BD # 콪 => 콪 +CF6B 110F 1169 11BE # 콫 => 콫 +CF6C 110F 1169 11BF # 콬 => 콬 +CF6D 110F 1169 11C0 # 콭 => 콭 +CF6E 110F 1169 11C1 # 콮 => 콮 +CF6F 110F 1169 11C2 # 콯 => 콯 +CF70 110F 116A # 콰 => 콰 +CF71 110F 116A 11A8 # 콱 => 콱 +CF72 110F 116A 11A9 # 콲 => 콲 +CF73 110F 116A 11AA # 콳 => 콳 +CF74 110F 116A 11AB # 콴 => 콴 +CF75 110F 116A 11AC # 콵 => 콵 +CF76 110F 116A 11AD # 콶 => 콶 +CF77 110F 116A 11AE # 콷 => 콷 +CF78 110F 116A 11AF # 콸 => 콸 +CF79 110F 116A 11B0 # 콹 => 콹 +CF7A 110F 116A 11B1 # 콺 => 콺 +CF7B 110F 116A 11B2 # 콻 => 콻 +CF7C 110F 116A 11B3 # 콼 => 콼 +CF7D 110F 116A 11B4 # 콽 => 콽 +CF7E 110F 116A 11B5 # 콾 => 콾 +CF7F 110F 116A 11B6 # 콿 => 콿 +CF80 110F 116A 11B7 # 쾀 => 쾀 +CF81 110F 116A 11B8 # 쾁 => 쾁 +CF82 110F 116A 11B9 # 쾂 => 쾂 +CF83 110F 116A 11BA # 쾃 => 쾃 +CF84 110F 116A 11BB # 쾄 => 쾄 +CF85 110F 116A 11BC # 쾅 => 쾅 +CF86 110F 116A 11BD # 쾆 => 쾆 +CF87 110F 116A 11BE # 쾇 => 쾇 +CF88 110F 116A 11BF # 쾈 => 쾈 +CF89 110F 116A 11C0 # 쾉 => 쾉 +CF8A 110F 116A 11C1 # 쾊 => 쾊 +CF8B 110F 116A 11C2 # 쾋 => 쾋 +CF8C 110F 116B # 쾌 => 쾌 +CF8D 110F 116B 11A8 # 쾍 => 쾍 +CF8E 110F 116B 11A9 # 쾎 => 쾎 +CF8F 110F 116B 11AA # 쾏 => 쾏 +CF90 110F 116B 11AB # 쾐 => 쾐 +CF91 110F 116B 11AC # 쾑 => 쾑 +CF92 110F 116B 11AD # 쾒 => 쾒 +CF93 110F 116B 11AE # 쾓 => 쾓 +CF94 110F 116B 11AF # 쾔 => 쾔 +CF95 110F 116B 11B0 # 쾕 => 쾕 +CF96 110F 116B 11B1 # 쾖 => 쾖 +CF97 110F 116B 11B2 # 쾗 => 쾗 +CF98 110F 116B 11B3 # 쾘 => 쾘 +CF99 110F 116B 11B4 # 쾙 => 쾙 +CF9A 110F 116B 11B5 # 쾚 => 쾚 +CF9B 110F 116B 11B6 # 쾛 => 쾛 +CF9C 110F 116B 11B7 # 쾜 => 쾜 +CF9D 110F 116B 11B8 # 쾝 => 쾝 +CF9E 110F 116B 11B9 # 쾞 => 쾞 +CF9F 110F 116B 11BA # 쾟 => 쾟 +CFA0 110F 116B 11BB # 쾠 => 쾠 +CFA1 110F 116B 11BC # 쾡 => 쾡 +CFA2 110F 116B 11BD # 쾢 => 쾢 +CFA3 110F 116B 11BE # 쾣 => 쾣 +CFA4 110F 116B 11BF # 쾤 => 쾤 
+CFA5 110F 116B 11C0 # 쾥 => 쾥 +CFA6 110F 116B 11C1 # 쾦 => 쾦 +CFA7 110F 116B 11C2 # 쾧 => 쾧 +CFA8 110F 116C # 쾨 => 쾨 +CFA9 110F 116C 11A8 # 쾩 => 쾩 +CFAA 110F 116C 11A9 # 쾪 => 쾪 +CFAB 110F 116C 11AA # 쾫 => 쾫 +CFAC 110F 116C 11AB # 쾬 => 쾬 +CFAD 110F 116C 11AC # 쾭 => 쾭 +CFAE 110F 116C 11AD # 쾮 => 쾮 +CFAF 110F 116C 11AE # 쾯 => 쾯 +CFB0 110F 116C 11AF # 쾰 => 쾰 +CFB1 110F 116C 11B0 # 쾱 => 쾱 +CFB2 110F 116C 11B1 # 쾲 => 쾲 +CFB3 110F 116C 11B2 # 쾳 => 쾳 +CFB4 110F 116C 11B3 # 쾴 => 쾴 +CFB5 110F 116C 11B4 # 쾵 => 쾵 +CFB6 110F 116C 11B5 # 쾶 => 쾶 +CFB7 110F 116C 11B6 # 쾷 => 쾷 +CFB8 110F 116C 11B7 # 쾸 => 쾸 +CFB9 110F 116C 11B8 # 쾹 => 쾹 +CFBA 110F 116C 11B9 # 쾺 => 쾺 +CFBB 110F 116C 11BA # 쾻 => 쾻 +CFBC 110F 116C 11BB # 쾼 => 쾼 +CFBD 110F 116C 11BC # 쾽 => 쾽 +CFBE 110F 116C 11BD # 쾾 => 쾾 +CFBF 110F 116C 11BE # 쾿 => 쾿 +CFC0 110F 116C 11BF # 쿀 => 쿀 +CFC1 110F 116C 11C0 # 쿁 => 쿁 +CFC2 110F 116C 11C1 # 쿂 => 쿂 +CFC3 110F 116C 11C2 # 쿃 => 쿃 +CFC4 110F 116D # 쿄 => 쿄 +CFC5 110F 116D 11A8 # 쿅 => 쿅 +CFC6 110F 116D 11A9 # 쿆 => 쿆 +CFC7 110F 116D 11AA # 쿇 => 쿇 +CFC8 110F 116D 11AB # 쿈 => 쿈 +CFC9 110F 116D 11AC # 쿉 => 쿉 +CFCA 110F 116D 11AD # 쿊 => 쿊 +CFCB 110F 116D 11AE # 쿋 => 쿋 +CFCC 110F 116D 11AF # 쿌 => 쿌 +CFCD 110F 116D 11B0 # 쿍 => 쿍 +CFCE 110F 116D 11B1 # 쿎 => 쿎 +CFCF 110F 116D 11B2 # 쿏 => 쿏 +CFD0 110F 116D 11B3 # 쿐 => 쿐 +CFD1 110F 116D 11B4 # 쿑 => 쿑 +CFD2 110F 116D 11B5 # 쿒 => 쿒 +CFD3 110F 116D 11B6 # 쿓 => 쿓 +CFD4 110F 116D 11B7 # 쿔 => 쿔 +CFD5 110F 116D 11B8 # 쿕 => 쿕 +CFD6 110F 116D 11B9 # 쿖 => 쿖 +CFD7 110F 116D 11BA # 쿗 => 쿗 +CFD8 110F 116D 11BB # 쿘 => 쿘 +CFD9 110F 116D 11BC # 쿙 => 쿙 +CFDA 110F 116D 11BD # 쿚 => 쿚 +CFDB 110F 116D 11BE # 쿛 => 쿛 +CFDC 110F 116D 11BF # 쿜 => 쿜 +CFDD 110F 116D 11C0 # 쿝 => 쿝 +CFDE 110F 116D 11C1 # 쿞 => 쿞 +CFDF 110F 116D 11C2 # 쿟 => 쿟 +CFE0 110F 116E # 쿠 => 쿠 +CFE1 110F 116E 11A8 # 쿡 => 쿡 +CFE2 110F 116E 11A9 # 쿢 => 쿢 +CFE3 110F 116E 11AA # 쿣 => 쿣 +CFE4 110F 116E 11AB # 쿤 => 쿤 +CFE5 110F 116E 11AC # 쿥 => 쿥 +CFE6 110F 116E 11AD # 쿦 => 쿦 +CFE7 110F 116E 11AE # 쿧 => 쿧 
+CFE8 110F 116E 11AF # 쿨 => 쿨 +CFE9 110F 116E 11B0 # 쿩 => 쿩 +CFEA 110F 116E 11B1 # 쿪 => 쿪 +CFEB 110F 116E 11B2 # 쿫 => 쿫 +CFEC 110F 116E 11B3 # 쿬 => 쿬 +CFED 110F 116E 11B4 # 쿭 => 쿭 +CFEE 110F 116E 11B5 # 쿮 => 쿮 +CFEF 110F 116E 11B6 # 쿯 => 쿯 +CFF0 110F 116E 11B7 # 쿰 => 쿰 +CFF1 110F 116E 11B8 # 쿱 => 쿱 +CFF2 110F 116E 11B9 # 쿲 => 쿲 +CFF3 110F 116E 11BA # 쿳 => 쿳 +CFF4 110F 116E 11BB # 쿴 => 쿴 +CFF5 110F 116E 11BC # 쿵 => 쿵 +CFF6 110F 116E 11BD # 쿶 => 쿶 +CFF7 110F 116E 11BE # 쿷 => 쿷 +CFF8 110F 116E 11BF # 쿸 => 쿸 +CFF9 110F 116E 11C0 # 쿹 => 쿹 +CFFA 110F 116E 11C1 # 쿺 => 쿺 +CFFB 110F 116E 11C2 # 쿻 => 쿻 +CFFC 110F 116F # 쿼 => 쿼 +CFFD 110F 116F 11A8 # 쿽 => 쿽 +CFFE 110F 116F 11A9 # 쿾 => 쿾 +CFFF 110F 116F 11AA # 쿿 => 쿿 +D000 110F 116F 11AB # 퀀 => 퀀 +D001 110F 116F 11AC # 퀁 => 퀁 +D002 110F 116F 11AD # 퀂 => 퀂 +D003 110F 116F 11AE # 퀃 => 퀃 +D004 110F 116F 11AF # 퀄 => 퀄 +D005 110F 116F 11B0 # 퀅 => 퀅 +D006 110F 116F 11B1 # 퀆 => 퀆 +D007 110F 116F 11B2 # 퀇 => 퀇 +D008 110F 116F 11B3 # 퀈 => 퀈 +D009 110F 116F 11B4 # 퀉 => 퀉 +D00A 110F 116F 11B5 # 퀊 => 퀊 +D00B 110F 116F 11B6 # 퀋 => 퀋 +D00C 110F 116F 11B7 # 퀌 => 퀌 +D00D 110F 116F 11B8 # 퀍 => 퀍 +D00E 110F 116F 11B9 # 퀎 => 퀎 +D00F 110F 116F 11BA # 퀏 => 퀏 +D010 110F 116F 11BB # 퀐 => 퀐 +D011 110F 116F 11BC # 퀑 => 퀑 +D012 110F 116F 11BD # 퀒 => 퀒 +D013 110F 116F 11BE # 퀓 => 퀓 +D014 110F 116F 11BF # 퀔 => 퀔 +D015 110F 116F 11C0 # 퀕 => 퀕 +D016 110F 116F 11C1 # 퀖 => 퀖 +D017 110F 116F 11C2 # 퀗 => 퀗 +D018 110F 1170 # 퀘 => 퀘 +D019 110F 1170 11A8 # 퀙 => 퀙 +D01A 110F 1170 11A9 # 퀚 => 퀚 +D01B 110F 1170 11AA # 퀛 => 퀛 +D01C 110F 1170 11AB # 퀜 => 퀜 +D01D 110F 1170 11AC # 퀝 => 퀝 +D01E 110F 1170 11AD # 퀞 => 퀞 +D01F 110F 1170 11AE # 퀟 => 퀟 +D020 110F 1170 11AF # 퀠 => 퀠 +D021 110F 1170 11B0 # 퀡 => 퀡 +D022 110F 1170 11B1 # 퀢 => 퀢 +D023 110F 1170 11B2 # 퀣 => 퀣 +D024 110F 1170 11B3 # 퀤 => 퀤 +D025 110F 1170 11B4 # 퀥 => 퀥 +D026 110F 1170 11B5 # 퀦 => 퀦 +D027 110F 1170 11B6 # 퀧 => 퀧 +D028 110F 1170 11B7 # 퀨 => 퀨 +D029 110F 1170 11B8 # 퀩 => 퀩 +D02A 110F 1170 11B9 # 퀪 => 퀪 
+D02B 110F 1170 11BA # 퀫 => 퀫 +D02C 110F 1170 11BB # 퀬 => 퀬 +D02D 110F 1170 11BC # 퀭 => 퀭 +D02E 110F 1170 11BD # 퀮 => 퀮 +D02F 110F 1170 11BE # 퀯 => 퀯 +D030 110F 1170 11BF # 퀰 => 퀰 +D031 110F 1170 11C0 # 퀱 => 퀱 +D032 110F 1170 11C1 # 퀲 => 퀲 +D033 110F 1170 11C2 # 퀳 => 퀳 +D034 110F 1171 # 퀴 => 퀴 +D035 110F 1171 11A8 # 퀵 => 퀵 +D036 110F 1171 11A9 # 퀶 => 퀶 +D037 110F 1171 11AA # 퀷 => 퀷 +D038 110F 1171 11AB # 퀸 => 퀸 +D039 110F 1171 11AC # 퀹 => 퀹 +D03A 110F 1171 11AD # 퀺 => 퀺 +D03B 110F 1171 11AE # 퀻 => 퀻 +D03C 110F 1171 11AF # 퀼 => 퀼 +D03D 110F 1171 11B0 # 퀽 => 퀽 +D03E 110F 1171 11B1 # 퀾 => 퀾 +D03F 110F 1171 11B2 # 퀿 => 퀿 +D040 110F 1171 11B3 # 큀 => 큀 +D041 110F 1171 11B4 # 큁 => 큁 +D042 110F 1171 11B5 # 큂 => 큂 +D043 110F 1171 11B6 # 큃 => 큃 +D044 110F 1171 11B7 # 큄 => 큄 +D045 110F 1171 11B8 # 큅 => 큅 +D046 110F 1171 11B9 # 큆 => 큆 +D047 110F 1171 11BA # 큇 => 큇 +D048 110F 1171 11BB # 큈 => 큈 +D049 110F 1171 11BC # 큉 => 큉 +D04A 110F 1171 11BD # 큊 => 큊 +D04B 110F 1171 11BE # 큋 => 큋 +D04C 110F 1171 11BF # 큌 => 큌 +D04D 110F 1171 11C0 # 큍 => 큍 +D04E 110F 1171 11C1 # 큎 => 큎 +D04F 110F 1171 11C2 # 큏 => 큏 +D050 110F 1172 # 큐 => 큐 +D051 110F 1172 11A8 # 큑 => 큑 +D052 110F 1172 11A9 # 큒 => 큒 +D053 110F 1172 11AA # 큓 => 큓 +D054 110F 1172 11AB # 큔 => 큔 +D055 110F 1172 11AC # 큕 => 큕 +D056 110F 1172 11AD # 큖 => 큖 +D057 110F 1172 11AE # 큗 => 큗 +D058 110F 1172 11AF # 큘 => 큘 +D059 110F 1172 11B0 # 큙 => 큙 +D05A 110F 1172 11B1 # 큚 => 큚 +D05B 110F 1172 11B2 # 큛 => 큛 +D05C 110F 1172 11B3 # 큜 => 큜 +D05D 110F 1172 11B4 # 큝 => 큝 +D05E 110F 1172 11B5 # 큞 => 큞 +D05F 110F 1172 11B6 # 큟 => 큟 +D060 110F 1172 11B7 # 큠 => 큠 +D061 110F 1172 11B8 # 큡 => 큡 +D062 110F 1172 11B9 # 큢 => 큢 +D063 110F 1172 11BA # 큣 => 큣 +D064 110F 1172 11BB # 큤 => 큤 +D065 110F 1172 11BC # 큥 => 큥 +D066 110F 1172 11BD # 큦 => 큦 +D067 110F 1172 11BE # 큧 => 큧 +D068 110F 1172 11BF # 큨 => 큨 +D069 110F 1172 11C0 # 큩 => 큩 +D06A 110F 1172 11C1 # 큪 => 큪 +D06B 110F 1172 11C2 # 큫 => 큫 +D06C 110F 1173 # 크 => 크 +D06D 110F 1173 11A8 # 큭 => 큭 
+D06E 110F 1173 11A9 # 큮 => 큮 +D06F 110F 1173 11AA # 큯 => 큯 +D070 110F 1173 11AB # 큰 => 큰 +D071 110F 1173 11AC # 큱 => 큱 +D072 110F 1173 11AD # 큲 => 큲 +D073 110F 1173 11AE # 큳 => 큳 +D074 110F 1173 11AF # 클 => 클 +D075 110F 1173 11B0 # 큵 => 큵 +D076 110F 1173 11B1 # 큶 => 큶 +D077 110F 1173 11B2 # 큷 => 큷 +D078 110F 1173 11B3 # 큸 => 큸 +D079 110F 1173 11B4 # 큹 => 큹 +D07A 110F 1173 11B5 # 큺 => 큺 +D07B 110F 1173 11B6 # 큻 => 큻 +D07C 110F 1173 11B7 # 큼 => 큼 +D07D 110F 1173 11B8 # 큽 => 큽 +D07E 110F 1173 11B9 # 큾 => 큾 +D07F 110F 1173 11BA # 큿 => 큿 +D080 110F 1173 11BB # 킀 => 킀 +D081 110F 1173 11BC # 킁 => 킁 +D082 110F 1173 11BD # 킂 => 킂 +D083 110F 1173 11BE # 킃 => 킃 +D084 110F 1173 11BF # 킄 => 킄 +D085 110F 1173 11C0 # 킅 => 킅 +D086 110F 1173 11C1 # 킆 => 킆 +D087 110F 1173 11C2 # 킇 => 킇 +D088 110F 1174 # 킈 => 킈 +D089 110F 1174 11A8 # 킉 => 킉 +D08A 110F 1174 11A9 # 킊 => 킊 +D08B 110F 1174 11AA # 킋 => 킋 +D08C 110F 1174 11AB # 킌 => 킌 +D08D 110F 1174 11AC # 킍 => 킍 +D08E 110F 1174 11AD # 킎 => 킎 +D08F 110F 1174 11AE # 킏 => 킏 +D090 110F 1174 11AF # 킐 => 킐 +D091 110F 1174 11B0 # 킑 => 킑 +D092 110F 1174 11B1 # 킒 => 킒 +D093 110F 1174 11B2 # 킓 => 킓 +D094 110F 1174 11B3 # 킔 => 킔 +D095 110F 1174 11B4 # 킕 => 킕 +D096 110F 1174 11B5 # 킖 => 킖 +D097 110F 1174 11B6 # 킗 => 킗 +D098 110F 1174 11B7 # 킘 => 킘 +D099 110F 1174 11B8 # 킙 => 킙 +D09A 110F 1174 11B9 # 킚 => 킚 +D09B 110F 1174 11BA # 킛 => 킛 +D09C 110F 1174 11BB # 킜 => 킜 +D09D 110F 1174 11BC # 킝 => 킝 +D09E 110F 1174 11BD # 킞 => 킞 +D09F 110F 1174 11BE # 킟 => 킟 +D0A0 110F 1174 11BF # 킠 => 킠 +D0A1 110F 1174 11C0 # 킡 => 킡 +D0A2 110F 1174 11C1 # 킢 => 킢 +D0A3 110F 1174 11C2 # 킣 => 킣 +D0A4 110F 1175 # 키 => 키 +D0A5 110F 1175 11A8 # 킥 => 킥 +D0A6 110F 1175 11A9 # 킦 => 킦 +D0A7 110F 1175 11AA # 킧 => 킧 +D0A8 110F 1175 11AB # 킨 => 킨 +D0A9 110F 1175 11AC # 킩 => 킩 +D0AA 110F 1175 11AD # 킪 => 킪 +D0AB 110F 1175 11AE # 킫 => 킫 +D0AC 110F 1175 11AF # 킬 => 킬 +D0AD 110F 1175 11B0 # 킭 => 킭 +D0AE 110F 1175 11B1 # 킮 => 킮 +D0AF 110F 1175 11B2 # 킯 => 킯 +D0B0 110F 1175 11B3 # 킰 => 킰 
+D0B1 110F 1175 11B4 # 킱 => 킱 +D0B2 110F 1175 11B5 # 킲 => 킲 +D0B3 110F 1175 11B6 # 킳 => 킳 +D0B4 110F 1175 11B7 # 킴 => 킴 +D0B5 110F 1175 11B8 # 킵 => 킵 +D0B6 110F 1175 11B9 # 킶 => 킶 +D0B7 110F 1175 11BA # 킷 => 킷 +D0B8 110F 1175 11BB # 킸 => 킸 +D0B9 110F 1175 11BC # 킹 => 킹 +D0BA 110F 1175 11BD # 킺 => 킺 +D0BB 110F 1175 11BE # 킻 => 킻 +D0BC 110F 1175 11BF # 킼 => 킼 +D0BD 110F 1175 11C0 # 킽 => 킽 +D0BE 110F 1175 11C1 # 킾 => 킾 +D0BF 110F 1175 11C2 # 킿 => 킿 +D0C0 1110 1161 # 타 => 타 +D0C1 1110 1161 11A8 # 탁 => 탁 +D0C2 1110 1161 11A9 # 탂 => 탂 +D0C3 1110 1161 11AA # 탃 => 탃 +D0C4 1110 1161 11AB # 탄 => 탄 +D0C5 1110 1161 11AC # 탅 => 탅 +D0C6 1110 1161 11AD # 탆 => 탆 +D0C7 1110 1161 11AE # 탇 => 탇 +D0C8 1110 1161 11AF # 탈 => 탈 +D0C9 1110 1161 11B0 # 탉 => 탉 +D0CA 1110 1161 11B1 # 탊 => 탊 +D0CB 1110 1161 11B2 # 탋 => 탋 +D0CC 1110 1161 11B3 # 탌 => 탌 +D0CD 1110 1161 11B4 # 탍 => 탍 +D0CE 1110 1161 11B5 # 탎 => 탎 +D0CF 1110 1161 11B6 # 탏 => 탏 +D0D0 1110 1161 11B7 # 탐 => 탐 +D0D1 1110 1161 11B8 # 탑 => 탑 +D0D2 1110 1161 11B9 # 탒 => 탒 +D0D3 1110 1161 11BA # 탓 => 탓 +D0D4 1110 1161 11BB # 탔 => 탔 +D0D5 1110 1161 11BC # 탕 => 탕 +D0D6 1110 1161 11BD # 탖 => 탖 +D0D7 1110 1161 11BE # 탗 => 탗 +D0D8 1110 1161 11BF # 탘 => 탘 +D0D9 1110 1161 11C0 # 탙 => 탙 +D0DA 1110 1161 11C1 # 탚 => 탚 +D0DB 1110 1161 11C2 # 탛 => 탛 +D0DC 1110 1162 # 태 => 태 +D0DD 1110 1162 11A8 # 택 => 택 +D0DE 1110 1162 11A9 # 탞 => 탞 +D0DF 1110 1162 11AA # 탟 => 탟 +D0E0 1110 1162 11AB # 탠 => 탠 +D0E1 1110 1162 11AC # 탡 => 탡 +D0E2 1110 1162 11AD # 탢 => 탢 +D0E3 1110 1162 11AE # 탣 => 탣 +D0E4 1110 1162 11AF # 탤 => 탤 +D0E5 1110 1162 11B0 # 탥 => 탥 +D0E6 1110 1162 11B1 # 탦 => 탦 +D0E7 1110 1162 11B2 # 탧 => 탧 +D0E8 1110 1162 11B3 # 탨 => 탨 +D0E9 1110 1162 11B4 # 탩 => 탩 +D0EA 1110 1162 11B5 # 탪 => 탪 +D0EB 1110 1162 11B6 # 탫 => 탫 +D0EC 1110 1162 11B7 # 탬 => 탬 +D0ED 1110 1162 11B8 # 탭 => 탭 +D0EE 1110 1162 11B9 # 탮 => 탮 +D0EF 1110 1162 11BA # 탯 => 탯 +D0F0 1110 1162 11BB # 탰 => 탰 +D0F1 1110 1162 11BC # 탱 => 탱 +D0F2 1110 1162 11BD # 탲 => 탲 +D0F3 1110 1162 11BE # 탳 => 탳 
+D0F4 1110 1162 11BF # 탴 => 탴 +D0F5 1110 1162 11C0 # 탵 => 탵 +D0F6 1110 1162 11C1 # 탶 => 탶 +D0F7 1110 1162 11C2 # 탷 => 탷 +D0F8 1110 1163 # 탸 => 탸 +D0F9 1110 1163 11A8 # 탹 => 탹 +D0FA 1110 1163 11A9 # 탺 => 탺 +D0FB 1110 1163 11AA # 탻 => 탻 +D0FC 1110 1163 11AB # 탼 => 탼 +D0FD 1110 1163 11AC # 탽 => 탽 +D0FE 1110 1163 11AD # 탾 => 탾 +D0FF 1110 1163 11AE # 탿 => 탿 +D100 1110 1163 11AF # 턀 => 턀 +D101 1110 1163 11B0 # 턁 => 턁 +D102 1110 1163 11B1 # 턂 => 턂 +D103 1110 1163 11B2 # 턃 => 턃 +D104 1110 1163 11B3 # 턄 => 턄 +D105 1110 1163 11B4 # 턅 => 턅 +D106 1110 1163 11B5 # 턆 => 턆 +D107 1110 1163 11B6 # 턇 => 턇 +D108 1110 1163 11B7 # 턈 => 턈 +D109 1110 1163 11B8 # 턉 => 턉 +D10A 1110 1163 11B9 # 턊 => 턊 +D10B 1110 1163 11BA # 턋 => 턋 +D10C 1110 1163 11BB # 턌 => 턌 +D10D 1110 1163 11BC # 턍 => 턍 +D10E 1110 1163 11BD # 턎 => 턎 +D10F 1110 1163 11BE # 턏 => 턏 +D110 1110 1163 11BF # 턐 => 턐 +D111 1110 1163 11C0 # 턑 => 턑 +D112 1110 1163 11C1 # 턒 => 턒 +D113 1110 1163 11C2 # 턓 => 턓 +D114 1110 1164 # 턔 => 턔 +D115 1110 1164 11A8 # 턕 => 턕 +D116 1110 1164 11A9 # 턖 => 턖 +D117 1110 1164 11AA # 턗 => 턗 +D118 1110 1164 11AB # 턘 => 턘 +D119 1110 1164 11AC # 턙 => 턙 +D11A 1110 1164 11AD # 턚 => 턚 +D11B 1110 1164 11AE # 턛 => 턛 +D11C 1110 1164 11AF # 턜 => 턜 +D11D 1110 1164 11B0 # 턝 => 턝 +D11E 1110 1164 11B1 # 턞 => 턞 +D11F 1110 1164 11B2 # 턟 => 턟 +D120 1110 1164 11B3 # 턠 => 턠 +D121 1110 1164 11B4 # 턡 => 턡 +D122 1110 1164 11B5 # 턢 => 턢 +D123 1110 1164 11B6 # 턣 => 턣 +D124 1110 1164 11B7 # 턤 => 턤 +D125 1110 1164 11B8 # 턥 => 턥 +D126 1110 1164 11B9 # 턦 => 턦 +D127 1110 1164 11BA # 턧 => 턧 +D128 1110 1164 11BB # 턨 => 턨 +D129 1110 1164 11BC # 턩 => 턩 +D12A 1110 1164 11BD # 턪 => 턪 +D12B 1110 1164 11BE # 턫 => 턫 +D12C 1110 1164 11BF # 턬 => 턬 +D12D 1110 1164 11C0 # 턭 => 턭 +D12E 1110 1164 11C1 # 턮 => 턮 +D12F 1110 1164 11C2 # 턯 => 턯 +D130 1110 1165 # 터 => 터 +D131 1110 1165 11A8 # 턱 => 턱 +D132 1110 1165 11A9 # 턲 => 턲 +D133 1110 1165 11AA # 턳 => 턳 +D134 1110 1165 11AB # 턴 => 턴 +D135 1110 1165 11AC # 턵 => 턵 +D136 1110 1165 11AD # 턶 => 턶 
+D137 1110 1165 11AE # 턷 => 턷 +D138 1110 1165 11AF # 털 => 털 +D139 1110 1165 11B0 # 턹 => 턹 +D13A 1110 1165 11B1 # 턺 => 턺 +D13B 1110 1165 11B2 # 턻 => 턻 +D13C 1110 1165 11B3 # 턼 => 턼 +D13D 1110 1165 11B4 # 턽 => 턽 +D13E 1110 1165 11B5 # 턾 => 턾 +D13F 1110 1165 11B6 # 턿 => 턿 +D140 1110 1165 11B7 # 텀 => 텀 +D141 1110 1165 11B8 # 텁 => 텁 +D142 1110 1165 11B9 # 텂 => 텂 +D143 1110 1165 11BA # 텃 => 텃 +D144 1110 1165 11BB # 텄 => 텄 +D145 1110 1165 11BC # 텅 => 텅 +D146 1110 1165 11BD # 텆 => 텆 +D147 1110 1165 11BE # 텇 => 텇 +D148 1110 1165 11BF # 텈 => 텈 +D149 1110 1165 11C0 # 텉 => 텉 +D14A 1110 1165 11C1 # 텊 => 텊 +D14B 1110 1165 11C2 # 텋 => 텋 +D14C 1110 1166 # 테 => 테 +D14D 1110 1166 11A8 # 텍 => 텍 +D14E 1110 1166 11A9 # 텎 => 텎 +D14F 1110 1166 11AA # 텏 => 텏 +D150 1110 1166 11AB # 텐 => 텐 +D151 1110 1166 11AC # 텑 => 텑 +D152 1110 1166 11AD # 텒 => 텒 +D153 1110 1166 11AE # 텓 => 텓 +D154 1110 1166 11AF # 텔 => 텔 +D155 1110 1166 11B0 # 텕 => 텕 +D156 1110 1166 11B1 # 텖 => 텖 +D157 1110 1166 11B2 # 텗 => 텗 +D158 1110 1166 11B3 # 텘 => 텘 +D159 1110 1166 11B4 # 텙 => 텙 +D15A 1110 1166 11B5 # 텚 => 텚 +D15B 1110 1166 11B6 # 텛 => 텛 +D15C 1110 1166 11B7 # 템 => 템 +D15D 1110 1166 11B8 # 텝 => 텝 +D15E 1110 1166 11B9 # 텞 => 텞 +D15F 1110 1166 11BA # 텟 => 텟 +D160 1110 1166 11BB # 텠 => 텠 +D161 1110 1166 11BC # 텡 => 텡 +D162 1110 1166 11BD # 텢 => 텢 +D163 1110 1166 11BE # 텣 => 텣 +D164 1110 1166 11BF # 텤 => 텤 +D165 1110 1166 11C0 # 텥 => 텥 +D166 1110 1166 11C1 # 텦 => 텦 +D167 1110 1166 11C2 # 텧 => 텧 +D168 1110 1167 # 텨 => 텨 +D169 1110 1167 11A8 # 텩 => 텩 +D16A 1110 1167 11A9 # 텪 => 텪 +D16B 1110 1167 11AA # 텫 => 텫 +D16C 1110 1167 11AB # 텬 => 텬 +D16D 1110 1167 11AC # 텭 => 텭 +D16E 1110 1167 11AD # 텮 => 텮 +D16F 1110 1167 11AE # 텯 => 텯 +D170 1110 1167 11AF # 텰 => 텰 +D171 1110 1167 11B0 # 텱 => 텱 +D172 1110 1167 11B1 # 텲 => 텲 +D173 1110 1167 11B2 # 텳 => 텳 +D174 1110 1167 11B3 # 텴 => 텴 +D175 1110 1167 11B4 # 텵 => 텵 +D176 1110 1167 11B5 # 텶 => 텶 +D177 1110 1167 11B6 # 텷 => 텷 +D178 1110 1167 11B7 # 텸 => 텸 +D179 1110 1167 11B8 # 텹 => 텹 
+D17A 1110 1167 11B9 # 텺 => 텺 +D17B 1110 1167 11BA # 텻 => 텻 +D17C 1110 1167 11BB # 텼 => 텼 +D17D 1110 1167 11BC # 텽 => 텽 +D17E 1110 1167 11BD # 텾 => 텾 +D17F 1110 1167 11BE # 텿 => 텿 +D180 1110 1167 11BF # 톀 => 톀 +D181 1110 1167 11C0 # 톁 => 톁 +D182 1110 1167 11C1 # 톂 => 톂 +D183 1110 1167 11C2 # 톃 => 톃 +D184 1110 1168 # 톄 => 톄 +D185 1110 1168 11A8 # 톅 => 톅 +D186 1110 1168 11A9 # 톆 => 톆 +D187 1110 1168 11AA # 톇 => 톇 +D188 1110 1168 11AB # 톈 => 톈 +D189 1110 1168 11AC # 톉 => 톉 +D18A 1110 1168 11AD # 톊 => 톊 +D18B 1110 1168 11AE # 톋 => 톋 +D18C 1110 1168 11AF # 톌 => 톌 +D18D 1110 1168 11B0 # 톍 => 톍 +D18E 1110 1168 11B1 # 톎 => 톎 +D18F 1110 1168 11B2 # 톏 => 톏 +D190 1110 1168 11B3 # 톐 => 톐 +D191 1110 1168 11B4 # 톑 => 톑 +D192 1110 1168 11B5 # 톒 => 톒 +D193 1110 1168 11B6 # 톓 => 톓 +D194 1110 1168 11B7 # 톔 => 톔 +D195 1110 1168 11B8 # 톕 => 톕 +D196 1110 1168 11B9 # 톖 => 톖 +D197 1110 1168 11BA # 톗 => 톗 +D198 1110 1168 11BB # 톘 => 톘 +D199 1110 1168 11BC # 톙 => 톙 +D19A 1110 1168 11BD # 톚 => 톚 +D19B 1110 1168 11BE # 톛 => 톛 +D19C 1110 1168 11BF # 톜 => 톜 +D19D 1110 1168 11C0 # 톝 => 톝 +D19E 1110 1168 11C1 # 톞 => 톞 +D19F 1110 1168 11C2 # 톟 => 톟 +D1A0 1110 1169 # 토 => 토 +D1A1 1110 1169 11A8 # 톡 => 톡 +D1A2 1110 1169 11A9 # 톢 => 톢 +D1A3 1110 1169 11AA # 톣 => 톣 +D1A4 1110 1169 11AB # 톤 => 톤 +D1A5 1110 1169 11AC # 톥 => 톥 +D1A6 1110 1169 11AD # 톦 => 톦 +D1A7 1110 1169 11AE # 톧 => 톧 +D1A8 1110 1169 11AF # 톨 => 톨 +D1A9 1110 1169 11B0 # 톩 => 톩 +D1AA 1110 1169 11B1 # 톪 => 톪 +D1AB 1110 1169 11B2 # 톫 => 톫 +D1AC 1110 1169 11B3 # 톬 => 톬 +D1AD 1110 1169 11B4 # 톭 => 톭 +D1AE 1110 1169 11B5 # 톮 => 톮 +D1AF 1110 1169 11B6 # 톯 => 톯 +D1B0 1110 1169 11B7 # 톰 => 톰 +D1B1 1110 1169 11B8 # 톱 => 톱 +D1B2 1110 1169 11B9 # 톲 => 톲 +D1B3 1110 1169 11BA # 톳 => 톳 +D1B4 1110 1169 11BB # 톴 => 톴 +D1B5 1110 1169 11BC # 통 => 통 +D1B6 1110 1169 11BD # 톶 => 톶 +D1B7 1110 1169 11BE # 톷 => 톷 +D1B8 1110 1169 11BF # 톸 => 톸 +D1B9 1110 1169 11C0 # 톹 => 톹 +D1BA 1110 1169 11C1 # 톺 => 톺 +D1BB 1110 1169 11C2 # 톻 => 톻 +D1BC 1110 116A # 톼 => 톼 
+D1BD 1110 116A 11A8 # 톽 => 톽 +D1BE 1110 116A 11A9 # 톾 => 톾 +D1BF 1110 116A 11AA # 톿 => 톿 +D1C0 1110 116A 11AB # 퇀 => 퇀 +D1C1 1110 116A 11AC # 퇁 => 퇁 +D1C2 1110 116A 11AD # 퇂 => 퇂 +D1C3 1110 116A 11AE # 퇃 => 퇃 +D1C4 1110 116A 11AF # 퇄 => 퇄 +D1C5 1110 116A 11B0 # 퇅 => 퇅 +D1C6 1110 116A 11B1 # 퇆 => 퇆 +D1C7 1110 116A 11B2 # 퇇 => 퇇 +D1C8 1110 116A 11B3 # 퇈 => 퇈 +D1C9 1110 116A 11B4 # 퇉 => 퇉 +D1CA 1110 116A 11B5 # 퇊 => 퇊 +D1CB 1110 116A 11B6 # 퇋 => 퇋 +D1CC 1110 116A 11B7 # 퇌 => 퇌 +D1CD 1110 116A 11B8 # 퇍 => 퇍 +D1CE 1110 116A 11B9 # 퇎 => 퇎 +D1CF 1110 116A 11BA # 퇏 => 퇏 +D1D0 1110 116A 11BB # 퇐 => 퇐 +D1D1 1110 116A 11BC # 퇑 => 퇑 +D1D2 1110 116A 11BD # 퇒 => 퇒 +D1D3 1110 116A 11BE # 퇓 => 퇓 +D1D4 1110 116A 11BF # 퇔 => 퇔 +D1D5 1110 116A 11C0 # 퇕 => 퇕 +D1D6 1110 116A 11C1 # 퇖 => 퇖 +D1D7 1110 116A 11C2 # 퇗 => 퇗 +D1D8 1110 116B # 퇘 => 퇘 +D1D9 1110 116B 11A8 # 퇙 => 퇙 +D1DA 1110 116B 11A9 # 퇚 => 퇚 +D1DB 1110 116B 11AA # 퇛 => 퇛 +D1DC 1110 116B 11AB # 퇜 => 퇜 +D1DD 1110 116B 11AC # 퇝 => 퇝 +D1DE 1110 116B 11AD # 퇞 => 퇞 +D1DF 1110 116B 11AE # 퇟 => 퇟 +D1E0 1110 116B 11AF # 퇠 => 퇠 +D1E1 1110 116B 11B0 # 퇡 => 퇡 +D1E2 1110 116B 11B1 # 퇢 => 퇢 +D1E3 1110 116B 11B2 # 퇣 => 퇣 +D1E4 1110 116B 11B3 # 퇤 => 퇤 +D1E5 1110 116B 11B4 # 퇥 => 퇥 +D1E6 1110 116B 11B5 # 퇦 => 퇦 +D1E7 1110 116B 11B6 # 퇧 => 퇧 +D1E8 1110 116B 11B7 # 퇨 => 퇨 +D1E9 1110 116B 11B8 # 퇩 => 퇩 +D1EA 1110 116B 11B9 # 퇪 => 퇪 +D1EB 1110 116B 11BA # 퇫 => 퇫 +D1EC 1110 116B 11BB # 퇬 => 퇬 +D1ED 1110 116B 11BC # 퇭 => 퇭 +D1EE 1110 116B 11BD # 퇮 => 퇮 +D1EF 1110 116B 11BE # 퇯 => 퇯 +D1F0 1110 116B 11BF # 퇰 => 퇰 +D1F1 1110 116B 11C0 # 퇱 => 퇱 +D1F2 1110 116B 11C1 # 퇲 => 퇲 +D1F3 1110 116B 11C2 # 퇳 => 퇳 +D1F4 1110 116C # 퇴 => 퇴 +D1F5 1110 116C 11A8 # 퇵 => 퇵 +D1F6 1110 116C 11A9 # 퇶 => 퇶 +D1F7 1110 116C 11AA # 퇷 => 퇷 +D1F8 1110 116C 11AB # 퇸 => 퇸 +D1F9 1110 116C 11AC # 퇹 => 퇹 +D1FA 1110 116C 11AD # 퇺 => 퇺 +D1FB 1110 116C 11AE # 퇻 => 퇻 +D1FC 1110 116C 11AF # 퇼 => 퇼 +D1FD 1110 116C 11B0 # 퇽 => 퇽 +D1FE 1110 116C 11B1 # 퇾 => 퇾 +D1FF 1110 116C 11B2 # 퇿 => 퇿 
+D200 1110 116C 11B3 # 툀 => 툀 +D201 1110 116C 11B4 # 툁 => 툁 +D202 1110 116C 11B5 # 툂 => 툂 +D203 1110 116C 11B6 # 툃 => 툃 +D204 1110 116C 11B7 # 툄 => 툄 +D205 1110 116C 11B8 # 툅 => 툅 +D206 1110 116C 11B9 # 툆 => 툆 +D207 1110 116C 11BA # 툇 => 툇 +D208 1110 116C 11BB # 툈 => 툈 +D209 1110 116C 11BC # 툉 => 툉 +D20A 1110 116C 11BD # 툊 => 툊 +D20B 1110 116C 11BE # 툋 => 툋 +D20C 1110 116C 11BF # 툌 => 툌 +D20D 1110 116C 11C0 # 툍 => 툍 +D20E 1110 116C 11C1 # 툎 => 툎 +D20F 1110 116C 11C2 # 툏 => 툏 +D210 1110 116D # 툐 => 툐 +D211 1110 116D 11A8 # 툑 => 툑 +D212 1110 116D 11A9 # 툒 => 툒 +D213 1110 116D 11AA # 툓 => 툓 +D214 1110 116D 11AB # 툔 => 툔 +D215 1110 116D 11AC # 툕 => 툕 +D216 1110 116D 11AD # 툖 => 툖 +D217 1110 116D 11AE # 툗 => 툗 +D218 1110 116D 11AF # 툘 => 툘 +D219 1110 116D 11B0 # 툙 => 툙 +D21A 1110 116D 11B1 # 툚 => 툚 +D21B 1110 116D 11B2 # 툛 => 툛 +D21C 1110 116D 11B3 # 툜 => 툜 +D21D 1110 116D 11B4 # 툝 => 툝 +D21E 1110 116D 11B5 # 툞 => 툞 +D21F 1110 116D 11B6 # 툟 => 툟 +D220 1110 116D 11B7 # 툠 => 툠 +D221 1110 116D 11B8 # 툡 => 툡 +D222 1110 116D 11B9 # 툢 => 툢 +D223 1110 116D 11BA # 툣 => 툣 +D224 1110 116D 11BB # 툤 => 툤 +D225 1110 116D 11BC # 툥 => 툥 +D226 1110 116D 11BD # 툦 => 툦 +D227 1110 116D 11BE # 툧 => 툧 +D228 1110 116D 11BF # 툨 => 툨 +D229 1110 116D 11C0 # 툩 => 툩 +D22A 1110 116D 11C1 # 툪 => 툪 +D22B 1110 116D 11C2 # 툫 => 툫 +D22C 1110 116E # 투 => 투 +D22D 1110 116E 11A8 # 툭 => 툭 +D22E 1110 116E 11A9 # 툮 => 툮 +D22F 1110 116E 11AA # 툯 => 툯 +D230 1110 116E 11AB # 툰 => 툰 +D231 1110 116E 11AC # 툱 => 툱 +D232 1110 116E 11AD # 툲 => 툲 +D233 1110 116E 11AE # 툳 => 툳 +D234 1110 116E 11AF # 툴 => 툴 +D235 1110 116E 11B0 # 툵 => 툵 +D236 1110 116E 11B1 # 툶 => 툶 +D237 1110 116E 11B2 # 툷 => 툷 +D238 1110 116E 11B3 # 툸 => 툸 +D239 1110 116E 11B4 # 툹 => 툹 +D23A 1110 116E 11B5 # 툺 => 툺 +D23B 1110 116E 11B6 # 툻 => 툻 +D23C 1110 116E 11B7 # 툼 => 툼 +D23D 1110 116E 11B8 # 툽 => 툽 +D23E 1110 116E 11B9 # 툾 => 툾 +D23F 1110 116E 11BA # 툿 => 툿 +D240 1110 116E 11BB # 퉀 => 퉀 +D241 1110 116E 11BC # 퉁 => 퉁 +D242 1110 116E 11BD # 퉂 => 퉂 
+D243 1110 116E 11BE # 퉃 => 퉃 +D244 1110 116E 11BF # 퉄 => 퉄 +D245 1110 116E 11C0 # 퉅 => 퉅 +D246 1110 116E 11C1 # 퉆 => 퉆 +D247 1110 116E 11C2 # 퉇 => 퉇 +D248 1110 116F # 퉈 => 퉈 +D249 1110 116F 11A8 # 퉉 => 퉉 +D24A 1110 116F 11A9 # 퉊 => 퉊 +D24B 1110 116F 11AA # 퉋 => 퉋 +D24C 1110 116F 11AB # 퉌 => 퉌 +D24D 1110 116F 11AC # 퉍 => 퉍 +D24E 1110 116F 11AD # 퉎 => 퉎 +D24F 1110 116F 11AE # 퉏 => 퉏 +D250 1110 116F 11AF # 퉐 => 퉐 +D251 1110 116F 11B0 # 퉑 => 퉑 +D252 1110 116F 11B1 # 퉒 => 퉒 +D253 1110 116F 11B2 # 퉓 => 퉓 +D254 1110 116F 11B3 # 퉔 => 퉔 +D255 1110 116F 11B4 # 퉕 => 퉕 +D256 1110 116F 11B5 # 퉖 => 퉖 +D257 1110 116F 11B6 # 퉗 => 퉗 +D258 1110 116F 11B7 # 퉘 => 퉘 +D259 1110 116F 11B8 # 퉙 => 퉙 +D25A 1110 116F 11B9 # 퉚 => 퉚 +D25B 1110 116F 11BA # 퉛 => 퉛 +D25C 1110 116F 11BB # 퉜 => 퉜 +D25D 1110 116F 11BC # 퉝 => 퉝 +D25E 1110 116F 11BD # 퉞 => 퉞 +D25F 1110 116F 11BE # 퉟 => 퉟 +D260 1110 116F 11BF # 퉠 => 퉠 +D261 1110 116F 11C0 # 퉡 => 퉡 +D262 1110 116F 11C1 # 퉢 => 퉢 +D263 1110 116F 11C2 # 퉣 => 퉣 +D264 1110 1170 # 퉤 => 퉤 +D265 1110 1170 11A8 # 퉥 => 퉥 +D266 1110 1170 11A9 # 퉦 => 퉦 +D267 1110 1170 11AA # 퉧 => 퉧 +D268 1110 1170 11AB # 퉨 => 퉨 +D269 1110 1170 11AC # 퉩 => 퉩 +D26A 1110 1170 11AD # 퉪 => 퉪 +D26B 1110 1170 11AE # 퉫 => 퉫 +D26C 1110 1170 11AF # 퉬 => 퉬 +D26D 1110 1170 11B0 # 퉭 => 퉭 +D26E 1110 1170 11B1 # 퉮 => 퉮 +D26F 1110 1170 11B2 # 퉯 => 퉯 +D270 1110 1170 11B3 # 퉰 => 퉰 +D271 1110 1170 11B4 # 퉱 => 퉱 +D272 1110 1170 11B5 # 퉲 => 퉲 +D273 1110 1170 11B6 # 퉳 => 퉳 +D274 1110 1170 11B7 # 퉴 => 퉴 +D275 1110 1170 11B8 # 퉵 => 퉵 +D276 1110 1170 11B9 # 퉶 => 퉶 +D277 1110 1170 11BA # 퉷 => 퉷 +D278 1110 1170 11BB # 퉸 => 퉸 +D279 1110 1170 11BC # 퉹 => 퉹 +D27A 1110 1170 11BD # 퉺 => 퉺 +D27B 1110 1170 11BE # 퉻 => 퉻 +D27C 1110 1170 11BF # 퉼 => 퉼 +D27D 1110 1170 11C0 # 퉽 => 퉽 +D27E 1110 1170 11C1 # 퉾 => 퉾 +D27F 1110 1170 11C2 # 퉿 => 퉿 +D280 1110 1171 # 튀 => 튀 +D281 1110 1171 11A8 # 튁 => 튁 +D282 1110 1171 11A9 # 튂 => 튂 +D283 1110 1171 11AA # 튃 => 튃 +D284 1110 1171 11AB # 튄 => 튄 +D285 1110 1171 11AC # 튅 => 튅 
+D286 1110 1171 11AD # 튆 => 튆 +D287 1110 1171 11AE # 튇 => 튇 +D288 1110 1171 11AF # 튈 => 튈 +D289 1110 1171 11B0 # 튉 => 튉 +D28A 1110 1171 11B1 # 튊 => 튊 +D28B 1110 1171 11B2 # 튋 => 튋 +D28C 1110 1171 11B3 # 튌 => 튌 +D28D 1110 1171 11B4 # 튍 => 튍 +D28E 1110 1171 11B5 # 튎 => 튎 +D28F 1110 1171 11B6 # 튏 => 튏 +D290 1110 1171 11B7 # 튐 => 튐 +D291 1110 1171 11B8 # 튑 => 튑 +D292 1110 1171 11B9 # 튒 => 튒 +D293 1110 1171 11BA # 튓 => 튓 +D294 1110 1171 11BB # 튔 => 튔 +D295 1110 1171 11BC # 튕 => 튕 +D296 1110 1171 11BD # 튖 => 튖 +D297 1110 1171 11BE # 튗 => 튗 +D298 1110 1171 11BF # 튘 => 튘 +D299 1110 1171 11C0 # 튙 => 튙 +D29A 1110 1171 11C1 # 튚 => 튚 +D29B 1110 1171 11C2 # 튛 => 튛 +D29C 1110 1172 # 튜 => 튜 +D29D 1110 1172 11A8 # 튝 => 튝 +D29E 1110 1172 11A9 # 튞 => 튞 +D29F 1110 1172 11AA # 튟 => 튟 +D2A0 1110 1172 11AB # 튠 => 튠 +D2A1 1110 1172 11AC # 튡 => 튡 +D2A2 1110 1172 11AD # 튢 => 튢 +D2A3 1110 1172 11AE # 튣 => 튣 +D2A4 1110 1172 11AF # 튤 => 튤 +D2A5 1110 1172 11B0 # 튥 => 튥 +D2A6 1110 1172 11B1 # 튦 => 튦 +D2A7 1110 1172 11B2 # 튧 => 튧 +D2A8 1110 1172 11B3 # 튨 => 튨 +D2A9 1110 1172 11B4 # 튩 => 튩 +D2AA 1110 1172 11B5 # 튪 => 튪 +D2AB 1110 1172 11B6 # 튫 => 튫 +D2AC 1110 1172 11B7 # 튬 => 튬 +D2AD 1110 1172 11B8 # 튭 => 튭 +D2AE 1110 1172 11B9 # 튮 => 튮 +D2AF 1110 1172 11BA # 튯 => 튯 +D2B0 1110 1172 11BB # 튰 => 튰 +D2B1 1110 1172 11BC # 튱 => 튱 +D2B2 1110 1172 11BD # 튲 => 튲 +D2B3 1110 1172 11BE # 튳 => 튳 +D2B4 1110 1172 11BF # 튴 => 튴 +D2B5 1110 1172 11C0 # 튵 => 튵 +D2B6 1110 1172 11C1 # 튶 => 튶 +D2B7 1110 1172 11C2 # 튷 => 튷 +D2B8 1110 1173 # 트 => 트 +D2B9 1110 1173 11A8 # 특 => 특 +D2BA 1110 1173 11A9 # 튺 => 튺 +D2BB 1110 1173 11AA # 튻 => 튻 +D2BC 1110 1173 11AB # 튼 => 튼 +D2BD 1110 1173 11AC # 튽 => 튽 +D2BE 1110 1173 11AD # 튾 => 튾 +D2BF 1110 1173 11AE # 튿 => 튿 +D2C0 1110 1173 11AF # 틀 => 틀 +D2C1 1110 1173 11B0 # 틁 => 틁 +D2C2 1110 1173 11B1 # 틂 => 틂 +D2C3 1110 1173 11B2 # 틃 => 틃 +D2C4 1110 1173 11B3 # 틄 => 틄 +D2C5 1110 1173 11B4 # 틅 => 틅 +D2C6 1110 1173 11B5 # 틆 => 틆 +D2C7 1110 1173 11B6 # 틇 => 틇 +D2C8 1110 1173 11B7 # 틈 => 틈 
+D2C9 1110 1173 11B8 # 틉 => 틉 +D2CA 1110 1173 11B9 # 틊 => 틊 +D2CB 1110 1173 11BA # 틋 => 틋 +D2CC 1110 1173 11BB # 틌 => 틌 +D2CD 1110 1173 11BC # 틍 => 틍 +D2CE 1110 1173 11BD # 틎 => 틎 +D2CF 1110 1173 11BE # 틏 => 틏 +D2D0 1110 1173 11BF # 틐 => 틐 +D2D1 1110 1173 11C0 # 틑 => 틑 +D2D2 1110 1173 11C1 # 틒 => 틒 +D2D3 1110 1173 11C2 # 틓 => 틓 +D2D4 1110 1174 # 틔 => 틔 +D2D5 1110 1174 11A8 # 틕 => 틕 +D2D6 1110 1174 11A9 # 틖 => 틖 +D2D7 1110 1174 11AA # 틗 => 틗 +D2D8 1110 1174 11AB # 틘 => 틘 +D2D9 1110 1174 11AC # 틙 => 틙 +D2DA 1110 1174 11AD # 틚 => 틚 +D2DB 1110 1174 11AE # 틛 => 틛 +D2DC 1110 1174 11AF # 틜 => 틜 +D2DD 1110 1174 11B0 # 틝 => 틝 +D2DE 1110 1174 11B1 # 틞 => 틞 +D2DF 1110 1174 11B2 # 틟 => 틟 +D2E0 1110 1174 11B3 # 틠 => 틠 +D2E1 1110 1174 11B4 # 틡 => 틡 +D2E2 1110 1174 11B5 # 틢 => 틢 +D2E3 1110 1174 11B6 # 틣 => 틣 +D2E4 1110 1174 11B7 # 틤 => 틤 +D2E5 1110 1174 11B8 # 틥 => 틥 +D2E6 1110 1174 11B9 # 틦 => 틦 +D2E7 1110 1174 11BA # 틧 => 틧 +D2E8 1110 1174 11BB # 틨 => 틨 +D2E9 1110 1174 11BC # 틩 => 틩 +D2EA 1110 1174 11BD # 틪 => 틪 +D2EB 1110 1174 11BE # 틫 => 틫 +D2EC 1110 1174 11BF # 틬 => 틬 +D2ED 1110 1174 11C0 # 틭 => 틭 +D2EE 1110 1174 11C1 # 틮 => 틮 +D2EF 1110 1174 11C2 # 틯 => 틯 +D2F0 1110 1175 # 티 => 티 +D2F1 1110 1175 11A8 # 틱 => 틱 +D2F2 1110 1175 11A9 # 틲 => 틲 +D2F3 1110 1175 11AA # 틳 => 틳 +D2F4 1110 1175 11AB # 틴 => 틴 +D2F5 1110 1175 11AC # 틵 => 틵 +D2F6 1110 1175 11AD # 틶 => 틶 +D2F7 1110 1175 11AE # 틷 => 틷 +D2F8 1110 1175 11AF # 틸 => 틸 +D2F9 1110 1175 11B0 # 틹 => 틹 +D2FA 1110 1175 11B1 # 틺 => 틺 +D2FB 1110 1175 11B2 # 틻 => 틻 +D2FC 1110 1175 11B3 # 틼 => 틼 +D2FD 1110 1175 11B4 # 틽 => 틽 +D2FE 1110 1175 11B5 # 틾 => 틾 +D2FF 1110 1175 11B6 # 틿 => 틿 +D300 1110 1175 11B7 # 팀 => 팀 +D301 1110 1175 11B8 # 팁 => 팁 +D302 1110 1175 11B9 # 팂 => 팂 +D303 1110 1175 11BA # 팃 => 팃 +D304 1110 1175 11BB # 팄 => 팄 +D305 1110 1175 11BC # 팅 => 팅 +D306 1110 1175 11BD # 팆 => 팆 +D307 1110 1175 11BE # 팇 => 팇 +D308 1110 1175 11BF # 팈 => 팈 +D309 1110 1175 11C0 # 팉 => 팉 +D30A 1110 1175 11C1 # 팊 => 팊 +D30B 1110 1175 11C2 # 팋 => 팋 
+D30C 1111 1161 # 파 => 파 +D30D 1111 1161 11A8 # 팍 => 팍 +D30E 1111 1161 11A9 # 팎 => 팎 +D30F 1111 1161 11AA # 팏 => 팏 +D310 1111 1161 11AB # 판 => 판 +D311 1111 1161 11AC # 팑 => 팑 +D312 1111 1161 11AD # 팒 => 팒 +D313 1111 1161 11AE # 팓 => 팓 +D314 1111 1161 11AF # 팔 => 팔 +D315 1111 1161 11B0 # 팕 => 팕 +D316 1111 1161 11B1 # 팖 => 팖 +D317 1111 1161 11B2 # 팗 => 팗 +D318 1111 1161 11B3 # 팘 => 팘 +D319 1111 1161 11B4 # 팙 => 팙 +D31A 1111 1161 11B5 # 팚 => 팚 +D31B 1111 1161 11B6 # 팛 => 팛 +D31C 1111 1161 11B7 # 팜 => 팜 +D31D 1111 1161 11B8 # 팝 => 팝 +D31E 1111 1161 11B9 # 팞 => 팞 +D31F 1111 1161 11BA # 팟 => 팟 +D320 1111 1161 11BB # 팠 => 팠 +D321 1111 1161 11BC # 팡 => 팡 +D322 1111 1161 11BD # 팢 => 팢 +D323 1111 1161 11BE # 팣 => 팣 +D324 1111 1161 11BF # 팤 => 팤 +D325 1111 1161 11C0 # 팥 => 팥 +D326 1111 1161 11C1 # 팦 => 팦 +D327 1111 1161 11C2 # 팧 => 팧 +D328 1111 1162 # 패 => 패 +D329 1111 1162 11A8 # 팩 => 팩 +D32A 1111 1162 11A9 # 팪 => 팪 +D32B 1111 1162 11AA # 팫 => 팫 +D32C 1111 1162 11AB # 팬 => 팬 +D32D 1111 1162 11AC # 팭 => 팭 +D32E 1111 1162 11AD # 팮 => 팮 +D32F 1111 1162 11AE # 팯 => 팯 +D330 1111 1162 11AF # 팰 => 팰 +D331 1111 1162 11B0 # 팱 => 팱 +D332 1111 1162 11B1 # 팲 => 팲 +D333 1111 1162 11B2 # 팳 => 팳 +D334 1111 1162 11B3 # 팴 => 팴 +D335 1111 1162 11B4 # 팵 => 팵 +D336 1111 1162 11B5 # 팶 => 팶 +D337 1111 1162 11B6 # 팷 => 팷 +D338 1111 1162 11B7 # 팸 => 팸 +D339 1111 1162 11B8 # 팹 => 팹 +D33A 1111 1162 11B9 # 팺 => 팺 +D33B 1111 1162 11BA # 팻 => 팻 +D33C 1111 1162 11BB # 팼 => 팼 +D33D 1111 1162 11BC # 팽 => 팽 +D33E 1111 1162 11BD # 팾 => 팾 +D33F 1111 1162 11BE # 팿 => 팿 +D340 1111 1162 11BF # 퍀 => 퍀 +D341 1111 1162 11C0 # 퍁 => 퍁 +D342 1111 1162 11C1 # 퍂 => 퍂 +D343 1111 1162 11C2 # 퍃 => 퍃 +D344 1111 1163 # 퍄 => 퍄 +D345 1111 1163 11A8 # 퍅 => 퍅 +D346 1111 1163 11A9 # 퍆 => 퍆 +D347 1111 1163 11AA # 퍇 => 퍇 +D348 1111 1163 11AB # 퍈 => 퍈 +D349 1111 1163 11AC # 퍉 => 퍉 +D34A 1111 1163 11AD # 퍊 => 퍊 +D34B 1111 1163 11AE # 퍋 => 퍋 +D34C 1111 1163 11AF # 퍌 => 퍌 +D34D 1111 1163 11B0 # 퍍 => 퍍 +D34E 1111 1163 11B1 # 퍎 => 퍎 
+D34F 1111 1163 11B2 # 퍏 => 퍏 +D350 1111 1163 11B3 # 퍐 => 퍐 +D351 1111 1163 11B4 # 퍑 => 퍑 +D352 1111 1163 11B5 # 퍒 => 퍒 +D353 1111 1163 11B6 # 퍓 => 퍓 +D354 1111 1163 11B7 # 퍔 => 퍔 +D355 1111 1163 11B8 # 퍕 => 퍕 +D356 1111 1163 11B9 # 퍖 => 퍖 +D357 1111 1163 11BA # 퍗 => 퍗 +D358 1111 1163 11BB # 퍘 => 퍘 +D359 1111 1163 11BC # 퍙 => 퍙 +D35A 1111 1163 11BD # 퍚 => 퍚 +D35B 1111 1163 11BE # 퍛 => 퍛 +D35C 1111 1163 11BF # 퍜 => 퍜 +D35D 1111 1163 11C0 # 퍝 => 퍝 +D35E 1111 1163 11C1 # 퍞 => 퍞 +D35F 1111 1163 11C2 # 퍟 => 퍟 +D360 1111 1164 # 퍠 => 퍠 +D361 1111 1164 11A8 # 퍡 => 퍡 +D362 1111 1164 11A9 # 퍢 => 퍢 +D363 1111 1164 11AA # 퍣 => 퍣 +D364 1111 1164 11AB # 퍤 => 퍤 +D365 1111 1164 11AC # 퍥 => 퍥 +D366 1111 1164 11AD # 퍦 => 퍦 +D367 1111 1164 11AE # 퍧 => 퍧 +D368 1111 1164 11AF # 퍨 => 퍨 +D369 1111 1164 11B0 # 퍩 => 퍩 +D36A 1111 1164 11B1 # 퍪 => 퍪 +D36B 1111 1164 11B2 # 퍫 => 퍫 +D36C 1111 1164 11B3 # 퍬 => 퍬 +D36D 1111 1164 11B4 # 퍭 => 퍭 +D36E 1111 1164 11B5 # 퍮 => 퍮 +D36F 1111 1164 11B6 # 퍯 => 퍯 +D370 1111 1164 11B7 # 퍰 => 퍰 +D371 1111 1164 11B8 # 퍱 => 퍱 +D372 1111 1164 11B9 # 퍲 => 퍲 +D373 1111 1164 11BA # 퍳 => 퍳 +D374 1111 1164 11BB # 퍴 => 퍴 +D375 1111 1164 11BC # 퍵 => 퍵 +D376 1111 1164 11BD # 퍶 => 퍶 +D377 1111 1164 11BE # 퍷 => 퍷 +D378 1111 1164 11BF # 퍸 => 퍸 +D379 1111 1164 11C0 # 퍹 => 퍹 +D37A 1111 1164 11C1 # 퍺 => 퍺 +D37B 1111 1164 11C2 # 퍻 => 퍻 +D37C 1111 1165 # 퍼 => 퍼 +D37D 1111 1165 11A8 # 퍽 => 퍽 +D37E 1111 1165 11A9 # 퍾 => 퍾 +D37F 1111 1165 11AA # 퍿 => 퍿 +D380 1111 1165 11AB # 펀 => 펀 +D381 1111 1165 11AC # 펁 => 펁 +D382 1111 1165 11AD # 펂 => 펂 +D383 1111 1165 11AE # 펃 => 펃 +D384 1111 1165 11AF # 펄 => 펄 +D385 1111 1165 11B0 # 펅 => 펅 +D386 1111 1165 11B1 # 펆 => 펆 +D387 1111 1165 11B2 # 펇 => 펇 +D388 1111 1165 11B3 # 펈 => 펈 +D389 1111 1165 11B4 # 펉 => 펉 +D38A 1111 1165 11B5 # 펊 => 펊 +D38B 1111 1165 11B6 # 펋 => 펋 +D38C 1111 1165 11B7 # 펌 => 펌 +D38D 1111 1165 11B8 # 펍 => 펍 +D38E 1111 1165 11B9 # 펎 => 펎 +D38F 1111 1165 11BA # 펏 => 펏 +D390 1111 1165 11BB # 펐 => 펐 +D391 1111 1165 11BC # 펑 => 펑 
+D392 1111 1165 11BD # 펒 => 펒 +D393 1111 1165 11BE # 펓 => 펓 +D394 1111 1165 11BF # 펔 => 펔 +D395 1111 1165 11C0 # 펕 => 펕 +D396 1111 1165 11C1 # 펖 => 펖 +D397 1111 1165 11C2 # 펗 => 펗 +D398 1111 1166 # 페 => 페 +D399 1111 1166 11A8 # 펙 => 펙 +D39A 1111 1166 11A9 # 펚 => 펚 +D39B 1111 1166 11AA # 펛 => 펛 +D39C 1111 1166 11AB # 펜 => 펜 +D39D 1111 1166 11AC # 펝 => 펝 +D39E 1111 1166 11AD # 펞 => 펞 +D39F 1111 1166 11AE # 펟 => 펟 +D3A0 1111 1166 11AF # 펠 => 펠 +D3A1 1111 1166 11B0 # 펡 => 펡 +D3A2 1111 1166 11B1 # 펢 => 펢 +D3A3 1111 1166 11B2 # 펣 => 펣 +D3A4 1111 1166 11B3 # 펤 => 펤 +D3A5 1111 1166 11B4 # 펥 => 펥 +D3A6 1111 1166 11B5 # 펦 => 펦 +D3A7 1111 1166 11B6 # 펧 => 펧 +D3A8 1111 1166 11B7 # 펨 => 펨 +D3A9 1111 1166 11B8 # 펩 => 펩 +D3AA 1111 1166 11B9 # 펪 => 펪 +D3AB 1111 1166 11BA # 펫 => 펫 +D3AC 1111 1166 11BB # 펬 => 펬 +D3AD 1111 1166 11BC # 펭 => 펭 +D3AE 1111 1166 11BD # 펮 => 펮 +D3AF 1111 1166 11BE # 펯 => 펯 +D3B0 1111 1166 11BF # 펰 => 펰 +D3B1 1111 1166 11C0 # 펱 => 펱 +D3B2 1111 1166 11C1 # 펲 => 펲 +D3B3 1111 1166 11C2 # 펳 => 펳 +D3B4 1111 1167 # 펴 => 펴 +D3B5 1111 1167 11A8 # 펵 => 펵 +D3B6 1111 1167 11A9 # 펶 => 펶 +D3B7 1111 1167 11AA # 펷 => 펷 +D3B8 1111 1167 11AB # 편 => 편 +D3B9 1111 1167 11AC # 펹 => 펹 +D3BA 1111 1167 11AD # 펺 => 펺 +D3BB 1111 1167 11AE # 펻 => 펻 +D3BC 1111 1167 11AF # 펼 => 펼 +D3BD 1111 1167 11B0 # 펽 => 펽 +D3BE 1111 1167 11B1 # 펾 => 펾 +D3BF 1111 1167 11B2 # 펿 => 펿 +D3C0 1111 1167 11B3 # 폀 => 폀 +D3C1 1111 1167 11B4 # 폁 => 폁 +D3C2 1111 1167 11B5 # 폂 => 폂 +D3C3 1111 1167 11B6 # 폃 => 폃 +D3C4 1111 1167 11B7 # 폄 => 폄 +D3C5 1111 1167 11B8 # 폅 => 폅 +D3C6 1111 1167 11B9 # 폆 => 폆 +D3C7 1111 1167 11BA # 폇 => 폇 +D3C8 1111 1167 11BB # 폈 => 폈 +D3C9 1111 1167 11BC # 평 => 평 +D3CA 1111 1167 11BD # 폊 => 폊 +D3CB 1111 1167 11BE # 폋 => 폋 +D3CC 1111 1167 11BF # 폌 => 폌 +D3CD 1111 1167 11C0 # 폍 => 폍 +D3CE 1111 1167 11C1 # 폎 => 폎 +D3CF 1111 1167 11C2 # 폏 => 폏 +D3D0 1111 1168 # 폐 => 폐 +D3D1 1111 1168 11A8 # 폑 => 폑 +D3D2 1111 1168 11A9 # 폒 => 폒 +D3D3 1111 1168 11AA # 폓 => 폓 +D3D4 1111 1168 11AB # 폔 => 폔 
+D3D5 1111 1168 11AC # 폕 => 폕 +D3D6 1111 1168 11AD # 폖 => 폖 +D3D7 1111 1168 11AE # 폗 => 폗 +D3D8 1111 1168 11AF # 폘 => 폘 +D3D9 1111 1168 11B0 # 폙 => 폙 +D3DA 1111 1168 11B1 # 폚 => 폚 +D3DB 1111 1168 11B2 # 폛 => 폛 +D3DC 1111 1168 11B3 # 폜 => 폜 +D3DD 1111 1168 11B4 # 폝 => 폝 +D3DE 1111 1168 11B5 # 폞 => 폞 +D3DF 1111 1168 11B6 # 폟 => 폟 +D3E0 1111 1168 11B7 # 폠 => 폠 +D3E1 1111 1168 11B8 # 폡 => 폡 +D3E2 1111 1168 11B9 # 폢 => 폢 +D3E3 1111 1168 11BA # 폣 => 폣 +D3E4 1111 1168 11BB # 폤 => 폤 +D3E5 1111 1168 11BC # 폥 => 폥 +D3E6 1111 1168 11BD # 폦 => 폦 +D3E7 1111 1168 11BE # 폧 => 폧 +D3E8 1111 1168 11BF # 폨 => 폨 +D3E9 1111 1168 11C0 # 폩 => 폩 +D3EA 1111 1168 11C1 # 폪 => 폪 +D3EB 1111 1168 11C2 # 폫 => 폫 +D3EC 1111 1169 # 포 => 포 +D3ED 1111 1169 11A8 # 폭 => 폭 +D3EE 1111 1169 11A9 # 폮 => 폮 +D3EF 1111 1169 11AA # 폯 => 폯 +D3F0 1111 1169 11AB # 폰 => 폰 +D3F1 1111 1169 11AC # 폱 => 폱 +D3F2 1111 1169 11AD # 폲 => 폲 +D3F3 1111 1169 11AE # 폳 => 폳 +D3F4 1111 1169 11AF # 폴 => 폴 +D3F5 1111 1169 11B0 # 폵 => 폵 +D3F6 1111 1169 11B1 # 폶 => 폶 +D3F7 1111 1169 11B2 # 폷 => 폷 +D3F8 1111 1169 11B3 # 폸 => 폸 +D3F9 1111 1169 11B4 # 폹 => 폹 +D3FA 1111 1169 11B5 # 폺 => 폺 +D3FB 1111 1169 11B6 # 폻 => 폻 +D3FC 1111 1169 11B7 # 폼 => 폼 +D3FD 1111 1169 11B8 # 폽 => 폽 +D3FE 1111 1169 11B9 # 폾 => 폾 +D3FF 1111 1169 11BA # 폿 => 폿 +D400 1111 1169 11BB # 퐀 => 퐀 +D401 1111 1169 11BC # 퐁 => 퐁 +D402 1111 1169 11BD # 퐂 => 퐂 +D403 1111 1169 11BE # 퐃 => 퐃 +D404 1111 1169 11BF # 퐄 => 퐄 +D405 1111 1169 11C0 # 퐅 => 퐅 +D406 1111 1169 11C1 # 퐆 => 퐆 +D407 1111 1169 11C2 # 퐇 => 퐇 +D408 1111 116A # 퐈 => 퐈 +D409 1111 116A 11A8 # 퐉 => 퐉 +D40A 1111 116A 11A9 # 퐊 => 퐊 +D40B 1111 116A 11AA # 퐋 => 퐋 +D40C 1111 116A 11AB # 퐌 => 퐌 +D40D 1111 116A 11AC # 퐍 => 퐍 +D40E 1111 116A 11AD # 퐎 => 퐎 +D40F 1111 116A 11AE # 퐏 => 퐏 +D410 1111 116A 11AF # 퐐 => 퐐 +D411 1111 116A 11B0 # 퐑 => 퐑 +D412 1111 116A 11B1 # 퐒 => 퐒 +D413 1111 116A 11B2 # 퐓 => 퐓 +D414 1111 116A 11B3 # 퐔 => 퐔 +D415 1111 116A 11B4 # 퐕 => 퐕 +D416 1111 116A 11B5 # 퐖 => 퐖 +D417 1111 116A 11B6 # 퐗 => 퐗 
+D418 1111 116A 11B7 # 퐘 => 퐘 +D419 1111 116A 11B8 # 퐙 => 퐙 +D41A 1111 116A 11B9 # 퐚 => 퐚 +D41B 1111 116A 11BA # 퐛 => 퐛 +D41C 1111 116A 11BB # 퐜 => 퐜 +D41D 1111 116A 11BC # 퐝 => 퐝 +D41E 1111 116A 11BD # 퐞 => 퐞 +D41F 1111 116A 11BE # 퐟 => 퐟 +D420 1111 116A 11BF # 퐠 => 퐠 +D421 1111 116A 11C0 # 퐡 => 퐡 +D422 1111 116A 11C1 # 퐢 => 퐢 +D423 1111 116A 11C2 # 퐣 => 퐣 +D424 1111 116B # 퐤 => 퐤 +D425 1111 116B 11A8 # 퐥 => 퐥 +D426 1111 116B 11A9 # 퐦 => 퐦 +D427 1111 116B 11AA # 퐧 => 퐧 +D428 1111 116B 11AB # 퐨 => 퐨 +D429 1111 116B 11AC # 퐩 => 퐩 +D42A 1111 116B 11AD # 퐪 => 퐪 +D42B 1111 116B 11AE # 퐫 => 퐫 +D42C 1111 116B 11AF # 퐬 => 퐬 +D42D 1111 116B 11B0 # 퐭 => 퐭 +D42E 1111 116B 11B1 # 퐮 => 퐮 +D42F 1111 116B 11B2 # 퐯 => 퐯 +D430 1111 116B 11B3 # 퐰 => 퐰 +D431 1111 116B 11B4 # 퐱 => 퐱 +D432 1111 116B 11B5 # 퐲 => 퐲 +D433 1111 116B 11B6 # 퐳 => 퐳 +D434 1111 116B 11B7 # 퐴 => 퐴 +D435 1111 116B 11B8 # 퐵 => 퐵 +D436 1111 116B 11B9 # 퐶 => 퐶 +D437 1111 116B 11BA # 퐷 => 퐷 +D438 1111 116B 11BB # 퐸 => 퐸 +D439 1111 116B 11BC # 퐹 => 퐹 +D43A 1111 116B 11BD # 퐺 => 퐺 +D43B 1111 116B 11BE # 퐻 => 퐻 +D43C 1111 116B 11BF # 퐼 => 퐼 +D43D 1111 116B 11C0 # 퐽 => 퐽 +D43E 1111 116B 11C1 # 퐾 => 퐾 +D43F 1111 116B 11C2 # 퐿 => 퐿 +D440 1111 116C # 푀 => 푀 +D441 1111 116C 11A8 # 푁 => 푁 +D442 1111 116C 11A9 # 푂 => 푂 +D443 1111 116C 11AA # 푃 => 푃 +D444 1111 116C 11AB # 푄 => 푄 +D445 1111 116C 11AC # 푅 => 푅 +D446 1111 116C 11AD # 푆 => 푆 +D447 1111 116C 11AE # 푇 => 푇 +D448 1111 116C 11AF # 푈 => 푈 +D449 1111 116C 11B0 # 푉 => 푉 +D44A 1111 116C 11B1 # 푊 => 푊 +D44B 1111 116C 11B2 # 푋 => 푋 +D44C 1111 116C 11B3 # 푌 => 푌 +D44D 1111 116C 11B4 # 푍 => 푍 +D44E 1111 116C 11B5 # 푎 => 푎 +D44F 1111 116C 11B6 # 푏 => 푏 +D450 1111 116C 11B7 # 푐 => 푐 +D451 1111 116C 11B8 # 푑 => 푑 +D452 1111 116C 11B9 # 푒 => 푒 +D453 1111 116C 11BA # 푓 => 푓 +D454 1111 116C 11BB # 푔 => 푔 +D455 1111 116C 11BC # 푕 => 푕 +D456 1111 116C 11BD # 푖 => 푖 +D457 1111 116C 11BE # 푗 => 푗 +D458 1111 116C 11BF # 푘 => 푘 +D459 1111 116C 11C0 # 푙 => 푙 +D45A 1111 116C 11C1 # 푚 => 푚 
+D45B 1111 116C 11C2 # 푛 => 푛 +D45C 1111 116D # 표 => 표 +D45D 1111 116D 11A8 # 푝 => 푝 +D45E 1111 116D 11A9 # 푞 => 푞 +D45F 1111 116D 11AA # 푟 => 푟 +D460 1111 116D 11AB # 푠 => 푠 +D461 1111 116D 11AC # 푡 => 푡 +D462 1111 116D 11AD # 푢 => 푢 +D463 1111 116D 11AE # 푣 => 푣 +D464 1111 116D 11AF # 푤 => 푤 +D465 1111 116D 11B0 # 푥 => 푥 +D466 1111 116D 11B1 # 푦 => 푦 +D467 1111 116D 11B2 # 푧 => 푧 +D468 1111 116D 11B3 # 푨 => 푨 +D469 1111 116D 11B4 # 푩 => 푩 +D46A 1111 116D 11B5 # 푪 => 푪 +D46B 1111 116D 11B6 # 푫 => 푫 +D46C 1111 116D 11B7 # 푬 => 푬 +D46D 1111 116D 11B8 # 푭 => 푭 +D46E 1111 116D 11B9 # 푮 => 푮 +D46F 1111 116D 11BA # 푯 => 푯 +D470 1111 116D 11BB # 푰 => 푰 +D471 1111 116D 11BC # 푱 => 푱 +D472 1111 116D 11BD # 푲 => 푲 +D473 1111 116D 11BE # 푳 => 푳 +D474 1111 116D 11BF # 푴 => 푴 +D475 1111 116D 11C0 # 푵 => 푵 +D476 1111 116D 11C1 # 푶 => 푶 +D477 1111 116D 11C2 # 푷 => 푷 +D478 1111 116E # 푸 => 푸 +D479 1111 116E 11A8 # 푹 => 푹 +D47A 1111 116E 11A9 # 푺 => 푺 +D47B 1111 116E 11AA # 푻 => 푻 +D47C 1111 116E 11AB # 푼 => 푼 +D47D 1111 116E 11AC # 푽 => 푽 +D47E 1111 116E 11AD # 푾 => 푾 +D47F 1111 116E 11AE # 푿 => 푿 +D480 1111 116E 11AF # 풀 => 풀 +D481 1111 116E 11B0 # 풁 => 풁 +D482 1111 116E 11B1 # 풂 => 풂 +D483 1111 116E 11B2 # 풃 => 풃 +D484 1111 116E 11B3 # 풄 => 풄 +D485 1111 116E 11B4 # 풅 => 풅 +D486 1111 116E 11B5 # 풆 => 풆 +D487 1111 116E 11B6 # 풇 => 풇 +D488 1111 116E 11B7 # 품 => 품 +D489 1111 116E 11B8 # 풉 => 풉 +D48A 1111 116E 11B9 # 풊 => 풊 +D48B 1111 116E 11BA # 풋 => 풋 +D48C 1111 116E 11BB # 풌 => 풌 +D48D 1111 116E 11BC # 풍 => 풍 +D48E 1111 116E 11BD # 풎 => 풎 +D48F 1111 116E 11BE # 풏 => 풏 +D490 1111 116E 11BF # 풐 => 풐 +D491 1111 116E 11C0 # 풑 => 풑 +D492 1111 116E 11C1 # 풒 => 풒 +D493 1111 116E 11C2 # 풓 => 풓 +D494 1111 116F # 풔 => 풔 +D495 1111 116F 11A8 # 풕 => 풕 +D496 1111 116F 11A9 # 풖 => 풖 +D497 1111 116F 11AA # 풗 => 풗 +D498 1111 116F 11AB # 풘 => 풘 +D499 1111 116F 11AC # 풙 => 풙 +D49A 1111 116F 11AD # 풚 => 풚 +D49B 1111 116F 11AE # 풛 => 풛 +D49C 1111 116F 11AF # 풜 => 풜 +D49D 1111 116F 11B0 # 풝 => 풝 
+D49E 1111 116F 11B1 # 풞 => 풞 +D49F 1111 116F 11B2 # 풟 => 풟 +D4A0 1111 116F 11B3 # 풠 => 풠 +D4A1 1111 116F 11B4 # 풡 => 풡 +D4A2 1111 116F 11B5 # 풢 => 풢 +D4A3 1111 116F 11B6 # 풣 => 풣 +D4A4 1111 116F 11B7 # 풤 => 풤 +D4A5 1111 116F 11B8 # 풥 => 풥 +D4A6 1111 116F 11B9 # 풦 => 풦 +D4A7 1111 116F 11BA # 풧 => 풧 +D4A8 1111 116F 11BB # 풨 => 풨 +D4A9 1111 116F 11BC # 풩 => 풩 +D4AA 1111 116F 11BD # 풪 => 풪 +D4AB 1111 116F 11BE # 풫 => 풫 +D4AC 1111 116F 11BF # 풬 => 풬 +D4AD 1111 116F 11C0 # 풭 => 풭 +D4AE 1111 116F 11C1 # 풮 => 풮 +D4AF 1111 116F 11C2 # 풯 => 풯 +D4B0 1111 1170 # 풰 => 풰 +D4B1 1111 1170 11A8 # 풱 => 풱 +D4B2 1111 1170 11A9 # 풲 => 풲 +D4B3 1111 1170 11AA # 풳 => 풳 +D4B4 1111 1170 11AB # 풴 => 풴 +D4B5 1111 1170 11AC # 풵 => 풵 +D4B6 1111 1170 11AD # 풶 => 풶 +D4B7 1111 1170 11AE # 풷 => 풷 +D4B8 1111 1170 11AF # 풸 => 풸 +D4B9 1111 1170 11B0 # 풹 => 풹 +D4BA 1111 1170 11B1 # 풺 => 풺 +D4BB 1111 1170 11B2 # 풻 => 풻 +D4BC 1111 1170 11B3 # 풼 => 풼 +D4BD 1111 1170 11B4 # 풽 => 풽 +D4BE 1111 1170 11B5 # 풾 => 풾 +D4BF 1111 1170 11B6 # 풿 => 풿 +D4C0 1111 1170 11B7 # 퓀 => 퓀 +D4C1 1111 1170 11B8 # 퓁 => 퓁 +D4C2 1111 1170 11B9 # 퓂 => 퓂 +D4C3 1111 1170 11BA # 퓃 => 퓃 +D4C4 1111 1170 11BB # 퓄 => 퓄 +D4C5 1111 1170 11BC # 퓅 => 퓅 +D4C6 1111 1170 11BD # 퓆 => 퓆 +D4C7 1111 1170 11BE # 퓇 => 퓇 +D4C8 1111 1170 11BF # 퓈 => 퓈 +D4C9 1111 1170 11C0 # 퓉 => 퓉 +D4CA 1111 1170 11C1 # 퓊 => 퓊 +D4CB 1111 1170 11C2 # 퓋 => 퓋 +D4CC 1111 1171 # 퓌 => 퓌 +D4CD 1111 1171 11A8 # 퓍 => 퓍 +D4CE 1111 1171 11A9 # 퓎 => 퓎 +D4CF 1111 1171 11AA # 퓏 => 퓏 +D4D0 1111 1171 11AB # 퓐 => 퓐 +D4D1 1111 1171 11AC # 퓑 => 퓑 +D4D2 1111 1171 11AD # 퓒 => 퓒 +D4D3 1111 1171 11AE # 퓓 => 퓓 +D4D4 1111 1171 11AF # 퓔 => 퓔 +D4D5 1111 1171 11B0 # 퓕 => 퓕 +D4D6 1111 1171 11B1 # 퓖 => 퓖 +D4D7 1111 1171 11B2 # 퓗 => 퓗 +D4D8 1111 1171 11B3 # 퓘 => 퓘 +D4D9 1111 1171 11B4 # 퓙 => 퓙 +D4DA 1111 1171 11B5 # 퓚 => 퓚 +D4DB 1111 1171 11B6 # 퓛 => 퓛 +D4DC 1111 1171 11B7 # 퓜 => 퓜 +D4DD 1111 1171 11B8 # 퓝 => 퓝 +D4DE 1111 1171 11B9 # 퓞 => 퓞 +D4DF 1111 1171 11BA # 퓟 => 퓟 +D4E0 1111 1171 11BB # 퓠 => 퓠 
+D4E1 1111 1171 11BC # 퓡 => 퓡 +D4E2 1111 1171 11BD # 퓢 => 퓢 +D4E3 1111 1171 11BE # 퓣 => 퓣 +D4E4 1111 1171 11BF # 퓤 => 퓤 +D4E5 1111 1171 11C0 # 퓥 => 퓥 +D4E6 1111 1171 11C1 # 퓦 => 퓦 +D4E7 1111 1171 11C2 # 퓧 => 퓧 +D4E8 1111 1172 # 퓨 => 퓨 +D4E9 1111 1172 11A8 # 퓩 => 퓩 +D4EA 1111 1172 11A9 # 퓪 => 퓪 +D4EB 1111 1172 11AA # 퓫 => 퓫 +D4EC 1111 1172 11AB # 퓬 => 퓬 +D4ED 1111 1172 11AC # 퓭 => 퓭 +D4EE 1111 1172 11AD # 퓮 => 퓮 +D4EF 1111 1172 11AE # 퓯 => 퓯 +D4F0 1111 1172 11AF # 퓰 => 퓰 +D4F1 1111 1172 11B0 # 퓱 => 퓱 +D4F2 1111 1172 11B1 # 퓲 => 퓲 +D4F3 1111 1172 11B2 # 퓳 => 퓳 +D4F4 1111 1172 11B3 # 퓴 => 퓴 +D4F5 1111 1172 11B4 # 퓵 => 퓵 +D4F6 1111 1172 11B5 # 퓶 => 퓶 +D4F7 1111 1172 11B6 # 퓷 => 퓷 +D4F8 1111 1172 11B7 # 퓸 => 퓸 +D4F9 1111 1172 11B8 # 퓹 => 퓹 +D4FA 1111 1172 11B9 # 퓺 => 퓺 +D4FB 1111 1172 11BA # 퓻 => 퓻 +D4FC 1111 1172 11BB # 퓼 => 퓼 +D4FD 1111 1172 11BC # 퓽 => 퓽 +D4FE 1111 1172 11BD # 퓾 => 퓾 +D4FF 1111 1172 11BE # 퓿 => 퓿 +D500 1111 1172 11BF # 픀 => 픀 +D501 1111 1172 11C0 # 픁 => 픁 +D502 1111 1172 11C1 # 픂 => 픂 +D503 1111 1172 11C2 # 픃 => 픃 +D504 1111 1173 # 프 => 프 +D505 1111 1173 11A8 # 픅 => 픅 +D506 1111 1173 11A9 # 픆 => 픆 +D507 1111 1173 11AA # 픇 => 픇 +D508 1111 1173 11AB # 픈 => 픈 +D509 1111 1173 11AC # 픉 => 픉 +D50A 1111 1173 11AD # 픊 => 픊 +D50B 1111 1173 11AE # 픋 => 픋 +D50C 1111 1173 11AF # 플 => 플 +D50D 1111 1173 11B0 # 픍 => 픍 +D50E 1111 1173 11B1 # 픎 => 픎 +D50F 1111 1173 11B2 # 픏 => 픏 +D510 1111 1173 11B3 # 픐 => 픐 +D511 1111 1173 11B4 # 픑 => 픑 +D512 1111 1173 11B5 # 픒 => 픒 +D513 1111 1173 11B6 # 픓 => 픓 +D514 1111 1173 11B7 # 픔 => 픔 +D515 1111 1173 11B8 # 픕 => 픕 +D516 1111 1173 11B9 # 픖 => 픖 +D517 1111 1173 11BA # 픗 => 픗 +D518 1111 1173 11BB # 픘 => 픘 +D519 1111 1173 11BC # 픙 => 픙 +D51A 1111 1173 11BD # 픚 => 픚 +D51B 1111 1173 11BE # 픛 => 픛 +D51C 1111 1173 11BF # 픜 => 픜 +D51D 1111 1173 11C0 # 픝 => 픝 +D51E 1111 1173 11C1 # 픞 => 픞 +D51F 1111 1173 11C2 # 픟 => 픟 +D520 1111 1174 # 픠 => 픠 +D521 1111 1174 11A8 # 픡 => 픡 +D522 1111 1174 11A9 # 픢 => 픢 +D523 1111 1174 11AA # 픣 => 픣 
+D524 1111 1174 11AB # 픤 => 픤 +D525 1111 1174 11AC # 픥 => 픥 +D526 1111 1174 11AD # 픦 => 픦 +D527 1111 1174 11AE # 픧 => 픧 +D528 1111 1174 11AF # 픨 => 픨 +D529 1111 1174 11B0 # 픩 => 픩 +D52A 1111 1174 11B1 # 픪 => 픪 +D52B 1111 1174 11B2 # 픫 => 픫 +D52C 1111 1174 11B3 # 픬 => 픬 +D52D 1111 1174 11B4 # 픭 => 픭 +D52E 1111 1174 11B5 # 픮 => 픮 +D52F 1111 1174 11B6 # 픯 => 픯 +D530 1111 1174 11B7 # 픰 => 픰 +D531 1111 1174 11B8 # 픱 => 픱 +D532 1111 1174 11B9 # 픲 => 픲 +D533 1111 1174 11BA # 픳 => 픳 +D534 1111 1174 11BB # 픴 => 픴 +D535 1111 1174 11BC # 픵 => 픵 +D536 1111 1174 11BD # 픶 => 픶 +D537 1111 1174 11BE # 픷 => 픷 +D538 1111 1174 11BF # 픸 => 픸 +D539 1111 1174 11C0 # 픹 => 픹 +D53A 1111 1174 11C1 # 픺 => 픺 +D53B 1111 1174 11C2 # 픻 => 픻 +D53C 1111 1175 # 피 => 피 +D53D 1111 1175 11A8 # 픽 => 픽 +D53E 1111 1175 11A9 # 픾 => 픾 +D53F 1111 1175 11AA # 픿 => 픿 +D540 1111 1175 11AB # 핀 => 핀 +D541 1111 1175 11AC # 핁 => 핁 +D542 1111 1175 11AD # 핂 => 핂 +D543 1111 1175 11AE # 핃 => 핃 +D544 1111 1175 11AF # 필 => 필 +D545 1111 1175 11B0 # 핅 => 핅 +D546 1111 1175 11B1 # 핆 => 핆 +D547 1111 1175 11B2 # 핇 => 핇 +D548 1111 1175 11B3 # 핈 => 핈 +D549 1111 1175 11B4 # 핉 => 핉 +D54A 1111 1175 11B5 # 핊 => 핊 +D54B 1111 1175 11B6 # 핋 => 핋 +D54C 1111 1175 11B7 # 핌 => 핌 +D54D 1111 1175 11B8 # 핍 => 핍 +D54E 1111 1175 11B9 # 핎 => 핎 +D54F 1111 1175 11BA # 핏 => 핏 +D550 1111 1175 11BB # 핐 => 핐 +D551 1111 1175 11BC # 핑 => 핑 +D552 1111 1175 11BD # 핒 => 핒 +D553 1111 1175 11BE # 핓 => 핓 +D554 1111 1175 11BF # 핔 => 핔 +D555 1111 1175 11C0 # 핕 => 핕 +D556 1111 1175 11C1 # 핖 => 핖 +D557 1111 1175 11C2 # 핗 => 핗 +D558 1112 1161 # 하 => 하 +D559 1112 1161 11A8 # 학 => 학 +D55A 1112 1161 11A9 # 핚 => 핚 +D55B 1112 1161 11AA # 핛 => 핛 +D55C 1112 1161 11AB # 한 => 한 +D55D 1112 1161 11AC # 핝 => 핝 +D55E 1112 1161 11AD # 핞 => 핞 +D55F 1112 1161 11AE # 핟 => 핟 +D560 1112 1161 11AF # 할 => 할 +D561 1112 1161 11B0 # 핡 => 핡 +D562 1112 1161 11B1 # 핢 => 핢 +D563 1112 1161 11B2 # 핣 => 핣 +D564 1112 1161 11B3 # 핤 => 핤 +D565 1112 1161 11B4 # 핥 => 핥 +D566 1112 1161 11B5 # 핦 => 핦 
+D567 1112 1161 11B6 # 핧 => 핧 +D568 1112 1161 11B7 # 함 => 함 +D569 1112 1161 11B8 # 합 => 합 +D56A 1112 1161 11B9 # 핪 => 핪 +D56B 1112 1161 11BA # 핫 => 핫 +D56C 1112 1161 11BB # 핬 => 핬 +D56D 1112 1161 11BC # 항 => 항 +D56E 1112 1161 11BD # 핮 => 핮 +D56F 1112 1161 11BE # 핯 => 핯 +D570 1112 1161 11BF # 핰 => 핰 +D571 1112 1161 11C0 # 핱 => 핱 +D572 1112 1161 11C1 # 핲 => 핲 +D573 1112 1161 11C2 # 핳 => 핳 +D574 1112 1162 # 해 => 해 +D575 1112 1162 11A8 # 핵 => 핵 +D576 1112 1162 11A9 # 핶 => 핶 +D577 1112 1162 11AA # 핷 => 핷 +D578 1112 1162 11AB # 핸 => 핸 +D579 1112 1162 11AC # 핹 => 핹 +D57A 1112 1162 11AD # 핺 => 핺 +D57B 1112 1162 11AE # 핻 => 핻 +D57C 1112 1162 11AF # 핼 => 핼 +D57D 1112 1162 11B0 # 핽 => 핽 +D57E 1112 1162 11B1 # 핾 => 핾 +D57F 1112 1162 11B2 # 핿 => 핿 +D580 1112 1162 11B3 # 햀 => 햀 +D581 1112 1162 11B4 # 햁 => 햁 +D582 1112 1162 11B5 # 햂 => 햂 +D583 1112 1162 11B6 # 햃 => 햃 +D584 1112 1162 11B7 # 햄 => 햄 +D585 1112 1162 11B8 # 햅 => 햅 +D586 1112 1162 11B9 # 햆 => 햆 +D587 1112 1162 11BA # 햇 => 햇 +D588 1112 1162 11BB # 했 => 했 +D589 1112 1162 11BC # 행 => 행 +D58A 1112 1162 11BD # 햊 => 햊 +D58B 1112 1162 11BE # 햋 => 햋 +D58C 1112 1162 11BF # 햌 => 햌 +D58D 1112 1162 11C0 # 햍 => 햍 +D58E 1112 1162 11C1 # 햎 => 햎 +D58F 1112 1162 11C2 # 햏 => 햏 +D590 1112 1163 # 햐 => 햐 +D591 1112 1163 11A8 # 햑 => 햑 +D592 1112 1163 11A9 # 햒 => 햒 +D593 1112 1163 11AA # 햓 => 햓 +D594 1112 1163 11AB # 햔 => 햔 +D595 1112 1163 11AC # 햕 => 햕 +D596 1112 1163 11AD # 햖 => 햖 +D597 1112 1163 11AE # 햗 => 햗 +D598 1112 1163 11AF # 햘 => 햘 +D599 1112 1163 11B0 # 햙 => 햙 +D59A 1112 1163 11B1 # 햚 => 햚 +D59B 1112 1163 11B2 # 햛 => 햛 +D59C 1112 1163 11B3 # 햜 => 햜 +D59D 1112 1163 11B4 # 햝 => 햝 +D59E 1112 1163 11B5 # 햞 => 햞 +D59F 1112 1163 11B6 # 햟 => 햟 +D5A0 1112 1163 11B7 # 햠 => 햠 +D5A1 1112 1163 11B8 # 햡 => 햡 +D5A2 1112 1163 11B9 # 햢 => 햢 +D5A3 1112 1163 11BA # 햣 => 햣 +D5A4 1112 1163 11BB # 햤 => 햤 +D5A5 1112 1163 11BC # 향 => 향 +D5A6 1112 1163 11BD # 햦 => 햦 +D5A7 1112 1163 11BE # 햧 => 햧 +D5A8 1112 1163 11BF # 햨 => 햨 +D5A9 1112 1163 11C0 # 햩 => 햩 
+D5AA 1112 1163 11C1 # 햪 => 햪 +D5AB 1112 1163 11C2 # 햫 => 햫 +D5AC 1112 1164 # 햬 => 햬 +D5AD 1112 1164 11A8 # 햭 => 햭 +D5AE 1112 1164 11A9 # 햮 => 햮 +D5AF 1112 1164 11AA # 햯 => 햯 +D5B0 1112 1164 11AB # 햰 => 햰 +D5B1 1112 1164 11AC # 햱 => 햱 +D5B2 1112 1164 11AD # 햲 => 햲 +D5B3 1112 1164 11AE # 햳 => 햳 +D5B4 1112 1164 11AF # 햴 => 햴 +D5B5 1112 1164 11B0 # 햵 => 햵 +D5B6 1112 1164 11B1 # 햶 => 햶 +D5B7 1112 1164 11B2 # 햷 => 햷 +D5B8 1112 1164 11B3 # 햸 => 햸 +D5B9 1112 1164 11B4 # 햹 => 햹 +D5BA 1112 1164 11B5 # 햺 => 햺 +D5BB 1112 1164 11B6 # 햻 => 햻 +D5BC 1112 1164 11B7 # 햼 => 햼 +D5BD 1112 1164 11B8 # 햽 => 햽 +D5BE 1112 1164 11B9 # 햾 => 햾 +D5BF 1112 1164 11BA # 햿 => 햿 +D5C0 1112 1164 11BB # 헀 => 헀 +D5C1 1112 1164 11BC # 헁 => 헁 +D5C2 1112 1164 11BD # 헂 => 헂 +D5C3 1112 1164 11BE # 헃 => 헃 +D5C4 1112 1164 11BF # 헄 => 헄 +D5C5 1112 1164 11C0 # 헅 => 헅 +D5C6 1112 1164 11C1 # 헆 => 헆 +D5C7 1112 1164 11C2 # 헇 => 헇 +D5C8 1112 1165 # 허 => 허 +D5C9 1112 1165 11A8 # 헉 => 헉 +D5CA 1112 1165 11A9 # 헊 => 헊 +D5CB 1112 1165 11AA # 헋 => 헋 +D5CC 1112 1165 11AB # 헌 => 헌 +D5CD 1112 1165 11AC # 헍 => 헍 +D5CE 1112 1165 11AD # 헎 => 헎 +D5CF 1112 1165 11AE # 헏 => 헏 +D5D0 1112 1165 11AF # 헐 => 헐 +D5D1 1112 1165 11B0 # 헑 => 헑 +D5D2 1112 1165 11B1 # 헒 => 헒 +D5D3 1112 1165 11B2 # 헓 => 헓 +D5D4 1112 1165 11B3 # 헔 => 헔 +D5D5 1112 1165 11B4 # 헕 => 헕 +D5D6 1112 1165 11B5 # 헖 => 헖 +D5D7 1112 1165 11B6 # 헗 => 헗 +D5D8 1112 1165 11B7 # 험 => 험 +D5D9 1112 1165 11B8 # 헙 => 헙 +D5DA 1112 1165 11B9 # 헚 => 헚 +D5DB 1112 1165 11BA # 헛 => 헛 +D5DC 1112 1165 11BB # 헜 => 헜 +D5DD 1112 1165 11BC # 헝 => 헝 +D5DE 1112 1165 11BD # 헞 => 헞 +D5DF 1112 1165 11BE # 헟 => 헟 +D5E0 1112 1165 11BF # 헠 => 헠 +D5E1 1112 1165 11C0 # 헡 => 헡 +D5E2 1112 1165 11C1 # 헢 => 헢 +D5E3 1112 1165 11C2 # 헣 => 헣 +D5E4 1112 1166 # 헤 => 헤 +D5E5 1112 1166 11A8 # 헥 => 헥 +D5E6 1112 1166 11A9 # 헦 => 헦 +D5E7 1112 1166 11AA # 헧 => 헧 +D5E8 1112 1166 11AB # 헨 => 헨 +D5E9 1112 1166 11AC # 헩 => 헩 +D5EA 1112 1166 11AD # 헪 => 헪 +D5EB 1112 1166 11AE # 헫 => 헫 +D5EC 1112 1166 11AF # 헬 => 헬 
+D5ED 1112 1166 11B0 # 헭 => 헭 +D5EE 1112 1166 11B1 # 헮 => 헮 +D5EF 1112 1166 11B2 # 헯 => 헯 +D5F0 1112 1166 11B3 # 헰 => 헰 +D5F1 1112 1166 11B4 # 헱 => 헱 +D5F2 1112 1166 11B5 # 헲 => 헲 +D5F3 1112 1166 11B6 # 헳 => 헳 +D5F4 1112 1166 11B7 # 헴 => 헴 +D5F5 1112 1166 11B8 # 헵 => 헵 +D5F6 1112 1166 11B9 # 헶 => 헶 +D5F7 1112 1166 11BA # 헷 => 헷 +D5F8 1112 1166 11BB # 헸 => 헸 +D5F9 1112 1166 11BC # 헹 => 헹 +D5FA 1112 1166 11BD # 헺 => 헺 +D5FB 1112 1166 11BE # 헻 => 헻 +D5FC 1112 1166 11BF # 헼 => 헼 +D5FD 1112 1166 11C0 # 헽 => 헽 +D5FE 1112 1166 11C1 # 헾 => 헾 +D5FF 1112 1166 11C2 # 헿 => 헿 +D600 1112 1167 # 혀 => 혀 +D601 1112 1167 11A8 # 혁 => 혁 +D602 1112 1167 11A9 # 혂 => 혂 +D603 1112 1167 11AA # 혃 => 혃 +D604 1112 1167 11AB # 현 => 현 +D605 1112 1167 11AC # 혅 => 혅 +D606 1112 1167 11AD # 혆 => 혆 +D607 1112 1167 11AE # 혇 => 혇 +D608 1112 1167 11AF # 혈 => 혈 +D609 1112 1167 11B0 # 혉 => 혉 +D60A 1112 1167 11B1 # 혊 => 혊 +D60B 1112 1167 11B2 # 혋 => 혋 +D60C 1112 1167 11B3 # 혌 => 혌 +D60D 1112 1167 11B4 # 혍 => 혍 +D60E 1112 1167 11B5 # 혎 => 혎 +D60F 1112 1167 11B6 # 혏 => 혏 +D610 1112 1167 11B7 # 혐 => 혐 +D611 1112 1167 11B8 # 협 => 협 +D612 1112 1167 11B9 # 혒 => 혒 +D613 1112 1167 11BA # 혓 => 혓 +D614 1112 1167 11BB # 혔 => 혔 +D615 1112 1167 11BC # 형 => 형 +D616 1112 1167 11BD # 혖 => 혖 +D617 1112 1167 11BE # 혗 => 혗 +D618 1112 1167 11BF # 혘 => 혘 +D619 1112 1167 11C0 # 혙 => 혙 +D61A 1112 1167 11C1 # 혚 => 혚 +D61B 1112 1167 11C2 # 혛 => 혛 +D61C 1112 1168 # 혜 => 혜 +D61D 1112 1168 11A8 # 혝 => 혝 +D61E 1112 1168 11A9 # 혞 => 혞 +D61F 1112 1168 11AA # 혟 => 혟 +D620 1112 1168 11AB # 혠 => 혠 +D621 1112 1168 11AC # 혡 => 혡 +D622 1112 1168 11AD # 혢 => 혢 +D623 1112 1168 11AE # 혣 => 혣 +D624 1112 1168 11AF # 혤 => 혤 +D625 1112 1168 11B0 # 혥 => 혥 +D626 1112 1168 11B1 # 혦 => 혦 +D627 1112 1168 11B2 # 혧 => 혧 +D628 1112 1168 11B3 # 혨 => 혨 +D629 1112 1168 11B4 # 혩 => 혩 +D62A 1112 1168 11B5 # 혪 => 혪 +D62B 1112 1168 11B6 # 혫 => 혫 +D62C 1112 1168 11B7 # 혬 => 혬 +D62D 1112 1168 11B8 # 혭 => 혭 +D62E 1112 1168 11B9 # 혮 => 혮 +D62F 1112 1168 11BA # 혯 => 혯 
+D630 1112 1168 11BB # 혰 => 혰 +D631 1112 1168 11BC # 혱 => 혱 +D632 1112 1168 11BD # 혲 => 혲 +D633 1112 1168 11BE # 혳 => 혳 +D634 1112 1168 11BF # 혴 => 혴 +D635 1112 1168 11C0 # 혵 => 혵 +D636 1112 1168 11C1 # 혶 => 혶 +D637 1112 1168 11C2 # 혷 => 혷 +D638 1112 1169 # 호 => 호 +D639 1112 1169 11A8 # 혹 => 혹 +D63A 1112 1169 11A9 # 혺 => 혺 +D63B 1112 1169 11AA # 혻 => 혻 +D63C 1112 1169 11AB # 혼 => 혼 +D63D 1112 1169 11AC # 혽 => 혽 +D63E 1112 1169 11AD # 혾 => 혾 +D63F 1112 1169 11AE # 혿 => 혿 +D640 1112 1169 11AF # 홀 => 홀 +D641 1112 1169 11B0 # 홁 => 홁 +D642 1112 1169 11B1 # 홂 => 홂 +D643 1112 1169 11B2 # 홃 => 홃 +D644 1112 1169 11B3 # 홄 => 홄 +D645 1112 1169 11B4 # 홅 => 홅 +D646 1112 1169 11B5 # 홆 => 홆 +D647 1112 1169 11B6 # 홇 => 홇 +D648 1112 1169 11B7 # 홈 => 홈 +D649 1112 1169 11B8 # 홉 => 홉 +D64A 1112 1169 11B9 # 홊 => 홊 +D64B 1112 1169 11BA # 홋 => 홋 +D64C 1112 1169 11BB # 홌 => 홌 +D64D 1112 1169 11BC # 홍 => 홍 +D64E 1112 1169 11BD # 홎 => 홎 +D64F 1112 1169 11BE # 홏 => 홏 +D650 1112 1169 11BF # 홐 => 홐 +D651 1112 1169 11C0 # 홑 => 홑 +D652 1112 1169 11C1 # 홒 => 홒 +D653 1112 1169 11C2 # 홓 => 홓 +D654 1112 116A # 화 => 화 +D655 1112 116A 11A8 # 확 => 확 +D656 1112 116A 11A9 # 홖 => 홖 +D657 1112 116A 11AA # 홗 => 홗 +D658 1112 116A 11AB # 환 => 환 +D659 1112 116A 11AC # 홙 => 홙 +D65A 1112 116A 11AD # 홚 => 홚 +D65B 1112 116A 11AE # 홛 => 홛 +D65C 1112 116A 11AF # 활 => 활 +D65D 1112 116A 11B0 # 홝 => 홝 +D65E 1112 116A 11B1 # 홞 => 홞 +D65F 1112 116A 11B2 # 홟 => 홟 +D660 1112 116A 11B3 # 홠 => 홠 +D661 1112 116A 11B4 # 홡 => 홡 +D662 1112 116A 11B5 # 홢 => 홢 +D663 1112 116A 11B6 # 홣 => 홣 +D664 1112 116A 11B7 # 홤 => 홤 +D665 1112 116A 11B8 # 홥 => 홥 +D666 1112 116A 11B9 # 홦 => 홦 +D667 1112 116A 11BA # 홧 => 홧 +D668 1112 116A 11BB # 홨 => 홨 +D669 1112 116A 11BC # 황 => 황 +D66A 1112 116A 11BD # 홪 => 홪 +D66B 1112 116A 11BE # 홫 => 홫 +D66C 1112 116A 11BF # 홬 => 홬 +D66D 1112 116A 11C0 # 홭 => 홭 +D66E 1112 116A 11C1 # 홮 => 홮 +D66F 1112 116A 11C2 # 홯 => 홯 +D670 1112 116B # 홰 => 홰 +D671 1112 116B 11A8 # 홱 => 홱 +D672 1112 116B 11A9 # 홲 => 홲 
+D673 1112 116B 11AA # 홳 => 홳 +D674 1112 116B 11AB # 홴 => 홴 +D675 1112 116B 11AC # 홵 => 홵 +D676 1112 116B 11AD # 홶 => 홶 +D677 1112 116B 11AE # 홷 => 홷 +D678 1112 116B 11AF # 홸 => 홸 +D679 1112 116B 11B0 # 홹 => 홹 +D67A 1112 116B 11B1 # 홺 => 홺 +D67B 1112 116B 11B2 # 홻 => 홻 +D67C 1112 116B 11B3 # 홼 => 홼 +D67D 1112 116B 11B4 # 홽 => 홽 +D67E 1112 116B 11B5 # 홾 => 홾 +D67F 1112 116B 11B6 # 홿 => 홿 +D680 1112 116B 11B7 # 횀 => 횀 +D681 1112 116B 11B8 # 횁 => 횁 +D682 1112 116B 11B9 # 횂 => 횂 +D683 1112 116B 11BA # 횃 => 횃 +D684 1112 116B 11BB # 횄 => 횄 +D685 1112 116B 11BC # 횅 => 횅 +D686 1112 116B 11BD # 횆 => 횆 +D687 1112 116B 11BE # 횇 => 횇 +D688 1112 116B 11BF # 횈 => 횈 +D689 1112 116B 11C0 # 횉 => 횉 +D68A 1112 116B 11C1 # 횊 => 횊 +D68B 1112 116B 11C2 # 횋 => 횋 +D68C 1112 116C # 회 => 회 +D68D 1112 116C 11A8 # 획 => 획 +D68E 1112 116C 11A9 # 횎 => 횎 +D68F 1112 116C 11AA # 횏 => 횏 +D690 1112 116C 11AB # 횐 => 횐 +D691 1112 116C 11AC # 횑 => 횑 +D692 1112 116C 11AD # 횒 => 횒 +D693 1112 116C 11AE # 횓 => 횓 +D694 1112 116C 11AF # 횔 => 횔 +D695 1112 116C 11B0 # 횕 => 횕 +D696 1112 116C 11B1 # 횖 => 횖 +D697 1112 116C 11B2 # 횗 => 횗 +D698 1112 116C 11B3 # 횘 => 횘 +D699 1112 116C 11B4 # 횙 => 횙 +D69A 1112 116C 11B5 # 횚 => 횚 +D69B 1112 116C 11B6 # 횛 => 횛 +D69C 1112 116C 11B7 # 횜 => 횜 +D69D 1112 116C 11B8 # 횝 => 횝 +D69E 1112 116C 11B9 # 횞 => 횞 +D69F 1112 116C 11BA # 횟 => 횟 +D6A0 1112 116C 11BB # 횠 => 횠 +D6A1 1112 116C 11BC # 횡 => 횡 +D6A2 1112 116C 11BD # 횢 => 횢 +D6A3 1112 116C 11BE # 횣 => 횣 +D6A4 1112 116C 11BF # 횤 => 횤 +D6A5 1112 116C 11C0 # 횥 => 횥 +D6A6 1112 116C 11C1 # 횦 => 횦 +D6A7 1112 116C 11C2 # 횧 => 횧 +D6A8 1112 116D # 효 => 효 +D6A9 1112 116D 11A8 # 횩 => 횩 +D6AA 1112 116D 11A9 # 횪 => 횪 +D6AB 1112 116D 11AA # 횫 => 횫 +D6AC 1112 116D 11AB # 횬 => 횬 +D6AD 1112 116D 11AC # 횭 => 횭 +D6AE 1112 116D 11AD # 횮 => 횮 +D6AF 1112 116D 11AE # 횯 => 횯 +D6B0 1112 116D 11AF # 횰 => 횰 +D6B1 1112 116D 11B0 # 횱 => 횱 +D6B2 1112 116D 11B1 # 횲 => 횲 +D6B3 1112 116D 11B2 # 횳 => 횳 +D6B4 1112 116D 11B3 # 횴 => 횴 +D6B5 1112 116D 11B4 # 횵 => 횵 
+D6B6 1112 116D 11B5 # 횶 => 횶 +D6B7 1112 116D 11B6 # 횷 => 횷 +D6B8 1112 116D 11B7 # 횸 => 횸 +D6B9 1112 116D 11B8 # 횹 => 횹 +D6BA 1112 116D 11B9 # 횺 => 횺 +D6BB 1112 116D 11BA # 횻 => 횻 +D6BC 1112 116D 11BB # 횼 => 횼 +D6BD 1112 116D 11BC # 횽 => 횽 +D6BE 1112 116D 11BD # 횾 => 횾 +D6BF 1112 116D 11BE # 횿 => 횿 +D6C0 1112 116D 11BF # 훀 => 훀 +D6C1 1112 116D 11C0 # 훁 => 훁 +D6C2 1112 116D 11C1 # 훂 => 훂 +D6C3 1112 116D 11C2 # 훃 => 훃 +D6C4 1112 116E # 후 => 후 +D6C5 1112 116E 11A8 # 훅 => 훅 +D6C6 1112 116E 11A9 # 훆 => 훆 +D6C7 1112 116E 11AA # 훇 => 훇 +D6C8 1112 116E 11AB # 훈 => 훈 +D6C9 1112 116E 11AC # 훉 => 훉 +D6CA 1112 116E 11AD # 훊 => 훊 +D6CB 1112 116E 11AE # 훋 => 훋 +D6CC 1112 116E 11AF # 훌 => 훌 +D6CD 1112 116E 11B0 # 훍 => 훍 +D6CE 1112 116E 11B1 # 훎 => 훎 +D6CF 1112 116E 11B2 # 훏 => 훏 +D6D0 1112 116E 11B3 # 훐 => 훐 +D6D1 1112 116E 11B4 # 훑 => 훑 +D6D2 1112 116E 11B5 # 훒 => 훒 +D6D3 1112 116E 11B6 # 훓 => 훓 +D6D4 1112 116E 11B7 # 훔 => 훔 +D6D5 1112 116E 11B8 # 훕 => 훕 +D6D6 1112 116E 11B9 # 훖 => 훖 +D6D7 1112 116E 11BA # 훗 => 훗 +D6D8 1112 116E 11BB # 훘 => 훘 +D6D9 1112 116E 11BC # 훙 => 훙 +D6DA 1112 116E 11BD # 훚 => 훚 +D6DB 1112 116E 11BE # 훛 => 훛 +D6DC 1112 116E 11BF # 훜 => 훜 +D6DD 1112 116E 11C0 # 훝 => 훝 +D6DE 1112 116E 11C1 # 훞 => 훞 +D6DF 1112 116E 11C2 # 훟 => 훟 +D6E0 1112 116F # 훠 => 훠 +D6E1 1112 116F 11A8 # 훡 => 훡 +D6E2 1112 116F 11A9 # 훢 => 훢 +D6E3 1112 116F 11AA # 훣 => 훣 +D6E4 1112 116F 11AB # 훤 => 훤 +D6E5 1112 116F 11AC # 훥 => 훥 +D6E6 1112 116F 11AD # 훦 => 훦 +D6E7 1112 116F 11AE # 훧 => 훧 +D6E8 1112 116F 11AF # 훨 => 훨 +D6E9 1112 116F 11B0 # 훩 => 훩 +D6EA 1112 116F 11B1 # 훪 => 훪 +D6EB 1112 116F 11B2 # 훫 => 훫 +D6EC 1112 116F 11B3 # 훬 => 훬 +D6ED 1112 116F 11B4 # 훭 => 훭 +D6EE 1112 116F 11B5 # 훮 => 훮 +D6EF 1112 116F 11B6 # 훯 => 훯 +D6F0 1112 116F 11B7 # 훰 => 훰 +D6F1 1112 116F 11B8 # 훱 => 훱 +D6F2 1112 116F 11B9 # 훲 => 훲 +D6F3 1112 116F 11BA # 훳 => 훳 +D6F4 1112 116F 11BB # 훴 => 훴 +D6F5 1112 116F 11BC # 훵 => 훵 +D6F6 1112 116F 11BD # 훶 => 훶 +D6F7 1112 116F 11BE # 훷 => 훷 +D6F8 1112 116F 11BF # 훸 => 훸 
+D6F9 1112 116F 11C0 # 훹 => 훹 +D6FA 1112 116F 11C1 # 훺 => 훺 +D6FB 1112 116F 11C2 # 훻 => 훻 +D6FC 1112 1170 # 훼 => 훼 +D6FD 1112 1170 11A8 # 훽 => 훽 +D6FE 1112 1170 11A9 # 훾 => 훾 +D6FF 1112 1170 11AA # 훿 => 훿 +D700 1112 1170 11AB # 휀 => 휀 +D701 1112 1170 11AC # 휁 => 휁 +D702 1112 1170 11AD # 휂 => 휂 +D703 1112 1170 11AE # 휃 => 휃 +D704 1112 1170 11AF # 휄 => 휄 +D705 1112 1170 11B0 # 휅 => 휅 +D706 1112 1170 11B1 # 휆 => 휆 +D707 1112 1170 11B2 # 휇 => 휇 +D708 1112 1170 11B3 # 휈 => 휈 +D709 1112 1170 11B4 # 휉 => 휉 +D70A 1112 1170 11B5 # 휊 => 휊 +D70B 1112 1170 11B6 # 휋 => 휋 +D70C 1112 1170 11B7 # 휌 => 휌 +D70D 1112 1170 11B8 # 휍 => 휍 +D70E 1112 1170 11B9 # 휎 => 휎 +D70F 1112 1170 11BA # 휏 => 휏 +D710 1112 1170 11BB # 휐 => 휐 +D711 1112 1170 11BC # 휑 => 휑 +D712 1112 1170 11BD # 휒 => 휒 +D713 1112 1170 11BE # 휓 => 휓 +D714 1112 1170 11BF # 휔 => 휔 +D715 1112 1170 11C0 # 휕 => 휕 +D716 1112 1170 11C1 # 휖 => 휖 +D717 1112 1170 11C2 # 휗 => 휗 +D718 1112 1171 # 휘 => 휘 +D719 1112 1171 11A8 # 휙 => 휙 +D71A 1112 1171 11A9 # 휚 => 휚 +D71B 1112 1171 11AA # 휛 => 휛 +D71C 1112 1171 11AB # 휜 => 휜 +D71D 1112 1171 11AC # 휝 => 휝 +D71E 1112 1171 11AD # 휞 => 휞 +D71F 1112 1171 11AE # 휟 => 휟 +D720 1112 1171 11AF # 휠 => 휠 +D721 1112 1171 11B0 # 휡 => 휡 +D722 1112 1171 11B1 # 휢 => 휢 +D723 1112 1171 11B2 # 휣 => 휣 +D724 1112 1171 11B3 # 휤 => 휤 +D725 1112 1171 11B4 # 휥 => 휥 +D726 1112 1171 11B5 # 휦 => 휦 +D727 1112 1171 11B6 # 휧 => 휧 +D728 1112 1171 11B7 # 휨 => 휨 +D729 1112 1171 11B8 # 휩 => 휩 +D72A 1112 1171 11B9 # 휪 => 휪 +D72B 1112 1171 11BA # 휫 => 휫 +D72C 1112 1171 11BB # 휬 => 휬 +D72D 1112 1171 11BC # 휭 => 휭 +D72E 1112 1171 11BD # 휮 => 휮 +D72F 1112 1171 11BE # 휯 => 휯 +D730 1112 1171 11BF # 휰 => 휰 +D731 1112 1171 11C0 # 휱 => 휱 +D732 1112 1171 11C1 # 휲 => 휲 +D733 1112 1171 11C2 # 휳 => 휳 +D734 1112 1172 # 휴 => 휴 +D735 1112 1172 11A8 # 휵 => 휵 +D736 1112 1172 11A9 # 휶 => 휶 +D737 1112 1172 11AA # 휷 => 휷 +D738 1112 1172 11AB # 휸 => 휸 +D739 1112 1172 11AC # 휹 => 휹 +D73A 1112 1172 11AD # 휺 => 휺 +D73B 1112 1172 11AE # 휻 => 휻 
+D73C 1112 1172 11AF # 휼 => 휼 +D73D 1112 1172 11B0 # 휽 => 휽 +D73E 1112 1172 11B1 # 휾 => 휾 +D73F 1112 1172 11B2 # 휿 => 휿 +D740 1112 1172 11B3 # 흀 => 흀 +D741 1112 1172 11B4 # 흁 => 흁 +D742 1112 1172 11B5 # 흂 => 흂 +D743 1112 1172 11B6 # 흃 => 흃 +D744 1112 1172 11B7 # 흄 => 흄 +D745 1112 1172 11B8 # 흅 => 흅 +D746 1112 1172 11B9 # 흆 => 흆 +D747 1112 1172 11BA # 흇 => 흇 +D748 1112 1172 11BB # 흈 => 흈 +D749 1112 1172 11BC # 흉 => 흉 +D74A 1112 1172 11BD # 흊 => 흊 +D74B 1112 1172 11BE # 흋 => 흋 +D74C 1112 1172 11BF # 흌 => 흌 +D74D 1112 1172 11C0 # 흍 => 흍 +D74E 1112 1172 11C1 # 흎 => 흎 +D74F 1112 1172 11C2 # 흏 => 흏 +D750 1112 1173 # 흐 => 흐 +D751 1112 1173 11A8 # 흑 => 흑 +D752 1112 1173 11A9 # 흒 => 흒 +D753 1112 1173 11AA # 흓 => 흓 +D754 1112 1173 11AB # 흔 => 흔 +D755 1112 1173 11AC # 흕 => 흕 +D756 1112 1173 11AD # 흖 => 흖 +D757 1112 1173 11AE # 흗 => 흗 +D758 1112 1173 11AF # 흘 => 흘 +D759 1112 1173 11B0 # 흙 => 흙 +D75A 1112 1173 11B1 # 흚 => 흚 +D75B 1112 1173 11B2 # 흛 => 흛 +D75C 1112 1173 11B3 # 흜 => 흜 +D75D 1112 1173 11B4 # 흝 => 흝 +D75E 1112 1173 11B5 # 흞 => 흞 +D75F 1112 1173 11B6 # 흟 => 흟 +D760 1112 1173 11B7 # 흠 => 흠 +D761 1112 1173 11B8 # 흡 => 흡 +D762 1112 1173 11B9 # 흢 => 흢 +D763 1112 1173 11BA # 흣 => 흣 +D764 1112 1173 11BB # 흤 => 흤 +D765 1112 1173 11BC # 흥 => 흥 +D766 1112 1173 11BD # 흦 => 흦 +D767 1112 1173 11BE # 흧 => 흧 +D768 1112 1173 11BF # 흨 => 흨 +D769 1112 1173 11C0 # 흩 => 흩 +D76A 1112 1173 11C1 # 흪 => 흪 +D76B 1112 1173 11C2 # 흫 => 흫 +D76C 1112 1174 # 희 => 희 +D76D 1112 1174 11A8 # 흭 => 흭 +D76E 1112 1174 11A9 # 흮 => 흮 +D76F 1112 1174 11AA # 흯 => 흯 +D770 1112 1174 11AB # 흰 => 흰 +D771 1112 1174 11AC # 흱 => 흱 +D772 1112 1174 11AD # 흲 => 흲 +D773 1112 1174 11AE # 흳 => 흳 +D774 1112 1174 11AF # 흴 => 흴 +D775 1112 1174 11B0 # 흵 => 흵 +D776 1112 1174 11B1 # 흶 => 흶 +D777 1112 1174 11B2 # 흷 => 흷 +D778 1112 1174 11B3 # 흸 => 흸 +D779 1112 1174 11B4 # 흹 => 흹 +D77A 1112 1174 11B5 # 흺 => 흺 +D77B 1112 1174 11B6 # 흻 => 흻 +D77C 1112 1174 11B7 # 흼 => 흼 +D77D 1112 1174 11B8 # 흽 => 흽 +D77E 1112 1174 11B9 # 흾 => 흾 
+D77F 1112 1174 11BA # 흿 => 흿 +D780 1112 1174 11BB # 힀 => 힀 +D781 1112 1174 11BC # 힁 => 힁 +D782 1112 1174 11BD # 힂 => 힂 +D783 1112 1174 11BE # 힃 => 힃 +D784 1112 1174 11BF # 힄 => 힄 +D785 1112 1174 11C0 # 힅 => 힅 +D786 1112 1174 11C1 # 힆 => 힆 +D787 1112 1174 11C2 # 힇 => 힇 +D788 1112 1175 # 히 => 히 +D789 1112 1175 11A8 # 힉 => 힉 +D78A 1112 1175 11A9 # 힊 => 힊 +D78B 1112 1175 11AA # 힋 => 힋 +D78C 1112 1175 11AB # 힌 => 힌 +D78D 1112 1175 11AC # 힍 => 힍 +D78E 1112 1175 11AD # 힎 => 힎 +D78F 1112 1175 11AE # 힏 => 힏 +D790 1112 1175 11AF # 힐 => 힐 +D791 1112 1175 11B0 # 힑 => 힑 +D792 1112 1175 11B1 # 힒 => 힒 +D793 1112 1175 11B2 # 힓 => 힓 +D794 1112 1175 11B3 # 힔 => 힔 +D795 1112 1175 11B4 # 힕 => 힕 +D796 1112 1175 11B5 # 힖 => 힖 +D797 1112 1175 11B6 # 힗 => 힗 +D798 1112 1175 11B7 # 힘 => 힘 +D799 1112 1175 11B8 # 힙 => 힙 +D79A 1112 1175 11B9 # 힚 => 힚 +D79B 1112 1175 11BA # 힛 => 힛 +D79C 1112 1175 11BB # 힜 => 힜 +D79D 1112 1175 11BC # 힝 => 힝 +D79E 1112 1175 11BD # 힞 => 힞 +D79F 1112 1175 11BE # 힟 => 힟 +D7A0 1112 1175 11BF # 힠 => 힠 +D7A1 1112 1175 11C0 # 힡 => 힡 +D7A2 1112 1175 11C1 # 힢 => 힢 +D7A3 1112 1175 11C2 # 힣 => 힣 +F900 8C48 # 豈 => 豈 +F901 66F4 # 更 => 更 +F902 8ECA # 車 => 車 +F903 8CC8 # 賈 => 賈 +F904 6ED1 # 滑 => 滑 +F905 4E32 # 串 => 串 +F906 53E5 # 句 => 句 +F907 9F9C # 龜 => 龜 +F908 9F9C # 龜 => 龜 +F909 5951 # 契 => 契 +F90A 91D1 # 金 => 金 +F90B 5587 # 喇 => 喇 +F90C 5948 # 奈 => 奈 +F90D 61F6 # 懶 => 懶 +F90E 7669 # 癩 => 癩 +F90F 7F85 # 羅 => 羅 +F910 863F # 蘿 => 蘿 +F911 87BA # 螺 => 螺 +F912 88F8 # 裸 => 裸 +F913 908F # 邏 => 邏 +F914 6A02 # 樂 => 樂 +F915 6D1B # 洛 => 洛 +F916 70D9 # 烙 => 烙 +F917 73DE # 珞 => 珞 +F918 843D # 落 => 落 +F919 916A # 酪 => 酪 +F91A 99F1 # 駱 => 駱 +F91B 4E82 # 亂 => 亂 +F91C 5375 # 卵 => 卵 +F91D 6B04 # 欄 => 欄 +F91E 721B # 爛 => 爛 +F91F 862D # 蘭 => 蘭 +F920 9E1E # 鸞 => 鸞 +F921 5D50 # 嵐 => 嵐 +F922 6FEB # 濫 => 濫 +F923 85CD # 藍 => 藍 +F924 8964 # 襤 => 襤 +F925 62C9 # 拉 => 拉 +F926 81D8 # 臘 => 臘 +F927 881F # 蠟 => 蠟 +F928 5ECA # 廊 => 廊 +F929 6717 # 朗 => 朗 +F92A 6D6A # 浪 => 浪 +F92B 72FC # 狼 => 狼 +F92C 90CE # 郎 
=> 郎 +F92D 4F86 # 來 => 來 +F92E 51B7 # 冷 => 冷 +F92F 52DE # 勞 => 勞 +F930 64C4 # 擄 => 擄 +F931 6AD3 # 櫓 => 櫓 +F932 7210 # 爐 => 爐 +F933 76E7 # 盧 => 盧 +F934 8001 # 老 => 老 +F935 8606 # 蘆 => 蘆 +F936 865C # 虜 => 虜 +F937 8DEF # 路 => 路 +F938 9732 # 露 => 露 +F939 9B6F # 魯 => 魯 +F93A 9DFA # 鷺 => 鷺 +F93B 788C # 碌 => 碌 +F93C 797F # 祿 => 祿 +F93D 7DA0 # 綠 => 綠 +F93E 83C9 # 菉 => 菉 +F93F 9304 # 錄 => 錄 +F940 9E7F # 鹿 => 鹿 +F941 8AD6 # 論 => 論 +F942 58DF # 壟 => 壟 +F943 5F04 # 弄 => 弄 +F944 7C60 # 籠 => 籠 +F945 807E # 聾 => 聾 +F946 7262 # 牢 => 牢 +F947 78CA # 磊 => 磊 +F948 8CC2 # 賂 => 賂 +F949 96F7 # 雷 => 雷 +F94A 58D8 # 壘 => 壘 +F94B 5C62 # 屢 => 屢 +F94C 6A13 # 樓 => 樓 +F94D 6DDA # 淚 => 淚 +F94E 6F0F # 漏 => 漏 +F94F 7D2F # 累 => 累 +F950 7E37 # 縷 => 縷 +F951 964B # 陋 => 陋 +F952 52D2 # 勒 => 勒 +F953 808B # 肋 => 肋 +F954 51DC # 凜 => 凜 +F955 51CC # 凌 => 凌 +F956 7A1C # 稜 => 稜 +F957 7DBE # 綾 => 綾 +F958 83F1 # 菱 => 菱 +F959 9675 # 陵 => 陵 +F95A 8B80 # 讀 => 讀 +F95B 62CF # 拏 => 拏 +F95C 6A02 # 樂 => 樂 +F95D 8AFE # 諾 => 諾 +F95E 4E39 # 丹 => 丹 +F95F 5BE7 # 寧 => 寧 +F960 6012 # 怒 => 怒 +F961 7387 # 率 => 率 +F962 7570 # 異 => 異 +F963 5317 # 北 => 北 +F964 78FB # 磻 => 磻 +F965 4FBF # 便 => 便 +F966 5FA9 # 復 => 復 +F967 4E0D # 不 => 不 +F968 6CCC # 泌 => 泌 +F969 6578 # 數 => 數 +F96A 7D22 # 索 => 索 +F96B 53C3 # 參 => 參 +F96C 585E # 塞 => 塞 +F96D 7701 # 省 => 省 +F96E 8449 # 葉 => 葉 +F96F 8AAA # 說 => 說 +F970 6BBA # 殺 => 殺 +F971 8FB0 # 辰 => 辰 +F972 6C88 # 沈 => 沈 +F973 62FE # 拾 => 拾 +F974 82E5 # 若 => 若 +F975 63A0 # 掠 => 掠 +F976 7565 # 略 => 略 +F977 4EAE # 亮 => 亮 +F978 5169 # 兩 => 兩 +F979 51C9 # 凉 => 凉 +F97A 6881 # 梁 => 梁 +F97B 7CE7 # 糧 => 糧 +F97C 826F # 良 => 良 +F97D 8AD2 # 諒 => 諒 +F97E 91CF # 量 => 量 +F97F 52F5 # 勵 => 勵 +F980 5442 # 呂 => 呂 +F981 5973 # 女 => 女 +F982 5EEC # 廬 => 廬 +F983 65C5 # 旅 => 旅 +F984 6FFE # 濾 => 濾 +F985 792A # 礪 => 礪 +F986 95AD # 閭 => 閭 +F987 9A6A # 驪 => 驪 +F988 9E97 # 麗 => 麗 +F989 9ECE # 黎 => 黎 +F98A 529B # 力 => 力 +F98B 66C6 # 曆 => 曆 +F98C 6B77 # 歷 => 歷 +F98D 8F62 # 轢 => 轢 +F98E 5E74 # 年 => 年 +F98F 6190 # 憐 => 憐 +F990 6200 # 戀 
=> 戀 +F991 649A # 撚 => 撚 +F992 6F23 # 漣 => 漣 +F993 7149 # 煉 => 煉 +F994 7489 # 璉 => 璉 +F995 79CA # 秊 => 秊 +F996 7DF4 # 練 => 練 +F997 806F # 聯 => 聯 +F998 8F26 # 輦 => 輦 +F999 84EE # 蓮 => 蓮 +F99A 9023 # 連 => 連 +F99B 934A # 鍊 => 鍊 +F99C 5217 # 列 => 列 +F99D 52A3 # 劣 => 劣 +F99E 54BD # 咽 => 咽 +F99F 70C8 # 烈 => 烈 +F9A0 88C2 # 裂 => 裂 +F9A1 8AAA # 說 => 說 +F9A2 5EC9 # 廉 => 廉 +F9A3 5FF5 # 念 => 念 +F9A4 637B # 捻 => 捻 +F9A5 6BAE # 殮 => 殮 +F9A6 7C3E # 簾 => 簾 +F9A7 7375 # 獵 => 獵 +F9A8 4EE4 # 令 => 令 +F9A9 56F9 # 囹 => 囹 +F9AA 5BE7 # 寧 => 寧 +F9AB 5DBA # 嶺 => 嶺 +F9AC 601C # 怜 => 怜 +F9AD 73B2 # 玲 => 玲 +F9AE 7469 # 瑩 => 瑩 +F9AF 7F9A # 羚 => 羚 +F9B0 8046 # 聆 => 聆 +F9B1 9234 # 鈴 => 鈴 +F9B2 96F6 # 零 => 零 +F9B3 9748 # 靈 => 靈 +F9B4 9818 # 領 => 領 +F9B5 4F8B # 例 => 例 +F9B6 79AE # 禮 => 禮 +F9B7 91B4 # 醴 => 醴 +F9B8 96B8 # 隸 => 隸 +F9B9 60E1 # 惡 => 惡 +F9BA 4E86 # 了 => 了 +F9BB 50DA # 僚 => 僚 +F9BC 5BEE # 寮 => 寮 +F9BD 5C3F # 尿 => 尿 +F9BE 6599 # 料 => 料 +F9BF 6A02 # 樂 => 樂 +F9C0 71CE # 燎 => 燎 +F9C1 7642 # 療 => 療 +F9C2 84FC # 蓼 => 蓼 +F9C3 907C # 遼 => 遼 +F9C4 9F8D # 龍 => 龍 +F9C5 6688 # 暈 => 暈 +F9C6 962E # 阮 => 阮 +F9C7 5289 # 劉 => 劉 +F9C8 677B # 杻 => 杻 +F9C9 67F3 # 柳 => 柳 +F9CA 6D41 # 流 => 流 +F9CB 6E9C # 溜 => 溜 +F9CC 7409 # 琉 => 琉 +F9CD 7559 # 留 => 留 +F9CE 786B # 硫 => 硫 +F9CF 7D10 # 紐 => 紐 +F9D0 985E # 類 => 類 +F9D1 516D # 六 => 六 +F9D2 622E # 戮 => 戮 +F9D3 9678 # 陸 => 陸 +F9D4 502B # 倫 => 倫 +F9D5 5D19 # 崙 => 崙 +F9D6 6DEA # 淪 => 淪 +F9D7 8F2A # 輪 => 輪 +F9D8 5F8B # 律 => 律 +F9D9 6144 # 慄 => 慄 +F9DA 6817 # 栗 => 栗 +F9DB 7387 # 率 => 率 +F9DC 9686 # 隆 => 隆 +F9DD 5229 # 利 => 利 +F9DE 540F # 吏 => 吏 +F9DF 5C65 # 履 => 履 +F9E0 6613 # 易 => 易 +F9E1 674E # 李 => 李 +F9E2 68A8 # 梨 => 梨 +F9E3 6CE5 # 泥 => 泥 +F9E4 7406 # 理 => 理 +F9E5 75E2 # 痢 => 痢 +F9E6 7F79 # 罹 => 罹 +F9E7 88CF # 裏 => 裏 +F9E8 88E1 # 裡 => 裡 +F9E9 91CC # 里 => 里 +F9EA 96E2 # 離 => 離 +F9EB 533F # 匿 => 匿 +F9EC 6EBA # 溺 => 溺 +F9ED 541D # 吝 => 吝 +F9EE 71D0 # 燐 => 燐 +F9EF 7498 # 璘 => 璘 +F9F0 85FA # 藺 => 藺 +F9F1 96A3 # 隣 => 隣 +F9F2 9C57 # 鱗 => 鱗 +F9F3 9E9F # 麟 => 麟 +F9F4 6797 # 林 
=> 林 +F9F5 6DCB # 淋 => 淋 +F9F6 81E8 # 臨 => 臨 +F9F7 7ACB # 立 => 立 +F9F8 7B20 # 笠 => 笠 +F9F9 7C92 # 粒 => 粒 +F9FA 72C0 # 狀 => 狀 +F9FB 7099 # 炙 => 炙 +F9FC 8B58 # 識 => 識 +F9FD 4EC0 # 什 => 什 +F9FE 8336 # 茶 => 茶 +F9FF 523A # 刺 => 刺 +FA00 5207 # 切 => 切 +FA01 5EA6 # 度 => 度 +FA02 62D3 # 拓 => 拓 +FA03 7CD6 # 糖 => 糖 +FA04 5B85 # 宅 => 宅 +FA05 6D1E # 洞 => 洞 +FA06 66B4 # 暴 => 暴 +FA07 8F3B # 輻 => 輻 +FA08 884C # 行 => 行 +FA09 964D # 降 => 降 +FA0A 898B # 見 => 見 +FA0B 5ED3 # 廓 => 廓 +FA0C 5140 # 兀 => 兀 +FA0D 55C0 # 嗀 => 嗀 +FA10 585A # 塚 => 塚 +FA12 6674 # 晴 => 晴 +FA15 51DE # 凞 => 凞 +FA16 732A # 猪 => 猪 +FA17 76CA # 益 => 益 +FA18 793C # 礼 => 礼 +FA19 795E # 神 => 神 +FA1A 7965 # 祥 => 祥 +FA1B 798F # 福 => 福 +FA1C 9756 # 靖 => 靖 +FA1D 7CBE # 精 => 精 +FA1E 7FBD # 羽 => 羽 +FA20 8612 # 蘒 => 蘒 +FA22 8AF8 # 諸 => 諸 +FA25 9038 # 逸 => 逸 +FA26 90FD # 都 => 都 +FA2A 98EF # 飯 => 飯 +FA2B 98FC # 飼 => 飼 +FA2C 9928 # 館 => 館 +FA2D 9DB4 # 鶴 => 鶴 +FA2E 90DE # 郞 => 郞 +FA2F 96B7 # 隷 => 隷 +FA30 4FAE # 侮 => 侮 +FA31 50E7 # 僧 => 僧 +FA32 514D # 免 => 免 +FA33 52C9 # 勉 => 勉 +FA34 52E4 # 勤 => 勤 +FA35 5351 # 卑 => 卑 +FA36 559D # 喝 => 喝 +FA37 5606 # 嘆 => 嘆 +FA38 5668 # 器 => 器 +FA39 5840 # 塀 => 塀 +FA3A 58A8 # 墨 => 墨 +FA3B 5C64 # 層 => 層 +FA3C 5C6E # 屮 => 屮 +FA3D 6094 # 悔 => 悔 +FA3E 6168 # 慨 => 慨 +FA3F 618E # 憎 => 憎 +FA40 61F2 # 懲 => 懲 +FA41 654F # 敏 => 敏 +FA42 65E2 # 既 => 既 +FA43 6691 # 暑 => 暑 +FA44 6885 # 梅 => 梅 +FA45 6D77 # 海 => 海 +FA46 6E1A # 渚 => 渚 +FA47 6F22 # 漢 => 漢 +FA48 716E # 煮 => 煮 +FA49 722B # 爫 => 爫 +FA4A 7422 # 琢 => 琢 +FA4B 7891 # 碑 => 碑 +FA4C 793E # 社 => 社 +FA4D 7949 # 祉 => 祉 +FA4E 7948 # 祈 => 祈 +FA4F 7950 # 祐 => 祐 +FA50 7956 # 祖 => 祖 +FA51 795D # 祝 => 祝 +FA52 798D # 禍 => 禍 +FA53 798E # 禎 => 禎 +FA54 7A40 # 穀 => 穀 +FA55 7A81 # 突 => 突 +FA56 7BC0 # 節 => 節 +FA57 7DF4 # 練 => 練 +FA58 7E09 # 縉 => 縉 +FA59 7E41 # 繁 => 繁 +FA5A 7F72 # 署 => 署 +FA5B 8005 # 者 => 者 +FA5C 81ED # 臭 => 臭 +FA5D 8279 # 艹 => 艹 +FA5E 8279 # 艹 => 艹 +FA5F 8457 # 著 => 著 +FA60 8910 # 褐 => 褐 +FA61 8996 # 視 => 視 +FA62 8B01 # 謁 => 謁 +FA63 8B39 # 謹 => 謹 +FA64 8CD3 # 賓 
=> 賓 +FA65 8D08 # 贈 => 贈 +FA66 8FB6 # 辶 => 辶 +FA67 9038 # 逸 => 逸 +FA68 96E3 # 難 => 難 +FA69 97FF # 響 => 響 +FA6A 983B # 頻 => 頻 +FA6B 6075 # 恵 => 恵 +FA6C 242EE # 𤋮 => 𤋮 +FA6D 8218 # 舘 => 舘 +FA70 4E26 # 並 => 並 +FA71 51B5 # 况 => 况 +FA72 5168 # 全 => 全 +FA73 4F80 # 侀 => 侀 +FA74 5145 # 充 => 充 +FA75 5180 # 冀 => 冀 +FA76 52C7 # 勇 => 勇 +FA77 52FA # 勺 => 勺 +FA78 559D # 喝 => 喝 +FA79 5555 # 啕 => 啕 +FA7A 5599 # 喙 => 喙 +FA7B 55E2 # 嗢 => 嗢 +FA7C 585A # 塚 => 塚 +FA7D 58B3 # 墳 => 墳 +FA7E 5944 # 奄 => 奄 +FA7F 5954 # 奔 => 奔 +FA80 5A62 # 婢 => 婢 +FA81 5B28 # 嬨 => 嬨 +FA82 5ED2 # 廒 => 廒 +FA83 5ED9 # 廙 => 廙 +FA84 5F69 # 彩 => 彩 +FA85 5FAD # 徭 => 徭 +FA86 60D8 # 惘 => 惘 +FA87 614E # 慎 => 慎 +FA88 6108 # 愈 => 愈 +FA89 618E # 憎 => 憎 +FA8A 6160 # 慠 => 慠 +FA8B 61F2 # 懲 => 懲 +FA8C 6234 # 戴 => 戴 +FA8D 63C4 # 揄 => 揄 +FA8E 641C # 搜 => 搜 +FA8F 6452 # 摒 => 摒 +FA90 6556 # 敖 => 敖 +FA91 6674 # 晴 => 晴 +FA92 6717 # 朗 => 朗 +FA93 671B # 望 => 望 +FA94 6756 # 杖 => 杖 +FA95 6B79 # 歹 => 歹 +FA96 6BBA # 殺 => 殺 +FA97 6D41 # 流 => 流 +FA98 6EDB # 滛 => 滛 +FA99 6ECB # 滋 => 滋 +FA9A 6F22 # 漢 => 漢 +FA9B 701E # 瀞 => 瀞 +FA9C 716E # 煮 => 煮 +FA9D 77A7 # 瞧 => 瞧 +FA9E 7235 # 爵 => 爵 +FA9F 72AF # 犯 => 犯 +FAA0 732A # 猪 => 猪 +FAA1 7471 # 瑱 => 瑱 +FAA2 7506 # 甆 => 甆 +FAA3 753B # 画 => 画 +FAA4 761D # 瘝 => 瘝 +FAA5 761F # 瘟 => 瘟 +FAA6 76CA # 益 => 益 +FAA7 76DB # 盛 => 盛 +FAA8 76F4 # 直 => 直 +FAA9 774A # 睊 => 睊 +FAAA 7740 # 着 => 着 +FAAB 78CC # 磌 => 磌 +FAAC 7AB1 # 窱 => 窱 +FAAD 7BC0 # 節 => 節 +FAAE 7C7B # 类 => 类 +FAAF 7D5B # 絛 => 絛 +FAB0 7DF4 # 練 => 練 +FAB1 7F3E # 缾 => 缾 +FAB2 8005 # 者 => 者 +FAB3 8352 # 荒 => 荒 +FAB4 83EF # 華 => 華 +FAB5 8779 # 蝹 => 蝹 +FAB6 8941 # 襁 => 襁 +FAB7 8986 # 覆 => 覆 +FAB8 8996 # 視 => 視 +FAB9 8ABF # 調 => 調 +FABA 8AF8 # 諸 => 諸 +FABB 8ACB # 請 => 請 +FABC 8B01 # 謁 => 謁 +FABD 8AFE # 諾 => 諾 +FABE 8AED # 諭 => 諭 +FABF 8B39 # 謹 => 謹 +FAC0 8B8A # 變 => 變 +FAC1 8D08 # 贈 => 贈 +FAC2 8F38 # 輸 => 輸 +FAC3 9072 # 遲 => 遲 +FAC4 9199 # 醙 => 醙 +FAC5 9276 # 鉶 => 鉶 +FAC6 967C # 陼 => 陼 +FAC7 96E3 # 難 => 難 +FAC8 9756 # 靖 => 靖 +FAC9 97DB # 韛 => 韛 +FACA 97FF # 
響 => 響 +FACB 980B # 頋 => 頋 +FACC 983B # 頻 => 頻 +FACD 9B12 # 鬒 => 鬒 +FACE 9F9C # 龜 => 龜 +FACF 2284A # 𢡊 => 𢡊 +FAD0 22844 # 𢡄 => 𢡄 +FAD1 233D5 # 𣏕 => 𣏕 +FAD2 3B9D # 㮝 => 㮝 +FAD3 4018 # 䀘 => 䀘 +FAD4 4039 # 䀹 => 䀹 +FAD5 25249 # 𥉉 => 𥉉 +FAD6 25CD0 # 𥳐 => 𥳐 +FAD7 27ED3 # 𧻓 => 𧻓 +FAD8 9F43 # 齃 => 齃 +FAD9 9F8E # 龎 => 龎 +FB00 66 66 # ff => ff +FB01 66 69 # fi => fi +FB02 66 6C # fl => fl +FB03 66 66 69 # ffi => ffi +FB04 66 66 6C # ffl => ffl +FB05 73 74 # ſt => st +FB06 73 74 # st => st +FB13 574 576 # ﬓ => մն +FB14 574 565 # ﬔ => մե +FB15 574 56B # ﬕ => մի +FB16 57E 576 # ﬖ => վն +FB17 574 56D # ﬗ => մխ +FB1D 5D9 5B4 # יִ => יִ +FB1F 5F2 5B7 # ײַ => ײַ +FB20 5E2 # ﬠ => ע +FB21 5D0 # ﬡ => א +FB22 5D3 # ﬢ => ד +FB23 5D4 # ﬣ => ה +FB24 5DB # ﬤ => כ +FB25 5DC # ﬥ => ל +FB26 5DD # ﬦ => ם +FB27 5E8 # ﬧ => ר +FB28 5EA # ﬨ => ת +FB29 2B # ﬩ => + +FB2A 5E9 5C1 # שׁ => שׁ +FB2B 5E9 5C2 # שׂ => שׂ +FB2C 5E9 5BC 5C1 # שּׁ => שּׁ +FB2D 5E9 5BC 5C2 # שּׂ => שּׂ +FB2E 5D0 5B7 # אַ => אַ +FB2F 5D0 5B8 # אָ => אָ +FB30 5D0 5BC # אּ => אּ +FB31 5D1 5BC # בּ => בּ +FB32 5D2 5BC # גּ => גּ +FB33 5D3 5BC # דּ => דּ +FB34 5D4 5BC # הּ => הּ +FB35 5D5 5BC # וּ => וּ +FB36 5D6 5BC # זּ => זּ +FB38 5D8 5BC # טּ => טּ +FB39 5D9 5BC # יּ => יּ +FB3A 5DA 5BC # ךּ => ךּ +FB3B 5DB 5BC # כּ => כּ +FB3C 5DC 5BC # לּ => לּ +FB3E 5DE 5BC # מּ => מּ +FB40 5E0 5BC # נּ => נּ +FB41 5E1 5BC # סּ => סּ +FB43 5E3 5BC # ףּ => ףּ +FB44 5E4 5BC # פּ => פּ +FB46 5E6 5BC # צּ => צּ +FB47 5E7 5BC # קּ => קּ +FB48 5E8 5BC # רּ => רּ +FB49 5E9 5BC # שּ => שּ +FB4A 5EA 5BC # תּ => תּ +FB4B 5D5 5B9 # וֹ => וֹ +FB4C 5D1 5BF # בֿ => בֿ +FB4D 5DB 5BF # כֿ => כֿ +FB4E 5E4 5BF # פֿ => פֿ +FB4F 5D0 5DC # ﭏ => אל +FB50 671 # ﭐ => ٱ +FB51 671 # ﭑ => ٱ +FB52 67B # ﭒ => ٻ +FB53 67B # ﭓ => ٻ +FB54 67B # ﭔ => ٻ +FB55 67B # ﭕ => ٻ +FB56 67E # ﭖ => پ +FB57 67E # ﭗ => پ +FB58 67E # ﭘ => پ +FB59 67E # ﭙ => پ +FB5A 680 # ﭚ => ڀ +FB5B 680 # ﭛ => ڀ +FB5C 680 # ﭜ => ڀ +FB5D 680 # ﭝ => ڀ +FB5E 67A # ﭞ => ٺ +FB5F 67A # ﭟ => ٺ +FB60 67A # ﭠ => 
ٺ +FB61 67A # ﭡ => ٺ +FB62 67F # ﭢ => ٿ +FB63 67F # ﭣ => ٿ +FB64 67F # ﭤ => ٿ +FB65 67F # ﭥ => ٿ +FB66 679 # ﭦ => ٹ +FB67 679 # ﭧ => ٹ +FB68 679 # ﭨ => ٹ +FB69 679 # ﭩ => ٹ +FB6A 6A4 # ﭪ => ڤ +FB6B 6A4 # ﭫ => ڤ +FB6C 6A4 # ﭬ => ڤ +FB6D 6A4 # ﭭ => ڤ +FB6E 6A6 # ﭮ => ڦ +FB6F 6A6 # ﭯ => ڦ +FB70 6A6 # ﭰ => ڦ +FB71 6A6 # ﭱ => ڦ +FB72 684 # ﭲ => ڄ +FB73 684 # ﭳ => ڄ +FB74 684 # ﭴ => ڄ +FB75 684 # ﭵ => ڄ +FB76 683 # ﭶ => ڃ +FB77 683 # ﭷ => ڃ +FB78 683 # ﭸ => ڃ +FB79 683 # ﭹ => ڃ +FB7A 686 # ﭺ => چ +FB7B 686 # ﭻ => چ +FB7C 686 # ﭼ => چ +FB7D 686 # ﭽ => چ +FB7E 687 # ﭾ => ڇ +FB7F 687 # ﭿ => ڇ +FB80 687 # ﮀ => ڇ +FB81 687 # ﮁ => ڇ +FB82 68D # ﮂ => ڍ +FB83 68D # ﮃ => ڍ +FB84 68C # ﮄ => ڌ +FB85 68C # ﮅ => ڌ +FB86 68E # ﮆ => ڎ +FB87 68E # ﮇ => ڎ +FB88 688 # ﮈ => ڈ +FB89 688 # ﮉ => ڈ +FB8A 698 # ﮊ => ژ +FB8B 698 # ﮋ => ژ +FB8C 691 # ﮌ => ڑ +FB8D 691 # ﮍ => ڑ +FB8E 6A9 # ﮎ => ک +FB8F 6A9 # ﮏ => ک +FB90 6A9 # ﮐ => ک +FB91 6A9 # ﮑ => ک +FB92 6AF # ﮒ => گ +FB93 6AF # ﮓ => گ +FB94 6AF # ﮔ => گ +FB95 6AF # ﮕ => گ +FB96 6B3 # ﮖ => ڳ +FB97 6B3 # ﮗ => ڳ +FB98 6B3 # ﮘ => ڳ +FB99 6B3 # ﮙ => ڳ +FB9A 6B1 # ﮚ => ڱ +FB9B 6B1 # ﮛ => ڱ +FB9C 6B1 # ﮜ => ڱ +FB9D 6B1 # ﮝ => ڱ +FB9E 6BA # ﮞ => ں +FB9F 6BA # ﮟ => ں +FBA0 6BB # ﮠ => ڻ +FBA1 6BB # ﮡ => ڻ +FBA2 6BB # ﮢ => ڻ +FBA3 6BB # ﮣ => ڻ +FBA4 6D5 654 # ﮤ => ۀ +FBA5 6D5 654 # ﮥ => ۀ +FBA6 6C1 # ﮦ => ہ +FBA7 6C1 # ﮧ => ہ +FBA8 6C1 # ﮨ => ہ +FBA9 6C1 # ﮩ => ہ +FBAA 6BE # ﮪ => ھ +FBAB 6BE # ﮫ => ھ +FBAC 6BE # ﮬ => ھ +FBAD 6BE # ﮭ => ھ +FBAE 6D2 # ﮮ => ے +FBAF 6D2 # ﮯ => ے +FBB0 6D2 654 # ﮰ => ۓ +FBB1 6D2 654 # ﮱ => ۓ +FBD3 6AD # ﯓ => ڭ +FBD4 6AD # ﯔ => ڭ +FBD5 6AD # ﯕ => ڭ +FBD6 6AD # ﯖ => ڭ +FBD7 6C7 # ﯗ => ۇ +FBD8 6C7 # ﯘ => ۇ +FBD9 6C6 # ﯙ => ۆ +FBDA 6C6 # ﯚ => ۆ +FBDB 6C8 # ﯛ => ۈ +FBDC 6C8 # ﯜ => ۈ +FBDD 6C7 674 # ﯝ => ۇٴ +FBDE 6CB # ﯞ => ۋ +FBDF 6CB # ﯟ => ۋ +FBE0 6C5 # ﯠ => ۅ +FBE1 6C5 # ﯡ => ۅ +FBE2 6C9 # ﯢ => ۉ +FBE3 6C9 # ﯣ => ۉ +FBE4 6D0 # ﯤ => ې +FBE5 6D0 # ﯥ => ې +FBE6 6D0 # ﯦ => ې +FBE7 6D0 # ﯧ => ې +FBE8 649 # ﯨ => ى +FBE9 649 # ﯩ => ى 
+FBEA 64A 654 627 # ﯪ => ئا +FBEB 64A 654 627 # ﯫ => ئا +FBEC 64A 654 6D5 # ﯬ => ئە +FBED 64A 654 6D5 # ﯭ => ئە +FBEE 64A 654 648 # ﯮ => ئو +FBEF 64A 654 648 # ﯯ => ئو +FBF0 64A 654 6C7 # ﯰ => ئۇ +FBF1 64A 654 6C7 # ﯱ => ئۇ +FBF2 64A 654 6C6 # ﯲ => ئۆ +FBF3 64A 654 6C6 # ﯳ => ئۆ +FBF4 64A 654 6C8 # ﯴ => ئۈ +FBF5 64A 654 6C8 # ﯵ => ئۈ +FBF6 64A 654 6D0 # ﯶ => ئې +FBF7 64A 654 6D0 # ﯷ => ئې +FBF8 64A 654 6D0 # ﯸ => ئې +FBF9 64A 654 649 # ﯹ => ئى +FBFA 64A 654 649 # ﯺ => ئى +FBFB 64A 654 649 # ﯻ => ئى +FBFC 6CC # ﯼ => ی +FBFD 6CC # ﯽ => ی +FBFE 6CC # ﯾ => ی +FBFF 6CC # ﯿ => ی +FC00 64A 654 62C # ﰀ => ئج +FC01 64A 654 62D # ﰁ => ئح +FC02 64A 654 645 # ﰂ => ئم +FC03 64A 654 649 # ﰃ => ئى +FC04 64A 654 64A # ﰄ => ئي +FC05 628 62C # ﰅ => بج +FC06 628 62D # ﰆ => بح +FC07 628 62E # ﰇ => بخ +FC08 628 645 # ﰈ => بم +FC09 628 649 # ﰉ => بى +FC0A 628 64A # ﰊ => بي +FC0B 62A 62C # ﰋ => تج +FC0C 62A 62D # ﰌ => تح +FC0D 62A 62E # ﰍ => تخ +FC0E 62A 645 # ﰎ => تم +FC0F 62A 649 # ﰏ => تى +FC10 62A 64A # ﰐ => تي +FC11 62B 62C # ﰑ => ثج +FC12 62B 645 # ﰒ => ثم +FC13 62B 649 # ﰓ => ثى +FC14 62B 64A # ﰔ => ثي +FC15 62C 62D # ﰕ => جح +FC16 62C 645 # ﰖ => جم +FC17 62D 62C # ﰗ => حج +FC18 62D 645 # ﰘ => حم +FC19 62E 62C # ﰙ => خج +FC1A 62E 62D # ﰚ => خح +FC1B 62E 645 # ﰛ => خم +FC1C 633 62C # ﰜ => سج +FC1D 633 62D # ﰝ => سح +FC1E 633 62E # ﰞ => سخ +FC1F 633 645 # ﰟ => سم +FC20 635 62D # ﰠ => صح +FC21 635 645 # ﰡ => صم +FC22 636 62C # ﰢ => ضج +FC23 636 62D # ﰣ => ضح +FC24 636 62E # ﰤ => ضخ +FC25 636 645 # ﰥ => ضم +FC26 637 62D # ﰦ => طح +FC27 637 645 # ﰧ => طم +FC28 638 645 # ﰨ => ظم +FC29 639 62C # ﰩ => عج +FC2A 639 645 # ﰪ => عم +FC2B 63A 62C # ﰫ => غج +FC2C 63A 645 # ﰬ => غم +FC2D 641 62C # ﰭ => فج +FC2E 641 62D # ﰮ => فح +FC2F 641 62E # ﰯ => فخ +FC30 641 645 # ﰰ => فم +FC31 641 649 # ﰱ => فى +FC32 641 64A # ﰲ => في +FC33 642 62D # ﰳ => قح +FC34 642 645 # ﰴ => قم +FC35 642 649 # ﰵ => قى +FC36 642 64A # ﰶ => قي +FC37 643 627 # ﰷ => كا +FC38 643 62C # ﰸ => كج +FC39 643 62D # ﰹ => كح +FC3A 
643 62E # ﰺ => كخ +FC3B 643 644 # ﰻ => كل +FC3C 643 645 # ﰼ => كم +FC3D 643 649 # ﰽ => كى +FC3E 643 64A # ﰾ => كي +FC3F 644 62C # ﰿ => لج +FC40 644 62D # ﱀ => لح +FC41 644 62E # ﱁ => لخ +FC42 644 645 # ﱂ => لم +FC43 644 649 # ﱃ => لى +FC44 644 64A # ﱄ => لي +FC45 645 62C # ﱅ => مج +FC46 645 62D # ﱆ => مح +FC47 645 62E # ﱇ => مخ +FC48 645 645 # ﱈ => مم +FC49 645 649 # ﱉ => مى +FC4A 645 64A # ﱊ => مي +FC4B 646 62C # ﱋ => نج +FC4C 646 62D # ﱌ => نح +FC4D 646 62E # ﱍ => نخ +FC4E 646 645 # ﱎ => نم +FC4F 646 649 # ﱏ => نى +FC50 646 64A # ﱐ => ني +FC51 647 62C # ﱑ => هج +FC52 647 645 # ﱒ => هم +FC53 647 649 # ﱓ => هى +FC54 647 64A # ﱔ => هي +FC55 64A 62C # ﱕ => يج +FC56 64A 62D # ﱖ => يح +FC57 64A 62E # ﱗ => يخ +FC58 64A 645 # ﱘ => يم +FC59 64A 649 # ﱙ => يى +FC5A 64A 64A # ﱚ => يي +FC5B 630 670 # ﱛ => ذٰ +FC5C 631 670 # ﱜ => رٰ +FC5D 649 670 # ﱝ => ىٰ +FC5E 20 64C 651 # ﱞ => ٌّ +FC5F 20 64D 651 # ﱟ => ٍّ +FC60 20 64E 651 # ﱠ => َّ +FC61 20 64F 651 # ﱡ => ُّ +FC62 20 650 651 # ﱢ => ِّ +FC63 20 651 670 # ﱣ => ّٰ +FC64 64A 654 631 # ﱤ => ئر +FC65 64A 654 632 # ﱥ => ئز +FC66 64A 654 645 # ﱦ => ئم +FC67 64A 654 646 # ﱧ => ئن +FC68 64A 654 649 # ﱨ => ئى +FC69 64A 654 64A # ﱩ => ئي +FC6A 628 631 # ﱪ => بر +FC6B 628 632 # ﱫ => بز +FC6C 628 645 # ﱬ => بم +FC6D 628 646 # ﱭ => بن +FC6E 628 649 # ﱮ => بى +FC6F 628 64A # ﱯ => بي +FC70 62A 631 # ﱰ => تر +FC71 62A 632 # ﱱ => تز +FC72 62A 645 # ﱲ => تم +FC73 62A 646 # ﱳ => تن +FC74 62A 649 # ﱴ => تى +FC75 62A 64A # ﱵ => تي +FC76 62B 631 # ﱶ => ثر +FC77 62B 632 # ﱷ => ثز +FC78 62B 645 # ﱸ => ثم +FC79 62B 646 # ﱹ => ثن +FC7A 62B 649 # ﱺ => ثى +FC7B 62B 64A # ﱻ => ثي +FC7C 641 649 # ﱼ => فى +FC7D 641 64A # ﱽ => في +FC7E 642 649 # ﱾ => قى +FC7F 642 64A # ﱿ => قي +FC80 643 627 # ﲀ => كا +FC81 643 644 # ﲁ => كل +FC82 643 645 # ﲂ => كم +FC83 643 649 # ﲃ => كى +FC84 643 64A # ﲄ => كي +FC85 644 645 # ﲅ => لم +FC86 644 649 # ﲆ => لى +FC87 644 64A # ﲇ => لي +FC88 645 627 # ﲈ => ما +FC89 645 645 # ﲉ => مم +FC8A 646 631 # ﲊ => نر +FC8B 646 632 # ﲋ 
=> نز +FC8C 646 645 # ﲌ => نم +FC8D 646 646 # ﲍ => نن +FC8E 646 649 # ﲎ => نى +FC8F 646 64A # ﲏ => ني +FC90 649 670 # ﲐ => ىٰ +FC91 64A 631 # ﲑ => ير +FC92 64A 632 # ﲒ => يز +FC93 64A 645 # ﲓ => يم +FC94 64A 646 # ﲔ => ين +FC95 64A 649 # ﲕ => يى +FC96 64A 64A # ﲖ => يي +FC97 64A 654 62C # ﲗ => ئج +FC98 64A 654 62D # ﲘ => ئح +FC99 64A 654 62E # ﲙ => ئخ +FC9A 64A 654 645 # ﲚ => ئم +FC9B 64A 654 647 # ﲛ => ئه +FC9C 628 62C # ﲜ => بج +FC9D 628 62D # ﲝ => بح +FC9E 628 62E # ﲞ => بخ +FC9F 628 645 # ﲟ => بم +FCA0 628 647 # ﲠ => به +FCA1 62A 62C # ﲡ => تج +FCA2 62A 62D # ﲢ => تح +FCA3 62A 62E # ﲣ => تخ +FCA4 62A 645 # ﲤ => تم +FCA5 62A 647 # ﲥ => ته +FCA6 62B 645 # ﲦ => ثم +FCA7 62C 62D # ﲧ => جح +FCA8 62C 645 # ﲨ => جم +FCA9 62D 62C # ﲩ => حج +FCAA 62D 645 # ﲪ => حم +FCAB 62E 62C # ﲫ => خج +FCAC 62E 645 # ﲬ => خم +FCAD 633 62C # ﲭ => سج +FCAE 633 62D # ﲮ => سح +FCAF 633 62E # ﲯ => سخ +FCB0 633 645 # ﲰ => سم +FCB1 635 62D # ﲱ => صح +FCB2 635 62E # ﲲ => صخ +FCB3 635 645 # ﲳ => صم +FCB4 636 62C # ﲴ => ضج +FCB5 636 62D # ﲵ => ضح +FCB6 636 62E # ﲶ => ضخ +FCB7 636 645 # ﲷ => ضم +FCB8 637 62D # ﲸ => طح +FCB9 638 645 # ﲹ => ظم +FCBA 639 62C # ﲺ => عج +FCBB 639 645 # ﲻ => عم +FCBC 63A 62C # ﲼ => غج +FCBD 63A 645 # ﲽ => غم +FCBE 641 62C # ﲾ => فج +FCBF 641 62D # ﲿ => فح +FCC0 641 62E # ﳀ => فخ +FCC1 641 645 # ﳁ => فم +FCC2 642 62D # ﳂ => قح +FCC3 642 645 # ﳃ => قم +FCC4 643 62C # ﳄ => كج +FCC5 643 62D # ﳅ => كح +FCC6 643 62E # ﳆ => كخ +FCC7 643 644 # ﳇ => كل +FCC8 643 645 # ﳈ => كم +FCC9 644 62C # ﳉ => لج +FCCA 644 62D # ﳊ => لح +FCCB 644 62E # ﳋ => لخ +FCCC 644 645 # ﳌ => لم +FCCD 644 647 # ﳍ => له +FCCE 645 62C # ﳎ => مج +FCCF 645 62D # ﳏ => مح +FCD0 645 62E # ﳐ => مخ +FCD1 645 645 # ﳑ => مم +FCD2 646 62C # ﳒ => نج +FCD3 646 62D # ﳓ => نح +FCD4 646 62E # ﳔ => نخ +FCD5 646 645 # ﳕ => نم +FCD6 646 647 # ﳖ => نه +FCD7 647 62C # ﳗ => هج +FCD8 647 645 # ﳘ => هم +FCD9 647 670 # ﳙ => هٰ +FCDA 64A 62C # ﳚ => يج +FCDB 64A 62D # ﳛ => يح +FCDC 64A 62E # ﳜ => يخ +FCDD 64A 645 # ﳝ => يم +FCDE 
64A 647 # ﳞ => يه +FCDF 64A 654 645 # ﳟ => ئم +FCE0 64A 654 647 # ﳠ => ئه +FCE1 628 645 # ﳡ => بم +FCE2 628 647 # ﳢ => به +FCE3 62A 645 # ﳣ => تم +FCE4 62A 647 # ﳤ => ته +FCE5 62B 645 # ﳥ => ثم +FCE6 62B 647 # ﳦ => ثه +FCE7 633 645 # ﳧ => سم +FCE8 633 647 # ﳨ => سه +FCE9 634 645 # ﳩ => شم +FCEA 634 647 # ﳪ => شه +FCEB 643 644 # ﳫ => كل +FCEC 643 645 # ﳬ => كم +FCED 644 645 # ﳭ => لم +FCEE 646 645 # ﳮ => نم +FCEF 646 647 # ﳯ => نه +FCF0 64A 645 # ﳰ => يم +FCF1 64A 647 # ﳱ => يه +FCF2 640 64E 651 # ﳲ => ـَّ +FCF3 640 64F 651 # ﳳ => ـُّ +FCF4 640 650 651 # ﳴ => ـِّ +FCF5 637 649 # ﳵ => طى +FCF6 637 64A # ﳶ => طي +FCF7 639 649 # ﳷ => عى +FCF8 639 64A # ﳸ => عي +FCF9 63A 649 # ﳹ => غى +FCFA 63A 64A # ﳺ => غي +FCFB 633 649 # ﳻ => سى +FCFC 633 64A # ﳼ => سي +FCFD 634 649 # ﳽ => شى +FCFE 634 64A # ﳾ => شي +FCFF 62D 649 # ﳿ => حى +FD00 62D 64A # ﴀ => حي +FD01 62C 649 # ﴁ => جى +FD02 62C 64A # ﴂ => جي +FD03 62E 649 # ﴃ => خى +FD04 62E 64A # ﴄ => خي +FD05 635 649 # ﴅ => صى +FD06 635 64A # ﴆ => صي +FD07 636 649 # ﴇ => ضى +FD08 636 64A # ﴈ => ضي +FD09 634 62C # ﴉ => شج +FD0A 634 62D # ﴊ => شح +FD0B 634 62E # ﴋ => شخ +FD0C 634 645 # ﴌ => شم +FD0D 634 631 # ﴍ => شر +FD0E 633 631 # ﴎ => سر +FD0F 635 631 # ﴏ => صر +FD10 636 631 # ﴐ => ضر +FD11 637 649 # ﴑ => طى +FD12 637 64A # ﴒ => طي +FD13 639 649 # ﴓ => عى +FD14 639 64A # ﴔ => عي +FD15 63A 649 # ﴕ => غى +FD16 63A 64A # ﴖ => غي +FD17 633 649 # ﴗ => سى +FD18 633 64A # ﴘ => سي +FD19 634 649 # ﴙ => شى +FD1A 634 64A # ﴚ => شي +FD1B 62D 649 # ﴛ => حى +FD1C 62D 64A # ﴜ => حي +FD1D 62C 649 # ﴝ => جى +FD1E 62C 64A # ﴞ => جي +FD1F 62E 649 # ﴟ => خى +FD20 62E 64A # ﴠ => خي +FD21 635 649 # ﴡ => صى +FD22 635 64A # ﴢ => صي +FD23 636 649 # ﴣ => ضى +FD24 636 64A # ﴤ => ضي +FD25 634 62C # ﴥ => شج +FD26 634 62D # ﴦ => شح +FD27 634 62E # ﴧ => شخ +FD28 634 645 # ﴨ => شم +FD29 634 631 # ﴩ => شر +FD2A 633 631 # ﴪ => سر +FD2B 635 631 # ﴫ => صر +FD2C 636 631 # ﴬ => ضر +FD2D 634 62C # ﴭ => شج +FD2E 634 62D # ﴮ => شح +FD2F 634 62E # ﴯ => شخ +FD30 634 645 
# ﴰ => شم +FD31 633 647 # ﴱ => سه +FD32 634 647 # ﴲ => شه +FD33 637 645 # ﴳ => طم +FD34 633 62C # ﴴ => سج +FD35 633 62D # ﴵ => سح +FD36 633 62E # ﴶ => سخ +FD37 634 62C # ﴷ => شج +FD38 634 62D # ﴸ => شح +FD39 634 62E # ﴹ => شخ +FD3A 637 645 # ﴺ => طم +FD3B 638 645 # ﴻ => ظم +FD3C 627 64B # ﴼ => اً +FD3D 627 64B # ﴽ => اً +FD50 62A 62C 645 # ﵐ => تجم +FD51 62A 62D 62C # ﵑ => تحج +FD52 62A 62D 62C # ﵒ => تحج +FD53 62A 62D 645 # ﵓ => تحم +FD54 62A 62E 645 # ﵔ => تخم +FD55 62A 645 62C # ﵕ => تمج +FD56 62A 645 62D # ﵖ => تمح +FD57 62A 645 62E # ﵗ => تمخ +FD58 62C 645 62D # ﵘ => جمح +FD59 62C 645 62D # ﵙ => جمح +FD5A 62D 645 64A # ﵚ => حمي +FD5B 62D 645 649 # ﵛ => حمى +FD5C 633 62D 62C # ﵜ => سحج +FD5D 633 62C 62D # ﵝ => سجح +FD5E 633 62C 649 # ﵞ => سجى +FD5F 633 645 62D # ﵟ => سمح +FD60 633 645 62D # ﵠ => سمح +FD61 633 645 62C # ﵡ => سمج +FD62 633 645 645 # ﵢ => سمم +FD63 633 645 645 # ﵣ => سمم +FD64 635 62D 62D # ﵤ => صحح +FD65 635 62D 62D # ﵥ => صحح +FD66 635 645 645 # ﵦ => صمم +FD67 634 62D 645 # ﵧ => شحم +FD68 634 62D 645 # ﵨ => شحم +FD69 634 62C 64A # ﵩ => شجي +FD6A 634 645 62E # ﵪ => شمخ +FD6B 634 645 62E # ﵫ => شمخ +FD6C 634 645 645 # ﵬ => شمم +FD6D 634 645 645 # ﵭ => شمم +FD6E 636 62D 649 # ﵮ => ضحى +FD6F 636 62E 645 # ﵯ => ضخم +FD70 636 62E 645 # ﵰ => ضخم +FD71 637 645 62D # ﵱ => طمح +FD72 637 645 62D # ﵲ => طمح +FD73 637 645 645 # ﵳ => طمم +FD74 637 645 64A # ﵴ => طمي +FD75 639 62C 645 # ﵵ => عجم +FD76 639 645 645 # ﵶ => عمم +FD77 639 645 645 # ﵷ => عمم +FD78 639 645 649 # ﵸ => عمى +FD79 63A 645 645 # ﵹ => غمم +FD7A 63A 645 64A # ﵺ => غمي +FD7B 63A 645 649 # ﵻ => غمى +FD7C 641 62E 645 # ﵼ => فخم +FD7D 641 62E 645 # ﵽ => فخم +FD7E 642 645 62D # ﵾ => قمح +FD7F 642 645 645 # ﵿ => قمم +FD80 644 62D 645 # ﶀ => لحم +FD81 644 62D 64A # ﶁ => لحي +FD82 644 62D 649 # ﶂ => لحى +FD83 644 62C 62C # ﶃ => لجج +FD84 644 62C 62C # ﶄ => لجج +FD85 644 62E 645 # ﶅ => لخم +FD86 644 62E 645 # ﶆ => لخم +FD87 644 645 62D # ﶇ => لمح +FD88 644 645 62D # ﶈ => لمح +FD89 645 62D 62C # ﶉ => 
محج +FD8A 645 62D 645 # ﶊ => محم +FD8B 645 62D 64A # ﶋ => محي +FD8C 645 62C 62D # ﶌ => مجح +FD8D 645 62C 645 # ﶍ => مجم +FD8E 645 62E 62C # ﶎ => مخج +FD8F 645 62E 645 # ﶏ => مخم +FD92 645 62C 62E # ﶒ => مجخ +FD93 647 645 62C # ﶓ => همج +FD94 647 645 645 # ﶔ => همم +FD95 646 62D 645 # ﶕ => نحم +FD96 646 62D 649 # ﶖ => نحى +FD97 646 62C 645 # ﶗ => نجم +FD98 646 62C 645 # ﶘ => نجم +FD99 646 62C 649 # ﶙ => نجى +FD9A 646 645 64A # ﶚ => نمي +FD9B 646 645 649 # ﶛ => نمى +FD9C 64A 645 645 # ﶜ => يمم +FD9D 64A 645 645 # ﶝ => يمم +FD9E 628 62E 64A # ﶞ => بخي +FD9F 62A 62C 64A # ﶟ => تجي +FDA0 62A 62C 649 # ﶠ => تجى +FDA1 62A 62E 64A # ﶡ => تخي +FDA2 62A 62E 649 # ﶢ => تخى +FDA3 62A 645 64A # ﶣ => تمي +FDA4 62A 645 649 # ﶤ => تمى +FDA5 62C 645 64A # ﶥ => جمي +FDA6 62C 62D 649 # ﶦ => جحى +FDA7 62C 645 649 # ﶧ => جمى +FDA8 633 62E 649 # ﶨ => سخى +FDA9 635 62D 64A # ﶩ => صحي +FDAA 634 62D 64A # ﶪ => شحي +FDAB 636 62D 64A # ﶫ => ضحي +FDAC 644 62C 64A # ﶬ => لجي +FDAD 644 645 64A # ﶭ => لمي +FDAE 64A 62D 64A # ﶮ => يحي +FDAF 64A 62C 64A # ﶯ => يجي +FDB0 64A 645 64A # ﶰ => يمي +FDB1 645 645 64A # ﶱ => ممي +FDB2 642 645 64A # ﶲ => قمي +FDB3 646 62D 64A # ﶳ => نحي +FDB4 642 645 62D # ﶴ => قمح +FDB5 644 62D 645 # ﶵ => لحم +FDB6 639 645 64A # ﶶ => عمي +FDB7 643 645 64A # ﶷ => كمي +FDB8 646 62C 62D # ﶸ => نجح +FDB9 645 62E 64A # ﶹ => مخي +FDBA 644 62C 645 # ﶺ => لجم +FDBB 643 645 645 # ﶻ => كمم +FDBC 644 62C 645 # ﶼ => لجم +FDBD 646 62C 62D # ﶽ => نجح +FDBE 62C 62D 64A # ﶾ => جحي +FDBF 62D 62C 64A # ﶿ => حجي +FDC0 645 62C 64A # ﷀ => مجي +FDC1 641 645 64A # ﷁ => فمي +FDC2 628 62D 64A # ﷂ => بحي +FDC3 643 645 645 # ﷃ => كمم +FDC4 639 62C 645 # ﷄ => عجم +FDC5 635 645 645 # ﷅ => صمم +FDC6 633 62E 64A # ﷆ => سخي +FDC7 646 62C 64A # ﷇ => نجي +FDF0 635 644 6D2 # ﷰ => صلے +FDF1 642 644 6D2 # ﷱ => قلے +FDF2 627 644 644 647 # ﷲ => الله +FDF3 627 643 628 631 # ﷳ => اكبر +FDF4 645 62D 645 62F # ﷴ => محمد +FDF5 635 644 639 645 # ﷵ => صلعم +FDF6 631 633 648 644 # ﷶ => رسول +FDF7 639 644 64A 647 # ﷷ 
=> عليه +FDF8 648 633 644 645 # ﷸ => وسلم +FDF9 635 644 649 # ﷹ => صلى +FDFA 635 644 649 20 627 644 644 647 20 639 644 64A 647 20 648 633 644 645 # ﷺ => صلى الله عليه وسلم +FDFB 62C 644 20 62C 644 627 644 647 # ﷻ => جل جلاله +FDFC 631 6CC 627 644 # ﷼ => ریال +FE10 2C # ︐ => , +FE11 3001 # ︑ => 、 +FE12 3002 # ︒ => 。 +FE13 3A # ︓ => : +FE14 3B # ︔ => ; +FE15 21 # ︕ => ! +FE16 3F # ︖ => ? +FE17 3016 # ︗ => 〖 +FE18 3017 # ︘ => 〗 +FE19 2E 2E 2E # ︙ => ... +FE30 2E 2E # ︰ => .. +FE31 2014 # ︱ => — +FE32 2013 # ︲ => – +FE33 5F # ︳ => _ +FE34 5F # ︴ => _ +FE35 28 # ︵ => ( +FE36 29 # ︶ => ) +FE37 7B # ︷ => { +FE38 7D # ︸ => } +FE39 3014 # ︹ => 〔 +FE3A 3015 # ︺ => 〕 +FE3B 3010 # ︻ => 【 +FE3C 3011 # ︼ => 】 +FE3D 300A # ︽ => 《 +FE3E 300B # ︾ => 》 +FE3F 3008 # ︿ => 〈 +FE40 3009 # ﹀ => 〉 +FE41 300C # ﹁ => 「 +FE42 300D # ﹂ => 」 +FE43 300E # ﹃ => 『 +FE44 300F # ﹄ => 』 +FE47 5B # ﹇ => [ +FE48 5D # ﹈ => ] +FE49 20 305 # ﹉ => ̅ +FE4A 20 305 # ﹊ => ̅ +FE4B 20 305 # ﹋ => ̅ +FE4C 20 305 # ﹌ => ̅ +FE4D 5F # ﹍ => _ +FE4E 5F # ﹎ => _ +FE4F 5F # ﹏ => _ +FE50 2C # ﹐ => , +FE51 3001 # ﹑ => 、 +FE52 2E # ﹒ => . +FE54 3B # ﹔ => ; +FE55 3A # ﹕ => : +FE56 3F # ﹖ => ? +FE57 21 # ﹗ => ! 
+FE58 2014 # ﹘ => — +FE59 28 # ﹙ => ( +FE5A 29 # ﹚ => ) +FE5B 7B # ﹛ => { +FE5C 7D # ﹜ => } +FE5D 3014 # ﹝ => 〔 +FE5E 3015 # ﹞ => 〕 +FE5F 23 # ﹟ => # +FE60 26 # ﹠ => & +FE61 2A # ﹡ => * +FE62 2B # ﹢ => + +FE63 2D # ﹣ => - +FE64 3C # ﹤ => < +FE65 3E # ﹥ => > +FE66 3D # ﹦ => = +FE68 5C # ﹨ => \ +FE69 24 # ﹩ => $ +FE6A 25 # ﹪ => % +FE6B 40 # ﹫ => @ +FE70 20 64B # ﹰ => ً +FE71 640 64B # ﹱ => ـً +FE72 20 64C # ﹲ => ٌ +FE74 20 64D # ﹴ => ٍ +FE76 20 64E # ﹶ => َ +FE77 640 64E # ﹷ => ـَ +FE78 20 64F # ﹸ => ُ +FE79 640 64F # ﹹ => ـُ +FE7A 20 650 # ﹺ => ِ +FE7B 640 650 # ﹻ => ـِ +FE7C 20 651 # ﹼ => ّ +FE7D 640 651 # ﹽ => ـّ +FE7E 20 652 # ﹾ => ْ +FE7F 640 652 # ﹿ => ـْ +FE80 621 # ﺀ => ء +FE81 627 653 # ﺁ => آ +FE82 627 653 # ﺂ => آ +FE83 627 654 # ﺃ => أ +FE84 627 654 # ﺄ => أ +FE85 648 654 # ﺅ => ؤ +FE86 648 654 # ﺆ => ؤ +FE87 627 655 # ﺇ => إ +FE88 627 655 # ﺈ => إ +FE89 64A 654 # ﺉ => ئ +FE8A 64A 654 # ﺊ => ئ +FE8B 64A 654 # ﺋ => ئ +FE8C 64A 654 # ﺌ => ئ +FE8D 627 # ﺍ => ا +FE8E 627 # ﺎ => ا +FE8F 628 # ﺏ => ب +FE90 628 # ﺐ => ب +FE91 628 # ﺑ => ب +FE92 628 # ﺒ => ب +FE93 629 # ﺓ => ة +FE94 629 # ﺔ => ة +FE95 62A # ﺕ => ت +FE96 62A # ﺖ => ت +FE97 62A # ﺗ => ت +FE98 62A # ﺘ => ت +FE99 62B # ﺙ => ث +FE9A 62B # ﺚ => ث +FE9B 62B # ﺛ => ث +FE9C 62B # ﺜ => ث +FE9D 62C # ﺝ => ج +FE9E 62C # ﺞ => ج +FE9F 62C # ﺟ => ج +FEA0 62C # ﺠ => ج +FEA1 62D # ﺡ => ح +FEA2 62D # ﺢ => ح +FEA3 62D # ﺣ => ح +FEA4 62D # ﺤ => ح +FEA5 62E # ﺥ => خ +FEA6 62E # ﺦ => خ +FEA7 62E # ﺧ => خ +FEA8 62E # ﺨ => خ +FEA9 62F # ﺩ => د +FEAA 62F # ﺪ => د +FEAB 630 # ﺫ => ذ +FEAC 630 # ﺬ => ذ +FEAD 631 # ﺭ => ر +FEAE 631 # ﺮ => ر +FEAF 632 # ﺯ => ز +FEB0 632 # ﺰ => ز +FEB1 633 # ﺱ => س +FEB2 633 # ﺲ => س +FEB3 633 # ﺳ => س +FEB4 633 # ﺴ => س +FEB5 634 # ﺵ => ش +FEB6 634 # ﺶ => ش +FEB7 634 # ﺷ => ش +FEB8 634 # ﺸ => ش +FEB9 635 # ﺹ => ص +FEBA 635 # ﺺ => ص +FEBB 635 # ﺻ => ص +FEBC 635 # ﺼ => ص +FEBD 636 # ﺽ => ض +FEBE 636 # ﺾ => ض +FEBF 636 # ﺿ => ض +FEC0 636 # ﻀ => ض +FEC1 637 # ﻁ => ط +FEC2 637 # ﻂ => ط +FEC3 637 
# ﻃ => ط +FEC4 637 # ﻄ => ط +FEC5 638 # ﻅ => ظ +FEC6 638 # ﻆ => ظ +FEC7 638 # ﻇ => ظ +FEC8 638 # ﻈ => ظ +FEC9 639 # ﻉ => ع +FECA 639 # ﻊ => ع +FECB 639 # ﻋ => ع +FECC 639 # ﻌ => ع +FECD 63A # ﻍ => غ +FECE 63A # ﻎ => غ +FECF 63A # ﻏ => غ +FED0 63A # ﻐ => غ +FED1 641 # ﻑ => ف +FED2 641 # ﻒ => ف +FED3 641 # ﻓ => ف +FED4 641 # ﻔ => ف +FED5 642 # ﻕ => ق +FED6 642 # ﻖ => ق +FED7 642 # ﻗ => ق +FED8 642 # ﻘ => ق +FED9 643 # ﻙ => ك +FEDA 643 # ﻚ => ك +FEDB 643 # ﻛ => ك +FEDC 643 # ﻜ => ك +FEDD 644 # ﻝ => ل +FEDE 644 # ﻞ => ل +FEDF 644 # ﻟ => ل +FEE0 644 # ﻠ => ل +FEE1 645 # ﻡ => م +FEE2 645 # ﻢ => م +FEE3 645 # ﻣ => م +FEE4 645 # ﻤ => م +FEE5 646 # ﻥ => ن +FEE6 646 # ﻦ => ن +FEE7 646 # ﻧ => ن +FEE8 646 # ﻨ => ن +FEE9 647 # ﻩ => ه +FEEA 647 # ﻪ => ه +FEEB 647 # ﻫ => ه +FEEC 647 # ﻬ => ه +FEED 648 # ﻭ => و +FEEE 648 # ﻮ => و +FEEF 649 # ﻯ => ى +FEF0 649 # ﻰ => ى +FEF1 64A # ﻱ => ي +FEF2 64A # ﻲ => ي +FEF3 64A # ﻳ => ي +FEF4 64A # ﻴ => ي +FEF5 644 627 653 # ﻵ => لآ +FEF6 644 627 653 # ﻶ => لآ +FEF7 644 627 654 # ﻷ => لأ +FEF8 644 627 654 # ﻸ => لأ +FEF9 644 627 655 # ﻹ => لإ +FEFA 644 627 655 # ﻺ => لإ +FEFB 644 627 # ﻻ => لا +FEFC 644 627 # ﻼ => لا +FF01 21 # ! => ! +FF02 22 # " => " +FF03 23 # # => # +FF04 24 # $ => $ +FF05 25 # % => % +FF06 26 # & => & +FF07 27 # ' => ' +FF08 28 # ( => ( +FF09 29 # ) => ) +FF0A 2A # * => * +FF0B 2B # + => + +FF0C 2C # , => , +FF0D 2D # - => - +FF0E 2E # . => . +FF0F 2F # / => / +FF10 30 # 0 => 0 +FF11 31 # 1 => 1 +FF12 32 # 2 => 2 +FF13 33 # 3 => 3 +FF14 34 # 4 => 4 +FF15 35 # 5 => 5 +FF16 36 # 6 => 6 +FF17 37 # 7 => 7 +FF18 38 # 8 => 8 +FF19 39 # 9 => 9 +FF1A 3A # : => : +FF1B 3B # ; => ; +FF1C 3C # < => < +FF1D 3D # = => = +FF1E 3E # > => > +FF1F 3F # ? => ? 
+FF20 40 # @ => @ +FF21 41 # A => A +FF22 42 # B => B +FF23 43 # C => C +FF24 44 # D => D +FF25 45 # E => E +FF26 46 # F => F +FF27 47 # G => G +FF28 48 # H => H +FF29 49 # I => I +FF2A 4A # J => J +FF2B 4B # K => K +FF2C 4C # L => L +FF2D 4D # M => M +FF2E 4E # N => N +FF2F 4F # O => O +FF30 50 # P => P +FF31 51 # Q => Q +FF32 52 # R => R +FF33 53 # S => S +FF34 54 # T => T +FF35 55 # U => U +FF36 56 # V => V +FF37 57 # W => W +FF38 58 # X => X +FF39 59 # Y => Y +FF3A 5A # Z => Z +FF3B 5B # [ => [ +FF3C 5C # \ => \ +FF3D 5D # ] => ] +FF3E 5E # ^ => ^ +FF3F 5F # _ => _ +FF40 60 # ` => ` +FF41 61 # a => a +FF42 62 # b => b +FF43 63 # c => c +FF44 64 # d => d +FF45 65 # e => e +FF46 66 # f => f +FF47 67 # g => g +FF48 68 # h => h +FF49 69 # i => i +FF4A 6A # j => j +FF4B 6B # k => k +FF4C 6C # l => l +FF4D 6D # m => m +FF4E 6E # n => n +FF4F 6F # o => o +FF50 70 # p => p +FF51 71 # q => q +FF52 72 # r => r +FF53 73 # s => s +FF54 74 # t => t +FF55 75 # u => u +FF56 76 # v => v +FF57 77 # w => w +FF58 78 # x => x +FF59 79 # y => y +FF5A 7A # z => z +FF5B 7B # { => { +FF5C 7C # | => | +FF5D 7D # } => } +FF5E 7E # ~ => ~ +FF5F 2985 # ⦅ => ⦅ +FF60 2986 # ⦆ => ⦆ +FF61 3002 # 。 => 。 +FF62 300C # 「 => 「 +FF63 300D # 」 => 」 +FF64 3001 # 、 => 、 +FF65 30FB # ・ => ・ +FF66 30F2 # ヲ => ヲ +FF67 30A1 # ァ => ァ +FF68 30A3 # ィ => ィ +FF69 30A5 # ゥ => ゥ +FF6A 30A7 # ェ => ェ +FF6B 30A9 # ォ => ォ +FF6C 30E3 # ャ => ャ +FF6D 30E5 # ュ => ュ +FF6E 30E7 # ョ => ョ +FF6F 30C3 # ッ => ッ +FF70 30FC # ー => ー +FF71 30A2 # ア => ア +FF72 30A4 # イ => イ +FF73 30A6 # ウ => ウ +FF74 30A8 # エ => エ +FF75 30AA # オ => オ +FF76 30AB # カ => カ +FF77 30AD # キ => キ +FF78 30AF # ク => ク +FF79 30B1 # ケ => ケ +FF7A 30B3 # コ => コ +FF7B 30B5 # サ => サ +FF7C 30B7 # シ => シ +FF7D 30B9 # ス => ス +FF7E 30BB # セ => セ +FF7F 30BD # ソ => ソ +FF80 30BF # タ => タ +FF81 30C1 # チ => チ +FF82 30C4 # ツ => ツ +FF83 30C6 # テ => テ +FF84 30C8 # ト => ト +FF85 30CA # ナ => ナ +FF86 30CB # ニ => ニ +FF87 30CC # ヌ => ヌ +FF88 30CD # ネ => ネ +FF89 30CE # ノ => ノ +FF8A 
30CF # ハ => ハ +FF8B 30D2 # ヒ => ヒ +FF8C 30D5 # フ => フ +FF8D 30D8 # ヘ => ヘ +FF8E 30DB # ホ => ホ +FF8F 30DE # マ => マ +FF90 30DF # ミ => ミ +FF91 30E0 # ム => ム +FF92 30E1 # メ => メ +FF93 30E2 # モ => モ +FF94 30E4 # ヤ => ヤ +FF95 30E6 # ユ => ユ +FF96 30E8 # ヨ => ヨ +FF97 30E9 # ラ => ラ +FF98 30EA # リ => リ +FF99 30EB # ル => ル +FF9A 30EC # レ => レ +FF9B 30ED # ロ => ロ +FF9C 30EF # ワ => ワ +FF9D 30F3 # ン => ン +FF9E 3099 # ゙ => ゙ +FF9F 309A # ゚ => ゚ +FFA0 1160 # ᅠ => ᅠ +FFA1 1100 # ᄀ => ᄀ +FFA2 1101 # ᄁ => ᄁ +FFA3 11AA # ᆪ => ᆪ +FFA4 1102 # ᄂ => ᄂ +FFA5 11AC # ᆬ => ᆬ +FFA6 11AD # ᆭ => ᆭ +FFA7 1103 # ᄃ => ᄃ +FFA8 1104 # ᄄ => ᄄ +FFA9 1105 # ᄅ => ᄅ +FFAA 11B0 # ᆰ => ᆰ +FFAB 11B1 # ᆱ => ᆱ +FFAC 11B2 # ᆲ => ᆲ +FFAD 11B3 # ᆳ => ᆳ +FFAE 11B4 # ᆴ => ᆴ +FFAF 11B5 # ᆵ => ᆵ +FFB0 111A # ᄚ => ᄚ +FFB1 1106 # ᄆ => ᄆ +FFB2 1107 # ᄇ => ᄇ +FFB3 1108 # ᄈ => ᄈ +FFB4 1121 # ᄡ => ᄡ +FFB5 1109 # ᄉ => ᄉ +FFB6 110A # ᄊ => ᄊ +FFB7 110B # ᄋ => ᄋ +FFB8 110C # ᄌ => ᄌ +FFB9 110D # ᄍ => ᄍ +FFBA 110E # ᄎ => ᄎ +FFBB 110F # ᄏ => ᄏ +FFBC 1110 # ᄐ => ᄐ +FFBD 1111 # ᄑ => ᄑ +FFBE 1112 # ᄒ => ᄒ +FFC2 1161 # ᅡ => ᅡ +FFC3 1162 # ᅢ => ᅢ +FFC4 1163 # ᅣ => ᅣ +FFC5 1164 # ᅤ => ᅤ +FFC6 1165 # ᅥ => ᅥ +FFC7 1166 # ᅦ => ᅦ +FFCA 1167 # ᅧ => ᅧ +FFCB 1168 # ᅨ => ᅨ +FFCC 1169 # ᅩ => ᅩ +FFCD 116A # ᅪ => ᅪ +FFCE 116B # ᅫ => ᅫ +FFCF 116C # ᅬ => ᅬ +FFD2 116D # ᅭ => ᅭ +FFD3 116E # ᅮ => ᅮ +FFD4 116F # ᅯ => ᅯ +FFD5 1170 # ᅰ => ᅰ +FFD6 1171 # ᅱ => ᅱ +FFD7 1172 # ᅲ => ᅲ +FFDA 1173 # ᅳ => ᅳ +FFDB 1174 # ᅴ => ᅴ +FFDC 1175 # ᅵ => ᅵ +FFE0 A2 # ¢ => ¢ +FFE1 A3 # £ => £ +FFE2 AC # ¬ => ¬ +FFE3 20 304 #  ̄ => ̄ +FFE4 A6 # ¦ => ¦ +FFE5 A5 # ¥ => ¥ +FFE6 20A9 # ₩ => ₩ +FFE8 2502 # │ => │ +FFE9 2190 # ← => ← +FFEA 2191 # ↑ => ↑ +FFEB 2192 # → => → +FFEC 2193 # ↓ => ↓ +FFED 25A0 # ■ => ■ +FFEE 25CB # ○ => ○ +1109A 11099 110BA # 𑂚 => 𑂚 +1109C 1109B 110BA # 𑂜 => 𑂜 +110AB 110A5 110BA # 𑂫 => 𑂫 +1112E 11131 11127 # 𑄮 => 𑄮 +1112F 11132 11127 # 𑄯 => 𑄯 +1134B 11347 1133E # 𑍋 => 𑍋 +1134C 11347 11357 # 𑍌 => 𑍌 +114BB 114B9 114BA # 𑒻 => 𑒻 +114BC 114B9 114B0 # 𑒼 => 𑒼 
+114BE 114B9 114BD # 𑒾 => 𑒾 +115BA 115B8 115AF # 𑖺 => 𑖺 +115BB 115B9 115AF # 𑖻 => 𑖻 +11938 11935 11930 # 𑤸 => 𑤸 +1D15E 1D157 1D165 # 𝅗𝅥 => 𝅗𝅥 +1D15F 1D158 1D165 # 𝅘𝅥 => 𝅘𝅥 +1D160 1D158 1D165 1D16E # 𝅘𝅥𝅮 => 𝅘𝅥𝅮 +1D161 1D158 1D165 1D16F # 𝅘𝅥𝅯 => 𝅘𝅥𝅯 +1D162 1D158 1D165 1D170 # 𝅘𝅥𝅰 => 𝅘𝅥𝅰 +1D163 1D158 1D165 1D171 # 𝅘𝅥𝅱 => 𝅘𝅥𝅱 +1D164 1D158 1D165 1D172 # 𝅘𝅥𝅲 => 𝅘𝅥𝅲 +1D1BB 1D1B9 1D165 # 𝆹𝅥 => 𝆹𝅥 +1D1BC 1D1BA 1D165 # 𝆺𝅥 => 𝆺𝅥 +1D1BD 1D1B9 1D165 1D16E # 𝆹𝅥𝅮 => 𝆹𝅥𝅮 +1D1BE 1D1BA 1D165 1D16E # 𝆺𝅥𝅮 => 𝆺𝅥𝅮 +1D1BF 1D1B9 1D165 1D16F # 𝆹𝅥𝅯 => 𝆹𝅥𝅯 +1D1C0 1D1BA 1D165 1D16F # 𝆺𝅥𝅯 => 𝆺𝅥𝅯 +1D400 41 # 𝐀 => A +1D401 42 # 𝐁 => B +1D402 43 # 𝐂 => C +1D403 44 # 𝐃 => D +1D404 45 # 𝐄 => E +1D405 46 # 𝐅 => F +1D406 47 # 𝐆 => G +1D407 48 # 𝐇 => H +1D408 49 # 𝐈 => I +1D409 4A # 𝐉 => J +1D40A 4B # 𝐊 => K +1D40B 4C # 𝐋 => L +1D40C 4D # 𝐌 => M +1D40D 4E # 𝐍 => N +1D40E 4F # 𝐎 => O +1D40F 50 # 𝐏 => P +1D410 51 # 𝐐 => Q +1D411 52 # 𝐑 => R +1D412 53 # 𝐒 => S +1D413 54 # 𝐓 => T +1D414 55 # 𝐔 => U +1D415 56 # 𝐕 => V +1D416 57 # 𝐖 => W +1D417 58 # 𝐗 => X +1D418 59 # 𝐘 => Y +1D419 5A # 𝐙 => Z +1D41A 61 # 𝐚 => a +1D41B 62 # 𝐛 => b +1D41C 63 # 𝐜 => c +1D41D 64 # 𝐝 => d +1D41E 65 # 𝐞 => e +1D41F 66 # 𝐟 => f +1D420 67 # 𝐠 => g +1D421 68 # 𝐡 => h +1D422 69 # 𝐢 => i +1D423 6A # 𝐣 => j +1D424 6B # 𝐤 => k +1D425 6C # 𝐥 => l +1D426 6D # 𝐦 => m +1D427 6E # 𝐧 => n +1D428 6F # 𝐨 => o +1D429 70 # 𝐩 => p +1D42A 71 # 𝐪 => q +1D42B 72 # 𝐫 => r +1D42C 73 # 𝐬 => s +1D42D 74 # 𝐭 => t +1D42E 75 # 𝐮 => u +1D42F 76 # 𝐯 => v +1D430 77 # 𝐰 => w +1D431 78 # 𝐱 => x +1D432 79 # 𝐲 => y +1D433 7A # 𝐳 => z +1D434 41 # 𝐴 => A +1D435 42 # 𝐵 => B +1D436 43 # 𝐶 => C +1D437 44 # 𝐷 => D +1D438 45 # 𝐸 => E +1D439 46 # 𝐹 => F +1D43A 47 # 𝐺 => G +1D43B 48 # 𝐻 => H +1D43C 49 # 𝐼 => I +1D43D 4A # 𝐽 => J +1D43E 4B # 𝐾 => K +1D43F 4C # 𝐿 => L +1D440 4D # 𝑀 => M +1D441 4E # 𝑁 => N +1D442 4F # 𝑂 => O +1D443 50 # 𝑃 => P +1D444 51 # 𝑄 => Q +1D445 52 # 𝑅 => R +1D446 53 # 𝑆 => S +1D447 54 # 𝑇 => T +1D448 55 # 𝑈 => U +1D449 56 # 𝑉 => V +1D44A 57 # 𝑊 => W 
+1D44B 58 # 𝑋 => X +1D44C 59 # 𝑌 => Y +1D44D 5A # 𝑍 => Z +1D44E 61 # 𝑎 => a +1D44F 62 # 𝑏 => b +1D450 63 # 𝑐 => c +1D451 64 # 𝑑 => d +1D452 65 # 𝑒 => e +1D453 66 # 𝑓 => f +1D454 67 # 𝑔 => g +1D456 69 # 𝑖 => i +1D457 6A # 𝑗 => j +1D458 6B # 𝑘 => k +1D459 6C # 𝑙 => l +1D45A 6D # 𝑚 => m +1D45B 6E # 𝑛 => n +1D45C 6F # 𝑜 => o +1D45D 70 # 𝑝 => p +1D45E 71 # 𝑞 => q +1D45F 72 # 𝑟 => r +1D460 73 # 𝑠 => s +1D461 74 # 𝑡 => t +1D462 75 # 𝑢 => u +1D463 76 # 𝑣 => v +1D464 77 # 𝑤 => w +1D465 78 # 𝑥 => x +1D466 79 # 𝑦 => y +1D467 7A # 𝑧 => z +1D468 41 # 𝑨 => A +1D469 42 # 𝑩 => B +1D46A 43 # 𝑪 => C +1D46B 44 # 𝑫 => D +1D46C 45 # 𝑬 => E +1D46D 46 # 𝑭 => F +1D46E 47 # 𝑮 => G +1D46F 48 # 𝑯 => H +1D470 49 # 𝑰 => I +1D471 4A # 𝑱 => J +1D472 4B # 𝑲 => K +1D473 4C # 𝑳 => L +1D474 4D # 𝑴 => M +1D475 4E # 𝑵 => N +1D476 4F # 𝑶 => O +1D477 50 # 𝑷 => P +1D478 51 # 𝑸 => Q +1D479 52 # 𝑹 => R +1D47A 53 # 𝑺 => S +1D47B 54 # 𝑻 => T +1D47C 55 # 𝑼 => U +1D47D 56 # 𝑽 => V +1D47E 57 # 𝑾 => W +1D47F 58 # 𝑿 => X +1D480 59 # 𝒀 => Y +1D481 5A # 𝒁 => Z +1D482 61 # 𝒂 => a +1D483 62 # 𝒃 => b +1D484 63 # 𝒄 => c +1D485 64 # 𝒅 => d +1D486 65 # 𝒆 => e +1D487 66 # 𝒇 => f +1D488 67 # 𝒈 => g +1D489 68 # 𝒉 => h +1D48A 69 # 𝒊 => i +1D48B 6A # 𝒋 => j +1D48C 6B # 𝒌 => k +1D48D 6C # 𝒍 => l +1D48E 6D # 𝒎 => m +1D48F 6E # 𝒏 => n +1D490 6F # 𝒐 => o +1D491 70 # 𝒑 => p +1D492 71 # 𝒒 => q +1D493 72 # 𝒓 => r +1D494 73 # 𝒔 => s +1D495 74 # 𝒕 => t +1D496 75 # 𝒖 => u +1D497 76 # 𝒗 => v +1D498 77 # 𝒘 => w +1D499 78 # 𝒙 => x +1D49A 79 # 𝒚 => y +1D49B 7A # 𝒛 => z +1D49C 41 # 𝒜 => A +1D49E 43 # 𝒞 => C +1D49F 44 # 𝒟 => D +1D4A2 47 # 𝒢 => G +1D4A5 4A # 𝒥 => J +1D4A6 4B # 𝒦 => K +1D4A9 4E # 𝒩 => N +1D4AA 4F # 𝒪 => O +1D4AB 50 # 𝒫 => P +1D4AC 51 # 𝒬 => Q +1D4AE 53 # 𝒮 => S +1D4AF 54 # 𝒯 => T +1D4B0 55 # 𝒰 => U +1D4B1 56 # 𝒱 => V +1D4B2 57 # 𝒲 => W +1D4B3 58 # 𝒳 => X +1D4B4 59 # 𝒴 => Y +1D4B5 5A # 𝒵 => Z +1D4B6 61 # 𝒶 => a +1D4B7 62 # 𝒷 => b +1D4B8 63 # 𝒸 => c +1D4B9 64 # 𝒹 => d +1D4BB 66 # 𝒻 => f +1D4BD 68 # 𝒽 => h +1D4BE 69 # 𝒾 => i 
+1D4BF 6A # 𝒿 => j +1D4C0 6B # 𝓀 => k +1D4C1 6C # 𝓁 => l +1D4C2 6D # 𝓂 => m +1D4C3 6E # 𝓃 => n +1D4C5 70 # 𝓅 => p +1D4C6 71 # 𝓆 => q +1D4C7 72 # 𝓇 => r +1D4C8 73 # 𝓈 => s +1D4C9 74 # 𝓉 => t +1D4CA 75 # 𝓊 => u +1D4CB 76 # 𝓋 => v +1D4CC 77 # 𝓌 => w +1D4CD 78 # 𝓍 => x +1D4CE 79 # 𝓎 => y +1D4CF 7A # 𝓏 => z +1D4D0 41 # 𝓐 => A +1D4D1 42 # 𝓑 => B +1D4D2 43 # 𝓒 => C +1D4D3 44 # 𝓓 => D +1D4D4 45 # 𝓔 => E +1D4D5 46 # 𝓕 => F +1D4D6 47 # 𝓖 => G +1D4D7 48 # 𝓗 => H +1D4D8 49 # 𝓘 => I +1D4D9 4A # 𝓙 => J +1D4DA 4B # 𝓚 => K +1D4DB 4C # 𝓛 => L +1D4DC 4D # 𝓜 => M +1D4DD 4E # 𝓝 => N +1D4DE 4F # 𝓞 => O +1D4DF 50 # 𝓟 => P +1D4E0 51 # 𝓠 => Q +1D4E1 52 # 𝓡 => R +1D4E2 53 # 𝓢 => S +1D4E3 54 # 𝓣 => T +1D4E4 55 # 𝓤 => U +1D4E5 56 # 𝓥 => V +1D4E6 57 # 𝓦 => W +1D4E7 58 # 𝓧 => X +1D4E8 59 # 𝓨 => Y +1D4E9 5A # 𝓩 => Z +1D4EA 61 # 𝓪 => a +1D4EB 62 # 𝓫 => b +1D4EC 63 # 𝓬 => c +1D4ED 64 # 𝓭 => d +1D4EE 65 # 𝓮 => e +1D4EF 66 # 𝓯 => f +1D4F0 67 # 𝓰 => g +1D4F1 68 # 𝓱 => h +1D4F2 69 # 𝓲 => i +1D4F3 6A # 𝓳 => j +1D4F4 6B # 𝓴 => k +1D4F5 6C # 𝓵 => l +1D4F6 6D # 𝓶 => m +1D4F7 6E # 𝓷 => n +1D4F8 6F # 𝓸 => o +1D4F9 70 # 𝓹 => p +1D4FA 71 # 𝓺 => q +1D4FB 72 # 𝓻 => r +1D4FC 73 # 𝓼 => s +1D4FD 74 # 𝓽 => t +1D4FE 75 # 𝓾 => u +1D4FF 76 # 𝓿 => v +1D500 77 # 𝔀 => w +1D501 78 # 𝔁 => x +1D502 79 # 𝔂 => y +1D503 7A # 𝔃 => z +1D504 41 # 𝔄 => A +1D505 42 # 𝔅 => B +1D507 44 # 𝔇 => D +1D508 45 # 𝔈 => E +1D509 46 # 𝔉 => F +1D50A 47 # 𝔊 => G +1D50D 4A # 𝔍 => J +1D50E 4B # 𝔎 => K +1D50F 4C # 𝔏 => L +1D510 4D # 𝔐 => M +1D511 4E # 𝔑 => N +1D512 4F # 𝔒 => O +1D513 50 # 𝔓 => P +1D514 51 # 𝔔 => Q +1D516 53 # 𝔖 => S +1D517 54 # 𝔗 => T +1D518 55 # 𝔘 => U +1D519 56 # 𝔙 => V +1D51A 57 # 𝔚 => W +1D51B 58 # 𝔛 => X +1D51C 59 # 𝔜 => Y +1D51E 61 # 𝔞 => a +1D51F 62 # 𝔟 => b +1D520 63 # 𝔠 => c +1D521 64 # 𝔡 => d +1D522 65 # 𝔢 => e +1D523 66 # 𝔣 => f +1D524 67 # 𝔤 => g +1D525 68 # 𝔥 => h +1D526 69 # 𝔦 => i +1D527 6A # 𝔧 => j +1D528 6B # 𝔨 => k +1D529 6C # 𝔩 => l +1D52A 6D # 𝔪 => m +1D52B 6E # 𝔫 => n +1D52C 6F # 𝔬 => o +1D52D 70 # 𝔭 => p 
+1D52E 71 # 𝔮 => q +1D52F 72 # 𝔯 => r +1D530 73 # 𝔰 => s +1D531 74 # 𝔱 => t +1D532 75 # 𝔲 => u +1D533 76 # 𝔳 => v +1D534 77 # 𝔴 => w +1D535 78 # 𝔵 => x +1D536 79 # 𝔶 => y +1D537 7A # 𝔷 => z +1D538 41 # 𝔸 => A +1D539 42 # 𝔹 => B +1D53B 44 # 𝔻 => D +1D53C 45 # 𝔼 => E +1D53D 46 # 𝔽 => F +1D53E 47 # 𝔾 => G +1D540 49 # 𝕀 => I +1D541 4A # 𝕁 => J +1D542 4B # 𝕂 => K +1D543 4C # 𝕃 => L +1D544 4D # 𝕄 => M +1D546 4F # 𝕆 => O +1D54A 53 # 𝕊 => S +1D54B 54 # 𝕋 => T +1D54C 55 # 𝕌 => U +1D54D 56 # 𝕍 => V +1D54E 57 # 𝕎 => W +1D54F 58 # 𝕏 => X +1D550 59 # 𝕐 => Y +1D552 61 # 𝕒 => a +1D553 62 # 𝕓 => b +1D554 63 # 𝕔 => c +1D555 64 # 𝕕 => d +1D556 65 # 𝕖 => e +1D557 66 # 𝕗 => f +1D558 67 # 𝕘 => g +1D559 68 # 𝕙 => h +1D55A 69 # 𝕚 => i +1D55B 6A # 𝕛 => j +1D55C 6B # 𝕜 => k +1D55D 6C # 𝕝 => l +1D55E 6D # 𝕞 => m +1D55F 6E # 𝕟 => n +1D560 6F # 𝕠 => o +1D561 70 # 𝕡 => p +1D562 71 # 𝕢 => q +1D563 72 # 𝕣 => r +1D564 73 # 𝕤 => s +1D565 74 # 𝕥 => t +1D566 75 # 𝕦 => u +1D567 76 # 𝕧 => v +1D568 77 # 𝕨 => w +1D569 78 # 𝕩 => x +1D56A 79 # 𝕪 => y +1D56B 7A # 𝕫 => z +1D56C 41 # 𝕬 => A +1D56D 42 # 𝕭 => B +1D56E 43 # 𝕮 => C +1D56F 44 # 𝕯 => D +1D570 45 # 𝕰 => E +1D571 46 # 𝕱 => F +1D572 47 # 𝕲 => G +1D573 48 # 𝕳 => H +1D574 49 # 𝕴 => I +1D575 4A # 𝕵 => J +1D576 4B # 𝕶 => K +1D577 4C # 𝕷 => L +1D578 4D # 𝕸 => M +1D579 4E # 𝕹 => N +1D57A 4F # 𝕺 => O +1D57B 50 # 𝕻 => P +1D57C 51 # 𝕼 => Q +1D57D 52 # 𝕽 => R +1D57E 53 # 𝕾 => S +1D57F 54 # 𝕿 => T +1D580 55 # 𝖀 => U +1D581 56 # 𝖁 => V +1D582 57 # 𝖂 => W +1D583 58 # 𝖃 => X +1D584 59 # 𝖄 => Y +1D585 5A # 𝖅 => Z +1D586 61 # 𝖆 => a +1D587 62 # 𝖇 => b +1D588 63 # 𝖈 => c +1D589 64 # 𝖉 => d +1D58A 65 # 𝖊 => e +1D58B 66 # 𝖋 => f +1D58C 67 # 𝖌 => g +1D58D 68 # 𝖍 => h +1D58E 69 # 𝖎 => i +1D58F 6A # 𝖏 => j +1D590 6B # 𝖐 => k +1D591 6C # 𝖑 => l +1D592 6D # 𝖒 => m +1D593 6E # 𝖓 => n +1D594 6F # 𝖔 => o +1D595 70 # 𝖕 => p +1D596 71 # 𝖖 => q +1D597 72 # 𝖗 => r +1D598 73 # 𝖘 => s +1D599 74 # 𝖙 => t +1D59A 75 # 𝖚 => u +1D59B 76 # 𝖛 => v +1D59C 77 # 𝖜 => w +1D59D 78 # 𝖝 => x 
+1D59E 79 # 𝖞 => y +1D59F 7A # 𝖟 => z +1D5A0 41 # 𝖠 => A +1D5A1 42 # 𝖡 => B +1D5A2 43 # 𝖢 => C +1D5A3 44 # 𝖣 => D +1D5A4 45 # 𝖤 => E +1D5A5 46 # 𝖥 => F +1D5A6 47 # 𝖦 => G +1D5A7 48 # 𝖧 => H +1D5A8 49 # 𝖨 => I +1D5A9 4A # 𝖩 => J +1D5AA 4B # 𝖪 => K +1D5AB 4C # 𝖫 => L +1D5AC 4D # 𝖬 => M +1D5AD 4E # 𝖭 => N +1D5AE 4F # 𝖮 => O +1D5AF 50 # 𝖯 => P +1D5B0 51 # 𝖰 => Q +1D5B1 52 # 𝖱 => R +1D5B2 53 # 𝖲 => S +1D5B3 54 # 𝖳 => T +1D5B4 55 # 𝖴 => U +1D5B5 56 # 𝖵 => V +1D5B6 57 # 𝖶 => W +1D5B7 58 # 𝖷 => X +1D5B8 59 # 𝖸 => Y +1D5B9 5A # 𝖹 => Z +1D5BA 61 # 𝖺 => a +1D5BB 62 # 𝖻 => b +1D5BC 63 # 𝖼 => c +1D5BD 64 # 𝖽 => d +1D5BE 65 # 𝖾 => e +1D5BF 66 # 𝖿 => f +1D5C0 67 # 𝗀 => g +1D5C1 68 # 𝗁 => h +1D5C2 69 # 𝗂 => i +1D5C3 6A # 𝗃 => j +1D5C4 6B # 𝗄 => k +1D5C5 6C # 𝗅 => l +1D5C6 6D # 𝗆 => m +1D5C7 6E # 𝗇 => n +1D5C8 6F # 𝗈 => o +1D5C9 70 # 𝗉 => p +1D5CA 71 # 𝗊 => q +1D5CB 72 # 𝗋 => r +1D5CC 73 # 𝗌 => s +1D5CD 74 # 𝗍 => t +1D5CE 75 # 𝗎 => u +1D5CF 76 # 𝗏 => v +1D5D0 77 # 𝗐 => w +1D5D1 78 # 𝗑 => x +1D5D2 79 # 𝗒 => y +1D5D3 7A # 𝗓 => z +1D5D4 41 # 𝗔 => A +1D5D5 42 # 𝗕 => B +1D5D6 43 # 𝗖 => C +1D5D7 44 # 𝗗 => D +1D5D8 45 # 𝗘 => E +1D5D9 46 # 𝗙 => F +1D5DA 47 # 𝗚 => G +1D5DB 48 # 𝗛 => H +1D5DC 49 # 𝗜 => I +1D5DD 4A # 𝗝 => J +1D5DE 4B # 𝗞 => K +1D5DF 4C # 𝗟 => L +1D5E0 4D # 𝗠 => M +1D5E1 4E # 𝗡 => N +1D5E2 4F # 𝗢 => O +1D5E3 50 # 𝗣 => P +1D5E4 51 # 𝗤 => Q +1D5E5 52 # 𝗥 => R +1D5E6 53 # 𝗦 => S +1D5E7 54 # 𝗧 => T +1D5E8 55 # 𝗨 => U +1D5E9 56 # 𝗩 => V +1D5EA 57 # 𝗪 => W +1D5EB 58 # 𝗫 => X +1D5EC 59 # 𝗬 => Y +1D5ED 5A # 𝗭 => Z +1D5EE 61 # 𝗮 => a +1D5EF 62 # 𝗯 => b +1D5F0 63 # 𝗰 => c +1D5F1 64 # 𝗱 => d +1D5F2 65 # 𝗲 => e +1D5F3 66 # 𝗳 => f +1D5F4 67 # 𝗴 => g +1D5F5 68 # 𝗵 => h +1D5F6 69 # 𝗶 => i +1D5F7 6A # 𝗷 => j +1D5F8 6B # 𝗸 => k +1D5F9 6C # 𝗹 => l +1D5FA 6D # 𝗺 => m +1D5FB 6E # 𝗻 => n +1D5FC 6F # 𝗼 => o +1D5FD 70 # 𝗽 => p +1D5FE 71 # 𝗾 => q +1D5FF 72 # 𝗿 => r +1D600 73 # 𝘀 => s +1D601 74 # 𝘁 => t +1D602 75 # 𝘂 => u +1D603 76 # 𝘃 => v +1D604 77 # 𝘄 => w +1D605 78 # 𝘅 => x +1D606 79 # 𝘆 => y 
+1D607 7A # 𝘇 => z +1D608 41 # 𝘈 => A +1D609 42 # 𝘉 => B +1D60A 43 # 𝘊 => C +1D60B 44 # 𝘋 => D +1D60C 45 # 𝘌 => E +1D60D 46 # 𝘍 => F +1D60E 47 # 𝘎 => G +1D60F 48 # 𝘏 => H +1D610 49 # 𝘐 => I +1D611 4A # 𝘑 => J +1D612 4B # 𝘒 => K +1D613 4C # 𝘓 => L +1D614 4D # 𝘔 => M +1D615 4E # 𝘕 => N +1D616 4F # 𝘖 => O +1D617 50 # 𝘗 => P +1D618 51 # 𝘘 => Q +1D619 52 # 𝘙 => R +1D61A 53 # 𝘚 => S +1D61B 54 # 𝘛 => T +1D61C 55 # 𝘜 => U +1D61D 56 # 𝘝 => V +1D61E 57 # 𝘞 => W +1D61F 58 # 𝘟 => X +1D620 59 # 𝘠 => Y +1D621 5A # 𝘡 => Z +1D622 61 # 𝘢 => a +1D623 62 # 𝘣 => b +1D624 63 # 𝘤 => c +1D625 64 # 𝘥 => d +1D626 65 # 𝘦 => e +1D627 66 # 𝘧 => f +1D628 67 # 𝘨 => g +1D629 68 # 𝘩 => h +1D62A 69 # 𝘪 => i +1D62B 6A # 𝘫 => j +1D62C 6B # 𝘬 => k +1D62D 6C # 𝘭 => l +1D62E 6D # 𝘮 => m +1D62F 6E # 𝘯 => n +1D630 6F # 𝘰 => o +1D631 70 # 𝘱 => p +1D632 71 # 𝘲 => q +1D633 72 # 𝘳 => r +1D634 73 # 𝘴 => s +1D635 74 # 𝘵 => t +1D636 75 # 𝘶 => u +1D637 76 # 𝘷 => v +1D638 77 # 𝘸 => w +1D639 78 # 𝘹 => x +1D63A 79 # 𝘺 => y +1D63B 7A # 𝘻 => z +1D63C 41 # 𝘼 => A +1D63D 42 # 𝘽 => B +1D63E 43 # 𝘾 => C +1D63F 44 # 𝘿 => D +1D640 45 # 𝙀 => E +1D641 46 # 𝙁 => F +1D642 47 # 𝙂 => G +1D643 48 # 𝙃 => H +1D644 49 # 𝙄 => I +1D645 4A # 𝙅 => J +1D646 4B # 𝙆 => K +1D647 4C # 𝙇 => L +1D648 4D # 𝙈 => M +1D649 4E # 𝙉 => N +1D64A 4F # 𝙊 => O +1D64B 50 # 𝙋 => P +1D64C 51 # 𝙌 => Q +1D64D 52 # 𝙍 => R +1D64E 53 # 𝙎 => S +1D64F 54 # 𝙏 => T +1D650 55 # 𝙐 => U +1D651 56 # 𝙑 => V +1D652 57 # 𝙒 => W +1D653 58 # 𝙓 => X +1D654 59 # 𝙔 => Y +1D655 5A # 𝙕 => Z +1D656 61 # 𝙖 => a +1D657 62 # 𝙗 => b +1D658 63 # 𝙘 => c +1D659 64 # 𝙙 => d +1D65A 65 # 𝙚 => e +1D65B 66 # 𝙛 => f +1D65C 67 # 𝙜 => g +1D65D 68 # 𝙝 => h +1D65E 69 # 𝙞 => i +1D65F 6A # 𝙟 => j +1D660 6B # 𝙠 => k +1D661 6C # 𝙡 => l +1D662 6D # 𝙢 => m +1D663 6E # 𝙣 => n +1D664 6F # 𝙤 => o +1D665 70 # 𝙥 => p +1D666 71 # 𝙦 => q +1D667 72 # 𝙧 => r +1D668 73 # 𝙨 => s +1D669 74 # 𝙩 => t +1D66A 75 # 𝙪 => u +1D66B 76 # 𝙫 => v +1D66C 77 # 𝙬 => w +1D66D 78 # 𝙭 => x +1D66E 79 # 𝙮 => y +1D66F 7A # 𝙯 => z 
+1D670 41 # 𝙰 => A +1D671 42 # 𝙱 => B +1D672 43 # 𝙲 => C +1D673 44 # 𝙳 => D +1D674 45 # 𝙴 => E +1D675 46 # 𝙵 => F +1D676 47 # 𝙶 => G +1D677 48 # 𝙷 => H +1D678 49 # 𝙸 => I +1D679 4A # 𝙹 => J +1D67A 4B # 𝙺 => K +1D67B 4C # 𝙻 => L +1D67C 4D # 𝙼 => M +1D67D 4E # 𝙽 => N +1D67E 4F # 𝙾 => O +1D67F 50 # 𝙿 => P +1D680 51 # 𝚀 => Q +1D681 52 # 𝚁 => R +1D682 53 # 𝚂 => S +1D683 54 # 𝚃 => T +1D684 55 # 𝚄 => U +1D685 56 # 𝚅 => V +1D686 57 # 𝚆 => W +1D687 58 # 𝚇 => X +1D688 59 # 𝚈 => Y +1D689 5A # 𝚉 => Z +1D68A 61 # 𝚊 => a +1D68B 62 # 𝚋 => b +1D68C 63 # 𝚌 => c +1D68D 64 # 𝚍 => d +1D68E 65 # 𝚎 => e +1D68F 66 # 𝚏 => f +1D690 67 # 𝚐 => g +1D691 68 # 𝚑 => h +1D692 69 # 𝚒 => i +1D693 6A # 𝚓 => j +1D694 6B # 𝚔 => k +1D695 6C # 𝚕 => l +1D696 6D # 𝚖 => m +1D697 6E # 𝚗 => n +1D698 6F # 𝚘 => o +1D699 70 # 𝚙 => p +1D69A 71 # 𝚚 => q +1D69B 72 # 𝚛 => r +1D69C 73 # 𝚜 => s +1D69D 74 # 𝚝 => t +1D69E 75 # 𝚞 => u +1D69F 76 # 𝚟 => v +1D6A0 77 # 𝚠 => w +1D6A1 78 # 𝚡 => x +1D6A2 79 # 𝚢 => y +1D6A3 7A # 𝚣 => z +1D6A4 131 # 𝚤 => ı +1D6A5 237 # 𝚥 => ȷ +1D6A8 391 # 𝚨 => Α +1D6A9 392 # 𝚩 => Β +1D6AA 393 # 𝚪 => Γ +1D6AB 394 # 𝚫 => Δ +1D6AC 395 # 𝚬 => Ε +1D6AD 396 # 𝚭 => Ζ +1D6AE 397 # 𝚮 => Η +1D6AF 398 # 𝚯 => Θ +1D6B0 399 # 𝚰 => Ι +1D6B1 39A # 𝚱 => Κ +1D6B2 39B # 𝚲 => Λ +1D6B3 39C # 𝚳 => Μ +1D6B4 39D # 𝚴 => Ν +1D6B5 39E # 𝚵 => Ξ +1D6B6 39F # 𝚶 => Ο +1D6B7 3A0 # 𝚷 => Π +1D6B8 3A1 # 𝚸 => Ρ +1D6B9 398 # 𝚹 => Θ +1D6BA 3A3 # 𝚺 => Σ +1D6BB 3A4 # 𝚻 => Τ +1D6BC 3A5 # 𝚼 => Υ +1D6BD 3A6 # 𝚽 => Φ +1D6BE 3A7 # 𝚾 => Χ +1D6BF 3A8 # 𝚿 => Ψ +1D6C0 3A9 # 𝛀 => Ω +1D6C1 2207 # 𝛁 => ∇ +1D6C2 3B1 # 𝛂 => α +1D6C3 3B2 # 𝛃 => β +1D6C4 3B3 # 𝛄 => γ +1D6C5 3B4 # 𝛅 => δ +1D6C6 3B5 # 𝛆 => ε +1D6C7 3B6 # 𝛇 => ζ +1D6C8 3B7 # 𝛈 => η +1D6C9 3B8 # 𝛉 => θ +1D6CA 3B9 # 𝛊 => ι +1D6CB 3BA # 𝛋 => κ +1D6CC 3BB # 𝛌 => λ +1D6CD 3BC # 𝛍 => μ +1D6CE 3BD # 𝛎 => ν +1D6CF 3BE # 𝛏 => ξ +1D6D0 3BF # 𝛐 => ο +1D6D1 3C0 # 𝛑 => π +1D6D2 3C1 # 𝛒 => ρ +1D6D3 3C2 # 𝛓 => ς +1D6D4 3C3 # 𝛔 => σ +1D6D5 3C4 # 𝛕 => τ +1D6D6 3C5 # 𝛖 => υ +1D6D7 3C6 # 𝛗 => φ +1D6D8 3C7 
# 𝛘 => χ +1D6D9 3C8 # 𝛙 => ψ +1D6DA 3C9 # 𝛚 => ω +1D6DB 2202 # 𝛛 => ∂ +1D6DC 3B5 # 𝛜 => ε +1D6DD 3B8 # 𝛝 => θ +1D6DE 3BA # 𝛞 => κ +1D6DF 3C6 # 𝛟 => φ +1D6E0 3C1 # 𝛠 => ρ +1D6E1 3C0 # 𝛡 => π +1D6E2 391 # 𝛢 => Α +1D6E3 392 # 𝛣 => Β +1D6E4 393 # 𝛤 => Γ +1D6E5 394 # 𝛥 => Δ +1D6E6 395 # 𝛦 => Ε +1D6E7 396 # 𝛧 => Ζ +1D6E8 397 # 𝛨 => Η +1D6E9 398 # 𝛩 => Θ +1D6EA 399 # 𝛪 => Ι +1D6EB 39A # 𝛫 => Κ +1D6EC 39B # 𝛬 => Λ +1D6ED 39C # 𝛭 => Μ +1D6EE 39D # 𝛮 => Ν +1D6EF 39E # 𝛯 => Ξ +1D6F0 39F # 𝛰 => Ο +1D6F1 3A0 # 𝛱 => Π +1D6F2 3A1 # 𝛲 => Ρ +1D6F3 398 # 𝛳 => Θ +1D6F4 3A3 # 𝛴 => Σ +1D6F5 3A4 # 𝛵 => Τ +1D6F6 3A5 # 𝛶 => Υ +1D6F7 3A6 # 𝛷 => Φ +1D6F8 3A7 # 𝛸 => Χ +1D6F9 3A8 # 𝛹 => Ψ +1D6FA 3A9 # 𝛺 => Ω +1D6FB 2207 # 𝛻 => ∇ +1D6FC 3B1 # 𝛼 => α +1D6FD 3B2 # 𝛽 => β +1D6FE 3B3 # 𝛾 => γ +1D6FF 3B4 # 𝛿 => δ +1D700 3B5 # 𝜀 => ε +1D701 3B6 # 𝜁 => ζ +1D702 3B7 # 𝜂 => η +1D703 3B8 # 𝜃 => θ +1D704 3B9 # 𝜄 => ι +1D705 3BA # 𝜅 => κ +1D706 3BB # 𝜆 => λ +1D707 3BC # 𝜇 => μ +1D708 3BD # 𝜈 => ν +1D709 3BE # 𝜉 => ξ +1D70A 3BF # 𝜊 => ο +1D70B 3C0 # 𝜋 => π +1D70C 3C1 # 𝜌 => ρ +1D70D 3C2 # 𝜍 => ς +1D70E 3C3 # 𝜎 => σ +1D70F 3C4 # 𝜏 => τ +1D710 3C5 # 𝜐 => υ +1D711 3C6 # 𝜑 => φ +1D712 3C7 # 𝜒 => χ +1D713 3C8 # 𝜓 => ψ +1D714 3C9 # 𝜔 => ω +1D715 2202 # 𝜕 => ∂ +1D716 3B5 # 𝜖 => ε +1D717 3B8 # 𝜗 => θ +1D718 3BA # 𝜘 => κ +1D719 3C6 # 𝜙 => φ +1D71A 3C1 # 𝜚 => ρ +1D71B 3C0 # 𝜛 => π +1D71C 391 # 𝜜 => Α +1D71D 392 # 𝜝 => Β +1D71E 393 # 𝜞 => Γ +1D71F 394 # 𝜟 => Δ +1D720 395 # 𝜠 => Ε +1D721 396 # 𝜡 => Ζ +1D722 397 # 𝜢 => Η +1D723 398 # 𝜣 => Θ +1D724 399 # 𝜤 => Ι +1D725 39A # 𝜥 => Κ +1D726 39B # 𝜦 => Λ +1D727 39C # 𝜧 => Μ +1D728 39D # 𝜨 => Ν +1D729 39E # 𝜩 => Ξ +1D72A 39F # 𝜪 => Ο +1D72B 3A0 # 𝜫 => Π +1D72C 3A1 # 𝜬 => Ρ +1D72D 398 # 𝜭 => Θ +1D72E 3A3 # 𝜮 => Σ +1D72F 3A4 # 𝜯 => Τ +1D730 3A5 # 𝜰 => Υ +1D731 3A6 # 𝜱 => Φ +1D732 3A7 # 𝜲 => Χ +1D733 3A8 # 𝜳 => Ψ +1D734 3A9 # 𝜴 => Ω +1D735 2207 # 𝜵 => ∇ +1D736 3B1 # 𝜶 => α +1D737 3B2 # 𝜷 => β +1D738 3B3 # 𝜸 => γ +1D739 3B4 # 𝜹 => δ +1D73A 3B5 # 𝜺 => ε +1D73B 3B6 # 𝜻 => ζ +1D73C 
3B7 # 𝜼 => η +1D73D 3B8 # 𝜽 => θ +1D73E 3B9 # 𝜾 => ι +1D73F 3BA # 𝜿 => κ +1D740 3BB # 𝝀 => λ +1D741 3BC # 𝝁 => μ +1D742 3BD # 𝝂 => ν +1D743 3BE # 𝝃 => ξ +1D744 3BF # 𝝄 => ο +1D745 3C0 # 𝝅 => π +1D746 3C1 # 𝝆 => ρ +1D747 3C2 # 𝝇 => ς +1D748 3C3 # 𝝈 => σ +1D749 3C4 # 𝝉 => τ +1D74A 3C5 # 𝝊 => υ +1D74B 3C6 # 𝝋 => φ +1D74C 3C7 # 𝝌 => χ +1D74D 3C8 # 𝝍 => ψ +1D74E 3C9 # 𝝎 => ω +1D74F 2202 # 𝝏 => ∂ +1D750 3B5 # 𝝐 => ε +1D751 3B8 # 𝝑 => θ +1D752 3BA # 𝝒 => κ +1D753 3C6 # 𝝓 => φ +1D754 3C1 # 𝝔 => ρ +1D755 3C0 # 𝝕 => π +1D756 391 # 𝝖 => Α +1D757 392 # 𝝗 => Β +1D758 393 # 𝝘 => Γ +1D759 394 # 𝝙 => Δ +1D75A 395 # 𝝚 => Ε +1D75B 396 # 𝝛 => Ζ +1D75C 397 # 𝝜 => Η +1D75D 398 # 𝝝 => Θ +1D75E 399 # 𝝞 => Ι +1D75F 39A # 𝝟 => Κ +1D760 39B # 𝝠 => Λ +1D761 39C # 𝝡 => Μ +1D762 39D # 𝝢 => Ν +1D763 39E # 𝝣 => Ξ +1D764 39F # 𝝤 => Ο +1D765 3A0 # 𝝥 => Π +1D766 3A1 # 𝝦 => Ρ +1D767 398 # 𝝧 => Θ +1D768 3A3 # 𝝨 => Σ +1D769 3A4 # 𝝩 => Τ +1D76A 3A5 # 𝝪 => Υ +1D76B 3A6 # 𝝫 => Φ +1D76C 3A7 # 𝝬 => Χ +1D76D 3A8 # 𝝭 => Ψ +1D76E 3A9 # 𝝮 => Ω +1D76F 2207 # 𝝯 => ∇ +1D770 3B1 # 𝝰 => α +1D771 3B2 # 𝝱 => β +1D772 3B3 # 𝝲 => γ +1D773 3B4 # 𝝳 => δ +1D774 3B5 # 𝝴 => ε +1D775 3B6 # 𝝵 => ζ +1D776 3B7 # 𝝶 => η +1D777 3B8 # 𝝷 => θ +1D778 3B9 # 𝝸 => ι +1D779 3BA # 𝝹 => κ +1D77A 3BB # 𝝺 => λ +1D77B 3BC # 𝝻 => μ +1D77C 3BD # 𝝼 => ν +1D77D 3BE # 𝝽 => ξ +1D77E 3BF # 𝝾 => ο +1D77F 3C0 # 𝝿 => π +1D780 3C1 # 𝞀 => ρ +1D781 3C2 # 𝞁 => ς +1D782 3C3 # 𝞂 => σ +1D783 3C4 # 𝞃 => τ +1D784 3C5 # 𝞄 => υ +1D785 3C6 # 𝞅 => φ +1D786 3C7 # 𝞆 => χ +1D787 3C8 # 𝞇 => ψ +1D788 3C9 # 𝞈 => ω +1D789 2202 # 𝞉 => ∂ +1D78A 3B5 # 𝞊 => ε +1D78B 3B8 # 𝞋 => θ +1D78C 3BA # 𝞌 => κ +1D78D 3C6 # 𝞍 => φ +1D78E 3C1 # 𝞎 => ρ +1D78F 3C0 # 𝞏 => π +1D790 391 # 𝞐 => Α +1D791 392 # 𝞑 => Β +1D792 393 # 𝞒 => Γ +1D793 394 # 𝞓 => Δ +1D794 395 # 𝞔 => Ε +1D795 396 # 𝞕 => Ζ +1D796 397 # 𝞖 => Η +1D797 398 # 𝞗 => Θ +1D798 399 # 𝞘 => Ι +1D799 39A # 𝞙 => Κ +1D79A 39B # 𝞚 => Λ +1D79B 39C # 𝞛 => Μ +1D79C 39D # 𝞜 => Ν +1D79D 39E # 𝞝 => Ξ +1D79E 39F # 𝞞 => Ο +1D79F 3A0 # 𝞟 => Π 
+1D7A0 3A1 # 𝞠 => Ρ +1D7A1 398 # 𝞡 => Θ +1D7A2 3A3 # 𝞢 => Σ +1D7A3 3A4 # 𝞣 => Τ +1D7A4 3A5 # 𝞤 => Υ +1D7A5 3A6 # 𝞥 => Φ +1D7A6 3A7 # 𝞦 => Χ +1D7A7 3A8 # 𝞧 => Ψ +1D7A8 3A9 # 𝞨 => Ω +1D7A9 2207 # 𝞩 => ∇ +1D7AA 3B1 # 𝞪 => α +1D7AB 3B2 # 𝞫 => β +1D7AC 3B3 # 𝞬 => γ +1D7AD 3B4 # 𝞭 => δ +1D7AE 3B5 # 𝞮 => ε +1D7AF 3B6 # 𝞯 => ζ +1D7B0 3B7 # 𝞰 => η +1D7B1 3B8 # 𝞱 => θ +1D7B2 3B9 # 𝞲 => ι +1D7B3 3BA # 𝞳 => κ +1D7B4 3BB # 𝞴 => λ +1D7B5 3BC # 𝞵 => μ +1D7B6 3BD # 𝞶 => ν +1D7B7 3BE # 𝞷 => ξ +1D7B8 3BF # 𝞸 => ο +1D7B9 3C0 # 𝞹 => π +1D7BA 3C1 # 𝞺 => ρ +1D7BB 3C2 # 𝞻 => ς +1D7BC 3C3 # 𝞼 => σ +1D7BD 3C4 # 𝞽 => τ +1D7BE 3C5 # 𝞾 => υ +1D7BF 3C6 # 𝞿 => φ +1D7C0 3C7 # 𝟀 => χ +1D7C1 3C8 # 𝟁 => ψ +1D7C2 3C9 # 𝟂 => ω +1D7C3 2202 # 𝟃 => ∂ +1D7C4 3B5 # 𝟄 => ε +1D7C5 3B8 # 𝟅 => θ +1D7C6 3BA # 𝟆 => κ +1D7C7 3C6 # 𝟇 => φ +1D7C8 3C1 # 𝟈 => ρ +1D7C9 3C0 # 𝟉 => π +1D7CA 3DC # 𝟊 => Ϝ +1D7CB 3DD # 𝟋 => ϝ +1D7CE 30 # 𝟎 => 0 +1D7CF 31 # 𝟏 => 1 +1D7D0 32 # 𝟐 => 2 +1D7D1 33 # 𝟑 => 3 +1D7D2 34 # 𝟒 => 4 +1D7D3 35 # 𝟓 => 5 +1D7D4 36 # 𝟔 => 6 +1D7D5 37 # 𝟕 => 7 +1D7D6 38 # 𝟖 => 8 +1D7D7 39 # 𝟗 => 9 +1D7D8 30 # 𝟘 => 0 +1D7D9 31 # 𝟙 => 1 +1D7DA 32 # 𝟚 => 2 +1D7DB 33 # 𝟛 => 3 +1D7DC 34 # 𝟜 => 4 +1D7DD 35 # 𝟝 => 5 +1D7DE 36 # 𝟞 => 6 +1D7DF 37 # 𝟟 => 7 +1D7E0 38 # 𝟠 => 8 +1D7E1 39 # 𝟡 => 9 +1D7E2 30 # 𝟢 => 0 +1D7E3 31 # 𝟣 => 1 +1D7E4 32 # 𝟤 => 2 +1D7E5 33 # 𝟥 => 3 +1D7E6 34 # 𝟦 => 4 +1D7E7 35 # 𝟧 => 5 +1D7E8 36 # 𝟨 => 6 +1D7E9 37 # 𝟩 => 7 +1D7EA 38 # 𝟪 => 8 +1D7EB 39 # 𝟫 => 9 +1D7EC 30 # 𝟬 => 0 +1D7ED 31 # 𝟭 => 1 +1D7EE 32 # 𝟮 => 2 +1D7EF 33 # 𝟯 => 3 +1D7F0 34 # 𝟰 => 4 +1D7F1 35 # 𝟱 => 5 +1D7F2 36 # 𝟲 => 6 +1D7F3 37 # 𝟳 => 7 +1D7F4 38 # 𝟴 => 8 +1D7F5 39 # 𝟵 => 9 +1D7F6 30 # 𝟶 => 0 +1D7F7 31 # 𝟷 => 1 +1D7F8 32 # 𝟸 => 2 +1D7F9 33 # 𝟹 => 3 +1D7FA 34 # 𝟺 => 4 +1D7FB 35 # 𝟻 => 5 +1D7FC 36 # 𝟼 => 6 +1D7FD 37 # 𝟽 => 7 +1D7FE 38 # 𝟾 => 8 +1D7FF 39 # 𝟿 => 9 +1EE00 627 # 𞸀 => ا +1EE01 628 # 𞸁 => ب +1EE02 62C # 𞸂 => ج +1EE03 62F # 𞸃 => د +1EE05 648 # 𞸅 => و +1EE06 632 # 𞸆 => ز +1EE07 62D # 𞸇 => ح +1EE08 637 # 𞸈 => ط +1EE09 
64A # 𞸉 => ي +1EE0A 643 # 𞸊 => ك +1EE0B 644 # 𞸋 => ل +1EE0C 645 # 𞸌 => م +1EE0D 646 # 𞸍 => ن +1EE0E 633 # 𞸎 => س +1EE0F 639 # 𞸏 => ع +1EE10 641 # 𞸐 => ف +1EE11 635 # 𞸑 => ص +1EE12 642 # 𞸒 => ق +1EE13 631 # 𞸓 => ر +1EE14 634 # 𞸔 => ش +1EE15 62A # 𞸕 => ت +1EE16 62B # 𞸖 => ث +1EE17 62E # 𞸗 => خ +1EE18 630 # 𞸘 => ذ +1EE19 636 # 𞸙 => ض +1EE1A 638 # 𞸚 => ظ +1EE1B 63A # 𞸛 => غ +1EE1C 66E # 𞸜 => ٮ +1EE1D 6BA # 𞸝 => ں +1EE1E 6A1 # 𞸞 => ڡ +1EE1F 66F # 𞸟 => ٯ +1EE21 628 # 𞸡 => ب +1EE22 62C # 𞸢 => ج +1EE24 647 # 𞸤 => ه +1EE27 62D # 𞸧 => ح +1EE29 64A # 𞸩 => ي +1EE2A 643 # 𞸪 => ك +1EE2B 644 # 𞸫 => ل +1EE2C 645 # 𞸬 => م +1EE2D 646 # 𞸭 => ن +1EE2E 633 # 𞸮 => س +1EE2F 639 # 𞸯 => ع +1EE30 641 # 𞸰 => ف +1EE31 635 # 𞸱 => ص +1EE32 642 # 𞸲 => ق +1EE34 634 # 𞸴 => ش +1EE35 62A # 𞸵 => ت +1EE36 62B # 𞸶 => ث +1EE37 62E # 𞸷 => خ +1EE39 636 # 𞸹 => ض +1EE3B 63A # 𞸻 => غ +1EE42 62C # 𞹂 => ج +1EE47 62D # 𞹇 => ح +1EE49 64A # 𞹉 => ي +1EE4B 644 # 𞹋 => ل +1EE4D 646 # 𞹍 => ن +1EE4E 633 # 𞹎 => س +1EE4F 639 # 𞹏 => ع +1EE51 635 # 𞹑 => ص +1EE52 642 # 𞹒 => ق +1EE54 634 # 𞹔 => ش +1EE57 62E # 𞹗 => خ +1EE59 636 # 𞹙 => ض +1EE5B 63A # 𞹛 => غ +1EE5D 6BA # 𞹝 => ں +1EE5F 66F # 𞹟 => ٯ +1EE61 628 # 𞹡 => ب +1EE62 62C # 𞹢 => ج +1EE64 647 # 𞹤 => ه +1EE67 62D # 𞹧 => ح +1EE68 637 # 𞹨 => ط +1EE69 64A # 𞹩 => ي +1EE6A 643 # 𞹪 => ك +1EE6C 645 # 𞹬 => م +1EE6D 646 # 𞹭 => ن +1EE6E 633 # 𞹮 => س +1EE6F 639 # 𞹯 => ع +1EE70 641 # 𞹰 => ف +1EE71 635 # 𞹱 => ص +1EE72 642 # 𞹲 => ق +1EE74 634 # 𞹴 => ش +1EE75 62A # 𞹵 => ت +1EE76 62B # 𞹶 => ث +1EE77 62E # 𞹷 => خ +1EE79 636 # 𞹹 => ض +1EE7A 638 # 𞹺 => ظ +1EE7B 63A # 𞹻 => غ +1EE7C 66E # 𞹼 => ٮ +1EE7E 6A1 # 𞹾 => ڡ +1EE80 627 # 𞺀 => ا +1EE81 628 # 𞺁 => ب +1EE82 62C # 𞺂 => ج +1EE83 62F # 𞺃 => د +1EE84 647 # 𞺄 => ه +1EE85 648 # 𞺅 => و +1EE86 632 # 𞺆 => ز +1EE87 62D # 𞺇 => ح +1EE88 637 # 𞺈 => ط +1EE89 64A # 𞺉 => ي +1EE8B 644 # 𞺋 => ل +1EE8C 645 # 𞺌 => م +1EE8D 646 # 𞺍 => ن +1EE8E 633 # 𞺎 => س +1EE8F 639 # 𞺏 => ع +1EE90 641 # 𞺐 => ف +1EE91 635 # 𞺑 => ص +1EE92 642 # 𞺒 => ق +1EE93 631 # 𞺓 => ر +1EE94 
634 # 𞺔 => ش +1EE95 62A # 𞺕 => ت +1EE96 62B # 𞺖 => ث +1EE97 62E # 𞺗 => خ +1EE98 630 # 𞺘 => ذ +1EE99 636 # 𞺙 => ض +1EE9A 638 # 𞺚 => ظ +1EE9B 63A # 𞺛 => غ +1EEA1 628 # 𞺡 => ب +1EEA2 62C # 𞺢 => ج +1EEA3 62F # 𞺣 => د +1EEA5 648 # 𞺥 => و +1EEA6 632 # 𞺦 => ز +1EEA7 62D # 𞺧 => ح +1EEA8 637 # 𞺨 => ط +1EEA9 64A # 𞺩 => ي +1EEAB 644 # 𞺫 => ل +1EEAC 645 # 𞺬 => م +1EEAD 646 # 𞺭 => ن +1EEAE 633 # 𞺮 => س +1EEAF 639 # 𞺯 => ع +1EEB0 641 # 𞺰 => ف +1EEB1 635 # 𞺱 => ص +1EEB2 642 # 𞺲 => ق +1EEB3 631 # 𞺳 => ر +1EEB4 634 # 𞺴 => ش +1EEB5 62A # 𞺵 => ت +1EEB6 62B # 𞺶 => ث +1EEB7 62E # 𞺷 => خ +1EEB8 630 # 𞺸 => ذ +1EEB9 636 # 𞺹 => ض +1EEBA 638 # 𞺺 => ظ +1EEBB 63A # 𞺻 => غ +1F100 30 2E # 🄀 => 0. +1F101 30 2C # 🄁 => 0, +1F102 31 2C # 🄂 => 1, +1F103 32 2C # 🄃 => 2, +1F104 33 2C # 🄄 => 3, +1F105 34 2C # 🄅 => 4, +1F106 35 2C # 🄆 => 5, +1F107 36 2C # 🄇 => 6, +1F108 37 2C # 🄈 => 7, +1F109 38 2C # 🄉 => 8, +1F10A 39 2C # 🄊 => 9, +1F110 28 41 29 # 🄐 => (A) +1F111 28 42 29 # 🄑 => (B) +1F112 28 43 29 # 🄒 => (C) +1F113 28 44 29 # 🄓 => (D) +1F114 28 45 29 # 🄔 => (E) +1F115 28 46 29 # 🄕 => (F) +1F116 28 47 29 # 🄖 => (G) +1F117 28 48 29 # 🄗 => (H) +1F118 28 49 29 # 🄘 => (I) +1F119 28 4A 29 # 🄙 => (J) +1F11A 28 4B 29 # 🄚 => (K) +1F11B 28 4C 29 # 🄛 => (L) +1F11C 28 4D 29 # 🄜 => (M) +1F11D 28 4E 29 # 🄝 => (N) +1F11E 28 4F 29 # 🄞 => (O) +1F11F 28 50 29 # 🄟 => (P) +1F120 28 51 29 # 🄠 => (Q) +1F121 28 52 29 # 🄡 => (R) +1F122 28 53 29 # 🄢 => (S) +1F123 28 54 29 # 🄣 => (T) +1F124 28 55 29 # 🄤 => (U) +1F125 28 56 29 # 🄥 => (V) +1F126 28 57 29 # 🄦 => (W) +1F127 28 58 29 # 🄧 => (X) +1F128 28 59 29 # 🄨 => (Y) +1F129 28 5A 29 # 🄩 => (Z) +1F12A 3014 53 3015 # 🄪 => 〔S〕 +1F12B 43 # 🄫 => C +1F12C 52 # 🄬 => R +1F12D 43 44 # 🄭 => CD +1F12E 57 5A # 🄮 => WZ +1F130 41 # 🄰 => A +1F131 42 # 🄱 => B +1F132 43 # 🄲 => C +1F133 44 # 🄳 => D +1F134 45 # 🄴 => E +1F135 46 # 🄵 => F +1F136 47 # 🄶 => G +1F137 48 # 🄷 => H +1F138 49 # 🄸 => I +1F139 4A # 🄹 => J +1F13A 4B # 🄺 => K +1F13B 4C # 🄻 => L +1F13C 4D # 🄼 => M +1F13D 4E # 🄽 => N +1F13E 4F 
# 🄾 => O +1F13F 50 # 🄿 => P +1F140 51 # 🅀 => Q +1F141 52 # 🅁 => R +1F142 53 # 🅂 => S +1F143 54 # 🅃 => T +1F144 55 # 🅄 => U +1F145 56 # 🅅 => V +1F146 57 # 🅆 => W +1F147 58 # 🅇 => X +1F148 59 # 🅈 => Y +1F149 5A # 🅉 => Z +1F14A 48 56 # 🅊 => HV +1F14B 4D 56 # 🅋 => MV +1F14C 53 44 # 🅌 => SD +1F14D 53 53 # 🅍 => SS +1F14E 50 50 56 # 🅎 => PPV +1F14F 57 43 # 🅏 => WC +1F16A 4D 43 # 🅪 => MC +1F16B 4D 44 # 🅫 => MD +1F16C 4D 52 # 🅬 => MR +1F190 44 4A # 🆐 => DJ +1F200 307B 304B # 🈀 => ほか +1F201 30B3 30B3 # 🈁 => ココ +1F202 30B5 # 🈂 => サ +1F210 624B # 🈐 => 手 +1F211 5B57 # 🈑 => 字 +1F212 53CC # 🈒 => 双 +1F213 30C6 3099 # 🈓 => デ +1F214 4E8C # 🈔 => 二 +1F215 591A # 🈕 => 多 +1F216 89E3 # 🈖 => 解 +1F217 5929 # 🈗 => 天 +1F218 4EA4 # 🈘 => 交 +1F219 6620 # 🈙 => 映 +1F21A 7121 # 🈚 => 無 +1F21B 6599 # 🈛 => 料 +1F21C 524D # 🈜 => 前 +1F21D 5F8C # 🈝 => 後 +1F21E 518D # 🈞 => 再 +1F21F 65B0 # 🈟 => 新 +1F220 521D # 🈠 => 初 +1F221 7D42 # 🈡 => 終 +1F222 751F # 🈢 => 生 +1F223 8CA9 # 🈣 => 販 +1F224 58F0 # 🈤 => 声 +1F225 5439 # 🈥 => 吹 +1F226 6F14 # 🈦 => 演 +1F227 6295 # 🈧 => 投 +1F228 6355 # 🈨 => 捕 +1F229 4E00 # 🈩 => 一 +1F22A 4E09 # 🈪 => 三 +1F22B 904A # 🈫 => 遊 +1F22C 5DE6 # 🈬 => 左 +1F22D 4E2D # 🈭 => 中 +1F22E 53F3 # 🈮 => 右 +1F22F 6307 # 🈯 => 指 +1F230 8D70 # 🈰 => 走 +1F231 6253 # 🈱 => 打 +1F232 7981 # 🈲 => 禁 +1F233 7A7A # 🈳 => 空 +1F234 5408 # 🈴 => 合 +1F235 6E80 # 🈵 => 満 +1F236 6709 # 🈶 => 有 +1F237 6708 # 🈷 => 月 +1F238 7533 # 🈸 => 申 +1F239 5272 # 🈹 => 割 +1F23A 55B6 # 🈺 => 営 +1F23B 914D # 🈻 => 配 +1F240 3014 672C 3015 # 🉀 => 〔本〕 +1F241 3014 4E09 3015 # 🉁 => 〔三〕 +1F242 3014 4E8C 3015 # 🉂 => 〔二〕 +1F243 3014 5B89 3015 # 🉃 => 〔安〕 +1F244 3014 70B9 3015 # 🉄 => 〔点〕 +1F245 3014 6253 3015 # 🉅 => 〔打〕 +1F246 3014 76D7 3015 # 🉆 => 〔盗〕 +1F247 3014 52DD 3015 # 🉇 => 〔勝〕 +1F248 3014 6557 3015 # 🉈 => 〔敗〕 +1F250 5F97 # 🉐 => 得 +1F251 53EF # 🉑 => 可 +1FBF0 30 # 🯰 => 0 +1FBF1 31 # 🯱 => 1 +1FBF2 32 # 🯲 => 2 +1FBF3 33 # 🯳 => 3 +1FBF4 34 # 🯴 => 4 +1FBF5 35 # 🯵 => 5 +1FBF6 36 # 🯶 => 6 +1FBF7 37 # 🯷 => 7 +1FBF8 38 # 🯸 => 8 +1FBF9 39 # 🯹 => 9 +2F800 4E3D # 
丽 => 丽 +2F801 4E38 # 丸 => 丸 +2F802 4E41 # 乁 => 乁 +2F803 20122 # 𠄢 => 𠄢 +2F804 4F60 # 你 => 你 +2F805 4FAE # 侮 => 侮 +2F806 4FBB # 侻 => 侻 +2F807 5002 # 倂 => 倂 +2F808 507A # 偺 => 偺 +2F809 5099 # 備 => 備 +2F80A 50E7 # 僧 => 僧 +2F80B 50CF # 像 => 像 +2F80C 349E # 㒞 => 㒞 +2F80D 2063A # 𠘺 => 𠘺 +2F80E 514D # 免 => 免 +2F80F 5154 # 兔 => 兔 +2F810 5164 # 兤 => 兤 +2F811 5177 # 具 => 具 +2F812 2051C # 𠔜 => 𠔜 +2F813 34B9 # 㒹 => 㒹 +2F814 5167 # 內 => 內 +2F815 518D # 再 => 再 +2F816 2054B # 𠕋 => 𠕋 +2F817 5197 # 冗 => 冗 +2F818 51A4 # 冤 => 冤 +2F819 4ECC # 仌 => 仌 +2F81A 51AC # 冬 => 冬 +2F81B 51B5 # 况 => 况 +2F81C 291DF # 𩇟 => 𩇟 +2F81D 51F5 # 凵 => 凵 +2F81E 5203 # 刃 => 刃 +2F81F 34DF # 㓟 => 㓟 +2F820 523B # 刻 => 刻 +2F821 5246 # 剆 => 剆 +2F822 5272 # 割 => 割 +2F823 5277 # 剷 => 剷 +2F824 3515 # 㔕 => 㔕 +2F825 52C7 # 勇 => 勇 +2F826 52C9 # 勉 => 勉 +2F827 52E4 # 勤 => 勤 +2F828 52FA # 勺 => 勺 +2F829 5305 # 包 => 包 +2F82A 5306 # 匆 => 匆 +2F82B 5317 # 北 => 北 +2F82C 5349 # 卉 => 卉 +2F82D 5351 # 卑 => 卑 +2F82E 535A # 博 => 博 +2F82F 5373 # 即 => 即 +2F830 537D # 卽 => 卽 +2F831 537F # 卿 => 卿 +2F832 537F # 卿 => 卿 +2F833 537F # 卿 => 卿 +2F834 20A2C # 𠨬 => 𠨬 +2F835 7070 # 灰 => 灰 +2F836 53CA # 及 => 及 +2F837 53DF # 叟 => 叟 +2F838 20B63 # 𠭣 => 𠭣 +2F839 53EB # 叫 => 叫 +2F83A 53F1 # 叱 => 叱 +2F83B 5406 # 吆 => 吆 +2F83C 549E # 咞 => 咞 +2F83D 5438 # 吸 => 吸 +2F83E 5448 # 呈 => 呈 +2F83F 5468 # 周 => 周 +2F840 54A2 # 咢 => 咢 +2F841 54F6 # 哶 => 哶 +2F842 5510 # 唐 => 唐 +2F843 5553 # 啓 => 啓 +2F844 5563 # 啣 => 啣 +2F845 5584 # 善 => 善 +2F846 5584 # 善 => 善 +2F847 5599 # 喙 => 喙 +2F848 55AB # 喫 => 喫 +2F849 55B3 # 喳 => 喳 +2F84A 55C2 # 嗂 => 嗂 +2F84B 5716 # 圖 => 圖 +2F84C 5606 # 嘆 => 嘆 +2F84D 5717 # 圗 => 圗 +2F84E 5651 # 噑 => 噑 +2F84F 5674 # 噴 => 噴 +2F850 5207 # 切 => 切 +2F851 58EE # 壮 => 壮 +2F852 57CE # 城 => 城 +2F853 57F4 # 埴 => 埴 +2F854 580D # 堍 => 堍 +2F855 578B # 型 => 型 +2F856 5832 # 堲 => 堲 +2F857 5831 # 報 => 報 +2F858 58AC # 墬 => 墬 +2F859 214E4 # 𡓤 => 𡓤 +2F85A 58F2 # 売 => 売 +2F85B 58F7 # 壷 => 壷 +2F85C 5906 # 夆 => 夆 +2F85D 591A # 多 => 多 +2F85E 5922 # 夢 => 夢 +2F85F 
5962 # 奢 => 奢 +2F860 216A8 # 𡚨 => 𡚨 +2F861 216EA # 𡛪 => 𡛪 +2F862 59EC # 姬 => 姬 +2F863 5A1B # 娛 => 娛 +2F864 5A27 # 娧 => 娧 +2F865 59D8 # 姘 => 姘 +2F866 5A66 # 婦 => 婦 +2F867 36EE # 㛮 => 㛮 +2F868 36FC # 㛼 => 㛼 +2F869 5B08 # 嬈 => 嬈 +2F86A 5B3E # 嬾 => 嬾 +2F86B 5B3E # 嬾 => 嬾 +2F86C 219C8 # 𡧈 => 𡧈 +2F86D 5BC3 # 寃 => 寃 +2F86E 5BD8 # 寘 => 寘 +2F86F 5BE7 # 寧 => 寧 +2F870 5BF3 # 寳 => 寳 +2F871 21B18 # 𡬘 => 𡬘 +2F872 5BFF # 寿 => 寿 +2F873 5C06 # 将 => 将 +2F874 5F53 # 当 => 当 +2F875 5C22 # 尢 => 尢 +2F876 3781 # 㞁 => 㞁 +2F877 5C60 # 屠 => 屠 +2F878 5C6E # 屮 => 屮 +2F879 5CC0 # 峀 => 峀 +2F87A 5C8D # 岍 => 岍 +2F87B 21DE4 # 𡷤 => 𡷤 +2F87C 5D43 # 嵃 => 嵃 +2F87D 21DE6 # 𡷦 => 𡷦 +2F87E 5D6E # 嵮 => 嵮 +2F87F 5D6B # 嵫 => 嵫 +2F880 5D7C # 嵼 => 嵼 +2F881 5DE1 # 巡 => 巡 +2F882 5DE2 # 巢 => 巢 +2F883 382F # 㠯 => 㠯 +2F884 5DFD # 巽 => 巽 +2F885 5E28 # 帨 => 帨 +2F886 5E3D # 帽 => 帽 +2F887 5E69 # 幩 => 幩 +2F888 3862 # 㡢 => 㡢 +2F889 22183 # 𢆃 => 𢆃 +2F88A 387C # 㡼 => 㡼 +2F88B 5EB0 # 庰 => 庰 +2F88C 5EB3 # 庳 => 庳 +2F88D 5EB6 # 庶 => 庶 +2F88E 5ECA # 廊 => 廊 +2F88F 2A392 # 𪎒 => 𪎒 +2F890 5EFE # 廾 => 廾 +2F891 22331 # 𢌱 => 𢌱 +2F892 22331 # 𢌱 => 𢌱 +2F893 8201 # 舁 => 舁 +2F894 5F22 # 弢 => 弢 +2F895 5F22 # 弢 => 弢 +2F896 38C7 # 㣇 => 㣇 +2F897 232B8 # 𣊸 => 𣊸 +2F898 261DA # 𦇚 => 𦇚 +2F899 5F62 # 形 => 形 +2F89A 5F6B # 彫 => 彫 +2F89B 38E3 # 㣣 => 㣣 +2F89C 5F9A # 徚 => 徚 +2F89D 5FCD # 忍 => 忍 +2F89E 5FD7 # 志 => 志 +2F89F 5FF9 # 忹 => 忹 +2F8A0 6081 # 悁 => 悁 +2F8A1 393A # 㤺 => 㤺 +2F8A2 391C # 㤜 => 㤜 +2F8A3 6094 # 悔 => 悔 +2F8A4 226D4 # 𢛔 => 𢛔 +2F8A5 60C7 # 惇 => 惇 +2F8A6 6148 # 慈 => 慈 +2F8A7 614C # 慌 => 慌 +2F8A8 614E # 慎 => 慎 +2F8A9 614C # 慌 => 慌 +2F8AA 617A # 慺 => 慺 +2F8AB 618E # 憎 => 憎 +2F8AC 61B2 # 憲 => 憲 +2F8AD 61A4 # 憤 => 憤 +2F8AE 61AF # 憯 => 憯 +2F8AF 61DE # 懞 => 懞 +2F8B0 61F2 # 懲 => 懲 +2F8B1 61F6 # 懶 => 懶 +2F8B2 6210 # 成 => 成 +2F8B3 621B # 戛 => 戛 +2F8B4 625D # 扝 => 扝 +2F8B5 62B1 # 抱 => 抱 +2F8B6 62D4 # 拔 => 拔 +2F8B7 6350 # 捐 => 捐 +2F8B8 22B0C # 𢬌 => 𢬌 +2F8B9 633D # 挽 => 挽 +2F8BA 62FC # 拼 => 拼 +2F8BB 6368 # 捨 => 捨 +2F8BC 6383 # 掃 => 掃 +2F8BD 63E4 # 揤 => 
揤 +2F8BE 22BF1 # 𢯱 => 𢯱 +2F8BF 6422 # 搢 => 搢 +2F8C0 63C5 # 揅 => 揅 +2F8C1 63A9 # 掩 => 掩 +2F8C2 3A2E # 㨮 => 㨮 +2F8C3 6469 # 摩 => 摩 +2F8C4 647E # 摾 => 摾 +2F8C5 649D # 撝 => 撝 +2F8C6 6477 # 摷 => 摷 +2F8C7 3A6C # 㩬 => 㩬 +2F8C8 654F # 敏 => 敏 +2F8C9 656C # 敬 => 敬 +2F8CA 2300A # 𣀊 => 𣀊 +2F8CB 65E3 # 旣 => 旣 +2F8CC 66F8 # 書 => 書 +2F8CD 6649 # 晉 => 晉 +2F8CE 3B19 # 㬙 => 㬙 +2F8CF 6691 # 暑 => 暑 +2F8D0 3B08 # 㬈 => 㬈 +2F8D1 3AE4 # 㫤 => 㫤 +2F8D2 5192 # 冒 => 冒 +2F8D3 5195 # 冕 => 冕 +2F8D4 6700 # 最 => 最 +2F8D5 669C # 暜 => 暜 +2F8D6 80AD # 肭 => 肭 +2F8D7 43D9 # 䏙 => 䏙 +2F8D8 6717 # 朗 => 朗 +2F8D9 671B # 望 => 望 +2F8DA 6721 # 朡 => 朡 +2F8DB 675E # 杞 => 杞 +2F8DC 6753 # 杓 => 杓 +2F8DD 233C3 # 𣏃 => 𣏃 +2F8DE 3B49 # 㭉 => 㭉 +2F8DF 67FA # 柺 => 柺 +2F8E0 6785 # 枅 => 枅 +2F8E1 6852 # 桒 => 桒 +2F8E2 6885 # 梅 => 梅 +2F8E3 2346D # 𣑭 => 𣑭 +2F8E4 688E # 梎 => 梎 +2F8E5 681F # 栟 => 栟 +2F8E6 6914 # 椔 => 椔 +2F8E7 3B9D # 㮝 => 㮝 +2F8E8 6942 # 楂 => 楂 +2F8E9 69A3 # 榣 => 榣 +2F8EA 69EA # 槪 => 槪 +2F8EB 6AA8 # 檨 => 檨 +2F8EC 236A3 # 𣚣 => 𣚣 +2F8ED 6ADB # 櫛 => 櫛 +2F8EE 3C18 # 㰘 => 㰘 +2F8EF 6B21 # 次 => 次 +2F8F0 238A7 # 𣢧 => 𣢧 +2F8F1 6B54 # 歔 => 歔 +2F8F2 3C4E # 㱎 => 㱎 +2F8F3 6B72 # 歲 => 歲 +2F8F4 6B9F # 殟 => 殟 +2F8F5 6BBA # 殺 => 殺 +2F8F6 6BBB # 殻 => 殻 +2F8F7 23A8D # 𣪍 => 𣪍 +2F8F8 21D0B # 𡴋 => 𡴋 +2F8F9 23AFA # 𣫺 => 𣫺 +2F8FA 6C4E # 汎 => 汎 +2F8FB 23CBC # 𣲼 => 𣲼 +2F8FC 6CBF # 沿 => 沿 +2F8FD 6CCD # 泍 => 泍 +2F8FE 6C67 # 汧 => 汧 +2F8FF 6D16 # 洖 => 洖 +2F900 6D3E # 派 => 派 +2F901 6D77 # 海 => 海 +2F902 6D41 # 流 => 流 +2F903 6D69 # 浩 => 浩 +2F904 6D78 # 浸 => 浸 +2F905 6D85 # 涅 => 涅 +2F906 23D1E # 𣴞 => 𣴞 +2F907 6D34 # 洴 => 洴 +2F908 6E2F # 港 => 港 +2F909 6E6E # 湮 => 湮 +2F90A 3D33 # 㴳 => 㴳 +2F90B 6ECB # 滋 => 滋 +2F90C 6EC7 # 滇 => 滇 +2F90D 23ED1 # 𣻑 => 𣻑 +2F90E 6DF9 # 淹 => 淹 +2F90F 6F6E # 潮 => 潮 +2F910 23F5E # 𣽞 => 𣽞 +2F911 23F8E # 𣾎 => 𣾎 +2F912 6FC6 # 濆 => 濆 +2F913 7039 # 瀹 => 瀹 +2F914 701E # 瀞 => 瀞 +2F915 701B # 瀛 => 瀛 +2F916 3D96 # 㶖 => 㶖 +2F917 704A # 灊 => 灊 +2F918 707D # 災 => 災 +2F919 7077 # 灷 => 灷 +2F91A 70AD # 炭 => 炭 +2F91B 20525 # 𠔥 => 𠔥 +2F91C 
7145 # 煅 => 煅 +2F91D 24263 # 𤉣 => 𤉣 +2F91E 719C # 熜 => 熜 +2F91F 243AB # 𤎫 => 𤎫 +2F920 7228 # 爨 => 爨 +2F921 7235 # 爵 => 爵 +2F922 7250 # 牐 => 牐 +2F923 24608 # 𤘈 => 𤘈 +2F924 7280 # 犀 => 犀 +2F925 7295 # 犕 => 犕 +2F926 24735 # 𤜵 => 𤜵 +2F927 24814 # 𤠔 => 𤠔 +2F928 737A # 獺 => 獺 +2F929 738B # 王 => 王 +2F92A 3EAC # 㺬 => 㺬 +2F92B 73A5 # 玥 => 玥 +2F92C 3EB8 # 㺸 => 㺸 +2F92D 3EB8 # 㺸 => 㺸 +2F92E 7447 # 瑇 => 瑇 +2F92F 745C # 瑜 => 瑜 +2F930 7471 # 瑱 => 瑱 +2F931 7485 # 璅 => 璅 +2F932 74CA # 瓊 => 瓊 +2F933 3F1B # 㼛 => 㼛 +2F934 7524 # 甤 => 甤 +2F935 24C36 # 𤰶 => 𤰶 +2F936 753E # 甾 => 甾 +2F937 24C92 # 𤲒 => 𤲒 +2F938 7570 # 異 => 異 +2F939 2219F # 𢆟 => 𢆟 +2F93A 7610 # 瘐 => 瘐 +2F93B 24FA1 # 𤾡 => 𤾡 +2F93C 24FB8 # 𤾸 => 𤾸 +2F93D 25044 # 𥁄 => 𥁄 +2F93E 3FFC # 㿼 => 㿼 +2F93F 4008 # 䀈 => 䀈 +2F940 76F4 # 直 => 直 +2F941 250F3 # 𥃳 => 𥃳 +2F942 250F2 # 𥃲 => 𥃲 +2F943 25119 # 𥄙 => 𥄙 +2F944 25133 # 𥄳 => 𥄳 +2F945 771E # 眞 => 眞 +2F946 771F # 真 => 真 +2F947 771F # 真 => 真 +2F948 774A # 睊 => 睊 +2F949 4039 # 䀹 => 䀹 +2F94A 778B # 瞋 => 瞋 +2F94B 4046 # 䁆 => 䁆 +2F94C 4096 # 䂖 => 䂖 +2F94D 2541D # 𥐝 => 𥐝 +2F94E 784E # 硎 => 硎 +2F94F 788C # 碌 => 碌 +2F950 78CC # 磌 => 磌 +2F951 40E3 # 䃣 => 䃣 +2F952 25626 # 𥘦 => 𥘦 +2F953 7956 # 祖 => 祖 +2F954 2569A # 𥚚 => 𥚚 +2F955 256C5 # 𥛅 => 𥛅 +2F956 798F # 福 => 福 +2F957 79EB # 秫 => 秫 +2F958 412F # 䄯 => 䄯 +2F959 7A40 # 穀 => 穀 +2F95A 7A4A # 穊 => 穊 +2F95B 7A4F # 穏 => 穏 +2F95C 2597C # 𥥼 => 𥥼 +2F95D 25AA7 # 𥪧 => 𥪧 +2F95E 25AA7 # 𥪧 => 𥪧 +2F95F 7AEE # 竮 => 竮 +2F960 4202 # 䈂 => 䈂 +2F961 25BAB # 𥮫 => 𥮫 +2F962 7BC6 # 篆 => 篆 +2F963 7BC9 # 築 => 築 +2F964 4227 # 䈧 => 䈧 +2F965 25C80 # 𥲀 => 𥲀 +2F966 7CD2 # 糒 => 糒 +2F967 42A0 # 䊠 => 䊠 +2F968 7CE8 # 糨 => 糨 +2F969 7CE3 # 糣 => 糣 +2F96A 7D00 # 紀 => 紀 +2F96B 25F86 # 𥾆 => 𥾆 +2F96C 7D63 # 絣 => 絣 +2F96D 4301 # 䌁 => 䌁 +2F96E 7DC7 # 緇 => 緇 +2F96F 7E02 # 縂 => 縂 +2F970 7E45 # 繅 => 繅 +2F971 4334 # 䌴 => 䌴 +2F972 26228 # 𦈨 => 𦈨 +2F973 26247 # 𦉇 => 𦉇 +2F974 4359 # 䍙 => 䍙 +2F975 262D9 # 𦋙 => 𦋙 +2F976 7F7A # 罺 => 罺 +2F977 2633E # 𦌾 => 𦌾 +2F978 7F95 # 羕 => 羕 +2F979 7FFA # 翺 => 翺 
+2F97A 8005 # 者 => 者 +2F97B 264DA # 𦓚 => 𦓚 +2F97C 26523 # 𦔣 => 𦔣 +2F97D 8060 # 聠 => 聠 +2F97E 265A8 # 𦖨 => 𦖨 +2F97F 8070 # 聰 => 聰 +2F980 2335F # 𣍟 => 𣍟 +2F981 43D5 # 䏕 => 䏕 +2F982 80B2 # 育 => 育 +2F983 8103 # 脃 => 脃 +2F984 440B # 䐋 => 䐋 +2F985 813E # 脾 => 脾 +2F986 5AB5 # 媵 => 媵 +2F987 267A7 # 𦞧 => 𦞧 +2F988 267B5 # 𦞵 => 𦞵 +2F989 23393 # 𣎓 => 𣎓 +2F98A 2339C # 𣎜 => 𣎜 +2F98B 8201 # 舁 => 舁 +2F98C 8204 # 舄 => 舄 +2F98D 8F9E # 辞 => 辞 +2F98E 446B # 䑫 => 䑫 +2F98F 8291 # 芑 => 芑 +2F990 828B # 芋 => 芋 +2F991 829D # 芝 => 芝 +2F992 52B3 # 劳 => 劳 +2F993 82B1 # 花 => 花 +2F994 82B3 # 芳 => 芳 +2F995 82BD # 芽 => 芽 +2F996 82E6 # 苦 => 苦 +2F997 26B3C # 𦬼 => 𦬼 +2F998 82E5 # 若 => 若 +2F999 831D # 茝 => 茝 +2F99A 8363 # 荣 => 荣 +2F99B 83AD # 莭 => 莭 +2F99C 8323 # 茣 => 茣 +2F99D 83BD # 莽 => 莽 +2F99E 83E7 # 菧 => 菧 +2F99F 8457 # 著 => 著 +2F9A0 8353 # 荓 => 荓 +2F9A1 83CA # 菊 => 菊 +2F9A2 83CC # 菌 => 菌 +2F9A3 83DC # 菜 => 菜 +2F9A4 26C36 # 𦰶 => 𦰶 +2F9A5 26D6B # 𦵫 => 𦵫 +2F9A6 26CD5 # 𦳕 => 𦳕 +2F9A7 452B # 䔫 => 䔫 +2F9A8 84F1 # 蓱 => 蓱 +2F9A9 84F3 # 蓳 => 蓳 +2F9AA 8516 # 蔖 => 蔖 +2F9AB 273CA # 𧏊 => 𧏊 +2F9AC 8564 # 蕤 => 蕤 +2F9AD 26F2C # 𦼬 => 𦼬 +2F9AE 455D # 䕝 => 䕝 +2F9AF 4561 # 䕡 => 䕡 +2F9B0 26FB1 # 𦾱 => 𦾱 +2F9B1 270D2 # 𧃒 => 𧃒 +2F9B2 456B # 䕫 => 䕫 +2F9B3 8650 # 虐 => 虐 +2F9B4 865C # 虜 => 虜 +2F9B5 8667 # 虧 => 虧 +2F9B6 8669 # 虩 => 虩 +2F9B7 86A9 # 蚩 => 蚩 +2F9B8 8688 # 蚈 => 蚈 +2F9B9 870E # 蜎 => 蜎 +2F9BA 86E2 # 蛢 => 蛢 +2F9BB 8779 # 蝹 => 蝹 +2F9BC 8728 # 蜨 => 蜨 +2F9BD 876B # 蝫 => 蝫 +2F9BE 8786 # 螆 => 螆 +2F9BF 45D7 # 䗗 => 䗗 +2F9C0 87E1 # 蟡 => 蟡 +2F9C1 8801 # 蠁 => 蠁 +2F9C2 45F9 # 䗹 => 䗹 +2F9C3 8860 # 衠 => 衠 +2F9C4 8863 # 衣 => 衣 +2F9C5 27667 # 𧙧 => 𧙧 +2F9C6 88D7 # 裗 => 裗 +2F9C7 88DE # 裞 => 裞 +2F9C8 4635 # 䘵 => 䘵 +2F9C9 88FA # 裺 => 裺 +2F9CA 34BB # 㒻 => 㒻 +2F9CB 278AE # 𧢮 => 𧢮 +2F9CC 27966 # 𧥦 => 𧥦 +2F9CD 46BE # 䚾 => 䚾 +2F9CE 46C7 # 䛇 => 䛇 +2F9CF 8AA0 # 誠 => 誠 +2F9D0 8AED # 諭 => 諭 +2F9D1 8B8A # 變 => 變 +2F9D2 8C55 # 豕 => 豕 +2F9D3 27CA8 # 𧲨 => 𧲨 +2F9D4 8CAB # 貫 => 貫 +2F9D5 8CC1 # 賁 => 賁 +2F9D6 8D1B # 贛 => 贛 +2F9D7 8D77 # 起 => 起 
+2F9D8 27F2F # 𧼯 => 𧼯 +2F9D9 20804 # 𠠄 => 𠠄 +2F9DA 8DCB # 跋 => 跋 +2F9DB 8DBC # 趼 => 趼 +2F9DC 8DF0 # 跰 => 跰 +2F9DD 208DE # 𠣞 => 𠣞 +2F9DE 8ED4 # 軔 => 軔 +2F9DF 8F38 # 輸 => 輸 +2F9E0 285D2 # 𨗒 => 𨗒 +2F9E1 285ED # 𨗭 => 𨗭 +2F9E2 9094 # 邔 => 邔 +2F9E3 90F1 # 郱 => 郱 +2F9E4 9111 # 鄑 => 鄑 +2F9E5 2872E # 𨜮 => 𨜮 +2F9E6 911B # 鄛 => 鄛 +2F9E7 9238 # 鈸 => 鈸 +2F9E8 92D7 # 鋗 => 鋗 +2F9E9 92D8 # 鋘 => 鋘 +2F9EA 927C # 鉼 => 鉼 +2F9EB 93F9 # 鏹 => 鏹 +2F9EC 9415 # 鐕 => 鐕 +2F9ED 28BFA # 𨯺 => 𨯺 +2F9EE 958B # 開 => 開 +2F9EF 4995 # 䦕 => 䦕 +2F9F0 95B7 # 閷 => 閷 +2F9F1 28D77 # 𨵷 => 𨵷 +2F9F2 49E6 # 䧦 => 䧦 +2F9F3 96C3 # 雃 => 雃 +2F9F4 5DB2 # 嶲 => 嶲 +2F9F5 9723 # 霣 => 霣 +2F9F6 29145 # 𩅅 => 𩅅 +2F9F7 2921A # 𩈚 => 𩈚 +2F9F8 4A6E # 䩮 => 䩮 +2F9F9 4A76 # 䩶 => 䩶 +2F9FA 97E0 # 韠 => 韠 +2F9FB 2940A # 𩐊 => 𩐊 +2F9FC 4AB2 # 䪲 => 䪲 +2F9FD 29496 # 𩒖 => 𩒖 +2F9FE 980B # 頋 => 頋 +2F9FF 980B # 頋 => 頋 +2FA00 9829 # 頩 => 頩 +2FA01 295B6 # 𩖶 => 𩖶 +2FA02 98E2 # 飢 => 飢 +2FA03 4B33 # 䬳 => 䬳 +2FA04 9929 # 餩 => 餩 +2FA05 99A7 # 馧 => 馧 +2FA06 99C2 # 駂 => 駂 +2FA07 99FE # 駾 => 駾 +2FA08 4BCE # 䯎 => 䯎 +2FA09 29B30 # 𩬰 => 𩬰 +2FA0A 9B12 # 鬒 => 鬒 +2FA0B 9C40 # 鱀 => 鱀 +2FA0C 9CFD # 鳽 => 鳽 +2FA0D 4CCE # 䳎 => 䳎 +2FA0E 4CED # 䳭 => 䳭 +2FA0F 9D67 # 鵧 => 鵧 +2FA10 2A0CE # 𪃎 => 𪃎 +2FA11 4CF8 # 䳸 => 䳸 +2FA12 2A105 # 𪄅 => 𪄅 +2FA13 2A20E # 𪈎 => 𪈎 +2FA14 2A291 # 𪊑 => 𪊑 +2FA15 9EBB # 麻 => 麻 +2FA16 4D56 # 䵖 => 䵖 +2FA17 9EF9 # 黹 => 黹 +2FA18 9EFE # 黾 => 黾 +2FA19 9F05 # 鼅 => 鼅 +2FA1A 9F0F # 鼏 => 鼏 +2FA1B 9F16 # 鼖 => 鼖 +2FA1C 9F3B # 鼻 => 鼻 +2FA1D 2A600 # 𪘀 => 𪘀 diff --git a/data/nmt_nfkc.tsv b/data/nmt_nfkc.tsv index 1ce2b719..7df6614f 100644 --- a/data/nmt_nfkc.tsv +++ b/data/nmt_nfkc.tsv @@ -57263,7 +57263,6 @@ FB9 F90 FB5 # ྐྵ => ྐྵ 200A 20 #   => 200B 20 # ​ => 200C 20 # ‌ => -200D 20 # ‍ => 200E 20 # ‎ => 200F 20 # ‏ => 2011 2010 # ‑ => ‐ @@ -155312,6 +155311,7 @@ FB9 F90 FB5 # ྐྵ => ྐྵ 32FE 30F2 # ㋾ => ヲ 32FE 3099 30FA # ㋾゙ => ヺ 32FE FF9E 30FA # ㋾゙ => ヺ +32FF 4EE4 548C # ㋿ => 令和 3300 30A2 30D1 30FC 30C8 # ㌀ => アパート 3301 30A2 30EB 30D5 30A1 # ㌁ => アルファ 3302 
30A2 30F3 30DA 30A2 # ㌂ => アンペア @@ -155577,6 +155577,7 @@ AB5C A727 # ꭜ => ꜧ AB5D AB37 # ꭝ => ꬷ AB5E 26B # ꭞ => ɫ AB5F AB52 # ꭟ => ꭒ +AB69 28D # ꭩ => ʍ F900 8C48 # 豈 => 豈 F901 66F4 # 更 => 更 F902 8ECA # 車 => 車 @@ -212859,6 +212860,7 @@ FFFD 20 # � => 114B9 114BD 114BE # 𑒾 => 𑒾 115B8 115AF 115BA # 𑖺 => 𑖺 115B9 115AF 115BB # 𑖻 => 𑖻 +11935 11930 11938 # 𑤸 => 𑤸 1D15E 1D157 1D165 # 𝅗𝅥 => 𝅗𝅥 1D15F 1D158 1D165 # 𝅘𝅥 => 𝅘𝅥 1D160 1D158 1D165 1D16E # 𝅘𝅥𝅮 => 𝅘𝅥𝅮 @@ -224107,6 +224109,7 @@ FFFD 20 # � => 1F14F 57 43 # 🅏 => WC 1F16A 4D 43 # 🅪 => MC 1F16B 4D 44 # 🅫 => MD +1F16C 4D 52 # 🅬 => MR 1F190 44 4A # 🆐 => DJ 1F200 307B 304B # 🈀 => ほか 1F201 30B3 30B3 # 🈁 => ココ @@ -224156,6 +224159,7 @@ FFFD 20 # � => 1F238 7533 # 🈸 => 申 1F239 5272 # 🈹 => 割 1F23A 55B6 # 🈺 => 営 +1F23B 914D # 🈻 => 配 1F240 3014 672C 3015 # 🉀 => 〔本〕 1F241 3014 4E09 3015 # 🉁 => 〔三〕 1F242 3014 4E8C 3015 # 🉂 => 〔二〕 @@ -224167,6 +224171,16 @@ FFFD 20 # � => 1F248 3014 6557 3015 # 🉈 => 〔敗〕 1F250 5F97 # 🉐 => 得 1F251 53EF # 🉑 => 可 +1FBF0 30 # 🯰 => 0 +1FBF1 31 # 🯱 => 1 +1FBF2 32 # 🯲 => 2 +1FBF3 33 # 🯳 => 3 +1FBF4 34 # 🯴 => 4 +1FBF5 35 # 🯵 => 5 +1FBF6 36 # 🯶 => 6 +1FBF7 37 # 🯷 => 7 +1FBF8 38 # 🯸 => 8 +1FBF9 39 # 🯹 => 9 2F800 4E3D # 丽 => 丽 2F801 4E38 # 丸 => 丸 2F802 4E41 # 乁 => 乁 diff --git a/data/nmt_nfkc_cf.tsv b/data/nmt_nfkc_cf.tsv index 21788827..5502725c 100644 --- a/data/nmt_nfkc_cf.tsv +++ b/data/nmt_nfkc_cf.tsv @@ -57066,6 +57066,61 @@ FB9 F90 FB5 # ྐྵ => ྐྵ 1B3E 1B35 1B40 # ᭀ => ᭀ 1B3F 1B35 1B41 # ᭁ => ᭁ 1B42 1B35 1B43 # ᭃ => ᭃ +1C80 432 # ᲀ => в +1C81 434 # ᲁ => д +1C82 43E # ᲂ => о +1C83 441 # ᲃ => с +1C84 442 # ᲄ => т +1C85 442 # ᲅ => т +1C86 44A # ᲆ => ъ +1C87 463 # ᲇ => ѣ +1C88 A64B # ᲈ => ꙋ +1C90 10D0 # Ა => ა +1C91 10D1 # Ბ => ბ +1C92 10D2 # Გ => გ +1C93 10D3 # Დ => დ +1C94 10D4 # Ე => ე +1C95 10D5 # Ვ => ვ +1C96 10D6 # Ზ => ზ +1C97 10D7 # Თ => თ +1C98 10D8 # Ი => ი +1C99 10D9 # Კ => კ +1C9A 10DA # Ლ => ლ +1C9B 10DB # Მ => მ +1C9C 10DC # Ნ => ნ +1C9D 10DD # Ო => ო +1C9E 10DE # Პ => პ +1C9F 10DF # Ჟ => ჟ 
+1CA0 10E0 # Რ => რ +1CA1 10E1 # Ს => ს +1CA2 10E2 # Ტ => ტ +1CA3 10E3 # Უ => უ +1CA4 10E4 # Ფ => ფ +1CA5 10E5 # Ქ => ქ +1CA6 10E6 # Ღ => ღ +1CA7 10E7 # Ყ => ყ +1CA8 10E8 # Შ => შ +1CA9 10E9 # Ჩ => ჩ +1CAA 10EA # Ც => ც +1CAB 10EB # Ძ => ძ +1CAC 10EC # Წ => წ +1CAD 10ED # Ჭ => ჭ +1CAE 10EE # Ხ => ხ +1CAF 10EF # Ჯ => ჯ +1CB0 10F0 # Ჰ => ჰ +1CB1 10F1 # Ჱ => ჱ +1CB2 10F2 # Ჲ => ჲ +1CB3 10F3 # Ჳ => ჳ +1CB4 10F4 # Ჴ => ჴ +1CB5 10F5 # Ჵ => ჵ +1CB6 10F6 # Ჶ => ჶ +1CB7 10F7 # Ჷ => ჷ +1CB8 10F8 # Ჸ => ჸ +1CB9 10F9 # Ჹ => ჹ +1CBA 10FA # Ჺ => ჺ +1CBD 10FD # Ჽ => ჽ +1CBE 10FE # Ჾ => ჾ +1CBF 10FF # Ჿ => ჿ 1D2C 61 # ᴬ => a 1D2C 300 E0 # ᴬ̀ => à 1D2C 301 E1 # ᴬ́ => á @@ -57980,7 +58035,6 @@ FB9 F90 FB5 # ྐྵ => ྐྵ 200A 20 #   => 200B 20 # ​ => 200C 20 # ‌ => -200D 20 # ‍ => 200E 20 # ‎ => 200F 20 # ‏ => 2011 2010 # ‑ => ‐ @@ -156146,6 +156200,7 @@ FB9 F90 FB5 # ྐྵ => ྐྵ 32FE 30F2 # ㋾ => ヲ 32FE 3099 30FA # ㋾゙ => ヺ 32FE FF9E 30FA # ㋾゙ => ヺ +32FF 4EE4 548C # ㋿ => 令和 3300 30A2 30D1 30FC 30C8 # ㌀ => アパート 3301 30A2 30EB 30D5 30A1 # ㌁ => アルファ 3302 30A2 30F3 30DA 30A2 # ㌂ => アンペア @@ -156506,18 +156561,31 @@ A7AA 266 # Ɦ => ɦ A7AB 25C # Ɜ => ɜ A7AC 261 # Ɡ => ɡ A7AD 26C # Ɬ => ɬ +A7AE 26A # Ɪ => ɪ A7B0 29E # Ʞ => ʞ A7B1 287 # Ʇ => ʇ A7B2 29D # Ʝ => ʝ A7B3 AB53 # Ꭓ => ꭓ A7B4 A7B5 # Ꞵ => ꞵ A7B6 A7B7 # Ꞷ => ꞷ +A7B8 A7B9 # Ꞹ => ꞹ +A7BA A7BB # Ꞻ => ꞻ +A7BC A7BD # Ꞽ => ꞽ +A7BE A7BF # Ꞿ => ꞿ +A7C2 A7C3 # Ꟃ => ꟃ +A7C4 A794 # Ꞔ => ꞔ +A7C5 282 # Ʂ => ʂ +A7C6 1D8E # Ᶎ => ᶎ +A7C7 A7C8 # Ꟈ => ꟈ +A7C9 A7CA # Ꟊ => ꟊ +A7F5 A7F6 # Ꟶ => ꟶ A7F8 127 # ꟸ => ħ A7F9 153 # ꟹ => œ AB5C A727 # ꭜ => ꜧ AB5D AB37 # ꭝ => ꬷ AB5E 26B # ꭞ => ɫ AB5F AB52 # ꭟ => ꭒ +AB69 28D # ꭩ => ʍ AB70 13A0 # ꭰ => Ꭰ AB71 13A1 # ꭱ => Ꭱ AB72 13A2 # ꭲ => Ꭲ @@ -213908,6 +213976,42 @@ FFFD 20 # � => 10425 1044D # 𐐥 => 𐑍 10426 1044E # 𐐦 => 𐑎 10427 1044F # 𐐧 => 𐑏 +104B0 104D8 # 𐒰 => 𐓘 +104B1 104D9 # 𐒱 => 𐓙 +104B2 104DA # 𐒲 => 𐓚 +104B3 104DB # 𐒳 => 𐓛 +104B4 104DC # 𐒴 => 𐓜 +104B5 104DD # 𐒵 => 𐓝 +104B6 104DE # 𐒶 => 𐓞 +104B7 104DF # 𐒷 => 𐓟 +104B8 
104E0 # 𐒸 => 𐓠 +104B9 104E1 # 𐒹 => 𐓡 +104BA 104E2 # 𐒺 => 𐓢 +104BB 104E3 # 𐒻 => 𐓣 +104BC 104E4 # 𐒼 => 𐓤 +104BD 104E5 # 𐒽 => 𐓥 +104BE 104E6 # 𐒾 => 𐓦 +104BF 104E7 # 𐒿 => 𐓧 +104C0 104E8 # 𐓀 => 𐓨 +104C1 104E9 # 𐓁 => 𐓩 +104C2 104EA # 𐓂 => 𐓪 +104C3 104EB # 𐓃 => 𐓫 +104C4 104EC # 𐓄 => 𐓬 +104C5 104ED # 𐓅 => 𐓭 +104C6 104EE # 𐓆 => 𐓮 +104C7 104EF # 𐓇 => 𐓯 +104C8 104F0 # 𐓈 => 𐓰 +104C9 104F1 # 𐓉 => 𐓱 +104CA 104F2 # 𐓊 => 𐓲 +104CB 104F3 # 𐓋 => 𐓳 +104CC 104F4 # 𐓌 => 𐓴 +104CD 104F5 # 𐓍 => 𐓵 +104CE 104F6 # 𐓎 => 𐓶 +104CF 104F7 # 𐓏 => 𐓷 +104D0 104F8 # 𐓐 => 𐓸 +104D1 104F9 # 𐓑 => 𐓹 +104D2 104FA # 𐓒 => 𐓺 +104D3 104FB # 𐓓 => 𐓻 10C80 10CC0 # 𐲀 => 𐳀 10C81 10CC1 # 𐲁 => 𐳁 10C82 10CC2 # 𐲂 => 𐳂 @@ -214003,6 +214107,39 @@ FFFD 20 # � => 118BD 118DD # 𑢽 => 𑣝 118BE 118DE # 𑢾 => 𑣞 118BF 118DF # 𑢿 => 𑣟 +11935 11930 11938 # 𑤸 => 𑤸 +16E40 16E60 # 𖹀 => 𖹠 +16E41 16E61 # 𖹁 => 𖹡 +16E42 16E62 # 𖹂 => 𖹢 +16E43 16E63 # 𖹃 => 𖹣 +16E44 16E64 # 𖹄 => 𖹤 +16E45 16E65 # 𖹅 => 𖹥 +16E46 16E66 # 𖹆 => 𖹦 +16E47 16E67 # 𖹇 => 𖹧 +16E48 16E68 # 𖹈 => 𖹨 +16E49 16E69 # 𖹉 => 𖹩 +16E4A 16E6A # 𖹊 => 𖹪 +16E4B 16E6B # 𖹋 => 𖹫 +16E4C 16E6C # 𖹌 => 𖹬 +16E4D 16E6D # 𖹍 => 𖹭 +16E4E 16E6E # 𖹎 => 𖹮 +16E4F 16E6F # 𖹏 => 𖹯 +16E50 16E70 # 𖹐 => 𖹰 +16E51 16E71 # 𖹑 => 𖹱 +16E52 16E72 # 𖹒 => 𖹲 +16E53 16E73 # 𖹓 => 𖹳 +16E54 16E74 # 𖹔 => 𖹴 +16E55 16E75 # 𖹕 => 𖹵 +16E56 16E76 # 𖹖 => 𖹶 +16E57 16E77 # 𖹗 => 𖹷 +16E58 16E78 # 𖹘 => 𖹸 +16E59 16E79 # 𖹙 => 𖹹 +16E5A 16E7A # 𖹚 => 𖹺 +16E5B 16E7B # 𖹛 => 𖹻 +16E5C 16E7C # 𖹜 => 𖹼 +16E5D 16E7D # 𖹝 => 𖹽 +16E5E 16E7E # 𖹞 => 𖹾 +16E5F 16E7F # 𖹟 => 𖹿 1D15E 1D157 1D165 # 𝅗𝅥 => 𝅗𝅥 1D15F 1D158 1D165 # 𝅘𝅥 => 𝅘𝅥 1D160 1D158 1D165 1D16E # 𝅘𝅥𝅮 => 𝅘𝅥𝅮 @@ -224710,6 +224847,40 @@ FFFD 20 # � => 1D7FD 37 # 𝟽 => 7 1D7FE 38 # 𝟾 => 8 1D7FF 39 # 𝟿 => 9 +1E900 1E922 # 𞤀 => 𞤢 +1E901 1E923 # 𞤁 => 𞤣 +1E902 1E924 # 𞤂 => 𞤤 +1E903 1E925 # 𞤃 => 𞤥 +1E904 1E926 # 𞤄 => 𞤦 +1E905 1E927 # 𞤅 => 𞤧 +1E906 1E928 # 𞤆 => 𞤨 +1E907 1E929 # 𞤇 => 𞤩 +1E908 1E92A # 𞤈 => 𞤪 +1E909 1E92B # 𞤉 => 𞤫 +1E90A 1E92C # 𞤊 => 𞤬 +1E90B 1E92D # 𞤋 => 𞤭 +1E90C 1E92E # 𞤌 => 𞤮 +1E90D 1E92F 
# 𞤍 => 𞤯 +1E90E 1E930 # 𞤎 => 𞤰 +1E90F 1E931 # 𞤏 => 𞤱 +1E910 1E932 # 𞤐 => 𞤲 +1E911 1E933 # 𞤑 => 𞤳 +1E912 1E934 # 𞤒 => 𞤴 +1E913 1E935 # 𞤓 => 𞤵 +1E914 1E936 # 𞤔 => 𞤶 +1E915 1E937 # 𞤕 => 𞤷 +1E916 1E938 # 𞤖 => 𞤸 +1E917 1E939 # 𞤗 => 𞤹 +1E918 1E93A # 𞤘 => 𞤺 +1E919 1E93B # 𞤙 => 𞤻 +1E91A 1E93C # 𞤚 => 𞤼 +1E91B 1E93D # 𞤛 => 𞤽 +1E91C 1E93E # 𞤜 => 𞤾 +1E91D 1E93F # 𞤝 => 𞤿 +1E91E 1E940 # 𞤞 => 𞥀 +1E91F 1E941 # 𞤟 => 𞥁 +1E920 1E942 # 𞤠 => 𞥂 +1E921 1E943 # 𞤡 => 𞥃 1EE00 627 # 𞸀 => ا 1EE00 653 622 # 𞸀ٓ => آ 1EE00 654 623 # 𞸀ٔ => أ @@ -225251,6 +225422,7 @@ FFFD 20 # � => 1F14F 77 63 # 🅏 => wc 1F16A 6D 63 # 🅪 => mc 1F16B 6D 64 # 🅫 => md +1F16C 6D 72 # 🅬 => mr 1F190 64 6A # 🆐 => dj 1F200 307B 304B # 🈀 => ほか 1F201 30B3 30B3 # 🈁 => ココ @@ -225300,6 +225472,7 @@ FFFD 20 # � => 1F238 7533 # 🈸 => 申 1F239 5272 # 🈹 => 割 1F23A 55B6 # 🈺 => 営 +1F23B 914D # 🈻 => 配 1F240 3014 672C 3015 # 🉀 => 〔本〕 1F241 3014 4E09 3015 # 🉁 => 〔三〕 1F242 3014 4E8C 3015 # 🉂 => 〔二〕 @@ -225311,6 +225484,16 @@ FFFD 20 # � => 1F248 3014 6557 3015 # 🉈 => 〔敗〕 1F250 5F97 # 🉐 => 得 1F251 53EF # 🉑 => 可 +1FBF0 30 # 🯰 => 0 +1FBF1 31 # 🯱 => 1 +1FBF2 32 # 🯲 => 2 +1FBF3 33 # 🯳 => 3 +1FBF4 34 # 🯴 => 4 +1FBF5 35 # 🯵 => 5 +1FBF6 36 # 🯶 => 6 +1FBF7 37 # 🯷 => 7 +1FBF8 38 # 🯸 => 8 +1FBF9 39 # 🯹 => 9 2F800 4E3D # 丽 => 丽 2F801 4E38 # 丸 => 丸 2F802 4E41 # 乁 => 乁 diff --git a/doc/api.md b/doc/api.md index 797074c1..ebde8806 100644 --- a/doc/api.md +++ b/doc/api.md @@ -14,9 +14,9 @@ if (!status.ok()) { // error } -// You can also load a model from std::ifstream. -// std::ifstream in("//path/to/model.model"); -// auto status = processor.Load(in); +// You can also load a serialized model from std::string. +// const std::string str = // Load blob contents from a file. +// auto status = processor.LoadFromSerializedProto(str); ``` ## Tokenize text (preprocessing) @@ -75,16 +75,20 @@ Calls `SentencePieceTrainer::Train` function to train sentencepiece model. 
You c sentencepiece::SentencePieceTrainer::Train("--input=test/botchan.txt --model_prefix=m --vocab_size=1000"); ``` -## SentencePieceText proto -You will want to use `SentencePieceText` class to obtain the pieces and ids at the same time. This proto also encodes a utf8-byte offset of each piece over user input or detokenized text. +## ImmutableSentencePieceText +You will want to use `ImmutableSentencePieceText` class to obtain the pieces and ids at the same time. +This proto also encodes a utf8-byte offset of each piece over user input or detokenized text. ```C++ -#include +#include -sentencepiece::SentencePieceText spt; +sentencepiece::ImmutableSentencePieceText spt; // Encode -processor.Encode("This is a test.", &spt); +processor.Encode("This is a test.", spt.mutable_proto()); + +// or +// spt = processor.EncodeAsImmutableProto("This is a test."); std::cout << spt.text() << std::endl; // This is the same as the input. for (const auto &piece : spt.pieces()) { @@ -96,7 +100,7 @@ for (const auto &piece : spt.pieces()) { } // Decode -processor.Decode({10, 20, 30}, &spt); +processor.Decode({10, 20, 30}, spt.mutable_proto()); std::cout << spt.text() << std::endl; // This is the same as the decoded string. for (const auto &piece : spt.pieces()) { // the same as above. diff --git a/doc/experiments.md b/doc/experiments.md index 5a58cd13..e0881529 100644 --- a/doc/experiments.md +++ b/doc/experiments.md @@ -112,7 +112,7 @@ We have evaluated SentencePiece segmentation with the following configurations. * [KFTT](http://www.phontron.com/kftt/index.html) * [MultiUN](http://opus.lingfil.uu.se/MultiUN.php) (First 5M and next 5k/5k sentences are used for training and development/testing respectively.) - * [WMT16](http://www.statmt.org/WMT16/) + * [WMT16](https://www.statmt.org/wmt16/) * In-house: (Used 5M parallel sentences for training) **NoPretok** and **WsPretok** do not use any language-dependent resources. 
diff --git a/doc/normalization.md b/doc/normalization.md index af16b144..eb32a667 100644 --- a/doc/normalization.md +++ b/doc/normalization.md @@ -34,7 +34,7 @@ You can use custom normalization rule by preparing a TSV file formatted as follo ``` In this sample, UCS4 sequence [41 302 300] (hex) is converted into [1EA6] (hex). When there are ambiguities in the conversions, the longest rule is used. Note that the tab is used as a delimiter for source and target sequence and space is used as a delimiter for UCS4 characters. We can make the target sequence empty to remove some specific characters from the text. -See [data/nfkc.tsv](data/nfkc.tsv) as an example. Once a TSV file is prepared, you can specify it with `--normalization_rule_tsv` flag. +See [data/nfkc.tsv](../data/nfkc.tsv) as an example. Once a TSV file is prepared, you can specify it with `--normalization_rule_tsv` flag. ``` % spm_train --normalization_rule_tsv= --input= --model_prefix= --vocab_size=8000 ``` @@ -45,6 +45,6 @@ See [data/nfkc.tsv](data/nfkc.tsv) as an example. Once a TSV file is prepared, y ## Command line tool to perform normalization ``` % spm_normalize --model= file1 file2.. -% spm_normalize --normalizatoin_rule_tsv=custom.tsv file1 file2.. +% spm_normalize --normalization_rule_tsv=custom.tsv file1 file2.. ``` The first command line uses the normalization rule embedded in the model file. The second command line uses the normalization rule in TSV file and is useful to make normalization rule interactively. diff --git a/doc/options.md b/doc/options.md index 7861fdcf..6cdc0f94 100644 --- a/doc/options.md +++ b/doc/options.md @@ -3,49 +3,60 @@ The training options for the `spm_train` can be listed using `spm_train --help`. Since the standard `pip install` of sentencepiece does not necessarily install `spm_train`, the options are also listed here. 
``` ---help (show help) type: bool default: false ---version (show version) type: bool default: false ---minloglevel (Messages logged at a lower level than this don't actually get logged anywhere) type: int default: 0 ---input (comma separated list of input sentences) type: std::string default: "" ---input_format (Input format. Supported format is `text` or `tsv`.) type: std::string default: "" ---model_prefix (output model prefix) type: std::string default: "" --model_type (model algorithm: unigram, bpe, word or char) type: std::string default: "unigram" ---vocab_size (vocabulary size) type: int32 default: 8000 ---accept_language (comma-separated list of languages this model can accept) type: std::string default: "" ---self_test_sample_size (the size of self test samples) type: int32 default: 0 ---character_coverage (character coverage to determine the minimum symbols) type: double default: 0.9995 ---input_sentence_size (maximum size of sentences the trainer loads) type: int32 default: 0 ---shuffle_input_sentence (Randomly sample input sentences in advance. 
Valid when --input_sentence_size > 0) type: bool default: true ---seed_sentencepiece_size (the size of seed sentencepieces) type: int32 default: 1000000 ---shrinking_factor (Keeps top shrinking_factor pieces with respect to the loss) type: double default: 0.75 ---num_threads (number of threads for training) type: int32 default: 16 ---num_sub_iterations (number of EM sub-iterations) type: int32 default: 2 ---max_sentencepiece_length (maximum length of sentence piece) type: int32 default: 16 ---max_sentence_length (maximum length of sentence in byte) type: int32 default: 4192 ---split_by_unicode_script (use Unicode script to split sentence pieces) type: bool default: true ---split_by_number (split tokens by numbers (0-9)) type: bool default: true ---split_by_whitespace (use a white space to split sentence pieces) type: bool default: true ---split_digits (split all digits (0-9) into separate pieces) type: bool default: false ---treat_whitespace_as_suffix (treat whitespace marker as suffix instead of prefix.) type: bool default: false ---control_symbols (comma separated list of control symbols) type: std::string default: "" ---user_defined_symbols (comma separated list of user defined symbols) type: std::string default: "" ---required_chars (UTF8 characters in this flag are always used in the character set regardless of --character_coverage) type: std::string default: "" ---byte_fallback (decompose unknown pieces into UTF-8 byte pieces) type: bool default: false ---vocabulary_output_piece_score (Define score in vocab file) type: bool default: true ---normalization_rule_name (Normalization rule name. Choose from nfkc or identity) type: std::string default: "nmt_nfkc" ---normalization_rule_tsv (Normalization rule TSV file. ) type: std::string default: "" ---denormalization_rule_tsv (Denormalization rule TSV file.) 
type: std::string default: "" ---add_dummy_prefix (Add dummy whitespace at the beginning of text) type: bool default: true ---remove_extra_whitespaces (Removes leading, trailing, and duplicate internal whitespace) type: bool default: true ---hard_vocab_limit (If set to false, --vocab_size is considered as a soft limit.) type: bool default: true ---use_all_vocab (If set to true, use all tokens as vocab. Valid for word/char models.) type: bool default: false ---unk_id (Override UNK () id.) type: int32 default: 0 ---bos_id (Override BOS () id. Set -1 to disable BOS.) type: int32 default: 1 ---eos_id (Override EOS () id. Set -1 to disable EOS.) type: int32 default: 2 ---pad_id (Override PAD () id. Set -1 to disable PAD.) type: int32 default: -1 ---unk_piece (Override UNK () piece.) type: std::string default: "" ---bos_piece (Override BOS () piece.) type: std::string default: "" ---eos_piece (Override EOS () piece.) type: std::string default: "" ---pad_piece (Override PAD () piece.) type: std::string default: "" ---unk_surface (Dummy surface string for . In decoding is decoded to `unk_surface`.) type: std::string default: " ⁇ " ---train_extremely_large_corpus (Increase bit depth for unigram tokenization.) type: bool default: false +Usage: ../build/src/spm_train [options] files + + --input (comma separated list of input sentences) type: std::string default: "" + --input_format (Input format. Supported format is `text` or `tsv`.) 
type: std::string default: "" + --model_prefix (output model prefix) type: std::string default: "" + --model_type (model algorithm: unigram, bpe, word or char) type: std::string default: "unigram" + --vocab_size (vocabulary size) type: int32 default: 8000 + --accept_language (comma-separated list of languages this model can accept) type: std::string default: "" + --self_test_sample_size (the size of self test samples) type: int32 default: 0 + --character_coverage (character coverage to determine the minimum symbols) type: double default: 0.9995 + --input_sentence_size (maximum size of sentences the trainer loads) type: std::uint64_t default: 0 + --shuffle_input_sentence (Randomly sample input sentences in advance. Valid when --input_sentence_size > 0) type: bool default: true + --seed_sentencepiece_size (the size of seed sentencepieces) type: int32 default: 1000000 + --shrinking_factor (Keeps top shrinking_factor pieces with respect to the loss) type: double default: 0.75 + --num_threads (number of threads for training) type: int32 default: 16 + --num_sub_iterations (number of EM sub-iterations) type: int32 default: 2 + --max_sentencepiece_length (maximum length of sentence piece) type: int32 default: 16 + --max_sentence_length (maximum length of sentence in byte) type: int32 default: 4192 + --split_by_unicode_script (use Unicode script to split sentence pieces) type: bool default: true + --split_by_number (split tokens by numbers (0-9)) type: bool default: true + --split_by_whitespace (use a white space to split sentence pieces) type: bool default: true + --split_digits (split all digits (0-9) into separate pieces) type: bool default: false + --treat_whitespace_as_suffix (treat whitespace marker as suffix instead of prefix.) 
type: bool default: false + --allow_whitespace_only_pieces (allow pieces that only contain (consecutive) whitespace tokens) type: bool default: false + --control_symbols (comma separated list of control symbols) type: std::string default: "" + --control_symbols_file (load control_symbols from file.) type: std::string default: "" + --user_defined_symbols (comma separated list of user defined symbols) type: std::string default: "" + --user_defined_symbols_file (load user_defined_symbols from file.) type: std::string default: "" + --required_chars (UTF8 characters in this flag are always used in the character set regardless of --character_coverage) type: std::string default: "" + --required_chars_file (load required_chars from file.) type: std::string default: "" + --byte_fallback (decompose unknown pieces into UTF-8 byte pieces) type: bool default: false + --vocabulary_output_piece_score (Define score in vocab file) type: bool default: true + --normalization_rule_name (Normalization rule name. Choose from nfkc or identity) type: std::string default: "nmt_nfkc" + --normalization_rule_tsv (Normalization rule TSV file. ) type: std::string default: "" + --denormalization_rule_tsv (Denormalization rule TSV file.) type: std::string default: "" + --add_dummy_prefix (Add dummy whitespace at the beginning of text) type: bool default: true + --remove_extra_whitespaces (Removes leading, trailing, and duplicate internal whitespace) type: bool default: true + --hard_vocab_limit (If set to false, --vocab_size is considered as a soft limit.) type: bool default: true + --use_all_vocab (If set to true, use all tokens as vocab. Valid for word/char models.) type: bool default: false + --unk_id (Override UNK () id.) type: int32 default: 0 + --bos_id (Override BOS () id. Set -1 to disable BOS.) type: int32 default: 1 + --eos_id (Override EOS () id. Set -1 to disable EOS.) type: int32 default: 2 + --pad_id (Override PAD () id. Set -1 to disable PAD.) 
type: int32 default: -1 + --unk_piece (Override UNK () piece.) type: std::string default: "" + --bos_piece (Override BOS () piece.) type: std::string default: "" + --eos_piece (Override EOS () piece.) type: std::string default: "" + --pad_piece (Override PAD () piece.) type: std::string default: "" + --unk_surface (Dummy surface string for . In decoding is decoded to `unk_surface`.) type: std::string default: " ⁇ " + --train_extremely_large_corpus (Increase bit depth for unigram tokenization.) type: bool default: false + --random_seed (Seed value for random generator.) type: uint32 default: 4294967295 + --enable_differential_privacy (Whether to add DP while training. Currently supported only by UNIGRAM model.) type: bool default: false + --differential_privacy_noise_level (Amount of noise to add for DP) type: float default: 0 + --differential_privacy_clipping_threshold (Threshold for clipping the counts for DP) type: std::uint64_t default: 0 + --help (show help) type: bool default: false + --version (show version) type: bool default: false + --minloglevel (Messages logged at a lower level than this don't actually get logged anywhere) type: int default: 0 ``` diff --git a/python/MANIFEST.in b/python/MANIFEST.in index c182f30a..fab33805 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -1,3 +1,4 @@ recursive-include test *.py *.model botchan.txt recursive-include src *.i -include *.md VERSION build_bundled.sh +recursive-include sentencepiece * +include *.md VERSION.* build_bundled.sh diff --git a/python/README.md b/python/README.md index b6830822..bc5a59a5 100644 --- a/python/README.md +++ b/python/README.md @@ -9,10 +9,17 @@ For Linux (x64/i686), macOS, and Windows(win32/x64) environment, you can simply % pip install sentencepiece ``` -To build and install the Python wrapper from source, please install [SentencePiece C++](https://github.com/google/sentencepiece#c-from-source) and try the following commands: +To build and install the Python wrapper from 
source, try the following commands to build and install wheel package. ``` -% python setup.py build -% sudo python setup.py install +% git clone https://github.com/google/sentencepiece.git +% cd sentencepiece +% mkdir build +% cd build +% cmake .. -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=./root +% make install +% cd ../python +% python setup.py bdist_wheel +% pip install dist/sentencepiece*.whl ``` If you don’t have write permission to the global site-packages directory or don’t want to install into it, please try: @@ -22,21 +29,50 @@ If you don’t have write permission to the global site-packages directory or do ## Usage -See [this google colab page](https://github.com/google/sentencepiece/blob/master/python/sentencepiece_python_module_example.ipynb) to run sentencepiece interactively. (Note: this sample is written in old interface.) +See [this google colab page](https://github.com/google/sentencepiece/blob/master/python/sentencepiece_python_module_example.ipynb) to run sentencepiece interactively. ### Segmentation ``` % python >>> import sentencepiece as spm >>> sp = spm.SentencePieceProcessor(model_file='test/test_model.model') + >>> sp.encode('This is a test') [284, 47, 11, 4, 15, 400] + >>> sp.encode(['This is a test', 'Hello world'], out_type=int) [[284, 47, 11, 4, 15, 400], [151, 88, 21, 887]] + +>>> sp.encode_as_ids(['This is a test', 'Hello world']) +[[284, 47, 11, 4, 15, 400], [151, 88, 21, 887]] + >>> sp.encode('This is a test', out_type=str) ['▁This', '▁is', '▁a', '▁', 't', 'est'] + >>> sp.encode(['This is a test', 'Hello world'], out_type=str) [['▁This', '▁is', '▁a', '▁', 't', 'est'], ['▁He', 'll', 'o', '▁world']] + +>>> sp.encode_as_pieces(['This is a test', 'Hello world']) +[['▁This', '▁is', '▁a', '▁', 't', 'est'], ['▁He', 'll', 'o', '▁world']] + +>>> proto = sp.encode('This is a test', out_type='immutable_proto') +>>> for n in proto.pieces: +... 
print('piece="{}" surface="{}" id={} begin={} end={}'.format(n.piece, n.surface, n.id, n.begin, n.end)) +... +piece="▁This" surface="This" id=284 begin=0 end=4 +piece="▁is" surface=" is" id=47 begin=4 end=7 +piece="▁a" surface=" a" id=11 begin=7 end=9 +piece="▁" surface=" " id=4 begin=9 end=10 +piece="t" surface="t" id=15 begin=10 end=11 +piece="est" surface="est" id=400 begin=11 end=14 + +>>> [[x.id for x in proto.pieces], [x.piece for x in proto.pieces], [x.begin for x in proto.pieces], [x.end for x in proto.pieces]] +[[284, 47, 11, 4, 15, 400], ['▁This', '▁is', '▁a', '▁', 't', 'est'], [0, 4, 7, 9, 10, 11], [4, 7, 9, 10, 11, 14]] + +>>> proto2 = sp.encode_as_immutable_proto('This is a test') +>>> proto2 == proto +True + >>> for _ in range(10): ... sp.encode('This is a test', out_type=str, enable_sampling=True, alpha=0.1, nbest_size=-1) ... @@ -50,26 +86,55 @@ See [this google colab page](https://github.com/google/sentencepiece/blob/master ['▁', 'T', 'h', 'is', '▁', 'is', '▁', 'a', '▁', 'te', 'st'] ['▁', 'This', '▁', 'i', 's', '▁a', '▁', 't', 'e', 'st'] ['▁This', '▁', 'is', '▁a', '▁', 't', 'est'] + +>>> sp.nbest_encode('This is a test', nbest_size=5, out_type=str) +[['▁This', '▁is', '▁a', '▁', 't', 'est'], +['▁This', '▁is', '▁a', '▁', 'te', 'st'], +['▁This', '▁is', '▁a', '▁', 'te', 's', 't'], +['▁This', '▁is', '▁a', '▁', 't', 'e', 'st'], +['▁This', '▁is', '▁a', '▁', 't', 'es', 't']] + +>>> sp.sample_encode_and_score('This is a test', num_samples=5, alpha=0.1, out_type=str, wor=True) +[(['▁This', '▁', 'i', 's', '▁a', '▁', 'te', 's', 't'], -3.043105125427246), +(['▁This', '▁', 'i', 's', '▁a', '▁', 'te', 'st'], -2.8475849628448486), +(['▁', 'This', '▁is', '▁', 'a', '▁', 'te', 'st'], -3.043248176574707), +(['▁', 'This', '▁is', '▁a', '▁', 't', 'e', 'st'], -2.87727689743042), +(['▁', 'This', '▁', 'i', 's', '▁', 'a', '▁', 't', 'est'], -3.6284031867980957)] + >>> sp.decode([284, 47, 11, 4, 15, 400]) 'This is a test' + >>> sp.decode([[284, 47, 11, 4, 15, 400], [151, 88, 21, 
887]]) ['This is a test', 'Hello world'] + +>>> proto = sp.decode([284, 47, 11, 4, 15, 400], out_type='immutable_proto') +>>> proto.text +'This is a test' + >>> sp.decode(['▁', 'This', '▁', 'is', '▁a', '▁', 't', 'e', 'st']) 'This is a test' + >>> sp.decode([['▁This', '▁is', '▁a', '▁', 't', 'est'], ['▁He', 'll', 'o', '▁world']]) ['This is a test', 'Hello world'] + >>> sp.get_piece_size() 1000 + >>> sp.id_to_piece(2) '' + >>> sp.id_to_piece([2, 3, 4]) ['', '\r', '▁'] + >>> sp.piece_to_id('') 1 + >>> sp.piece_to_id(['', '\r', '▁']) [2, 3, 4] + >>> len(sp) 1000 + >>> sp[''] 2 ``` @@ -116,98 +181,3 @@ with urllib.request.urlopen( sp = spm.SentencePieceProcessor(model_proto=model.getvalue()) print(sp.encode('this is test')) ``` - - -### Segmentation (old interface) -``` -% python ->>> import sentencepiece as spm ->>> sp = spm.SentencePieceProcessor() ->>> sp.Load("test/test_model.model") -True ->>> sp.EncodeAsPieces("This is a test") -['\xe2\x96\x81This', '\xe2\x96\x81is', '\xe2\x96\x81a', '\xe2\x96\x81', 't', 'est'] ->>> sp.EncodeAsIds("This is a test") -[284, 47, 11, 4, 15, 400] ->>> sp.DecodePieces(['\xe2\x96\x81This', '\xe2\x96\x81is', '\xe2\x96\x81a', '\xe2\x96\x81', 't', 'est']) -'This is a test' ->>> sp.NBestEncodeAsPieces("This is a test", 5) -[['\xe2\x96\x81This', '\xe2\x96\x81is', '\xe2\x96\x81a', '\xe2\x96\x81', 't', 'est'], ['\xe2\x96\x81This', '\xe2\x96\x81is', '\xe2\x96\x81a', '\xe2\x96\x81', 'te', 'st'], ['\xe2\x96\x81This', '\xe2\x96\x81is', '\xe2\x96\x81a', '\xe2\x96\x81', 'te', 's', 't'], ['\xe2\x96\x81This', '\xe2\x96\x81is', '\xe2\x96\x81a', '\xe2\x96\x81', 't', 'e', 'st'], ['\xe2\x96\x81This', '\xe2\x96\x81is', '\xe2\x96\x81a', '\xe2\x96\x81', 't', 'es', 't']] ->>> for x in range(10): -... sp.SampleEncodeAsPieces("This is a test", -1, 0.1) -... 
-['\xe2\x96\x81', 'T', 'h', 'i', 's', '\xe2\x96\x81', 'is', '\xe2\x96\x81a', '\xe2\x96\x81', 't', 'e', 's', 't'] -['\xe2\x96\x81T', 'h', 'is', '\xe2\x96\x81is', '\xe2\x96\x81', 'a', '\xe2\x96\x81', 't', 'est'] -['\xe2\x96\x81This', '\xe2\x96\x81is', '\xe2\x96\x81', 'a', '\xe2\x96\x81', 't', 'e', 'st'] -['\xe2\x96\x81This', '\xe2\x96\x81is', '\xe2\x96\x81a', '\xe2\x96\x81', 't', 'e', 'st'] -['\xe2\x96\x81This', '\xe2\x96\x81is', '\xe2\x96\x81a', '\xe2\x96\x81', 't', 'e', 's', 't'] -['\xe2\x96\x81T', 'h', 'is', '\xe2\x96\x81', 'i', 's', '\xe2\x96\x81a', '\xe2\x96\x81', 'te', 's', 't'] -['\xe2\x96\x81This', '\xe2\x96\x81', 'is', '\xe2\x96\x81a', '\xe2\x96\x81', 'te', 's', 't'] -['\xe2\x96\x81This', '\xe2\x96\x81', 'i', 's', '\xe2\x96\x81a', '\xe2\x96\x81', 't', 'e', 'st'] -['\xe2\x96\x81This', '\xe2\x96\x81', 'is', '\xe2\x96\x81', 'a', '\xe2\x96\x81', 't', 'e', 'st'] -['\xe2\x96\x81This', '\xe2\x96\x81', 'i', 's', '\xe2\x96\x81', 'a', '\xe2\x96\x81', 'te', 's', 't'] ->>> sp.DecodeIds([284, 47, 11, 4, 15, 400]) -'This is a test' ->>> sp.GetPieceSize() -1000 ->>> sp.IdToPiece(2) -'' ->>> sp.PieceToId('') -2 ->>> len(sp) -1000 ->>> sp[''] -2 -``` - -### Model Training (old interface) -Training is performed by passing parameters of [spm_train](https://github.com/google/sentencepiece#train-sentencepiece-model) to SentencePieceTrainer.Train() function. - -``` ->>> import sentencepiece as spm ->>> spm.SentencePieceTrainer.Train('--input=test/botchan.txt --model_prefix=m --vocab_size=1000') -unigram_model_trainer.cc(494) LOG(INFO) Starts training with : -input: "test/botchan.txt" -model_prefix: "m" -model_type: UNIGRAM -..snip.. 
-unigram_model_trainer.cc(529) LOG(INFO) EM sub_iter=0 size=1239 obj=10.4055 num_tokens=36256 num_tokens/piece=29.2623 -unigram_model_trainer.cc(529) LOG(INFO) EM sub_iter=1 size=1239 obj=10.3187 num_tokens=36256 num_tokens/piece=29.2623 -unigram_model_trainer.cc(529) LOG(INFO) EM sub_iter=0 size=1100 obj=10.5285 num_tokens=37633 num_tokens/piece=34.2118 -unigram_model_trainer.cc(529) LOG(INFO) EM sub_iter=1 size=1100 obj=10.4973 num_tokens=37630 num_tokens/piece=34.2091 -trainer_interface.cc(284) LOG(INFO) Saving model: m.model -trainer_interface.cc(293) LOG(INFO) Saving vocabs: m.vocab ->>> -``` - -## Python2/3 String/Unicode compatibility -Sentencepiece python wrapper accepts both Unicode string and legacy byte string. -The output string type is determined by the input string type. -The output type of IdToPiece/DecodeIds methods is *str*, but note that it is a legacy byte string in Python2 and Unicode string in Python3 respectively. - -* Python2: -``` ->>> sp.EncodeAsPieces('吾輩は猫である') -['\xe2\x96\x81', '\xe5\x90\xbe', '\xe8\xbc\xa9', '\xe3\x81\xaf', '\xe7\x8c\xab', '\xe3\x81\xa7\xe3\x81\x82\xe3\x82\x8b'] ->>> sp.EncodeAsPieces(u'吾輩は猫である') -[u'\u2581', u'\u543e', u'\u8f29', u'\u306f', u'\u732b', u'\u3067\u3042\u308b'] ->>> sp.EncodeAsPieces(u'吾輩は猫である'.encode('utf-8')) -['\xe2\x96\x81', '\xe5\x90\xbe', '\xe8\xbc\xa9', '\xe3\x81\xaf', '\xe7\x8c\xab', '\xe3\x81\xa7\xe3\x81\x82\xe3\x82\x8b'] ->>> sp.IdToPiece(10) -'\xe3\x81\xab' ->>> type(sp.IdToPiece(10)) - -``` - -* Python3: -``` ->>> sp.EncodeAsPieces('吾輩は猫である') -['▁', '吾', '輩', 'は', '猫', 'である'] ->>> sp.EncodeAsPieces('吾輩は猫である'.encode('utf-8')) -[b'\xe2\x96\x81', b'\xe5\x90\xbe', b'\xe8\xbc\xa9', b'\xe3\x81\xaf', b'\xe7\x8c\xab', b'\xe3\x81\xa7\xe3\x81\x82\xe3\x82\x8b'] ->>> ->>> sp.IdToPiece(10) -'に' ->>> type(sp.IdToPiece(10)) - -``` diff --git a/python/add_new_vocab.ipynb b/python/add_new_vocab.ipynb index fb2dedca..a8ee35c0 100644 --- a/python/add_new_vocab.ipynb +++ b/python/add_new_vocab.ipynb @@ -5,7 +5,7 
@@ "metadata": {}, "source": [ "#### You can add new special tokens to pre-trained sentencepiece model\n", - "#### Run this code in google/sentencepiece/python/" + "#### Run this code in google/sentencepiece/python/src/sentencepiece" ] }, { diff --git a/python/build_bundled.sh b/python/build_bundled.sh index 496702a1..a3f68614 100755 --- a/python/build_bundled.sh +++ b/python/build_bundled.sh @@ -2,17 +2,21 @@ VERSION="$1" -mkdir bundled -cd bundled -# Try taged version. Othewise, use head. -git clone https://github.com/google/sentencepiece.git \ - -b v"${VERSION}" --depth 1 || \ +mkdir -p build + +BUILD_DIR=./build +INSTALL_DIR=./build/root + +if [ -f ./sentencepiece/src/CMakeLists.txt ]; then + SRC_DIR=./sentencepiece +elif [ -f ../src/CMakeLists.txt ]; then + SRC_DIR=.. +else + # Try taged version. Othewise, use head. + git clone https://github.com/google/sentencepiece.git -b v"${VERSION}" --depth 1 || \ git clone https://github.com/google/sentencepiece.git --depth 1 + SRC_DIR=./sentencepiece +fi -cd sentencepiece -mkdir build -cd build -cmake .. -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=../.. -make -j $(nproc) -make install -cd ../.. +cmake ${SRC_DIR} -B ${BUILD_DIR} -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} +cmake --build ${BUILD_DIR} --config Release --target install --parallel $(nproc) diff --git a/python/build_sdist.sh b/python/build_sdist.sh new file mode 100755 index 00000000..92da94b5 --- /dev/null +++ b/python/build_sdist.sh @@ -0,0 +1,11 @@ +#!/bin/sh + +mkdir -p sentencepiece + +for i in CMakeLists.txt LICENSE README.md VERSION.txt cmake config.h.in sentencepiece.pc.in src third_party +do + echo "copying ../${i} sentencepiece/${i}" + cp -f -R "../${i}" sentencepiece +done + +python3 setup.py sdist diff --git a/python/make_py_wheel.sh b/python/make_py_wheel.sh deleted file mode 100755 index 79aeb450..00000000 --- a/python/make_py_wheel.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash -# Copyright 2018 Google Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.! -set -e # exit immediately on error -set -x # display all commands - -CMAKE_VERSION=3.12.0 - -run_docker() { - cd `dirname $0` - docker pull $1 - docker run --rm -ti --name py_sentencepiece \ - -v `pwd`/../:/sentencepiece -w /sentencepiece/python \ - -td $1 /bin/bash - docker exec py_sentencepiece bash -c "./make_py_wheel.sh native $2" - docker stop py_sentencepiece -} - -build() { - TRG=$1 - rm -fr build - mkdir -p build - cd build - - # Install sentencepiece - cmake ../.. -DSPM_ENABLE_SHARED=OFF - make -j4 - make install - cd .. - - for i in /opt/python/* - do - $i/bin/python setup.py bdist - strip build/*/*/*.so - $i/bin/python setup.py bdist_wheel - $i/bin/python setup.py test - rm -fr build - rm -fr *.so - done - - cd dist - for i in *${TRG}.whl - do - auditwheel repair $i - done - - mv -f wheelhouse/*${TRG}.whl . - - cd .. - rm -fr build -} - -if [ "$1" = "native" ]; then - build $2 -elif [ "$#" -eq 1 ]; then - run_docker quay.io/pypa/manylinux2014_${1} ${1} -else - run_docker quay.io/pypa/manylinux2014_i686 i686 - run_docker quay.io/pypa/manylinux2014_x86_64 x86_64 -fi diff --git a/python/make_py_wheel_mac.sh b/python/make_py_wheel_mac.sh deleted file mode 100755 index dfb217db..00000000 --- a/python/make_py_wheel_mac.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash -# Copyright 2018 Google Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.! - -set -e # exit immediately on error -set -x # display all commands - -build_python() { - VERSION=$1 - URL=$2 - INSTALL_PATH="/Library/Frameworks/Python.framework/Versions/${VERSION}/bin" - CURRENT_PATH=${PATH} - - curl -L -o python.pkg ${URL} - sudo installer -pkg python.pkg -target / - - if [ -f "${INSTALL_PATH}/python3" ]; then - ln -s ${INSTALL_PATH}/python3 ${INSTALL_PATH}/python - ln -s ${INSTALL_PATH}/python3-config ${INSTALL_PATH}/python-config - ln -s ${INSTALL_PATH}/pip3 ${INSTALL_PATH}/pip - fi - - export PATH="${INSTALL_PATH}:${CURRENT_PATH}" - ls -l ${INSTALL_PATH} - which python - which pip - python --version - sudo python get-pip.py --no-setuptools --no-wheel --ignore-installed - pip install --upgrade setuptools - pip install wheel - pip install delocate - python setup.py bdist_wheel --plat-name=macosx_10_6_x86_64 - python setup.py test - delocate-listdeps dist/*.whl - delocate-wheel -w dist/delocated_wheel dist/*.whl - export PATH="${CURRENT_PATH}" - - ls -l dist/delocated_wheel - rm -fr build - rm -fr *.so - rm -fr dist/*.whl - rm -fr python.pkg -} - -build() { - cd python - rm -fr build - mkdir -p build - cd build - - # Install sentencepiece - cmake ../.. -DSPM_ENABLE_SHARED=OFF -DSPM_NO_THREADLOCAL=ON - make -j4 VERBOSE=1 - make install - cd .. 
- - mkdir -p dist/delocated_wheel - curl -L -O https://bootstrap.pypa.io/get-pip.py - -# build_python 2.7 https://www.python.org/ftp/python/2.7.15/python-2.7.15-macosx10.6.pkg -# latest pip doesn't support Py3.4 -# build_python 3.4 https://www.python.org/ftp/python/3.4.4/python-3.4.4-macosx10.6.pkg - build_python 3.5 https://www.python.org/ftp/python/3.5.4/python-3.5.4-macosx10.6.pkg - build_python 3.6 https://www.python.org/ftp/python/3.6.6/python-3.6.6-macosx10.6.pkg - build_python 3.7 https://www.python.org/ftp/python/3.7.9/python-3.7.9-macosx10.9.pkg - build_python 3.8 https://www.python.org/ftp/python/3.8.6/python-3.8.6-macosx10.9.pkg - build_python 3.9 https://www.python.org/ftp/python/3.9.0/python-3.9.0-macosx10.9.pkg - - cd .. - - rm -fr build -} - -build diff --git a/python/once.h b/python/once.h deleted file mode 100644 index fc7553ac..00000000 --- a/python/once.h +++ /dev/null @@ -1,157 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. 
-// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Author: kenton@google.com (Kenton Varda) -// -// emulates google3/base/once.h -// -// This header is intended to be included only by internal .cc files and -// generated .pb.cc files. Users should not use this directly. -// -// This is basically a portable version of pthread_once(). -// -// This header declares: -// * A type called ProtobufOnceType. -// * A macro GOOGLE_PROTOBUF_DECLARE_ONCE() which declares a variable of type -// ProtobufOnceType. This is the only legal way to declare such a variable. -// The macro may only be used at the global scope (you cannot create local or -// class member variables of this type). -// * A function GoogleOnceInit(ProtobufOnceType* once, void (*init_func)()). -// This function, when invoked multiple times given the same ProtobufOnceType -// object, will invoke init_func on the first call only, and will make sure -// none of the calls return before that first call to init_func has finished. -// * The user can provide a parameter which GoogleOnceInit() forwards to the -// user-provided function when it is called. 
Usage example: -// int a = 10; -// GoogleOnceInit(&my_once, &MyFunctionExpectingIntArgument, &a); -// * This implementation guarantees that ProtobufOnceType is a POD (i.e. no -// static initializer generated). -// -// This implements a way to perform lazy initialization. It's more efficient -// than using mutexes as no lock is needed if initialization has already -// happened. -// -// Example usage: -// void Init(); -// GOOGLE_PROTOBUF_DECLARE_ONCE(once_init); -// -// // Calls Init() exactly once. -// void InitOnce() { -// GoogleOnceInit(&once_init, &Init); -// } -// -// Note that if GoogleOnceInit() is called before main() has begun, it must -// only be called by the thread that will eventually call main() -- that is, -// the thread that performs dynamic initialization. In general this is a safe -// assumption since people don't usually construct threads before main() starts, -// but it is technically not guaranteed. Unfortunately, Win32 provides no way -// whatsoever to statically-initialize its synchronization primitives, so our -// only choice is to assume that dynamic initialization is single-threaded. - -#ifndef GOOGLE_PROTOBUF_STUBS_ONCE_H__ -#define GOOGLE_PROTOBUF_STUBS_ONCE_H__ - -#include -#include -#include -#include - -namespace google { -namespace protobuf { -namespace internal { - -using once_flag = std::atomic; - -template -void my_call_once(once_flag& once, Callable&& fn, Args&&... args) { - enum CallOnceState { - ONCE_INIT = 0, - ONCE_RUNNING = 1, - ONCE_DONE = 2, - }; - - int expected_state = ONCE_INIT; - if (once.compare_exchange_strong(expected_state, ONCE_RUNNING)) { - fn(std::forward(args)...); - once.store(ONCE_DONE); - return; - } - - if (expected_state == ONCE_DONE) { - return; - } - - while (once.load() == ONCE_RUNNING) { - sched_yield(); - } -} - -template -void call_once(Args&&... 
args) { - my_call_once(std::forward(args)...); -} -} // namespace internal - -// TODO(gerbens) remove this once third_party is fully extracted -using ProtobufOnceType = internal::once_flag; - -inline void GoogleOnceInit(ProtobufOnceType* once, void (*init_func)()) { - internal::my_call_once(*once, init_func); -} - -template -inline void GoogleOnceInitArg(ProtobufOnceType* once, void (*init_func)(Arg*), - Arg* arg) { - internal::my_call_once(*once, init_func, arg); -} - -class GoogleOnceDynamic { - public: - // If this->Init() has not been called before by any thread, - // execute (*func_with_arg)(arg) then return. - // Otherwise, wait until that prior invocation has finished - // executing its function, then return. - template - void Init(void (*func_with_arg)(T*), T* arg) { - GoogleOnceInitArg(&this->state_, func_with_arg, arg); - } - - private: - ProtobufOnceType state_; -}; - -#define GOOGLE_PROTOBUF_ONCE_TYPE ::google::protobuf::ProtobufOnceType -#define GOOGLE_PROTOBUF_DECLARE_ONCE(NAME) \ - ::google::protobuf::ProtobufOnceType NAME - -} // namespace protobuf -} // namespace google - -#endif // GOOGLE_PROTOBUF_STUBS_ONCE_H__ diff --git a/python/sentencepiece_python_module_example.ipynb b/python/sentencepiece_python_module_example.ipynb index 78464d1f..7138d915 100644 --- a/python/sentencepiece_python_module_example.ipynb +++ b/python/sentencepiece_python_module_example.ipynb @@ -216,7 +216,7 @@ "import tensorflow as tf\n", "\n", "# Assumes that m.model is stored in non-Posix file system.\n", - "serialized_model_proto = tf.gfile.GFile('m.model', 'rb').read()\n", + "serialized_model_proto = tf.io.gfile.GFile('m.model', 'rb').read()\n", "\n", "sp = spm.SentencePieceProcessor()\n", "sp.load_from_serialized_proto(serialized_model_proto)\n", @@ -265,7 +265,7 @@ }, "cell_type": "code", "source": [ - "## Example of user defined symbols\n", + "# Example of user defined symbols\n", "spm.SentencePieceTrainer.train('--input=botchan.txt --model_prefix=m_user 
--user_defined_symbols=, --vocab_size=2000')\n", "\n", "sp_user = spm.SentencePieceProcessor()\n", @@ -307,7 +307,7 @@ }, "cell_type": "code", "source": [ - "## Example of control symbols\n", + "# Example of control symbols\n", "spm.SentencePieceTrainer.train('--input=botchan.txt --model_prefix=m_ctrl --control_symbols=, --vocab_size=2000')\n", "\n", "sp_ctrl = spm.SentencePieceProcessor()\n", @@ -564,7 +564,7 @@ "spm.SentencePieceTrainer.train('--input=botchan.txt --vocab_size=2000 --model_prefix=m --unk_surface=__UNKNOWN__')\n", "sp = spm.SentencePieceProcessor()\n", "sp.load('m.model')\n", - "print(sp.decode_ids([sp.unk_id()])) " + "print(sp.decode_ids([sp.unk_id()]))" ], "execution_count": 0, "outputs": [ @@ -608,7 +608,7 @@ "# There are two hyperparamenters for sampling (nbest_size and inverse temperature). see the paper [kudo18] for detail.\n", "for n in range(10):\n", " print(sp.sample_encode_as_pieces('hello world', -1, 0.1))\n", - " \n", + "\n", "for n in range(10):\n", " print(sp.sample_encode_as_ids('hello world', -1, 0.1))" ], @@ -858,8 +858,6 @@ }, "cell_type": "code", "source": [ - "import sentencepiece as spm\n", - "\n", "# NFKC normalization and lower casing.\n", "spm.SentencePieceTrainer.train('--input=botchan.txt --model_prefix=m --vocab_size=2000 --normalization_rule_name=nfkc_cf')\n", "\n", @@ -903,11 +901,12 @@ }, "cell_type": "code", "source": [ - "def tocode(s): \n", - " out = [] \n", - " for c in s: \n", - " out.append(str(hex(ord(c))).replace('0x', 'U+')) \n", - " return ' '.join(out) \n", + "def tocode(s):\n", + " out = []\n", + " for c in s:\n", + " out.append(str(hex(ord(c))).replace('0x', 'U+'))\n", + " return ' '.join(out)\n", + "\n", "\n", "# TSV format: source Unicode code points target code points\n", "# normalize \"don't => do not, I'm => I am\"\n", @@ -923,7 +922,7 @@ "# m.model embeds the normalization rule compiled into an FST.\n", "sp.load('m.model')\n", "print(sp.encode_as_pieces(\"I'm busy\")) # normalzied to `I am busy'\n", 
- "print(sp.encode_as_pieces(\"I don't know it.\")) # normalized to 'I do not know it.'\n" + "print(sp.encode_as_pieces(\"I don't know it.\")) # normalized to 'I do not know it.'" ], "execution_count": 0, "outputs": [ @@ -949,7 +948,7 @@ "source": [ "## Randomizing training data\n", "\n", - "Sentencepiece loads all the lines of training data into memory to train the model. However, larger training data increases the training time and memory usage, though they are liner to the training data. When **--input_sentence_size=<SIZE>** is specified, Sentencepiece randomly samples <SIZE> lines from the whole training data. **--shuffle_input_sentence=false** disables the random shuffle and takes the first <SIZE> lines." + "Sentencepiece loads all the lines of training data into memory to train the model. However, larger training data increases the training time and memory usage, though they are linear to the training data. When **--input_sentence_size=<SIZE>** is specified, Sentencepiece randomly samples <SIZE> lines from the whole training data. **--shuffle_input_sentence=false** disables the random shuffle and takes the first <SIZE> lines." 
] }, { @@ -1029,9 +1028,9 @@ " for piece in sp.encode_as_pieces(line):\n", " freq.setdefault(piece, 0)\n", " freq[piece] += 1\n", - " \n", + "\n", "# only uses the token appearing more than 1000 times in the training data.\n", - "vocabs = list(filter(lambda x : x in freq and freq[x] > 1000, vocabs))\n", + "vocabs = list(filter(lambda x: x in freq and freq[x] > 1000, vocabs))\n", "sp.set_vocabulary(vocabs)\n", "print(sp.encode_as_pieces('this is a test.'))\n", "\n", @@ -1133,20 +1132,17 @@ }, "cell_type": "code", "source": [ - "freq={}\n", + "freq = {}\n", "with open('botchan.txt', 'r') as f:\n", " for line in f:\n", " line = line.rstrip()\n", " for piece in line.split():\n", " freq.setdefault(piece, 0)\n", " freq[piece] += 1\n", - " \n", + "\n", "with open('word_freq_list.tsv', 'w') as f:\n", " for k, v in freq.items():\n", " f.write('%s\\t%d\\n' % (k, v))\n", - " \n", - "\n", - "import sentencepiece as spm\n", "\n", "spm.SentencePieceTrainer.train('--input=word_freq_list.tsv --input_format=tsv --model_prefix=m --vocab_size=2000')\n", "sp = spm.SentencePieceProcessor()\n", @@ -1176,7 +1172,7 @@ "\n", "Sentencepiece keeps track of byte offset (span) of each token, which is useful for highlighting the token on top of unnormalized text.\n", "\n", - "We first need to install protobuf module and sentencepiece_pb2.py as the byte offsets and all other meta data for segementation are encoded in protocol buffer.\n", + "We first need to install protobuf module as the byte offsets and all other meta data for segementation are encoded in protocol buffer.\n", "**encode_as_serialized_proto** method resturns serialized SentencePieceText proto. 
You can get the deserialized object by calling ParseFromString method.\n", "\n", "The definition of SentencePieceText proto is found [here](https://github.com/google/sentencepiece/blob/3be3f2e11e2bb923c579c6be5e7335809341587f/src/sentencepiece.proto#L23).\n" @@ -1194,8 +1190,7 @@ }, "cell_type": "code", "source": [ - "!pip install protobuf\n", - "!wget https://raw.githubusercontent.com/google/sentencepiece/master/python/sentencepiece_pb2.py" + "!pip install protobuf" ], "execution_count": 0, "outputs": [ @@ -1233,8 +1228,7 @@ }, "cell_type": "code", "source": [ - "import sentencepiece_pb2\n", - "import sentencepiece as spm\n", + "from sentencepiece import sentencepiece_pb2\n", "\n", "spm.SentencePieceTrainer.train('--input=botchan.txt --model_prefix=m --vocab_size=2000')\n", "\n", diff --git a/python/setup.cfg b/python/setup.cfg index b88034e4..08aedd7e 100644 --- a/python/setup.cfg +++ b/python/setup.cfg @@ -1,2 +1,2 @@ [metadata] -description-file = README.md +description_file = README.md diff --git a/python/setup.py b/python/setup.py index 9551ad92..54112313 100755 --- a/python/setup.py +++ b/python/setup.py @@ -32,10 +32,7 @@ def long_description(): return long_description -def version(): - with codecs.open('VERSION.txt', 'r', 'utf-8') as f: - version = f.read().rstrip() - return version +exec(open('src/sentencepiece/_version.py').read()) def run_pkg_config(section, pkg_config_path=None): @@ -60,22 +57,43 @@ def is_sentencepiece_installed(): return False +def get_cflags_and_libs(root): + cflags = ['-std=c++17', '-I' + os.path.join(root, 'include')] + libs = [] + if os.path.exists(os.path.join(root, 'lib/pkgconfig/sentencepiece.pc')): + libs = [ + os.path.join(root, 'lib/libsentencepiece.a'), + os.path.join(root, 'lib/libsentencepiece_train.a'), + ] + elif os.path.exists(os.path.join(root, 'lib64/pkgconfig/sentencepiece.pc')): + libs = [ + os.path.join(root, 'lib64/libsentencepiece.a'), + os.path.join(root, 'lib64/libsentencepiece_train.a'), + ] + return cflags, 
libs + + class build_ext(_build_ext): """Override build_extension to run cmake.""" def build_extension(self, ext): - pkg_config_path = None - if not is_sentencepiece_installed(): - subprocess.run(['./build_bundled.sh', version()], check=True) - pkg_config_path = './bundled/lib/pkgconfig:./bundled/lib64/pkgconfig' + cflags, libs = get_cflags_and_libs('../build/root') + + if len(libs) == 0: + if is_sentencepiece_installed(): + cflags = cflags + run_pkg_config('cflags') + libs = run_pkg_config('libs') + else: + subprocess.check_call(['./build_bundled.sh', __version__]) + cflags, libs = get_cflags_and_libs('./build/root') - cflags = ['-std=c++11'] # Fix compile on some versions of Mac OSX # See: https://github.com/neulab/xnmt/issues/199 if sys.platform == 'darwin': cflags.append('-mmacosx-version-min=10.9') - cflags = cflags + run_pkg_config('cflags', pkg_config_path) - libs = run_pkg_config('libs', pkg_config_path) + else: + cflags.append('-Wl,-strip-all') + libs.append('-Wl,-strip-all') print('## cflags={}'.format(' '.join(cflags))) print('## libs={}'.format(' '.join(libs))) ext.extra_compile_args = cflags @@ -84,21 +102,66 @@ def build_extension(self, ext): if os.name == 'nt': - cflags = ['/MT', '/I..\\build\\root\\include'] - libs = [ - '..\\build\\root\\lib\\sentencepiece.lib', - '..\\build\\root\\lib\\sentencepiece_train.lib' - ] + # Must pre-install sentencepice into build directory. 
+ arch = 'win32' + if sys.maxsize > 2**32: + arch = 'amd64' + if os.path.exists('..\\build\\root_{}\\lib'.format(arch)): + cflags = ['/std:c++17', '/I..\\build\\root_{}\\include'.format(arch)] + libs = [ + '..\\build\\root_{}\\lib\\sentencepiece.lib'.format(arch), + '..\\build\\root_{}\\lib\\sentencepiece_train.lib'.format(arch), + ] + elif os.path.exists('..\\build\\root\\lib'): + cflags = ['/std:c++17', '/I..\\build\\root\\include'] + libs = [ + '..\\build\\root\\lib\\sentencepiece.lib', + '..\\build\\root\\lib\\sentencepiece_train.lib', + ] + else: + # build library locally with cmake and vc++. + cmake_arch = 'Win32' + if arch == 'amd64': + cmake_arch = 'x64' + subprocess.check_call([ + 'cmake', + 'sentencepiece', + '-A', + cmake_arch, + '-B', + 'build', + '-DSPM_ENABLE_SHARED=OFF', + '-DCMAKE_INSTALL_PREFIX=build\\root', + ]) + subprocess.check_call([ + 'cmake', + '--build', + 'build', + '--config', + 'Release', + '--target', + 'install', + '--parallel', + '8', + ]) + cflags = ['/std:c++17', '/I.\\build\\root\\include'] + libs = [ + '.\\build\\root\\lib\\sentencepiece.lib', + '.\\build\\root\\lib\\sentencepiece_train.lib', + ] + SENTENCEPIECE_EXT = Extension( 'sentencepiece._sentencepiece', sources=['src/sentencepiece/sentencepiece_wrap.cxx'], extra_compile_args=cflags, - extra_link_args=libs) + extra_link_args=libs, + ) cmdclass = {} else: SENTENCEPIECE_EXT = Extension( 'sentencepiece._sentencepiece', - sources=['src/sentencepiece/sentencepiece_wrap.cxx']) + sources=['src/sentencepiece/sentencepiece_wrap.cxx'], + ) cmdclass = {'build_ext': build_ext} setup( @@ -108,24 +171,29 @@ def build_extension(self, ext): description='SentencePiece python wrapper', long_description=long_description(), long_description_content_type='text/markdown', - version=version(), + version=__version__, package_dir={'': 'src'}, url='https://github.com/google/sentencepiece', license='Apache', platforms='Unix', py_modules=[ - 'sentencepiece/__init__', 
'sentencepiece/sentencepiece_model_pb2', - 'sentencepiece/sentencepiece_pb2' + 'sentencepiece/__init__', + 'sentencepiece/_version', + 'sentencepiece/sentencepiece_model_pb2', + 'sentencepiece/sentencepiece_pb2', ], ext_modules=[SENTENCEPIECE_EXT], cmdclass=cmdclass, classifiers=[ - 'Development Status :: 5 - Production/Stable', 'Environment :: Console', + 'Development Status :: 5 - Production/Stable', + 'Environment :: Console', 'Intended Audience :: Developers', 'Intended Audience :: Science/Research', 'License :: OSI Approved :: Apache Software License', - 'Operating System :: Unix', 'Programming Language :: Python', + 'Operating System :: Unix', + 'Programming Language :: Python', 'Topic :: Text Processing :: Linguistic', - 'Topic :: Software Development :: Libraries :: Python Modules' + 'Topic :: Software Development :: Libraries :: Python Modules', ], - test_suite='sentencepiece_test.suite') + test_suite='sentencepiece_test.suite', +) diff --git a/python/src/sentencepiece/__init__.py b/python/src/sentencepiece/__init__.py index 566f810a..6040e7bb 100644 --- a/python/src/sentencepiece/__init__.py +++ b/python/src/sentencepiece/__init__.py @@ -1,13 +1,10 @@ -# This file was automatically generated by SWIG (http://www.swig.org). -# Version 4.0.1 +# This file was automatically generated by SWIG (https://www.swig.org). +# Version 4.1.0 # -# Do not make changes to this file unless you know what you are doing--modify +# Do not make changes to this file unless you know what you are doing - modify # the SWIG interface file instead. from sys import version_info as _swig_python_version_info -if _swig_python_version_info < (2, 7, 0): - raise RuntimeError("Python 2.7 or later required") - # Import the low-level C/C++ module if __package__ or "." in __name__: from . 
import _sentencepiece @@ -29,10 +26,10 @@ def _swig_repr(self): def _swig_setattr_nondynamic_instance_variable(set): def set_instance_attr(self, name, value): - if name == "thisown": - self.this.own(value) - elif name == "this": + if name == "this": set(self, name, value) + elif name == "thisown": + self.this.own(value) elif hasattr(self, name) and isinstance(getattr(type(self), name), property): set(self, name, value) else: @@ -61,8 +58,180 @@ class _SwigNonDynamicMeta(type): __setattr__ = _swig_setattr_nondynamic_class_variable(type.__setattr__) -EncoderVersion_kOptimized = _sentencepiece.EncoderVersion_kOptimized -EncoderVersion_kOriginal = _sentencepiece.EncoderVersion_kOriginal +class ImmutableSentencePieceText_ImmutableSentencePiece(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") + __repr__ = _swig_repr + + def __init__(self): + _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_swiginit(self, _sentencepiece.new_ImmutableSentencePieceText_ImmutableSentencePiece()) + __swig_destroy__ = _sentencepiece.delete_ImmutableSentencePieceText_ImmutableSentencePiece + + def _piece(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__piece(self) + + def _surface(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__surface(self) + + def _id(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__id(self) + + def _begin(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__begin(self) + + def _end(self): + return _sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece__end(self) + + piece = property(_piece) + surface = property(_surface) + id = property(_id) + begin = property(_begin) + end = property(_end) + + def __str__(self): + return ('piece: \"{}\"\n' + 'id: {}\n' + 'surface: \"{}\"\n' + 'begin: {}\n' + 'end: {}\n').format(self.piece, self.id, self.surface, + self.begin, 
self.end) + + def __eq__(self, other): + return self.piece == other.piece and self.id == other.id and self.surface == other.surface and self.begin == other.begin and self.end == other.end + + def __hash__(self): + return hash(str(self)) + + __repr__ = __str__ + + +# Register ImmutableSentencePieceText_ImmutableSentencePiece in _sentencepiece: +_sentencepiece.ImmutableSentencePieceText_ImmutableSentencePiece_swigregister(ImmutableSentencePieceText_ImmutableSentencePiece) +class ImmutableSentencePieceText(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") + __repr__ = _swig_repr + + def __init__(self): + _sentencepiece.ImmutableSentencePieceText_swiginit(self, _sentencepiece.new_ImmutableSentencePieceText()) + __swig_destroy__ = _sentencepiece.delete_ImmutableSentencePieceText + + def _pieces_size(self): + return _sentencepiece.ImmutableSentencePieceText__pieces_size(self) + + def _pieces(self, index): + return _sentencepiece.ImmutableSentencePieceText__pieces(self, index) + + def _text(self): + return _sentencepiece.ImmutableSentencePieceText__text(self) + + def _score(self): + return _sentencepiece.ImmutableSentencePieceText__score(self) + + def SerializeAsString(self): + return _sentencepiece.ImmutableSentencePieceText_SerializeAsString(self) + + text = property(_text) + score = property(_score) + + class ImmutableSentencePieceIterator: + def __init__(self, proto): + self.proto = proto + self.len = self.proto._pieces_size() + + def __len__(self): + return self.len + + def __getitem__(self, index): + if isinstance(index, slice): + return [self.proto._pieces(i) for i in range(self.len)][index.start:index.stop:index.step] + if index < 0: + index = index + self.len + if index < 0 or index >= self.len: + raise IndexError('piece index is out of range') + return self.proto._pieces(index) + + def __str__(self): + return '\n'.join(['pieces {{\n{}}}'.format(str(x)) for x in self]) + + __repr__ = __str__ + + @property 
+ def pieces(self): + return ImmutableSentencePieceText.ImmutableSentencePieceIterator(self) + + def __eq__(self, other): + return self.SerializeAsString() == other.SerializeAsString() + + def __hash__(self): + return hash(self.SerializeAsString()) + + def __str__(self): + return ('text: \"{}\"\n' + 'score: {}\n' + '{}').format(self.text, self.score, + '\n'.join(['pieces {{\n{}}}'.format(str(x)) for x in self.pieces])) + + __repr__ = __str__ + + +# Register ImmutableSentencePieceText in _sentencepiece: +_sentencepiece.ImmutableSentencePieceText_swigregister(ImmutableSentencePieceText) +class ImmutableNBestSentencePieceText(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") + __repr__ = _swig_repr + + def __init__(self): + _sentencepiece.ImmutableNBestSentencePieceText_swiginit(self, _sentencepiece.new_ImmutableNBestSentencePieceText()) + __swig_destroy__ = _sentencepiece.delete_ImmutableNBestSentencePieceText + + def _nbests_size(self): + return _sentencepiece.ImmutableNBestSentencePieceText__nbests_size(self) + + def _nbests(self, index): + return _sentencepiece.ImmutableNBestSentencePieceText__nbests(self, index) + + def SerializeAsString(self): + return _sentencepiece.ImmutableNBestSentencePieceText_SerializeAsString(self) + + class ImmutableSentencePieceTextIterator: + def __init__(self, proto): + self.proto = proto + self.len = self.proto._nbests_size() + + def __len__(self): + return self.len + + def __getitem__(self, index): + if isinstance(index, slice): + return [self.proto._nbests(i) for i in range(self.len)][index.start:index.stop:index.step] + if index < 0: + index = index + self.len + if index < 0 or index >= self.len: + raise IndexError('nbests index is out of range') + return self.proto._nbests(index) + + def __str__(self): + return '\n'.join(['nbests {{\n{}}}'.format(str(x)) for x in self]) + + __repr__ = __str__ + + @property + def nbests(self): + return 
ImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator(self) + + def __eq__(self, other): + return self.SerializeAsString() == other.SerializeAsString() + + def __hash__(self): + return hash(self.SerializeAsString()) + + def __str__(self): + return '\n'.join(['nbests {{\n{}}}'.format(str(x)) for x in self.nbests]) + + __repr__ = __str__ + + +# Register ImmutableNBestSentencePieceText in _sentencepiece: +_sentencepiece.ImmutableNBestSentencePieceText_swigregister(ImmutableNBestSentencePieceText) class SentencePieceProcessor(object): thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc="The membership flag") __repr__ = _swig_repr @@ -89,44 +258,8 @@ def ResetVocabulary(self): def LoadVocabulary(self, filename, threshold): return _sentencepiece.SentencePieceProcessor_LoadVocabulary(self, filename, threshold) - def SetEncoderVersion(self, encoder_version): - return _sentencepiece.SentencePieceProcessor_SetEncoderVersion(self, encoder_version) - - def GetEncoderVersion(self): - return _sentencepiece.SentencePieceProcessor_GetEncoderVersion(self) - - def EncodeAsPieces(self, input): - return _sentencepiece.SentencePieceProcessor_EncodeAsPieces(self, input) - - def EncodeAsIds(self, input): - return _sentencepiece.SentencePieceProcessor_EncodeAsIds(self, input) - - def NBestEncodeAsPieces(self, input, nbest_size): - return _sentencepiece.SentencePieceProcessor_NBestEncodeAsPieces(self, input, nbest_size) - - def NBestEncodeAsIds(self, input, nbest_size): - return _sentencepiece.SentencePieceProcessor_NBestEncodeAsIds(self, input, nbest_size) - - def SampleEncodeAsPieces(self, input, nbest_size, alpha): - return _sentencepiece.SentencePieceProcessor_SampleEncodeAsPieces(self, input, nbest_size, alpha) - - def SampleEncodeAsIds(self, input, nbest_size, alpha): - return _sentencepiece.SentencePieceProcessor_SampleEncodeAsIds(self, input, nbest_size, alpha) - - def DecodePieces(self, pieces): - return 
_sentencepiece.SentencePieceProcessor_DecodePieces(self, pieces) - - def EncodeAsSerializedProto(self, input): - return _sentencepiece.SentencePieceProcessor_EncodeAsSerializedProto(self, input) - - def SampleEncodeAsSerializedProto(self, input, nbest_size, alpha): - return _sentencepiece.SentencePieceProcessor_SampleEncodeAsSerializedProto(self, input, nbest_size, alpha) - - def NBestEncodeAsSerializedProto(self, input, nbest_size): - return _sentencepiece.SentencePieceProcessor_NBestEncodeAsSerializedProto(self, input, nbest_size) - - def DecodePiecesAsSerializedProto(self, pieces): - return _sentencepiece.SentencePieceProcessor_DecodePiecesAsSerializedProto(self, pieces) + def CalculateEntropy(self, *args): + return _sentencepiece.SentencePieceProcessor_CalculateEntropy(self, *args) def GetPieceSize(self): return _sentencepiece.SentencePieceProcessor_GetPieceSize(self) @@ -170,11 +303,95 @@ def serialized_model_proto(self): def LoadFromFile(self, arg): return _sentencepiece.SentencePieceProcessor_LoadFromFile(self, arg) - def DecodeIdsWithCheck(self, ids): - return _sentencepiece.SentencePieceProcessor_DecodeIdsWithCheck(self, ids) + def _EncodeAsIds(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsIds(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsPieces(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsPieces(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsSerializedProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsSerializedProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def 
_EncodeAsImmutableProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsImmutableProto(self, text, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsIdsBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsIdsBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsPiecesBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsPiecesBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsSerializedProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsSerializedProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _EncodeAsImmutableProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__EncodeAsImmutableProtoBatch(self, ins, num_threads, enable_sampling, nbest_size, alpha, add_bos, add_eos, reverse, emit_unk_piece) + + def _DecodeIds(self, ids): + return _sentencepiece.SentencePieceProcessor__DecodeIds(self, ids) + + def _DecodePieces(self, pieces): + return _sentencepiece.SentencePieceProcessor__DecodePieces(self, pieces) + + def _DecodeIdsAsSerializedProto(self, ids): + return _sentencepiece.SentencePieceProcessor__DecodeIdsAsSerializedProto(self, ids) + + def _DecodePiecesAsSerializedProto(self, pieces): + return 
_sentencepiece.SentencePieceProcessor__DecodePiecesAsSerializedProto(self, pieces) + + def _DecodeIdsAsImmutableProto(self, ids): + return _sentencepiece.SentencePieceProcessor__DecodeIdsAsImmutableProto(self, ids) - def DecodeIdsAsSerializedProtoWithCheck(self, ids): - return _sentencepiece.SentencePieceProcessor_DecodeIdsAsSerializedProtoWithCheck(self, ids) + def _DecodePiecesAsImmutableProto(self, pieces): + return _sentencepiece.SentencePieceProcessor__DecodePiecesAsImmutableProto(self, pieces) + + def _DecodeIdsBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodeIdsBatch(self, ins, num_threads) + + def _DecodeIdsAsSerializedProtoBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch(self, ins, num_threads) + + def _DecodeIdsAsImmutableProtoBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodeIdsAsImmutableProtoBatch(self, ins, num_threads) + + def _DecodePiecesBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodePiecesBatch(self, ins, num_threads) + + def _DecodePiecesAsSerializedProtoBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch(self, ins, num_threads) + + def _DecodePiecesAsImmutableProtoBatch(self, ins, num_threads): + return _sentencepiece.SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch(self, ins, num_threads) + + def _NBestEncodeAsIds(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__NBestEncodeAsIds(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece) + + def _NBestEncodeAsPieces(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__NBestEncodeAsPieces(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece) + + def _NBestEncodeAsSerializedProto(self, text, nbest_size, add_bos, 
add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__NBestEncodeAsSerializedProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece) + + def _NBestEncodeAsImmutableProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__NBestEncodeAsImmutableProto(self, text, nbest_size, add_bos, add_eos, reverse, emit_unk_piece) + + def _SampleEncodeAndScoreAsIds(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsIds(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece) + + def _SampleEncodeAndScoreAsPieces(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsPieces(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece) + + def _SampleEncodeAndScoreAsSerializedProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece) + + def _SampleEncodeAndScoreAsImmutableProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece): + return _sentencepiece.SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto(self, text, num_samples, alpha, wor, include_best, add_bos, add_eos, reverse, emit_unk_piece) + + def _CalculateEntropy(self, text, alpha): + return _sentencepiece.SentencePieceProcessor__CalculateEntropy(self, text, alpha) + + def _CalculateEntropyBatch(self, ins, alpha, num_threads): + return _sentencepiece.SentencePieceProcessor__CalculateEntropyBatch(self, ins, alpha, num_threads) def Init(self, model_file=None, @@ 
-183,9 +400,11 @@ def Init(self, add_bos=False, add_eos=False, reverse=False, + emit_unk_piece=False, enable_sampling=False, nbest_size=-1, - alpha=0.1): + alpha=0.1, + num_threads=-1): """Initialzie sentencepieceProcessor. Args: @@ -196,14 +415,16 @@ def Init(self, add_eos: Add to the result (Default = false) / is added after reversing (if enabled). reverse: Reverses the tokenized sequence (Default = false) - nbest_size: sampling parameters for unigram. Invalid for BPE-Dropout. + emit_unk_piece: Emits the unk literal string (Default = false) + nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout. nbest_size = {0,1}: No sampling is performed. nbest_size > 1: samples from the nbest_size results. nbest_size < 0: assuming that nbest_size is infinite and samples from the all hypothesis (lattice) using forward-filtering-and-backward-sampling algorithm. alpha: Soothing parameter for unigram sampling, and dropout probability of - merge operations for BPE-dropout. + merge operations for BPE-dropout. + num_threads: number of threads in batch processing (Default = -1, auto-detected) """ _sentencepiece_processor_init_native(self) @@ -211,9 +432,11 @@ def Init(self, self._add_bos = add_bos self._add_eos = add_eos self._reverse = reverse + self._emit_unk_piece = emit_unk_piece self._enable_sampling = enable_sampling self._nbest_size = nbest_size self._alpha = alpha + self._num_threads = num_threads if model_file or model_proto: self.Load(model_file=model_file, model_proto=model_proto) @@ -224,9 +447,11 @@ def Encode(self, add_bos=None, add_eos=None, reverse=None, + emit_unk_piece=None, enable_sampling=None, nbest_size=None, - alpha=None): + alpha=None, + num_threads=None): """Encode text input to segmented ids or tokens. Args: @@ -234,16 +459,18 @@ def Encode(self, out_type: output type. int or str. add_bos: Add to the result (Default = false) add_eos: Add to the result (Default = false) / is added after - reversing (if enabled). + reversing (if enabled). 
reverse: Reverses the tokenized sequence (Default = false) - nbest_size: sampling parameters for unigram. Invalid for BPE-Dropout. + emit_unk_piece: Emits the unk literal string (Default = false) + nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout. nbest_size = {0,1}: No sampling is performed. nbest_size > 1: samples from the nbest_size results. nbest_size < 0: assuming that nbest_size is infinite and samples - from the all hypothesis (lattice) using - forward-filtering-and-backward-sampling algorithm. + from the all hypothesis (lattice) using + forward-filtering-and-backward-sampling algorithm. alpha: Soothing parameter for unigram sampling, and merge probability for BPE-dropout (probablity 'p' in BPE-dropout paper). + num_threads: the number of threads used in the batch processing (Default = -1). """ if out_type is None: @@ -254,12 +481,16 @@ def Encode(self, add_eos = self._add_eos if reverse is None: reverse = self._reverse + if emit_unk_piece is None: + emit_unk_piece = self._emit_unk_piece if enable_sampling is None: enable_sampling = self._enable_sampling if nbest_size is None: nbest_size = self._nbest_size if alpha is None: alpha = self._alpha + if num_threads is None: + num_threads = self._num_threads if enable_sampling == True and (nbest_size is None or nbest_size == 0 or nbest_size == 1 or alpha is None): @@ -270,33 +501,224 @@ def Encode(self, 'instead of nbest segmentations.' 
) + if num_threads is None or type(num_threads) is not int: + raise RuntimeError('num_threads must be int') + + if type(input) is list: + if out_type is int: + return self._EncodeAsIdsBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._EncodeAsPiecesBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'serialized_proto' or out_type == 'proto': + return self._EncodeAsSerializedProtoBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'immutable_proto': + return self._EncodeAsImmutableProtoBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + + if out_type is int: + return self._EncodeAsIds(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._EncodeAsPieces(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'serialized_proto' or out_type == 'proto': + return self._EncodeAsSerializedProto(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'immutable_proto': + return self._EncodeAsImmutableProto(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + + raise RuntimeError('unknown out_type={}'.format(out_type)) + return None + + + def EncodeAsPieces(self, input, **kwargs): + return self.Encode(input=input, out_type=str, **kwargs) + + + def EncodeAsIds(self, input, **kwargs): + return self.Encode(input=input, out_type=int, **kwargs) + + + def EncodeAsSerializedProto(self, input, **kwargs): + return self.Encode(input=input, out_type='serialized_proto', **kwargs) + + + def EncodeAsImmutableProto(self, input, **kwargs): + return self.Encode(input=input, out_type='immutable_proto', 
**kwargs) + + + def SampleEncodeAsPieces(self, input, nbest_size=None, alpha=None, **kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type=str, enable_sampling=True, **kwargs) + + + def SampleEncodeAsIds(self, input, nbest_size=None, alpha=None,**kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type=int, enable_sampling=True, **kwargs) + + + def SampleEncodeAsSerializedProto(self, input, nbest_size=None, alpha=None, **kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type='serialized_proto', enable_sampling=True, **kwargs) + + + def SampleEncodeAsImmutableProto(self, input, nbest_size=None, alpha=None, **kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type='immutable_proto', enable_sampling=True, **kwargs) + + + def NBestEncode(self, + input, + out_type=None, + add_bos=None, + add_eos=None, + reverse=None, + emit_unk_piece=None, + nbest_size=None): + """NBestEncode text input to segmented ids or tokens. + + Args: + input: input string. accepsts list of string. + out_type: output type. int or str. + add_bos: Add to the result (Default = false) + add_eos: Add to the result (Default = false) / is added after reversing (if enabled). 
+ reverse: Reverses the tokenized sequence (Default = false) + emit_unk_piece: Emits the unk literal string (Default = false) + nbest_size: nbest size + """ + + if out_type is None: + out_type = self._out_type + if add_bos is None: + add_bos = self._add_bos + if add_eos is None: + add_eos = self._add_eos + if reverse is None: + reverse = self._reverse + if emit_unk_piece is None: + emit_unk_piece = self._emit_unk_piece + if nbest_size is None: + nbest_size = self._nbest_size + + if nbest_size <= 0: + nbest_size=1 + def _encode(text): if out_type is int: - if enable_sampling: - result = self.SampleEncodeAsIds(text, nbest_size, alpha) - else: - result = self.EncodeAsIds(text) - else: - if enable_sampling: - result = self.SampleEncodeAsPieces(text, nbest_size, alpha) - else: - result = self.EncodeAsPieces(text) + return self._NBestEncodeAsIds(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._NBestEncodeAsPieces(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'serialized_proto' or out_type == 'proto': + return self._NBestEncodeAsSerializedProto(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'immutable_proto': + return self._NBestEncodeAsImmutableProto(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + + raise RuntimeError('unknown out_type') - if reverse: - result.reverse() - if add_bos: - if out_type is int: - result = [self.bos_id()] + result - else: - result = [self.IdToPiece(self.bos_id())] + result + if type(input) is list: + return [_encode(n) for n in input] + + return _encode(input) + + + def NBestEncodeAsPieces(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type=str, **kwargs) + + + def NBestEncodeAsIds(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type=int, **kwargs) + + + def 
NBestEncodeAsSerializedProto(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type='serialized_proto', **kwargs) + + + def NBestEncodeAsImmutableProto(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type='immutable_proto', **kwargs) + + + def SampleEncodeAndScore(self, + input, + out_type=None, + add_bos=None, + add_eos=None, + reverse=None, + emit_unk_piece=None, + num_samples=None, + alpha=None, + wor=None, + include_best=None): + """SampleEncodeAndScore text input to segmented ids or tokens. + + Args: + input: input string. accepsts list of string. + out_type: output type. int or str or 'serialized_proto' or 'immutable_proto' + add_bos: Add to the result (Default = false) + add_eos: Add to the result (Default = false) / is added after reversing (if enabled). + reverse: Reverses the tokenized sequence (Default = false) + emit_unk_piece: Emits the unk literal string (Default = false) + num_samples: How many samples to return (Default = 1) + alpha: inverse temperature for sampling + wor: whether to sample without replacement (Default = false) + include_best: whether to include the best tokenization, requires wor=True (Default = false) + """ + + if out_type is None: + out_type = self._out_type + if add_bos is None: + add_bos = self._add_bos + if add_eos is None: + add_eos = self._add_eos + if reverse is None: + reverse = self._reverse + if emit_unk_piece is None: + emit_unk_piece = self._emit_unk_piece + if num_samples is None: + num_samples = 1 + if alpha is None: + alpha = 1. 
+ if wor is None: + wor = False + if include_best is None: + include_best = False + + if num_samples <= 0: + raise RuntimeError('num_examples must be positive') + + if include_best and not wor: + raise RuntimeError('When include_best is True, We must specify "wor = True".') - if add_eos: - if out_type is int: - result = result + [self.eos_id()] - else: - result = result + [self.IdToPiece(self.eos_id())] - return result + def _encode(text): + if out_type is int: + return self._SampleEncodeAndScoreAsIds(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._SampleEncodeAndScoreAsPieces(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + + if out_type == 'serialized_proto' or out_type == 'proto': + return self._SampleEncodeAndScoreAsSerializedProto(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + + if out_type == 'immutable_proto': + return self._SampleEncodeAndScoreAsImmutableProto(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + + raise RuntimeError('unknown output type') + if type(input) is list: return [_encode(n) for n in input] @@ -304,27 +726,137 @@ def _encode(text): return _encode(input) - def Decode(self, input): - """Decode processed id or token sequences.""" + def SampleEncodeAndScoreAsPieces(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type=str, **kwargs) + + + def SampleEncodeAndScoreAsIds(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type=int, **kwargs) + + + def SampleEncodeAndScoreAsSerializedProto(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + 
out_type='serialized_proto', **kwargs) + + + def SampleEncodeAndScoreAsImmutableProto(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type='immutable_proto', **kwargs) + + + def Decode(self, input, out_type=str, num_threads=None): + """Decode processed id or token sequences. + + Args: + out_type: output type. str or 'serialized_proto' or 'immutable_proto' (Default = str) + num_threads: the number of threads used in the batch processing (Default = -1). + """ + + if num_threads is None: + num_threads = self._num_threads + + if num_threads is None or type(num_threads) is not int: + raise RuntimeError('num_threads must be int') if not input: - return self.DecodeIds([]) - elif type(input) is int: - return self.DecodeIdsWithCheck([input]) - elif type(input) is str: - return self.DecodePieces([input]) + return '' + + if out_type is str: + if type(input) is int: + return self._DecodeIds([input]) + if type(input) is str: + return self._DecodePieces([input]) + + if type(input) is list: + if len(input) == 0 or type(input[0]) is int: + return self._DecodeIds(input) + if type(input[0]) is str: + return self._DecodePieces(input) + + if type(input[0]) is list: + if len(input[0]) == 0 or type(input[0][0]) is int: + return self._DecodeIdsBatch(input, num_threads) + if type(input[0][0]) is str: + return self._DecodePiecesBatch(input, num_threads) + + if out_type == 'serialized_proto': + if type(input) is int: + return self._DecodeIdsAsSerializedProto([input]) + if type(input) is str: + return self._DecodePiecesAsSerializedProto([input]) + + if type(input) is list: + if len(input) == 0 or type(input[0]) is int: + return self._DecodeIdsAsSerializedProto(input) + if type(input[0]) is str: + return self._DecodePiecesAsSerializedProto(input) + + if type(input[0]) is list: + if len(input[0]) == 0 or type(input[0][0]) is int: + return self._DecodeIdsAsSerializedProtoBatch(input, num_threads) + 
if type(input[0][0]) is str: + return self._DecodePiecesAsSerializedProtoBatch(input, num_threads) + + + if out_type == 'immutable_proto': + if type(input) is int: + return self._DecodeIdsAsImmutableProto([input]) + if type(input) is str: + return self._DecodePiecesAsImmutableProto([input]) + + if type(input) is list: + if len(input) == 0 or type(input[0]) is int: + return self._DecodeIdsAsImmutableProto(input) + if type(input[0]) is str: + return self._DecodePiecesAsImmutableProto(input) + + if type(input[0]) is list: + if len(input[0]) == 0 or type(input[0][0]) is int: + return self._DecodeIdsAsImmutableProtoBatch(input, num_threads) + if type(input[0][0]) is str: + return self._DecodePiecesAsImmutableProtoBatch(input, num_threads) + + + raise RuntimeError('unknown output or input type') + return None + + + def DecodePieces(self, input, out_type=str, **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodeIds(self, input, out_type=str, **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodePiecesAsSerializedProto(self, input, out_type='serialized_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodeIdsAsSerializedProto(self, input, out_type='serialized_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) - def _decode(input): - if not input: - return self.DecodeIds([]) - if type(input[0]) is int: - return self.DecodeIdsWithCheck(input) - return self.DecodePieces(input) - if type(input[0]) is list: - return [_decode(n) for n in input] + def DecodePiecesAsImmutableProto(self, input, out_type='immutable_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) - return _decode(input) + + def DecodeIdsAsImmutableProto(self, input, out_type='immutable_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def CalculateEntropy(self, input, alpha, num_threads=None): + 
"""Calculate sentence entropy""" + if type(input) is list: + if num_threads is None: + num_threads = self._num_threads + if num_threads is None or type(num_threads) is not int: + raise RuntimeError('num_threads must be int') + return self._CalculateEntropyBatch(input, alpha, num_threads) + + return self._CalculateEntropy(input, alpha) def piece_size(self): @@ -370,7 +902,6 @@ def Load(self, model_file=None, model_proto=None): # Register SentencePieceProcessor in _sentencepiece: _sentencepiece.SentencePieceProcessor_swigregister(SentencePieceProcessor) - def SetRandomGeneratorSeed(seed): return _sentencepiece.SetRandomGeneratorSeed(seed) class SentencePieceTrainer(object): @@ -401,7 +932,7 @@ def _TrainFromMap4(args, iter): return _sentencepiece.SentencePieceTrainer__TrainFromMap4(args, iter) @staticmethod - def Train(arg=None, **kwargs): + def _Train(arg=None, **kwargs): """Train Sentencepiece model. Accept both kwargs and legacy string arg.""" if arg is not None and type(arg) is str: return SentencePieceTrainer._TrainFromString(arg) @@ -445,30 +976,20 @@ def _encode(value): return None + @staticmethod + def Train(arg=None, logstream=None, **kwargs): + with _LogStream(ostream=logstream): + SentencePieceTrainer._Train(arg=arg, **kwargs) + # Register SentencePieceTrainer in _sentencepiece: _sentencepiece.SentencePieceTrainer_swigregister(SentencePieceTrainer) -def SentencePieceTrainer__TrainFromString(arg): - return _sentencepiece.SentencePieceTrainer__TrainFromString(arg) - -def SentencePieceTrainer__TrainFromMap(args): - return _sentencepiece.SentencePieceTrainer__TrainFromMap(args) - -def SentencePieceTrainer__TrainFromMap2(args, iter): - return _sentencepiece.SentencePieceTrainer__TrainFromMap2(args, iter) - -def SentencePieceTrainer__TrainFromMap3(args): - return _sentencepiece.SentencePieceTrainer__TrainFromMap3(args) - -def SentencePieceTrainer__TrainFromMap4(args, iter): - return _sentencepiece.SentencePieceTrainer__TrainFromMap4(args, iter) - - import re 
import csv import sys +import os from io import StringIO from io import BytesIO @@ -508,8 +1029,6 @@ def _batched_func(self, arg): SentencePieceProcessor.Tokenize = SentencePieceProcessor.Encode SentencePieceProcessor.Detokenize = SentencePieceProcessor.Decode -SentencePieceProcessor.DecodeIds = SentencePieceProcessor.DecodeIdsWithCheck -SentencePieceProcessor.DecodeIdsAsSerializedProto = SentencePieceProcessor.DecodeIdsAsSerializedProtoWithCheck for m in [ 'PieceToId', 'IdToPiece', 'GetScore', 'IsUnknown', 'IsControl', 'IsUnused', @@ -521,5 +1040,24 @@ def _batched_func(self, arg): _add_snake_case(SentencePieceTrainer) set_random_generator_seed = SetRandomGeneratorSeed +from ._version import __version__ + +class _LogStream(object): + def __init__(self, ostream=None): + self.ostream = ostream + if self.ostream is not None: + self.orig_stream_fileno = sys.stderr.fileno() + + def __enter__(self): + if self.ostream is not None: + self.orig_stream_dup = os.dup(self.orig_stream_fileno) + os.dup2(self.ostream.fileno(), self.orig_stream_fileno) + + def __exit__(self, type, value, traceback): + if self.ostream is not None: + os.close(self.orig_stream_fileno) + os.dup2(self.orig_stream_dup, self.orig_stream_fileno) + os.close(self.orig_stream_dup) + self.ostream.close() diff --git a/python/src/sentencepiece/_version.py b/python/src/sentencepiece/_version.py new file mode 100644 index 00000000..83130b87 --- /dev/null +++ b/python/src/sentencepiece/_version.py @@ -0,0 +1 @@ +__version__ = '0.2.00' diff --git a/python/src/sentencepiece/sentencepiece.i b/python/src/sentencepiece/sentencepiece.i index 40938e44..bef8298e 100644 --- a/python/src/sentencepiece/sentencepiece.i +++ b/python/src/sentencepiece/sentencepiece.i @@ -2,7 +2,15 @@ %include exception.i %{ + +#include +#include +#include +#include +#include #include +#include +#include #include #include @@ -10,6 +18,8 @@ namespace { PyObject* kUnicodeInput = reinterpret_cast(0x1); PyObject* kByteInput = reinterpret_cast(0x2); 
+using BytesArray = std::vector; + inline void ReleaseResultObject(PyObject *obj) { if (obj != nullptr && obj != kUnicodeInput && obj != kByteInput) { Py_XDECREF(obj); @@ -19,43 +29,24 @@ inline void ReleaseResultObject(PyObject *obj) { class PyInputString { public: explicit PyInputString(PyObject* obj) { -#if PY_VERSION_HEX >= 0x03000000 if (PyUnicode_Check(obj)) { - // Python3, Unicode str_ = const_cast(PyUnicode_AsUTF8AndSize(obj, &size_)); input_type_ = kUnicodeInput; } else if (PyBytes_Check(obj)) { - // Python3, Bytes PyBytes_AsStringAndSize(obj, &str_, &size_); input_type_ = kByteInput; - } -#else - if (PyUnicode_Check(obj)) { - // Python2, Unicode - PyObject *utf8_obj = PyUnicode_AsUTF8String(obj); - PyString_AsStringAndSize(utf8_obj, &str_, &size_); - input_type_ = utf8_obj; - } else if (PyString_Check(obj)) { - // Python2, Bytes, - PyString_AsStringAndSize(obj, &str_, &size_); - input_type_ = kByteInput; - } -#endif - else { + } else { str_ = nullptr; } } + absl::string_view str() const { return absl::string_view(data(), size()); } const char* data() const { return str_; } Py_ssize_t size() const { return size_; } bool IsAvalable() const { return str_ != nullptr; } PyObject *input_type() const { return input_type_; } static bool IsUnicode(PyObject *resultobj) { -#if PY_VERSION_HEX >= 0x03000000 return (resultobj == nullptr || resultobj == kUnicodeInput); -#else - return (resultobj != nullptr && resultobj != kByteInput); -#endif } private: @@ -69,19 +60,11 @@ PyObject* MakePyOutputString(const std::string& output, if (PyInputString::IsUnicode(resultobj)) { return PyUnicode_FromStringAndSize(output.data(), output.size()); } -#if PY_VERSION_HEX >= 0x03000000 return PyBytes_FromStringAndSize(output.data(), output.size()); -#else - return PyString_FromStringAndSize(output.data(), output.size()); -#endif } -PyObject* MakePyOutputBytes(const std::string& output) { -#if PY_VERSION_HEX >= 0x03000000 +PyObject* MakePyOutputBytes(const sentencepiece::util::bytes& 
output) { return PyBytes_FromStringAndSize(output.data(), output.size()); -#else - return PyString_FromStringAndSize(output.data(), output.size()); -#endif } int ToSwigError(sentencepiece::util::StatusCode code) { @@ -151,7 +134,159 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { std::string value_; sentencepiece::util::Status status_; }; + +inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp, + std::vector *ids, + bool add_bos, bool add_eos, bool reverse, bool emit_unk_piece) { + if (!add_bos && !add_eos && !reverse) return; + if (reverse) std::reverse(ids->begin(), ids->end()); + if (add_bos) ids->insert(ids->begin(), sp.bos_id()); + if (add_eos) ids->push_back(sp.eos_id()); } + +inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp, + std::vector *pieces, + bool add_bos, bool add_eos, bool reverse, bool emit_unk_piece) { + if (!add_bos && !add_eos && !reverse && !emit_unk_piece) return; + if (reverse) std::reverse(pieces->begin(), pieces->end()); + if (add_bos) pieces->insert(pieces->begin(), sp.IdToPiece(sp.bos_id())); + if (add_eos) pieces->push_back(sp.IdToPiece(sp.eos_id())); + if (emit_unk_piece) { + const auto &unk = sp.IdToPiece(sp.unk_id()); + for (auto &piece : *pieces) { + const int id = sp.PieceToId(piece); + if (id == sp.unk_id()) { + piece = unk; + } + } + } +} + +inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp, + sentencepiece::util::bytes *proto, + bool add_bos, bool add_eos, bool reverse, bool emit_unk_piece) { + if (add_bos || add_eos || reverse || emit_unk_piece) { + throw sentencepiece::util::Status( + sentencepiece::util::StatusCode::kUnimplemented, + "add_bos, add_eos, reverse, and emit_unk_piece is not supported in proto API"); + } +} + +inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp, + sentencepiece::ImmutableSentencePieceText *proto, + bool add_bos, bool add_eos, bool reverse, bool emit_unk_piece) { + if (add_bos || add_eos || reverse 
|| emit_unk_piece) { + throw sentencepiece::util::Status( + sentencepiece::util::StatusCode::kUnimplemented, + "add_bos, add_eos, reverse, and emit_unk_piece is not supported in proto API"); + } +} + +inline void CheckIds(const std::vector &ids, int num_pieces) { + for (int id : ids) { + if (id < 0 || id >= num_pieces) { + throw sentencepiece::util::Status( + sentencepiece::util::StatusCode::kOutOfRange, + "piece id is out of range."); + } + } +} + +inline void CheckIds(const std::vector &ids, int num_pieces) {} + +template +inline void ConvertToUnicodeSpans(T *proto) {} + +template <> +inline void ConvertToUnicodeSpans(sentencepiece::ImmutableSentencePieceText *proto) { + proto->ConvertToUnicodeSpans(); +} + +template <> +inline void ConvertToUnicodeSpans(sentencepiece::ImmutableNBestSentencePieceText *proto) { + proto->ConvertToUnicodeSpans(); +} + +class ThreadPool { + public: + explicit ThreadPool(size_t request_size) : + request_size_(request_size) {} + + virtual ~ThreadPool() { + for (auto &task : tasks_) { + task.join(); + } + } + + void Schedule(std::function closure) { + static constexpr size_t kMinThreadSize = 2; + if (request_size_ < kMinThreadSize) { + closure(); + } else { + tasks_.emplace_back(closure); + } + } + + private: + size_t request_size_ = 0; + std::vector tasks_; +}; + +template +inline void InitNumThreads(const std::vector &ins, int *num_threads) { + if (*num_threads < 0) { + *num_threads = std::thread::hardware_concurrency(); + } + *num_threads = std::max(1, + std::min({*num_threads, + static_cast(ins.size()), 256})); +} + +#define DEFINE_ENCODE_BATCH_FUNC_IMPL(FuncName, InType, OutType) \ + std::vector outs(ins.size()); \ + InitNumThreads(ins, &num_threads); \ + { \ + ThreadPool pool(ins.size()); \ + std::atomic index = 0; \ + for (int n = 0; n < num_threads; ++n) { \ + pool.Schedule([&]() { \ + size_t i = 0; \ + while ((i = std::atomic_fetch_add(&index, 1)) < outs.size()) { \ + auto out = enable_sampling ? 
\ + self->Sample##FuncName(ins[i], \ + nbest_size, alpha) : \ + self->FuncName(ins[i]); \ + RewriteIds(*self, &out, add_bos, add_eos, reverse, \ + emit_unk_piece); \ + ConvertToUnicodeSpans(&out); \ + outs[i] = std::move(out); \ + } \ + }); \ + } \ + } \ + return outs; + +#define DEFINE_DECODE_BATCH_FUNC_IMPL(FuncName, InType, OutType) \ + std::vector outs(ins.size()); \ + InitNumThreads(ins, &num_threads); \ + { \ + std::atomic index = 0; \ + ThreadPool pool(ins.size()); \ + for (int n = 0; n < num_threads; ++n) { \ + pool.Schedule([&]() { \ + size_t i = 0; \ + while ((i = std::atomic_fetch_add(&index, 1)) < outs.size()) { \ + CheckIds(ins[i], self->GetPieceSize()); \ + auto out = self->FuncName(ins[i]); \ + ConvertToUnicodeSpans(&out); \ + outs[i] = std::move(out); \ + } \ + }); \ + } \ + } \ + return outs; + +} // namespace %} %exception { @@ -164,25 +299,62 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { } } +%apply unsigned int { uint32_t } + %ignore sentencepiece::util::Status; %ignore sentencepiece::util::StatusCode; %ignore absl::string_view; +%ignore std::string_view; %ignore sentencepiece::SentencePieceText; %ignore sentencepiece::NormalizerSpec; %ignore sentencepiece::TrainerSpec; - %ignore sentencepiece::SentencePieceProcessor::status; +%ignore sentencepiece::ImmutableSentencePieceText::mutable_proto; +%ignore sentencepiece::ImmutableSentencePieceText::pieces() const; +%ignore sentencepiece::ImmutableSentencePieceText::ConvertToUnicodeSpans; +%ignore sentencepiece::ImmutableNBestSentencePieceText::mutable_proto; +%ignore sentencepiece::ImmutableNBestSentencePieceText::nbests() const; +%ignore sentencepiece::ImmutableNBestSentencePieceText::ConvertToUnicodeSpans; + %ignore sentencepiece::SentencePieceProcessor::Encode; %ignore sentencepiece::SentencePieceProcessor::SampleEncode; %ignore sentencepiece::SentencePieceProcessor::NBestEncode; +%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScore; %ignore 
sentencepiece::SentencePieceProcessor::Decode; + +%ignore sentencepiece::SentencePieceProcessor::EncodeAsPieces; +%ignore sentencepiece::SentencePieceProcessor::EncodeAsIds; +%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAsIds; +%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAsPieces; +%ignore sentencepiece::SentencePieceProcessor::NBestEncodeAsIds; +%ignore sentencepiece::SentencePieceProcessor::NBestEncodeAsPieces; +%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScoreAsIds; +%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScoreAsPieces; %ignore sentencepiece::SentencePieceProcessor::DecodeIds; +%ignore sentencepiece::SentencePieceProcessor::DecodePieces; + +%ignore sentencepiece::SentencePieceProcessor::EncodeAsSerializedProto; +%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAsSerializedProto; +%ignore sentencepiece::SentencePieceProcessor::NBestEncodeAsSerializedProto; +%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScoreAsSerializedProto; +%ignore sentencepiece::SentencePieceProcessor::DecodePiecesAsSerializedProto; %ignore sentencepiece::SentencePieceProcessor::DecodeIdsAsSerializedProto; + +%ignore sentencepiece::SentencePieceProcessor::EncodeAsImmutableProto; +%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAsImmutableProto; +%ignore sentencepiece::SentencePieceProcessor::NBestEncodeAsImmutableProto; +%ignore sentencepiece::SentencePieceProcessor::SampleEncodeAndScoreAsImmutableProto; +%ignore sentencepiece::SentencePieceProcessor::DecodePiecesAsImmutableProto; +%ignore sentencepiece::SentencePieceProcessor::DecodeIdsAsImmutableProto; + %ignore sentencepiece::SentencePieceProcessor::model_proto; %ignore sentencepiece::SentencePieceProcessor::Load; %ignore sentencepiece::SentencePieceProcessor::LoadOrDie; +%ignore sentencepiece::SentencePieceProcessor::SetModel; +%ignore sentencepiece::SentencePieceProcessor::SetNormalizer; %ignore 
sentencepiece::pretokenizer::PretokenizerForTrainingInterface; %ignore sentencepiece::SentenceIterator; +%ignore sentencepiece::ConvertToUnicodeSpans; %ignore sentencepiece::SentencePieceTrainer::Train; %ignore sentencepiece::SentencePieceTrainer::GetNormalizerSpec; %ignore sentencepiece::SentencePieceTrainer::PopulateNormalizerSpec; @@ -193,31 +365,314 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { %ignore sentencepiece::SentencePieceTrainer::SetPretokenizerForTraining; %ignore sentencepiece::SentencePieceTrainer::GetPretokenizerForTraining; +%ignore sentencepiece::io::LoadModelProto; +%ignore sentencepiece::io::SaveModelProto; + %extend sentencepiece::SentencePieceProcessor { sentencepiece::util::Status LoadFromFile(absl::string_view arg) { return $self->Load(arg); } - std::string DecodeIdsWithCheck( - const std::vector &ids) const { - for (int id : ids) - if (id < 0 || id >= $self->GetPieceSize()) - throw sentencepiece::util::Status( - sentencepiece::util::StatusCode::kOutOfRange, - "piece id is out of range."); + ///////////////////////////////////////////////////////////////////////////// + // EncodeAs* (Single request) + std::vector _EncodeAsIds(absl::string_view text, + bool enable_sampling, + int nbest_size, float alpha, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + auto ids = enable_sampling ? + $self->SampleEncodeAsIds(text, nbest_size, alpha) : + $self->EncodeAsIds(text); + RewriteIds(*$self, &ids, add_bos, add_eos, reverse, emit_unk_piece); + return ids; + } + + std::vector _EncodeAsPieces(absl::string_view text, + bool enable_sampling, + int nbest_size, float alpha, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + auto pieces = enable_sampling ? 
+ $self->SampleEncodeAsPieces(text, nbest_size, alpha) : + $self->EncodeAsPieces(text); + RewriteIds(*$self, &pieces, add_bos, add_eos, reverse, emit_unk_piece); + return pieces; + } + + sentencepiece::util::bytes _EncodeAsSerializedProto(absl::string_view text, + bool enable_sampling, + int nbest_size, float alpha, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + auto proto = enable_sampling ? + $self->SampleEncodeAsSerializedProto(text, nbest_size, alpha) : + $self->EncodeAsSerializedProto(text); + RewriteIds(*$self, &proto, add_bos, add_eos, reverse, emit_unk_piece); + return proto; + } + + sentencepiece::ImmutableSentencePieceText + _EncodeAsImmutableProto(absl::string_view text, + bool enable_sampling, + int nbest_size, float alpha, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + auto proto = enable_sampling ? + $self->SampleEncodeAsImmutableProto(text, nbest_size, alpha) : + $self->EncodeAsImmutableProto(text); + proto.ConvertToUnicodeSpans(); + RewriteIds(*$self, &proto, add_bos, add_eos, reverse, emit_unk_piece); + return proto; + } + + ///////////////////////////////////////////////////////////////////////////// + // EncodeAs* (Batch request) + std::vector> _EncodeAsIdsBatch( + const std::vector &ins, int num_threads, + bool enable_sampling, int nbest_size, float alpha, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsIds, + absl::string_view, std::vector); + } + + std::vector> _EncodeAsPiecesBatch( + const std::vector &ins, int num_threads, + bool enable_sampling, int nbest_size, float alpha, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsPieces, + absl::string_view, std::vector); + } + + BytesArray _EncodeAsSerializedProtoBatch( + const std::vector &ins, int num_threads, + bool enable_sampling, int nbest_size, float alpha, + bool add_bos, bool add_eos, bool reverse, 
+ bool emit_unk_piece) const { + DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsSerializedProto, + absl::string_view, + sentencepiece::util::bytes); + } + + std::vector + _EncodeAsImmutableProtoBatch( + const std::vector &ins, int num_threads, + bool enable_sampling, int nbest_size, float alpha, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsImmutableProto, + absl::string_view, + sentencepiece::ImmutableSentencePieceText); + } + + ///////////////////////////////////////////////////////////////////////////// + // DecodeAs* (Single request) + std::string _DecodeIds(const std::vector &ids) const { + CheckIds(ids, $self->GetPieceSize()); return $self->DecodeIds(ids); } - util::bytes DecodeIdsAsSerializedProtoWithCheck( + std::string _DecodePieces(const std::vector &pieces) const { + return $self->DecodePieces(pieces); + } + + sentencepiece::util::bytes _DecodeIdsAsSerializedProto( const std::vector &ids) const { - for (int id : ids) - if (id < 0 || id >= $self->GetPieceSize()) - throw sentencepiece::util::Status( - sentencepiece::util::StatusCode::kOutOfRange, - "piece id is out of range."); + CheckIds(ids, $self->GetPieceSize()); return $self->DecodeIdsAsSerializedProto(ids); } + sentencepiece::util::bytes _DecodePiecesAsSerializedProto( + const std::vector &pieces) const { + CheckIds(pieces, $self->GetPieceSize()); + return $self->DecodePiecesAsSerializedProto(pieces); + } + + sentencepiece::ImmutableSentencePieceText _DecodeIdsAsImmutableProto( + const std::vector &ids) const { + CheckIds(ids, $self->GetPieceSize()); + auto proto = $self->DecodeIdsAsImmutableProto(ids); + proto.ConvertToUnicodeSpans(); + return proto; + } + + sentencepiece::ImmutableSentencePieceText _DecodePiecesAsImmutableProto( + const std::vector &pieces) const { + CheckIds(pieces, $self->GetPieceSize()); + auto proto= $self->DecodePiecesAsImmutableProto(pieces); + proto.ConvertToUnicodeSpans(); + return proto; + } + + 
///////////////////////////////////////////////////////////////////////////// + // DecodeAs* (Batch request) + std::vector _DecodeIdsBatch( + const std::vector> &ins, int num_threads) const { + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodeIds, int, std::string); + } + + BytesArray _DecodeIdsAsSerializedProtoBatch( + const std::vector> &ins, int num_threads) const { + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodeIdsAsSerializedProto, int, + sentencepiece::util::bytes); + } + + std::vector + _DecodeIdsAsImmutableProtoBatch( + const std::vector> &ins, int num_threads) const { + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodeIdsAsImmutableProto, int, + sentencepiece::ImmutableSentencePieceText); + } + + std::vector _DecodePiecesBatch( + const std::vector> &ins, int num_threads) const { + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodePieces, std::string, std::string); + } + + BytesArray _DecodePiecesAsSerializedProtoBatch( + const std::vector> &ins, int num_threads) const { + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodePiecesAsSerializedProto, std::string, + sentencepiece::util::bytes); + } + + std::vector + _DecodePiecesAsImmutableProtoBatch( + const std::vector> &ins, int num_threads) const { + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodePiecesAsImmutableProto, std::string, + sentencepiece::ImmutableSentencePieceText); + } + + //////////////////////////////////////////////////////////////////////////// + // NBestEncodeAs* (Single request) + std::vector> + _NBestEncodeAsIds(absl::string_view text, + int nbest_size, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + auto idss = $self->NBestEncodeAsIds(text, nbest_size); + for (auto &ids : idss) { + RewriteIds(*$self, &ids, add_bos, add_eos, reverse, emit_unk_piece); + } + return idss; + } + + std::vector> + _NBestEncodeAsPieces(absl::string_view text, + int nbest_size, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + auto piecess = $self->NBestEncodeAsPieces(text, nbest_size); + for (auto &pieces : piecess) { + 
RewriteIds(*$self, &pieces, add_bos, add_eos, reverse, emit_unk_piece); + } + return piecess; + } + + sentencepiece::util::bytes + _NBestEncodeAsSerializedProto(absl::string_view text, + int nbest_size, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + RewriteIds(*$self, static_cast(nullptr), + add_bos, add_eos, reverse, emit_unk_piece); + return $self->NBestEncodeAsSerializedProto(text, nbest_size); + } + + sentencepiece::ImmutableNBestSentencePieceText + _NBestEncodeAsImmutableProto(absl::string_view text, + int nbest_size, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + RewriteIds(*$self, static_cast(nullptr), + add_bos, add_eos, reverse, emit_unk_piece); + auto proto = $self->NBestEncodeAsImmutableProto(text, nbest_size); + proto.ConvertToUnicodeSpans(); + return proto; + } + + + ///////////////////////////////////////////////////////////////////////////// + // SampleEncodeAndScoreAs* (Single request) + std::vector, float>> + _SampleEncodeAndScoreAsIds(absl::string_view text, + int num_samples, float alpha, bool wor, + bool include_best, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + auto idss = $self->SampleEncodeAndScoreAsIds(text, num_samples, + alpha, wor, include_best); + for (auto &ids : idss) { + RewriteIds(*$self, &ids.first, add_bos, add_eos, reverse, emit_unk_piece); + } + return idss; + } + + std::vector, float>> + _SampleEncodeAndScoreAsPieces(absl::string_view text, + int num_samples, float alpha, bool wor, + bool include_best, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + auto piecess = $self->SampleEncodeAndScoreAsPieces(text, num_samples, + alpha, wor, include_best); + for (auto &pieces : piecess) { + RewriteIds(*$self, &pieces.first, add_bos, add_eos, reverse, emit_unk_piece); + } + return piecess; + } + + sentencepiece::util::bytes + _SampleEncodeAndScoreAsSerializedProto(absl::string_view text, + int num_samples, float alpha, 
bool wor, + bool include_best, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + RewriteIds(*$self, static_cast(nullptr), + add_bos, add_eos, reverse, emit_unk_piece); + return $self->SampleEncodeAndScoreAsSerializedProto(text, num_samples, + alpha, wor, include_best); + } + + sentencepiece::ImmutableNBestSentencePieceText + _SampleEncodeAndScoreAsImmutableProto(absl::string_view text, + int num_samples, float alpha, bool wor, + bool include_best, + bool add_bos, bool add_eos, bool reverse, + bool emit_unk_piece) const { + RewriteIds(*$self, static_cast(nullptr), + add_bos, add_eos, reverse, emit_unk_piece); + auto proto = $self->SampleEncodeAndScoreAsImmutableProto(text, num_samples, + alpha, wor, include_best); + proto.ConvertToUnicodeSpans(); + return proto; + } + + + // Calculate Entropy + float _CalculateEntropy(absl::string_view text, float alpha) { + return $self->CalculateEntropy(text, alpha); + } + + std::vector _CalculateEntropyBatch(const std::vector &ins, + float alpha, int num_threads) { + std::vector outs(ins.size()); + InitNumThreads(ins, &num_threads); + { + ThreadPool pool(ins.size()); + std::atomic index = 0; + for (int n = 0; n < num_threads; ++n) { + pool.Schedule([&]() { + size_t i = 0; + while ((i = std::atomic_fetch_add(&index, 1)) < outs.size()) { + outs[i] = self->CalculateEntropy(ins[i], alpha); + } + }); + } + } + return outs; + } + %pythoncode { def Init(self, model_file=None, @@ -226,9 +681,11 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { add_bos=False, add_eos=False, reverse=False, + emit_unk_piece=False, enable_sampling=False, nbest_size=-1, - alpha=0.1): + alpha=0.1, + num_threads=-1): """Initialzie sentencepieceProcessor. Args: @@ -239,14 +696,16 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { add_eos: Add to the result (Default = false) / is added after reversing (if enabled). 
reverse: Reverses the tokenized sequence (Default = false) - nbest_size: sampling parameters for unigram. Invalid for BPE-Dropout. + emit_unk_piece: Emits the unk literal string (Default = false) + nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout. nbest_size = {0,1}: No sampling is performed. nbest_size > 1: samples from the nbest_size results. nbest_size < 0: assuming that nbest_size is infinite and samples from the all hypothesis (lattice) using forward-filtering-and-backward-sampling algorithm. alpha: Soothing parameter for unigram sampling, and dropout probability of - merge operations for BPE-dropout. + merge operations for BPE-dropout. + num_threads: number of threads in batch processing (Default = -1, auto-detected) """ _sentencepiece_processor_init_native(self) @@ -254,9 +713,11 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { self._add_bos = add_bos self._add_eos = add_eos self._reverse = reverse + self._emit_unk_piece = emit_unk_piece self._enable_sampling = enable_sampling self._nbest_size = nbest_size self._alpha = alpha + self._num_threads = num_threads if model_file or model_proto: self.Load(model_file=model_file, model_proto=model_proto) @@ -267,9 +728,11 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { add_bos=None, add_eos=None, reverse=None, + emit_unk_piece=None, enable_sampling=None, nbest_size=None, - alpha=None): + alpha=None, + num_threads=None): """Encode text input to segmented ids or tokens. Args: @@ -277,16 +740,18 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { out_type: output type. int or str. add_bos: Add to the result (Default = false) add_eos: Add to the result (Default = false) / is added after - reversing (if enabled). + reversing (if enabled). reverse: Reverses the tokenized sequence (Default = false) - nbest_size: sampling parameters for unigram. Invalid for BPE-Dropout. 
+ emit_unk_piece: Emits the unk literal string (Default = false) + nbest_size: sampling parameters for unigram. Invalid in BPE-Dropout. nbest_size = {0,1}: No sampling is performed. nbest_size > 1: samples from the nbest_size results. nbest_size < 0: assuming that nbest_size is infinite and samples - from the all hypothesis (lattice) using - forward-filtering-and-backward-sampling algorithm. + from the all hypothesis (lattice) using + forward-filtering-and-backward-sampling algorithm. alpha: Soothing parameter for unigram sampling, and merge probability for BPE-dropout (probablity 'p' in BPE-dropout paper). + num_threads: the number of threads used in the batch processing (Default = -1). """ if out_type is None: @@ -297,12 +762,16 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { add_eos = self._add_eos if reverse is None: reverse = self._reverse + if emit_unk_piece is None: + emit_unk_piece = self._emit_unk_piece if enable_sampling is None: enable_sampling = self._enable_sampling if nbest_size is None: nbest_size = self._nbest_size if alpha is None: alpha = self._alpha + if num_threads is None: + num_threads = self._num_threads if enable_sampling == True and (nbest_size is None or nbest_size == 0 or nbest_size == 1 or alpha is None): @@ -313,33 +782,224 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { 'instead of nbest segmentations.' 
) + if num_threads is None or type(num_threads) is not int: + raise RuntimeError('num_threads must be int') + + if type(input) is list: + if out_type is int: + return self._EncodeAsIdsBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._EncodeAsPiecesBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'serialized_proto' or out_type == 'proto': + return self._EncodeAsSerializedProtoBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'immutable_proto': + return self._EncodeAsImmutableProtoBatch(input, num_threads, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + + if out_type is int: + return self._EncodeAsIds(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._EncodeAsPieces(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'serialized_proto' or out_type == 'proto': + return self._EncodeAsSerializedProto(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'immutable_proto': + return self._EncodeAsImmutableProto(input, enable_sampling, nbest_size, + alpha, add_bos, add_eos, reverse, emit_unk_piece) + + raise RuntimeError('unknown out_type={}'.format(out_type)) + return None + + + def EncodeAsPieces(self, input, **kwargs): + return self.Encode(input=input, out_type=str, **kwargs) + + + def EncodeAsIds(self, input, **kwargs): + return self.Encode(input=input, out_type=int, **kwargs) + + + def EncodeAsSerializedProto(self, input, **kwargs): + return self.Encode(input=input, out_type='serialized_proto', **kwargs) + + + def EncodeAsImmutableProto(self, input, **kwargs): + return self.Encode(input=input, out_type='immutable_proto', 
**kwargs) + + + def SampleEncodeAsPieces(self, input, nbest_size=None, alpha=None, **kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type=str, enable_sampling=True, **kwargs) + + + def SampleEncodeAsIds(self, input, nbest_size=None, alpha=None,**kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type=int, enable_sampling=True, **kwargs) + + + def SampleEncodeAsSerializedProto(self, input, nbest_size=None, alpha=None, **kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type='serialized_proto', enable_sampling=True, **kwargs) + + + def SampleEncodeAsImmutableProto(self, input, nbest_size=None, alpha=None, **kwargs): + return self.Encode(input=input, nbest_size=nbest_size, alpha=alpha, + out_type='immutable_proto', enable_sampling=True, **kwargs) + + + def NBestEncode(self, + input, + out_type=None, + add_bos=None, + add_eos=None, + reverse=None, + emit_unk_piece=None, + nbest_size=None): + """NBestEncode text input to segmented ids or tokens. + + Args: + input: input string. accepsts list of string. + out_type: output type. int or str. + add_bos: Add to the result (Default = false) + add_eos: Add to the result (Default = false) / is added after reversing (if enabled). 
+ reverse: Reverses the tokenized sequence (Default = false) + emit_unk_piece: Emits the unk literal string (Default = false) + nbest_size: nbest size + """ + + if out_type is None: + out_type = self._out_type + if add_bos is None: + add_bos = self._add_bos + if add_eos is None: + add_eos = self._add_eos + if reverse is None: + reverse = self._reverse + if emit_unk_piece is None: + emit_unk_piece = self._emit_unk_piece + if nbest_size is None: + nbest_size = self._nbest_size + + if nbest_size <= 0: + nbest_size=1 + def _encode(text): if out_type is int: - if enable_sampling: - result = self.SampleEncodeAsIds(text, nbest_size, alpha) - else: - result = self.EncodeAsIds(text) - else: - if enable_sampling: - result = self.SampleEncodeAsPieces(text, nbest_size, alpha) - else: - result = self.EncodeAsPieces(text) + return self._NBestEncodeAsIds(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._NBestEncodeAsPieces(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'serialized_proto' or out_type == 'proto': + return self._NBestEncodeAsSerializedProto(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type == 'immutable_proto': + return self._NBestEncodeAsImmutableProto(text, nbest_size, + add_bos, add_eos, reverse, emit_unk_piece) + + raise RuntimeError('unknown out_type') - if reverse: - result.reverse() - if add_bos: - if out_type is int: - result = [self.bos_id()] + result - else: - result = [self.IdToPiece(self.bos_id())] + result + if type(input) is list: + return [_encode(n) for n in input] - if add_eos: - if out_type is int: - result = result + [self.eos_id()] - else: - result = result + [self.IdToPiece(self.eos_id())] + return _encode(input) + + + def NBestEncodeAsPieces(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type=str, **kwargs) + + + def NBestEncodeAsIds(self, input, nbest_size=None, **kwargs): + 
return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type=int, **kwargs) + + + def NBestEncodeAsSerializedProto(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type='serialized_proto', **kwargs) + + + def NBestEncodeAsImmutableProto(self, input, nbest_size=None, **kwargs): + return self.NBestEncode(input=input, nbest_size=nbest_size, + out_type='immutable_proto', **kwargs) + + + def SampleEncodeAndScore(self, + input, + out_type=None, + add_bos=None, + add_eos=None, + reverse=None, + emit_unk_piece=None, + num_samples=None, + alpha=None, + wor=None, + include_best=None): + """SampleEncodeAndScore text input to segmented ids or tokens. + + Args: + input: input string. accepsts list of string. + out_type: output type. int or str or 'serialized_proto' or 'immutable_proto' + add_bos: Add to the result (Default = false) + add_eos: Add to the result (Default = false) / is added after reversing (if enabled). + reverse: Reverses the tokenized sequence (Default = false) + emit_unk_piece: Emits the unk literal string (Default = false) + num_samples: How many samples to return (Default = 1) + alpha: inverse temperature for sampling + wor: whether to sample without replacement (Default = false) + include_best: whether to include the best tokenization, requires wor=True (Default = false) + """ + + if out_type is None: + out_type = self._out_type + if add_bos is None: + add_bos = self._add_bos + if add_eos is None: + add_eos = self._add_eos + if reverse is None: + reverse = self._reverse + if emit_unk_piece is None: + emit_unk_piece = self._emit_unk_piece + if num_samples is None: + num_samples = 1 + if alpha is None: + alpha = 1. 
+ if wor is None: + wor = False + if include_best is None: + include_best = False + + if num_samples <= 0: + raise RuntimeError('num_examples must be positive') + + if include_best and not wor: + raise RuntimeError('When include_best is True, We must specify "wor = True".') + + + def _encode(text): + if out_type is int: + return self._SampleEncodeAndScoreAsIds(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + if out_type is str: + return self._SampleEncodeAndScoreAsPieces(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + + if out_type == 'serialized_proto' or out_type == 'proto': + return self._SampleEncodeAndScoreAsSerializedProto(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + + if out_type == 'immutable_proto': + return self._SampleEncodeAndScoreAsImmutableProto(text, num_samples, alpha, wor, include_best, + add_bos, add_eos, reverse, emit_unk_piece) + + raise RuntimeError('unknown output type') - return result if type(input) is list: return [_encode(n) for n in input] @@ -347,27 +1007,137 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { return _encode(input) - def Decode(self, input): - """Decode processed id or token sequences.""" + def SampleEncodeAndScoreAsPieces(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type=str, **kwargs) + + + def SampleEncodeAndScoreAsIds(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type=int, **kwargs) + + + def SampleEncodeAndScoreAsSerializedProto(self, input, num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type='serialized_proto', **kwargs) + + + def SampleEncodeAndScoreAsImmutableProto(self, input, 
num_samples=None, alpha=None, **kwargs): + return self.SampleEncodeAndScore(input=input, num_samples=num_samples, alpha=alpha, + out_type='immutable_proto', **kwargs) + + + def Decode(self, input, out_type=str, num_threads=None): + """Decode processed id or token sequences. + + Args: + out_type: output type. str or 'serialized_proto' or 'immutable_proto' (Default = str) + num_threads: the number of threads used in the batch processing (Default = -1). + """ + + if num_threads is None: + num_threads = self._num_threads + + if num_threads is None or type(num_threads) is not int: + raise RuntimeError('num_threads must be int') if not input: - return self.DecodeIds([]) - elif type(input) is int: - return self.DecodeIdsWithCheck([input]) - elif type(input) is str: - return self.DecodePieces([input]) + return '' + + if out_type is str: + if type(input) is int: + return self._DecodeIds([input]) + if type(input) is str: + return self._DecodePieces([input]) + + if type(input) is list: + if len(input) == 0 or type(input[0]) is int: + return self._DecodeIds(input) + if type(input[0]) is str: + return self._DecodePieces(input) + + if type(input[0]) is list: + if len(input[0]) == 0 or type(input[0][0]) is int: + return self._DecodeIdsBatch(input, num_threads) + if type(input[0][0]) is str: + return self._DecodePiecesBatch(input, num_threads) + + if out_type == 'serialized_proto': + if type(input) is int: + return self._DecodeIdsAsSerializedProto([input]) + if type(input) is str: + return self._DecodePiecesAsSerializedProto([input]) + + if type(input) is list: + if len(input) == 0 or type(input[0]) is int: + return self._DecodeIdsAsSerializedProto(input) + if type(input[0]) is str: + return self._DecodePiecesAsSerializedProto(input) + + if type(input[0]) is list: + if len(input[0]) == 0 or type(input[0][0]) is int: + return self._DecodeIdsAsSerializedProtoBatch(input, num_threads) + if type(input[0][0]) is str: + return self._DecodePiecesAsSerializedProtoBatch(input, num_threads) 
+ + + if out_type == 'immutable_proto': + if type(input) is int: + return self._DecodeIdsAsImmutableProto([input]) + if type(input) is str: + return self._DecodePiecesAsImmutableProto([input]) + + if type(input) is list: + if len(input) == 0 or type(input[0]) is int: + return self._DecodeIdsAsImmutableProto(input) + if type(input[0]) is str: + return self._DecodePiecesAsImmutableProto(input) + + if type(input[0]) is list: + if len(input[0]) == 0 or type(input[0][0]) is int: + return self._DecodeIdsAsImmutableProtoBatch(input, num_threads) + if type(input[0][0]) is str: + return self._DecodePiecesAsImmutableProtoBatch(input, num_threads) + + + raise RuntimeError('unknown output or input type') + return None + + + def DecodePieces(self, input, out_type=str, **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodeIds(self, input, out_type=str, **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) - def _decode(input): - if not input: - return self.DecodeIds([]) - if type(input[0]) is int: - return self.DecodeIdsWithCheck(input) - return self.DecodePieces(input) - if type(input[0]) is list: - return [_decode(n) for n in input] + def DecodePiecesAsSerializedProto(self, input, out_type='serialized_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) - return _decode(input) + + def DecodeIdsAsSerializedProto(self, input, out_type='serialized_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodePiecesAsImmutableProto(self, input, out_type='immutable_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def DecodeIdsAsImmutableProto(self, input, out_type='immutable_proto', **kwargs): + return self.Decode(input=input, out_type=out_type, **kwargs) + + + def CalculateEntropy(self, input, alpha, num_threads=None): + """Calculate sentence entropy""" + if type(input) is list: + if num_threads is None: + num_threads = 
self._num_threads + if num_threads is None or type(num_threads) is not int: + raise RuntimeError('num_threads must be int') + return self._CalculateEntropyBatch(input, alpha, num_threads) + + return self._CalculateEntropy(input, alpha) def piece_size(self): @@ -448,7 +1218,7 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { %pythoncode { @staticmethod - def Train(arg=None, **kwargs): + def _Train(arg=None, **kwargs): """Train Sentencepiece model. Accept both kwargs and legacy string arg.""" if arg is not None and type(arg) is str: return SentencePieceTrainer._TrainFromString(arg) @@ -491,13 +1261,153 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { return SentencePieceTrainer._TrainFromMap(new_kwargs) return None + + @staticmethod + def Train(arg=None, logstream=None, **kwargs): + with _LogStream(ostream=logstream): + SentencePieceTrainer._Train(arg=arg, **kwargs) +} +} + +%extend sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece { + %rename(_piece) piece; + %rename(_id) id; + %rename(_surface) surface; + %rename(_begin) begin; + %rename(_end) end; + + %pythoncode %{ + piece = property(_piece) + surface = property(_surface) + id = property(_id) + begin = property(_begin) + end = property(_end) + + def __str__(self): + return ('piece: \"{}\"\n' + 'id: {}\n' + 'surface: \"{}\"\n' + 'begin: {}\n' + 'end: {}\n').format(self.piece, self.id, self.surface, + self.begin, self.end) + + def __eq__(self, other): + return self.piece == other.piece and self.id == other.id and self.surface == other.surface and self.begin == other.begin and self.end == other.end + + def __hash__(self): + return hash(str(self)) + + __repr__ = __str__ + %} +} + +%extend sentencepiece::ImmutableSentencePieceText { + %rename(_text) text; + %rename(_score) score; + %rename(_pieces) pieces; + %rename(_pieces_size) pieces_size; + + %pythoncode %{ + text = property(_text) + score = property(_score) + + class ImmutableSentencePieceIterator: + def 
__init__(self, proto): + self.proto = proto + self.len = self.proto._pieces_size() + + def __len__(self): + return self.len + + def __getitem__(self, index): + if isinstance(index, slice): + return [self.proto._pieces(i) for i in range(self.len)][index.start:index.stop:index.step] + if index < 0: + index = index + self.len + if index < 0 or index >= self.len: + raise IndexError('piece index is out of range') + return self.proto._pieces(index) + + def __str__(self): + return '\n'.join(['pieces {{\n{}}}'.format(str(x)) for x in self]) + + __repr__ = __str__ + + @property + def pieces(self): + return ImmutableSentencePieceText.ImmutableSentencePieceIterator(self) + + def __eq__(self, other): + return self.SerializeAsString() == other.SerializeAsString() + + def __hash__(self): + return hash(self.SerializeAsString()) + + def __str__(self): + return ('text: \"{}\"\n' + 'score: {}\n' + '{}').format(self.text, self.score, + '\n'.join(['pieces {{\n{}}}'.format(str(x)) for x in self.pieces])) + + __repr__ = __str__ + %} } + +%extend sentencepiece::ImmutableNBestSentencePieceText { + %rename(_nbests) nbests; + %rename(_nbests_size) nbests_size; + + %pythoncode %{ + class ImmutableSentencePieceTextIterator: + def __init__(self, proto): + self.proto = proto + self.len = self.proto._nbests_size() + + def __len__(self): + return self.len + + def __getitem__(self, index): + if isinstance(index, slice): + return [self.proto._nbests(i) for i in range(self.len)][index.start:index.stop:index.step] + if index < 0: + index = index + self.len + if index < 0 or index >= self.len: + raise IndexError('nbests index is out of range') + return self.proto._nbests(index) + + def __str__(self): + return '\n'.join(['nbests {{\n{}}}'.format(str(x)) for x in self]) + + __repr__ = __str__ + + @property + def nbests(self): + return ImmutableNBestSentencePieceText.ImmutableSentencePieceTextIterator(self) + + def __eq__(self, other): + return self.SerializeAsString() == other.SerializeAsString() + + 
def __hash__(self): + return hash(self.SerializeAsString()) + + def __str__(self): + return '\n'.join(['nbests {{\n{}}}'.format(str(x)) for x in self.nbests]) + + __repr__ = __str__ + %} } %typemap(out) std::vector { $result = PyList_New($1.size()); for (size_t i = 0; i < $1.size(); ++i) { - PyList_SetItem($result, i, PyInt_FromLong(static_cast($1[i]))); + PyList_SET_ITEM($result, i, PyInt_FromLong(static_cast($1[i]))); + } +} + +%typemap(out) std::vector { + $result = PyList_New($1.size()); + for (size_t i = 0; i < $1.size(); ++i) { + PyList_SET_ITEM($result, i, PyFloat_FromDouble(static_cast($1[i]))); } } @@ -506,9 +1416,9 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { for (size_t i = 0; i < $1.size(); ++i) { PyObject *obj = PyList_New($1[i].size()); for (size_t j = 0; j < $1[i].size(); ++j) { - PyList_SetItem(obj, j, PyInt_FromLong(static_cast($1[i][j]))); + PyList_SET_ITEM(obj, j, PyInt_FromLong(static_cast($1[i][j]))); } - PyList_SetItem($result, i, obj); + PyList_SET_ITEM($result, i, obj); } } @@ -516,7 +1426,14 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { PyObject *input_type = resultobj; $result = PyList_New($1.size()); for (size_t i = 0; i < $1.size(); ++i) { - PyList_SetItem($result, i, MakePyOutputString($1[i], input_type)); + PyList_SET_ITEM($result, i, MakePyOutputString($1[i], input_type)); + } +} + +%typemap(out) BytesArray { + $result = PyList_New($1.size()); + for (size_t i = 0; i < $1.size(); ++i) { + PyList_SET_ITEM($result, i, MakePyOutputBytes($1[i])); } } @@ -526,9 +1443,9 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { for (size_t i = 0; i < $1.size(); ++i) { PyObject *obj = PyList_New($1[i].size()); for (size_t j = 0; j < $1[i].size(); ++j) { - PyList_SetItem(obj, j, MakePyOutputString($1[i][j], input_type)); + PyList_SET_ITEM(obj, j, MakePyOutputString($1[i][j], input_type)); } - PyList_SetItem($result, i, obj); + PyList_SET_ITEM($result, i, obj); } } @@ -572,18 
+1489,18 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { SWIG_fail; } resultobj = ustring.input_type(); - $1 = absl::string_view(ustring.data(), ustring.size()); + $1 = ustring.str(); } -%typemap(in) const std::vector& { - std::vector *out = nullptr; +%typemap(in) const std::vector& { + std::vector *out = nullptr; if (PyList_Check($input)) { const size_t size = PyList_Size($input); - out = new std::vector(size); + out = new std::vector(size); for (size_t i = 0; i < size; ++i) { const PyInputString ustring(PyList_GetItem($input, i)); if (ustring.IsAvalable()) { - (*out)[i] = std::string(ustring.data(), ustring.size()); + (*out)[i] = ustring.str(); } else { PyErr_SetString(PyExc_TypeError, "list must contain strings"); SWIG_fail; @@ -618,6 +1535,69 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { $1 = out; } +%typemap(in) const std::vector>& { + std::vector> *out = nullptr; + if (PyList_Check($input)) { + const size_t size = PyList_Size($input); + out = new std::vector>(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem($input, i); + if (PyList_Check(o)) { + const size_t size2 = PyList_Size(o); + (*out)[i].resize(size2); + for (size_t j = 0; j < size2; ++j) { + const PyInputString ustring(PyList_GetItem(o, j)); + if (ustring.IsAvalable()) { + (*out)[i][j] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError,"list must contain integers"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + $1 = out; +} + +%typemap(in) const std::vector>& { + std::vector> *out = nullptr; + if (PyList_Check($input)) { + const size_t size = PyList_Size($input); + out = new std::vector>(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem($input, i); + if (PyList_Check(o)) { + const size_t size2 = PyList_Size(o); + 
(*out)[i].resize(size2); + for (size_t j = 0; j < size2; ++j) { + PyObject *o2 = PyList_GetItem(o, j); + if (PyInt_Check(o2)) { + (*out)[i][j] = static_cast(PyInt_AsLong(o2)); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + $1 = out; +} + %typemap(in) const std::unordered_map & { std::unordered_map *out = nullptr; if (PyDict_Check($input)) { @@ -643,6 +1623,37 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { $1 = out; } +%typemap(out) std::vector, float>> { + PyObject *input_type = resultobj; + $result = PyList_New($1.size()); + for (size_t i = 0; i < $1.size(); ++i) { + PyObject *obj = PyList_New($1[i].first.size()); + for (size_t j = 0; j < $1[i].first.size(); ++j) { + PyList_SET_ITEM(obj, j, MakePyOutputString($1[i].first[j], input_type)); + } + PyList_SET_ITEM($result, i, PyTuple_Pack(2, obj, PyFloat_FromDouble(static_cast($1[i].second)))); + } +} + +%typemap(out) std::vector, float>> { + $result = PyList_New($1.size()); + for (size_t i = 0; i < $1.size(); ++i) { + PyObject *obj = PyList_New($1[i].first.size()); + for (size_t j = 0; j < $1[i].first.size(); ++j) { + PyList_SET_ITEM(obj, j, PyInt_FromLong(static_cast($1[i].first[j]))); + } + PyList_SET_ITEM($result, i, PyTuple_Pack(2, obj, PyFloat_FromDouble(static_cast($1[i].second)))); + } +} + +%typemap(out) std::vector { + $result = PyList_New($1.size()); + for (size_t i = 0; i < $1.size(); ++i) { + PyObject *obj = SWIG_NewPointerObj(new sentencepiece::ImmutableSentencePieceText($1.at(i)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0); + PyList_SET_ITEM($result, i, obj); + } +} + %typemap(in) sentencepiece::SentenceIterator * { sentencepiece::SentenceIterator *out = nullptr; if (PyIter_Check($input)) { @@ -662,6 +1673,10 @@ class PySentenceIterator : 
public sentencepiece::SentenceIterator { delete $1; } +%typemap(freearg) const std::vector& { + delete $1; +} + %typemap(freearg) const std::vector>& { delete $1; } @@ -670,6 +1685,10 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { delete $1; } +%typemap(freearg) const std::vector& { + delete $1; +} + %typemap(freearg) const std::vector>& { delete $1; } @@ -682,6 +1701,18 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { delete $1; } +%typemap(freearg) sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece { + delete $1; +} + +%typemap(freearg) sentencepiece::ImmutableSentencePieceText { + delete $1; +} + +%typemap(freearg) sentencepiece::ImmutableNBestSentencePieceText { + delete $1; +} + %include %include @@ -690,6 +1721,7 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { import re import csv import sys +import os from io import StringIO from io import BytesIO @@ -729,8 +1761,6 @@ setattr(SentencePieceProcessor, '__init__', SentencePieceProcessor.Init) SentencePieceProcessor.Tokenize = SentencePieceProcessor.Encode SentencePieceProcessor.Detokenize = SentencePieceProcessor.Decode -SentencePieceProcessor.DecodeIds = SentencePieceProcessor.DecodeIdsWithCheck -SentencePieceProcessor.DecodeIdsAsSerializedProto = SentencePieceProcessor.DecodeIdsAsSerializedProtoWithCheck for m in [ 'PieceToId', 'IdToPiece', 'GetScore', 'IsUnknown', 'IsControl', 'IsUnused', @@ -741,4 +1771,24 @@ for m in [ _add_snake_case(SentencePieceProcessor) _add_snake_case(SentencePieceTrainer) set_random_generator_seed = SetRandomGeneratorSeed + +from ._version import __version__ + +class _LogStream(object): + def __init__(self, ostream=None): + self.ostream = ostream + if self.ostream is not None: + self.orig_stream_fileno = sys.stderr.fileno() + + def __enter__(self): + if self.ostream is not None: + self.orig_stream_dup = os.dup(self.orig_stream_fileno) + os.dup2(self.ostream.fileno(), self.orig_stream_fileno) + + def 
__exit__(self, type, value, traceback): + if self.ostream is not None: + os.close(self.orig_stream_fileno) + os.dup2(self.orig_stream_dup, self.orig_stream_fileno) + os.close(self.orig_stream_dup) + self.ostream.close() %} diff --git a/python/src/sentencepiece/sentencepiece_model_pb2.py b/python/src/sentencepiece/sentencepiece_model_pb2.py index 084b8964..b07107d6 100644 --- a/python/src/sentencepiece/sentencepiece_model_pb2.py +++ b/python/src/sentencepiece/sentencepiece_model_pb2.py @@ -1,13 +1,11 @@ +# -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: sentencepiece_model.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection +from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -15,646 +13,32 @@ -DESCRIPTOR = _descriptor.FileDescriptor( - name='sentencepiece_model.proto', - package='sentencepiece', - syntax='proto2', - serialized_pb=_b('\n\x19sentencepiece_model.proto\x12\rsentencepiece\"\xf4\x08\n\x0bTrainerSpec\x12\r\n\x05input\x18\x01 \x03(\t\x12\x14\n\x0cinput_format\x18\x07 \x01(\t\x12\x14\n\x0cmodel_prefix\x18\x02 \x01(\t\x12\x41\n\nmodel_type\x18\x03 \x01(\x0e\x32$.sentencepiece.TrainerSpec.ModelType:\x07UNIGRAM\x12\x18\n\nvocab_size\x18\x04 \x01(\x05:\x04\x38\x30\x30\x30\x12\x17\n\x0f\x61\x63\x63\x65pt_language\x18\x05 \x03(\t\x12 \n\x15self_test_sample_size\x18\x06 \x01(\x05:\x01\x30\x12\"\n\x12\x63haracter_coverage\x18\n \x01(\x02:\x06\x30.9995\x12\x1e\n\x13input_sentence_size\x18\x0b 
\x01(\x05:\x01\x30\x12$\n\x16shuffle_input_sentence\x18\x13 \x01(\x08:\x04true\x12 \n\x14mining_sentence_size\x18\x0c \x01(\x05\x42\x02\x18\x01\x12\"\n\x16training_sentence_size\x18\r \x01(\x05\x42\x02\x18\x01\x12(\n\x17seed_sentencepiece_size\x18\x0e \x01(\x05:\x07\x31\x30\x30\x30\x30\x30\x30\x12\x1e\n\x10shrinking_factor\x18\x0f \x01(\x02:\x04\x30.75\x12!\n\x13max_sentence_length\x18\x12 \x01(\x05:\x04\x34\x31\x39\x32\x12\x17\n\x0bnum_threads\x18\x10 \x01(\x05:\x02\x31\x36\x12\x1d\n\x12num_sub_iterations\x18\x11 \x01(\x05:\x01\x32\x12$\n\x18max_sentencepiece_length\x18\x14 \x01(\x05:\x02\x31\x36\x12%\n\x17split_by_unicode_script\x18\x15 \x01(\x08:\x04true\x12\x1d\n\x0fsplit_by_number\x18\x17 \x01(\x08:\x04true\x12!\n\x13split_by_whitespace\x18\x16 \x01(\x08:\x04true\x12)\n\x1atreat_whitespace_as_suffix\x18\x18 \x01(\x08:\x05\x66\x61lse\x12\x17\n\x0f\x63ontrol_symbols\x18\x1e \x03(\t\x12\x1c\n\x14user_defined_symbols\x18\x1f \x03(\t\x12\x1e\n\x10hard_vocab_limit\x18! \x01(\x08:\x04true\x12\x1c\n\ruse_all_vocab\x18\" \x01(\x08:\x05\x66\x61lse\x12\x11\n\x06unk_id\x18( \x01(\x05:\x01\x30\x12\x11\n\x06\x62os_id\x18) \x01(\x05:\x01\x31\x12\x11\n\x06\x65os_id\x18* \x01(\x05:\x01\x32\x12\x12\n\x06pad_id\x18+ \x01(\x05:\x02-1\x12\x18\n\tunk_piece\x18- \x01(\t:\x05\x12\x16\n\tbos_piece\x18. 
\x01(\t:\x03\x12\x17\n\teos_piece\x18/ \x01(\t:\x04\x12\x18\n\tpad_piece\x18\x30 \x01(\t:\x05\x12\x1a\n\x0bunk_surface\x18, \x01(\t:\x05 \xe2\x81\x87 \"5\n\tModelType\x12\x0b\n\x07UNIGRAM\x10\x01\x12\x07\n\x03\x42PE\x10\x02\x12\x08\n\x04WORD\x10\x03\x12\x08\n\x04\x43HAR\x10\x04*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"\xd1\x01\n\x0eNormalizerSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1c\n\x14precompiled_charsmap\x18\x02 \x01(\x0c\x12\x1e\n\x10\x61\x64\x64_dummy_prefix\x18\x03 \x01(\x08:\x04true\x12&\n\x18remove_extra_whitespaces\x18\x04 \x01(\x08:\x04true\x12 \n\x12\x65scape_whitespaces\x18\x05 \x01(\x08:\x04true\x12\x1e\n\x16normalization_rule_tsv\x18\x06 \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"y\n\x0cSelfTestData\x12\x33\n\x07samples\x18\x01 \x03(\x0b\x32\".sentencepiece.SelfTestData.Sample\x1a)\n\x06Sample\x12\r\n\x05input\x18\x01 \x01(\t\x12\x10\n\x08\x65xpected\x18\x02 \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"\xba\x03\n\nModelProto\x12\x37\n\x06pieces\x18\x01 \x03(\x0b\x32\'.sentencepiece.ModelProto.SentencePiece\x12\x30\n\x0ctrainer_spec\x18\x02 \x01(\x0b\x32\x1a.sentencepiece.TrainerSpec\x12\x36\n\x0fnormalizer_spec\x18\x03 \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x12\x33\n\x0eself_test_data\x18\x04 \x01(\x0b\x32\x1b.sentencepiece.SelfTestData\x1a\xc8\x01\n\rSentencePiece\x12\r\n\x05piece\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\x42\n\x04type\x18\x03 \x01(\x0e\x32,.sentencepiece.ModelProto.SentencePiece.Type:\x06NORMAL\"J\n\x04Type\x12\n\n\x06NORMAL\x10\x01\x12\x0b\n\x07UNKNOWN\x10\x02\x12\x0b\n\x07\x43ONTROL\x10\x03\x12\x10\n\x0cUSER_DEFINED\x10\x04\x12\n\n\x06UNUSED\x10\x05*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\x42\x02H\x03') -) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - - - -_TRAINERSPEC_MODELTYPE = _descriptor.EnumDescriptor( - name='ModelType', - full_name='sentencepiece.TrainerSpec.ModelType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - 
name='UNIGRAM', index=0, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BPE', index=1, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='WORD', index=2, number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CHAR', index=3, number=4, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=1121, - serialized_end=1174, -) -_sym_db.RegisterEnumDescriptor(_TRAINERSPEC_MODELTYPE) - -_MODELPROTO_SENTENCEPIECE_TYPE = _descriptor.EnumDescriptor( - name='Type', - full_name='sentencepiece.ModelProto.SentencePiece.Type', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='NORMAL', index=0, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='UNKNOWN', index=1, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CONTROL', index=2, number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='USER_DEFINED', index=3, number=4, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='UNUSED', index=4, number=5, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=1869, - serialized_end=1943, -) -_sym_db.RegisterEnumDescriptor(_MODELPROTO_SENTENCEPIECE_TYPE) - - -_TRAINERSPEC = _descriptor.Descriptor( - name='TrainerSpec', - full_name='sentencepiece.TrainerSpec', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='input', full_name='sentencepiece.TrainerSpec.input', index=0, - number=1, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='input_format', full_name='sentencepiece.TrainerSpec.input_format', index=1, - number=7, type=9, 
cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='model_prefix', full_name='sentencepiece.TrainerSpec.model_prefix', index=2, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='model_type', full_name='sentencepiece.TrainerSpec.model_type', index=3, - number=3, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='vocab_size', full_name='sentencepiece.TrainerSpec.vocab_size', index=4, - number=4, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=8000, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='accept_language', full_name='sentencepiece.TrainerSpec.accept_language', index=5, - number=5, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='self_test_sample_size', full_name='sentencepiece.TrainerSpec.self_test_sample_size', index=6, - number=6, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='character_coverage', full_name='sentencepiece.TrainerSpec.character_coverage', index=7, - number=10, type=2, 
cpp_type=6, label=1, - has_default_value=True, default_value=float(0.9995), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='input_sentence_size', full_name='sentencepiece.TrainerSpec.input_sentence_size', index=8, - number=11, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='shuffle_input_sentence', full_name='sentencepiece.TrainerSpec.shuffle_input_sentence', index=9, - number=19, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='mining_sentence_size', full_name='sentencepiece.TrainerSpec.mining_sentence_size', index=10, - number=12, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001'))), - _descriptor.FieldDescriptor( - name='training_sentence_size', full_name='sentencepiece.TrainerSpec.training_sentence_size', index=11, - number=13, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001'))), - _descriptor.FieldDescriptor( - name='seed_sentencepiece_size', full_name='sentencepiece.TrainerSpec.seed_sentencepiece_size', index=12, - number=14, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1000000, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, 
extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='shrinking_factor', full_name='sentencepiece.TrainerSpec.shrinking_factor', index=13, - number=15, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.75), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='max_sentence_length', full_name='sentencepiece.TrainerSpec.max_sentence_length', index=14, - number=18, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=4192, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_threads', full_name='sentencepiece.TrainerSpec.num_threads', index=15, - number=16, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=16, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_sub_iterations', full_name='sentencepiece.TrainerSpec.num_sub_iterations', index=16, - number=17, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=2, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='max_sentencepiece_length', full_name='sentencepiece.TrainerSpec.max_sentencepiece_length', index=17, - number=20, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=16, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='split_by_unicode_script', full_name='sentencepiece.TrainerSpec.split_by_unicode_script', index=18, - number=21, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, 
- is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='split_by_number', full_name='sentencepiece.TrainerSpec.split_by_number', index=19, - number=23, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='split_by_whitespace', full_name='sentencepiece.TrainerSpec.split_by_whitespace', index=20, - number=22, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='treat_whitespace_as_suffix', full_name='sentencepiece.TrainerSpec.treat_whitespace_as_suffix', index=21, - number=24, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='control_symbols', full_name='sentencepiece.TrainerSpec.control_symbols', index=22, - number=30, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='user_defined_symbols', full_name='sentencepiece.TrainerSpec.user_defined_symbols', index=23, - number=31, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='hard_vocab_limit', full_name='sentencepiece.TrainerSpec.hard_vocab_limit', index=24, - number=33, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='use_all_vocab', full_name='sentencepiece.TrainerSpec.use_all_vocab', index=25, - number=34, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='unk_id', full_name='sentencepiece.TrainerSpec.unk_id', index=26, - number=40, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bos_id', full_name='sentencepiece.TrainerSpec.bos_id', index=27, - number=41, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='eos_id', full_name='sentencepiece.TrainerSpec.eos_id', index=28, - number=42, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=2, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='pad_id', full_name='sentencepiece.TrainerSpec.pad_id', index=29, - number=43, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=-1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='unk_piece', full_name='sentencepiece.TrainerSpec.unk_piece', index=30, - number=45, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - 
_descriptor.FieldDescriptor( - name='bos_piece', full_name='sentencepiece.TrainerSpec.bos_piece', index=31, - number=46, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='eos_piece', full_name='sentencepiece.TrainerSpec.eos_piece', index=32, - number=47, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='pad_piece', full_name='sentencepiece.TrainerSpec.pad_piece', index=33, - number=48, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='unk_surface', full_name='sentencepiece.TrainerSpec.unk_surface', index=34, - number=44, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b(" \342\201\207 ").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _TRAINERSPEC_MODELTYPE, - ], - options=None, - is_extendable=True, - syntax='proto2', - extension_ranges=[(200, 536870912), ], - oneofs=[ - ], - serialized_start=45, - serialized_end=1185, -) - - -_NORMALIZERSPEC = _descriptor.Descriptor( - name='NormalizerSpec', - full_name='sentencepiece.NormalizerSpec', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='sentencepiece.NormalizerSpec.name', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, 
default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='precompiled_charsmap', full_name='sentencepiece.NormalizerSpec.precompiled_charsmap', index=1, - number=2, type=12, cpp_type=9, label=1, - has_default_value=False, default_value=_b(""), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='add_dummy_prefix', full_name='sentencepiece.NormalizerSpec.add_dummy_prefix', index=2, - number=3, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='remove_extra_whitespaces', full_name='sentencepiece.NormalizerSpec.remove_extra_whitespaces', index=3, - number=4, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='escape_whitespaces', full_name='sentencepiece.NormalizerSpec.escape_whitespaces', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='normalization_rule_tsv', full_name='sentencepiece.NormalizerSpec.normalization_rule_tsv', index=5, - number=6, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=True, - 
syntax='proto2', - extension_ranges=[(200, 536870912), ], - oneofs=[ - ], - serialized_start=1188, - serialized_end=1397, -) - - -_SELFTESTDATA_SAMPLE = _descriptor.Descriptor( - name='Sample', - full_name='sentencepiece.SelfTestData.Sample', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='input', full_name='sentencepiece.SelfTestData.Sample.input', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='expected', full_name='sentencepiece.SelfTestData.Sample.expected', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=1468, - serialized_end=1509, -) - -_SELFTESTDATA = _descriptor.Descriptor( - name='SelfTestData', - full_name='sentencepiece.SelfTestData', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='samples', full_name='sentencepiece.SelfTestData.samples', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_SELFTESTDATA_SAMPLE, ], - enum_types=[ - ], - options=None, - is_extendable=True, - syntax='proto2', - extension_ranges=[(200, 536870912), ], - oneofs=[ - ], - serialized_start=1399, - serialized_end=1520, -) - - -_MODELPROTO_SENTENCEPIECE = 
_descriptor.Descriptor( - name='SentencePiece', - full_name='sentencepiece.ModelProto.SentencePiece', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='piece', full_name='sentencepiece.ModelProto.SentencePiece.piece', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='score', full_name='sentencepiece.ModelProto.SentencePiece.score', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='sentencepiece.ModelProto.SentencePiece.type', index=2, - number=3, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _MODELPROTO_SENTENCEPIECE_TYPE, - ], - options=None, - is_extendable=True, - syntax='proto2', - extension_ranges=[(200, 536870912), ], - oneofs=[ - ], - serialized_start=1754, - serialized_end=1954, -) - -_MODELPROTO = _descriptor.Descriptor( - name='ModelProto', - full_name='sentencepiece.ModelProto', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='pieces', full_name='sentencepiece.ModelProto.pieces', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='trainer_spec', 
full_name='sentencepiece.ModelProto.trainer_spec', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='normalizer_spec', full_name='sentencepiece.ModelProto.normalizer_spec', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='self_test_data', full_name='sentencepiece.ModelProto.self_test_data', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_MODELPROTO_SENTENCEPIECE, ], - enum_types=[ - ], - options=None, - is_extendable=True, - syntax='proto2', - extension_ranges=[(200, 536870912), ], - oneofs=[ - ], - serialized_start=1523, - serialized_end=1965, -) - -_TRAINERSPEC.fields_by_name['model_type'].enum_type = _TRAINERSPEC_MODELTYPE -_TRAINERSPEC_MODELTYPE.containing_type = _TRAINERSPEC -_SELFTESTDATA_SAMPLE.containing_type = _SELFTESTDATA -_SELFTESTDATA.fields_by_name['samples'].message_type = _SELFTESTDATA_SAMPLE -_MODELPROTO_SENTENCEPIECE.fields_by_name['type'].enum_type = _MODELPROTO_SENTENCEPIECE_TYPE -_MODELPROTO_SENTENCEPIECE.containing_type = _MODELPROTO -_MODELPROTO_SENTENCEPIECE_TYPE.containing_type = _MODELPROTO_SENTENCEPIECE -_MODELPROTO.fields_by_name['pieces'].message_type = _MODELPROTO_SENTENCEPIECE -_MODELPROTO.fields_by_name['trainer_spec'].message_type = _TRAINERSPEC -_MODELPROTO.fields_by_name['normalizer_spec'].message_type = _NORMALIZERSPEC -_MODELPROTO.fields_by_name['self_test_data'].message_type = _SELFTESTDATA 
-DESCRIPTOR.message_types_by_name['TrainerSpec'] = _TRAINERSPEC -DESCRIPTOR.message_types_by_name['NormalizerSpec'] = _NORMALIZERSPEC -DESCRIPTOR.message_types_by_name['SelfTestData'] = _SELFTESTDATA -DESCRIPTOR.message_types_by_name['ModelProto'] = _MODELPROTO - -TrainerSpec = _reflection.GeneratedProtocolMessageType('TrainerSpec', (_message.Message,), dict( - DESCRIPTOR = _TRAINERSPEC, - __module__ = 'sentencepiece_model_pb2' - # @@protoc_insertion_point(class_scope:sentencepiece.TrainerSpec) - )) -_sym_db.RegisterMessage(TrainerSpec) - -NormalizerSpec = _reflection.GeneratedProtocolMessageType('NormalizerSpec', (_message.Message,), dict( - DESCRIPTOR = _NORMALIZERSPEC, - __module__ = 'sentencepiece_model_pb2' - # @@protoc_insertion_point(class_scope:sentencepiece.NormalizerSpec) - )) -_sym_db.RegisterMessage(NormalizerSpec) - -SelfTestData = _reflection.GeneratedProtocolMessageType('SelfTestData', (_message.Message,), dict( - - Sample = _reflection.GeneratedProtocolMessageType('Sample', (_message.Message,), dict( - DESCRIPTOR = _SELFTESTDATA_SAMPLE, - __module__ = 'sentencepiece_model_pb2' - # @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData.Sample) - )) - , - DESCRIPTOR = _SELFTESTDATA, - __module__ = 'sentencepiece_model_pb2' - # @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData) - )) -_sym_db.RegisterMessage(SelfTestData) -_sym_db.RegisterMessage(SelfTestData.Sample) - -ModelProto = _reflection.GeneratedProtocolMessageType('ModelProto', (_message.Message,), dict( - - SentencePiece = _reflection.GeneratedProtocolMessageType('SentencePiece', (_message.Message,), dict( - DESCRIPTOR = _MODELPROTO_SENTENCEPIECE, - __module__ = 'sentencepiece_model_pb2' - # @@protoc_insertion_point(class_scope:sentencepiece.ModelProto.SentencePiece) - )) - , - DESCRIPTOR = _MODELPROTO, - __module__ = 'sentencepiece_model_pb2' - # @@protoc_insertion_point(class_scope:sentencepiece.ModelProto) - )) -_sym_db.RegisterMessage(ModelProto) 
-_sym_db.RegisterMessage(ModelProto.SentencePiece) - - -DESCRIPTOR.has_options = True -DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) -_TRAINERSPEC.fields_by_name['mining_sentence_size'].has_options = True -_TRAINERSPEC.fields_by_name['mining_sentence_size']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001')) -_TRAINERSPEC.fields_by_name['training_sentence_size'].has_options = True -_TRAINERSPEC.fields_by_name['training_sentence_size']._options = _descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\030\001')) +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x19sentencepiece_model.proto\x12\rsentencepiece\"\x80\x0c\n\x0bTrainerSpec\x12\r\n\x05input\x18\x01 \x03(\t\x12\x14\n\x0cinput_format\x18\x07 \x01(\t\x12\x14\n\x0cmodel_prefix\x18\x02 \x01(\t\x12\x41\n\nmodel_type\x18\x03 \x01(\x0e\x32$.sentencepiece.TrainerSpec.ModelType:\x07UNIGRAM\x12\x18\n\nvocab_size\x18\x04 \x01(\x05:\x04\x38\x30\x30\x30\x12\x17\n\x0f\x61\x63\x63\x65pt_language\x18\x05 \x03(\t\x12 \n\x15self_test_sample_size\x18\x06 \x01(\x05:\x01\x30\x12*\n\x1b\x65nable_differential_privacy\x18\x32 \x01(\x08:\x05\x66\x61lse\x12+\n differential_privacy_noise_level\x18\x33 \x01(\x02:\x01\x30\x12\x32\n\'differential_privacy_clipping_threshold\x18\x34 \x01(\x04:\x01\x30\x12\"\n\x12\x63haracter_coverage\x18\n \x01(\x02:\x06\x30.9995\x12\x1e\n\x13input_sentence_size\x18\x0b \x01(\x04:\x01\x30\x12$\n\x16shuffle_input_sentence\x18\x13 \x01(\x08:\x04true\x12 \n\x14mining_sentence_size\x18\x0c \x01(\x05\x42\x02\x18\x01\x12\"\n\x16training_sentence_size\x18\r \x01(\x05\x42\x02\x18\x01\x12(\n\x17seed_sentencepiece_size\x18\x0e \x01(\x05:\x07\x31\x30\x30\x30\x30\x30\x30\x12\x1e\n\x10shrinking_factor\x18\x0f \x01(\x02:\x04\x30.75\x12!\n\x13max_sentence_length\x18\x12 \x01(\x05:\x04\x34\x31\x39\x32\x12\x17\n\x0bnum_threads\x18\x10 \x01(\x05:\x02\x31\x36\x12\x1d\n\x12num_sub_iterations\x18\x11 
\x01(\x05:\x01\x32\x12$\n\x18max_sentencepiece_length\x18\x14 \x01(\x05:\x02\x31\x36\x12%\n\x17split_by_unicode_script\x18\x15 \x01(\x08:\x04true\x12\x1d\n\x0fsplit_by_number\x18\x17 \x01(\x08:\x04true\x12!\n\x13split_by_whitespace\x18\x16 \x01(\x08:\x04true\x12)\n\x1atreat_whitespace_as_suffix\x18\x18 \x01(\x08:\x05\x66\x61lse\x12+\n\x1c\x61llow_whitespace_only_pieces\x18\x1a \x01(\x08:\x05\x66\x61lse\x12\x1b\n\x0csplit_digits\x18\x19 \x01(\x08:\x05\x66\x61lse\x12#\n\x19pretokenization_delimiter\x18\x35 \x01(\t:\x00\x12\x17\n\x0f\x63ontrol_symbols\x18\x1e \x03(\t\x12\x1c\n\x14user_defined_symbols\x18\x1f \x03(\t\x12\x16\n\x0erequired_chars\x18$ \x01(\t\x12\x1c\n\rbyte_fallback\x18# \x01(\x08:\x05\x66\x61lse\x12+\n\x1dvocabulary_output_piece_score\x18 \x01(\x08:\x04true\x12\x1e\n\x10hard_vocab_limit\x18! \x01(\x08:\x04true\x12\x1c\n\ruse_all_vocab\x18\" \x01(\x08:\x05\x66\x61lse\x12\x11\n\x06unk_id\x18( \x01(\x05:\x01\x30\x12\x11\n\x06\x62os_id\x18) \x01(\x05:\x01\x31\x12\x11\n\x06\x65os_id\x18* \x01(\x05:\x01\x32\x12\x12\n\x06pad_id\x18+ \x01(\x05:\x02-1\x12\x18\n\tunk_piece\x18- \x01(\t:\x05\x12\x16\n\tbos_piece\x18. 
\x01(\t:\x03\x12\x17\n\teos_piece\x18/ \x01(\t:\x04\x12\x18\n\tpad_piece\x18\x30 \x01(\t:\x05\x12\x1a\n\x0bunk_surface\x18, \x01(\t:\x05 \xe2\x81\x87 \x12+\n\x1ctrain_extremely_large_corpus\x18\x31 \x01(\x08:\x05\x66\x61lse\"5\n\tModelType\x12\x0b\n\x07UNIGRAM\x10\x01\x12\x07\n\x03\x42PE\x10\x02\x12\x08\n\x04WORD\x10\x03\x12\x08\n\x04\x43HAR\x10\x04*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"\xd1\x01\n\x0eNormalizerSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1c\n\x14precompiled_charsmap\x18\x02 \x01(\x0c\x12\x1e\n\x10\x61\x64\x64_dummy_prefix\x18\x03 \x01(\x08:\x04true\x12&\n\x18remove_extra_whitespaces\x18\x04 \x01(\x08:\x04true\x12 \n\x12\x65scape_whitespaces\x18\x05 \x01(\x08:\x04true\x12\x1e\n\x16normalization_rule_tsv\x18\x06 \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"y\n\x0cSelfTestData\x12\x33\n\x07samples\x18\x01 \x03(\x0b\x32\".sentencepiece.SelfTestData.Sample\x1a)\n\x06Sample\x12\r\n\x05input\x18\x01 \x01(\t\x12\x10\n\x08\x65xpected\x18\x02 \x01(\t*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"\xfe\x03\n\nModelProto\x12\x37\n\x06pieces\x18\x01 \x03(\x0b\x32\'.sentencepiece.ModelProto.SentencePiece\x12\x30\n\x0ctrainer_spec\x18\x02 \x01(\x0b\x32\x1a.sentencepiece.TrainerSpec\x12\x36\n\x0fnormalizer_spec\x18\x03 \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x12\x33\n\x0eself_test_data\x18\x04 \x01(\x0b\x32\x1b.sentencepiece.SelfTestData\x12\x38\n\x11\x64\x65normalizer_spec\x18\x05 \x01(\x0b\x32\x1d.sentencepiece.NormalizerSpec\x1a\xd2\x01\n\rSentencePiece\x12\r\n\x05piece\x18\x01 \x01(\t\x12\r\n\x05score\x18\x02 \x01(\x02\x12\x42\n\x04type\x18\x03 \x01(\x0e\x32,.sentencepiece.ModelProto.SentencePiece.Type:\x06NORMAL\"T\n\x04Type\x12\n\n\x06NORMAL\x10\x01\x12\x0b\n\x07UNKNOWN\x10\x02\x12\x0b\n\x07\x43ONTROL\x10\x03\x12\x10\n\x0cUSER_DEFINED\x10\x04\x12\x08\n\x04\x42YTE\x10\x06\x12\n\n\x06UNUSED\x10\x05*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\x42\x02H\x03') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) 
+_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'sentencepiece_model_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'H\003' + _TRAINERSPEC.fields_by_name['mining_sentence_size']._options = None + _TRAINERSPEC.fields_by_name['mining_sentence_size']._serialized_options = b'\030\001' + _TRAINERSPEC.fields_by_name['training_sentence_size']._options = None + _TRAINERSPEC.fields_by_name['training_sentence_size']._serialized_options = b'\030\001' + _TRAINERSPEC._serialized_start=45 + _TRAINERSPEC._serialized_end=1581 + _TRAINERSPEC_MODELTYPE._serialized_start=1517 + _TRAINERSPEC_MODELTYPE._serialized_end=1570 + _NORMALIZERSPEC._serialized_start=1584 + _NORMALIZERSPEC._serialized_end=1793 + _SELFTESTDATA._serialized_start=1795 + _SELFTESTDATA._serialized_end=1916 + _SELFTESTDATA_SAMPLE._serialized_start=1864 + _SELFTESTDATA_SAMPLE._serialized_end=1905 + _MODELPROTO._serialized_start=1919 + _MODELPROTO._serialized_end=2429 + _MODELPROTO_SENTENCEPIECE._serialized_start=2208 + _MODELPROTO_SENTENCEPIECE._serialized_end=2418 + _MODELPROTO_SENTENCEPIECE_TYPE._serialized_start=2323 + _MODELPROTO_SENTENCEPIECE_TYPE._serialized_end=2407 # @@protoc_insertion_point(module_scope) diff --git a/python/src/sentencepiece/sentencepiece_pb2.py b/python/src/sentencepiece/sentencepiece_pb2.py index 8347974d..840cfd21 100644 --- a/python/src/sentencepiece/sentencepiece_pb2.py +++ b/python/src/sentencepiece/sentencepiece_pb2.py @@ -1,13 +1,11 @@ +# -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: sentencepiece.proto - -import sys -_b=sys.version_info[0]<3 and (lambda x:x) or (lambda x:x.encode('latin1')) +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from google.protobuf import reflection as _reflection +from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database -from google.protobuf import descriptor_pb2 # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -15,179 +13,18 @@ -DESCRIPTOR = _descriptor.FileDescriptor( - name='sentencepiece.proto', - package='sentencepiece', - syntax='proto2', - serialized_pb=_b('\n\x13sentencepiece.proto\x12\rsentencepiece\"\xdf\x01\n\x11SentencePieceText\x12\x0c\n\x04text\x18\x01 \x01(\t\x12>\n\x06pieces\x18\x02 \x03(\x0b\x32..sentencepiece.SentencePieceText.SentencePiece\x12\r\n\x05score\x18\x03 \x01(\x02\x1a\x62\n\rSentencePiece\x12\r\n\x05piece\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\r\x12\x0f\n\x07surface\x18\x03 \x01(\t\x12\r\n\x05\x62\x65gin\x18\x04 \x01(\r\x12\x0b\n\x03\x65nd\x18\x05 \x01(\r*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"J\n\x16NBestSentencePieceText\x12\x30\n\x06nbests\x18\x01 \x03(\x0b\x32 .sentencepiece.SentencePieceTextB\x02H\x03') -) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - - - - -_SENTENCEPIECETEXT_SENTENCEPIECE = _descriptor.Descriptor( - name='SentencePiece', - full_name='sentencepiece.SentencePieceText.SentencePiece', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='piece', full_name='sentencepiece.SentencePieceText.SentencePiece.piece', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, 
extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='id', full_name='sentencepiece.SentencePieceText.SentencePiece.id', index=1, - number=2, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='surface', full_name='sentencepiece.SentencePieceText.SentencePiece.surface', index=2, - number=3, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='begin', full_name='sentencepiece.SentencePieceText.SentencePiece.begin', index=3, - number=4, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='end', full_name='sentencepiece.SentencePieceText.SentencePiece.end', index=4, - number=5, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=True, - syntax='proto2', - extension_ranges=[(200, 536870912), ], - oneofs=[ - ], - serialized_start=153, - serialized_end=251, -) - -_SENTENCEPIECETEXT = _descriptor.Descriptor( - name='SentencePieceText', - full_name='sentencepiece.SentencePieceText', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='text', full_name='sentencepiece.SentencePieceText.text', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='pieces', full_name='sentencepiece.SentencePieceText.pieces', index=1, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='score', full_name='sentencepiece.SentencePieceText.score', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_SENTENCEPIECETEXT_SENTENCEPIECE, ], - enum_types=[ - ], - options=None, - is_extendable=True, - syntax='proto2', - extension_ranges=[(200, 536870912), ], - oneofs=[ - ], - serialized_start=39, - serialized_end=262, -) - - -_NBESTSENTENCEPIECETEXT = _descriptor.Descriptor( - name='NBestSentencePieceText', - full_name='sentencepiece.NBestSentencePieceText', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='nbests', full_name='sentencepiece.NBestSentencePieceText.nbests', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=264, - serialized_end=338, -) - -_SENTENCEPIECETEXT_SENTENCEPIECE.containing_type = _SENTENCEPIECETEXT -_SENTENCEPIECETEXT.fields_by_name['pieces'].message_type = _SENTENCEPIECETEXT_SENTENCEPIECE -_NBESTSENTENCEPIECETEXT.fields_by_name['nbests'].message_type = 
_SENTENCEPIECETEXT -DESCRIPTOR.message_types_by_name['SentencePieceText'] = _SENTENCEPIECETEXT -DESCRIPTOR.message_types_by_name['NBestSentencePieceText'] = _NBESTSENTENCEPIECETEXT - -SentencePieceText = _reflection.GeneratedProtocolMessageType('SentencePieceText', (_message.Message,), dict( - - SentencePiece = _reflection.GeneratedProtocolMessageType('SentencePiece', (_message.Message,), dict( - DESCRIPTOR = _SENTENCEPIECETEXT_SENTENCEPIECE, - __module__ = 'sentencepiece_pb2' - # @@protoc_insertion_point(class_scope:sentencepiece.SentencePieceText.SentencePiece) - )) - , - DESCRIPTOR = _SENTENCEPIECETEXT, - __module__ = 'sentencepiece_pb2' - # @@protoc_insertion_point(class_scope:sentencepiece.SentencePieceText) - )) -_sym_db.RegisterMessage(SentencePieceText) -_sym_db.RegisterMessage(SentencePieceText.SentencePiece) - -NBestSentencePieceText = _reflection.GeneratedProtocolMessageType('NBestSentencePieceText', (_message.Message,), dict( - DESCRIPTOR = _NBESTSENTENCEPIECETEXT, - __module__ = 'sentencepiece_pb2' - # @@protoc_insertion_point(class_scope:sentencepiece.NBestSentencePieceText) - )) -_sym_db.RegisterMessage(NBestSentencePieceText) +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x13sentencepiece.proto\x12\rsentencepiece\"\xdf\x01\n\x11SentencePieceText\x12\x0c\n\x04text\x18\x01 \x01(\t\x12>\n\x06pieces\x18\x02 \x03(\x0b\x32..sentencepiece.SentencePieceText.SentencePiece\x12\r\n\x05score\x18\x03 \x01(\x02\x1a\x62\n\rSentencePiece\x12\r\n\x05piece\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\r\x12\x0f\n\x07surface\x18\x03 \x01(\t\x12\r\n\x05\x62\x65gin\x18\x04 \x01(\r\x12\x0b\n\x03\x65nd\x18\x05 \x01(\r*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02*\t\x08\xc8\x01\x10\x80\x80\x80\x80\x02\"J\n\x16NBestSentencePieceText\x12\x30\n\x06nbests\x18\x01 \x03(\x0b\x32 .sentencepiece.SentencePieceTextB\x02H\x03') +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'sentencepiece_pb2', globals()) 
+if _descriptor._USE_C_DESCRIPTORS == False: -DESCRIPTOR.has_options = True -DESCRIPTOR._options = _descriptor._ParseOptions(descriptor_pb2.FileOptions(), _b('H\003')) + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'H\003' + _SENTENCEPIECETEXT._serialized_start=39 + _SENTENCEPIECETEXT._serialized_end=262 + _SENTENCEPIECETEXT_SENTENCEPIECE._serialized_start=153 + _SENTENCEPIECETEXT_SENTENCEPIECE._serialized_end=251 + _NBESTSENTENCEPIECETEXT._serialized_start=264 + _NBESTSENTENCEPIECETEXT._serialized_end=338 # @@protoc_insertion_point(module_scope) diff --git a/python/src/sentencepiece/sentencepiece_wrap.cxx b/python/src/sentencepiece/sentencepiece_wrap.cxx index a358b393..8e831d67 100644 --- a/python/src/sentencepiece/sentencepiece_wrap.cxx +++ b/python/src/sentencepiece/sentencepiece_wrap.cxx @@ -1,44 +1,16 @@ /* ---------------------------------------------------------------------------- - * This file was automatically generated by SWIG (http://www.swig.org). - * Version 4.0.1 + * This file was automatically generated by SWIG (https://www.swig.org). + * Version 4.1.0 * - * This file is not intended to be easily readable and contains a number of - * coding conventions designed to improve portability and efficiency. Do not make - * changes to this file unless you know what you are doing--modify the SWIG - * interface file instead. + * Do not make changes to this file unless you know what you are doing - modify + * the SWIG interface file instead. 
* ----------------------------------------------------------------------------- */ -#ifndef SWIGPYTHON +#define SWIG_VERSION 0x040100 #define SWIGPYTHON -#endif - #define SWIG_PYTHON_DIRECTOR_NO_VTABLE - -#ifdef __cplusplus -/* SwigValueWrapper is described in swig.swg */ -template class SwigValueWrapper { - struct SwigMovePointer { - T *ptr; - SwigMovePointer(T *p) : ptr(p) { } - ~SwigMovePointer() { delete ptr; } - SwigMovePointer& operator=(SwigMovePointer& rhs) { T* oldptr = ptr; ptr = 0; delete oldptr; ptr = rhs.ptr; rhs.ptr = 0; return *this; } - } pointer; - SwigValueWrapper& operator=(const SwigValueWrapper& rhs); - SwigValueWrapper(const SwigValueWrapper& rhs); -public: - SwigValueWrapper() : pointer(0) { } - SwigValueWrapper& operator=(const T& t) { SwigMovePointer tmp(new T(t)); pointer = tmp; return *this; } - operator T&() const { return *pointer.ptr; } - T *operator&() { return pointer.ptr; } -}; - -template T SwigValueInit() { - return T(); -} -#endif - /* ----------------------------------------------------------------------------- * This section contains generic SWIG labels for method/variable * declarations/attributes, and other compiler dependent labels. 
@@ -169,8 +141,26 @@ template T SwigValueInit() { # include #endif +#if !defined(PY_SSIZE_T_CLEAN) && !defined(SWIG_NO_PY_SSIZE_T_CLEAN) +#define PY_SSIZE_T_CLEAN +#endif + +#if __GNUC__ >= 7 +#pragma GCC diagnostic push +#if defined(__cplusplus) && __cplusplus >=201703L +#pragma GCC diagnostic ignored "-Wregister" /* For python-2.7 headers that use register */ +#endif +#endif + #if defined(_DEBUG) && defined(SWIG_PYTHON_INTERPRETER_NO_DEBUG) /* Use debug wrappers with the Python release dll */ + +#if defined(_MSC_VER) && _MSC_VER >= 1929 +/* Workaround compilation errors when redefining _DEBUG in MSVC 2019 version 16.10 and later + * See https://github.com/swig/swig/issues/2090 */ +# include +#endif + # undef _DEBUG # include # define _DEBUG 1 @@ -178,6 +168,10 @@ template T SwigValueInit() { # include #endif +#if __GNUC__ >= 7 +#pragma GCC diagnostic pop +#endif + /* ----------------------------------------------------------------------------- * swigrun.swg * @@ -224,6 +218,8 @@ template T SwigValueInit() { #define SWIG_POINTER_DISOWN 0x1 #define SWIG_CAST_NEW_MEMORY 0x2 #define SWIG_POINTER_NO_NULL 0x4 +#define SWIG_POINTER_CLEAR 0x8 +#define SWIG_POINTER_RELEASE (SWIG_POINTER_CLEAR | SWIG_POINTER_DISOWN) /* Flags for new pointer objects */ #define SWIG_POINTER_OWN 0x1 @@ -295,7 +291,7 @@ template T SwigValueInit() { SWIG errors code. 
Finally, if the SWIG_CASTRANK_MODE is enabled, the result code - allows to return the 'cast rank', for example, if you have this + allows returning the 'cast rank', for example, if you have this int food(double) int fooi(int); @@ -309,7 +305,13 @@ template T SwigValueInit() { */ #define SWIG_OK (0) +/* Runtime errors are < 0 */ #define SWIG_ERROR (-1) +/* Errors in range -1 to -99 are in swigerrors.swg (errors for all languages including those not using the runtime) */ +/* Errors in range -100 to -199 are language specific errors defined in *errors.swg */ +/* Errors < -200 are generic runtime specific errors */ +#define SWIG_ERROR_RELEASE_NOT_OWNED (-200) + #define SWIG_IsOK(r) (r >= 0) #define SWIG_ArgError(r) ((r != SWIG_ERROR) ? r : SWIG_TypeError) @@ -324,7 +326,7 @@ template T SwigValueInit() { #define SWIG_OLDOBJ (SWIG_OK) #define SWIG_NEWOBJ (SWIG_OK | SWIG_NEWOBJMASK) #define SWIG_TMPOBJ (SWIG_OK | SWIG_TMPOBJMASK) -/* Check, add and del mask methods */ +/* Check, add and del object mask methods */ #define SWIG_AddNewMask(r) (SWIG_IsOK(r) ? (r | SWIG_NEWOBJMASK) : r) #define SWIG_DelNewMask(r) (SWIG_IsOK(r) ? (r & ~SWIG_NEWOBJMASK) : r) #define SWIG_IsNewObj(r) (SWIG_IsOK(r) && (r & SWIG_NEWOBJMASK)) @@ -470,7 +472,7 @@ SWIG_TypeCheck(const char *c, swig_type_info *ty) { Identical to SWIG_TypeCheck, except strcmp is replaced with a pointer comparison */ SWIGRUNTIME swig_cast_info * -SWIG_TypeCheckStruct(swig_type_info *from, swig_type_info *ty) { +SWIG_TypeCheckStruct(const swig_type_info *from, swig_type_info *ty) { if (ty) { swig_cast_info *iter = ty->cast; while (iter) { @@ -530,9 +532,9 @@ SWIG_TypeName(const swig_type_info *ty) { SWIGRUNTIME const char * SWIG_TypePrettyName(const swig_type_info *type) { /* The "str" field contains the equivalent pretty names of the - type, separated by vertical-bar characters. We choose - to print the last name, as it is often (?) the most - specific. */ + type, separated by vertical-bar characters. 
Choose the last + name. It should be the most specific; a fully resolved name + but not necessarily with default template parameters expanded. */ if (!type) return NULL; if (type->str != NULL) { const char *last_name = type->str; @@ -752,7 +754,7 @@ SWIG_UnpackDataName(const char *c, void *ptr, size_t sz, const char *name) { } #endif -/* Errors in SWIG */ +/* SWIG Errors applicable to all language modules, values are reserved from -1 to -99 */ #define SWIG_UnknownError -1 #define SWIG_IOError -2 #define SWIG_RuntimeError -3 @@ -768,7 +770,6 @@ SWIG_UnpackDataName(const char *c, void *ptr, size_t sz, const char *name) { #define SWIG_NullReferenceError -13 - /* Compatibility macros for Python 3 */ #if PY_VERSION_HEX >= 0x03000000 @@ -784,7 +785,6 @@ SWIG_UnpackDataName(const char *c, void *ptr, size_t sz, const char *name) { #define PyString_Size(str) PyBytes_Size(str) #define PyString_InternFromString(key) PyUnicode_InternFromString(key) #define Py_TPFLAGS_HAVE_CLASS Py_TPFLAGS_BASETYPE -#define PyString_AS_STRING(x) PyUnicode_AS_STRING(x) #define _PyLong_FromSsize_t(x) PyLong_FromSsize_t(x) #endif @@ -802,34 +802,19 @@ SWIG_UnpackDataName(const char *c, void *ptr, size_t sz, const char *name) { #endif -/* Warning: This function will allocate a new string in Python 3, - * so please call SWIG_Python_str_DelForPy3(x) to free the space. 
- */ SWIGINTERN char* SWIG_Python_str_AsChar(PyObject *str) { -#if PY_VERSION_HEX >= 0x03000000 - char *newstr = 0; - str = PyUnicode_AsUTF8String(str); - if (str) { - char *cstr; - Py_ssize_t len; - PyBytes_AsStringAndSize(str, &cstr, &len); - newstr = (char *) malloc(len+1); - memcpy(newstr, cstr, len+1); - Py_XDECREF(str); - } - return newstr; +#if PY_VERSION_HEX >= 0x03030000 + return (char *)PyUnicode_AsUTF8(str); #else return PyString_AsString(str); #endif } -#if PY_VERSION_HEX >= 0x03000000 -# define SWIG_Python_str_DelForPy3(x) free( (void*) (x) ) -#else -# define SWIG_Python_str_DelForPy3(x) -#endif +/* Was useful for Python 3.0.x-3.2.x - now provided only for compatibility + * with any uses in user interface files. */ +#define SWIG_Python_str_DelForPy3(x) SWIGINTERN PyObject* @@ -846,10 +831,14 @@ SWIG_Python_str_FromChar(const char *c) # define PyObject_DEL PyObject_Del #endif -// SWIGPY_USE_CAPSULE is no longer used within SWIG itself, but some user -// interface files check for it. +/* SWIGPY_USE_CAPSULE is no longer used within SWIG itself, but some user interface files check for it. */ # define SWIGPY_USE_CAPSULE -# define SWIGPY_CAPSULE_NAME ("swig_runtime_data" SWIG_RUNTIME_VERSION ".type_pointer_capsule" SWIG_TYPE_TABLE_NAME) +#ifdef SWIGPYTHON_BUILTIN +# define SWIGPY_CAPSULE_ATTR_NAME "type_pointer_capsule_builtin" SWIG_TYPE_TABLE_NAME +#else +# define SWIGPY_CAPSULE_ATTR_NAME "type_pointer_capsule" SWIG_TYPE_TABLE_NAME +#endif +# define SWIGPY_CAPSULE_NAME ("swig_runtime_data" SWIG_RUNTIME_VERSION "." 
SWIGPY_CAPSULE_ATTR_NAME) #if PY_VERSION_HEX < 0x03020000 #define PyDescr_TYPE(x) (((PyDescrObject *)(x))->d_type) @@ -923,7 +912,6 @@ SWIG_Python_AddErrorMsg(const char* mesg) PyErr_Format(type, "%s %s", tmp, mesg); else PyErr_Format(type, "%s", mesg); - SWIG_Python_str_DelForPy3(tmp); Py_DECREF(old_str); Py_DECREF(value); } else { @@ -954,8 +942,12 @@ SWIG_Python_RaiseOrModifyTypeError(const char *message) #else newvalue = PyString_FromFormat("%s\nAdditional information:\n%s", PyString_AsString(value), message); #endif - Py_XDECREF(value); - PyErr_Restore(type, newvalue, traceback); + if (newvalue) { + Py_XDECREF(value); + PyErr_Restore(type, newvalue, traceback); + } else { + PyErr_Restore(type, value, traceback); + } } else { /* Raise TypeError using given message */ PyErr_SetString(PyExc_TypeError, message); @@ -972,8 +964,12 @@ SWIG_Python_RaiseOrModifyTypeError(const char *message) # define SWIG_PYTHON_USE_GIL # endif # if defined(SWIG_PYTHON_USE_GIL) /* Use PyGILState threads calls */ -# ifndef SWIG_PYTHON_INITIALIZE_THREADS -# define SWIG_PYTHON_INITIALIZE_THREADS PyEval_InitThreads() +# if !defined(SWIG_PYTHON_INITIALIZE_THREADS) +# if PY_VERSION_HEX < 0x03070000 +# define SWIG_PYTHON_INITIALIZE_THREADS PyEval_InitThreads() +# else +# define SWIG_PYTHON_INITIALIZE_THREADS +# endif # endif # ifdef __cplusplus /* C++ code */ class SWIG_Python_Thread_Block { @@ -1071,8 +1067,8 @@ typedef struct swig_const_info { # error "This version of SWIG only supports Python >= 2.7" #endif -#if PY_VERSION_HEX >= 0x03000000 && PY_VERSION_HEX < 0x03020000 -# error "This version of SWIG only supports Python 3 >= 3.2" +#if PY_VERSION_HEX >= 0x03000000 && PY_VERSION_HEX < 0x03030000 +# error "This version of SWIG only supports Python 3 >= 3.3" #endif /* Common SWIG API */ @@ -1187,7 +1183,12 @@ SWIG_Python_AppendOutput(PyObject* result, PyObject* obj) { if (!PyList_Check(result)) { PyObject *o2 = result; result = PyList_New(1); - PyList_SetItem(result, 0, o2); + if (result) { 
+ PyList_SET_ITEM(result, 0, o2); + } else { + Py_DECREF(obj); + return o2; + } } PyList_Append(result,obj); Py_DECREF(obj); @@ -1243,6 +1244,19 @@ SWIG_Python_UnpackTuple(PyObject *args, const char *name, Py_ssize_t min, Py_ssi } } +SWIGINTERN int +SWIG_Python_CheckNoKeywords(PyObject *kwargs, const char *name) { + int no_kwargs = 1; + if (kwargs) { + assert(PyDict_Check(kwargs)); + if (PyDict_Size(kwargs) > 0) { + PyErr_Format(PyExc_TypeError, "%s() does not take keyword arguments", name); + no_kwargs = 0; + } + } + return no_kwargs; +} + /* A functor is a function object with one single object argument */ #define SWIG_Python_CallFunctor(functor, obj) PyObject_CallFunctionObjArgs(functor, obj, NULL); @@ -1256,6 +1270,238 @@ SWIG_Python_UnpackTuple(PyObject *args, const char *name, Py_ssize_t min, Py_ssi #define SWIG_STATIC_POINTER(var) var = 0; if (!var) var #endif +#ifdef __cplusplus +extern "C" { +#endif + +/* Python-specific SWIG API */ +#define SWIG_newvarlink() SWIG_Python_newvarlink() +#define SWIG_addvarlink(p, name, get_attr, set_attr) SWIG_Python_addvarlink(p, name, get_attr, set_attr) +#define SWIG_InstallConstants(d, constants) SWIG_Python_InstallConstants(d, constants) + +/* ----------------------------------------------------------------------------- + * global variable support code. 
+ * ----------------------------------------------------------------------------- */ + +typedef struct swig_globalvar { + char *name; /* Name of global variable */ + PyObject *(*get_attr)(void); /* Return the current value */ + int (*set_attr)(PyObject *); /* Set the value */ + struct swig_globalvar *next; +} swig_globalvar; + +typedef struct swig_varlinkobject { + PyObject_HEAD + swig_globalvar *vars; +} swig_varlinkobject; + +SWIGINTERN PyObject * +swig_varlink_repr(PyObject *SWIGUNUSEDPARM(v)) { +#if PY_VERSION_HEX >= 0x03000000 + return PyUnicode_InternFromString(""); +#else + return PyString_FromString(""); +#endif +} + +SWIGINTERN PyObject * +swig_varlink_str(PyObject *o) { + swig_varlinkobject *v = (swig_varlinkobject *) o; +#if PY_VERSION_HEX >= 0x03000000 + PyObject *str = PyUnicode_InternFromString("("); + PyObject *tail; + PyObject *joined; + swig_globalvar *var; + for (var = v->vars; var; var=var->next) { + tail = PyUnicode_FromString(var->name); + joined = PyUnicode_Concat(str, tail); + Py_DecRef(str); + Py_DecRef(tail); + str = joined; + if (var->next) { + tail = PyUnicode_InternFromString(", "); + joined = PyUnicode_Concat(str, tail); + Py_DecRef(str); + Py_DecRef(tail); + str = joined; + } + } + tail = PyUnicode_InternFromString(")"); + joined = PyUnicode_Concat(str, tail); + Py_DecRef(str); + Py_DecRef(tail); + str = joined; +#else + PyObject *str = PyString_FromString("("); + swig_globalvar *var; + for (var = v->vars; var; var=var->next) { + PyString_ConcatAndDel(&str,PyString_FromString(var->name)); + if (var->next) PyString_ConcatAndDel(&str,PyString_FromString(", ")); + } + PyString_ConcatAndDel(&str,PyString_FromString(")")); +#endif + return str; +} + +SWIGINTERN void +swig_varlink_dealloc(PyObject *o) { + swig_varlinkobject *v = (swig_varlinkobject *) o; + swig_globalvar *var = v->vars; + while (var) { + swig_globalvar *n = var->next; + free(var->name); + free(var); + var = n; + } +} + +SWIGINTERN PyObject * +swig_varlink_getattr(PyObject 
*o, char *n) { + swig_varlinkobject *v = (swig_varlinkobject *) o; + PyObject *res = NULL; + swig_globalvar *var = v->vars; + while (var) { + if (strcmp(var->name,n) == 0) { + res = (*var->get_attr)(); + break; + } + var = var->next; + } + if (res == NULL && !PyErr_Occurred()) { + PyErr_Format(PyExc_AttributeError, "Unknown C global variable '%s'", n); + } + return res; +} + +SWIGINTERN int +swig_varlink_setattr(PyObject *o, char *n, PyObject *p) { + swig_varlinkobject *v = (swig_varlinkobject *) o; + int res = 1; + swig_globalvar *var = v->vars; + while (var) { + if (strcmp(var->name,n) == 0) { + res = (*var->set_attr)(p); + break; + } + var = var->next; + } + if (res == 1 && !PyErr_Occurred()) { + PyErr_Format(PyExc_AttributeError, "Unknown C global variable '%s'", n); + } + return res; +} + +SWIGINTERN PyTypeObject* +swig_varlink_type(void) { + static char varlink__doc__[] = "Swig var link object"; + static PyTypeObject varlink_type; + static int type_init = 0; + if (!type_init) { + const PyTypeObject tmp = { +#if PY_VERSION_HEX >= 0x03000000 + PyVarObject_HEAD_INIT(NULL, 0) +#else + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ +#endif + "swigvarlink", /* tp_name */ + sizeof(swig_varlinkobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor) swig_varlink_dealloc, /* tp_dealloc */ +#if PY_VERSION_HEX < 0x030800b4 + (printfunc)0, /*tp_print*/ +#else + (Py_ssize_t)0, /*tp_vectorcall_offset*/ +#endif + (getattrfunc) swig_varlink_getattr, /* tp_getattr */ + (setattrfunc) swig_varlink_setattr, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc) swig_varlink_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + (reprfunc) swig_varlink_str, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + 0, /* tp_flags */ + varlink__doc__, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* tp_iter -> tp_weaklist */ + 0, /* tp_del */ + 0, /* tp_version_tag */ +#if PY_VERSION_HEX >= 0x03040000 + 0, /* tp_finalize */ +#endif +#if PY_VERSION_HEX >= 0x03080000 + 0, /* tp_vectorcall */ +#endif +#if (PY_VERSION_HEX >= 0x03080000) && (PY_VERSION_HEX < 0x03090000) + 0, /* tp_print */ +#endif +#ifdef COUNT_ALLOCS + 0, /* tp_allocs */ + 0, /* tp_frees */ + 0, /* tp_maxalloc */ + 0, /* tp_prev */ + 0 /* tp_next */ +#endif + }; + varlink_type = tmp; + type_init = 1; + if (PyType_Ready(&varlink_type) < 0) + return NULL; + } + return &varlink_type; +} + +/* Create a variable linking object for use later */ +SWIGINTERN PyObject * +SWIG_Python_newvarlink(void) { + swig_varlinkobject *result = PyObject_NEW(swig_varlinkobject, swig_varlink_type()); + if (result) { + result->vars = 0; + } + return ((PyObject*) result); +} + +SWIGINTERN void +SWIG_Python_addvarlink(PyObject *p, const char *name, PyObject *(*get_attr)(void), int (*set_attr)(PyObject *p)) { + swig_varlinkobject *v = (swig_varlinkobject *) p; + swig_globalvar *gv = (swig_globalvar *) malloc(sizeof(swig_globalvar)); + if (gv) { + size_t size = strlen(name)+1; + gv->name = (char *)malloc(size); + if (gv->name) { + memcpy(gv->name, name, size); + gv->get_attr = get_attr; + gv->set_attr = set_attr; + gv->next = v->vars; + } + } + v->vars = gv; +} + + +static PyObject *Swig_Globals_global = NULL; + +SWIGINTERN PyObject * +SWIG_globals(void) { + if (Swig_Globals_global == NULL) { + Swig_Globals_global = SWIG_newvarlink(); + } + return Swig_Globals_global; +} + +#ifdef __cplusplus +} +#endif + /* ----------------------------------------------------------------------------- * Pointer declarations * ----------------------------------------------------------------------------- */ @@ -1326,18 +1572,25 @@ SwigPyClientData_New(PyObject* obj) /* the newraw method and newargs arguments used to create a new raw instance */ if (PyClass_Check(obj)) { data->newraw = 0; - 
data->newargs = obj; Py_INCREF(obj); + data->newargs = obj; } else { data->newraw = PyObject_GetAttrString(data->klass, "__new__"); if (data->newraw) { - Py_INCREF(data->newraw); - data->newargs = PyTuple_New(1); - PyTuple_SetItem(data->newargs, 0, obj); + data->newargs = PyTuple_New(1); + if (data->newargs) { + Py_INCREF(obj); + PyTuple_SET_ITEM(data->newargs, 0, obj); + } else { + Py_DECREF(data->newraw); + Py_DECREF(data->klass); + free(data); + return 0; + } } else { - data->newargs = obj; + Py_INCREF(obj); + data->newargs = obj; } - Py_INCREF(data->newargs); } /* the destroy method, aka as the C++ delete method */ data->destroy = PyObject_GetAttrString(data->klass, "__swig_destroy__"); @@ -1346,10 +1599,7 @@ SwigPyClientData_New(PyObject* obj) data->destroy = 0; } if (data->destroy) { - int flags; - Py_INCREF(data->destroy); - flags = PyCFunction_GET_FLAGS(data->destroy); - data->delargs = !(flags & (METH_O)); + data->delargs = !(PyCFunction_GET_FLAGS(data->destroy) & METH_O); } else { data->delargs = 0; } @@ -1360,10 +1610,13 @@ SwigPyClientData_New(PyObject* obj) } SWIGRUNTIME void -SwigPyClientData_Del(SwigPyClientData *data) { +SwigPyClientData_Del(SwigPyClientData *data) +{ + Py_XDECREF(data->klass); Py_XDECREF(data->newraw); Py_XDECREF(data->newargs); Py_XDECREF(data->destroy); + free(data); } /* =============== SwigPyObject =====================*/ @@ -1390,7 +1643,7 @@ SwigPyObject_get___dict__(PyObject *v, PyObject *SWIGUNUSEDPARM(args)) if (!sobj->dict) sobj->dict = PyDict_New(); - Py_INCREF(sobj->dict); + Py_XINCREF(sobj->dict); return sobj->dict; } @@ -1408,18 +1661,21 @@ SwigPyObject_format(const char* fmt, SwigPyObject *v) PyObject *res = NULL; PyObject *args = PyTuple_New(1); if (args) { - if (PyTuple_SetItem(args, 0, SwigPyObject_long(v)) == 0) { - PyObject *ofmt = SWIG_Python_str_FromChar(fmt); + PyObject *val = SwigPyObject_long(v); + if (val) { + PyObject *ofmt; + PyTuple_SET_ITEM(args, 0, val); + ofmt = SWIG_Python_str_FromChar(fmt); if 
(ofmt) { #if PY_VERSION_HEX >= 0x03000000 - res = PyUnicode_Format(ofmt,args); + res = PyUnicode_Format(ofmt,args); #else - res = PyString_Format(ofmt,args); + res = PyString_Format(ofmt,args); #endif - Py_DECREF(ofmt); + Py_DECREF(ofmt); } - Py_DECREF(args); } + Py_DECREF(args); } return res; } @@ -1441,18 +1697,23 @@ SwigPyObject_repr(SwigPyObject *v) { const char *name = SWIG_TypePrettyName(v->ty); PyObject *repr = SWIG_Python_str_FromFormat("", (name ? name : "unknown"), (void *)v); - if (v->next) { + if (repr && v->next) { PyObject *nrep = SwigPyObject_repr((SwigPyObject *)v->next); + if (nrep) { # if PY_VERSION_HEX >= 0x03000000 - PyObject *joined = PyUnicode_Concat(repr, nrep); - Py_DecRef(repr); - Py_DecRef(nrep); - repr = joined; + PyObject *joined = PyUnicode_Concat(repr, nrep); + Py_DecRef(repr); + Py_DecRef(nrep); + repr = joined; # else - PyString_ConcatAndDel(&repr,nrep); + PyString_ConcatAndDel(&repr,nrep); # endif + } else { + Py_DecRef(repr); + repr = NULL; + } } - return repr; + return repr; } /* We need a version taking two PyObject* parameters so it's a valid @@ -1522,6 +1783,8 @@ SwigPyObject_Check(PyObject *op) { SWIGRUNTIME PyObject * SwigPyObject_New(void *ptr, swig_type_info *ty, int own); +static PyObject* Swig_Capsule_global = NULL; + SWIGRUNTIME void SwigPyObject_dealloc(PyObject *v) { @@ -1548,8 +1811,12 @@ SwigPyObject_dealloc(PyObject *v) if (data->delargs) { /* we need to create a temporary object to carry the destroy operation */ PyObject *tmp = SwigPyObject_New(sobj->ptr, ty, 0); - res = SWIG_Python_CallFunctor(destroy, tmp); - Py_DECREF(tmp); + if (tmp) { + res = SWIG_Python_CallFunctor(destroy, tmp); + } else { + res = 0; + } + Py_XDECREF(tmp); } else { PyCFunction meth = PyCFunction_GET_FUNCTION(destroy); PyObject *mself = PyCFunction_GET_SELF(destroy); @@ -1568,8 +1835,12 @@ SwigPyObject_dealloc(PyObject *v) printf("swig/python detected a memory leak of type '%s', no destructor found.\n", (name ? 
name : "unknown")); } #endif - } + Py_XDECREF(Swig_Capsule_global); + } Py_XDECREF(next); +#ifdef SWIGPYTHON_BUILTIN + Py_XDECREF(sobj->dict); +#endif PyObject_DEL(v); } @@ -1581,6 +1852,7 @@ SwigPyObject_append(PyObject* v, PyObject* next) PyErr_SetString(PyExc_TypeError, "Attempt to append a non SwigPyObject"); return NULL; } + ((SwigPyObject *)next)->next = sobj->next; sobj->next = next; Py_INCREF(next); return SWIG_Py_Void(); @@ -1625,9 +1897,9 @@ SwigPyObject_own(PyObject *v, PyObject *args) PyObject *obj = PyBool_FromLong(sobj->own); if (val) { if (PyObject_IsTrue(val)) { - SwigPyObject_acquire(v,args); + Py_DECREF(SwigPyObject_acquire(v,args)); } else { - SwigPyObject_disown(v,args); + Py_DECREF(SwigPyObject_disown(v,args)); } } return obj; @@ -1707,7 +1979,11 @@ SwigPyObject_TypeOnce(void) { sizeof(SwigPyObject), /* tp_basicsize */ 0, /* tp_itemsize */ (destructor)SwigPyObject_dealloc, /* tp_dealloc */ - 0, /* tp_print */ +#if PY_VERSION_HEX < 0x030800b4 + (printfunc)0, /*tp_print*/ +#else + (Py_ssize_t)0, /*tp_vectorcall_offset*/ +#endif (getattrfunc)0, /* tp_getattr */ (setattrfunc)0, /* tp_setattr */ #if PY_VERSION_HEX >= 0x03000000 @@ -1756,6 +2032,12 @@ SwigPyObject_TypeOnce(void) { #if PY_VERSION_HEX >= 0x03040000 0, /* tp_finalize */ #endif +#if PY_VERSION_HEX >= 0x03080000 + 0, /* tp_vectorcall */ +#endif +#if (PY_VERSION_HEX >= 0x03080000) && (PY_VERSION_HEX < 0x03090000) + 0, /* tp_print */ +#endif #ifdef COUNT_ALLOCS 0, /* tp_allocs */ 0, /* tp_frees */ @@ -1766,7 +2048,7 @@ SwigPyObject_TypeOnce(void) { }; swigpyobject_type = tmp; type_init = 1; - if (PyType_Ready(&swigpyobject_type) < 0) + if (PyType_Ready(&swigpyobject_type) != 0) return NULL; } return &swigpyobject_type; @@ -1781,6 +2063,15 @@ SwigPyObject_New(void *ptr, swig_type_info *ty, int own) sobj->ty = ty; sobj->own = own; sobj->next = 0; +#ifdef SWIGPYTHON_BUILTIN + sobj->dict = 0; +#endif + if (own == SWIG_POINTER_OWN) { + /* Obtain a reference to the Python capsule wrapping the 
module information, so that the + * module information is correctly destroyed after all SWIG python objects have been freed + * by the GC (and corresponding destructors invoked) */ + Py_XINCREF(Swig_Capsule_global); + } } return (PyObject *)sobj; } @@ -1868,7 +2159,11 @@ SwigPyPacked_TypeOnce(void) { sizeof(SwigPyPacked), /* tp_basicsize */ 0, /* tp_itemsize */ (destructor)SwigPyPacked_dealloc, /* tp_dealloc */ - 0, /* tp_print */ +#if PY_VERSION_HEX < 0x030800b4 + (printfunc)0, /*tp_print*/ +#else + (Py_ssize_t)0, /*tp_vectorcall_offset*/ +#endif (getattrfunc)0, /* tp_getattr */ (setattrfunc)0, /* tp_setattr */ #if PY_VERSION_HEX>=0x03000000 @@ -1917,6 +2212,12 @@ SwigPyPacked_TypeOnce(void) { #if PY_VERSION_HEX >= 0x03040000 0, /* tp_finalize */ #endif +#if PY_VERSION_HEX >= 0x03080000 + 0, /* tp_vectorcall */ +#endif +#if (PY_VERSION_HEX >= 0x03080000) && (PY_VERSION_HEX < 0x03090000) + 0, /* tp_print */ +#endif #ifdef COUNT_ALLOCS 0, /* tp_allocs */ 0, /* tp_frees */ @@ -1927,7 +2228,7 @@ SwigPyPacked_TypeOnce(void) { }; swigpypacked_type = tmp; type_init = 1; - if (PyType_Ready(&swigpypacked_type) < 0) + if (PyType_Ready(&swigpypacked_type) != 0) return NULL; } return &swigpypacked_type; @@ -2117,12 +2418,19 @@ SWIG_Python_ConvertPtrAndOwn(PyObject *obj, void **ptr, swig_type_info *ty, int } } if (sobj) { - if (own) - *own = *own | sobj->own; - if (flags & SWIG_POINTER_DISOWN) { - sobj->own = 0; + if (((flags & SWIG_POINTER_RELEASE) == SWIG_POINTER_RELEASE) && !sobj->own) { + res = SWIG_ERROR_RELEASE_NOT_OWNED; + } else { + if (own) + *own = *own | sobj->own; + if (flags & SWIG_POINTER_DISOWN) { + sobj->own = 0; + } + if (flags & SWIG_POINTER_CLEAR) { + sobj->ptr = 0; + } + res = SWIG_OK; } - res = SWIG_OK; } else { if (implicit_conv) { SwigPyClientData *data = ty ? 
(SwigPyClientData *) ty->clientdata : 0; @@ -2235,16 +2543,23 @@ SWIG_Python_NewShadowInstance(SwigPyClientData *data, PyObject *swig_this) #if !defined(SWIG_PYTHON_SLOW_GETSET_THIS) PyObject **dictptr = _PyObject_GetDictPtr(inst); if (dictptr != NULL) { - PyObject *dict = *dictptr; - if (dict == NULL) { - dict = PyDict_New(); - *dictptr = dict; - PyDict_SetItem(dict, SWIG_This(), swig_this); - } + PyObject *dict = *dictptr; + if (dict == NULL) { + dict = PyDict_New(); + *dictptr = dict; + } + if (dict) { + PyDict_SetItem(dict, SWIG_This(), swig_this); + } else{ + Py_DECREF(inst); + inst = 0; + } } #else - PyObject *key = SWIG_This(); - PyObject_SetAttr(inst, key, swig_this); + if (PyObject_SetAttr(inst, SWIG_This(), swig_this) == -1) { + Py_DECREF(inst); + inst = 0; + } #endif } } else { @@ -2256,8 +2571,12 @@ SWIG_Python_NewShadowInstance(SwigPyClientData *data, PyObject *swig_this) inst = ((PyTypeObject *)data->newargs)->tp_new((PyTypeObject *)data->newargs, empty_args, empty_kwargs); Py_DECREF(empty_kwargs); if (inst) { - PyObject_SetAttr(inst, SWIG_This(), swig_this); - Py_TYPE(inst)->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; + if (PyObject_SetAttr(inst, SWIG_This(), swig_this) == -1) { + Py_DECREF(inst); + inst = 0; + } else { + PyType_Modified(Py_TYPE(inst)); + } } } Py_DECREF(empty_args); @@ -2274,25 +2593,25 @@ SWIG_Python_NewShadowInstance(SwigPyClientData *data, PyObject *swig_this) return inst; } -SWIGRUNTIME void +SWIGRUNTIME int SWIG_Python_SetSwigThis(PyObject *inst, PyObject *swig_this) { - PyObject *dict; #if !defined(SWIG_PYTHON_SLOW_GETSET_THIS) - PyObject **dictptr = _PyObject_GetDictPtr(inst); - if (dictptr != NULL) { - dict = *dictptr; - if (dict == NULL) { - dict = PyDict_New(); - *dictptr = dict; - } - PyDict_SetItem(dict, SWIG_This(), swig_this); - return; - } + PyObject **dictptr = _PyObject_GetDictPtr(inst); + if (dictptr != NULL) { + PyObject *dict = *dictptr; + if (dict == NULL) { + dict = PyDict_New(); + *dictptr = dict; + } + if 
(dict) { + return PyDict_SetItem(dict, SWIG_This(), swig_this); + } else{ + return -1; + } + } #endif - dict = PyObject_GetAttrString(inst, "__dict__"); - PyDict_SetItem(dict, SWIG_This(), swig_this); - Py_DECREF(dict); + return PyObject_SetAttr(inst, SWIG_This(), swig_this); } @@ -2304,9 +2623,10 @@ SWIG_Python_InitShadowInstance(PyObject *args) { } else { SwigPyObject *sthis = SWIG_Python_GetSwigThis(obj[0]); if (sthis) { - SwigPyObject_append((PyObject*) sthis, obj[1]); + Py_DECREF(SwigPyObject_append((PyObject*) sthis, obj[1])); } else { - SWIG_Python_SetSwigThis(obj[0], obj[1]); + if (SWIG_Python_SetSwigThis(obj[0], obj[1]) != 0) + return NULL; } return SWIG_Py_Void(); } @@ -2342,7 +2662,9 @@ SWIG_Python_NewPointerObj(PyObject *self, void *ptr, swig_type_info *type, int f } else { newobj = PyObject_New(SwigPyObject, clientdata->pytype); #ifdef SWIGPYTHON_BUILTIN - newobj->dict = 0; + if (newobj) { + newobj->dict = 0; + } #endif } if (newobj) { @@ -2381,39 +2703,61 @@ SWIG_Python_NewPackedObj(void *ptr, size_t sz, swig_type_info *type) { void *SWIG_ReturnGlobalTypeList(void *); #endif +static PyObject *Swig_TypeCache_global = NULL; + +/* The python cached type query */ +SWIGRUNTIME PyObject * +SWIG_Python_TypeCache(void) { + if (Swig_TypeCache_global == NULL) { + Swig_TypeCache_global = PyDict_New(); + } + return Swig_TypeCache_global; +} + SWIGRUNTIME swig_module_info * SWIG_Python_GetModule(void *SWIGUNUSEDPARM(clientdata)) { +#ifdef SWIG_LINK_RUNTIME static void *type_pointer = (void *)0; /* first check if module already created */ if (!type_pointer) { -#ifdef SWIG_LINK_RUNTIME type_pointer = SWIG_ReturnGlobalTypeList((void *)0); + } #else - type_pointer = PyCapsule_Import(SWIGPY_CAPSULE_NAME, 0); - if (PyErr_Occurred()) { - PyErr_Clear(); - type_pointer = (void *)0; - } -#endif + void *type_pointer = PyCapsule_Import(SWIGPY_CAPSULE_NAME, 0); + if (PyErr_Occurred()) { + PyErr_Clear(); + type_pointer = (void *)0; } +#endif return (swig_module_info *) 
type_pointer; } + +static int interpreter_counter = 0; // how many (sub-)interpreters are using swig_module's types + SWIGRUNTIME void SWIG_Python_DestroyModule(PyObject *obj) { swig_module_info *swig_module = (swig_module_info *) PyCapsule_GetPointer(obj, SWIGPY_CAPSULE_NAME); swig_type_info **types = swig_module->types; size_t i; + if (--interpreter_counter != 0) // another sub-interpreter may still be using the swig_module's types + return; for (i =0; i < swig_module->size; ++i) { swig_type_info *ty = types[i]; if (ty->owndata) { SwigPyClientData *data = (SwigPyClientData *) ty->clientdata; + ty->clientdata = 0; if (data) SwigPyClientData_Del(data); } } Py_DECREF(SWIG_This()); Swig_This_global = NULL; + Py_DECREF(SWIG_globals()); + Swig_Globals_global = NULL; + Py_DECREF(SWIG_Python_TypeCache()); + Swig_TypeCache_global = NULL; + Swig_Capsule_global = NULL; } SWIGRUNTIME void @@ -2427,19 +2771,17 @@ SWIG_Python_SetModule(swig_module_info *swig_module) { #endif PyObject *pointer = PyCapsule_New((void *) swig_module, SWIGPY_CAPSULE_NAME, SWIG_Python_DestroyModule); if (pointer && module) { - PyModule_AddObject(module, "type_pointer_capsule" SWIG_TYPE_TABLE_NAME, pointer); + if (PyModule_AddObject(module, SWIGPY_CAPSULE_ATTR_NAME, pointer) == 0) { + ++interpreter_counter; + Swig_Capsule_global = pointer; + } else { + Py_DECREF(pointer); + } } else { Py_XDECREF(pointer); } } -/* The python cached type query */ -SWIGRUNTIME PyObject * -SWIG_Python_TypeCache(void) { - static PyObject *SWIG_STATIC_POINTER(cache) = PyDict_New(); - return cache; -} - SWIGRUNTIME swig_type_info * SWIG_Python_TypeQuery(const char *type) { @@ -2454,8 +2796,10 @@ SWIG_Python_TypeQuery(const char *type) descriptor = SWIG_TypeQueryModule(swig_module, swig_module, type); if (descriptor) { obj = PyCapsule_New((void*) descriptor, NULL, NULL); - PyDict_SetItem(cache, key, obj); - Py_DECREF(obj); + if (obj) { + PyDict_SetItem(cache, key, obj); + Py_DECREF(obj); + } } } Py_DECREF(key); @@ -2488,7 
+2832,6 @@ SWIG_Python_AddErrMesg(const char* mesg, int infront) } else { PyErr_Format(type, "%s %s", errmesg, mesg); } - SWIG_Python_str_DelForPy3(tmp); Py_DECREF(old_str); } return 1; @@ -2540,7 +2883,6 @@ SWIG_Python_TypeError(const char *type, PyObject *obj) if (cstr) { PyErr_Format(PyExc_TypeError, "a '%s' is expected, '%s(%s)' is received", type, otype, cstr); - SWIG_Python_str_DelForPy3(cstr); } else { PyErr_Format(PyExc_TypeError, "a '%s' is expected, '%s' is received", type, otype); @@ -2562,12 +2904,6 @@ SWIG_Python_MustGetPtr(PyObject *obj, swig_type_info *ty, int SWIGUNUSEDPARM(arg void *result; if (SWIG_Python_ConvertPtr(obj, &result, ty, flags) == -1) { PyErr_Clear(); -#if SWIG_POINTER_EXCEPTION - if (flags) { - SWIG_Python_TypeError(SWIG_TypePrettyName(ty), obj); - SWIG_Python_ArgFail(argnum); - } -#endif } return result; } @@ -2598,7 +2934,7 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) { } if (!tp->tp_dict) { - if (PyType_Ready(tp) < 0) + if (PyType_Ready(tp) != 0) goto done; } @@ -2613,7 +2949,7 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) { } else { encoded_name = PyUnicode_AsUTF8String(name); if (!encoded_name) - return -1; + goto done; } PyErr_Format(PyExc_AttributeError, "'%.100s' object has no attribute '%.200s'", tp->tp_name, PyString_AsString(encoded_name)); Py_DECREF(encoded_name); @@ -2636,23 +2972,8 @@ SWIG_Python_NonDynamicSetAttr(PyObject *obj, PyObject *name, PyObject *value) { #define SWIG_exception_fail(code, msg) do { SWIG_Error(code, msg); SWIG_fail; } while(0) -#define SWIG_contract_assert(expr, msg) if (!(expr)) { SWIG_Error(SWIG_RuntimeError, msg); SWIG_fail; } else - - - -#ifdef __cplusplus -extern "C" { -#endif - -/* Method creation and docstring support functions */ - -SWIGINTERN PyMethodDef *SWIG_PythonGetProxyDoc(const char *name); -SWIGINTERN PyObject *SWIG_PyInstanceMethod_New(PyObject *SWIGUNUSEDPARM(self), PyObject *func); -SWIGINTERN PyObject 
*SWIG_PyStaticMethod_New(PyObject *SWIGUNUSEDPARM(self), PyObject *func); +#define SWIG_contract_assert(expr, msg) do { if (!(expr)) { SWIG_Error(SWIG_RuntimeError, msg); SWIG_fail; } } while (0) -#ifdef __cplusplus -} -#endif #define SWIG_exception(code, msg) do { SWIG_Error(code, msg); SWIG_fail;; } while(0) @@ -2661,15 +2982,21 @@ SWIGINTERN PyObject *SWIG_PyStaticMethod_New(PyObject *SWIGUNUSEDPARM(self), PyO /* -------- TYPES TABLE (BEGIN) -------- */ #define SWIGTYPE_p_char swig_types[0] -#define SWIGTYPE_p_sentencepiece__SentenceIterator swig_types[1] -#define SWIGTYPE_p_sentencepiece__SentencePieceProcessor swig_types[2] -#define SWIGTYPE_p_sentencepiece__SentencePieceTrainer swig_types[3] -#define SWIGTYPE_p_std__string swig_types[4] -#define SWIGTYPE_p_std__unordered_mapT_std__string_std__string_t swig_types[5] -#define SWIGTYPE_p_std__vectorT_int_t swig_types[6] -#define SWIGTYPE_p_std__vectorT_std__string_t swig_types[7] -static swig_type_info *swig_types[9]; -static swig_module_info swig_module = {swig_types, 8, 0, 0, 0, 0}; +#define SWIGTYPE_p_float swig_types[1] +#define SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText swig_types[2] +#define SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText swig_types[3] +#define SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece swig_types[4] +#define SWIGTYPE_p_sentencepiece__SentenceIterator swig_types[5] +#define SWIGTYPE_p_sentencepiece__SentencePieceProcessor swig_types[6] +#define SWIGTYPE_p_sentencepiece__SentencePieceTrainer swig_types[7] +#define SWIGTYPE_p_std__string swig_types[8] +#define SWIGTYPE_p_std__unordered_mapT_std__string_std__string_t swig_types[9] +#define SWIGTYPE_p_std__vectorT_absl__string_view_t swig_types[10] +#define SWIGTYPE_p_std__vectorT_int_t swig_types[11] +#define SWIGTYPE_p_std__vectorT_std__vectorT_absl__string_view_t_t swig_types[12] +#define SWIGTYPE_p_std__vectorT_std__vectorT_int_t_t swig_types[13] +static swig_type_info *swig_types[15]; 
+static swig_module_info swig_module = {swig_types, 14, 0, 0, 0, 0}; #define SWIG_TypeQuery(name) SWIG_TypeQueryModule(&swig_module, &swig_module, name) #define SWIG_MangledTypeQuery(name) SWIG_MangledTypeQueryModule(&swig_module, &swig_module, name) @@ -2692,12 +3019,56 @@ static swig_module_info swig_module = {swig_types, 8, 0, 0, 0, 0}; #endif #define SWIG_name "_sentencepiece" -#define SWIGVERSION 0x040001 -#define SWIG_VERSION SWIGVERSION - - -#define SWIG_as_voidptr(a) const_cast< void * >(static_cast< const void * >(a)) -#define SWIG_as_voidptrptr(a) ((void)SWIG_as_voidptr(*a),reinterpret_cast< void** >(a)) +#ifdef __cplusplus +#include +/* SwigValueWrapper is described in swig.swg */ +template class SwigValueWrapper { + struct SwigSmartPointer { + T *ptr; + SwigSmartPointer(T *p) : ptr(p) { } + ~SwigSmartPointer() { delete ptr; } + SwigSmartPointer& operator=(SwigSmartPointer& rhs) { T* oldptr = ptr; ptr = 0; delete oldptr; ptr = rhs.ptr; rhs.ptr = 0; return *this; } + void reset(T *p) { T* oldptr = ptr; ptr = 0; delete oldptr; ptr = p; } + } pointer; + SwigValueWrapper& operator=(const SwigValueWrapper& rhs); + SwigValueWrapper(const SwigValueWrapper& rhs); +public: + SwigValueWrapper() : pointer(0) { } + SwigValueWrapper& operator=(const T& t) { SwigSmartPointer tmp(new T(t)); pointer = tmp; return *this; } +#if __cplusplus >=201103L + SwigValueWrapper& operator=(T&& t) { SwigSmartPointer tmp(new T(std::move(t))); pointer = tmp; return *this; } + operator T&&() const { return std::move(*pointer.ptr); } +#else + operator T&() const { return *pointer.ptr; } +#endif + T *operator&() const { return pointer.ptr; } + static void reset(SwigValueWrapper& t, T *p) { t.pointer.reset(p); } +}; + +/* + * SwigValueInit() is a generic initialisation solution as the following approach: + * + * T c_result = T(); + * + * doesn't compile for all types for example: + * + * unsigned int c_result = unsigned int(); + */ +template T SwigValueInit() { + return T(); +} + +#if 
__cplusplus >=201103L +# define SWIG_STD_MOVE(OBJ) std::move(OBJ) +#else +# define SWIG_STD_MOVE(OBJ) OBJ +#endif + +#endif + + +#define SWIG_as_voidptr(a) const_cast< void * >(static_cast< const void * >(a)) +#define SWIG_as_voidptrptr(a) ((void)SWIG_as_voidptr(*a),reinterpret_cast< void** >(a)) #include @@ -2773,7 +3144,15 @@ namespace swig { } + +#include +#include +#include +#include +#include #include +#include +#include #include #include @@ -2781,6 +3160,8 @@ namespace { PyObject* kUnicodeInput = reinterpret_cast(0x1); PyObject* kByteInput = reinterpret_cast(0x2); +using BytesArray = std::vector; + inline void ReleaseResultObject(PyObject *obj) { if (obj != nullptr && obj != kUnicodeInput && obj != kByteInput) { Py_XDECREF(obj); @@ -2790,43 +3171,24 @@ inline void ReleaseResultObject(PyObject *obj) { class PyInputString { public: explicit PyInputString(PyObject* obj) { -#if PY_VERSION_HEX >= 0x03000000 if (PyUnicode_Check(obj)) { - // Python3, Unicode str_ = const_cast(PyUnicode_AsUTF8AndSize(obj, &size_)); input_type_ = kUnicodeInput; } else if (PyBytes_Check(obj)) { - // Python3, Bytes PyBytes_AsStringAndSize(obj, &str_, &size_); input_type_ = kByteInput; - } -#else - if (PyUnicode_Check(obj)) { - // Python2, Unicode - PyObject *utf8_obj = PyUnicode_AsUTF8String(obj); - PyString_AsStringAndSize(utf8_obj, &str_, &size_); - input_type_ = utf8_obj; - } else if (PyString_Check(obj)) { - // Python2, Bytes, - PyString_AsStringAndSize(obj, &str_, &size_); - input_type_ = kByteInput; - } -#endif - else { + } else { str_ = nullptr; } } + absl::string_view str() const { return absl::string_view(data(), size()); } const char* data() const { return str_; } Py_ssize_t size() const { return size_; } bool IsAvalable() const { return str_ != nullptr; } PyObject *input_type() const { return input_type_; } static bool IsUnicode(PyObject *resultobj) { -#if PY_VERSION_HEX >= 0x03000000 return (resultobj == nullptr || resultobj == kUnicodeInput); -#else - return (resultobj != 
nullptr && resultobj != kByteInput); -#endif } private: @@ -2840,19 +3202,11 @@ PyObject* MakePyOutputString(const std::string& output, if (PyInputString::IsUnicode(resultobj)) { return PyUnicode_FromStringAndSize(output.data(), output.size()); } -#if PY_VERSION_HEX >= 0x03000000 return PyBytes_FromStringAndSize(output.data(), output.size()); -#else - return PyString_FromStringAndSize(output.data(), output.size()); -#endif } -PyObject* MakePyOutputBytes(const std::string& output) { -#if PY_VERSION_HEX >= 0x03000000 +PyObject* MakePyOutputBytes(const sentencepiece::util::bytes& output) { return PyBytes_FromStringAndSize(output.data(), output.size()); -#else - return PyString_FromStringAndSize(output.data(), output.size()); -#endif } int ToSwigError(sentencepiece::util::StatusCode code) { @@ -2922,138 +3276,178 @@ class PySentenceIterator : public sentencepiece::SentenceIterator { std::string value_; sentencepiece::util::Status status_; }; -} +inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp, + std::vector *ids, + bool add_bos, bool add_eos, bool reverse, bool emit_unk_piece) { + if (!add_bos && !add_eos && !reverse) return; + if (reverse) std::reverse(ids->begin(), ids->end()); + if (add_bos) ids->insert(ids->begin(), sp.bos_id()); + if (add_eos) ids->push_back(sp.eos_id()); +} + +inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp, + std::vector *pieces, + bool add_bos, bool add_eos, bool reverse, bool emit_unk_piece) { + if (!add_bos && !add_eos && !reverse && !emit_unk_piece) return; + if (reverse) std::reverse(pieces->begin(), pieces->end()); + if (add_bos) pieces->insert(pieces->begin(), sp.IdToPiece(sp.bos_id())); + if (add_eos) pieces->push_back(sp.IdToPiece(sp.eos_id())); + if (emit_unk_piece) { + const auto &unk = sp.IdToPiece(sp.unk_id()); + for (auto &piece : *pieces) { + const int id = sp.PieceToId(piece); + if (id == sp.unk_id()) { + piece = unk; + } + } + } +} -SWIGINTERNINLINE PyObject* - SWIG_From_int (int 
value) -{ - return PyInt_FromLong((long) value); +inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp, + sentencepiece::util::bytes *proto, + bool add_bos, bool add_eos, bool reverse, bool emit_unk_piece) { + if (add_bos || add_eos || reverse || emit_unk_piece) { + throw sentencepiece::util::Status( + sentencepiece::util::StatusCode::kUnimplemented, + "add_bos, add_eos, reverse, and emit_unk_piece is not supported in proto API"); + } } +inline void RewriteIds(const sentencepiece::SentencePieceProcessor &sp, + sentencepiece::ImmutableSentencePieceText *proto, + bool add_bos, bool add_eos, bool reverse, bool emit_unk_piece) { + if (add_bos || add_eos || reverse || emit_unk_piece) { + throw sentencepiece::util::Status( + sentencepiece::util::StatusCode::kUnimplemented, + "add_bos, add_eos, reverse, and emit_unk_piece is not supported in proto API"); + } +} -SWIGINTERN swig_type_info* -SWIG_pchar_descriptor(void) -{ - static int init = 0; - static swig_type_info* info = 0; - if (!init) { - info = SWIG_TypeQuery("_p_char"); - init = 1; +inline void CheckIds(const std::vector &ids, int num_pieces) { + for (int id : ids) { + if (id < 0 || id >= num_pieces) { + throw sentencepiece::util::Status( + sentencepiece::util::StatusCode::kOutOfRange, + "piece id is out of range."); + } } - return info; } +inline void CheckIds(const std::vector &ids, int num_pieces) {} -SWIGINTERN int -SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc) -{ -#if PY_VERSION_HEX>=0x03000000 -#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR) - if (PyBytes_Check(obj)) -#else - if (PyUnicode_Check(obj)) -#endif -#else - if (PyString_Check(obj)) -#endif - { - char *cstr; Py_ssize_t len; - int ret = SWIG_OK; -#if PY_VERSION_HEX>=0x03000000 -#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR) - if (!alloc && cptr) { - /* We can't allow converting without allocation, since the internal - representation of string in Python 3 is UCS-2/UCS-4 but we require - a UTF-8 representation. 
- TODO(bhy) More detailed explanation */ - return SWIG_RuntimeError; - } - obj = PyUnicode_AsUTF8String(obj); - if (!obj) - return SWIG_TypeError; - if (alloc) - *alloc = SWIG_NEWOBJ; -#endif - PyBytes_AsStringAndSize(obj, &cstr, &len); -#else - PyString_AsStringAndSize(obj, &cstr, &len); -#endif - if (cptr) { - if (alloc) { - if (*alloc == SWIG_NEWOBJ) { - *cptr = reinterpret_cast< char* >(memcpy(new char[len + 1], cstr, sizeof(char)*(len + 1))); - *alloc = SWIG_NEWOBJ; - } else { - *cptr = cstr; - *alloc = SWIG_OLDOBJ; - } - } else { -#if PY_VERSION_HEX>=0x03000000 -#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR) - *cptr = PyBytes_AsString(obj); -#else - assert(0); /* Should never reach here with Unicode strings in Python 3 */ -#endif -#else - *cptr = SWIG_Python_str_AsChar(obj); - if (!*cptr) - ret = SWIG_TypeError; -#endif - } - } - if (psize) *psize = len + 1; -#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR) - Py_XDECREF(obj); -#endif - return ret; - } else { -#if defined(SWIG_PYTHON_2_UNICODE) -#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR) -#error "Cannot use both SWIG_PYTHON_2_UNICODE and SWIG_PYTHON_STRICT_BYTE_CHAR at once" -#endif -#if PY_VERSION_HEX<0x03000000 - if (PyUnicode_Check(obj)) { - char *cstr; Py_ssize_t len; - if (!alloc && cptr) { - return SWIG_RuntimeError; - } - obj = PyUnicode_AsUTF8String(obj); - if (!obj) - return SWIG_TypeError; - if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) { - if (cptr) { - if (alloc) *alloc = SWIG_NEWOBJ; - *cptr = reinterpret_cast< char* >(memcpy(new char[len + 1], cstr, sizeof(char)*(len + 1))); - } - if (psize) *psize = len + 1; +template +inline void ConvertToUnicodeSpans(T *proto) {} - Py_XDECREF(obj); - return SWIG_OK; - } else { - Py_XDECREF(obj); - } +template <> +inline void ConvertToUnicodeSpans(sentencepiece::ImmutableSentencePieceText *proto) { + proto->ConvertToUnicodeSpans(); +} + +template <> +inline void ConvertToUnicodeSpans(sentencepiece::ImmutableNBestSentencePieceText 
*proto) { + proto->ConvertToUnicodeSpans(); +} + +class ThreadPool { + public: + explicit ThreadPool(size_t request_size) : + request_size_(request_size) {} + + virtual ~ThreadPool() { + for (auto &task : tasks_) { + task.join(); } -#endif -#endif + } - swig_type_info* pchar_descriptor = SWIG_pchar_descriptor(); - if (pchar_descriptor) { - void* vptr = 0; - if (SWIG_ConvertPtr(obj, &vptr, pchar_descriptor, 0) == SWIG_OK) { - if (cptr) *cptr = (char *) vptr; - if (psize) *psize = vptr ? (strlen((char *)vptr) + 1) : 0; - if (alloc) *alloc = SWIG_OLDOBJ; - return SWIG_OK; - } + void Schedule(std::function closure) { + static constexpr size_t kMinThreadSize = 2; + if (request_size_ < kMinThreadSize) { + closure(); + } else { + tasks_.emplace_back(closure); } } - return SWIG_TypeError; + + private: + size_t request_size_ = 0; + std::vector tasks_; +}; + +template +inline void InitNumThreads(const std::vector &ins, int *num_threads) { + if (*num_threads < 0) { + *num_threads = std::thread::hardware_concurrency(); + } + *num_threads = std::max(1, + std::min({*num_threads, + static_cast(ins.size()), 256})); +} + +#define DEFINE_ENCODE_BATCH_FUNC_IMPL(FuncName, InType, OutType) \ + std::vector outs(ins.size()); \ + InitNumThreads(ins, &num_threads); \ + { \ + ThreadPool pool(ins.size()); \ + std::atomic index = 0; \ + for (int n = 0; n < num_threads; ++n) { \ + pool.Schedule([&]() { \ + size_t i = 0; \ + while ((i = std::atomic_fetch_add(&index, 1)) < outs.size()) { \ + auto out = enable_sampling ? 
\ + self->Sample##FuncName(ins[i], \ + nbest_size, alpha) : \ + self->FuncName(ins[i]); \ + RewriteIds(*self, &out, add_bos, add_eos, reverse, \ + emit_unk_piece); \ + ConvertToUnicodeSpans(&out); \ + outs[i] = std::move(out); \ + } \ + }); \ + } \ + } \ + return outs; + +#define DEFINE_DECODE_BATCH_FUNC_IMPL(FuncName, InType, OutType) \ + std::vector outs(ins.size()); \ + InitNumThreads(ins, &num_threads); \ + { \ + std::atomic index = 0; \ + ThreadPool pool(ins.size()); \ + for (int n = 0; n < num_threads; ++n) { \ + pool.Schedule([&]() { \ + size_t i = 0; \ + while ((i = std::atomic_fetch_add(&index, 1)) < outs.size()) { \ + CheckIds(ins[i], self->GetPieceSize()); \ + auto out = self->FuncName(ins[i]); \ + ConvertToUnicodeSpans(&out); \ + outs[i] = std::move(out); \ + } \ + }); \ + } \ + } \ + return outs; + +} // namespace + + +SWIGINTERNINLINE PyObject* + SWIG_From_unsigned_SS_int (unsigned int value) +{ + return PyInt_FromSize_t((size_t) value); } + #define SWIG_From_long PyInt_FromLong +SWIGINTERNINLINE PyObject* +SWIG_From_unsigned_SS_long (unsigned long value) +{ + return (value > LONG_MAX) ? + PyLong_FromUnsignedLong(value) : PyInt_FromLong(static_cast< long >(value)); +} + #include #if !defined(SWIG_NO_LLONG_MAX) @@ -3065,6 +3459,37 @@ SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc) #endif +#if defined(LLONG_MAX) && !defined(SWIG_LONG_LONG_AVAILABLE) +# define SWIG_LONG_LONG_AVAILABLE +#endif + + +#ifdef SWIG_LONG_LONG_AVAILABLE +SWIGINTERNINLINE PyObject* +SWIG_From_unsigned_SS_long_SS_long (unsigned long long value) +{ + return (value > LONG_MAX) ? 
+ PyLong_FromUnsignedLongLong(value) : PyInt_FromLong(static_cast< long >(value)); +} +#endif + + +SWIGINTERNINLINE PyObject * +SWIG_From_size_t (size_t value) +{ +#ifdef SWIG_LONG_LONG_AVAILABLE + if (sizeof(size_t) <= sizeof(unsigned long)) { +#endif + return SWIG_From_unsigned_SS_long (static_cast< unsigned long >(value)); +#ifdef SWIG_LONG_LONG_AVAILABLE + } else { + /* assume sizeof(size_t) <= sizeof(unsigned long long) */ + return SWIG_From_unsigned_SS_long_SS_long (static_cast< unsigned long long >(value)); + } +#endif +} + + SWIGINTERN int SWIG_AsVal_double (PyObject *obj, double *val) { @@ -3206,66 +3631,6 @@ SWIG_AsVal_int (PyObject * obj, int *val) } -/* Getting isfinite working pre C99 across multiple platforms is non-trivial. Users can provide SWIG_isfinite on older platforms. */ -#ifndef SWIG_isfinite -/* isfinite() is a macro for C99 */ -# if defined(isfinite) -# define SWIG_isfinite(X) (isfinite(X)) -# elif defined(__cplusplus) && __cplusplus >= 201103L -/* Use a template so that this works whether isfinite() is std::isfinite() or - * in the global namespace. The reality seems to vary between compiler - * versions. - * - * Make sure namespace std exists to avoid compiler warnings. 
- * - * extern "C++" is required as this fragment can end up inside an extern "C" { } block - */ -namespace std { } -extern "C++" template -inline int SWIG_isfinite_func(T x) { - using namespace std; - return isfinite(x); -} -# define SWIG_isfinite(X) (SWIG_isfinite_func(X)) -# elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) -# define SWIG_isfinite(X) (__builtin_isfinite(X)) -# elif defined(__clang__) && defined(__has_builtin) -# if __has_builtin(__builtin_isfinite) -# define SWIG_isfinite(X) (__builtin_isfinite(X)) -# endif -# elif defined(_MSC_VER) -# define SWIG_isfinite(X) (_finite(X)) -# elif defined(__sun) && defined(__SVR4) -# include -# define SWIG_isfinite(X) (finite(X)) -# endif -#endif - - -/* Accept infinite as a valid float value unless we are unable to check if a value is finite */ -#ifdef SWIG_isfinite -# define SWIG_Float_Overflow_Check(X) ((X < -FLT_MAX || X > FLT_MAX) && SWIG_isfinite(X)) -#else -# define SWIG_Float_Overflow_Check(X) ((X < -FLT_MAX || X > FLT_MAX)) -#endif - - -SWIGINTERN int -SWIG_AsVal_float (PyObject * obj, float *val) -{ - double v; - int res = SWIG_AsVal_double (obj, &v); - if (SWIG_IsOK(res)) { - if (SWIG_Float_Overflow_Check(v)) { - return SWIG_OverflowError; - } else { - if (val) *val = static_cast< float >(v); - } - } - return res; -} - - #define SWIG_From_double PyFloat_FromDouble @@ -3276,31 +3641,389 @@ SWIG_From_float (float value) } -SWIGINTERNINLINE PyObject* - SWIG_From_bool (bool value) +SWIGINTERN swig_type_info* +SWIG_pchar_descriptor(void) { - return PyBool_FromLong(value ? 
1 : 0); -} - -SWIGINTERN sentencepiece::util::Status sentencepiece_SentencePieceProcessor_LoadFromFile(sentencepiece::SentencePieceProcessor *self,absl::string_view arg){ - return self->Load(arg); + static int init = 0; + static swig_type_info* info = 0; + if (!init) { + info = SWIG_TypeQuery("_p_char"); + init = 1; + } + return info; +} + + +SWIGINTERN int +SWIG_AsCharPtrAndSize(PyObject *obj, char** cptr, size_t* psize, int *alloc) +{ +#if PY_VERSION_HEX>=0x03000000 +#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR) + if (PyBytes_Check(obj)) +#else + if (PyUnicode_Check(obj)) +#endif +#else + if (PyString_Check(obj)) +#endif + { + char *cstr; Py_ssize_t len; + int ret = SWIG_OK; +#if PY_VERSION_HEX>=0x03000000 +#if !defined(SWIG_PYTHON_STRICT_BYTE_CHAR) + if (!alloc && cptr) { + /* We can't allow converting without allocation, since the internal + representation of string in Python 3 is UCS-2/UCS-4 but we require + a UTF-8 representation. + TODO(bhy) More detailed explanation */ + return SWIG_RuntimeError; + } + obj = PyUnicode_AsUTF8String(obj); + if (!obj) + return SWIG_TypeError; + if (alloc) + *alloc = SWIG_NEWOBJ; +#endif + if (PyBytes_AsStringAndSize(obj, &cstr, &len) == -1) + return SWIG_TypeError; +#else + if (PyString_AsStringAndSize(obj, &cstr, &len) == -1) + return SWIG_TypeError; +#endif + if (cptr) { + if (alloc) { + if (*alloc == SWIG_NEWOBJ) { + *cptr = reinterpret_cast< char* >(memcpy(new char[len + 1], cstr, sizeof(char)*(len + 1))); + *alloc = SWIG_NEWOBJ; + } else { + *cptr = cstr; + *alloc = SWIG_OLDOBJ; + } + } else { +#if PY_VERSION_HEX>=0x03000000 +#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR) + *cptr = PyBytes_AsString(obj); +#else + assert(0); /* Should never reach here with Unicode strings in Python 3 */ +#endif +#else + *cptr = SWIG_Python_str_AsChar(obj); + if (!*cptr) + ret = SWIG_TypeError; +#endif + } + } + if (psize) *psize = len + 1; +#if PY_VERSION_HEX>=0x03000000 && !defined(SWIG_PYTHON_STRICT_BYTE_CHAR) + Py_XDECREF(obj); +#endif + return 
ret; + } else { +#if defined(SWIG_PYTHON_2_UNICODE) +#if defined(SWIG_PYTHON_STRICT_BYTE_CHAR) +#error "Cannot use both SWIG_PYTHON_2_UNICODE and SWIG_PYTHON_STRICT_BYTE_CHAR at once" +#endif +#if PY_VERSION_HEX<0x03000000 + if (PyUnicode_Check(obj)) { + char *cstr; Py_ssize_t len; + if (!alloc && cptr) { + return SWIG_RuntimeError; + } + obj = PyUnicode_AsUTF8String(obj); + if (!obj) + return SWIG_TypeError; + if (PyString_AsStringAndSize(obj, &cstr, &len) != -1) { + if (cptr) { + if (alloc) *alloc = SWIG_NEWOBJ; + *cptr = reinterpret_cast< char* >(memcpy(new char[len + 1], cstr, sizeof(char)*(len + 1))); + } + if (psize) *psize = len + 1; + + Py_XDECREF(obj); + return SWIG_OK; + } else { + Py_XDECREF(obj); + } + } +#endif +#endif + + swig_type_info* pchar_descriptor = SWIG_pchar_descriptor(); + if (pchar_descriptor) { + void* vptr = 0; + if (SWIG_ConvertPtr(obj, &vptr, pchar_descriptor, 0) == SWIG_OK) { + if (cptr) *cptr = (char *) vptr; + if (psize) *psize = vptr ? (strlen((char *)vptr) + 1) : 0; + if (alloc) *alloc = SWIG_OLDOBJ; + return SWIG_OK; + } + } + } + return SWIG_TypeError; +} + + + + + +/* Getting isfinite working pre C99 across multiple platforms is non-trivial. Users can provide SWIG_isfinite on older platforms. */ +#ifndef SWIG_isfinite +/* isfinite() is a macro for C99 */ +# if defined(isfinite) +# define SWIG_isfinite(X) (isfinite(X)) +# elif defined(__cplusplus) && __cplusplus >= 201103L +/* Use a template so that this works whether isfinite() is std::isfinite() or + * in the global namespace. The reality seems to vary between compiler + * versions. + * + * Make sure namespace std exists to avoid compiler warnings. 
+ * + * extern "C++" is required as this fragment can end up inside an extern "C" { } block + */ +namespace std { } +extern "C++" template +inline int SWIG_isfinite_func(T x) { + using namespace std; + return isfinite(x); +} +# define SWIG_isfinite(X) (SWIG_isfinite_func(X)) +# elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) +# define SWIG_isfinite(X) (__builtin_isfinite(X)) +# elif defined(_MSC_VER) +# define SWIG_isfinite(X) (_finite(X)) +# elif defined(__sun) && defined(__SVR4) +# include +# define SWIG_isfinite(X) (finite(X)) +# endif +#endif + + +/* Accept infinite as a valid float value unless we are unable to check if a value is finite */ +#ifdef SWIG_isfinite +# define SWIG_Float_Overflow_Check(X) ((X < -FLT_MAX || X > FLT_MAX) && SWIG_isfinite(X)) +#else +# define SWIG_Float_Overflow_Check(X) ((X < -FLT_MAX || X > FLT_MAX)) +#endif + + +SWIGINTERN int +SWIG_AsVal_float (PyObject * obj, float *val) +{ + double v; + int res = SWIG_AsVal_double (obj, &v); + if (SWIG_IsOK(res)) { + if (SWIG_Float_Overflow_Check(v)) { + return SWIG_OverflowError; + } else { + if (val) *val = static_cast< float >(v); + } + } + return res; +} + + +SWIGINTERNINLINE PyObject* + SWIG_From_int (int value) +{ + return PyInt_FromLong((long) value); +} + + +SWIGINTERNINLINE PyObject* + SWIG_From_bool (bool value) +{ + return PyBool_FromLong(value ? 
1 : 0); +} + +SWIGINTERN sentencepiece::util::Status sentencepiece_SentencePieceProcessor_LoadFromFile(sentencepiece::SentencePieceProcessor *self,absl::string_view arg){ + return self->Load(arg); } -SWIGINTERN std::string sentencepiece_SentencePieceProcessor_DecodeIdsWithCheck(sentencepiece::SentencePieceProcessor const *self,std::vector< int > const &ids){ - for (int id : ids) - if (id < 0 || id >= self->GetPieceSize()) - throw sentencepiece::util::Status( - sentencepiece::util::StatusCode::kOutOfRange, - "piece id is out of range."); + +SWIGINTERN int +SWIG_AsVal_bool (PyObject *obj, bool *val) +{ + int r; + if (!PyBool_Check(obj)) + return SWIG_ERROR; + r = PyObject_IsTrue(obj); + if (r == -1) + return SWIG_ERROR; + if (val) *val = r ? true : false; + return SWIG_OK; +} + +SWIGINTERN std::vector< int > sentencepiece_SentencePieceProcessor__EncodeAsIds(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + auto ids = enable_sampling ? + self->SampleEncodeAsIds(text, nbest_size, alpha) : + self->EncodeAsIds(text); + RewriteIds(*self, &ids, add_bos, add_eos, reverse, emit_unk_piece); + return ids; + } +SWIGINTERN std::vector< std::string > sentencepiece_SentencePieceProcessor__EncodeAsPieces(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + auto pieces = enable_sampling ? 
+ self->SampleEncodeAsPieces(text, nbest_size, alpha) : + self->EncodeAsPieces(text); + RewriteIds(*self, &pieces, add_bos, add_eos, reverse, emit_unk_piece); + return pieces; + } +SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcessor__EncodeAsSerializedProto(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + auto proto = enable_sampling ? + self->SampleEncodeAsSerializedProto(text, nbest_size, alpha) : + self->EncodeAsSerializedProto(text); + RewriteIds(*self, &proto, add_bos, add_eos, reverse, emit_unk_piece); + return proto; + } +SWIGINTERN sentencepiece::ImmutableSentencePieceText sentencepiece_SentencePieceProcessor__EncodeAsImmutableProto(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + auto proto = enable_sampling ? 
+ self->SampleEncodeAsImmutableProto(text, nbest_size, alpha) : + self->EncodeAsImmutableProto(text); + proto.ConvertToUnicodeSpans(); + RewriteIds(*self, &proto, add_bos, add_eos, reverse, emit_unk_piece); + return proto; + } +SWIGINTERN std::vector< std::vector< int > > sentencepiece_SentencePieceProcessor__EncodeAsIdsBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< absl::string_view > const &ins,int num_threads,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsIds, + absl::string_view, std::vector); + } +SWIGINTERN std::vector< std::vector< std::string > > sentencepiece_SentencePieceProcessor__EncodeAsPiecesBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< absl::string_view > const &ins,int num_threads,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsPieces, + absl::string_view, std::vector); + } +SWIGINTERN BytesArray sentencepiece_SentencePieceProcessor__EncodeAsSerializedProtoBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< absl::string_view > const &ins,int num_threads,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsSerializedProto, + absl::string_view, + sentencepiece::util::bytes); + } +SWIGINTERN std::vector< sentencepiece::ImmutableSentencePieceText > sentencepiece_SentencePieceProcessor__EncodeAsImmutableProtoBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< absl::string_view > const &ins,int num_threads,bool enable_sampling,int nbest_size,float alpha,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + DEFINE_ENCODE_BATCH_FUNC_IMPL(EncodeAsImmutableProto, + absl::string_view, + sentencepiece::ImmutableSentencePieceText); + } +SWIGINTERN std::string 
sentencepiece_SentencePieceProcessor__DecodeIds(sentencepiece::SentencePieceProcessor const *self,std::vector< int > const &ids){ + CheckIds(ids, self->GetPieceSize()); return self->DecodeIds(ids); } -SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcessor_DecodeIdsAsSerializedProtoWithCheck(sentencepiece::SentencePieceProcessor const *self,std::vector< int > const &ids){ - for (int id : ids) - if (id < 0 || id >= self->GetPieceSize()) - throw sentencepiece::util::Status( - sentencepiece::util::StatusCode::kOutOfRange, - "piece id is out of range."); +SWIGINTERN std::string sentencepiece_SentencePieceProcessor__DecodePieces(sentencepiece::SentencePieceProcessor const *self,std::vector< absl::string_view > const &pieces){ + return self->DecodePieces(pieces); + } +SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcessor__DecodeIdsAsSerializedProto(sentencepiece::SentencePieceProcessor const *self,std::vector< int > const &ids){ + CheckIds(ids, self->GetPieceSize()); return self->DecodeIdsAsSerializedProto(ids); } +SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcessor__DecodePiecesAsSerializedProto(sentencepiece::SentencePieceProcessor const *self,std::vector< absl::string_view > const &pieces){ + CheckIds(pieces, self->GetPieceSize()); + return self->DecodePiecesAsSerializedProto(pieces); + } +SWIGINTERN sentencepiece::ImmutableSentencePieceText sentencepiece_SentencePieceProcessor__DecodeIdsAsImmutableProto(sentencepiece::SentencePieceProcessor const *self,std::vector< int > const &ids){ + CheckIds(ids, self->GetPieceSize()); + auto proto = self->DecodeIdsAsImmutableProto(ids); + proto.ConvertToUnicodeSpans(); + return proto; + } +SWIGINTERN sentencepiece::ImmutableSentencePieceText sentencepiece_SentencePieceProcessor__DecodePiecesAsImmutableProto(sentencepiece::SentencePieceProcessor const *self,std::vector< absl::string_view > const &pieces){ + CheckIds(pieces, self->GetPieceSize()); + auto proto= 
self->DecodePiecesAsImmutableProto(pieces); + proto.ConvertToUnicodeSpans(); + return proto; + } +SWIGINTERN std::vector< std::string > sentencepiece_SentencePieceProcessor__DecodeIdsBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< std::vector< int > > const &ins,int num_threads){ + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodeIds, int, std::string); + } +SWIGINTERN BytesArray sentencepiece_SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< std::vector< int > > const &ins,int num_threads){ + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodeIdsAsSerializedProto, int, + sentencepiece::util::bytes); + } +SWIGINTERN std::vector< sentencepiece::ImmutableSentencePieceText > sentencepiece_SentencePieceProcessor__DecodeIdsAsImmutableProtoBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< std::vector< int > > const &ins,int num_threads){ + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodeIdsAsImmutableProto, int, + sentencepiece::ImmutableSentencePieceText); + } +SWIGINTERN std::vector< std::string > sentencepiece_SentencePieceProcessor__DecodePiecesBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< std::vector< absl::string_view > > const &ins,int num_threads){ + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodePieces, std::string, std::string); + } +SWIGINTERN BytesArray sentencepiece_SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< std::vector< absl::string_view > > const &ins,int num_threads){ + DEFINE_DECODE_BATCH_FUNC_IMPL(DecodePiecesAsSerializedProto, std::string, + sentencepiece::util::bytes); + } +SWIGINTERN std::vector< sentencepiece::ImmutableSentencePieceText > sentencepiece_SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch(sentencepiece::SentencePieceProcessor const *self,std::vector< std::vector< absl::string_view > > const &ins,int num_threads){ + 
DEFINE_DECODE_BATCH_FUNC_IMPL(DecodePiecesAsImmutableProto, std::string, + sentencepiece::ImmutableSentencePieceText); + } +SWIGINTERN std::vector< std::vector< int > > sentencepiece_SentencePieceProcessor__NBestEncodeAsIds(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int nbest_size,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + auto idss = self->NBestEncodeAsIds(text, nbest_size); + for (auto &ids : idss) { + RewriteIds(*self, &ids, add_bos, add_eos, reverse, emit_unk_piece); + } + return idss; + } +SWIGINTERN std::vector< std::vector< std::string > > sentencepiece_SentencePieceProcessor__NBestEncodeAsPieces(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int nbest_size,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + auto piecess = self->NBestEncodeAsPieces(text, nbest_size); + for (auto &pieces : piecess) { + RewriteIds(*self, &pieces, add_bos, add_eos, reverse, emit_unk_piece); + } + return piecess; + } +SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcessor__NBestEncodeAsSerializedProto(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int nbest_size,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + RewriteIds(*self, static_cast(nullptr), + add_bos, add_eos, reverse, emit_unk_piece); + return self->NBestEncodeAsSerializedProto(text, nbest_size); + } +SWIGINTERN sentencepiece::ImmutableNBestSentencePieceText sentencepiece_SentencePieceProcessor__NBestEncodeAsImmutableProto(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int nbest_size,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + RewriteIds(*self, static_cast(nullptr), + add_bos, add_eos, reverse, emit_unk_piece); + auto proto = self->NBestEncodeAsImmutableProto(text, nbest_size); + proto.ConvertToUnicodeSpans(); + return proto; + } +SWIGINTERN std::vector< std::pair< std::vector< int >,float > > 
sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsIds(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int num_samples,float alpha,bool wor,bool include_best,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + auto idss = self->SampleEncodeAndScoreAsIds(text, num_samples, + alpha, wor, include_best); + for (auto &ids : idss) { + RewriteIds(*self, &ids.first, add_bos, add_eos, reverse, emit_unk_piece); + } + return idss; + } +SWIGINTERN std::vector< std::pair< std::vector< std::string >,float > > sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsPieces(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int num_samples,float alpha,bool wor,bool include_best,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + auto piecess = self->SampleEncodeAndScoreAsPieces(text, num_samples, + alpha, wor, include_best); + for (auto &pieces : piecess) { + RewriteIds(*self, &pieces.first, add_bos, add_eos, reverse, emit_unk_piece); + } + return piecess; + } +SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int num_samples,float alpha,bool wor,bool include_best,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + RewriteIds(*self, static_cast(nullptr), + add_bos, add_eos, reverse, emit_unk_piece); + return self->SampleEncodeAndScoreAsSerializedProto(text, num_samples, + alpha, wor, include_best); + } +SWIGINTERN sentencepiece::ImmutableNBestSentencePieceText sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto(sentencepiece::SentencePieceProcessor const *self,absl::string_view text,int num_samples,float alpha,bool wor,bool include_best,bool add_bos,bool add_eos,bool reverse,bool emit_unk_piece){ + RewriteIds(*self, static_cast(nullptr), + add_bos, add_eos, reverse, emit_unk_piece); + auto proto = 
self->SampleEncodeAndScoreAsImmutableProto(text, num_samples, + alpha, wor, include_best); + proto.ConvertToUnicodeSpans(); + return proto; + } +SWIGINTERN float sentencepiece_SentencePieceProcessor__CalculateEntropy(sentencepiece::SentencePieceProcessor *self,absl::string_view text,float alpha){ + return self->CalculateEntropy(text, alpha); + } +SWIGINTERN std::vector< float > sentencepiece_SentencePieceProcessor__CalculateEntropyBatch(sentencepiece::SentencePieceProcessor *self,std::vector< absl::string_view > const &ins,float alpha,int num_threads){ + std::vector outs(ins.size()); + InitNumThreads(ins, &num_threads); + { + ThreadPool pool(ins.size()); + std::atomic index = 0; + for (int n = 0; n < num_threads; ++n) { + pool.Schedule([&]() { + size_t i = 0; + while ((i = std::atomic_fetch_add(&index, 1)) < outs.size()) { + outs[i] = self->CalculateEntropy(ins[i], alpha); + } + }); + } + } + return outs; + } SWIGINTERN int SWIG_AsVal_unsigned_SS_long (PyObject *obj, unsigned long *val) @@ -3395,41 +4118,41 @@ SWIGINTERN sentencepiece::util::bytes sentencepiece_SentencePieceTrainer__TrainF #ifdef __cplusplus extern "C" { #endif -SWIGINTERN PyObject *_wrap_new_SentencePieceProcessor(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_new_ImmutableSentencePieceText_ImmutableSentencePiece(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *result = 0 ; + sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *result = 0 ; - if (!SWIG_Python_UnpackTuple(args, "new_SentencePieceProcessor", 0, 0, 0)) SWIG_fail; + if (!SWIG_Python_UnpackTuple(args, "new_ImmutableSentencePieceText_ImmutableSentencePiece", 0, 0, 0)) SWIG_fail; { try { - result = (sentencepiece::SentencePieceProcessor *)new sentencepiece::SentencePieceProcessor(); + result = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *)new sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece(); 
ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_sentencepiece__SentencePieceProcessor, SWIG_POINTER_NEW | 0 ); + resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, SWIG_POINTER_NEW | 0 ); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_delete_SentencePieceProcessor(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_delete_ImmutableSentencePieceText_ImmutableSentencePiece(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ; void *argp1 = 0 ; int res1 = 0 ; PyObject *swig_obj[1] ; if (!args) SWIG_fail; swig_obj[0] = args; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, SWIG_POINTER_DISOWN | 0 ); + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, SWIG_POINTER_DISOWN | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_SentencePieceProcessor" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_ImmutableSentencePieceText_ImmutableSentencePiece" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *""'"); } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1); { try { delete arg1; @@ -3446,33 +4169,24 @@ 
SWIGINTERN PyObject *_wrap_delete_SentencePieceProcessor(PyObject *SWIGUNUSEDPAR } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadFromSerializedProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentencePiece__piece(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; + sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[2] ; - sentencepiece::util::Status result; + PyObject *swig_obj[1] ; + std::string *result = 0 ; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_LoadFromSerializedProto", 2, 2, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_LoadFromSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_ImmutableSentencePiece__piece" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *""'"); } + arg1 = reinterpret_cast< 
sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1); { try { - result = (arg1)->LoadFromSerializedProto(arg2); + result = (std::string *) &((sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *)arg1)->piece(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -3480,10 +4194,8 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadFromSerializedProto(PyObje } } { - if (!(&result)->ok()) { - SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); - } - resultobj = SWIG_From_bool((&result)->ok()); + PyObject *input_type = resultobj; + resultobj = MakePyOutputString(*result, input_type); } return resultobj; fail: @@ -3491,33 +4203,24 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadFromSerializedProto(PyObje } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetEncodeExtraOptions(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentencePiece__surface(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; + sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[2] ; - sentencepiece::util::Status result; + PyObject *swig_obj[1] ; + std::string *result = 0 ; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SetEncodeExtraOptions", 2, 2, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" 
"SentencePieceProcessor_SetEncodeExtraOptions" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_ImmutableSentencePiece__surface" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *""'"); } + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1); { try { - result = (arg1)->SetEncodeExtraOptions(arg2); + result = (std::string *) &((sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *)arg1)->surface(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -3525,10 +4228,8 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetEncodeExtraOptions(PyObject } } { - if (!(&result)->ok()) { - SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); - } - resultobj = SWIG_From_bool((&result)->ok()); + PyObject *input_type = resultobj; + resultobj = MakePyOutputString(*result, input_type); } return resultobj; fail: @@ -3536,350 +4237,248 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetEncodeExtraOptions(PyObject } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetDecodeExtraOptions(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentencePiece__id(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; + sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 
= (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[2] ; - sentencepiece::util::Status result; + PyObject *swig_obj[1] ; + uint32_t result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SetDecodeExtraOptions", 2, 2, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SetDecodeExtraOptions" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_ImmutableSentencePiece__id" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *""'"); } + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1); { try { - result = (arg1)->SetDecodeExtraOptions(arg2); + result = ((sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *)arg1)->id(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - if (!(&result)->ok()) { - SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); - } - resultobj = SWIG_From_bool((&result)->ok()); - } + resultobj = 
SWIG_From_unsigned_SS_int(static_cast< unsigned int >(result)); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetVocabulary(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentencePiece__begin(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - std::vector< std::string > *arg2 = 0 ; + sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[2] ; - sentencepiece::util::Status result; + PyObject *swig_obj[1] ; + uint32_t result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SetVocabulary", 2, 2, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SetVocabulary" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - std::vector *out = nullptr; - if (PyList_Check(swig_obj[1])) { - const size_t size = PyList_Size(swig_obj[1]); - out = new std::vector(size); - for (size_t i = 0; i < size; ++i) { - const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); - if (ustring.IsAvalable()) { - (*out)[i] = std::string(ustring.data(), ustring.size()); - } else { - PyErr_SetString(PyExc_TypeError, "list must contain strings"); - SWIG_fail; - } - resultobj = ustring.input_type(); - } - } else { - PyErr_SetString(PyExc_TypeError, "not a list"); - SWIG_fail; - 
} - arg2 = out; + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_ImmutableSentencePiece__begin" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *""'"); } + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1); { try { - result = (arg1)->SetVocabulary((std::vector< std::string > const &)*arg2); + result = ((sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *)arg1)->begin(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - if (!(&result)->ok()) { - SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); - } - resultobj = SWIG_From_bool((&result)->ok()); - } - { - delete arg2; - } + resultobj = SWIG_From_unsigned_SS_int(static_cast< unsigned int >(result)); return resultobj; fail: - { - delete arg2; - } return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_ResetVocabulary(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_ImmutableSentencePiece__end(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *arg1 = (sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *) 0 ; void *argp1 = 0 ; int res1 = 0 ; PyObject *swig_obj[1] ; - sentencepiece::util::Status result; + uint32_t result; if (!args) SWIG_fail; swig_obj[0] = args; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" 
"SentencePieceProcessor_ResetVocabulary" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText_ImmutableSentencePiece__end" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *""'"); } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece * >(argp1); { try { - result = (arg1)->ResetVocabulary(); + result = ((sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece const *)arg1)->end(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - if (!(&result)->ok()) { - SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); - } - resultobj = SWIG_From_bool((&result)->ok()); - } + resultobj = SWIG_From_unsigned_SS_int(static_cast< unsigned int >(result)); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadVocabulary(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *ImmutableSentencePieceText_ImmutableSentencePiece_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *obj; + if (!SWIG_Python_UnpackTuple(args, "swigregister", 1, 1, &obj)) return NULL; + SWIG_TypeNewClientData(SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, SWIG_NewClientData(obj)); + return SWIG_Py_Void(); +} + +SWIGINTERN PyObject *ImmutableSentencePieceText_ImmutableSentencePiece_swiginit(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + return SWIG_Python_InitShadowInstance(args); +} + +SWIGINTERN PyObject *_wrap_new_ImmutableSentencePieceText(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = 
(sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; - int arg3 ; - void *argp1 = 0 ; - int res1 = 0 ; - int val3 ; - int ecode3 = 0 ; - PyObject *swig_obj[3] ; - sentencepiece::util::Status result; + sentencepiece::ImmutableSentencePieceText *result = 0 ; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_LoadVocabulary", 3, 3, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); - if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_LoadVocabulary" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); - } - ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_LoadVocabulary" "', argument " "3"" of type '" "int""'"); - } - arg3 = static_cast< int >(val3); + if (!SWIG_Python_UnpackTuple(args, "new_ImmutableSentencePieceText", 0, 0, 0)) SWIG_fail; { try { - result = (arg1)->LoadVocabulary(arg2,arg3); + result = (sentencepiece::ImmutableSentencePieceText *)new sentencepiece::ImmutableSentencePieceText(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - if (!(&result)->ok()) { - SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); - } - resultobj = SWIG_From_bool((&result)->ok()); - } + resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_NEW | 0 ); return 
resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetEncoderVersion(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_delete_ImmutableSentencePieceText(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - sentencepiece::EncoderVersion arg2 ; + sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - int val2 ; - int ecode2 = 0 ; - PyObject *swig_obj[2] ; - sentencepiece::util::Status result; + PyObject *swig_obj[1] ; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SetEncoderVersion", 2, 2, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_DISOWN | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SetEncoderVersion" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_ImmutableSentencePieceText" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText *""'"); } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "SentencePieceProcessor_SetEncoderVersion" "', argument " "2"" of type '" "sentencepiece::EncoderVersion""'"); - } - arg2 = static_cast< sentencepiece::EncoderVersion >(val2); + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1); { try { - result = (arg1)->SetEncoderVersion(arg2); + delete arg1; ReleaseResultObject(resultobj); } 
catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - if (!(&result)->ok()) { - SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); - } - resultobj = SWIG_From_bool((&result)->ok()); - } + resultobj = SWIG_Py_Void(); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_GetEncoderVersion(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText__pieces_size(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ; void *argp1 = 0 ; int res1 = 0 ; PyObject *swig_obj[1] ; - sentencepiece::EncoderVersion result; + size_t result; if (!args) SWIG_fail; swig_obj[0] = args; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_GetEncoderVersion" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText__pieces_size" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText const *""'"); } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1); { try { - result = (sentencepiece::EncoderVersion)((sentencepiece::SentencePieceProcessor const *)arg1)->GetEncoderVersion(); + result = ((sentencepiece::ImmutableSentencePieceText const *)arg1)->pieces_size(); ReleaseResultObject(resultobj); } catch (const 
sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - resultobj = SWIG_From_int(static_cast< int >(result)); + resultobj = SWIG_From_size_t(static_cast< size_t >(result)); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_EncodeAsPieces(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText__pieces(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; + sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ; + int arg2 ; void *argp1 = 0 ; int res1 = 0 ; + int val2 ; + int ecode2 = 0 ; PyObject *swig_obj[2] ; - std::vector< std::string > result; + sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_EncodeAsPieces", 2, 2, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_Python_UnpackTuple(args, "ImmutableSentencePieceText__pieces", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_EncodeAsPieces" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText__pieces" "', 
argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText const *""'"); } + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1); + ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); + if (!SWIG_IsOK(ecode2)) { + SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "ImmutableSentencePieceText__pieces" "', argument " "2"" of type '" "int""'"); + } + arg2 = static_cast< int >(val2); { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->EncodeAsPieces(arg2); + result = ((sentencepiece::ImmutableSentencePieceText const *)arg1)->pieces(arg2); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - PyObject *input_type = resultobj; - resultobj = PyList_New((&result)->size()); - for (size_t i = 0; i < (&result)->size(); ++i) { - PyList_SetItem(resultobj, i, MakePyOutputString(result[i], input_type)); - } - } + resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece(result)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, SWIG_POINTER_OWN | 0 ); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_EncodeAsIds(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText__text(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; + sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[2] ; - std::vector< int > result; + PyObject *swig_obj[1] ; + std::string *result = 0 ; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_EncodeAsIds", 2, 2, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], 
&argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_EncodeAsIds" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText__text" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText const *""'"); } + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1); { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->EncodeAsIds(arg2); + result = (std::string *) &((sentencepiece::ImmutableSentencePieceText const *)arg1)->text(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -3887,10 +4486,8 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_EncodeAsIds(PyObject *SWIGUNUS } } { - resultobj = PyList_New((&result)->size()); - for (size_t i = 0; i < (&result)->size(); ++i) { - PyList_SetItem(resultobj, i, PyInt_FromLong(static_cast(result[i]))); - } + PyObject *input_type = resultobj; + resultobj = MakePyOutputString(*result, input_type); } return resultobj; fail: @@ -3898,99 +4495,55 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_EncodeAsIds(PyObject *SWIGUNUS } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_NBestEncodeAsPieces(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText__score(PyObject *self, PyObject 
*args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; - int arg3 ; + sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - int val3 ; - int ecode3 = 0 ; - PyObject *swig_obj[3] ; - std::vector< std::vector< std::string > > result; + PyObject *swig_obj[1] ; + float result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_NBestEncodeAsPieces", 3, 3, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_NBestEncodeAsPieces" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableSentencePieceText__score" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText const *""'"); } - ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_NBestEncodeAsPieces" "', argument " "3"" of type '" "int""'"); - } - arg3 = static_cast< int >(val3); + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1); { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->NBestEncodeAsPieces(arg2,arg3); + result = 
(float)((sentencepiece::ImmutableSentencePieceText const *)arg1)->score(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - PyObject *input_type = resultobj; - resultobj = PyList_New((&result)->size()); - for (size_t i = 0; i < (&result)->size(); ++i) { - PyObject *obj = PyList_New(result[i].size()); - for (size_t j = 0; j < result[i].size(); ++j) { - PyList_SetItem(obj, j, MakePyOutputString(result[i][j], input_type)); - } - PyList_SetItem(resultobj, i, obj); - } - } + resultobj = SWIG_From_float(static_cast< float >(result)); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_NBestEncodeAsIds(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableSentencePieceText_SerializeAsString(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; - int arg3 ; + sentencepiece::ImmutableSentencePieceText *arg1 = (sentencepiece::ImmutableSentencePieceText *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - int val3 ; - int ecode3 = 0 ; - PyObject *swig_obj[3] ; - std::vector< std::vector< int > > result; + PyObject *swig_obj[1] ; + sentencepiece::util::bytes result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_NBestEncodeAsIds", 3, 3, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_NBestEncodeAsIds" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in 
method '" "ImmutableSentencePieceText_SerializeAsString" "', argument " "1"" of type '" "sentencepiece::ImmutableSentencePieceText const *""'"); } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); - } - ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_NBestEncodeAsIds" "', argument " "3"" of type '" "int""'"); - } - arg3 = static_cast< int >(val3); + arg1 = reinterpret_cast< sentencepiece::ImmutableSentencePieceText * >(argp1); { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->NBestEncodeAsIds(arg2,arg3); + result = ((sentencepiece::ImmutableSentencePieceText const *)arg1)->SerializeAsString(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -3998,232 +4551,163 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_NBestEncodeAsIds(PyObject *SWI } } { - resultobj = PyList_New((&result)->size()); - for (size_t i = 0; i < (&result)->size(); ++i) { - PyObject *obj = PyList_New(result[i].size()); - for (size_t j = 0; j < result[i].size(); ++j) { - PyList_SetItem(obj, j, PyInt_FromLong(static_cast(result[i][j]))); - } - PyList_SetItem(resultobj, i, obj); + resultobj = MakePyOutputBytes(result); + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *ImmutableSentencePieceText_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *obj; + if (!SWIG_Python_UnpackTuple(args, "swigregister", 1, 1, &obj)) return NULL; + SWIG_TypeNewClientData(SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_NewClientData(obj)); + return SWIG_Py_Void(); +} + +SWIGINTERN PyObject 
*ImmutableSentencePieceText_swiginit(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + return SWIG_Python_InitShadowInstance(args); +} + +SWIGINTERN PyObject *_wrap_new_ImmutableNBestSentencePieceText(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::ImmutableNBestSentencePieceText *result = 0 ; + + if (!SWIG_Python_UnpackTuple(args, "new_ImmutableNBestSentencePieceText", 0, 0, 0)) SWIG_fail; + { + try { + result = (sentencepiece::ImmutableNBestSentencePieceText *)new sentencepiece::ImmutableNBestSentencePieceText(); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } + resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, SWIG_POINTER_NEW | 0 ); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SampleEncodeAsPieces(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_delete_ImmutableNBestSentencePieceText(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; - int arg3 ; - float arg4 ; + sentencepiece::ImmutableNBestSentencePieceText *arg1 = (sentencepiece::ImmutableNBestSentencePieceText *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - int val3 ; - int ecode3 = 0 ; - float val4 ; - int ecode4 = 0 ; - PyObject *swig_obj[4] ; - std::vector< std::string > result; + PyObject *swig_obj[1] ; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SampleEncodeAsPieces", 4, 4, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, SWIG_POINTER_DISOWN | 0 ); if 
(!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SampleEncodeAsPieces" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_ImmutableNBestSentencePieceText" "', argument " "1"" of type '" "sentencepiece::ImmutableNBestSentencePieceText *""'"); } - ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_SampleEncodeAsPieces" "', argument " "3"" of type '" "int""'"); - } - arg3 = static_cast< int >(val3); - ecode4 = SWIG_AsVal_float(swig_obj[3], &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor_SampleEncodeAsPieces" "', argument " "4"" of type '" "float""'"); - } - arg4 = static_cast< float >(val4); + arg1 = reinterpret_cast< sentencepiece::ImmutableNBestSentencePieceText * >(argp1); { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->SampleEncodeAsPieces(arg2,arg3,arg4); + delete arg1; ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - PyObject *input_type = resultobj; - resultobj = PyList_New((&result)->size()); - for (size_t i = 0; i < (&result)->size(); ++i) { - PyList_SetItem(resultobj, i, MakePyOutputString(result[i], input_type)); - } - } + resultobj = SWIG_Py_Void(); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SampleEncodeAsIds(PyObject 
*SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableNBestSentencePieceText__nbests_size(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; - int arg3 ; - float arg4 ; + sentencepiece::ImmutableNBestSentencePieceText *arg1 = (sentencepiece::ImmutableNBestSentencePieceText *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - int val3 ; - int ecode3 = 0 ; - float val4 ; - int ecode4 = 0 ; - PyObject *swig_obj[4] ; - std::vector< int > result; + PyObject *swig_obj[1] ; + size_t result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SampleEncodeAsIds", 4, 4, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SampleEncodeAsIds" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableNBestSentencePieceText__nbests_size" "', argument " "1"" of type '" "sentencepiece::ImmutableNBestSentencePieceText const *""'"); } - ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_SampleEncodeAsIds" "', argument " "3"" of type '" "int""'"); - } - arg3 = static_cast< int >(val3); - 
ecode4 = SWIG_AsVal_float(swig_obj[3], &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor_SampleEncodeAsIds" "', argument " "4"" of type '" "float""'"); - } - arg4 = static_cast< float >(val4); + arg1 = reinterpret_cast< sentencepiece::ImmutableNBestSentencePieceText * >(argp1); { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->SampleEncodeAsIds(arg2,arg3,arg4); + result = ((sentencepiece::ImmutableNBestSentencePieceText const *)arg1)->nbests_size(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - resultobj = PyList_New((&result)->size()); - for (size_t i = 0; i < (&result)->size(); ++i) { - PyList_SetItem(resultobj, i, PyInt_FromLong(static_cast(result[i]))); - } - } + resultobj = SWIG_From_size_t(static_cast< size_t >(result)); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodePieces(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableNBestSentencePieceText__nbests(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - std::vector< std::string > *arg2 = 0 ; + sentencepiece::ImmutableNBestSentencePieceText *arg1 = (sentencepiece::ImmutableNBestSentencePieceText *) 0 ; + int arg2 ; void *argp1 = 0 ; int res1 = 0 ; + int val2 ; + int ecode2 = 0 ; PyObject *swig_obj[2] ; - std::string result; + sentencepiece::ImmutableSentencePieceText result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_DecodePieces", 2, 2, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_Python_UnpackTuple(args, "ImmutableNBestSentencePieceText__nbests", 2, 2, swig_obj)) SWIG_fail; + res1 = 
SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_DecodePieces" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - std::vector *out = nullptr; - if (PyList_Check(swig_obj[1])) { - const size_t size = PyList_Size(swig_obj[1]); - out = new std::vector(size); - for (size_t i = 0; i < size; ++i) { - const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); - if (ustring.IsAvalable()) { - (*out)[i] = std::string(ustring.data(), ustring.size()); - } else { - PyErr_SetString(PyExc_TypeError, "list must contain strings"); - SWIG_fail; - } - resultobj = ustring.input_type(); - } - } else { - PyErr_SetString(PyExc_TypeError, "not a list"); - SWIG_fail; - } - arg2 = out; + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableNBestSentencePieceText__nbests" "', argument " "1"" of type '" "sentencepiece::ImmutableNBestSentencePieceText const *""'"); } + arg1 = reinterpret_cast< sentencepiece::ImmutableNBestSentencePieceText * >(argp1); + ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); + if (!SWIG_IsOK(ecode2)) { + SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "ImmutableNBestSentencePieceText__nbests" "', argument " "2"" of type '" "int""'"); + } + arg2 = static_cast< int >(val2); { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->DecodePieces((std::vector< std::string > const &)*arg2); + result = ((sentencepiece::ImmutableNBestSentencePieceText const *)arg1)->nbests(arg2); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - PyObject *input_type = resultobj; - resultobj = MakePyOutputString(result, input_type); - } - { - delete arg2; - } + 
resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableSentencePieceText(result)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0 ); return resultobj; fail: - { - delete arg2; - } return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_EncodeAsSerializedProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_ImmutableNBestSentencePieceText_SerializeAsString(PyObject *self, PyObject *args) { PyObject *resultobj = 0; - sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; + sentencepiece::ImmutableNBestSentencePieceText *arg1 = (sentencepiece::ImmutableNBestSentencePieceText *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[2] ; + PyObject *swig_obj[1] ; sentencepiece::util::bytes result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_EncodeAsSerializedProto", 2, 2, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_EncodeAsSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); - } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "ImmutableNBestSentencePieceText_SerializeAsString" "', argument " "1"" of type '" "sentencepiece::ImmutableNBestSentencePieceText const *""'"); } + arg1 = reinterpret_cast< 
sentencepiece::ImmutableNBestSentencePieceText * >(argp1); { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->EncodeAsSerializedProto(arg2); + result = ((sentencepiece::ImmutableNBestSentencePieceText const *)arg1)->SerializeAsString(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -4239,80 +4723,81 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_EncodeAsSerializedProto(PyObje } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SampleEncodeAsSerializedProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *ImmutableNBestSentencePieceText_swigregister(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + PyObject *obj; + if (!SWIG_Python_UnpackTuple(args, "swigregister", 1, 1, &obj)) return NULL; + SWIG_TypeNewClientData(SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, SWIG_NewClientData(obj)); + return SWIG_Py_Void(); +} + +SWIGINTERN PyObject *ImmutableNBestSentencePieceText_swiginit(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { + return SWIG_Python_InitShadowInstance(args); +} + +SWIGINTERN PyObject *_wrap_new_SentencePieceProcessor(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *result = 0 ; + + if (!SWIG_Python_UnpackTuple(args, "new_SentencePieceProcessor", 0, 0, 0)) SWIG_fail; + { + try { + result = (sentencepiece::SentencePieceProcessor *)new sentencepiece::SentencePieceProcessor(); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + resultobj = SWIG_NewPointerObj(SWIG_as_voidptr(result), SWIGTYPE_p_sentencepiece__SentencePieceProcessor, SWIG_POINTER_NEW | 0 ); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_delete_SentencePieceProcessor(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = 
(sentencepiece::SentencePieceProcessor *) 0 ; - absl::string_view arg2 ; - int arg3 ; - float arg4 ; void *argp1 = 0 ; int res1 = 0 ; - int val3 ; - int ecode3 = 0 ; - float val4 ; - int ecode4 = 0 ; - PyObject *swig_obj[4] ; - sentencepiece::util::bytes result; + PyObject *swig_obj[1] ; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SampleEncodeAsSerializedProto", 4, 4, swig_obj)) SWIG_fail; - res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, SWIG_POINTER_DISOWN | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SampleEncodeAsSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "delete_SentencePieceProcessor" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - { - const PyInputString ustring(swig_obj[1]); - if (!ustring.IsAvalable()) { - PyErr_SetString(PyExc_TypeError, "not a string"); - SWIG_fail; - } - resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); - } - ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_SampleEncodeAsSerializedProto" "', argument " "3"" of type '" "int""'"); - } - arg3 = static_cast< int >(val3); - ecode4 = SWIG_AsVal_float(swig_obj[3], &val4); - if (!SWIG_IsOK(ecode4)) { - SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor_SampleEncodeAsSerializedProto" "', argument " "4"" of type '" "float""'"); - } - arg4 = static_cast< float >(val4); { try { - result = ((sentencepiece::SentencePieceProcessor 
const *)arg1)->SampleEncodeAsSerializedProto(arg2,arg3,arg4); + delete arg1; ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - resultobj = MakePyOutputBytes(result); - } + resultobj = SWIG_Py_Void(); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_NBestEncodeAsSerializedProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadFromSerializedProto(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; absl::string_view arg2 ; - int arg3 ; void *argp1 = 0 ; int res1 = 0 ; - int val3 ; - int ecode3 = 0 ; - PyObject *swig_obj[3] ; - sentencepiece::util::bytes result; + PyObject *swig_obj[2] ; + sentencepiece::util::Status result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_NBestEncodeAsSerializedProto", 3, 3, swig_obj)) SWIG_fail; + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_LoadFromSerializedProto", 2, 2, swig_obj)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_NBestEncodeAsSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_LoadFromSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); { @@ -4322,16 +4807,11 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_NBestEncodeAsSerializedProto(P SWIG_fail; } resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + arg2 = ustring.str(); } 
- ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); - if (!SWIG_IsOK(ecode3)) { - SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_NBestEncodeAsSerializedProto" "', argument " "3"" of type '" "int""'"); - } - arg3 = static_cast< int >(val3); { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->NBestEncodeAsSerializedProto(arg2,arg3); + result = (arg1)->LoadFromSerializedProto(arg2); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -4339,7 +4819,10 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_NBestEncodeAsSerializedProto(P } } { - resultobj = MakePyOutputBytes(result); + if (!(&result)->ok()) { + SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); + } + resultobj = SWIG_From_bool((&result)->ok()); } return resultobj; fail: @@ -4347,30 +4830,120 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_NBestEncodeAsSerializedProto(P } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodePiecesAsSerializedProto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetEncodeExtraOptions(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - std::vector< std::string > *arg2 = 0 ; + absl::string_view arg2 ; void *argp1 = 0 ; int res1 = 0 ; PyObject *swig_obj[2] ; - sentencepiece::util::bytes result; + sentencepiece::util::Status result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_DecodePiecesAsSerializedProto", 2, 2, swig_obj)) SWIG_fail; + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SetEncodeExtraOptions", 2, 2, swig_obj)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_DecodePiecesAsSerializedProto" "', 
argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SetEncodeExtraOptions" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); { - std::vector *out = nullptr; + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + { + try { + result = (arg1)->SetEncodeExtraOptions(arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + if (!(&result)->ok()) { + SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); + } + resultobj = SWIG_From_bool((&result)->ok()); + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetDecodeExtraOptions(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[2] ; + sentencepiece::util::Status result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SetDecodeExtraOptions", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SetDecodeExtraOptions" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + 
PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + { + try { + result = (arg1)->SetDecodeExtraOptions(arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + if (!(&result)->ok()) { + SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); + } + resultobj = SWIG_From_bool((&result)->ok()); + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_SetVocabulary(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< absl::string_view > *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[2] ; + sentencepiece::util::Status result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_SetVocabulary", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_SetVocabulary" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; if (PyList_Check(swig_obj[1])) { const size_t size = PyList_Size(swig_obj[1]); - out = new std::vector(size); + out = new std::vector(size); for (size_t i = 0; i < size; ++i) { const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); if (ustring.IsAvalable()) { - (*out)[i] = std::string(ustring.data(), ustring.size()); + (*out)[i] = ustring.str(); } else { PyErr_SetString(PyExc_TypeError, "list must contain strings"); SWIG_fail; @@ -4385,7 +4958,7 @@ SWIGINTERN PyObject 
*_wrap_SentencePieceProcessor_DecodePiecesAsSerializedProto( } { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->DecodePiecesAsSerializedProto((std::vector< std::string > const &)*arg2); + result = (arg1)->SetVocabulary((std::vector< absl::string_view > const &)*arg2); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -4393,7 +4966,10 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodePiecesAsSerializedProto( } } { - resultobj = MakePyOutputBytes(result); + if (!(&result)->ok()) { + SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); + } + resultobj = SWIG_From_bool((&result)->ok()); } { delete arg2; @@ -4407,50 +4983,58 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodePiecesAsSerializedProto( } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_GetPieceSize(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_ResetVocabulary(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; void *argp1 = 0 ; int res1 = 0 ; PyObject *swig_obj[1] ; - int result; + sentencepiece::util::Status result; if (!args) SWIG_fail; swig_obj[0] = args; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_GetPieceSize" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_ResetVocabulary" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); { try { - result = (int)((sentencepiece::SentencePieceProcessor const *)arg1)->GetPieceSize(); + result = (arg1)->ResetVocabulary(); 
ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - resultobj = SWIG_From_int(static_cast< int >(result)); + { + if (!(&result)->ok()) { + SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); + } + resultobj = SWIG_From_bool((&result)->ok()); + } return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_PieceToId(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadVocabulary(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; absl::string_view arg2 ; + int arg3 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[2] ; - int result; + int val3 ; + int ecode3 = 0 ; + PyObject *swig_obj[3] ; + sentencepiece::util::Status result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_PieceToId", 2, 2, swig_obj)) SWIG_fail; + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_LoadVocabulary", 3, 3, swig_obj)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_PieceToId" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_LoadVocabulary" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); { @@ -4460,49 +5044,76 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_PieceToId(PyObject *SWIGUNUSED SWIG_fail; } resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + arg2 = ustring.str(); } + ecode3 = SWIG_AsVal_int(swig_obj[2], 
&val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_LoadVocabulary" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); { try { - result = (int)((sentencepiece::SentencePieceProcessor const *)arg1)->PieceToId(arg2); + result = (arg1)->LoadVocabulary(arg2,arg3); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - resultobj = SWIG_From_int(static_cast< int >(result)); + { + if (!(&result)->ok()) { + SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); + } + resultobj = SWIG_From_bool((&result)->ok()); + } return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IdToPiece(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_CalculateEntropy__SWIG_0(PyObject *self, Py_ssize_t nobjs, PyObject **swig_obj) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - int arg2 ; + absl::string_view arg2 ; + float arg3 ; + float *arg4 = (float *) 0 ; void *argp1 = 0 ; int res1 = 0 ; - int val2 ; - int ecode2 = 0 ; - PyObject *swig_obj[2] ; - std::string *result = 0 ; + float val3 ; + int ecode3 = 0 ; + void *argp4 = 0 ; + int res4 = 0 ; + sentencepiece::util::Status result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_IdToPiece", 2, 2, swig_obj)) SWIG_fail; + if ((nobjs < 4) || (nobjs > 4)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_IdToPiece" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" 
"SentencePieceProcessor_CalculateEntropy" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "SentencePieceProcessor_IdToPiece" "', argument " "2"" of type '" "int""'"); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_float(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_CalculateEntropy" "', argument " "3"" of type '" "float""'"); } - arg2 = static_cast< int >(val2); + arg3 = static_cast< float >(val3); + res4 = SWIG_ConvertPtr(swig_obj[3], &argp4,SWIGTYPE_p_float, 0 | 0 ); + if (!SWIG_IsOK(res4)) { + SWIG_exception_fail(SWIG_ArgError(res4), "in method '" "SentencePieceProcessor_CalculateEntropy" "', argument " "4"" of type '" "float *""'"); + } + arg4 = reinterpret_cast< float * >(argp4); { try { - result = (std::string *) &((sentencepiece::SentencePieceProcessor const *)arg1)->IdToPiece(arg2); + result = ((sentencepiece::SentencePieceProcessor const *)arg1)->CalculateEntropy(arg2,arg3,arg4); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -4510,8 +5121,10 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IdToPiece(PyObject *SWIGUNUSED } } { - PyObject *input_type = resultobj; - resultobj = MakePyOutputString(*result, input_type); + if (!(&result)->ok()) { + SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); + } + resultobj = SWIG_From_bool((&result)->ok()); } return resultobj; fail: @@ -4519,31 +5132,40 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IdToPiece(PyObject *SWIGUNUSED } 
-SWIGINTERN PyObject *_wrap_SentencePieceProcessor_GetScore(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_CalculateEntropy__SWIG_1(PyObject *self, Py_ssize_t nobjs, PyObject **swig_obj) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - int arg2 ; + absl::string_view arg2 ; + float arg3 ; void *argp1 = 0 ; int res1 = 0 ; - int val2 ; - int ecode2 = 0 ; - PyObject *swig_obj[2] ; + float val3 ; + int ecode3 = 0 ; float result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_GetScore", 2, 2, swig_obj)) SWIG_fail; + if ((nobjs < 3) || (nobjs > 3)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_GetScore" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_CalculateEntropy" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "SentencePieceProcessor_GetScore" "', argument " "2"" of type '" "int""'"); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_float(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor_CalculateEntropy" "', argument " "3"" of type '" "float""'"); } - arg2 = static_cast< int >(val2); + arg3 = static_cast< float >(val3); { try { - result = 
(float)((sentencepiece::SentencePieceProcessor const *)arg1)->GetScore(arg2); + result = (float)((sentencepiece::SentencePieceProcessor const *)arg1)->CalculateEntropy(arg2,arg3); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -4557,83 +5179,139 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_GetScore(PyObject *SWIGUNUSEDP } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsUnknown(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_CalculateEntropy(PyObject *self, PyObject *args) { + Py_ssize_t argc; + PyObject *argv[5] = { + 0 + }; + + if (!(argc = SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_CalculateEntropy", 0, 4, argv))) SWIG_fail; + --argc; + if (argc == 3) { + int _v = 0; + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_AsCharPtrAndSize(argv[1], 0, NULL, 0); + _v = SWIG_CheckState(res); + if (_v) { + { + int res = SWIG_AsVal_float(argv[2], NULL); + _v = SWIG_CheckState(res); + } + if (_v) { + return _wrap_SentencePieceProcessor_CalculateEntropy__SWIG_1(self, argc, argv); + } + } + } + } + if (argc == 4) { + int _v = 0; + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[0], &vptr, SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0); + _v = SWIG_CheckState(res); + if (_v) { + int res = SWIG_AsCharPtrAndSize(argv[1], 0, NULL, 0); + _v = SWIG_CheckState(res); + if (_v) { + { + int res = SWIG_AsVal_float(argv[2], NULL); + _v = SWIG_CheckState(res); + } + if (_v) { + void *vptr = 0; + int res = SWIG_ConvertPtr(argv[3], &vptr, SWIGTYPE_p_float, 0); + _v = SWIG_CheckState(res); + if (_v) { + return _wrap_SentencePieceProcessor_CalculateEntropy__SWIG_0(self, argc, argv); + } + } + } + } + } + +fail: + SWIG_Python_RaiseOrModifyTypeError("Wrong number or type of arguments for overloaded function 
'SentencePieceProcessor_CalculateEntropy'.\n" + " Possible C/C++ prototypes are:\n" + " sentencepiece::SentencePieceProcessor::CalculateEntropy(absl::string_view,float,float *) const\n" + " sentencepiece::SentencePieceProcessor::CalculateEntropy(absl::string_view,float) const\n"); + return 0; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_GetPieceSize(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - int arg2 ; void *argp1 = 0 ; int res1 = 0 ; - int val2 ; - int ecode2 = 0 ; - PyObject *swig_obj[2] ; - bool result; + PyObject *swig_obj[1] ; + int result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_IsUnknown", 2, 2, swig_obj)) SWIG_fail; + if (!args) SWIG_fail; + swig_obj[0] = args; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_IsUnknown" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_GetPieceSize" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "SentencePieceProcessor_IsUnknown" "', argument " "2"" of type '" "int""'"); - } - arg2 = static_cast< int >(val2); { try { - result = (bool)((sentencepiece::SentencePieceProcessor const *)arg1)->IsUnknown(arg2); + result = (int)((sentencepiece::SentencePieceProcessor const *)arg1)->GetPieceSize(); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - resultobj = 
SWIG_From_bool(static_cast< bool >(result)); + resultobj = SWIG_From_int(static_cast< int >(result)); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsControl(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_PieceToId(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - int arg2 ; + absl::string_view arg2 ; void *argp1 = 0 ; int res1 = 0 ; - int val2 ; - int ecode2 = 0 ; PyObject *swig_obj[2] ; - bool result; + int result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_IsControl", 2, 2, swig_obj)) SWIG_fail; + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_PieceToId", 2, 2, swig_obj)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_IsControl" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_PieceToId" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); - ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); - if (!SWIG_IsOK(ecode2)) { - SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "SentencePieceProcessor_IsControl" "', argument " "2"" of type '" "int""'"); - } - arg2 = static_cast< int >(val2); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } { try { - result = (bool)((sentencepiece::SentencePieceProcessor const *)arg1)->IsControl(arg2); + result = (int)((sentencepiece::SentencePieceProcessor const 
*)arg1)->PieceToId(arg2); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - resultobj = SWIG_From_bool(static_cast< bool >(result)); + resultobj = SWIG_From_int(static_cast< int >(result)); return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsUnused(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IdToPiece(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; int arg2 ; @@ -4642,12 +5320,167 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsUnused(PyObject *SWIGUNUSEDP int val2 ; int ecode2 = 0 ; PyObject *swig_obj[2] ; - bool result; + std::string *result = 0 ; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_IsUnused", 2, 2, swig_obj)) SWIG_fail; + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_IdToPiece", 2, 2, swig_obj)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_IsUnused" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_IdToPiece" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); + if (!SWIG_IsOK(ecode2)) { + SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "SentencePieceProcessor_IdToPiece" "', argument " "2"" of type '" "int""'"); + } + arg2 = static_cast< int >(val2); + { + try { + result = (std::string *) &((sentencepiece::SentencePieceProcessor const *)arg1)->IdToPiece(arg2); + 
ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + PyObject *input_type = resultobj; + resultobj = MakePyOutputString(*result, input_type); + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_GetScore(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + int arg2 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val2 ; + int ecode2 = 0 ; + PyObject *swig_obj[2] ; + float result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_GetScore", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_GetScore" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); + if (!SWIG_IsOK(ecode2)) { + SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "SentencePieceProcessor_GetScore" "', argument " "2"" of type '" "int""'"); + } + arg2 = static_cast< int >(val2); + { + try { + result = (float)((sentencepiece::SentencePieceProcessor const *)arg1)->GetScore(arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + resultobj = SWIG_From_float(static_cast< float >(result)); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsUnknown(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + 
int arg2 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val2 ; + int ecode2 = 0 ; + PyObject *swig_obj[2] ; + bool result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_IsUnknown", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_IsUnknown" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); + if (!SWIG_IsOK(ecode2)) { + SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "SentencePieceProcessor_IsUnknown" "', argument " "2"" of type '" "int""'"); + } + arg2 = static_cast< int >(val2); + { + try { + result = (bool)((sentencepiece::SentencePieceProcessor const *)arg1)->IsUnknown(arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + resultobj = SWIG_From_bool(static_cast< bool >(result)); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsControl(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + int arg2 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val2 ; + int ecode2 = 0 ; + PyObject *swig_obj[2] ; + bool result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_IsControl", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_IsControl" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + 
arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); + if (!SWIG_IsOK(ecode2)) { + SWIG_exception_fail(SWIG_ArgError(ecode2), "in method '" "SentencePieceProcessor_IsControl" "', argument " "2"" of type '" "int""'"); + } + arg2 = static_cast< int >(val2); + { + try { + result = (bool)((sentencepiece::SentencePieceProcessor const *)arg1)->IsControl(arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + resultobj = SWIG_From_bool(static_cast< bool >(result)); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsUnused(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + int arg2 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val2 ; + int ecode2 = 0 ; + PyObject *swig_obj[2] ; + bool result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_IsUnused", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_IsUnused" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); ecode2 = SWIG_AsVal_int(swig_obj[1], &val2); @@ -4671,7 +5504,7 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsUnused(PyObject *SWIGUNUSEDP } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsByte(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsByte(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; int 
arg2 ; @@ -4709,7 +5542,7 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_IsByte(PyObject *SWIGUNUSEDPAR } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_unk_id(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_unk_id(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; void *argp1 = 0 ; @@ -4740,7 +5573,7 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_unk_id(PyObject *SWIGUNUSEDPAR } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_bos_id(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_bos_id(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; void *argp1 = 0 ; @@ -4771,7 +5604,7 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_bos_id(PyObject *SWIGUNUSEDPAR } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_eos_id(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_eos_id(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; void *argp1 = 0 ; @@ -4802,7 +5635,7 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_eos_id(PyObject *SWIGUNUSEDPAR } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_pad_id(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_pad_id(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; void *argp1 = 0 ; @@ -4816,41 +5649,2499 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_pad_id(PyObject *SWIGUNUSEDPAR if (!SWIG_IsOK(res1)) { SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_pad_id" "', argument " 
"1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); } - arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + try { + result = (int)((sentencepiece::SentencePieceProcessor const *)arg1)->pad_id(); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + resultobj = SWIG_From_int(static_cast< int >(result)); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_serialized_model_proto(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[1] ; + sentencepiece::util::bytes result; + + if (!args) SWIG_fail; + swig_obj[0] = args; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_serialized_model_proto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + try { + result = ((sentencepiece::SentencePieceProcessor const *)arg1)->serialized_model_proto(); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = MakePyOutputBytes(result); + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadFromFile(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + 
void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[2] ; + sentencepiece::util::Status result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_LoadFromFile", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_LoadFromFile" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + { + try { + result = sentencepiece_SentencePieceProcessor_LoadFromFile(arg1,SWIG_STD_MOVE(arg2)); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + if (!(&result)->ok()) { + SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); + } + resultobj = SWIG_From_bool((&result)->ok()); + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsIds(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + bool arg3 ; + int arg4 ; + float arg5 ; + bool arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + void *argp1 = 0 ; + int res1 = 0 ; + bool val3 ; + int ecode3 = 0 ; + int val4 ; + int ecode4 = 0 ; + float val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + PyObject *swig_obj[9] ; + std::vector< int > result; + + if (!SWIG_Python_UnpackTuple(args, 
"SentencePieceProcessor__EncodeAsIds", 9, 9, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsIds" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_bool(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsIds" "', argument " "3"" of type '" "bool""'"); + } + arg3 = static_cast< bool >(val3); + ecode4 = SWIG_AsVal_int(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsIds" "', argument " "4"" of type '" "int""'"); + } + arg4 = static_cast< int >(val4); + ecode5 = SWIG_AsVal_float(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsIds" "', argument " "5"" of type '" "float""'"); + } + arg5 = static_cast< float >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsIds" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsIds" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + 
if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsIds" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsIds" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + { + try { + result = sentencepiece_SentencePieceProcessor__EncodeAsIds((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7,arg8,arg9); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyList_SET_ITEM(resultobj, i, PyInt_FromLong(static_cast(result[i]))); + } + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsPieces(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + bool arg3 ; + int arg4 ; + float arg5 ; + bool arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + void *argp1 = 0 ; + int res1 = 0 ; + bool val3 ; + int ecode3 = 0 ; + int val4 ; + int ecode4 = 0 ; + float val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + PyObject *swig_obj[9] ; + std::vector< std::string > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsPieces", 9, 9, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsPieces" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_bool(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsPieces" "', argument " "3"" of type '" "bool""'"); + } + arg3 = static_cast< bool >(val3); + ecode4 = SWIG_AsVal_int(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsPieces" "', argument " "4"" of type '" "int""'"); + } + arg4 = static_cast< int >(val4); + ecode5 = SWIG_AsVal_float(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsPieces" "', argument " "5"" of type '" "float""'"); + } + arg5 = static_cast< float >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsPieces" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsPieces" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsPieces" "', argument " "8"" of type '" "bool""'"); + } + arg8 = 
static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsPieces" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + { + try { + result = sentencepiece_SentencePieceProcessor__EncodeAsPieces((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7,arg8,arg9); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + PyObject *input_type = resultobj; + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyList_SET_ITEM(resultobj, i, MakePyOutputString(result[i], input_type)); + } + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsSerializedProto(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + bool arg3 ; + int arg4 ; + float arg5 ; + bool arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + void *argp1 = 0 ; + int res1 = 0 ; + bool val3 ; + int ecode3 = 0 ; + int val4 ; + int ecode4 = 0 ; + float val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + PyObject *swig_obj[9] ; + sentencepiece::util::bytes result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsSerializedProto", 9, 9, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsSerializedProto" "', argument " "1"" of type '" 
"sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_bool(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsSerializedProto" "', argument " "3"" of type '" "bool""'"); + } + arg3 = static_cast< bool >(val3); + ecode4 = SWIG_AsVal_int(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsSerializedProto" "', argument " "4"" of type '" "int""'"); + } + arg4 = static_cast< int >(val4); + ecode5 = SWIG_AsVal_float(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsSerializedProto" "', argument " "5"" of type '" "float""'"); + } + arg5 = static_cast< float >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsSerializedProto" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsSerializedProto" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsSerializedProto" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], 
&val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsSerializedProto" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + { + try { + result = sentencepiece_SentencePieceProcessor__EncodeAsSerializedProto((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7,arg8,arg9); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = MakePyOutputBytes(result); + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsImmutableProto(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + bool arg3 ; + int arg4 ; + float arg5 ; + bool arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + void *argp1 = 0 ; + int res1 = 0 ; + bool val3 ; + int ecode3 = 0 ; + int val4 ; + int ecode4 = 0 ; + float val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + PyObject *swig_obj[9] ; + sentencepiece::ImmutableSentencePieceText result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsImmutableProto", 9, 9, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + 
PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_bool(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "3"" of type '" "bool""'"); + } + arg3 = static_cast< bool >(val3); + ecode4 = SWIG_AsVal_int(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "4"" of type '" "int""'"); + } + arg4 = static_cast< int >(val4); + ecode5 = SWIG_AsVal_float(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "5"" of type '" "float""'"); + } + arg5 = static_cast< float >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsImmutableProto" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); 
+ { + try { + result = sentencepiece_SentencePieceProcessor__EncodeAsImmutableProto((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7,arg8,arg9); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableSentencePieceText(result)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0 ); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsIdsBatch(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< absl::string_view > *arg2 = 0 ; + int arg3 ; + bool arg4 ; + int arg5 ; + float arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + bool arg10 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + bool val4 ; + int ecode4 = 0 ; + int val5 ; + int ecode5 = 0 ; + float val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + bool val10 ; + int ecode10 = 0 ; + PyObject *swig_obj[10] ; + std::vector< std::vector< int > > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsIdsBatch", 10, 10, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + 
for (size_t i = 0; i < size; ++i) { + const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); + if (ustring.IsAvalable()) { + (*out)[i] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + arg2 = out; + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "4"" of type '" "bool""'"); + } + arg4 = static_cast< bool >(val4); + ecode5 = SWIG_AsVal_int(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "5"" of type '" "int""'"); + } + arg5 = static_cast< int >(val5); + ecode6 = SWIG_AsVal_float(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "6"" of type '" "float""'"); + } + arg6 = static_cast< float >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + 
if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10); + if (!SWIG_IsOK(ecode10)) { + SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__EncodeAsIdsBatch" "', argument " "10"" of type '" "bool""'"); + } + arg10 = static_cast< bool >(val10); + { + try { + result = sentencepiece_SentencePieceProcessor__EncodeAsIdsBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyObject *obj = PyList_New(result[i].size()); + for (size_t j = 0; j < result[i].size(); ++j) { + PyList_SET_ITEM(obj, j, PyInt_FromLong(static_cast(result[i][j]))); + } + PyList_SET_ITEM(resultobj, i, obj); + } + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsPiecesBatch(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< absl::string_view > *arg2 = 0 ; + int arg3 ; + bool arg4 ; + int arg5 ; + float arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + bool arg10 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + bool val4 ; + int ecode4 = 0 ; + int val5 ; + int ecode5 = 0 ; + float val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + bool val10 ; + int ecode10 = 0 ; + PyObject *swig_obj[10] 
; + std::vector< std::vector< std::string > > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsPiecesBatch", 10, 10, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); + if (ustring.IsAvalable()) { + (*out)[i] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + arg2 = out; + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "4"" of type '" "bool""'"); + } + arg4 = static_cast< bool >(val4); + ecode5 = SWIG_AsVal_int(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "5"" of type '" "int""'"); + } + arg5 = static_cast< int >(val5); + ecode6 = SWIG_AsVal_float(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + 
SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "6"" of type '" "float""'"); + } + arg6 = static_cast< float >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10); + if (!SWIG_IsOK(ecode10)) { + SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__EncodeAsPiecesBatch" "', argument " "10"" of type '" "bool""'"); + } + arg10 = static_cast< bool >(val10); + { + try { + result = sentencepiece_SentencePieceProcessor__EncodeAsPiecesBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + PyObject *input_type = resultobj; + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyObject *obj = PyList_New(result[i].size()); + for (size_t j = 0; j < result[i].size(); ++j) { + PyList_SET_ITEM(obj, j, MakePyOutputString(result[i][j], input_type)); + } + PyList_SET_ITEM(resultobj, i, obj); + 
} + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsSerializedProtoBatch(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< absl::string_view > *arg2 = 0 ; + int arg3 ; + bool arg4 ; + int arg5 ; + float arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + bool arg10 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + bool val4 ; + int ecode4 = 0 ; + int val5 ; + int ecode5 = 0 ; + float val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + bool val10 ; + int ecode10 = 0 ; + PyObject *swig_obj[10] ; + BytesArray result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsSerializedProtoBatch", 10, 10, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); + if (ustring.IsAvalable()) { + (*out)[i] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + arg2 = out; + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + 
SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "4"" of type '" "bool""'"); + } + arg4 = static_cast< bool >(val4); + ecode5 = SWIG_AsVal_int(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "5"" of type '" "int""'"); + } + arg5 = static_cast< int >(val5); + ecode6 = SWIG_AsVal_float(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "6"" of type '" "float""'"); + } + arg6 = static_cast< float >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10); + if (!SWIG_IsOK(ecode10)) { + SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" 
"SentencePieceProcessor__EncodeAsSerializedProtoBatch" "', argument " "10"" of type '" "bool""'"); + } + arg10 = static_cast< bool >(val10); + { + try { + result = sentencepiece_SentencePieceProcessor__EncodeAsSerializedProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyList_SET_ITEM(resultobj, i, MakePyOutputBytes(result[i])); + } + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__EncodeAsImmutableProtoBatch(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< absl::string_view > *arg2 = 0 ; + int arg3 ; + bool arg4 ; + int arg5 ; + float arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + bool arg10 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + bool val4 ; + int ecode4 = 0 ; + int val5 ; + int ecode5 = 0 ; + float val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + bool val10 ; + int ecode10 = 0 ; + PyObject *swig_obj[10] ; + SwigValueWrapper< std::vector< sentencepiece::ImmutableSentencePieceText > > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__EncodeAsImmutableProtoBatch", 10, 10, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', 
argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); + if (ustring.IsAvalable()) { + (*out)[i] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + arg2 = out; + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "4"" of type '" "bool""'"); + } + arg4 = static_cast< bool >(val4); + ecode5 = SWIG_AsVal_int(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "5"" of type '" "int""'"); + } + arg5 = static_cast< int >(val5); + ecode6 = SWIG_AsVal_float(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "6"" of type '" "float""'"); + } + arg6 = static_cast< float >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "7"" of type 
'" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10); + if (!SWIG_IsOK(ecode10)) { + SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__EncodeAsImmutableProtoBatch" "', argument " "10"" of type '" "bool""'"); + } + arg10 = static_cast< bool >(val10); + { + try { + result = sentencepiece_SentencePieceProcessor__EncodeAsImmutableProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyObject *obj = SWIG_NewPointerObj(new sentencepiece::ImmutableSentencePieceText((&result)->at(i)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0); + PyList_SET_ITEM(resultobj, i, obj); + } + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIds(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< int > *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + 
PyObject *swig_obj[2] ; + std::string result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodeIds", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodeIds" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyInt_Check(o)) { + (*out)[i] = static_cast(PyInt_AsLong(o)); + } else { + PyErr_SetString(PyExc_TypeError,"list must contain integers"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + { + try { + result = sentencepiece_SentencePieceProcessor__DecodeIds((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< int > const &)*arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + PyObject *input_type = resultobj; + resultobj = MakePyOutputString(result, input_type); + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePieces(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< absl::string_view > *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[2] ; + std::string result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodePieces", 2, 2, 
swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodePieces" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); + if (ustring.IsAvalable()) { + (*out)[i] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + arg2 = out; + } + { + try { + result = sentencepiece_SentencePieceProcessor__DecodePieces((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + PyObject *input_type = resultobj; + resultobj = MakePyOutputString(result, input_type); + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsSerializedProto(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< int > *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[2] ; + sentencepiece::util::bytes result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodeIdsAsSerializedProto", 2, 2, swig_obj)) SWIG_fail; + res1 = 
SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodeIdsAsSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyInt_Check(o)) { + (*out)[i] = static_cast(PyInt_AsLong(o)); + } else { + PyErr_SetString(PyExc_TypeError,"list must contain integers"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + { + try { + result = sentencepiece_SentencePieceProcessor__DecodeIdsAsSerializedProto((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< int > const &)*arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = MakePyOutputBytes(result); + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsSerializedProto(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< absl::string_view > *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[2] ; + sentencepiece::util::bytes result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodePiecesAsSerializedProto", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if 
(!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodePiecesAsSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); + if (ustring.IsAvalable()) { + (*out)[i] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + arg2 = out; + } + { + try { + result = sentencepiece_SentencePieceProcessor__DecodePiecesAsSerializedProto((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = MakePyOutputBytes(result); + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsImmutableProto(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< int > *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[2] ; + sentencepiece::ImmutableSentencePieceText result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodeIdsAsImmutableProto", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + 
SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodeIdsAsImmutableProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyInt_Check(o)) { + (*out)[i] = static_cast(PyInt_AsLong(o)); + } else { + PyErr_SetString(PyExc_TypeError,"list must contain integers"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + { + try { + result = sentencepiece_SentencePieceProcessor__DecodeIdsAsImmutableProto((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< int > const &)*arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableSentencePieceText(result)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0 ); + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsImmutableProto(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< absl::string_view > *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + PyObject *swig_obj[2] ; + sentencepiece::ImmutableSentencePieceText result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodePiecesAsImmutableProto", 2, 2, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 
); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodePiecesAsImmutableProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector(size); + for (size_t i = 0; i < size; ++i) { + const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); + if (ustring.IsAvalable()) { + (*out)[i] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + arg2 = out; + } + { + try { + result = sentencepiece_SentencePieceProcessor__DecodePiecesAsImmutableProto((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< absl::string_view > const &)*arg2); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableSentencePieceText(result)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0 ); + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsBatch(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< std::vector< int > > *arg2 = 0 ; + int arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + PyObject *swig_obj[3] ; + std::vector< std::string > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodeIdsBatch", 3, 3, swig_obj)) SWIG_fail; + 
res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodeIdsBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector> *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector>(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyList_Check(o)) { + const size_t size2 = PyList_Size(o); + (*out)[i].resize(size2); + for (size_t j = 0; j < size2; ++j) { + PyObject *o2 = PyList_GetItem(o, j); + if (PyInt_Check(o2)) { + (*out)[i][j] = static_cast(PyInt_AsLong(o2)); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__DecodeIdsBatch" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + { + try { + result = sentencepiece_SentencePieceProcessor__DecodeIdsBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< std::vector< int > > const &)*arg2,arg3); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + PyObject *input_type = resultobj; + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyList_SET_ITEM(resultobj, i, MakePyOutputString(result[i], input_type)); + } + } + { + delete arg2; + } 
+ return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< std::vector< int > > *arg2 = 0 ; + int arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + PyObject *swig_obj[3] ; + BytesArray result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch", 3, 3, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector> *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector>(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyList_Check(o)) { + const size_t size2 = PyList_Size(o); + (*out)[i].resize(size2); + for (size_t j = 0; j < size2; ++j) { + PyObject *o2 = PyList_GetItem(o, j); + if (PyInt_Check(o2)) { + (*out)[i][j] = static_cast(PyInt_AsLong(o2)); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch" "', argument " "3"" of type '" 
"int""'"); + } + arg3 = static_cast< int >(val3); + { + try { + result = sentencepiece_SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< std::vector< int > > const &)*arg2,arg3); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyList_SET_ITEM(resultobj, i, MakePyOutputBytes(result[i])); + } + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodeIdsAsImmutableProtoBatch(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< std::vector< int > > *arg2 = 0 ; + int arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + PyObject *swig_obj[3] ; + SwigValueWrapper< std::vector< sentencepiece::ImmutableSentencePieceText > > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodeIdsAsImmutableProtoBatch", 3, 3, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodeIdsAsImmutableProtoBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector> *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector>(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyList_Check(o)) { + const size_t size2 = 
PyList_Size(o); + (*out)[i].resize(size2); + for (size_t j = 0; j < size2; ++j) { + PyObject *o2 = PyList_GetItem(o, j); + if (PyInt_Check(o2)) { + (*out)[i][j] = static_cast(PyInt_AsLong(o2)); + } else { + PyErr_SetString(PyExc_TypeError, "list must contain strings"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError, "not a list"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__DecodeIdsAsImmutableProtoBatch" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + { + try { + result = sentencepiece_SentencePieceProcessor__DecodeIdsAsImmutableProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< std::vector< int > > const &)*arg2,arg3); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyObject *obj = SWIG_NewPointerObj(new sentencepiece::ImmutableSentencePieceText((&result)->at(i)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0); + PyList_SET_ITEM(resultobj, i, obj); + } + } + { + delete arg2; + } + return resultobj; +fail: + { + delete arg2; + } + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesBatch(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< std::vector< absl::string_view > > *arg2 = 0 ; + int arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + PyObject *swig_obj[3] ; + std::vector< std::string > result; + + if 
(!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodePiecesBatch", 3, 3, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodePiecesBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector> *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector>(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyList_Check(o)) { + const size_t size2 = PyList_Size(o); + (*out)[i].resize(size2); + for (size_t j = 0; j < size2; ++j) { + const PyInputString ustring(PyList_GetItem(o, j)); + if (ustring.IsAvalable()) { + (*out)[i][j] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError,"list must contain integers"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__DecodePiecesBatch" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + { + try { + result = sentencepiece_SentencePieceProcessor__DecodePiecesBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< std::vector< absl::string_view > > const &)*arg2,arg3); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + PyObject *input_type = resultobj; + resultobj = 
PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyList_SET_ITEM(resultobj, i, MakePyOutputString(result[i], input_type)); + } + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< std::vector< absl::string_view > > *arg2 = 0 ; + int arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + PyObject *swig_obj[3] ; + BytesArray result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch", 3, 3, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector> *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t size = PyList_Size(swig_obj[1]); + out = new std::vector>(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyList_Check(o)) { + const size_t size2 = PyList_Size(o); + (*out)[i].resize(size2); + for (size_t j = 0; j < size2; ++j) { + const PyInputString ustring(PyList_GetItem(o, j)); + if (ustring.IsAvalable()) { + (*out)[i][j] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError,"list must contain integers"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + ecode3 = 
SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + { + try { + result = sentencepiece_SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< std::vector< absl::string_view > > const &)*arg2,arg3); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyList_SET_ITEM(resultobj, i, MakePyOutputBytes(result[i])); + } + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + std::vector< std::vector< absl::string_view > > *arg2 = 0 ; + int arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + PyObject *swig_obj[3] ; + SwigValueWrapper< std::vector< sentencepiece::ImmutableSentencePieceText > > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch", 3, 3, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + std::vector> *out = nullptr; + if (PyList_Check(swig_obj[1])) { + const size_t 
size = PyList_Size(swig_obj[1]); + out = new std::vector>(size); + for (size_t i = 0; i < size; ++i) { + PyObject *o = PyList_GetItem(swig_obj[1], i); + if (PyList_Check(o)) { + const size_t size2 = PyList_Size(o); + (*out)[i].resize(size2); + for (size_t j = 0; j < size2; ++j) { + const PyInputString ustring(PyList_GetItem(o, j)); + if (ustring.IsAvalable()) { + (*out)[i][j] = ustring.str(); + } else { + PyErr_SetString(PyExc_TypeError,"list must contain integers"); + SWIG_fail; + } + resultobj = ustring.input_type(); + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + } + } else { + PyErr_SetString(PyExc_TypeError,"not a list"); + SWIG_fail; + } + arg2 = out; + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + { + try { + result = sentencepiece_SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< std::vector< absl::string_view > > const &)*arg2,arg3); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyObject *obj = SWIG_NewPointerObj(new sentencepiece::ImmutableSentencePieceText((&result)->at(i)), SWIGTYPE_p_sentencepiece__ImmutableSentencePieceText, SWIG_POINTER_OWN | 0); + PyList_SET_ITEM(resultobj, i, obj); + } + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__NBestEncodeAsIds(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + 
int arg3 ; + bool arg4 ; + bool arg5 ; + bool arg6 ; + bool arg7 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + bool val4 ; + int ecode4 = 0 ; + bool val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + PyObject *swig_obj[7] ; + std::vector< std::vector< int > > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__NBestEncodeAsIds", 7, 7, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__NBestEncodeAsIds" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__NBestEncodeAsIds" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__NBestEncodeAsIds" "', argument " "4"" of type '" "bool""'"); + } + arg4 = static_cast< bool >(val4); + ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__NBestEncodeAsIds" "', argument " "5"" of type '" "bool""'"); + } + arg5 = static_cast< bool >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" 
"SentencePieceProcessor__NBestEncodeAsIds" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__NBestEncodeAsIds" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + { + try { + result = sentencepiece_SentencePieceProcessor__NBestEncodeAsIds((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyObject *obj = PyList_New(result[i].size()); + for (size_t j = 0; j < result[i].size(); ++j) { + PyList_SET_ITEM(obj, j, PyInt_FromLong(static_cast(result[i][j]))); + } + PyList_SET_ITEM(resultobj, i, obj); + } + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__NBestEncodeAsPieces(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + int arg3 ; + bool arg4 ; + bool arg5 ; + bool arg6 ; + bool arg7 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + bool val4 ; + int ecode4 = 0 ; + bool val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + PyObject *swig_obj[7] ; + std::vector< std::vector< std::string > > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__NBestEncodeAsPieces", 7, 7, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in 
method '" "SentencePieceProcessor__NBestEncodeAsPieces" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__NBestEncodeAsPieces" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__NBestEncodeAsPieces" "', argument " "4"" of type '" "bool""'"); + } + arg4 = static_cast< bool >(val4); + ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__NBestEncodeAsPieces" "', argument " "5"" of type '" "bool""'"); + } + arg5 = static_cast< bool >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__NBestEncodeAsPieces" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__NBestEncodeAsPieces" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + { + try { + result = sentencepiece_SentencePieceProcessor__NBestEncodeAsPieces((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7); + ReleaseResultObject(resultobj); + } + catch (const 
sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + PyObject *input_type = resultobj; + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyObject *obj = PyList_New(result[i].size()); + for (size_t j = 0; j < result[i].size(); ++j) { + PyList_SET_ITEM(obj, j, MakePyOutputString(result[i][j], input_type)); + } + PyList_SET_ITEM(resultobj, i, obj); + } + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__NBestEncodeAsSerializedProto(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + int arg3 ; + bool arg4 ; + bool arg5 ; + bool arg6 ; + bool arg7 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + bool val4 ; + int ecode4 = 0 ; + bool val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + PyObject *swig_obj[7] ; + sentencepiece::util::bytes result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__NBestEncodeAsSerializedProto", 7, 7, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__NBestEncodeAsSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" 
"SentencePieceProcessor__NBestEncodeAsSerializedProto" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__NBestEncodeAsSerializedProto" "', argument " "4"" of type '" "bool""'"); + } + arg4 = static_cast< bool >(val4); + ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__NBestEncodeAsSerializedProto" "', argument " "5"" of type '" "bool""'"); + } + arg5 = static_cast< bool >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__NBestEncodeAsSerializedProto" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__NBestEncodeAsSerializedProto" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + { + try { + result = sentencepiece_SentencePieceProcessor__NBestEncodeAsSerializedProto((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = MakePyOutputBytes(result); + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__NBestEncodeAsImmutableProto(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + int arg3 ; + bool arg4 ; + bool arg5 ; + bool 
arg6 ; + bool arg7 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + bool val4 ; + int ecode4 = 0 ; + bool val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + PyObject *swig_obj[7] ; + sentencepiece::ImmutableNBestSentencePieceText result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__NBestEncodeAsImmutableProto", 7, 7, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_bool(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "4"" of type '" "bool""'"); + } + arg4 = static_cast< bool >(val4); + ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "5"" of type '" "bool""'"); + } + arg5 = static_cast< bool >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" 
"SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__NBestEncodeAsImmutableProto" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + { + try { + result = sentencepiece_SentencePieceProcessor__NBestEncodeAsImmutableProto((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableNBestSentencePieceText(result)), SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, SWIG_POINTER_OWN | 0 ); + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreAsIds(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + int arg3 ; + float arg4 ; + bool arg5 ; + bool arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + bool arg10 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + float val4 ; + int ecode4 = 0 ; + bool val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + bool val10 ; + int ecode10 = 0 ; + PyObject *swig_obj[10] ; + std::vector< std::pair< std::vector< int >,float > > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__SampleEncodeAndScoreAsIds", 10, 10, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if 
(!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsIds" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsIds" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_float(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsIds" "', argument " "4"" of type '" "float""'"); + } + arg4 = static_cast< float >(val4); + ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsIds" "', argument " "5"" of type '" "bool""'"); + } + arg5 = static_cast< bool >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsIds" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsIds" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" 
"SentencePieceProcessor__SampleEncodeAndScoreAsIds" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsIds" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10); + if (!SWIG_IsOK(ecode10)) { + SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsIds" "', argument " "10"" of type '" "bool""'"); + } + arg10 = static_cast< bool >(val10); + { + try { + result = sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsIds((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10); + ReleaseResultObject(resultobj); + } + catch (const sentencepiece::util::Status &status) { + SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); + } + } + { + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyObject *obj = PyList_New(result[i].first.size()); + for (size_t j = 0; j < result[i].first.size(); ++j) { + PyList_SET_ITEM(obj, j, PyInt_FromLong(static_cast(result[i].first[j]))); + } + PyList_SET_ITEM(resultobj, i, PyTuple_Pack(2, obj, PyFloat_FromDouble(static_cast(result[i].second)))); + } + } + return resultobj; +fail: + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreAsPieces(PyObject *self, PyObject *args) { + PyObject *resultobj = 0; + sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + int arg3 ; + float arg4 ; + bool arg5 ; + bool arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + bool arg10 ; + void *argp1 = 0 ; + int res1 = 0 ; + int val3 ; + int ecode3 = 0 ; + float val4 ; + int ecode4 = 0 ; + bool 
val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + bool val10 ; + int ecode10 = 0 ; + PyObject *swig_obj[10] ; + std::vector< std::pair< std::vector< std::string >,float > > result; + + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__SampleEncodeAndScoreAsPieces", 10, 10, swig_obj)) SWIG_fail; + res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsPieces" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + } + arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsPieces" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_float(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsPieces" "', argument " "4"" of type '" "float""'"); + } + arg4 = static_cast< float >(val4); + ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsPieces" "', argument " "5"" of type '" "bool""'"); + } + arg5 = static_cast< bool >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" 
"SentencePieceProcessor__SampleEncodeAndScoreAsPieces" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsPieces" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsPieces" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsPieces" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10); + if (!SWIG_IsOK(ecode10)) { + SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsPieces" "', argument " "10"" of type '" "bool""'"); + } + arg10 = static_cast< bool >(val10); { try { - result = (int)((sentencepiece::SentencePieceProcessor const *)arg1)->pad_id(); + result = sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsPieces((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - resultobj = SWIG_From_int(static_cast< int >(result)); + { + PyObject *input_type = resultobj; + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyObject *obj = PyList_New(result[i].first.size()); + for (size_t j = 0; j < result[i].first.size(); ++j) { + 
PyList_SET_ITEM(obj, j, MakePyOutputString(result[i].first[j], input_type)); + } + PyList_SET_ITEM(resultobj, i, PyTuple_Pack(2, obj, PyFloat_FromDouble(static_cast(result[i].second)))); + } + } return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_serialized_model_proto(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; + absl::string_view arg2 ; + int arg3 ; + float arg4 ; + bool arg5 ; + bool arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + bool arg10 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[1] ; + int val3 ; + int ecode3 = 0 ; + float val4 ; + int ecode4 = 0 ; + bool val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + bool val10 ; + int ecode10 = 0 ; + PyObject *swig_obj[10] ; sentencepiece::util::bytes result; - if (!args) SWIG_fail; - swig_obj[0] = args; + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto", 10, 10, swig_obj)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_serialized_model_proto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); + { + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + 
PyErr_SetString(PyExc_TypeError, "not a string"); + SWIG_fail; + } + resultobj = ustring.input_type(); + arg2 = ustring.str(); + } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_float(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "4"" of type '" "float""'"); + } + arg4 = static_cast< float >(val4); + ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "5"" of type '" "bool""'"); + } + arg5 = static_cast< bool >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" 
"SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10); + if (!SWIG_IsOK(ecode10)) { + SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto" "', argument " "10"" of type '" "bool""'"); + } + arg10 = static_cast< bool >(val10); { try { - result = ((sentencepiece::SentencePieceProcessor const *)arg1)->serialized_model_proto(); + result = sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -4866,19 +8157,43 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_serialized_model_proto(PyObjec } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadFromFile(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; absl::string_view arg2 ; + int arg3 ; + float arg4 ; + bool arg5 ; + bool arg6 ; + bool arg7 ; + bool arg8 ; + bool arg9 ; + bool arg10 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[2] ; - sentencepiece::util::Status result; + int val3 ; + int ecode3 = 0 ; + float val4 ; + int ecode4 = 0 ; + bool val5 ; + int ecode5 = 0 ; + bool val6 ; + int ecode6 = 0 ; + bool val7 ; + int ecode7 = 0 ; + bool val8 ; + int ecode8 = 0 ; + bool val9 ; + int ecode9 = 0 ; + bool val10 ; + int ecode10 = 0 ; + PyObject *swig_obj[10] ; + sentencepiece::ImmutableNBestSentencePieceText result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_LoadFromFile", 2, 2, swig_obj)) SWIG_fail; + if 
(!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto", 10, 10, swig_obj)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_LoadFromFile" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); { @@ -4888,127 +8203,167 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_LoadFromFile(PyObject *SWIGUNU SWIG_fail; } resultobj = ustring.input_type(); - arg2 = absl::string_view(ustring.data(), ustring.size()); + arg2 = ustring.str(); } + ecode3 = SWIG_AsVal_int(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "3"" of type '" "int""'"); + } + arg3 = static_cast< int >(val3); + ecode4 = SWIG_AsVal_float(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "4"" of type '" "float""'"); + } + arg4 = static_cast< float >(val4); + ecode5 = SWIG_AsVal_bool(swig_obj[4], &val5); + if (!SWIG_IsOK(ecode5)) { + SWIG_exception_fail(SWIG_ArgError(ecode5), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "5"" of type '" "bool""'"); + } + arg5 = static_cast< bool >(val5); + ecode6 = SWIG_AsVal_bool(swig_obj[5], &val6); + if (!SWIG_IsOK(ecode6)) { + SWIG_exception_fail(SWIG_ArgError(ecode6), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', 
argument " "6"" of type '" "bool""'"); + } + arg6 = static_cast< bool >(val6); + ecode7 = SWIG_AsVal_bool(swig_obj[6], &val7); + if (!SWIG_IsOK(ecode7)) { + SWIG_exception_fail(SWIG_ArgError(ecode7), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "7"" of type '" "bool""'"); + } + arg7 = static_cast< bool >(val7); + ecode8 = SWIG_AsVal_bool(swig_obj[7], &val8); + if (!SWIG_IsOK(ecode8)) { + SWIG_exception_fail(SWIG_ArgError(ecode8), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "8"" of type '" "bool""'"); + } + arg8 = static_cast< bool >(val8); + ecode9 = SWIG_AsVal_bool(swig_obj[8], &val9); + if (!SWIG_IsOK(ecode9)) { + SWIG_exception_fail(SWIG_ArgError(ecode9), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "9"" of type '" "bool""'"); + } + arg9 = static_cast< bool >(val9); + ecode10 = SWIG_AsVal_bool(swig_obj[9], &val10); + if (!SWIG_IsOK(ecode10)) { + SWIG_exception_fail(SWIG_ArgError(ecode10), "in method '" "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto" "', argument " "10"" of type '" "bool""'"); + } + arg10 = static_cast< bool >(val10); { try { - result = sentencepiece_SentencePieceProcessor_LoadFromFile(arg1,arg2); + result = sentencepiece_SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto((sentencepiece::SentencePieceProcessor const *)arg1,SWIG_STD_MOVE(arg2),arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - if (!(&result)->ok()) { - SWIG_exception(ToSwigError((&result)->code()), (&result)->ToString().c_str()); - } - resultobj = SWIG_From_bool((&result)->ok()); - } + resultobj = SWIG_NewPointerObj((new sentencepiece::ImmutableNBestSentencePieceText(result)), SWIGTYPE_p_sentencepiece__ImmutableNBestSentencePieceText, SWIG_POINTER_OWN | 0 ); 
return resultobj; fail: return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodeIdsWithCheck(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__CalculateEntropy(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - std::vector< int > *arg2 = 0 ; + absl::string_view arg2 ; + float arg3 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[2] ; - std::string result; + float val3 ; + int ecode3 = 0 ; + PyObject *swig_obj[3] ; + float result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_DecodeIdsWithCheck", 2, 2, swig_obj)) SWIG_fail; + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__CalculateEntropy", 3, 3, swig_obj)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], &argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_DecodeIdsWithCheck" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__CalculateEntropy" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); { - std::vector *out = nullptr; - if (PyList_Check(swig_obj[1])) { - const size_t size = PyList_Size(swig_obj[1]); - out = new std::vector(size); - for (size_t i = 0; i < size; ++i) { - PyObject *o = PyList_GetItem(swig_obj[1], i); - if (PyInt_Check(o)) { - (*out)[i] = static_cast(PyInt_AsLong(o)); - } else { - PyErr_SetString(PyExc_TypeError,"list must contain integers"); - SWIG_fail; - } - } - } else { - PyErr_SetString(PyExc_TypeError,"not a list"); + const PyInputString ustring(swig_obj[1]); + if (!ustring.IsAvalable()) { + PyErr_SetString(PyExc_TypeError, "not a string"); SWIG_fail; } - arg2 
= out; + resultobj = ustring.input_type(); + arg2 = ustring.str(); } + ecode3 = SWIG_AsVal_float(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__CalculateEntropy" "', argument " "3"" of type '" "float""'"); + } + arg3 = static_cast< float >(val3); { try { - result = sentencepiece_SentencePieceProcessor_DecodeIdsWithCheck((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< int > const &)*arg2); + result = (float)sentencepiece_SentencePieceProcessor__CalculateEntropy(arg1,SWIG_STD_MOVE(arg2),arg3); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { SWIG_exception(ToSwigError(status.code()), status.ToString().c_str()); } } - { - PyObject *input_type = resultobj; - resultobj = MakePyOutputString(result, input_type); - } - { - delete arg2; - } + resultobj = SWIG_From_float(static_cast< float >(result)); return resultobj; fail: - { - delete arg2; - } return NULL; } -SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodeIdsAsSerializedProtoWithCheck(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceProcessor__CalculateEntropyBatch(PyObject *self, PyObject *args) { PyObject *resultobj = 0; sentencepiece::SentencePieceProcessor *arg1 = (sentencepiece::SentencePieceProcessor *) 0 ; - std::vector< int > *arg2 = 0 ; + std::vector< absl::string_view > *arg2 = 0 ; + float arg3 ; + int arg4 ; void *argp1 = 0 ; int res1 = 0 ; - PyObject *swig_obj[2] ; - sentencepiece::util::bytes result; + float val3 ; + int ecode3 = 0 ; + int val4 ; + int ecode4 = 0 ; + PyObject *swig_obj[4] ; + std::vector< float > result; - if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor_DecodeIdsAsSerializedProtoWithCheck", 2, 2, swig_obj)) SWIG_fail; + if (!SWIG_Python_UnpackTuple(args, "SentencePieceProcessor__CalculateEntropyBatch", 4, 4, swig_obj)) SWIG_fail; res1 = SWIG_ConvertPtr(swig_obj[0], 
&argp1,SWIGTYPE_p_sentencepiece__SentencePieceProcessor, 0 | 0 ); if (!SWIG_IsOK(res1)) { - SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor_DecodeIdsAsSerializedProtoWithCheck" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor const *""'"); + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SentencePieceProcessor__CalculateEntropyBatch" "', argument " "1"" of type '" "sentencepiece::SentencePieceProcessor *""'"); } arg1 = reinterpret_cast< sentencepiece::SentencePieceProcessor * >(argp1); { - std::vector *out = nullptr; + std::vector *out = nullptr; if (PyList_Check(swig_obj[1])) { const size_t size = PyList_Size(swig_obj[1]); - out = new std::vector(size); + out = new std::vector(size); for (size_t i = 0; i < size; ++i) { - PyObject *o = PyList_GetItem(swig_obj[1], i); - if (PyInt_Check(o)) { - (*out)[i] = static_cast(PyInt_AsLong(o)); + const PyInputString ustring(PyList_GetItem(swig_obj[1], i)); + if (ustring.IsAvalable()) { + (*out)[i] = ustring.str(); } else { - PyErr_SetString(PyExc_TypeError,"list must contain integers"); + PyErr_SetString(PyExc_TypeError, "list must contain strings"); SWIG_fail; } + resultobj = ustring.input_type(); } } else { - PyErr_SetString(PyExc_TypeError,"not a list"); + PyErr_SetString(PyExc_TypeError, "not a list"); SWIG_fail; } arg2 = out; } + ecode3 = SWIG_AsVal_float(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SentencePieceProcessor__CalculateEntropyBatch" "', argument " "3"" of type '" "float""'"); + } + arg3 = static_cast< float >(val3); + ecode4 = SWIG_AsVal_int(swig_obj[3], &val4); + if (!SWIG_IsOK(ecode4)) { + SWIG_exception_fail(SWIG_ArgError(ecode4), "in method '" "SentencePieceProcessor__CalculateEntropyBatch" "', argument " "4"" of type '" "int""'"); + } + arg4 = static_cast< int >(val4); { try { - result = 
sentencepiece_SentencePieceProcessor_DecodeIdsAsSerializedProtoWithCheck((sentencepiece::SentencePieceProcessor const *)arg1,(std::vector< int > const &)*arg2); + result = sentencepiece_SentencePieceProcessor__CalculateEntropyBatch(arg1,(std::vector< absl::string_view > const &)*arg2,arg3,arg4); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ -5016,7 +8371,10 @@ SWIGINTERN PyObject *_wrap_SentencePieceProcessor_DecodeIdsAsSerializedProtoWith } } { - resultobj = MakePyOutputBytes(result); + resultobj = PyList_New((&result)->size()); + for (size_t i = 0; i < (&result)->size(); ++i) { + PyList_SET_ITEM(resultobj, i, PyFloat_FromDouble(static_cast(result[i]))); + } } { delete arg2; @@ -5041,7 +8399,7 @@ SWIGINTERN PyObject *SentencePieceProcessor_swiginit(PyObject *SWIGUNUSEDPARM(se return SWIG_Python_InitShadowInstance(args); } -SWIGINTERN PyObject *_wrap_SetRandomGeneratorSeed(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SetRandomGeneratorSeed(PyObject *self, PyObject *args) { PyObject *resultobj = 0; unsigned int arg1 ; unsigned int val1 ; @@ -5071,7 +8429,7 @@ SWIGINTERN PyObject *_wrap_SetRandomGeneratorSeed(PyObject *SWIGUNUSEDPARM(self) } -SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromString(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromString(PyObject *self, PyObject *args) { PyObject *resultobj = 0; absl::string_view arg1 ; PyObject *swig_obj[1] ; @@ -5085,11 +8443,11 @@ SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromString(PyObject *SWIGU SWIG_fail; } resultobj = ustring.input_type(); - arg1 = absl::string_view(ustring.data(), ustring.size()); + arg1 = ustring.str(); } { try { - sentencepiece_SentencePieceTrainer__TrainFromString(arg1); + sentencepiece_SentencePieceTrainer__TrainFromString(SWIG_STD_MOVE(arg1)); ReleaseResultObject(resultobj); } catch (const sentencepiece::util::Status &status) { @@ 
-5103,7 +8461,7 @@ SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromString(PyObject *SWIGU } -SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap(PyObject *self, PyObject *args) { PyObject *resultobj = 0; std::unordered_map< std::string,std::string > *arg1 = 0 ; PyObject *swig_obj[1] ; @@ -5156,7 +8514,7 @@ SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap(PyObject *SWIGUNUS } -SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap2(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap2(PyObject *self, PyObject *args) { PyObject *resultobj = 0; std::unordered_map< std::string,std::string > *arg1 = 0 ; sentencepiece::SentenceIterator *arg2 = (sentencepiece::SentenceIterator *) 0 ; @@ -5225,7 +8583,7 @@ SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap2(PyObject *SWIGUNU } -SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap3(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap3(PyObject *self, PyObject *args) { PyObject *resultobj = 0; std::unordered_map< std::string,std::string > *arg1 = 0 ; PyObject *swig_obj[1] ; @@ -5281,7 +8639,7 @@ SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap3(PyObject *SWIGUNU } -SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap4(PyObject *SWIGUNUSEDPARM(self), PyObject *args) { +SWIGINTERN PyObject *_wrap_SentencePieceTrainer__TrainFromMap4(PyObject *self, PyObject *args) { PyObject *resultobj = 0; std::unordered_map< std::string,std::string > *arg1 = 0 ; sentencepiece::SentenceIterator *arg2 = (sentencepiece::SentenceIterator *) 0 ; @@ -5361,7 +8719,31 @@ SWIGINTERN PyObject *SentencePieceTrainer_swigregister(PyObject *SWIGUNUSEDPARM( } static PyMethodDef SwigMethods[] = { - { "SWIG_PyInstanceMethod_New", 
SWIG_PyInstanceMethod_New, METH_O, NULL}, + { "new_ImmutableSentencePieceText_ImmutableSentencePiece", _wrap_new_ImmutableSentencePieceText_ImmutableSentencePiece, METH_NOARGS, NULL}, + { "delete_ImmutableSentencePieceText_ImmutableSentencePiece", _wrap_delete_ImmutableSentencePieceText_ImmutableSentencePiece, METH_O, NULL}, + { "ImmutableSentencePieceText_ImmutableSentencePiece__piece", _wrap_ImmutableSentencePieceText_ImmutableSentencePiece__piece, METH_O, NULL}, + { "ImmutableSentencePieceText_ImmutableSentencePiece__surface", _wrap_ImmutableSentencePieceText_ImmutableSentencePiece__surface, METH_O, NULL}, + { "ImmutableSentencePieceText_ImmutableSentencePiece__id", _wrap_ImmutableSentencePieceText_ImmutableSentencePiece__id, METH_O, NULL}, + { "ImmutableSentencePieceText_ImmutableSentencePiece__begin", _wrap_ImmutableSentencePieceText_ImmutableSentencePiece__begin, METH_O, NULL}, + { "ImmutableSentencePieceText_ImmutableSentencePiece__end", _wrap_ImmutableSentencePieceText_ImmutableSentencePiece__end, METH_O, NULL}, + { "ImmutableSentencePieceText_ImmutableSentencePiece_swigregister", ImmutableSentencePieceText_ImmutableSentencePiece_swigregister, METH_O, NULL}, + { "ImmutableSentencePieceText_ImmutableSentencePiece_swiginit", ImmutableSentencePieceText_ImmutableSentencePiece_swiginit, METH_VARARGS, NULL}, + { "new_ImmutableSentencePieceText", _wrap_new_ImmutableSentencePieceText, METH_NOARGS, NULL}, + { "delete_ImmutableSentencePieceText", _wrap_delete_ImmutableSentencePieceText, METH_O, NULL}, + { "ImmutableSentencePieceText__pieces_size", _wrap_ImmutableSentencePieceText__pieces_size, METH_O, NULL}, + { "ImmutableSentencePieceText__pieces", _wrap_ImmutableSentencePieceText__pieces, METH_VARARGS, NULL}, + { "ImmutableSentencePieceText__text", _wrap_ImmutableSentencePieceText__text, METH_O, NULL}, + { "ImmutableSentencePieceText__score", _wrap_ImmutableSentencePieceText__score, METH_O, NULL}, + { "ImmutableSentencePieceText_SerializeAsString", 
_wrap_ImmutableSentencePieceText_SerializeAsString, METH_O, NULL}, + { "ImmutableSentencePieceText_swigregister", ImmutableSentencePieceText_swigregister, METH_O, NULL}, + { "ImmutableSentencePieceText_swiginit", ImmutableSentencePieceText_swiginit, METH_VARARGS, NULL}, + { "new_ImmutableNBestSentencePieceText", _wrap_new_ImmutableNBestSentencePieceText, METH_NOARGS, NULL}, + { "delete_ImmutableNBestSentencePieceText", _wrap_delete_ImmutableNBestSentencePieceText, METH_O, NULL}, + { "ImmutableNBestSentencePieceText__nbests_size", _wrap_ImmutableNBestSentencePieceText__nbests_size, METH_O, NULL}, + { "ImmutableNBestSentencePieceText__nbests", _wrap_ImmutableNBestSentencePieceText__nbests, METH_VARARGS, NULL}, + { "ImmutableNBestSentencePieceText_SerializeAsString", _wrap_ImmutableNBestSentencePieceText_SerializeAsString, METH_O, NULL}, + { "ImmutableNBestSentencePieceText_swigregister", ImmutableNBestSentencePieceText_swigregister, METH_O, NULL}, + { "ImmutableNBestSentencePieceText_swiginit", ImmutableNBestSentencePieceText_swiginit, METH_VARARGS, NULL}, { "new_SentencePieceProcessor", _wrap_new_SentencePieceProcessor, METH_NOARGS, NULL}, { "delete_SentencePieceProcessor", _wrap_delete_SentencePieceProcessor, METH_O, NULL}, { "SentencePieceProcessor_LoadFromSerializedProto", _wrap_SentencePieceProcessor_LoadFromSerializedProto, METH_VARARGS, NULL}, @@ -5370,19 +8752,7 @@ static PyMethodDef SwigMethods[] = { { "SentencePieceProcessor_SetVocabulary", _wrap_SentencePieceProcessor_SetVocabulary, METH_VARARGS, NULL}, { "SentencePieceProcessor_ResetVocabulary", _wrap_SentencePieceProcessor_ResetVocabulary, METH_O, NULL}, { "SentencePieceProcessor_LoadVocabulary", _wrap_SentencePieceProcessor_LoadVocabulary, METH_VARARGS, NULL}, - { "SentencePieceProcessor_SetEncoderVersion", _wrap_SentencePieceProcessor_SetEncoderVersion, METH_VARARGS, NULL}, - { "SentencePieceProcessor_GetEncoderVersion", _wrap_SentencePieceProcessor_GetEncoderVersion, METH_O, NULL}, - { 
"SentencePieceProcessor_EncodeAsPieces", _wrap_SentencePieceProcessor_EncodeAsPieces, METH_VARARGS, NULL}, - { "SentencePieceProcessor_EncodeAsIds", _wrap_SentencePieceProcessor_EncodeAsIds, METH_VARARGS, NULL}, - { "SentencePieceProcessor_NBestEncodeAsPieces", _wrap_SentencePieceProcessor_NBestEncodeAsPieces, METH_VARARGS, NULL}, - { "SentencePieceProcessor_NBestEncodeAsIds", _wrap_SentencePieceProcessor_NBestEncodeAsIds, METH_VARARGS, NULL}, - { "SentencePieceProcessor_SampleEncodeAsPieces", _wrap_SentencePieceProcessor_SampleEncodeAsPieces, METH_VARARGS, NULL}, - { "SentencePieceProcessor_SampleEncodeAsIds", _wrap_SentencePieceProcessor_SampleEncodeAsIds, METH_VARARGS, NULL}, - { "SentencePieceProcessor_DecodePieces", _wrap_SentencePieceProcessor_DecodePieces, METH_VARARGS, NULL}, - { "SentencePieceProcessor_EncodeAsSerializedProto", _wrap_SentencePieceProcessor_EncodeAsSerializedProto, METH_VARARGS, NULL}, - { "SentencePieceProcessor_SampleEncodeAsSerializedProto", _wrap_SentencePieceProcessor_SampleEncodeAsSerializedProto, METH_VARARGS, NULL}, - { "SentencePieceProcessor_NBestEncodeAsSerializedProto", _wrap_SentencePieceProcessor_NBestEncodeAsSerializedProto, METH_VARARGS, NULL}, - { "SentencePieceProcessor_DecodePiecesAsSerializedProto", _wrap_SentencePieceProcessor_DecodePiecesAsSerializedProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor_CalculateEntropy", _wrap_SentencePieceProcessor_CalculateEntropy, METH_VARARGS, NULL}, { "SentencePieceProcessor_GetPieceSize", _wrap_SentencePieceProcessor_GetPieceSize, METH_O, NULL}, { "SentencePieceProcessor_PieceToId", _wrap_SentencePieceProcessor_PieceToId, METH_VARARGS, NULL}, { "SentencePieceProcessor_IdToPiece", _wrap_SentencePieceProcessor_IdToPiece, METH_VARARGS, NULL}, @@ -5397,8 +8767,36 @@ static PyMethodDef SwigMethods[] = { { "SentencePieceProcessor_pad_id", _wrap_SentencePieceProcessor_pad_id, METH_O, NULL}, { "SentencePieceProcessor_serialized_model_proto", 
_wrap_SentencePieceProcessor_serialized_model_proto, METH_O, NULL}, { "SentencePieceProcessor_LoadFromFile", _wrap_SentencePieceProcessor_LoadFromFile, METH_VARARGS, NULL}, - { "SentencePieceProcessor_DecodeIdsWithCheck", _wrap_SentencePieceProcessor_DecodeIdsWithCheck, METH_VARARGS, NULL}, - { "SentencePieceProcessor_DecodeIdsAsSerializedProtoWithCheck", _wrap_SentencePieceProcessor_DecodeIdsAsSerializedProtoWithCheck, METH_VARARGS, NULL}, + { "SentencePieceProcessor__EncodeAsIds", _wrap_SentencePieceProcessor__EncodeAsIds, METH_VARARGS, NULL}, + { "SentencePieceProcessor__EncodeAsPieces", _wrap_SentencePieceProcessor__EncodeAsPieces, METH_VARARGS, NULL}, + { "SentencePieceProcessor__EncodeAsSerializedProto", _wrap_SentencePieceProcessor__EncodeAsSerializedProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor__EncodeAsImmutableProto", _wrap_SentencePieceProcessor__EncodeAsImmutableProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor__EncodeAsIdsBatch", _wrap_SentencePieceProcessor__EncodeAsIdsBatch, METH_VARARGS, NULL}, + { "SentencePieceProcessor__EncodeAsPiecesBatch", _wrap_SentencePieceProcessor__EncodeAsPiecesBatch, METH_VARARGS, NULL}, + { "SentencePieceProcessor__EncodeAsSerializedProtoBatch", _wrap_SentencePieceProcessor__EncodeAsSerializedProtoBatch, METH_VARARGS, NULL}, + { "SentencePieceProcessor__EncodeAsImmutableProtoBatch", _wrap_SentencePieceProcessor__EncodeAsImmutableProtoBatch, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodeIds", _wrap_SentencePieceProcessor__DecodeIds, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodePieces", _wrap_SentencePieceProcessor__DecodePieces, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodeIdsAsSerializedProto", _wrap_SentencePieceProcessor__DecodeIdsAsSerializedProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodePiecesAsSerializedProto", _wrap_SentencePieceProcessor__DecodePiecesAsSerializedProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodeIdsAsImmutableProto", 
_wrap_SentencePieceProcessor__DecodeIdsAsImmutableProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodePiecesAsImmutableProto", _wrap_SentencePieceProcessor__DecodePiecesAsImmutableProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodeIdsBatch", _wrap_SentencePieceProcessor__DecodeIdsBatch, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch", _wrap_SentencePieceProcessor__DecodeIdsAsSerializedProtoBatch, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodeIdsAsImmutableProtoBatch", _wrap_SentencePieceProcessor__DecodeIdsAsImmutableProtoBatch, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodePiecesBatch", _wrap_SentencePieceProcessor__DecodePiecesBatch, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch", _wrap_SentencePieceProcessor__DecodePiecesAsSerializedProtoBatch, METH_VARARGS, NULL}, + { "SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch", _wrap_SentencePieceProcessor__DecodePiecesAsImmutableProtoBatch, METH_VARARGS, NULL}, + { "SentencePieceProcessor__NBestEncodeAsIds", _wrap_SentencePieceProcessor__NBestEncodeAsIds, METH_VARARGS, NULL}, + { "SentencePieceProcessor__NBestEncodeAsPieces", _wrap_SentencePieceProcessor__NBestEncodeAsPieces, METH_VARARGS, NULL}, + { "SentencePieceProcessor__NBestEncodeAsSerializedProto", _wrap_SentencePieceProcessor__NBestEncodeAsSerializedProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor__NBestEncodeAsImmutableProto", _wrap_SentencePieceProcessor__NBestEncodeAsImmutableProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor__SampleEncodeAndScoreAsIds", _wrap_SentencePieceProcessor__SampleEncodeAndScoreAsIds, METH_VARARGS, NULL}, + { "SentencePieceProcessor__SampleEncodeAndScoreAsPieces", _wrap_SentencePieceProcessor__SampleEncodeAndScoreAsPieces, METH_VARARGS, NULL}, + { "SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto", _wrap_SentencePieceProcessor__SampleEncodeAndScoreAsSerializedProto, METH_VARARGS, 
NULL}, + { "SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto", _wrap_SentencePieceProcessor__SampleEncodeAndScoreAsImmutableProto, METH_VARARGS, NULL}, + { "SentencePieceProcessor__CalculateEntropy", _wrap_SentencePieceProcessor__CalculateEntropy, METH_VARARGS, NULL}, + { "SentencePieceProcessor__CalculateEntropyBatch", _wrap_SentencePieceProcessor__CalculateEntropyBatch, METH_VARARGS, NULL}, { "SentencePieceProcessor_swigregister", SentencePieceProcessor_swigregister, METH_O, NULL}, { "SentencePieceProcessor_swiginit", SentencePieceProcessor_swiginit, METH_VARARGS, NULL}, { "SetRandomGeneratorSeed", _wrap_SetRandomGeneratorSeed, METH_O, NULL}, @@ -5411,51 +8809,71 @@ static PyMethodDef SwigMethods[] = { { NULL, NULL, 0, NULL } }; -static PyMethodDef SwigMethods_proxydocs[] = { - { NULL, NULL, 0, NULL } -}; - /* -------- TYPE CONVERSION AND EQUIVALENCE RULES (BEGIN) -------- */ static swig_type_info _swigt__p_char = {"_p_char", "char *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_float = {"_p_float", "float *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_sentencepiece__ImmutableNBestSentencePieceText = {"_p_sentencepiece__ImmutableNBestSentencePieceText", "sentencepiece::ImmutableNBestSentencePieceText *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_sentencepiece__ImmutableSentencePieceText = {"_p_sentencepiece__ImmutableSentencePieceText", "sentencepiece::ImmutableSentencePieceText *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece = {"_p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece", "sentencepiece::ImmutableSentencePieceText_ImmutableSentencePiece *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_sentencepiece__SentenceIterator = {"_p_sentencepiece__SentenceIterator", "sentencepiece::SentenceIterator *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_sentencepiece__SentencePieceProcessor = 
{"_p_sentencepiece__SentencePieceProcessor", "sentencepiece::SentencePieceProcessor *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_sentencepiece__SentencePieceTrainer = {"_p_sentencepiece__SentencePieceTrainer", "sentencepiece::SentencePieceTrainer *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_std__string = {"_p_std__string", "sentencepiece::util::bytes *|std::string *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_std__unordered_mapT_std__string_std__string_t = {"_p_std__unordered_mapT_std__string_std__string_t", "std::unordered_map< std::string,std::string > *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_std__vectorT_absl__string_view_t = {"_p_std__vectorT_absl__string_view_t", "std::vector< absl::string_view > *", 0, 0, (void*)0, 0}; static swig_type_info _swigt__p_std__vectorT_int_t = {"_p_std__vectorT_int_t", "std::vector< int > *", 0, 0, (void*)0, 0}; -static swig_type_info _swigt__p_std__vectorT_std__string_t = {"_p_std__vectorT_std__string_t", "std::vector< std::string > *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_std__vectorT_std__vectorT_absl__string_view_t_t = {"_p_std__vectorT_std__vectorT_absl__string_view_t_t", "std::vector< std::vector< absl::string_view > > *", 0, 0, (void*)0, 0}; +static swig_type_info _swigt__p_std__vectorT_std__vectorT_int_t_t = {"_p_std__vectorT_std__vectorT_int_t_t", "std::vector< std::vector< int > > *", 0, 0, (void*)0, 0}; static swig_type_info *swig_type_initial[] = { &_swigt__p_char, + &_swigt__p_float, + &_swigt__p_sentencepiece__ImmutableNBestSentencePieceText, + &_swigt__p_sentencepiece__ImmutableSentencePieceText, + &_swigt__p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, &_swigt__p_sentencepiece__SentenceIterator, &_swigt__p_sentencepiece__SentencePieceProcessor, &_swigt__p_sentencepiece__SentencePieceTrainer, &_swigt__p_std__string, &_swigt__p_std__unordered_mapT_std__string_std__string_t, + &_swigt__p_std__vectorT_absl__string_view_t, 
&_swigt__p_std__vectorT_int_t, - &_swigt__p_std__vectorT_std__string_t, + &_swigt__p_std__vectorT_std__vectorT_absl__string_view_t_t, + &_swigt__p_std__vectorT_std__vectorT_int_t_t, }; static swig_cast_info _swigc__p_char[] = { {&_swigt__p_char, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_float[] = { {&_swigt__p_float, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_sentencepiece__ImmutableNBestSentencePieceText[] = { {&_swigt__p_sentencepiece__ImmutableNBestSentencePieceText, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_sentencepiece__ImmutableSentencePieceText[] = { {&_swigt__p_sentencepiece__ImmutableSentencePieceText, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece[] = { {&_swigt__p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_sentencepiece__SentenceIterator[] = { {&_swigt__p_sentencepiece__SentenceIterator, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_sentencepiece__SentencePieceProcessor[] = { {&_swigt__p_sentencepiece__SentencePieceProcessor, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_sentencepiece__SentencePieceTrainer[] = { {&_swigt__p_sentencepiece__SentencePieceTrainer, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_std__string[] = { {&_swigt__p_std__string, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_std__unordered_mapT_std__string_std__string_t[] = { {&_swigt__p_std__unordered_mapT_std__string_std__string_t, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_std__vectorT_absl__string_view_t[] = { {&_swigt__p_std__vectorT_absl__string_view_t, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info _swigc__p_std__vectorT_int_t[] = { {&_swigt__p_std__vectorT_int_t, 0, 0, 0},{0, 0, 0, 0}}; -static swig_cast_info _swigc__p_std__vectorT_std__string_t[] = { {&_swigt__p_std__vectorT_std__string_t, 0, 0, 0},{0, 0, 0, 0}}; +static 
swig_cast_info _swigc__p_std__vectorT_std__vectorT_absl__string_view_t_t[] = { {&_swigt__p_std__vectorT_std__vectorT_absl__string_view_t_t, 0, 0, 0},{0, 0, 0, 0}}; +static swig_cast_info _swigc__p_std__vectorT_std__vectorT_int_t_t[] = { {&_swigt__p_std__vectorT_std__vectorT_int_t_t, 0, 0, 0},{0, 0, 0, 0}}; static swig_cast_info *swig_cast_initial[] = { _swigc__p_char, + _swigc__p_float, + _swigc__p_sentencepiece__ImmutableNBestSentencePieceText, + _swigc__p_sentencepiece__ImmutableSentencePieceText, + _swigc__p_sentencepiece__ImmutableSentencePieceText_ImmutableSentencePiece, _swigc__p_sentencepiece__SentenceIterator, _swigc__p_sentencepiece__SentencePieceProcessor, _swigc__p_sentencepiece__SentencePieceTrainer, _swigc__p_std__string, _swigc__p_std__unordered_mapT_std__string_std__string_t, + _swigc__p_std__vectorT_absl__string_view_t, _swigc__p_std__vectorT_int_t, - _swigc__p_std__vectorT_std__string_t, + _swigc__p_std__vectorT_std__vectorT_absl__string_view_t_t, + _swigc__p_std__vectorT_std__vectorT_int_t_t, }; @@ -5519,9 +8937,12 @@ extern "C" { #define SWIGRUNTIME_DEBUG #endif +#ifndef SWIG_INIT_CLIENT_DATA_TYPE +#define SWIG_INIT_CLIENT_DATA_TYPE void * +#endif SWIGRUNTIME void -SWIG_InitializeModule(void *clientdata) { +SWIG_InitializeModule(SWIG_INIT_CLIENT_DATA_TYPE clientdata) { size_t i; swig_module_info *module_head, *iter; int init; @@ -5704,214 +9125,6 @@ SWIG_PropagateClientData(void) { extern "C" { #endif - /* Python-specific SWIG API */ -#define SWIG_newvarlink() SWIG_Python_newvarlink() -#define SWIG_addvarlink(p, name, get_attr, set_attr) SWIG_Python_addvarlink(p, name, get_attr, set_attr) -#define SWIG_InstallConstants(d, constants) SWIG_Python_InstallConstants(d, constants) - - /* ----------------------------------------------------------------------------- - * global variable support code. 
- * ----------------------------------------------------------------------------- */ - - typedef struct swig_globalvar { - char *name; /* Name of global variable */ - PyObject *(*get_attr)(void); /* Return the current value */ - int (*set_attr)(PyObject *); /* Set the value */ - struct swig_globalvar *next; - } swig_globalvar; - - typedef struct swig_varlinkobject { - PyObject_HEAD - swig_globalvar *vars; - } swig_varlinkobject; - - SWIGINTERN PyObject * - swig_varlink_repr(swig_varlinkobject *SWIGUNUSEDPARM(v)) { -#if PY_VERSION_HEX >= 0x03000000 - return PyUnicode_InternFromString(""); -#else - return PyString_FromString(""); -#endif - } - - SWIGINTERN PyObject * - swig_varlink_str(swig_varlinkobject *v) { -#if PY_VERSION_HEX >= 0x03000000 - PyObject *str = PyUnicode_InternFromString("("); - PyObject *tail; - PyObject *joined; - swig_globalvar *var; - for (var = v->vars; var; var=var->next) { - tail = PyUnicode_FromString(var->name); - joined = PyUnicode_Concat(str, tail); - Py_DecRef(str); - Py_DecRef(tail); - str = joined; - if (var->next) { - tail = PyUnicode_InternFromString(", "); - joined = PyUnicode_Concat(str, tail); - Py_DecRef(str); - Py_DecRef(tail); - str = joined; - } - } - tail = PyUnicode_InternFromString(")"); - joined = PyUnicode_Concat(str, tail); - Py_DecRef(str); - Py_DecRef(tail); - str = joined; -#else - PyObject *str = PyString_FromString("("); - swig_globalvar *var; - for (var = v->vars; var; var=var->next) { - PyString_ConcatAndDel(&str,PyString_FromString(var->name)); - if (var->next) PyString_ConcatAndDel(&str,PyString_FromString(", ")); - } - PyString_ConcatAndDel(&str,PyString_FromString(")")); -#endif - return str; - } - - SWIGINTERN void - swig_varlink_dealloc(swig_varlinkobject *v) { - swig_globalvar *var = v->vars; - while (var) { - swig_globalvar *n = var->next; - free(var->name); - free(var); - var = n; - } - } - - SWIGINTERN PyObject * - swig_varlink_getattr(swig_varlinkobject *v, char *n) { - PyObject *res = NULL; - 
swig_globalvar *var = v->vars; - while (var) { - if (strcmp(var->name,n) == 0) { - res = (*var->get_attr)(); - break; - } - var = var->next; - } - if (res == NULL && !PyErr_Occurred()) { - PyErr_Format(PyExc_AttributeError, "Unknown C global variable '%s'", n); - } - return res; - } - - SWIGINTERN int - swig_varlink_setattr(swig_varlinkobject *v, char *n, PyObject *p) { - int res = 1; - swig_globalvar *var = v->vars; - while (var) { - if (strcmp(var->name,n) == 0) { - res = (*var->set_attr)(p); - break; - } - var = var->next; - } - if (res == 1 && !PyErr_Occurred()) { - PyErr_Format(PyExc_AttributeError, "Unknown C global variable '%s'", n); - } - return res; - } - - SWIGINTERN PyTypeObject* - swig_varlink_type(void) { - static char varlink__doc__[] = "Swig var link object"; - static PyTypeObject varlink_type; - static int type_init = 0; - if (!type_init) { - const PyTypeObject tmp = { -#if PY_VERSION_HEX >= 0x03000000 - PyVarObject_HEAD_INIT(NULL, 0) -#else - PyObject_HEAD_INIT(NULL) - 0, /* ob_size */ -#endif - "swigvarlink", /* tp_name */ - sizeof(swig_varlinkobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor) swig_varlink_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - (getattrfunc) swig_varlink_getattr, /* tp_getattr */ - (setattrfunc) swig_varlink_setattr, /* tp_setattr */ - 0, /* tp_compare */ - (reprfunc) swig_varlink_repr, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - (reprfunc) swig_varlink_str, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - 0, /* tp_flags */ - varlink__doc__, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* tp_iter -> tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ -#if PY_VERSION_HEX >= 0x03040000 - 0, /* tp_finalize */ -#endif -#ifdef COUNT_ALLOCS - 0, /* tp_allocs */ - 0, /* tp_frees 
*/ - 0, /* tp_maxalloc */ - 0, /* tp_prev */ - 0 /* tp_next */ -#endif - }; - varlink_type = tmp; - type_init = 1; - if (PyType_Ready(&varlink_type) < 0) - return NULL; - } - return &varlink_type; - } - - /* Create a variable linking object for use later */ - SWIGINTERN PyObject * - SWIG_Python_newvarlink(void) { - swig_varlinkobject *result = PyObject_NEW(swig_varlinkobject, swig_varlink_type()); - if (result) { - result->vars = 0; - } - return ((PyObject*) result); - } - - SWIGINTERN void - SWIG_Python_addvarlink(PyObject *p, const char *name, PyObject *(*get_attr)(void), int (*set_attr)(PyObject *p)) { - swig_varlinkobject *v = (swig_varlinkobject *) p; - swig_globalvar *gv = (swig_globalvar *) malloc(sizeof(swig_globalvar)); - if (gv) { - size_t size = strlen(name)+1; - gv->name = (char *)malloc(size); - if (gv->name) { - memcpy(gv->name, name, size); - gv->get_attr = get_attr; - gv->set_attr = set_attr; - gv->next = v->vars; - } - } - v->vars = gv; - } - - SWIGINTERN PyObject * - SWIG_globals(void) { - static PyObject *globals = 0; - if (!globals) { - globals = SWIG_newvarlink(); - } - return globals; - } - /* ----------------------------------------------------------------------------- * constants/methods manipulation * ----------------------------------------------------------------------------- */ @@ -5940,15 +9153,12 @@ extern "C" { } } - /* -----------------------------------------------------------------------------*/ - /* Fix SwigMethods to carry the callback ptrs when needed */ - /* -----------------------------------------------------------------------------*/ + /* ----------------------------------------------------------------------------- + * Patch %callback methods' docstrings to hold the callback ptrs + * -----------------------------------------------------------------------------*/ SWIGINTERN void - SWIG_Python_FixMethods(PyMethodDef *methods, - swig_const_info *const_table, - swig_type_info **types, - swig_type_info **types_initial) { + 
SWIG_Python_FixMethods(PyMethodDef *methods, const swig_const_info *const_table, swig_type_info **types, swig_type_info **types_initial) { size_t i; for (i = 0; methods[i].ml_name; ++i) { const char *c = methods[i].ml_doc; @@ -5956,7 +9166,7 @@ extern "C" { c = strstr(c, "swig_ptr: "); if (c) { int j; - swig_const_info *ci = 0; + const swig_const_info *ci = 0; const char *name = c + 10; for (j = 0; const_table[j].type; ++j) { if (strncmp(const_table[j].name, name, @@ -5988,68 +9198,13 @@ extern "C" { } } - /* ----------------------------------------------------------------------------- - * Method creation and docstring support functions - * ----------------------------------------------------------------------------- */ - - /* ----------------------------------------------------------------------------- - * Function to find the method definition with the correct docstring for the - * proxy module as opposed to the low-level API - * ----------------------------------------------------------------------------- */ - - SWIGINTERN PyMethodDef *SWIG_PythonGetProxyDoc(const char *name) { - /* Find the function in the modified method table */ - size_t offset = 0; - int found = 0; - while (SwigMethods_proxydocs[offset].ml_meth != NULL) { - if (strcmp(SwigMethods_proxydocs[offset].ml_name, name) == 0) { - found = 1; - break; - } - offset++; - } - /* Use the copy with the modified docstring if available */ - return found ? 
&SwigMethods_proxydocs[offset] : NULL; - } - - /* ----------------------------------------------------------------------------- - * Wrapper of PyInstanceMethod_New() used in Python 3 - * It is exported to the generated module, used for -fastproxy - * ----------------------------------------------------------------------------- */ - - SWIGINTERN PyObject *SWIG_PyInstanceMethod_New(PyObject *SWIGUNUSEDPARM(self), PyObject *func) { - if (PyCFunction_Check(func)) { - PyCFunctionObject *funcobj = (PyCFunctionObject *)func; - PyMethodDef *ml = SWIG_PythonGetProxyDoc(funcobj->m_ml->ml_name); - if (ml) - func = PyCFunction_NewEx(ml, funcobj->m_self, funcobj->m_module); - } -#if PY_VERSION_HEX >= 0x03000000 - return PyInstanceMethod_New(func); -#else - return PyMethod_New(func, NULL, NULL); -#endif - } - - /* ----------------------------------------------------------------------------- - * Wrapper of PyStaticMethod_New() - * It is exported to the generated module, used for -fastproxy - * ----------------------------------------------------------------------------- */ - - SWIGINTERN PyObject *SWIG_PyStaticMethod_New(PyObject *SWIGUNUSEDPARM(self), PyObject *func) { - if (PyCFunction_Check(func)) { - PyCFunctionObject *funcobj = (PyCFunctionObject *)func; - PyMethodDef *ml = SWIG_PythonGetProxyDoc(funcobj->m_ml->ml_name); - if (ml) - func = PyCFunction_NewEx(ml, funcobj->m_self, funcobj->m_module); - } - return PyStaticMethod_New(func); - } - #ifdef __cplusplus } #endif + + + /* -----------------------------------------------------------------------------* * Partial Init method * -----------------------------------------------------------------------------*/ @@ -6185,8 +9340,6 @@ SWIG_init(void) { SWIG_InstallConstants(d,swig_const_table); - SWIG_Python_SetConstant(d, "EncoderVersion_kOptimized",SWIG_From_int(static_cast< int >(sentencepiece::EncoderVersion::kOptimized))); - SWIG_Python_SetConstant(d, "EncoderVersion_kOriginal",SWIG_From_int(static_cast< int 
>(sentencepiece::EncoderVersion::kOriginal))); #if PY_VERSION_HEX >= 0x03000000 return m; #else diff --git a/python/test/sentencepiece_test.py b/python/test/sentencepiece_test.py index 7bf1c133..2b9ad282 100755 --- a/python/test/sentencepiece_test.py +++ b/python/test/sentencepiece_test.py @@ -15,7 +15,6 @@ # See the License for the specific language governing permissions and # limitations under the License.! -import codecs import io import sentencepiece as spm import unittest @@ -25,6 +24,8 @@ from collections import defaultdict +print('VERSION={}'.format(spm.__version__)) + data_dir = 'test' if sys.platform == 'win32': data_dir = os.path.join('..', 'data') @@ -60,6 +61,17 @@ def test_load(self): piece = self.sp_.IdToPiece(i) self.assertEqual(i, self.sp_.PieceToId(piece)) + self.assertEqual(1000, self.sp_.get_piece_size()) + self.assertEqual(0, self.sp_.piece_to_id('')) + self.assertEqual(1, self.sp_.piece_to_id('')) + self.assertEqual(2, self.sp_.piece_to_id('')) + self.assertEqual('', self.sp_.id_to_piece(0)) + self.assertEqual('', self.sp_.id_to_piece(1)) + self.assertEqual('', self.sp_.id_to_piece(2)) + for i in range(self.sp_.get_piece_size()): + piece = self.sp_.id_to_piece(i) + self.assertEqual(i, self.sp_.piece_to_id(piece)) + def test_roundtrip(self): text = 'I saw a girl with a telescope.' 
ids = self.sp_.EncodeAsIds(text) @@ -80,6 +92,34 @@ def test_roundtrip(self): self.assertEqual( text, self.sp_.DecodeIds(self.sp_.SampleEncodeAsIds(text, -1, 0.5))) + ids2 = self.sp_.encode_as_ids(text) + pieces3 = self.sp_.encode_as_pieces(text) + pieces4 = self.sp_.nbest_encode_as_pieces(text, 10)[0] + self.assertEqual(pieces3, pieces4) + self.assertEqual(pieces1, pieces3) + self.assertEqual(ids, ids2) + self.assertEqual(text, self.sp_.decode_pieces(pieces3)) + self.assertEqual(text, self.sp_.decode_ids(ids2)) + for n in range(100): + self.assertEqual( + text, + self.sp_.decode_pieces( + self.sp_.sample_encode_as_pieces(text, 64, 0.5))) + self.assertEqual( + text, + self.sp_.decode_pieces( + self.sp_.sample_encode_as_pieces(text, -1, 0.5))) + self.assertEqual( + text, + self.sp_.decode_ids(self.sp_.sample_encode_as_ids(text, 64, 0.5))) + self.assertEqual( + text, + self.sp_.decode_ids(self.sp_.sample_encode_as_ids(text, -1, 0.5))) + + self.assertEqual( + self.sp_.calculate_entropy(text, 0.1), + self.sp_.CalculateEntropy(text, 0.1)) + def test_ja_load(self): self.assertEqual(8000, self.jasp_.GetPieceSize()) self.assertEqual(0, self.jasp_.PieceToId('')) @@ -92,6 +132,17 @@ def test_ja_load(self): piece = self.jasp_.IdToPiece(i) self.assertEqual(i, self.jasp_.PieceToId(piece)) + self.assertEqual(8000, self.jasp_.get_piece_size()) + self.assertEqual(0, self.jasp_.piece_to_id('')) + self.assertEqual(1, self.jasp_.piece_to_id('')) + self.assertEqual(2, self.jasp_.piece_to_id('')) + self.assertEqual('', self.jasp_.id_to_piece(0)) + self.assertEqual('', self.jasp_.id_to_piece(1)) + self.assertEqual('', self.jasp_.id_to_piece(2)) + for i in range(self.jasp_.get_piece_size()): + piece = self.jasp_.id_to_piece(i) + self.assertEqual(i, self.jasp_.piece_to_id(piece)) + def test_ja_roundtrip(self): text = '清水寺は京都にある。' ids = self.jasp_.EncodeAsIds(text) @@ -110,40 +161,27 @@ def test_ja_roundtrip(self): self.jasp_.DecodePieces( self.jasp_.SampleEncodeAsPieces(text, -1, 0.5))) - 
def test_unicode_roundtrip(self): - text = u'I saw a girl with a telescope.' - ids = self.sp_.EncodeAsIds(text) - pieces = self.sp_.EncodeAsPieces(text) - self.assertEqual(text, self.sp_.DecodePieces(pieces)) - self.assertEqual(text, self.sp_.DecodeIds(ids)) - # python2 returns `str`. - if sys.version_info < (3, 0, 0): - text = text.encode('utf-8') - self.assertEqual(text, self.sp_.DecodeIds(ids)) - self.assertEqual(text, self.sp_.DecodePieces(pieces)) - - def test_unicode_ja_roundtrip(self): - text = u'清水寺は京都にある。' - ids = self.jasp_.EncodeAsIds(text) - pieces = self.jasp_.EncodeAsPieces(text) - self.assertEqual(text, self.jasp_.DecodePieces(pieces)) - # python2 returns `str`. - if sys.version_info < (3, 0, 0): - text = text.encode('utf-8') - self.assertEqual(text, self.jasp_.DecodeIds(ids)) - - def test_pickle(self): - with open('sp.pickle', 'wb') as f: - pickle.dump(self.sp_, f) - - id1 = self.sp_.encode('hello world.', out_type=int) - - with open('sp.pickle', 'rb') as f: - sp = pickle.load(f) - - id2 = sp.encode('hello world.', out_type=int) + ids2 = self.jasp_.encode_as_ids(text) + pieces3 = self.jasp_.encode_as_pieces(text) + pieces4 = self.jasp_.nbest_encode_as_pieces(text, 10)[0] + self.assertEqual(pieces3, pieces4) + self.assertEqual(pieces1, pieces3) + self.assertEqual(ids, ids2) + self.assertEqual(text, self.jasp_.decode_pieces(pieces1)) + self.assertEqual(text, self.jasp_.decode_ids(ids2)) + for n in range(100): + self.assertEqual( + text, + self.jasp_.decode_pieces( + self.jasp_.sample_encode_as_pieces(text, 64, 0.5))) + self.assertEqual( + text, + self.jasp_.decode_pieces( + self.jasp_.sample_encode_as_pieces(text, -1, 0.5))) - self.assertEqual(id1, id2) + self.assertEqual( + self.jasp_.calculate_entropy(text, 0.1), + self.jasp_.CalculateEntropy(text, 0.1)) def test_train(self): spm.SentencePieceTrainer.Train('--input=' + @@ -151,37 +189,45 @@ def test_train(self): ' --model_prefix=m --vocab_size=1000') sp = spm.SentencePieceProcessor() 
sp.Load('m.model') - with codecs.open( - os.path.join(data_dir, 'botchan.txt'), 'r', encoding='utf-8') as file: + with open(os.path.join(data_dir, 'botchan.txt'), 'r') as file: for line in file: sp.DecodePieces(sp.EncodeAsPieces(line)) sp.DecodeIds(sp.EncodeAsIds(line)) - def test_train(self): + def test_train_iterator(self): spm.SentencePieceTrainer.Train('--input=' + os.path.join(data_dir, 'botchan.txt') + ' --model_prefix=m --vocab_size=1000') # Load as 'rb' for Python3.5/2.7. - is1 = open(os.path.join(data_dir, 'botchan.txt'), 'rb') - is2 = open(os.path.join(data_dir, 'botchan.txt'), 'rb') os1 = io.BytesIO() os2 = io.BytesIO() + # suppress logging (redirect to /dev/null) spm.SentencePieceTrainer.train( input=os.path.join(data_dir, 'botchan.txt'), model_prefix='m', - vocab_size=1000) + vocab_size=1000, + logstream=open(os.devnull, 'w')) - spm.SentencePieceTrainer.train( - sentence_iterator=is1, model_prefix='m', vocab_size=1000) + with open(os.path.join(data_dir, 'botchan.txt'), 'rb') as is1: + spm.SentencePieceTrainer.train( + sentence_iterator=is1, + model_prefix='m', + vocab_size=1000, + logstream=open(os.devnull, 'w')) spm.SentencePieceTrainer.train( input=os.path.join(data_dir, 'botchan.txt'), model_writer=os1, - vocab_size=1000) + vocab_size=1000, + logstream=open(os.devnull, 'w')) - spm.SentencePieceTrainer.train( - sentence_iterator=is2, model_writer=os2, vocab_size=1000) + with open(os.path.join(data_dir, 'botchan.txt'), 'rb') as is2: + spm.SentencePieceTrainer.train( + sentence_iterator=is2, + model_writer=os2, + vocab_size=1000, + logstream=open(os.devnull, 'w')) sp1 = spm.SentencePieceProcessor(model_proto=os1.getvalue()) sp2 = spm.SentencePieceProcessor(model_proto=os2.getvalue()) @@ -189,134 +235,183 @@ def test_train(self): [sp2.id_to_piece(i) for i in range(sp2.get_piece_size())]) def test_train_kwargs(self): + # suppress logging (redirect to /dev/null) spm.SentencePieceTrainer.train( input=[os.path.join(data_dir, 'botchan.txt')], 
model_prefix='m', vocab_size=1002, - user_defined_symbols=['foo', 'bar', ',']) + user_defined_symbols=['foo', 'bar', ',', ' ', '\t', '\b', '\n', '\r'], + logstream=open(os.devnull, 'w')) sp = spm.SentencePieceProcessor() sp.Load('m.model') - with codecs.open( - os.path.join(data_dir, 'botchan.txt'), 'r', encoding='utf-8') as file: + with open(os.path.join(data_dir, 'botchan.txt'), 'r') as file: for line in file: sp.DecodePieces(sp.EncodeAsPieces(line)) sp.DecodeIds(sp.EncodeAsIds(line)) - # snake case API. - def test_load_snake(self): - self.assertEqual(1000, self.sp_.get_piece_size()) - self.assertEqual(0, self.sp_.piece_to_id('')) - self.assertEqual(1, self.sp_.piece_to_id('')) - self.assertEqual(2, self.sp_.piece_to_id('')) - self.assertEqual('', self.sp_.id_to_piece(0)) - self.assertEqual('', self.sp_.id_to_piece(1)) - self.assertEqual('', self.sp_.id_to_piece(2)) - for i in range(self.sp_.get_piece_size()): - piece = self.sp_.id_to_piece(i) - self.assertEqual(i, self.sp_.piece_to_id(piece)) + s = 'hello\tworld\r\nthis\tis a \b pen' + self.assertEqual(s, sp.decode(sp.encode(s))) - def test_roundtrip_snake(self): + def test_serialized_proto(self): text = 'I saw a girl with a telescope.' 
- ids = self.sp_.encode_as_ids(text) - pieces1 = self.sp_.encode_as_pieces(text) - pieces2 = self.sp_.nbest_encode_as_pieces(text, 10)[0] - self.assertEqual(pieces1, pieces2) - self.assertEqual(text, self.sp_.decode_pieces(pieces1)) - self.assertEqual(text, self.sp_.decode_ids(ids)) - for n in range(100): - self.assertEqual( - text, - self.sp_.decode_pieces( - self.sp_.sample_encode_as_pieces(text, 64, 0.5))) - self.assertEqual( - text, - self.sp_.decode_pieces( - self.sp_.sample_encode_as_pieces(text, -1, 0.5))) - self.assertEqual( - text, - self.sp_.decode_ids(self.sp_.sample_encode_as_ids(text, 64, 0.5))) - self.assertEqual( - text, - self.sp_.decode_ids(self.sp_.sample_encode_as_ids(text, -1, 0.5))) + s1 = self.sp_.EncodeAsSerializedProto(text) + s2 = self.sp_.SampleEncodeAsSerializedProto(text, 10, 0.2) + s3 = self.sp_.NBestEncodeAsSerializedProto(text, 10) + s4 = self.sp_.DecodePiecesAsSerializedProto(['foo', 'bar']) + s5 = self.sp_.DecodeIdsAsSerializedProto([20, 30]) + + t1 = self.sp_.encode_as_serialized_proto(text) + t2 = self.sp_.sample_encode_as_serialized_proto(text, 10, 0.2) + t3 = self.sp_.nbest_encode_as_serialized_proto(text, 10) + t4 = self.sp_.decode_pieces_as_serialized_proto(['foo', 'bar']) + t5 = self.sp_.decode_ids_as_serialized_proto([20, 30]) + + y1 = self.sp_.encode(text, out_type='serialized_proto') + y2 = self.sp_.encode( + text, enable_sampling=True, out_type='serialized_proto') + y3 = self.sp_.nbest_encode(text, out_type='serialized_proto', nbest_size=10) + y4 = self.sp_.decode(['foo', 'bar'], out_type='serialized_proto') + y5 = self.sp_.decode([20, 30], out_type='serialized_proto') + + self.assertEqual(type(s1), bytes) + self.assertEqual(type(s2), bytes) + self.assertEqual(type(t2), bytes) + self.assertEqual(type(s3), bytes) + self.assertEqual(type(s4), bytes) + self.assertEqual(type(s5), bytes) + + self.assertEqual(s1, t1) + self.assertEqual(s3, t3) + self.assertEqual(s4, t4) + self.assertEqual(s5, t5) + self.assertEqual(s1, y1) + 
self.assertEqual(s3, y3) + self.assertEqual(s4, y4) + self.assertEqual(s5, y5) - def test_ja_load_snake(self): - self.assertEqual(8000, self.jasp_.get_piece_size()) - self.assertEqual(0, self.jasp_.piece_to_id('')) - self.assertEqual(1, self.jasp_.piece_to_id('')) - self.assertEqual(2, self.jasp_.piece_to_id('')) - self.assertEqual('', self.jasp_.id_to_piece(0)) - self.assertEqual('', self.jasp_.id_to_piece(1)) - self.assertEqual('', self.jasp_.id_to_piece(2)) - for i in range(self.jasp_.get_piece_size()): - piece = self.jasp_.id_to_piece(i) - self.assertEqual(i, self.jasp_.piece_to_id(piece)) + ids = self.jasp_.EncodeAsIds(text) + pieces = self.jasp_.EncodeAsPieces(text) + s1 = self.jasp_.EncodeAsSerializedProto(text) + s2 = self.jasp_.DecodeIdsAsSerializedProto(ids) + s3 = self.jasp_.DecodePiecesAsSerializedProto(ids) + self.assertEqual(s2, s1) + self.assertEqual(s3, s1) - def test_ja_roundtrip_snake(self): - text = '清水寺は京都にある。' - ids = self.jasp_.encode_as_ids(text) - pieces1 = self.jasp_.encode_as_pieces(text) - pieces2 = self.jasp_.nbest_encode_as_pieces(text, 10)[0] - self.assertEqual(pieces1, pieces2) - self.assertEqual(text, self.jasp_.decode_pieces(pieces1)) - self.assertEqual(text, self.jasp_.decode_ids(ids)) - for n in range(100): - self.assertEqual( - text, - self.jasp_.decode_pieces( - self.jasp_.sample_encode_as_pieces(text, 64, 0.5))) + def test_immutable_proto(self): + text = 'I saw a girl with a telescope.' 
+ s1 = self.sp_.EncodeAsImmutableProto(text) + s2 = self.sp_.SampleEncodeAsImmutableProto(text, 10, 0.2) + s3 = self.sp_.NBestEncodeAsImmutableProto(text, 10) + s4 = self.sp_.DecodePiecesAsImmutableProto(['foo', 'bar']) + s5 = self.sp_.DecodeIdsAsImmutableProto([20, 30]) + + print(s1) + print(s2) + print(s3) + print(s4) + print(s5) + + t1 = self.sp_.encode_as_immutable_proto(text) + t2 = self.sp_.sample_encode_as_immutable_proto(text, 10, 0.2) + t3 = self.sp_.nbest_encode_as_immutable_proto(text, 10) + t4 = self.sp_.decode_pieces_as_immutable_proto(['foo', 'bar']) + t5 = self.sp_.decode_ids_as_immutable_proto([20, 30]) + + y1 = self.sp_.encode(text, out_type='immutable_proto') + y2 = self.sp_.encode(text, enable_sampling=True, out_type='immutable_proto') + y3 = self.sp_.nbest_encode(text, out_type='immutable_proto', nbest_size=10) + y4 = self.sp_.decode(['foo', 'bar'], out_type='immutable_proto') + y5 = self.sp_.decode([20, 30], out_type='immutable_proto') + + self.assertEqual(s1, t1) + self.assertEqual(s3, t3) + self.assertEqual(s4, t4) + self.assertEqual(s5, t5) + self.assertEqual(s1, y1) + self.assertEqual(s3, y3) + self.assertEqual(s4, y4) + self.assertEqual(s5, y5) + + hset_piece = defaultdict(int) + + # eq test + for i in range(len(s1.pieces)): + self.assertEqual(s1.pieces[i], t1.pieces[i]) + hset_piece[s1.pieces[i]] += 1 + hset_piece[t1.pieces[i]] += 1 + + self.assertEqual(len(hset_piece), len(s1.pieces)) + + # has test + hset = defaultdict(int) + hset[s1] += 1 + hset[t1] += 1 + hset[s3] += 1 + hset[t3] += 1 + + self.assertEqual(len(hset), 2) + self.assertEqual(hset[s1], 2) + self.assertEqual(hset[s3], 2) + self.assertEqual(hset[t1], 2) + self.assertEqual(hset[t3], 2) + + x1 = self.sp_.encode_as_serialized_proto(text) + x2 = self.sp_.sample_encode_as_serialized_proto(text, 10, 0.2) + x3 = self.sp_.nbest_encode_as_serialized_proto(text, 10) + x4 = self.sp_.decode_pieces_as_serialized_proto(['foo', 'bar']) + x5 = self.sp_.decode_ids_as_serialized_proto([20, 
30]) + + self.assertEqual(x1, t1.SerializeAsString()) + self.assertEqual(x3, t3.SerializeAsString()) + self.assertEqual(x4, t4.SerializeAsString()) + self.assertEqual(x5, t5.SerializeAsString()) + + v1 = self.sp_.EncodeAsIds(text) + v2 = self.sp_.EncodeAsPieces(text) + self.assertEqual([x.id for x in s1.pieces], v1) + self.assertEqual([x.piece for x in s1.pieces], v2) + self.assertEqual(text, s1.text) + + surfaces1 = [s1.text[x.begin:x.end] for x in s1.pieces] + surfaces2 = [x.surface for x in s1.pieces] + self.assertEqual(surfaces1, surfaces2) + + ids = [] + for i in range(len(s1.pieces)): + ids.append(s1.pieces[i].id) + self.assertEqual(ids, v1) + + pieces = [] + for i in range(len(s1.pieces)): + pieces.append(s1.pieces[i].piece) + self.assertEqual(pieces, v2) + + for v in s3.nbests: + self.assertEqual(text, v.text) + self.assertEqual(self.sp_.Decode([x.id for x in v.pieces]), text) + + for i in range(len(s3.nbests)): + self.assertEqual(text, s3.nbests[i].text) self.assertEqual( - text, - self.jasp_.decode_pieces( - self.jasp_.sample_encode_as_pieces(text, -1, 0.5))) + self.sp_.Decode([x.id for x in s3.nbests[i].pieces]), text) - def test_unicode_roundtrip_snake(self): - text = u'I saw a girl with a telescope.' - ids = self.sp_.encode_as_ids(text) - pieces = self.sp_.encode_as_pieces(text) - self.assertEqual(text, self.sp_.decode_pieces(pieces)) - # python2 returns `str`. - if sys.version_info < (3, 0, 0): - text = text.encode('utf-8') - self.assertEqual(text, self.sp_.decode_ids(ids)) - - def test_unicode_ja_roundtrip_snake(self): - text = u'清水寺は京都にある。' - ids = self.jasp_.encode_as_ids(text) - pieces = self.jasp_.encode_as_pieces(text) - self.assertEqual(text, self.jasp_.decode_pieces(pieces)) - # python2 returns `str`. 
- if sys.version_info < (3, 0, 0): - text = text.encode('utf-8') - self.assertEqual(text, self.jasp_.decode_ids(ids)) - - def test_train_snake(self): - spm.SentencePieceTrainer.train('--input=' + - os.path.join(data_dir, 'botchan.txt') + - ' --model_prefix=m --vocab_size=1000') - sp = spm.SentencePieceProcessor() - sp.load('m.model') - with codecs.open( - os.path.join(data_dir, 'botchan.txt'), 'r', encoding='utf-8') as file: - for line in file: - sp.decode_pieces(sp.encode_as_pieces(line)) - sp.decode_ids(sp.encode_as_ids(line)) + # slice + self.assertEqual(s1.pieces[::-1], list(reversed(s1.pieces))) + self.assertEqual(s3.nbests[::-1], list(reversed(s3.nbests))) - def test_serialized_proto(self): - text = u'I saw a girl with a telescope.' - self.assertNotEqual('', self.sp_.EncodeAsSerializedProto(text)) - self.assertNotEqual('', - self.sp_.SampleEncodeAsSerializedProto(text, 10, 0.2)) - self.assertNotEqual('', self.sp_.NBestEncodeAsSerializedProto(text, 10)) - self.assertNotEqual('', - self.sp_.DecodePiecesAsSerializedProto(['foo', 'bar'])) - self.assertNotEqual('', self.sp_.DecodeIdsAsSerializedProto([20, 30])) - self.assertNotEqual('', self.sp_.encode_as_serialized_proto(text)) - self.assertNotEqual( - '', self.sp_.sample_encode_as_serialized_proto(text, 10, 0.2)) - self.assertNotEqual('', self.sp_.nbest_encode_as_serialized_proto(text, 10)) - self.assertNotEqual( - '', self.sp_.decode_pieces_as_serialized_proto(['foo', 'bar'])) - self.assertNotEqual('', self.sp_.decode_ids_as_serialized_proto([20, 30])) + # Japanese offset + s1 = self.jasp_.EncodeAsImmutableProto('吾輩は猫である。Hello world. 
ABC 123') + surfaces1 = [s1.text[x.begin:x.end] for x in s1.pieces] + surfaces2 = [x.surface for x in s1.pieces] + self.assertEqual(surfaces1, surfaces2) + + ids = [x.id for x in s1.pieces] + s2 = self.jasp_.DecodeIdsAsImmutableProto(ids) + self.assertEqual(s2, s1) + + pieces = [x.piece for x in s1.pieces] + s2 = self.jasp_.DecodePiecesAsImmutableProto(pieces) + self.assertEqual(s2, s1) def test_new_api(self): sp = spm.SentencePieceProcessor( @@ -327,19 +422,61 @@ def test_new_api(self): ids2 = self.sp_.EncodeAsIds(text2) pieces = self.sp_.EncodeAsPieces(text) pieces2 = self.sp_.EncodeAsPieces(text2) - self.assertEqual(sp.encode(text), ids) + sprotos = self.sp_.EncodeAsSerializedProto(text) + sproto2 = self.sp_.EncodeAsSerializedProto(text2) + iprotos = self.sp_.EncodeAsImmutableProto(text) + iprotos2 = self.sp_.EncodeAsImmutableProto(text2) + + self.assertEqual(sp.encode(text, out_type=int), ids) self.assertEqual(sp.encode(text, out_type=str), pieces) + self.assertEqual(sp.encode(text, out_type='serialized_proto'), sprotos) + self.assertEqual(sp.encode(text, out_type='immutable_proto'), iprotos) + + self.assertEqual(sp.encode([text], out_type=int), [ids]) + self.assertEqual(sp.encode([text], out_type=str), [pieces]) + self.assertEqual(sp.encode([text], out_type='serialized_proto'), [sprotos]) + self.assertEqual(sp.encode([text], out_type='immutable_proto'), [iprotos]) + + self.assertEqual(len(iprotos.pieces), len(pieces)) + self.assertEqual(len(iprotos.pieces), len(ids)) + self.assertEqual(iprotos.text, text) + + self.assertEqual(len(iprotos2.pieces), len(pieces2)) + self.assertEqual(len(iprotos2.pieces), len(ids2)) + self.assertEqual(iprotos2.text, text2) + + for i in range(len(iprotos.pieces)): + self.assertEqual(ids[i], iprotos.pieces[i].id) + self.assertEqual(pieces[i], iprotos.pieces[i].piece) + + for i, piece in enumerate(iprotos.pieces): + self.assertEqual(ids[i], piece.id) + self.assertEqual(pieces[i], piece.piece) + + for i in range(len(iprotos2.pieces)): 
+ self.assertEqual(ids2[i], iprotos2.pieces[i].id) + self.assertEqual(pieces2[i], iprotos2.pieces[i].piece) + + for i, piece in enumerate(iprotos2.pieces): + self.assertEqual(ids2[i], piece.id) + self.assertEqual(pieces2[i], piece.piece) + detok_ids = self.sp_.DecodeIds(ids) detok_pieces = self.sp_.DecodePieces(pieces) self.assertEqual(sp.decode(ids), detok_ids) self.assertEqual(sp.decode(pieces), detok_pieces) + self.assertEqual(sp.decode([]), '') + self.assertEqual(sp.decode([[]]), ['']) # add_bos, add_eos, reverse self.assertEqual([sp.bos_id()] + ids, sp.encode(text, add_bos=True)) self.assertEqual(ids + [sp.eos_id()], sp.encode(text, add_eos=True)) + self.assertEqual(ids + [sp.eos_id()], sp.EncodeAsIds(text, add_eos=True)) rids = ids[:] rids.reverse() + self.assertEqual(rids, sp.encode(text, reverse=True)) + self.assertEqual(rids, sp.EncodeAsIds(text, reverse=True)) # different shape. self.assertEqual([ids, ids2], sp.encode([text, text2])) @@ -347,6 +484,29 @@ def test_new_api(self): self.assertEqual([text, text2], sp.decode([ids, ids2])) self.assertEqual([text, text2], sp.decode([pieces, pieces2])) + pieces = list(reversed(self.sp_.EncodeAsPieces(text))) + self.assertEqual(pieces, sp.encode(text, reverse=True, out_type=str)) + + # emit unk piece + unk_char = '藤' + pieces = self.sp_.EncodeAsIds(unk_char, emit_unk_piece=True) + pieces2 = self.sp_.encode(unk_char, out_type=int, emit_unk_piece=True) + self.assertEqual(pieces[1], sp.unk_id()) + self.assertEqual(pieces2[1], sp.unk_id()) + self.assertEqual(pieces, pieces2) + + pieces = self.sp_.EncodeAsPieces(unk_char, emit_unk_piece=True) + pieces2 = self.sp_.encode(unk_char, out_type=str, emit_unk_piece=True) + self.assertEqual(pieces[1], '') + self.assertEqual(pieces2[1], '') + self.assertEqual(pieces, pieces2) + + pieces = self.sp_.EncodeAsPieces(unk_char, emit_unk_piece=False) + pieces2 = self.sp_.encode(unk_char, out_type=str, emit_unk_piece=False) + self.assertEqual(pieces[1], unk_char) + 
self.assertEqual(pieces2[1], unk_char) + self.assertEqual(pieces, pieces2) + def test_new_api_init(self): sp = spm.SentencePieceProcessor( model_file=os.path.join('test', 'test_model.model'), @@ -357,20 +517,124 @@ def test_new_api_init(self): pieces = [''] + self.sp_.EncodeAsPieces(text) + [''] self.assertEqual(pieces, sp.encode(text)) - def test_new_api_sampling(self): - sp = spm.SentencePieceProcessor( - model_file=os.path.join('test', 'test_model.model'), - out_type=str, - enable_sampling=True) - ids = defaultdict(int) - for n in range(100): - ++ids[' '.join(sp.encode('hello world'))] - self.assertGreater(len(ids), 1) + pieces = self.sp_.EncodeAsPieces(text) + [''] + self.assertEqual(pieces, sp.encode(text, add_bos=False, add_eos=True)) + + def test_sampling(self): + sp = self.sp_ + + for out_type in [str, int, 'serialized_proto', 'immutable_proto']: + ids = defaultdict(int) + for n in range(100): + out = sp.encode('hello world', out_type=out_type, enable_sampling=True) + if type(out) is list: + out = tuple(out) + ++ids[out] + self.assertGreater(len(ids), 1) + + ids2 = defaultdict(int) + for n in range(100): + out = sp.encode('hello world', out_type=out_type, enable_sampling=False) + if type(out) is list: + out = tuple(out) + ++ids2[out] + self.assertEqual(len(ids2), 1) + + out = sp.encode(['hello world', 'this is a test'], + out_type=out_type, + enable_sampling=True) + self.assertEqual(len(out), 2) + out = sp.encode(['hello world', 'this is a test'], + out_type=out_type, + enable_sampling=False) + self.assertEqual(len(out), 2) + + def test_nbest(self): + sp = self.sp_ + text = 'hello world' + text2 = 'I have a pen.' 
- ids2 = defaultdict(int) - for n in range(100): - ++ids2[' '.join(sp.encode('hello world', enable_sampling=False))] - self.assertEqual(len(ids2), 1) + for out_type in [str, int, 'serialized_proto', 'immutable_proto']: + results = sp.nbest_encode(text, nbest_size=10, out_type=out_type) + self.assertEqual(results, + sp.NBestEncode(text, nbest_size=10, out_type=out_type)) + + if out_type in [str, int]: + for n in results: + self.assertEqual(sp.decode(n), text) + + for n in sp.decode(results): + self.assertEqual(n, text) + + # batch test + results = sp.nbest_encode([text, text2], nbest_size=10, out_type=out_type) + self.assertEqual( + results, + sp.NBestEncode([text, text2], nbest_size=10, out_type=out_type)) + self.assertEqual(len(results), 2) + + if out_type in [str, int]: + for n in results[0]: + self.assertEqual(sp.decode(n), text) + + for n in results[1]: + self.assertEqual(sp.decode(n), text2) + + decoded = sp.decode(results[0]) + self.assertEqual(len(decoded), 10) + for n in decoded: + self.assertEqual(n, text) + decoded = sp.decode(results[1]) + self.assertEqual(len(decoded), 10) + for n in decoded: + self.assertEqual(n, text2) + + self.assertEqual( + sp.nbest_encode(text, nbest_size=10, out_type=str), + sp.nbest_encode_as_pieces(text, nbest_size=10)) + self.assertEqual( + sp.nbest_encode(text, nbest_size=10, out_type=int), + sp.nbest_encode_as_ids(text, nbest_size=10)) + self.assertEqual( + sp.nbest_encode(text, nbest_size=10, out_type='serialized_proto'), + sp.nbest_encode_as_serialized_proto(text, nbest_size=10)) + self.assertEqual( + sp.nbest_encode(text, nbest_size=10, out_type='immutable_proto'), + sp.nbest_encode_as_immutable_proto(text, nbest_size=10)) + + def test_sample_and_score(self): + sp = self.sp_ + text = 'hello world' + text2 = 'I have a pen.' 
+ for out_type in [str, int, 'serialized_proto', 'immutable_proto']: + results = sp.sample_encode_and_score( + text, wor=True, num_samples=10, out_type=out_type) + results = sp.SampleEncodeAndScore( + text, wor=False, num_samples=10, out_type=out_type) + + if out_type in [str, int]: + for n in results: + self.assertEqual(sp.decode(n[0]), text) + + results = sp.sample_encode_and_score([text, text2], + wor=True, + num_samples=10, + out_type=out_type) + results = sp.SampleEncodeAndScore([text, text2], + wor=True, + num_samples=10, + out_type=out_type) + + if out_type in [str, int]: + for n in results[0]: + self.assertEqual(sp.decode(n[0]), text) + for n in results[1]: + self.assertEqual(sp.decode(n[0]), text2) + + sp.sample_encode_and_score_as_pieces(text, 10) + sp.sample_encode_and_score_as_ids(text, 10) + sp.sample_encode_and_score_as_immutable_proto(text, 10) + sp.sample_encode_and_score_as_serialized_proto(text, 10) def test_valid_range(self): size = self.sp_.piece_size() @@ -388,6 +652,54 @@ def test_valid_range(self): except: self.assertTrue(True) + def test_batch(self): + sp = spm.SentencePieceProcessor( + model_file=os.path.join('test', 'test_model.model')) + with open(os.path.join(data_dir, 'botchan.txt'), 'r') as file: + texts = file.readlines() + + for out_type in [str, int, 'serialized_proto', 'immutable_proto']: + r1 = sp.encode(texts, out_type=out_type, num_threads=None) + r2 = sp.encode(texts, out_type=out_type, num_threads=1) + r3 = sp.encode(texts, out_type=out_type, num_threads=-1) + r4 = sp.encode(texts, out_type=out_type, num_threads=8) + r5 = [sp.encode(s, out_type=out_type) for s in texts] + self.assertEqual(r1, r2) + self.assertEqual(r1, r3) + self.assertEqual(r1, r4) + self.assertEqual(r1, r5) + + if out_type in [str, int]: + d1 = sp.decode(r1, num_threads=None) + d2 = sp.decode(r2, num_threads=1) + d3 = sp.decode(r3, num_threads=-1) + d4 = sp.decode(r4, num_threads=8) + d5 = [sp.decode(s) for s in r5] + + self.assertEqual(d1, d2) + 
self.assertEqual(d1, d3) + self.assertEqual(d1, d4) + self.assertEqual(d1, d5) + + e1 = sp.calculate_entropy(texts, alpha=1.0, num_threads=10) + e2 = sp.CalculateEntropy(texts, alpha=1.0, num_threads=10) + e3 = [sp.calculate_entropy(s, alpha=1.0) for s in texts] + self.assertEqual(e1, e2) + self.assertEqual(e1, e3) + + def test_pickle(self): + with open('sp.pickle', 'wb') as f: + pickle.dump(self.sp_, f) + + id1 = self.sp_.encode('hello world.', out_type=int) + + with open('sp.pickle', 'rb') as f: + sp = pickle.load(f) + + id2 = sp.encode('hello world.', out_type=int) + + self.assertEqual(id1, id2) + def suite(): suite = unittest.TestSuite() diff --git a/sentencepiece.pc.in b/sentencepiece.pc.in index ac7fef68..1251dd22 100644 --- a/sentencepiece.pc.in +++ b/sentencepiece.pc.in @@ -1,10 +1,11 @@ prefix=@prefix@ exec_prefix=@exec_prefix@ -libdir=@libdir@ -includedir=@includedir@ +libdir=@libdir_for_pc_file@ +includedir=@includedir_for_pc_file@ Name: @PROJECT_NAME@ Description: Unsupervised text tokenizer and detokenizer for Neural Network-based text generation. Version: @PROJECT_VERSION@ -Libs: -L${libdir} -lsentencepiece -lsentencepiece_train @libprotobuf_lite@ @pkgconfiglibs@ -Cflags: -I${includedir} @pkgconfigcflags@ +Libs: -L${libdir} -lsentencepiece -lsentencepiece_train +Cflags: -I${includedir} +Requires.private: @libprotobuf_lite@ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 28c4f7f1..c69da9f1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -12,6 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License.! 
+if (SPM_USE_EXTERNAL_ABSL) + set(ABSL_FLAGS_SRCS "") + set(ABSL_STRINGS_SRCS "") + list(APPEND SPM_LIBS absl::strings) + list(APPEND SPM_LIBS absl::flags) + list(APPEND SPM_LIBS absl::flags_parse) + if (MSVC) + add_definitions("/D_USE_EXTERNAL_ABSL") + else() + add_definitions("-D_USE_EXTERNAL_ABSL") + endif() +else() + set(ABSL_FLAGS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/absl/flags/flag.cc) +endif() + if (SPM_USE_BUILTIN_PROTOBUF) set(SPM_PROTO_HDRS builtin_pb/sentencepiece.pb.h) set(SPM_PROTO_SRCS builtin_pb/sentencepiece.pb.cc) @@ -25,12 +40,14 @@ if (SPM_USE_BUILTIN_PROTOBUF) ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/coded_stream.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/common.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/extension_set.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/generated_enum_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/generated_message_table_driven_lite.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/generated_message_util.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/implicit_weak_message.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/int128.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/io_win32.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/message_lite.cc + ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/parse_context.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/repeated_field.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/status.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/statusor.cc @@ -41,11 +58,12 @@ if (SPM_USE_BUILTIN_PROTOBUF) ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/time.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/wire_format_lite.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/zero_copy_stream.cc + 
${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/zero_copy_stream_impl.cc ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite/zero_copy_stream_impl_lite.cc) if (MSVC) add_definitions("/DHAVE_PTHREAD /wd4018 /wd4514") else() - add_definitions("-pthread -DHAVE_PTHREAD=1 -Wno-sign-compare") + add_definitions("-pthread -DHAVE_PTHREAD=1 -Wno-sign-compare -Wno-deprecated-declarations") endif() include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../third_party/protobuf-lite) include_directories(builtin_pb) @@ -56,9 +74,15 @@ else() protobuf_generate_cpp(SPM_MODEL_PROTO_SRCS SPM_MODEL_PROTO_HDRS sentencepiece_model.proto) set(PROTOBUF_LITE_SRCS "") include_directories(${PROTOBUF_INCLUDE_DIR}) + if (MSVC) + add_definitions("/D_USE_EXTERNAL_PROTOBUF") + else() + add_definitions("-D_USE_EXTERNAL_PROTOBUF") + endif() endif() include_directories(${CMAKE_CURRENT_BINARY_DIR}) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../third_party) if (MSVC) add_definitions("/D_USE_INTERNAL_STRING_VIEW") @@ -90,7 +114,6 @@ set(SPM_SRCS char_model.cc error.cc filesystem.cc - init.cc model_factory.cc model_interface.cc normalizer.cc @@ -99,8 +122,8 @@ set(SPM_SRCS unigram_model.cc util.cc word_model.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/absl/strings/string_view.cc - ${CMAKE_CURRENT_SOURCE_DIR}/../third_party/absl/flags/flag.cc) + ${ABSL_STRINGS_SRCS} + ${ABSL_FLAGS_SRCS}) set(SPM_TRAIN_SRCS ${SPM_PROTO_HDRS} @@ -158,7 +181,7 @@ set(SPM_TEST_SRCS find_package(Threads REQUIRED) -set(SPM_LIBS ${PROTOBUF_LITE_LIBRARY} Threads::Threads) +list(APPEND SPM_LIBS ${PROTOBUF_LITE_LIBRARY} Threads::Threads) if (SPM_ENABLE_NFKC_COMPILE) find_package(ICU 4.4 COMPONENTS i18n data uc REQUIRED) @@ -182,6 +205,21 @@ if (SPM_ENABLE_TCMALLOC) endif() endif() +if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR + (${CMAKE_SYSTEM_PROCESSOR} MATCHES "mips") OR + (${CMAKE_SYSTEM_PROCESSOR} MATCHES "m68k") OR + (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc") OR + (${CMAKE_SYSTEM_PROCESSOR} MATCHES 
"powerpc") OR + (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch") OR + (${CMAKE_SYSTEM_PROCESSOR} MATCHES "sh4")) + find_library(ATOMIC_LIB NAMES atomic libatomic.so libatomic.so.1) + if (ATOMIC_LIB) + message(STATUS "Found atomic: ${ATOMIC_LIB}") + list(APPEND SPM_LIBS "atomic") + endif() +endif() + + if (SPM_ENABLE_SHARED) add_library(sentencepiece SHARED ${SPM_SRCS}) add_library(sentencepiece_train SHARED ${SPM_TRAIN_SRCS}) @@ -196,13 +234,6 @@ target_link_libraries(sentencepiece_train-static INTERFACE sentencepiece-static if (SPM_ENABLE_SHARED) target_link_libraries(sentencepiece ${SPM_LIBS}) target_link_libraries(sentencepiece_train ${SPM_LIBS} sentencepiece) - if ((${CMAKE_SYSTEM_PROCESSOR} STREQUAL "armv7l") OR - (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "mips") OR - (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "m68k") OR - (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ppc") OR - (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "sh4")) - list(APPEND SPM_LIBS "atomic") - endif() set(SPM_INSTALLTARGETS sentencepiece sentencepiece_train sentencepiece-static sentencepiece_train-static) set_target_properties(sentencepiece sentencepiece_train PROPERTIES SOVERSION 0 VERSION 0.0.0) set_target_properties(sentencepiece PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS YES) @@ -267,12 +298,24 @@ endif() list(APPEND SPM_INSTALLTARGETS spm_encode spm_decode spm_normalize spm_train spm_export_vocab) +if (CMAKE_SYSTEM_NAME STREQUAL "iOS") + install(TARGETS ${SPM_INSTALLTARGETS} + BUNDLE DESTINATION ${CMAKE_INSTALL_BINDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) +else() install(TARGETS ${SPM_INSTALLTARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -install(FILES sentencepiece_trainer.h sentencepiece_processor.h builtin_pb/sentencepiece.pb.h +endif() + +install(FILES sentencepiece_trainer.h sentencepiece_processor.h DESTINATION 
${CMAKE_INSTALL_INCDIR}) +if (NOT SPM_USE_BUILTIN_PROTOBUF) + install(FILES ${SPM_PROTO_HDRS} DESTINATION ${CMAKE_INSTALL_INCDIR}) +endif() file(TO_NATIVE_PATH "${PROJECT_SOURCE_DIR}/data" data_dir) @@ -304,3 +347,11 @@ if (SPM_COVERAGE) COMMAND genhtml -o lcov_html coverage.info) add_dependencies(coverage spm_test) endif() + +if (CMAKE_SYSTEM_NAME STREQUAL "iOS") + set_xcode_property(spm_encode PRODUCT_BUNDLE_IDENTIFIER "SentencePiece" All) + set_xcode_property(spm_decode PRODUCT_BUNDLE_IDENTIFIER "SentencePiece" All) + set_xcode_property(spm_normalize PRODUCT_BUNDLE_IDENTIFIER "SentencePiece" All) + set_xcode_property(spm_train PRODUCT_BUNDLE_IDENTIFIER "SentencePiece" All) + set_xcode_property(spm_export_vocab PRODUCT_BUNDLE_IDENTIFIER "SentencePiece" All) +endif() diff --git a/src/bpe_model.cc b/src/bpe_model.cc index 5d77baad..bc7ada13 100644 --- a/src/bpe_model.cc +++ b/src/bpe_model.cc @@ -72,8 +72,7 @@ std::vector> Model::SampleEncode( // Reverse merge rules. // key: merged symbol, value: pair of original symbols. absl::flat_hash_map, - string_util::string_view_hash> + std::pair> rev_merge; // Pre-allocates SymbolPair for efficiency. diff --git a/src/bpe_model_trainer.cc b/src/bpe_model_trainer.cc index 964d44ea..de86f14a 100644 --- a/src/bpe_model_trainer.cc +++ b/src/bpe_model_trainer.cc @@ -12,13 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License.! +#include "bpe_model_trainer.h" + #include #include #include #include -#include "bpe_model_trainer.h" +#include "pretokenizer_for_training.h" #include "third_party/absl/container/flat_hash_set.h" +#include "third_party/absl/strings/str_join.h" +#include "third_party/absl/strings/str_replace.h" #include "util.h" namespace sentencepiece { @@ -82,28 +86,16 @@ void Trainer::ComputeFreq(Symbol *symbol) const { if (symbol->freq > 0) { // if freq == 0, re-computation is required. return; } - // Avoids double-count. ("AAA" => only count the first "AA"). 
- Position prev_pos = {-1, 0}; CHECK_EQ(0, symbol->freq); for (auto it = symbol->positions.begin(); it != symbol->positions.end();) { const Position pos = DecodePos(*it); - // There are two same bigrams in "AAA", [AA] [AA], and we want to - // remove the second one to avoid double counts. - // If the right symbol in the first bigram and the left symbol in the - // second bigram have the same position, (pos.left == prev_pos.right), - // duplicated bigram exisit. - // Also, symbols_[sid][left] and symbols_[sid]right] must store + // symbols_[sid][left] and symbols_[sid]right] must store // the same symbols in symbol->left and symbols->right. - if ((pos.sid == prev_pos.sid && pos.left == prev_pos.right) || - symbol->left != symbols_[pos.sid][pos.left] || + if (symbol->left != symbols_[pos.sid][pos.left] || symbol->right != symbols_[pos.sid][pos.right]) { it = symbol->positions.erase(it); - // Initializes prev_pos. - // In "AAAA", the last "AA" can be counted. - prev_pos = {-1, 0}; } else { symbol->freq += sentences_[pos.sid].second; - prev_pos = pos; ++it; } } @@ -189,6 +181,24 @@ util::Status Trainer::Train() { SplitSentencesByWhitespace(); } + // Pretokenizer applied only in training time. + // Pretokenizer is used as a constraint of piece extractions. + const auto *pretokenizer = SentencePieceTrainer::GetPretokenizerForTraining(); + + if (pretokenizer || !trainer_spec_.pretokenization_delimiter().empty()) { + absl::string_view delimiter = trainer_spec_.pretokenization_delimiter(); + LOG(INFO) << "Preprocessing with pretokenizer..."; + for (auto &w : sentences_) { + if (pretokenizer) { + w.first = absl::StrJoin(pretokenizer->PreTokenize(w.first), + TrainerInterface::kUPPBoundaryStr); + } else if (!delimiter.empty()) { + w.first = absl::StrReplaceAll( + w.first, {{delimiter, TrainerInterface::kUPPBoundaryStr}}); + } + } + } + // Initializes symbols_. symbols_[sid][i] stores an unary symbol. 
symbols_.resize(sentences_.size()); for (size_t i = 0; i < sentences_.size(); ++i) { diff --git a/src/bpe_model_trainer.h b/src/bpe_model_trainer.h index e011a378..2879d29e 100644 --- a/src/bpe_model_trainer.h +++ b/src/bpe_model_trainer.h @@ -15,6 +15,8 @@ #ifndef BPE_MODEL_TRAINER_H_ #define BPE_MODEL_TRAINER_H_ +#include +#include #include #include #include @@ -44,12 +46,12 @@ class Trainer : public TrainerInterface { const Symbol *right; // right symbol in bigram string_util::UnicodeText chars; // all flattend chracter sequence bool is_unk; // true if this symbol is unknown. - uint64 fp; // fingerprint of this symbol. - uint64 freq; // frequency of this symbol. + uint64_t fp; // fingerprint of this symbol. + uint64_t freq; // frequency of this symbol. // Position list. Use set so that we can keep the order of occurrence. // See EncodePos/DecodePos. - std::set positions; + std::set positions; bool IsBigram() const { return left != nullptr && right != nullptr; } std::string ToString() const; @@ -62,19 +64,21 @@ class Trainer : public TrainerInterface { int right; // right symbol index }; - // Encodes sid, left and right bigram index into uint64. + // Encodes sid, left and right bigram index into uint64_t. // Encoded value keeps the order of sid, left and right. - static uint64 EncodePos(int sid, int l, int r) { + static uint64_t EncodePos(int sid, int l, int r) { CHECK_GE(l, 0); CHECK_GE(r, 0); - CHECK_LE(l, kuint16max); - CHECK_LE(r, kuint16max); - const uint64 n = (static_cast(sid) << 32 | (l << 16 | r)); + CHECK_LE(l, std::numeric_limits::max()); + CHECK_LE(r, std::numeric_limits::max()); + const uint64_t n = (static_cast(sid) << 32) | + (static_cast(l) << 16) | + r; return n; } - // Decodes sid, left and right bigram index from uint64. - static Position DecodePos(uint64 n) { + // Decodes sid, left and right bigram index from uint64_t. 
+ static Position DecodePos(uint64_t n) { Position p; p.sid = n >> 32; p.left = (n >> 16) & 0xffff; @@ -111,7 +115,7 @@ class Trainer : public TrainerInterface { void UpdateActiveSymbols(); // All unique symbols. Key is a fingerprint of Symbol. - absl::flat_hash_map symbols_cache_; + absl::flat_hash_map symbols_cache_; // Set of symbols from which we find the best symbol in each iteration. std::set active_symbols_; diff --git a/src/builder.cc b/src/builder.cc index 8b43670b..3c541c5f 100644 --- a/src/builder.cc +++ b/src/builder.cc @@ -49,6 +49,12 @@ constexpr int kMaxUnicode = 0x10FFFF; static constexpr char kDefaultNormalizerName[] = "nfkc"; +#ifndef ENABLE_NFKC_COMPILE +static constexpr char kCompileError[] = + "NFK compile is not enabled. rebuild with ./configure " + "--enable-nfkc-compile"; +#endif + #ifdef ENABLE_NFKC_COMPILE // Normalize `input` with ICU's normalizer with `mode`. Builder::Chars UnicodeNormalize(UNormalizationMode mode, @@ -267,7 +273,7 @@ util::Status Builder::DecompileCharsMap(absl::string_view blob, } // static -util::Status Builder::GetPrecompiledCharsMap(const std::string &name, +util::Status Builder::GetPrecompiledCharsMap(absl::string_view name, std::string *output) { CHECK_OR_RETURN(output); @@ -340,8 +346,7 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) { *chars_map = std::move(nfkc_map); #else - LOG(ERROR) << "NFKC compile is not enabled." 
- << " rebuild with ./configure --enable-nfkc-compile"; + LOG(ERROR) << kCompileError; #endif return util::OkStatus(); @@ -369,7 +374,7 @@ util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { nfkc_map[{0xFEFF}] = {0x20}; // ZERO WIDTH NO-BREAK nfkc_map[{0xFFFD}] = {0x20}; // REPLACEMENT CHARACTER nfkc_map[{0x200C}] = {0x20}; // ZERO WIDTH NON-JOINER - nfkc_map[{0x200D}] = {0x20}; // ZERO WIDTH JOINER + // nfkc_map[{0x200D}] = {0x20}; // ZERO WIDTH JOINER // Ascii Control characters nfkc_map[{0x0001}] = {}; @@ -414,8 +419,7 @@ util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { *chars_map = std::move(nfkc_map); #else - LOG(ERROR) << "NFKC compile is not enabled." - << " rebuild with ./configure --enable-nfkc-compile"; + LOG(ERROR) << kCompileError; #endif return util::OkStatus(); @@ -454,8 +458,7 @@ util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) { RETURN_IF_ERROR(Builder::MergeUnicodeCaseFoldMap(&nfkc_map)); *chars_map = std::move(nfkc_map); #else - LOG(ERROR) << "NFKC_CF compile is not enabled." - << " rebuild with ./configure --enable-nfkc-compile"; + LOG(ERROR) << kCompileError; #endif return util::OkStatus(); @@ -469,8 +472,7 @@ util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) { RETURN_IF_ERROR(Builder::MergeUnicodeCaseFoldMap(&nfkc_map)); *chars_map = std::move(nfkc_map); #else - LOG(ERROR) << "NMT_NFKC_CF compile is not enabled." 
- << " rebuild with ./configure --enable-nfkc-compile"; + LOG(ERROR) << kCompileError; #endif return util::OkStatus(); @@ -552,6 +554,25 @@ util::Status Builder::ComposeCharsMaps(const Builder::CharsMap &outer_chars_map, return util::OkStatus(); } +// static +util::Status Builder::BuildNFKDMap(CharsMap *chars_map) { +#ifdef ENABLE_NFKC_COMPILE + constexpr int kMaxUnicode = 0x10FFFF; + for (char32 cp = 1; cp <= kMaxUnicode; ++cp) { + if (!U_IS_UNICODE_CHAR(cp)) { + continue; + } + const auto nfkd = ToNFKD({cp}); + if (nfkd.size() >= 2 || (nfkd.size() == 1 && nfkd[0] != cp)) { + (*chars_map)[{cp}] = nfkd; + } + } +#else + LOG(ERROR) << kCompileError; +#endif + return util::OkStatus(); +} + // static util::Status Builder::LoadCharsMap(absl::string_view filename, CharsMap *chars_map) { diff --git a/src/builder.h b/src/builder.h index bbd10638..647ea642 100644 --- a/src/builder.h +++ b/src/builder.h @@ -51,7 +51,7 @@ class Builder { CharsMap *chars_map); // Returns a pre-compiled binary index with `name`. - static util::Status GetPrecompiledCharsMap(const std::string &name, + static util::Status GetPrecompiledCharsMap(absl::string_view name, std::string *output); // Makes a normalization mapping based on NFKC. @@ -104,6 +104,9 @@ class Builder { // Makes NMT NFKC with Unicode case folding. static util::Status BuildNmtNFKC_CFMap(CharsMap *chars_map); + // Given NFKC maps, convert them to NFKD. 
+ static util::Status BuildNFKDMap(CharsMap *chars_map); + static util::Status BuildUncaserMap(CharsMap *chars_map); static util::Status BuildRecaserMap(CharsMap *chars_map); diff --git a/src/builtin_pb/sentencepiece.pb.cc b/src/builtin_pb/sentencepiece.pb.cc index 0da509f7..7bc54944 100644 --- a/src/builtin_pb/sentencepiece.pb.cc +++ b/src/builtin_pb/sentencepiece.pb.cc @@ -5,124 +5,114 @@ #include -#include -#include #include -#include +#include +#include #include -// This is a temporary google only hack -#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS -#include "third_party/protobuf/version.h" -#endif // @@protoc_insertion_point(includes) - -namespace protobuf_sentencepiece_2eproto { -extern PROTOBUF_INTERNAL_EXPORT_protobuf_sentencepiece_2eproto ::google::protobuf::internal::SCCInfo<0> scc_info_SentencePieceText_SentencePiece; -extern PROTOBUF_INTERNAL_EXPORT_protobuf_sentencepiece_2eproto ::google::protobuf::internal::SCCInfo<1> scc_info_SentencePieceText; -} // namespace protobuf_sentencepiece_2eproto +#include +extern PROTOBUF_INTERNAL_EXPORT_sentencepiece_2eproto ::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<1> scc_info_SentencePieceText_sentencepiece_2eproto; +extern PROTOBUF_INTERNAL_EXPORT_sentencepiece_2eproto ::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_SentencePieceText_SentencePiece_sentencepiece_2eproto; namespace sentencepiece { class SentencePieceText_SentencePieceDefaultTypeInternal { public: - ::google::protobuf::internal::ExplicitlyConstructed - _instance; + ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; } _SentencePieceText_SentencePiece_default_instance_; class SentencePieceTextDefaultTypeInternal { public: - ::google::protobuf::internal::ExplicitlyConstructed - _instance; + ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; } _SentencePieceText_default_instance_; class NBestSentencePieceTextDefaultTypeInternal { public: - ::google::protobuf::internal::ExplicitlyConstructed - _instance; + 
::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; } _NBestSentencePieceText_default_instance_; } // namespace sentencepiece -namespace protobuf_sentencepiece_2eproto { -static void InitDefaultsSentencePieceText_SentencePiece() { +static void InitDefaultsscc_info_NBestSentencePieceText_sentencepiece_2eproto() { GOOGLE_PROTOBUF_VERIFY_VERSION; { - void* ptr = &::sentencepiece::_SentencePieceText_SentencePiece_default_instance_; - new (ptr) ::sentencepiece::SentencePieceText_SentencePiece(); - ::google::protobuf::internal::OnShutdownDestroyMessage(ptr); + void* ptr = &::sentencepiece::_NBestSentencePieceText_default_instance_; + new (ptr) ::sentencepiece::NBestSentencePieceText(); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); } - ::sentencepiece::SentencePieceText_SentencePiece::InitAsDefaultInstance(); } -::google::protobuf::internal::SCCInfo<0> scc_info_SentencePieceText_SentencePiece = - {{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 0, InitDefaultsSentencePieceText_SentencePiece}, {}}; +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<1> scc_info_NBestSentencePieceText_sentencepiece_2eproto = + {{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 1, 0, InitDefaultsscc_info_NBestSentencePieceText_sentencepiece_2eproto}, { + &scc_info_SentencePieceText_sentencepiece_2eproto.base,}}; -static void InitDefaultsSentencePieceText() { +static void InitDefaultsscc_info_SentencePieceText_sentencepiece_2eproto() { GOOGLE_PROTOBUF_VERIFY_VERSION; { void* ptr = &::sentencepiece::_SentencePieceText_default_instance_; new (ptr) ::sentencepiece::SentencePieceText(); - ::google::protobuf::internal::OnShutdownDestroyMessage(ptr); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); } - ::sentencepiece::SentencePieceText::InitAsDefaultInstance(); } -::google::protobuf::internal::SCCInfo<1> scc_info_SentencePieceText = - 
{{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 1, InitDefaultsSentencePieceText}, { - &protobuf_sentencepiece_2eproto::scc_info_SentencePieceText_SentencePiece.base,}}; +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<1> scc_info_SentencePieceText_sentencepiece_2eproto = + {{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 1, 0, InitDefaultsscc_info_SentencePieceText_sentencepiece_2eproto}, { + &scc_info_SentencePieceText_SentencePiece_sentencepiece_2eproto.base,}}; -static void InitDefaultsNBestSentencePieceText() { +static void InitDefaultsscc_info_SentencePieceText_SentencePiece_sentencepiece_2eproto() { GOOGLE_PROTOBUF_VERIFY_VERSION; { - void* ptr = &::sentencepiece::_NBestSentencePieceText_default_instance_; - new (ptr) ::sentencepiece::NBestSentencePieceText(); - ::google::protobuf::internal::OnShutdownDestroyMessage(ptr); + void* ptr = &::sentencepiece::_SentencePieceText_SentencePiece_default_instance_; + new (ptr) ::sentencepiece::SentencePieceText_SentencePiece(); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); } - ::sentencepiece::NBestSentencePieceText::InitAsDefaultInstance(); } -::google::protobuf::internal::SCCInfo<1> scc_info_NBestSentencePieceText = - {{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 1, InitDefaultsNBestSentencePieceText}, { - &protobuf_sentencepiece_2eproto::scc_info_SentencePieceText.base,}}; - -void InitDefaults() { - ::google::protobuf::internal::InitSCC(&scc_info_SentencePieceText_SentencePiece.base); - ::google::protobuf::internal::InitSCC(&scc_info_SentencePieceText.base); - ::google::protobuf::internal::InitSCC(&scc_info_NBestSentencePieceText.base); -} +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_SentencePieceText_SentencePiece_sentencepiece_2eproto = + {{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 0, 0, 
InitDefaultsscc_info_SentencePieceText_SentencePiece_sentencepiece_2eproto}, {}}; -} // namespace protobuf_sentencepiece_2eproto namespace sentencepiece { // =================================================================== -void SentencePieceText_SentencePiece::InitAsDefaultInstance() { -} -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int SentencePieceText_SentencePiece::kPieceFieldNumber; -const int SentencePieceText_SentencePiece::kIdFieldNumber; -const int SentencePieceText_SentencePiece::kSurfaceFieldNumber; -const int SentencePieceText_SentencePiece::kBeginFieldNumber; -const int SentencePieceText_SentencePiece::kEndFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +class SentencePieceText_SentencePiece::_Internal { + public: + using HasBits = decltype(std::declval()._has_bits_); + static void set_has_piece(HasBits* has_bits) { + (*has_bits)[0] |= 1u; + } + static void set_has_id(HasBits* has_bits) { + (*has_bits)[0] |= 4u; + } + static void set_has_surface(HasBits* has_bits) { + (*has_bits)[0] |= 2u; + } + static void set_has_begin(HasBits* has_bits) { + (*has_bits)[0] |= 8u; + } + static void set_has_end(HasBits* has_bits) { + (*has_bits)[0] |= 16u; + } +}; -SentencePieceText_SentencePiece::SentencePieceText_SentencePiece() - : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { - ::google::protobuf::internal::InitSCC( - &protobuf_sentencepiece_2eproto::scc_info_SentencePieceText_SentencePiece.base); +SentencePieceText_SentencePiece::SentencePieceText_SentencePiece(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::MessageLite(arena), + _extensions_(arena) { SharedCtor(); - // @@protoc_insertion_point(constructor:sentencepiece.SentencePieceText.SentencePiece) + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:sentencepiece.SentencePieceText.SentencePiece) } SentencePieceText_SentencePiece::SentencePieceText_SentencePiece(const SentencePieceText_SentencePiece& from) - : 
::google::protobuf::MessageLite(), - _internal_metadata_(NULL), + : ::PROTOBUF_NAMESPACE_ID::MessageLite(), _has_bits_(from._has_bits_) { - _internal_metadata_.MergeFrom(from._internal_metadata_); + _internal_metadata_.MergeFrom(from._internal_metadata_); _extensions_.MergeFrom(from._extensions_); - piece_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_piece()) { - piece_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.piece_); + piece_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_piece()) { + piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_piece(), + GetArena()); } - surface_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_surface()) { - surface_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.surface_); + surface_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_surface()) { + surface_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_surface(), + GetArena()); } ::memcpy(&id_, &from.id_, static_cast(reinterpret_cast(&end_) - @@ -131,206 +121,191 @@ SentencePieceText_SentencePiece::SentencePieceText_SentencePiece(const SentenceP } void SentencePieceText_SentencePiece::SharedCtor() { - piece_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - surface_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - ::memset(&id_, 0, static_cast( - reinterpret_cast(&end_) - + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_SentencePieceText_SentencePiece_sentencepiece_2eproto.base); + piece_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + 
surface_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + ::memset(reinterpret_cast(this) + static_cast( + reinterpret_cast(&id_) - reinterpret_cast(this)), + 0, static_cast(reinterpret_cast(&end_) - reinterpret_cast(&id_)) + sizeof(end_)); } SentencePieceText_SentencePiece::~SentencePieceText_SentencePiece() { // @@protoc_insertion_point(destructor:sentencepiece.SentencePieceText.SentencePiece) SharedDtor(); + _internal_metadata_.Delete(); } void SentencePieceText_SentencePiece::SharedDtor() { - piece_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - surface_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + GOOGLE_DCHECK(GetArena() == nullptr); + piece_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + surface_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); } +void SentencePieceText_SentencePiece::ArenaDtor(void* object) { + SentencePieceText_SentencePiece* _this = reinterpret_cast< SentencePieceText_SentencePiece* >(object); + (void)_this; +} +void SentencePieceText_SentencePiece::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} void SentencePieceText_SentencePiece::SetCachedSize(int size) const { _cached_size_.Set(size); } const SentencePieceText_SentencePiece& SentencePieceText_SentencePiece::default_instance() { - ::google::protobuf::internal::InitSCC(&protobuf_sentencepiece_2eproto::scc_info_SentencePieceText_SentencePiece.base); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_SentencePieceText_SentencePiece_sentencepiece_2eproto.base); return *internal_default_instance(); } void SentencePieceText_SentencePiece::Clear() { // @@protoc_insertion_point(message_clear_start:sentencepiece.SentencePieceText.SentencePiece) - ::google::protobuf::uint32 cached_has_bits = 0; + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; // Prevent compiler warnings about cached_has_bits being 
unused (void) cached_has_bits; _extensions_.Clear(); cached_has_bits = _has_bits_[0]; - if (cached_has_bits & 3u) { + if (cached_has_bits & 0x00000003u) { if (cached_has_bits & 0x00000001u) { - piece_.ClearNonDefaultToEmptyNoArena(); + piece_.ClearNonDefaultToEmpty(); } if (cached_has_bits & 0x00000002u) { - surface_.ClearNonDefaultToEmptyNoArena(); + surface_.ClearNonDefaultToEmpty(); } } - if (cached_has_bits & 28u) { + if (cached_has_bits & 0x0000001cu) { ::memset(&id_, 0, static_cast( reinterpret_cast(&end_) - reinterpret_cast(&id_)) + sizeof(end_)); } _has_bits_.Clear(); - _internal_metadata_.Clear(); -} - -bool SentencePieceText_SentencePiece::MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - ::google::protobuf::uint32 tag; - ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter( - &_internal_metadata_); - ::google::protobuf::io::StringOutputStream unknown_fields_output( - unknown_fields_setter.buffer()); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_output, false); - // @@protoc_insertion_point(parse_start:sentencepiece.SentencePieceText.SentencePiece) - for (;;) { - ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); - tag = p.first; - if (!p.second) goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // optional string piece = 1; - case 1: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u /* 10 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_piece())); - } else { - goto handle_unusual; - } - break; - } + _internal_metadata_.Clear(); +} +const char* SentencePieceText_SentencePiece::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { +#define CHK_(x) if 
(PROTOBUF_PREDICT_FALSE(!(x))) goto failure + _Internal::HasBits has_bits{}; + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // optional string piece = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + auto str = _internal_mutable_piece(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional uint32 id = 2; - case 2: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(16u /* 16 & 0xFF */)) { - set_has_id(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::uint32, ::google::protobuf::internal::WireFormatLite::TYPE_UINT32>( - input, &id_))); - } else { - goto handle_unusual; - } - break; - } - + case 2: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 16)) { + _Internal::set_has_id(&has_bits); + id_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint32(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional string surface = 3; - case 3: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(26u /* 26 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_surface())); - } else { - goto handle_unusual; - } - break; - } - + case 3: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 26)) { + auto str = _internal_mutable_surface(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional uint32 begin = 4; - case 4: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(32u /* 32 & 0xFF */)) { - set_has_begin(); - 
DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::uint32, ::google::protobuf::internal::WireFormatLite::TYPE_UINT32>( - input, &begin_))); - } else { - goto handle_unusual; - } - break; - } - + case 4: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 32)) { + _Internal::set_has_begin(&has_bits); + begin_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint32(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional uint32 end = 5; - case 5: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(40u /* 40 & 0xFF */)) { - set_has_end(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::uint32, ::google::protobuf::internal::WireFormatLite::TYPE_UINT32>( - input, &end_))); - } else { - goto handle_unusual; - } - break; - } - + case 5: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 40)) { + _Internal::set_has_end(&has_bits); + end_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint32(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; default: { handle_unusual: - if (tag == 0) { + if ((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); goto success; } - if ((1600u <= tag)) { - DO_(_extensions_.ParseField(tag, input, - internal_default_instance(), - &unknown_fields_stream)); - continue; - } - DO_(::google::protobuf::internal::WireFormatLite::SkipField( - input, tag, &unknown_fields_stream)); - break; + if ((1600u <= tag)) { + ptr = _extensions_.ParseField(tag, ptr, + internal_default_instance(), &_internal_metadata_, ctx); + CHK_(ptr != nullptr); + continue; } - } - } + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; + } + } // switch + } // while success: - // @@protoc_insertion_point(parse_success:sentencepiece.SentencePieceText.SentencePiece) - return true; + _has_bits_.Or(has_bits); + 
return ptr; failure: - // @@protoc_insertion_point(parse_failure:sentencepiece.SentencePieceText.SentencePiece) - return false; -#undef DO_ + ptr = nullptr; + goto success; +#undef CHK_ } -void SentencePieceText_SentencePiece::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:sentencepiece.SentencePieceText.SentencePiece) - ::google::protobuf::uint32 cached_has_bits = 0; +::PROTOBUF_NAMESPACE_ID::uint8* SentencePieceText_SentencePiece::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:sentencepiece.SentencePieceText.SentencePiece) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; cached_has_bits = _has_bits_[0]; // optional string piece = 1; if (cached_has_bits & 0x00000001u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 1, this->piece(), output); + target = stream->WriteStringMaybeAliased( + 1, this->_internal_piece(), target); } // optional uint32 id = 2; if (cached_has_bits & 0x00000004u) { - ::google::protobuf::internal::WireFormatLite::WriteUInt32(2, this->id(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteUInt32ToArray(2, this->_internal_id(), target); } // optional string surface = 3; if (cached_has_bits & 0x00000002u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 3, this->surface(), output); + target = stream->WriteStringMaybeAliased( + 3, this->_internal_surface(), target); } // optional uint32 begin = 4; if (cached_has_bits & 0x00000008u) { - ::google::protobuf::internal::WireFormatLite::WriteUInt32(4, this->begin(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteUInt32ToArray(4, this->_internal_begin(), target); } 
// optional uint32 end = 5; if (cached_has_bits & 0x00000010u) { - ::google::protobuf::internal::WireFormatLite::WriteUInt32(5, this->end(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteUInt32ToArray(5, this->_internal_end(), target); } // Extension range [200, 536870912) - _extensions_.SerializeWithCachedSizes( - 200, 536870912, output); + target = _extensions_._InternalSerialize( + 200, 536870912, target, stream); - output->WriteRaw(_internal_metadata_.unknown_fields().data(), - static_cast(_internal_metadata_.unknown_fields().size())); - // @@protoc_insertion_point(serialize_end:sentencepiece.SentencePieceText.SentencePiece) + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = stream->WriteRaw(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).data(), + static_cast(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size()), target); + } + // @@protoc_insertion_point(serialize_to_array_end:sentencepiece.SentencePieceText.SentencePiece) + return target; } size_t SentencePieceText_SentencePiece::ByteSizeLong() const { @@ -339,72 +314,77 @@ size_t SentencePieceText_SentencePiece::ByteSizeLong() const { total_size += _extensions_.ByteSize(); - total_size += _internal_metadata_.unknown_fields().size(); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; - if (_has_bits_[0 / 32] & 31u) { + cached_has_bits = _has_bits_[0]; + if (cached_has_bits & 0x0000001fu) { // optional string piece = 1; - if (has_piece()) { + if (cached_has_bits & 0x00000001u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->piece()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_piece()); } // optional string surface = 3; - if (has_surface()) { + if 
(cached_has_bits & 0x00000002u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->surface()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_surface()); } // optional uint32 id = 2; - if (has_id()) { + if (cached_has_bits & 0x00000004u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::UInt32Size( - this->id()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::UInt32Size( + this->_internal_id()); } // optional uint32 begin = 4; - if (has_begin()) { + if (cached_has_bits & 0x00000008u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::UInt32Size( - this->begin()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::UInt32Size( + this->_internal_begin()); } // optional uint32 end = 5; - if (has_end()) { + if (cached_has_bits & 0x00000010u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::UInt32Size( - this->end()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::UInt32Size( + this->_internal_end()); } } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + total_size += _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size(); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; } void SentencePieceText_SentencePiece::CheckTypeAndMergeFrom( - const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) { + MergeFrom(*::PROTOBUF_NAMESPACE_ID::internal::DownCast( + &from)); } void SentencePieceText_SentencePiece::MergeFrom(const SentencePieceText_SentencePiece& from) { // @@protoc_insertion_point(class_specific_merge_from_start:sentencepiece.SentencePieceText.SentencePiece) GOOGLE_DCHECK_NE(&from, this); 
_extensions_.MergeFrom(from._extensions_); - _internal_metadata_.MergeFrom(from._internal_metadata_); - ::google::protobuf::uint32 cached_has_bits = 0; + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; cached_has_bits = from._has_bits_[0]; - if (cached_has_bits & 31u) { + if (cached_has_bits & 0x0000001fu) { if (cached_has_bits & 0x00000001u) { - set_has_piece(); - piece_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.piece_); + _internal_set_piece(from._internal_piece()); } if (cached_has_bits & 0x00000002u) { - set_has_surface(); - surface_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.surface_); + _internal_set_surface(from._internal_surface()); } if (cached_has_bits & 0x00000004u) { id_ = from.id_; @@ -434,87 +414,97 @@ bool SentencePieceText_SentencePiece::IsInitialized() const { return true; } -void SentencePieceText_SentencePiece::Swap(SentencePieceText_SentencePiece* other) { - if (other == this) return; - InternalSwap(other); -} void SentencePieceText_SentencePiece::InternalSwap(SentencePieceText_SentencePiece* other) { using std::swap; - piece_.Swap(&other->piece_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - surface_.Swap(&other->surface_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - swap(id_, other->id_); - swap(begin_, other->begin_); - swap(end_, other->end_); - swap(_has_bits_[0], other->_has_bits_[0]); - _internal_metadata_.Swap(&other->_internal_metadata_); _extensions_.Swap(&other->_extensions_); + _internal_metadata_.Swap(&other->_internal_metadata_); + swap(_has_bits_[0], other->_has_bits_[0]); + piece_.Swap(&other->piece_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + surface_.Swap(&other->surface_, 
&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + ::PROTOBUF_NAMESPACE_ID::internal::memswap< + PROTOBUF_FIELD_OFFSET(SentencePieceText_SentencePiece, end_) + + sizeof(SentencePieceText_SentencePiece::end_) + - PROTOBUF_FIELD_OFFSET(SentencePieceText_SentencePiece, id_)>( + reinterpret_cast(&id_), + reinterpret_cast(&other->id_)); } -::std::string SentencePieceText_SentencePiece::GetTypeName() const { +std::string SentencePieceText_SentencePiece::GetTypeName() const { return "sentencepiece.SentencePieceText.SentencePiece"; } // =================================================================== -void SentencePieceText::InitAsDefaultInstance() { -} -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int SentencePieceText::kTextFieldNumber; -const int SentencePieceText::kPiecesFieldNumber; -const int SentencePieceText::kScoreFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +class SentencePieceText::_Internal { + public: + using HasBits = decltype(std::declval()._has_bits_); + static void set_has_text(HasBits* has_bits) { + (*has_bits)[0] |= 1u; + } + static void set_has_score(HasBits* has_bits) { + (*has_bits)[0] |= 2u; + } +}; -SentencePieceText::SentencePieceText() - : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { - ::google::protobuf::internal::InitSCC( - &protobuf_sentencepiece_2eproto::scc_info_SentencePieceText.base); +SentencePieceText::SentencePieceText(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::MessageLite(arena), + _extensions_(arena), + pieces_(arena) { SharedCtor(); - // @@protoc_insertion_point(constructor:sentencepiece.SentencePieceText) + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:sentencepiece.SentencePieceText) } SentencePieceText::SentencePieceText(const SentencePieceText& from) - : ::google::protobuf::MessageLite(), - _internal_metadata_(NULL), + : ::PROTOBUF_NAMESPACE_ID::MessageLite(), _has_bits_(from._has_bits_), 
pieces_(from.pieces_) { - _internal_metadata_.MergeFrom(from._internal_metadata_); + _internal_metadata_.MergeFrom(from._internal_metadata_); _extensions_.MergeFrom(from._extensions_); - text_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_text()) { - text_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.text_); + text_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_text()) { + text_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_text(), + GetArena()); } score_ = from.score_; // @@protoc_insertion_point(copy_constructor:sentencepiece.SentencePieceText) } void SentencePieceText::SharedCtor() { - text_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_SentencePieceText_sentencepiece_2eproto.base); + text_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); score_ = 0; } SentencePieceText::~SentencePieceText() { // @@protoc_insertion_point(destructor:sentencepiece.SentencePieceText) SharedDtor(); + _internal_metadata_.Delete(); } void SentencePieceText::SharedDtor() { - text_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + GOOGLE_DCHECK(GetArena() == nullptr); + text_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); } +void SentencePieceText::ArenaDtor(void* object) { + SentencePieceText* _this = reinterpret_cast< SentencePieceText* >(object); + (void)_this; +} +void SentencePieceText::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} void SentencePieceText::SetCachedSize(int size) const { _cached_size_.Set(size); } const SentencePieceText& SentencePieceText::default_instance() { - ::google::protobuf::internal::InitSCC(&protobuf_sentencepiece_2eproto::scc_info_SentencePieceText.base); + 
::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_SentencePieceText_sentencepiece_2eproto.base); return *internal_default_instance(); } void SentencePieceText::Clear() { // @@protoc_insertion_point(message_clear_start:sentencepiece.SentencePieceText) - ::google::protobuf::uint32 cached_has_bits = 0; + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; @@ -522,127 +512,115 @@ void SentencePieceText::Clear() { pieces_.Clear(); cached_has_bits = _has_bits_[0]; if (cached_has_bits & 0x00000001u) { - text_.ClearNonDefaultToEmptyNoArena(); + text_.ClearNonDefaultToEmpty(); } score_ = 0; _has_bits_.Clear(); - _internal_metadata_.Clear(); -} - -bool SentencePieceText::MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - ::google::protobuf::uint32 tag; - ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter( - &_internal_metadata_); - ::google::protobuf::io::StringOutputStream unknown_fields_output( - unknown_fields_setter.buffer()); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_output, false); - // @@protoc_insertion_point(parse_start:sentencepiece.SentencePieceText) - for (;;) { - ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); - tag = p.first; - if (!p.second) goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // optional string text = 1; - case 1: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u /* 10 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_text())); - } else { - goto handle_unusual; - } - break; - } + _internal_metadata_.Clear(); +} +const char* SentencePieceText::_InternalParse(const char* ptr, 
::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + _Internal::HasBits has_bits{}; + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // optional string text = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + auto str = _internal_mutable_text(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // repeated .sentencepiece.SentencePieceText.SentencePiece pieces = 2; - case 2: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(18u /* 18 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessage( - input, add_pieces())); - } else { - goto handle_unusual; - } - break; - } - + case 2: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 18)) { + ptr -= 1; + do { + ptr += 1; + ptr = ctx->ParseMessage(_internal_add_pieces(), ptr); + CHK_(ptr); + if (!ctx->DataAvailable(ptr)) break; + } while (::PROTOBUF_NAMESPACE_ID::internal::ExpectTag<18>(ptr)); + } else goto handle_unusual; + continue; // optional float score = 3; - case 3: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(29u /* 29 & 0xFF */)) { - set_has_score(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &score_))); - } else { - goto handle_unusual; - } - break; - } - + case 3: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 29)) { + _Internal::set_has_score(&has_bits); + score_ = ::PROTOBUF_NAMESPACE_ID::internal::UnalignedLoad(ptr); + ptr += sizeof(float); + } else goto handle_unusual; + continue; default: { 
handle_unusual: - if (tag == 0) { + if ((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); goto success; } - if ((1600u <= tag)) { - DO_(_extensions_.ParseField(tag, input, - internal_default_instance(), - &unknown_fields_stream)); - continue; - } - DO_(::google::protobuf::internal::WireFormatLite::SkipField( - input, tag, &unknown_fields_stream)); - break; + if ((1600u <= tag)) { + ptr = _extensions_.ParseField(tag, ptr, + internal_default_instance(), &_internal_metadata_, ctx); + CHK_(ptr != nullptr); + continue; } - } - } + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; + } + } // switch + } // while success: - // @@protoc_insertion_point(parse_success:sentencepiece.SentencePieceText) - return true; + _has_bits_.Or(has_bits); + return ptr; failure: - // @@protoc_insertion_point(parse_failure:sentencepiece.SentencePieceText) - return false; -#undef DO_ + ptr = nullptr; + goto success; +#undef CHK_ } -void SentencePieceText::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:sentencepiece.SentencePieceText) - ::google::protobuf::uint32 cached_has_bits = 0; +::PROTOBUF_NAMESPACE_ID::uint8* SentencePieceText::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:sentencepiece.SentencePieceText) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; cached_has_bits = _has_bits_[0]; // optional string text = 1; if (cached_has_bits & 0x00000001u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 1, this->text(), output); + target = stream->WriteStringMaybeAliased( + 1, this->_internal_text(), target); } // repeated .sentencepiece.SentencePieceText.SentencePiece pieces = 2; for (unsigned int i = 0, - n = 
static_cast(this->pieces_size()); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteMessage( - 2, - this->pieces(static_cast(i)), - output); + n = static_cast(this->_internal_pieces_size()); i < n; i++) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage(2, this->_internal_pieces(i), target, stream); } // optional float score = 3; if (cached_has_bits & 0x00000002u) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(3, this->score(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteFloatToArray(3, this->_internal_score(), target); } // Extension range [200, 536870912) - _extensions_.SerializeWithCachedSizes( - 200, 536870912, output); + target = _extensions_._InternalSerialize( + 200, 536870912, target, stream); - output->WriteRaw(_internal_metadata_.unknown_fields().data(), - static_cast(_internal_metadata_.unknown_fields().size())); - // @@protoc_insertion_point(serialize_end:sentencepiece.SentencePieceText) + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = stream->WriteRaw(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).data(), + static_cast(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size()), target); + } + // @@protoc_insertion_point(serialize_to_array_end:sentencepiece.SentencePieceText) + return target; } size_t SentencePieceText::ByteSizeLong() const { @@ -651,57 +629,59 @@ size_t SentencePieceText::ByteSizeLong() const { total_size += _extensions_.ByteSize(); - total_size += _internal_metadata_.unknown_fields().size(); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; // repeated .sentencepiece.SentencePieceText.SentencePiece pieces = 2; - { - unsigned int count = 
static_cast(this->pieces_size()); - total_size += 1UL * count; - for (unsigned int i = 0; i < count; i++) { - total_size += - ::google::protobuf::internal::WireFormatLite::MessageSize( - this->pieces(static_cast(i))); - } + total_size += 1UL * this->_internal_pieces_size(); + for (const auto& msg : this->pieces_) { + total_size += + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize(msg); } - if (_has_bits_[0 / 32] & 3u) { + cached_has_bits = _has_bits_[0]; + if (cached_has_bits & 0x00000003u) { // optional string text = 1; - if (has_text()) { + if (cached_has_bits & 0x00000001u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->text()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_text()); } // optional float score = 3; - if (has_score()) { + if (cached_has_bits & 0x00000002u) { total_size += 1 + 4; } } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + total_size += _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size(); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; } void SentencePieceText::CheckTypeAndMergeFrom( - const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) { + MergeFrom(*::PROTOBUF_NAMESPACE_ID::internal::DownCast( + &from)); } void SentencePieceText::MergeFrom(const SentencePieceText& from) { // @@protoc_insertion_point(class_specific_merge_from_start:sentencepiece.SentencePieceText) GOOGLE_DCHECK_NE(&from, this); _extensions_.MergeFrom(from._extensions_); - _internal_metadata_.MergeFrom(from._internal_metadata_); - ::google::protobuf::uint32 cached_has_bits = 0; + _internal_metadata_.MergeFrom(from._internal_metadata_); + 
::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; pieces_.MergeFrom(from.pieces_); cached_has_bits = from._has_bits_[0]; - if (cached_has_bits & 3u) { + if (cached_has_bits & 0x00000003u) { if (cached_has_bits & 0x00000001u) { - set_has_text(); - text_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.text_); + _internal_set_text(from._internal_text()); } if (cached_has_bits & 0x00000002u) { score_ = from.score_; @@ -722,185 +702,181 @@ bool SentencePieceText::IsInitialized() const { return false; } - if (!::google::protobuf::internal::AllAreInitialized(this->pieces())) return false; + if (!::PROTOBUF_NAMESPACE_ID::internal::AllAreInitialized(pieces_)) return false; return true; } -void SentencePieceText::Swap(SentencePieceText* other) { - if (other == this) return; - InternalSwap(other); -} void SentencePieceText::InternalSwap(SentencePieceText* other) { using std::swap; - CastToBase(&pieces_)->InternalSwap(CastToBase(&other->pieces_)); - text_.Swap(&other->text_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - swap(score_, other->score_); - swap(_has_bits_[0], other->_has_bits_[0]); - _internal_metadata_.Swap(&other->_internal_metadata_); _extensions_.Swap(&other->_extensions_); + _internal_metadata_.Swap(&other->_internal_metadata_); + swap(_has_bits_[0], other->_has_bits_[0]); + pieces_.InternalSwap(&other->pieces_); + text_.Swap(&other->text_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + swap(score_, other->score_); } -::std::string SentencePieceText::GetTypeName() const { +std::string SentencePieceText::GetTypeName() const { return "sentencepiece.SentencePieceText"; } // =================================================================== -void NBestSentencePieceText::InitAsDefaultInstance() { -} -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int NBestSentencePieceText::kNbestsFieldNumber; -#endif // !defined(_MSC_VER) 
|| _MSC_VER >= 1900 +class NBestSentencePieceText::_Internal { + public: +}; -NBestSentencePieceText::NBestSentencePieceText() - : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { - ::google::protobuf::internal::InitSCC( - &protobuf_sentencepiece_2eproto::scc_info_NBestSentencePieceText.base); +NBestSentencePieceText::NBestSentencePieceText(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::MessageLite(arena), + nbests_(arena) { SharedCtor(); - // @@protoc_insertion_point(constructor:sentencepiece.NBestSentencePieceText) + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:sentencepiece.NBestSentencePieceText) } NBestSentencePieceText::NBestSentencePieceText(const NBestSentencePieceText& from) - : ::google::protobuf::MessageLite(), - _internal_metadata_(NULL), - _has_bits_(from._has_bits_), + : ::PROTOBUF_NAMESPACE_ID::MessageLite(), nbests_(from.nbests_) { - _internal_metadata_.MergeFrom(from._internal_metadata_); + _internal_metadata_.MergeFrom(from._internal_metadata_); // @@protoc_insertion_point(copy_constructor:sentencepiece.NBestSentencePieceText) } void NBestSentencePieceText::SharedCtor() { + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_NBestSentencePieceText_sentencepiece_2eproto.base); } NBestSentencePieceText::~NBestSentencePieceText() { // @@protoc_insertion_point(destructor:sentencepiece.NBestSentencePieceText) SharedDtor(); + _internal_metadata_.Delete(); } void NBestSentencePieceText::SharedDtor() { + GOOGLE_DCHECK(GetArena() == nullptr); } +void NBestSentencePieceText::ArenaDtor(void* object) { + NBestSentencePieceText* _this = reinterpret_cast< NBestSentencePieceText* >(object); + (void)_this; +} +void NBestSentencePieceText::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} void NBestSentencePieceText::SetCachedSize(int size) const { _cached_size_.Set(size); } const NBestSentencePieceText& NBestSentencePieceText::default_instance() { - 
::google::protobuf::internal::InitSCC(&protobuf_sentencepiece_2eproto::scc_info_NBestSentencePieceText.base); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_NBestSentencePieceText_sentencepiece_2eproto.base); return *internal_default_instance(); } void NBestSentencePieceText::Clear() { // @@protoc_insertion_point(message_clear_start:sentencepiece.NBestSentencePieceText) - ::google::protobuf::uint32 cached_has_bits = 0; + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; nbests_.Clear(); - _has_bits_.Clear(); - _internal_metadata_.Clear(); -} - -bool NBestSentencePieceText::MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - ::google::protobuf::uint32 tag; - ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter( - &_internal_metadata_); - ::google::protobuf::io::StringOutputStream unknown_fields_output( - unknown_fields_setter.buffer()); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_output, false); - // @@protoc_insertion_point(parse_start:sentencepiece.NBestSentencePieceText) - for (;;) { - ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); - tag = p.first; - if (!p.second) goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // repeated .sentencepiece.SentencePieceText nbests = 1; - case 1: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u /* 10 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessage( - input, add_nbests())); - } else { - goto handle_unusual; - } - break; - } + _internal_metadata_.Clear(); +} +const char* NBestSentencePieceText::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { 
+#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // repeated .sentencepiece.SentencePieceText nbests = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + ptr -= 1; + do { + ptr += 1; + ptr = ctx->ParseMessage(_internal_add_nbests(), ptr); + CHK_(ptr); + if (!ctx->DataAvailable(ptr)) break; + } while (::PROTOBUF_NAMESPACE_ID::internal::ExpectTag<10>(ptr)); + } else goto handle_unusual; + continue; default: { handle_unusual: - if (tag == 0) { + if ((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); goto success; } - DO_(::google::protobuf::internal::WireFormatLite::SkipField( - input, tag, &unknown_fields_stream)); - break; + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; } - } - } + } // switch + } // while success: - // @@protoc_insertion_point(parse_success:sentencepiece.NBestSentencePieceText) - return true; + return ptr; failure: - // @@protoc_insertion_point(parse_failure:sentencepiece.NBestSentencePieceText) - return false; -#undef DO_ + ptr = nullptr; + goto success; +#undef CHK_ } -void NBestSentencePieceText::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:sentencepiece.NBestSentencePieceText) - ::google::protobuf::uint32 cached_has_bits = 0; +::PROTOBUF_NAMESPACE_ID::uint8* NBestSentencePieceText::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:sentencepiece.NBestSentencePieceText) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; // repeated .sentencepiece.SentencePieceText nbests = 1; for (unsigned 
int i = 0, - n = static_cast(this->nbests_size()); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteMessage( - 1, - this->nbests(static_cast(i)), - output); + n = static_cast(this->_internal_nbests_size()); i < n; i++) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage(1, this->_internal_nbests(i), target, stream); } - output->WriteRaw(_internal_metadata_.unknown_fields().data(), - static_cast(_internal_metadata_.unknown_fields().size())); - // @@protoc_insertion_point(serialize_end:sentencepiece.NBestSentencePieceText) + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = stream->WriteRaw(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).data(), + static_cast(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size()), target); + } + // @@protoc_insertion_point(serialize_to_array_end:sentencepiece.NBestSentencePieceText) + return target; } size_t NBestSentencePieceText::ByteSizeLong() const { // @@protoc_insertion_point(message_byte_size_start:sentencepiece.NBestSentencePieceText) size_t total_size = 0; - total_size += _internal_metadata_.unknown_fields().size(); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; // repeated .sentencepiece.SentencePieceText nbests = 1; - { - unsigned int count = static_cast(this->nbests_size()); - total_size += 1UL * count; - for (unsigned int i = 0; i < count; i++) { - total_size += - ::google::protobuf::internal::WireFormatLite::MessageSize( - this->nbests(static_cast(i))); - } + total_size += 1UL * this->_internal_nbests_size(); + for (const auto& msg : this->nbests_) { + total_size += + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize(msg); } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + if 
(PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + total_size += _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size(); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; } void NBestSentencePieceText::CheckTypeAndMergeFrom( - const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) { + MergeFrom(*::PROTOBUF_NAMESPACE_ID::internal::DownCast( + &from)); } void NBestSentencePieceText::MergeFrom(const NBestSentencePieceText& from) { // @@protoc_insertion_point(class_specific_merge_from_start:sentencepiece.NBestSentencePieceText) GOOGLE_DCHECK_NE(&from, this); - _internal_metadata_.MergeFrom(from._internal_metadata_); - ::google::protobuf::uint32 cached_has_bits = 0; + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; nbests_.MergeFrom(from.nbests_); @@ -914,40 +890,34 @@ void NBestSentencePieceText::CopyFrom(const NBestSentencePieceText& from) { } bool NBestSentencePieceText::IsInitialized() const { - if (!::google::protobuf::internal::AllAreInitialized(this->nbests())) return false; + if (!::PROTOBUF_NAMESPACE_ID::internal::AllAreInitialized(nbests_)) return false; return true; } -void NBestSentencePieceText::Swap(NBestSentencePieceText* other) { - if (other == this) return; - InternalSwap(other); -} void NBestSentencePieceText::InternalSwap(NBestSentencePieceText* other) { using std::swap; - CastToBase(&nbests_)->InternalSwap(CastToBase(&other->nbests_)); - swap(_has_bits_[0], other->_has_bits_[0]); - _internal_metadata_.Swap(&other->_internal_metadata_); + _internal_metadata_.Swap(&other->_internal_metadata_); + nbests_.InternalSwap(&other->nbests_); } -::std::string NBestSentencePieceText::GetTypeName() const { +std::string 
NBestSentencePieceText::GetTypeName() const { return "sentencepiece.NBestSentencePieceText"; } // @@protoc_insertion_point(namespace_scope) } // namespace sentencepiece -namespace google { -namespace protobuf { -template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::sentencepiece::SentencePieceText_SentencePiece* Arena::CreateMaybeMessage< ::sentencepiece::SentencePieceText_SentencePiece >(Arena* arena) { - return Arena::CreateInternal< ::sentencepiece::SentencePieceText_SentencePiece >(arena); +PROTOBUF_NAMESPACE_OPEN +template<> PROTOBUF_NOINLINE ::sentencepiece::SentencePieceText_SentencePiece* Arena::CreateMaybeMessage< ::sentencepiece::SentencePieceText_SentencePiece >(Arena* arena) { + return Arena::CreateMessageInternal< ::sentencepiece::SentencePieceText_SentencePiece >(arena); } -template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::sentencepiece::SentencePieceText* Arena::CreateMaybeMessage< ::sentencepiece::SentencePieceText >(Arena* arena) { - return Arena::CreateInternal< ::sentencepiece::SentencePieceText >(arena); +template<> PROTOBUF_NOINLINE ::sentencepiece::SentencePieceText* Arena::CreateMaybeMessage< ::sentencepiece::SentencePieceText >(Arena* arena) { + return Arena::CreateMessageInternal< ::sentencepiece::SentencePieceText >(arena); } -template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::sentencepiece::NBestSentencePieceText* Arena::CreateMaybeMessage< ::sentencepiece::NBestSentencePieceText >(Arena* arena) { - return Arena::CreateInternal< ::sentencepiece::NBestSentencePieceText >(arena); +template<> PROTOBUF_NOINLINE ::sentencepiece::NBestSentencePieceText* Arena::CreateMaybeMessage< ::sentencepiece::NBestSentencePieceText >(Arena* arena) { + return Arena::CreateMessageInternal< ::sentencepiece::NBestSentencePieceText >(arena); } -} // namespace protobuf -} // namespace google +PROTOBUF_NAMESPACE_CLOSE // @@protoc_insertion_point(global_scope) +#include diff --git a/src/builtin_pb/sentencepiece.pb.h b/src/builtin_pb/sentencepiece.pb.h index 91dc85d9..24ec3370 
100644 --- a/src/builtin_pb/sentencepiece.pb.h +++ b/src/builtin_pb/sentencepiece.pb.h @@ -1,48 +1,55 @@ // Generated by the protocol buffer compiler. DO NOT EDIT! // source: sentencepiece.proto -#ifndef PROTOBUF_INCLUDED_sentencepiece_2eproto -#define PROTOBUF_INCLUDED_sentencepiece_2eproto +#ifndef GOOGLE_PROTOBUF_INCLUDED_sentencepiece_2eproto +#define GOOGLE_PROTOBUF_INCLUDED_sentencepiece_2eproto +#include #include -#include - -#if GOOGLE_PROTOBUF_VERSION < 3006001 +#include +#if PROTOBUF_VERSION < 3014000 #error This file was generated by a newer version of protoc which is -#error incompatible with your Protocol Buffer headers. Please update +#error incompatible with your Protocol Buffer headers. Please update #error your headers. #endif -#if 3006001 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION +#if 3014000 < PROTOBUF_MIN_PROTOC_VERSION #error This file was generated by an older version of protoc which is -#error incompatible with your Protocol Buffer headers. Please +#error incompatible with your Protocol Buffer headers. Please #error regenerate this file with a newer version of protoc. #endif +#include #include #include #include #include #include -#include #include #include #include // IWYU pragma: export #include // IWYU pragma: export // @@protoc_insertion_point(includes) -#define PROTOBUF_INTERNAL_EXPORT_protobuf_sentencepiece_2eproto +#include +#define PROTOBUF_INTERNAL_EXPORT_sentencepiece_2eproto +PROTOBUF_NAMESPACE_OPEN +namespace internal { +class AnyMetadata; +} // namespace internal +PROTOBUF_NAMESPACE_CLOSE -namespace protobuf_sentencepiece_2eproto { // Internal implementation detail -- do not use these members. 
-struct TableStruct { - static const ::google::protobuf::internal::ParseTableField entries[]; - static const ::google::protobuf::internal::AuxillaryParseTableField aux[]; - static const ::google::protobuf::internal::ParseTable schema[3]; - static const ::google::protobuf::internal::FieldMetadata field_metadata[]; - static const ::google::protobuf::internal::SerializationTable serialization_table[]; - static const ::google::protobuf::uint32 offsets[]; +struct TableStruct_sentencepiece_2eproto { + static const ::PROTOBUF_NAMESPACE_ID::internal::ParseTableField entries[] + PROTOBUF_SECTION_VARIABLE(protodesc_cold); + static const ::PROTOBUF_NAMESPACE_ID::internal::AuxiliaryParseTableField aux[] + PROTOBUF_SECTION_VARIABLE(protodesc_cold); + static const ::PROTOBUF_NAMESPACE_ID::internal::ParseTable schema[3] + PROTOBUF_SECTION_VARIABLE(protodesc_cold); + static const ::PROTOBUF_NAMESPACE_ID::internal::FieldMetadata field_metadata[]; + static const ::PROTOBUF_NAMESPACE_ID::internal::SerializationTable serialization_table[]; + static const ::PROTOBUF_NAMESPACE_ID::uint32 offsets[]; }; -} // namespace protobuf_sentencepiece_2eproto namespace sentencepiece { class NBestSentencePieceText; class NBestSentencePieceTextDefaultTypeInternal; @@ -54,53 +61,49 @@ class SentencePieceText_SentencePiece; class SentencePieceText_SentencePieceDefaultTypeInternal; extern SentencePieceText_SentencePieceDefaultTypeInternal _SentencePieceText_SentencePiece_default_instance_; } // namespace sentencepiece -namespace google { -namespace protobuf { +PROTOBUF_NAMESPACE_OPEN template<> ::sentencepiece::NBestSentencePieceText* Arena::CreateMaybeMessage<::sentencepiece::NBestSentencePieceText>(Arena*); template<> ::sentencepiece::SentencePieceText* Arena::CreateMaybeMessage<::sentencepiece::SentencePieceText>(Arena*); template<> ::sentencepiece::SentencePieceText_SentencePiece* Arena::CreateMaybeMessage<::sentencepiece::SentencePieceText_SentencePiece>(Arena*); -} // namespace protobuf -} // 
namespace google +PROTOBUF_NAMESPACE_CLOSE namespace sentencepiece { // =================================================================== -class SentencePieceText_SentencePiece : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.SentencePieceText.SentencePiece) */ { +class SentencePieceText_SentencePiece PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.SentencePieceText.SentencePiece) */ { public: - SentencePieceText_SentencePiece(); + inline SentencePieceText_SentencePiece() : SentencePieceText_SentencePiece(nullptr) {} virtual ~SentencePieceText_SentencePiece(); SentencePieceText_SentencePiece(const SentencePieceText_SentencePiece& from); - - inline SentencePieceText_SentencePiece& operator=(const SentencePieceText_SentencePiece& from) { - CopyFrom(from); - return *this; - } - #if LANG_CXX11 SentencePieceText_SentencePiece(SentencePieceText_SentencePiece&& from) noexcept : SentencePieceText_SentencePiece() { *this = ::std::move(from); } + inline SentencePieceText_SentencePiece& operator=(const SentencePieceText_SentencePiece& from) { + CopyFrom(from); + return *this; + } inline SentencePieceText_SentencePiece& operator=(SentencePieceText_SentencePiece&& from) noexcept { - if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) { + if (GetArena() == from.GetArena()) { if (this != &from) InternalSwap(&from); } else { CopyFrom(from); } return *this; } - #endif - inline const ::std::string& unknown_fields() const { - return _internal_metadata_.unknown_fields(); + + inline const std::string& unknown_fields() const { + return _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString); } - inline ::std::string* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); + inline std::string* mutable_unknown_fields() { + return _internal_metadata_.mutable_unknown_fields(); } static const 
SentencePieceText_SentencePiece& default_instance(); - static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY static inline const SentencePieceText_SentencePiece* internal_default_instance() { return reinterpret_cast( &_SentencePieceText_SentencePiece_default_instance_); @@ -108,170 +111,209 @@ class SentencePieceText_SentencePiece : public ::google::protobuf::MessageLite / static constexpr int kIndexInFileMessages = 0; - void Swap(SentencePieceText_SentencePiece* other); friend void swap(SentencePieceText_SentencePiece& a, SentencePieceText_SentencePiece& b) { a.Swap(&b); } + inline void Swap(SentencePieceText_SentencePiece* other) { + if (other == this) return; + if (GetArena() == other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(SentencePieceText_SentencePiece* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } // implements Message ---------------------------------------------- inline SentencePieceText_SentencePiece* New() const final { - return CreateMaybeMessage(NULL); + return CreateMaybeMessage(nullptr); } - SentencePieceText_SentencePiece* New(::google::protobuf::Arena* arena) const final { + SentencePieceText_SentencePiece* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { return CreateMaybeMessage(arena); } - void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + void CheckTypeAndMergeFrom(const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) final; void CopyFrom(const SentencePieceText_SentencePiece& from); void MergeFrom(const SentencePieceText_SentencePiece& from); - void Clear() final; + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; bool IsInitialized() const final; size_t ByteSizeLong() const final; - bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) final; - void SerializeWithCachedSizes( - 
::google::protobuf::io::CodedOutputStream* output) const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; void DiscardUnknownFields(); int GetCachedSize() const final { return _cached_size_.Get(); } private: - void SharedCtor(); - void SharedDtor(); + inline void SharedCtor(); + inline void SharedDtor(); void SetCachedSize(int size) const; void InternalSwap(SentencePieceText_SentencePiece* other); - private: - inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return NULL; - } - inline void* MaybeArenaPtr() const { - return NULL; + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "sentencepiece.SentencePieceText.SentencePiece"; } + protected: + explicit SentencePieceText_SentencePiece(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); public: - ::std::string GetTypeName() const final; + std::string GetTypeName() const final; // nested types ---------------------------------------------------- // accessors ------------------------------------------------------- + enum : int { + kPieceFieldNumber = 1, + kSurfaceFieldNumber = 3, + kIdFieldNumber = 2, + kBeginFieldNumber = 4, + kEndFieldNumber = 5, + }; // optional string piece = 1; bool has_piece() const; + private: + bool _internal_has_piece() const; + public: void clear_piece(); - static const int kPieceFieldNumber = 1; - const ::std::string& piece() const; - void set_piece(const ::std::string& value); - #if LANG_CXX11 - void set_piece(::std::string&& value); - #endif + const std::string& piece() const; + void set_piece(const std::string& value); + void set_piece(std::string&& 
value); void set_piece(const char* value); void set_piece(const char* value, size_t size); - ::std::string* mutable_piece(); - ::std::string* release_piece(); - void set_allocated_piece(::std::string* piece); + std::string* mutable_piece(); + std::string* release_piece(); + void set_allocated_piece(std::string* piece); + private: + const std::string& _internal_piece() const; + void _internal_set_piece(const std::string& value); + std::string* _internal_mutable_piece(); + public: // optional string surface = 3; bool has_surface() const; + private: + bool _internal_has_surface() const; + public: void clear_surface(); - static const int kSurfaceFieldNumber = 3; - const ::std::string& surface() const; - void set_surface(const ::std::string& value); - #if LANG_CXX11 - void set_surface(::std::string&& value); - #endif + const std::string& surface() const; + void set_surface(const std::string& value); + void set_surface(std::string&& value); void set_surface(const char* value); void set_surface(const char* value, size_t size); - ::std::string* mutable_surface(); - ::std::string* release_surface(); - void set_allocated_surface(::std::string* surface); + std::string* mutable_surface(); + std::string* release_surface(); + void set_allocated_surface(std::string* surface); + private: + const std::string& _internal_surface() const; + void _internal_set_surface(const std::string& value); + std::string* _internal_mutable_surface(); + public: // optional uint32 id = 2; bool has_id() const; + private: + bool _internal_has_id() const; + public: void clear_id(); - static const int kIdFieldNumber = 2; - ::google::protobuf::uint32 id() const; - void set_id(::google::protobuf::uint32 value); + ::PROTOBUF_NAMESPACE_ID::uint32 id() const; + void set_id(::PROTOBUF_NAMESPACE_ID::uint32 value); + private: + ::PROTOBUF_NAMESPACE_ID::uint32 _internal_id() const; + void _internal_set_id(::PROTOBUF_NAMESPACE_ID::uint32 value); + public: // optional uint32 begin = 4; bool has_begin() const; + 
private: + bool _internal_has_begin() const; + public: void clear_begin(); - static const int kBeginFieldNumber = 4; - ::google::protobuf::uint32 begin() const; - void set_begin(::google::protobuf::uint32 value); + ::PROTOBUF_NAMESPACE_ID::uint32 begin() const; + void set_begin(::PROTOBUF_NAMESPACE_ID::uint32 value); + private: + ::PROTOBUF_NAMESPACE_ID::uint32 _internal_begin() const; + void _internal_set_begin(::PROTOBUF_NAMESPACE_ID::uint32 value); + public: // optional uint32 end = 5; bool has_end() const; + private: + bool _internal_has_end() const; + public: void clear_end(); - static const int kEndFieldNumber = 5; - ::google::protobuf::uint32 end() const; - void set_end(::google::protobuf::uint32 value); + ::PROTOBUF_NAMESPACE_ID::uint32 end() const; + void set_end(::PROTOBUF_NAMESPACE_ID::uint32 value); + private: + ::PROTOBUF_NAMESPACE_ID::uint32 _internal_end() const; + void _internal_set_end(::PROTOBUF_NAMESPACE_ID::uint32 value); + public: GOOGLE_PROTOBUF_EXTENSION_ACCESSORS(SentencePieceText_SentencePiece) // @@protoc_insertion_point(class_scope:sentencepiece.SentencePieceText.SentencePiece) private: - void set_has_piece(); - void clear_has_piece(); - void set_has_id(); - void clear_has_id(); - void set_has_surface(); - void clear_has_surface(); - void set_has_begin(); - void clear_has_begin(); - void set_has_end(); - void clear_has_end(); - - ::google::protobuf::internal::ExtensionSet _extensions_; - - ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; - ::google::protobuf::internal::HasBits<1> _has_bits_; - mutable ::google::protobuf::internal::CachedSize _cached_size_; - ::google::protobuf::internal::ArenaStringPtr piece_; - ::google::protobuf::internal::ArenaStringPtr surface_; - ::google::protobuf::uint32 id_; - ::google::protobuf::uint32 begin_; - ::google::protobuf::uint32 end_; - friend struct ::protobuf_sentencepiece_2eproto::TableStruct; + class _Internal; + + ::PROTOBUF_NAMESPACE_ID::internal::ExtensionSet 
_extensions_; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::internal::HasBits<1> _has_bits_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr piece_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr surface_; + ::PROTOBUF_NAMESPACE_ID::uint32 id_; + ::PROTOBUF_NAMESPACE_ID::uint32 begin_; + ::PROTOBUF_NAMESPACE_ID::uint32 end_; + friend struct ::TableStruct_sentencepiece_2eproto; }; // ------------------------------------------------------------------- -class SentencePieceText : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.SentencePieceText) */ { +class SentencePieceText PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.SentencePieceText) */ { public: - SentencePieceText(); + inline SentencePieceText() : SentencePieceText(nullptr) {} virtual ~SentencePieceText(); SentencePieceText(const SentencePieceText& from); - - inline SentencePieceText& operator=(const SentencePieceText& from) { - CopyFrom(from); - return *this; - } - #if LANG_CXX11 SentencePieceText(SentencePieceText&& from) noexcept : SentencePieceText() { *this = ::std::move(from); } + inline SentencePieceText& operator=(const SentencePieceText& from) { + CopyFrom(from); + return *this; + } inline SentencePieceText& operator=(SentencePieceText&& from) noexcept { - if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) { + if (GetArena() == from.GetArena()) { if (this != &from) InternalSwap(&from); } else { CopyFrom(from); } return *this; } - #endif - inline const ::std::string& unknown_fields() const { - return _internal_metadata_.unknown_fields(); + + inline const std::string& unknown_fields() const { + return 
_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString); } - inline ::std::string* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); + inline std::string* mutable_unknown_fields() { + return _internal_metadata_.mutable_unknown_fields(); } static const SentencePieceText& default_instance(); - static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY static inline const SentencePieceText* internal_default_instance() { return reinterpret_cast( &_SentencePieceText_default_instance_); @@ -279,50 +321,63 @@ class SentencePieceText : public ::google::protobuf::MessageLite /* @@protoc_ins static constexpr int kIndexInFileMessages = 1; - void Swap(SentencePieceText* other); friend void swap(SentencePieceText& a, SentencePieceText& b) { a.Swap(&b); } + inline void Swap(SentencePieceText* other) { + if (other == this) return; + if (GetArena() == other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(SentencePieceText* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } // implements Message ---------------------------------------------- inline SentencePieceText* New() const final { - return CreateMaybeMessage(NULL); + return CreateMaybeMessage(nullptr); } - SentencePieceText* New(::google::protobuf::Arena* arena) const final { + SentencePieceText* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { return CreateMaybeMessage(arena); } - void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + void CheckTypeAndMergeFrom(const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) final; void CopyFrom(const SentencePieceText& from); void MergeFrom(const SentencePieceText& from); - void Clear() final; + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; bool IsInitialized() const final; size_t ByteSizeLong() const final; - bool 
MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) final; - void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; void DiscardUnknownFields(); int GetCachedSize() const final { return _cached_size_.Get(); } private: - void SharedCtor(); - void SharedDtor(); + inline void SharedCtor(); + inline void SharedDtor(); void SetCachedSize(int size) const; void InternalSwap(SentencePieceText* other); - private: - inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return NULL; - } - inline void* MaybeArenaPtr() const { - return NULL; + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "sentencepiece.SentencePieceText"; } + protected: + explicit SentencePieceText(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); public: - ::std::string GetTypeName() const final; + std::string GetTypeName() const final; // nested types ---------------------------------------------------- @@ -330,96 +385,115 @@ class SentencePieceText : public ::google::protobuf::MessageLite /* @@protoc_ins // accessors ------------------------------------------------------- + enum : int { + kPiecesFieldNumber = 2, + kTextFieldNumber = 1, + kScoreFieldNumber = 3, + }; // repeated .sentencepiece.SentencePieceText.SentencePiece pieces = 2; int pieces_size() const; + private: + int _internal_pieces_size() const; + public: void clear_pieces(); - static const int kPiecesFieldNumber = 2; ::sentencepiece::SentencePieceText_SentencePiece* mutable_pieces(int index); - 
::google::protobuf::RepeatedPtrField< ::sentencepiece::SentencePieceText_SentencePiece >* + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SentencePieceText_SentencePiece >* mutable_pieces(); + private: + const ::sentencepiece::SentencePieceText_SentencePiece& _internal_pieces(int index) const; + ::sentencepiece::SentencePieceText_SentencePiece* _internal_add_pieces(); + public: const ::sentencepiece::SentencePieceText_SentencePiece& pieces(int index) const; ::sentencepiece::SentencePieceText_SentencePiece* add_pieces(); - const ::google::protobuf::RepeatedPtrField< ::sentencepiece::SentencePieceText_SentencePiece >& + const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SentencePieceText_SentencePiece >& pieces() const; // optional string text = 1; bool has_text() const; + private: + bool _internal_has_text() const; + public: void clear_text(); - static const int kTextFieldNumber = 1; - const ::std::string& text() const; - void set_text(const ::std::string& value); - #if LANG_CXX11 - void set_text(::std::string&& value); - #endif + const std::string& text() const; + void set_text(const std::string& value); + void set_text(std::string&& value); void set_text(const char* value); void set_text(const char* value, size_t size); - ::std::string* mutable_text(); - ::std::string* release_text(); - void set_allocated_text(::std::string* text); + std::string* mutable_text(); + std::string* release_text(); + void set_allocated_text(std::string* text); + private: + const std::string& _internal_text() const; + void _internal_set_text(const std::string& value); + std::string* _internal_mutable_text(); + public: // optional float score = 3; bool has_score() const; + private: + bool _internal_has_score() const; + public: void clear_score(); - static const int kScoreFieldNumber = 3; float score() const; void set_score(float value); + private: + float _internal_score() const; + void _internal_set_score(float value); + public: 
GOOGLE_PROTOBUF_EXTENSION_ACCESSORS(SentencePieceText) // @@protoc_insertion_point(class_scope:sentencepiece.SentencePieceText) private: - void set_has_text(); - void clear_has_text(); - void set_has_score(); - void clear_has_score(); - - ::google::protobuf::internal::ExtensionSet _extensions_; - - ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; - ::google::protobuf::internal::HasBits<1> _has_bits_; - mutable ::google::protobuf::internal::CachedSize _cached_size_; - ::google::protobuf::RepeatedPtrField< ::sentencepiece::SentencePieceText_SentencePiece > pieces_; - ::google::protobuf::internal::ArenaStringPtr text_; + class _Internal; + + ::PROTOBUF_NAMESPACE_ID::internal::ExtensionSet _extensions_; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::internal::HasBits<1> _has_bits_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SentencePieceText_SentencePiece > pieces_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr text_; float score_; - friend struct ::protobuf_sentencepiece_2eproto::TableStruct; + friend struct ::TableStruct_sentencepiece_2eproto; }; // ------------------------------------------------------------------- -class NBestSentencePieceText : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.NBestSentencePieceText) */ { +class NBestSentencePieceText PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.NBestSentencePieceText) */ { public: - NBestSentencePieceText(); + inline NBestSentencePieceText() : NBestSentencePieceText(nullptr) {} virtual ~NBestSentencePieceText(); NBestSentencePieceText(const NBestSentencePieceText& from); - - inline NBestSentencePieceText& operator=(const 
NBestSentencePieceText& from) { - CopyFrom(from); - return *this; - } - #if LANG_CXX11 NBestSentencePieceText(NBestSentencePieceText&& from) noexcept : NBestSentencePieceText() { *this = ::std::move(from); } + inline NBestSentencePieceText& operator=(const NBestSentencePieceText& from) { + CopyFrom(from); + return *this; + } inline NBestSentencePieceText& operator=(NBestSentencePieceText&& from) noexcept { - if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) { + if (GetArena() == from.GetArena()) { if (this != &from) InternalSwap(&from); } else { CopyFrom(from); } return *this; } - #endif - inline const ::std::string& unknown_fields() const { - return _internal_metadata_.unknown_fields(); + + inline const std::string& unknown_fields() const { + return _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString); } - inline ::std::string* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); + inline std::string* mutable_unknown_fields() { + return _internal_metadata_.mutable_unknown_fields(); } static const NBestSentencePieceText& default_instance(); - static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY static inline const NBestSentencePieceText* internal_default_instance() { return reinterpret_cast( &_NBestSentencePieceText_default_instance_); @@ -427,75 +501,99 @@ class NBestSentencePieceText : public ::google::protobuf::MessageLite /* @@proto static constexpr int kIndexInFileMessages = 2; - void Swap(NBestSentencePieceText* other); friend void swap(NBestSentencePieceText& a, NBestSentencePieceText& b) { a.Swap(&b); } + inline void Swap(NBestSentencePieceText* other) { + if (other == this) return; + if (GetArena() == other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(NBestSentencePieceText* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } 
// implements Message ---------------------------------------------- inline NBestSentencePieceText* New() const final { - return CreateMaybeMessage(NULL); + return CreateMaybeMessage(nullptr); } - NBestSentencePieceText* New(::google::protobuf::Arena* arena) const final { + NBestSentencePieceText* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { return CreateMaybeMessage(arena); } - void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + void CheckTypeAndMergeFrom(const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) final; void CopyFrom(const NBestSentencePieceText& from); void MergeFrom(const NBestSentencePieceText& from); - void Clear() final; + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; bool IsInitialized() const final; size_t ByteSizeLong() const final; - bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) final; - void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; void DiscardUnknownFields(); int GetCachedSize() const final { return _cached_size_.Get(); } private: - void SharedCtor(); - void SharedDtor(); + inline void SharedCtor(); + inline void SharedDtor(); void SetCachedSize(int size) const; void InternalSwap(NBestSentencePieceText* other); - private: - inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return NULL; - } - inline void* MaybeArenaPtr() const { - return NULL; + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "sentencepiece.NBestSentencePieceText"; } + protected: + explicit NBestSentencePieceText(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void 
ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); public: - ::std::string GetTypeName() const final; + std::string GetTypeName() const final; // nested types ---------------------------------------------------- // accessors ------------------------------------------------------- + enum : int { + kNbestsFieldNumber = 1, + }; // repeated .sentencepiece.SentencePieceText nbests = 1; int nbests_size() const; + private: + int _internal_nbests_size() const; + public: void clear_nbests(); - static const int kNbestsFieldNumber = 1; ::sentencepiece::SentencePieceText* mutable_nbests(int index); - ::google::protobuf::RepeatedPtrField< ::sentencepiece::SentencePieceText >* + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SentencePieceText >* mutable_nbests(); + private: + const ::sentencepiece::SentencePieceText& _internal_nbests(int index) const; + ::sentencepiece::SentencePieceText* _internal_add_nbests(); + public: const ::sentencepiece::SentencePieceText& nbests(int index) const; ::sentencepiece::SentencePieceText* add_nbests(); - const ::google::protobuf::RepeatedPtrField< ::sentencepiece::SentencePieceText >& + const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SentencePieceText >& nbests() const; // @@protoc_insertion_point(class_scope:sentencepiece.NBestSentencePieceText) private: - - ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; - ::google::protobuf::internal::HasBits<1> _has_bits_; - mutable ::google::protobuf::internal::CachedSize _cached_size_; - ::google::protobuf::RepeatedPtrField< ::sentencepiece::SentencePieceText > nbests_; - friend struct ::protobuf_sentencepiece_2eproto::TableStruct; + class _Internal; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SentencePieceText > nbests_; + 
mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + friend struct ::TableStruct_sentencepiece_2eproto; }; // =================================================================== @@ -509,206 +607,232 @@ class NBestSentencePieceText : public ::google::protobuf::MessageLite /* @@proto // SentencePieceText_SentencePiece // optional string piece = 1; -inline bool SentencePieceText_SentencePiece::has_piece() const { - return (_has_bits_[0] & 0x00000001u) != 0; +inline bool SentencePieceText_SentencePiece::_internal_has_piece() const { + bool value = (_has_bits_[0] & 0x00000001u) != 0; + return value; } -inline void SentencePieceText_SentencePiece::set_has_piece() { - _has_bits_[0] |= 0x00000001u; -} -inline void SentencePieceText_SentencePiece::clear_has_piece() { - _has_bits_[0] &= ~0x00000001u; +inline bool SentencePieceText_SentencePiece::has_piece() const { + return _internal_has_piece(); } inline void SentencePieceText_SentencePiece::clear_piece() { - piece_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_piece(); + piece_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000001u; } -inline const ::std::string& SentencePieceText_SentencePiece::piece() const { +inline const std::string& SentencePieceText_SentencePiece::piece() const { // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.SentencePiece.piece) - return piece_.GetNoArena(); + return _internal_piece(); } -inline void SentencePieceText_SentencePiece::set_piece(const ::std::string& value) { - set_has_piece(); - piece_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void SentencePieceText_SentencePiece::set_piece(const std::string& value) { + _internal_set_piece(value); // @@protoc_insertion_point(field_set:sentencepiece.SentencePieceText.SentencePiece.piece) } -#if LANG_CXX11 -inline void SentencePieceText_SentencePiece::set_piece(::std::string&& value) { - set_has_piece(); - piece_.SetNoArena( - 
&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* SentencePieceText_SentencePiece::mutable_piece() { + // @@protoc_insertion_point(field_mutable:sentencepiece.SentencePieceText.SentencePiece.piece) + return _internal_mutable_piece(); +} +inline const std::string& SentencePieceText_SentencePiece::_internal_piece() const { + return piece_.Get(); +} +inline void SentencePieceText_SentencePiece::_internal_set_piece(const std::string& value) { + _has_bits_[0] |= 0x00000001u; + piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void SentencePieceText_SentencePiece::set_piece(std::string&& value) { + _has_bits_[0] |= 0x00000001u; + piece_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.SentencePieceText.SentencePiece.piece) } -#endif inline void SentencePieceText_SentencePiece::set_piece(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_piece(); - piece_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000001u; + piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.SentencePieceText.SentencePiece.piece) } -inline void SentencePieceText_SentencePiece::set_piece(const char* value, size_t size) { - set_has_piece(); - piece_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void SentencePieceText_SentencePiece::set_piece(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000001u; + piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // 
@@protoc_insertion_point(field_set_pointer:sentencepiece.SentencePieceText.SentencePiece.piece) } -inline ::std::string* SentencePieceText_SentencePiece::mutable_piece() { - set_has_piece(); - // @@protoc_insertion_point(field_mutable:sentencepiece.SentencePieceText.SentencePiece.piece) - return piece_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* SentencePieceText_SentencePiece::_internal_mutable_piece() { + _has_bits_[0] |= 0x00000001u; + return piece_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* SentencePieceText_SentencePiece::release_piece() { +inline std::string* SentencePieceText_SentencePiece::release_piece() { // @@protoc_insertion_point(field_release:sentencepiece.SentencePieceText.SentencePiece.piece) - if (!has_piece()) { - return NULL; + if (!_internal_has_piece()) { + return nullptr; } - clear_has_piece(); - return piece_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000001u; + return piece_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void SentencePieceText_SentencePiece::set_allocated_piece(::std::string* piece) { - if (piece != NULL) { - set_has_piece(); +inline void SentencePieceText_SentencePiece::set_allocated_piece(std::string* piece) { + if (piece != nullptr) { + _has_bits_[0] |= 0x00000001u; } else { - clear_has_piece(); + _has_bits_[0] &= ~0x00000001u; } - piece_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), piece); + piece_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), piece, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.SentencePieceText.SentencePiece.piece) } // optional uint32 id = 2; -inline bool SentencePieceText_SentencePiece::has_id() const { - return (_has_bits_[0] & 0x00000004u) != 0; -} 
-inline void SentencePieceText_SentencePiece::set_has_id() { - _has_bits_[0] |= 0x00000004u; +inline bool SentencePieceText_SentencePiece::_internal_has_id() const { + bool value = (_has_bits_[0] & 0x00000004u) != 0; + return value; } -inline void SentencePieceText_SentencePiece::clear_has_id() { - _has_bits_[0] &= ~0x00000004u; +inline bool SentencePieceText_SentencePiece::has_id() const { + return _internal_has_id(); } inline void SentencePieceText_SentencePiece::clear_id() { id_ = 0u; - clear_has_id(); + _has_bits_[0] &= ~0x00000004u; } -inline ::google::protobuf::uint32 SentencePieceText_SentencePiece::id() const { - // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.SentencePiece.id) +inline ::PROTOBUF_NAMESPACE_ID::uint32 SentencePieceText_SentencePiece::_internal_id() const { return id_; } -inline void SentencePieceText_SentencePiece::set_id(::google::protobuf::uint32 value) { - set_has_id(); +inline ::PROTOBUF_NAMESPACE_ID::uint32 SentencePieceText_SentencePiece::id() const { + // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.SentencePiece.id) + return _internal_id(); +} +inline void SentencePieceText_SentencePiece::_internal_set_id(::PROTOBUF_NAMESPACE_ID::uint32 value) { + _has_bits_[0] |= 0x00000004u; id_ = value; +} +inline void SentencePieceText_SentencePiece::set_id(::PROTOBUF_NAMESPACE_ID::uint32 value) { + _internal_set_id(value); // @@protoc_insertion_point(field_set:sentencepiece.SentencePieceText.SentencePiece.id) } // optional string surface = 3; -inline bool SentencePieceText_SentencePiece::has_surface() const { - return (_has_bits_[0] & 0x00000002u) != 0; +inline bool SentencePieceText_SentencePiece::_internal_has_surface() const { + bool value = (_has_bits_[0] & 0x00000002u) != 0; + return value; } -inline void SentencePieceText_SentencePiece::set_has_surface() { - _has_bits_[0] |= 0x00000002u; -} -inline void SentencePieceText_SentencePiece::clear_has_surface() { - _has_bits_[0] &= ~0x00000002u; 
+inline bool SentencePieceText_SentencePiece::has_surface() const { + return _internal_has_surface(); } inline void SentencePieceText_SentencePiece::clear_surface() { - surface_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_surface(); + surface_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000002u; } -inline const ::std::string& SentencePieceText_SentencePiece::surface() const { +inline const std::string& SentencePieceText_SentencePiece::surface() const { // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.SentencePiece.surface) - return surface_.GetNoArena(); + return _internal_surface(); } -inline void SentencePieceText_SentencePiece::set_surface(const ::std::string& value) { - set_has_surface(); - surface_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void SentencePieceText_SentencePiece::set_surface(const std::string& value) { + _internal_set_surface(value); // @@protoc_insertion_point(field_set:sentencepiece.SentencePieceText.SentencePiece.surface) } -#if LANG_CXX11 -inline void SentencePieceText_SentencePiece::set_surface(::std::string&& value) { - set_has_surface(); - surface_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* SentencePieceText_SentencePiece::mutable_surface() { + // @@protoc_insertion_point(field_mutable:sentencepiece.SentencePieceText.SentencePiece.surface) + return _internal_mutable_surface(); +} +inline const std::string& SentencePieceText_SentencePiece::_internal_surface() const { + return surface_.Get(); +} +inline void SentencePieceText_SentencePiece::_internal_set_surface(const std::string& value) { + _has_bits_[0] |= 0x00000002u; + surface_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void SentencePieceText_SentencePiece::set_surface(std::string&& value) { + _has_bits_[0] |= 0x00000002u; + surface_.Set( + 
::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.SentencePieceText.SentencePiece.surface) } -#endif inline void SentencePieceText_SentencePiece::set_surface(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_surface(); - surface_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000002u; + surface_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.SentencePieceText.SentencePiece.surface) } -inline void SentencePieceText_SentencePiece::set_surface(const char* value, size_t size) { - set_has_surface(); - surface_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void SentencePieceText_SentencePiece::set_surface(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000002u; + surface_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.SentencePieceText.SentencePiece.surface) } -inline ::std::string* SentencePieceText_SentencePiece::mutable_surface() { - set_has_surface(); - // @@protoc_insertion_point(field_mutable:sentencepiece.SentencePieceText.SentencePiece.surface) - return surface_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* SentencePieceText_SentencePiece::_internal_mutable_surface() { + _has_bits_[0] |= 0x00000002u; + return surface_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* SentencePieceText_SentencePiece::release_surface() { +inline std::string* 
SentencePieceText_SentencePiece::release_surface() { // @@protoc_insertion_point(field_release:sentencepiece.SentencePieceText.SentencePiece.surface) - if (!has_surface()) { - return NULL; + if (!_internal_has_surface()) { + return nullptr; } - clear_has_surface(); - return surface_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000002u; + return surface_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void SentencePieceText_SentencePiece::set_allocated_surface(::std::string* surface) { - if (surface != NULL) { - set_has_surface(); +inline void SentencePieceText_SentencePiece::set_allocated_surface(std::string* surface) { + if (surface != nullptr) { + _has_bits_[0] |= 0x00000002u; } else { - clear_has_surface(); + _has_bits_[0] &= ~0x00000002u; } - surface_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), surface); + surface_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), surface, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.SentencePieceText.SentencePiece.surface) } // optional uint32 begin = 4; -inline bool SentencePieceText_SentencePiece::has_begin() const { - return (_has_bits_[0] & 0x00000008u) != 0; -} -inline void SentencePieceText_SentencePiece::set_has_begin() { - _has_bits_[0] |= 0x00000008u; +inline bool SentencePieceText_SentencePiece::_internal_has_begin() const { + bool value = (_has_bits_[0] & 0x00000008u) != 0; + return value; } -inline void SentencePieceText_SentencePiece::clear_has_begin() { - _has_bits_[0] &= ~0x00000008u; +inline bool SentencePieceText_SentencePiece::has_begin() const { + return _internal_has_begin(); } inline void SentencePieceText_SentencePiece::clear_begin() { begin_ = 0u; - clear_has_begin(); + _has_bits_[0] &= ~0x00000008u; } -inline ::google::protobuf::uint32 SentencePieceText_SentencePiece::begin() 
const { - // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.SentencePiece.begin) +inline ::PROTOBUF_NAMESPACE_ID::uint32 SentencePieceText_SentencePiece::_internal_begin() const { return begin_; } -inline void SentencePieceText_SentencePiece::set_begin(::google::protobuf::uint32 value) { - set_has_begin(); +inline ::PROTOBUF_NAMESPACE_ID::uint32 SentencePieceText_SentencePiece::begin() const { + // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.SentencePiece.begin) + return _internal_begin(); +} +inline void SentencePieceText_SentencePiece::_internal_set_begin(::PROTOBUF_NAMESPACE_ID::uint32 value) { + _has_bits_[0] |= 0x00000008u; begin_ = value; +} +inline void SentencePieceText_SentencePiece::set_begin(::PROTOBUF_NAMESPACE_ID::uint32 value) { + _internal_set_begin(value); // @@protoc_insertion_point(field_set:sentencepiece.SentencePieceText.SentencePiece.begin) } // optional uint32 end = 5; -inline bool SentencePieceText_SentencePiece::has_end() const { - return (_has_bits_[0] & 0x00000010u) != 0; +inline bool SentencePieceText_SentencePiece::_internal_has_end() const { + bool value = (_has_bits_[0] & 0x00000010u) != 0; + return value; } -inline void SentencePieceText_SentencePiece::set_has_end() { - _has_bits_[0] |= 0x00000010u; -} -inline void SentencePieceText_SentencePiece::clear_has_end() { - _has_bits_[0] &= ~0x00000010u; +inline bool SentencePieceText_SentencePiece::has_end() const { + return _internal_has_end(); } inline void SentencePieceText_SentencePiece::clear_end() { end_ = 0u; - clear_has_end(); + _has_bits_[0] &= ~0x00000010u; } -inline ::google::protobuf::uint32 SentencePieceText_SentencePiece::end() const { - // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.SentencePiece.end) +inline ::PROTOBUF_NAMESPACE_ID::uint32 SentencePieceText_SentencePiece::_internal_end() const { return end_; } -inline void SentencePieceText_SentencePiece::set_end(::google::protobuf::uint32 value) { - 
set_has_end(); +inline ::PROTOBUF_NAMESPACE_ID::uint32 SentencePieceText_SentencePiece::end() const { + // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.SentencePiece.end) + return _internal_end(); +} +inline void SentencePieceText_SentencePiece::_internal_set_end(::PROTOBUF_NAMESPACE_ID::uint32 value) { + _has_bits_[0] |= 0x00000010u; end_ = value; +} +inline void SentencePieceText_SentencePiece::set_end(::PROTOBUF_NAMESPACE_ID::uint32 value) { + _internal_set_end(value); // @@protoc_insertion_point(field_set:sentencepiece.SentencePieceText.SentencePiece.end) } @@ -717,75 +841,85 @@ inline void SentencePieceText_SentencePiece::set_end(::google::protobuf::uint32 // SentencePieceText // optional string text = 1; -inline bool SentencePieceText::has_text() const { - return (_has_bits_[0] & 0x00000001u) != 0; +inline bool SentencePieceText::_internal_has_text() const { + bool value = (_has_bits_[0] & 0x00000001u) != 0; + return value; } -inline void SentencePieceText::set_has_text() { - _has_bits_[0] |= 0x00000001u; -} -inline void SentencePieceText::clear_has_text() { - _has_bits_[0] &= ~0x00000001u; +inline bool SentencePieceText::has_text() const { + return _internal_has_text(); } inline void SentencePieceText::clear_text() { - text_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_text(); + text_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000001u; } -inline const ::std::string& SentencePieceText::text() const { +inline const std::string& SentencePieceText::text() const { // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.text) - return text_.GetNoArena(); + return _internal_text(); } -inline void SentencePieceText::set_text(const ::std::string& value) { - set_has_text(); - text_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void SentencePieceText::set_text(const std::string& value) { + _internal_set_text(value); // 
@@protoc_insertion_point(field_set:sentencepiece.SentencePieceText.text) } -#if LANG_CXX11 -inline void SentencePieceText::set_text(::std::string&& value) { - set_has_text(); - text_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* SentencePieceText::mutable_text() { + // @@protoc_insertion_point(field_mutable:sentencepiece.SentencePieceText.text) + return _internal_mutable_text(); +} +inline const std::string& SentencePieceText::_internal_text() const { + return text_.Get(); +} +inline void SentencePieceText::_internal_set_text(const std::string& value) { + _has_bits_[0] |= 0x00000001u; + text_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void SentencePieceText::set_text(std::string&& value) { + _has_bits_[0] |= 0x00000001u; + text_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.SentencePieceText.text) } -#endif inline void SentencePieceText::set_text(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_text(); - text_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000001u; + text_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.SentencePieceText.text) } -inline void SentencePieceText::set_text(const char* value, size_t size) { - set_has_text(); - text_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void SentencePieceText::set_text(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000001u; + text_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + 
reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.SentencePieceText.text) } -inline ::std::string* SentencePieceText::mutable_text() { - set_has_text(); - // @@protoc_insertion_point(field_mutable:sentencepiece.SentencePieceText.text) - return text_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* SentencePieceText::_internal_mutable_text() { + _has_bits_[0] |= 0x00000001u; + return text_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* SentencePieceText::release_text() { +inline std::string* SentencePieceText::release_text() { // @@protoc_insertion_point(field_release:sentencepiece.SentencePieceText.text) - if (!has_text()) { - return NULL; + if (!_internal_has_text()) { + return nullptr; } - clear_has_text(); - return text_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000001u; + return text_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void SentencePieceText::set_allocated_text(::std::string* text) { - if (text != NULL) { - set_has_text(); +inline void SentencePieceText::set_allocated_text(std::string* text) { + if (text != nullptr) { + _has_bits_[0] |= 0x00000001u; } else { - clear_has_text(); + _has_bits_[0] &= ~0x00000001u; } - text_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), text); + text_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), text, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.SentencePieceText.text) } // repeated .sentencepiece.SentencePieceText.SentencePiece pieces = 2; -inline int SentencePieceText::pieces_size() const { +inline int SentencePieceText::_internal_pieces_size() const { return pieces_.size(); } +inline int SentencePieceText::pieces_size() 
const { + return _internal_pieces_size(); +} inline void SentencePieceText::clear_pieces() { pieces_.Clear(); } @@ -793,46 +927,56 @@ inline ::sentencepiece::SentencePieceText_SentencePiece* SentencePieceText::muta // @@protoc_insertion_point(field_mutable:sentencepiece.SentencePieceText.pieces) return pieces_.Mutable(index); } -inline ::google::protobuf::RepeatedPtrField< ::sentencepiece::SentencePieceText_SentencePiece >* +inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SentencePieceText_SentencePiece >* SentencePieceText::mutable_pieces() { // @@protoc_insertion_point(field_mutable_list:sentencepiece.SentencePieceText.pieces) return &pieces_; } +inline const ::sentencepiece::SentencePieceText_SentencePiece& SentencePieceText::_internal_pieces(int index) const { + return pieces_.Get(index); +} inline const ::sentencepiece::SentencePieceText_SentencePiece& SentencePieceText::pieces(int index) const { // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.pieces) - return pieces_.Get(index); + return _internal_pieces(index); +} +inline ::sentencepiece::SentencePieceText_SentencePiece* SentencePieceText::_internal_add_pieces() { + return pieces_.Add(); } inline ::sentencepiece::SentencePieceText_SentencePiece* SentencePieceText::add_pieces() { // @@protoc_insertion_point(field_add:sentencepiece.SentencePieceText.pieces) - return pieces_.Add(); + return _internal_add_pieces(); } -inline const ::google::protobuf::RepeatedPtrField< ::sentencepiece::SentencePieceText_SentencePiece >& +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SentencePieceText_SentencePiece >& SentencePieceText::pieces() const { // @@protoc_insertion_point(field_list:sentencepiece.SentencePieceText.pieces) return pieces_; } // optional float score = 3; -inline bool SentencePieceText::has_score() const { - return (_has_bits_[0] & 0x00000002u) != 0; -} -inline void SentencePieceText::set_has_score() { - _has_bits_[0] |= 0x00000002u; +inline 
bool SentencePieceText::_internal_has_score() const { + bool value = (_has_bits_[0] & 0x00000002u) != 0; + return value; } -inline void SentencePieceText::clear_has_score() { - _has_bits_[0] &= ~0x00000002u; +inline bool SentencePieceText::has_score() const { + return _internal_has_score(); } inline void SentencePieceText::clear_score() { score_ = 0; - clear_has_score(); + _has_bits_[0] &= ~0x00000002u; +} +inline float SentencePieceText::_internal_score() const { + return score_; } inline float SentencePieceText::score() const { // @@protoc_insertion_point(field_get:sentencepiece.SentencePieceText.score) - return score_; + return _internal_score(); } -inline void SentencePieceText::set_score(float value) { - set_has_score(); +inline void SentencePieceText::_internal_set_score(float value) { + _has_bits_[0] |= 0x00000002u; score_ = value; +} +inline void SentencePieceText::set_score(float value) { + _internal_set_score(value); // @@protoc_insertion_point(field_set:sentencepiece.SentencePieceText.score) } @@ -841,9 +985,12 @@ inline void SentencePieceText::set_score(float value) { // NBestSentencePieceText // repeated .sentencepiece.SentencePieceText nbests = 1; -inline int NBestSentencePieceText::nbests_size() const { +inline int NBestSentencePieceText::_internal_nbests_size() const { return nbests_.size(); } +inline int NBestSentencePieceText::nbests_size() const { + return _internal_nbests_size(); +} inline void NBestSentencePieceText::clear_nbests() { nbests_.Clear(); } @@ -851,20 +998,26 @@ inline ::sentencepiece::SentencePieceText* NBestSentencePieceText::mutable_nbest // @@protoc_insertion_point(field_mutable:sentencepiece.NBestSentencePieceText.nbests) return nbests_.Mutable(index); } -inline ::google::protobuf::RepeatedPtrField< ::sentencepiece::SentencePieceText >* +inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SentencePieceText >* NBestSentencePieceText::mutable_nbests() { // 
@@protoc_insertion_point(field_mutable_list:sentencepiece.NBestSentencePieceText.nbests) return &nbests_; } +inline const ::sentencepiece::SentencePieceText& NBestSentencePieceText::_internal_nbests(int index) const { + return nbests_.Get(index); +} inline const ::sentencepiece::SentencePieceText& NBestSentencePieceText::nbests(int index) const { // @@protoc_insertion_point(field_get:sentencepiece.NBestSentencePieceText.nbests) - return nbests_.Get(index); + return _internal_nbests(index); +} +inline ::sentencepiece::SentencePieceText* NBestSentencePieceText::_internal_add_nbests() { + return nbests_.Add(); } inline ::sentencepiece::SentencePieceText* NBestSentencePieceText::add_nbests() { // @@protoc_insertion_point(field_add:sentencepiece.NBestSentencePieceText.nbests) - return nbests_.Add(); + return _internal_add_nbests(); } -inline const ::google::protobuf::RepeatedPtrField< ::sentencepiece::SentencePieceText >& +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SentencePieceText >& NBestSentencePieceText::nbests() const { // @@protoc_insertion_point(field_list:sentencepiece.NBestSentencePieceText.nbests) return nbests_; @@ -884,4 +1037,5 @@ NBestSentencePieceText::nbests() const { // @@protoc_insertion_point(global_scope) -#endif // PROTOBUF_INCLUDED_sentencepiece_2eproto +#include +#endif // GOOGLE_PROTOBUF_INCLUDED_GOOGLE_PROTOBUF_INCLUDED_sentencepiece_2eproto diff --git a/src/builtin_pb/sentencepiece_model.pb.cc b/src/builtin_pb/sentencepiece_model.pb.cc index 4c5326fa..88f995de 100644 --- a/src/builtin_pb/sentencepiece_model.pb.cc +++ b/src/builtin_pb/sentencepiece_model.pb.cc @@ -5,176 +5,126 @@ #include -#include -#include #include -#include +#include +#include #include -// This is a temporary google only hack -#ifdef GOOGLE_PROTOBUF_ENFORCE_UNIQUENESS -#include "third_party/protobuf/version.h" -#endif // @@protoc_insertion_point(includes) - -namespace protobuf_sentencepiece_5fmodel_2eproto { -extern 
PROTOBUF_INTERNAL_EXPORT_protobuf_sentencepiece_5fmodel_2eproto ::google::protobuf::internal::SCCInfo<0> scc_info_ModelProto_SentencePiece; -extern PROTOBUF_INTERNAL_EXPORT_protobuf_sentencepiece_5fmodel_2eproto ::google::protobuf::internal::SCCInfo<0> scc_info_NormalizerSpec; -extern PROTOBUF_INTERNAL_EXPORT_protobuf_sentencepiece_5fmodel_2eproto ::google::protobuf::internal::SCCInfo<0> scc_info_SelfTestData_Sample; -extern PROTOBUF_INTERNAL_EXPORT_protobuf_sentencepiece_5fmodel_2eproto ::google::protobuf::internal::SCCInfo<0> scc_info_TrainerSpec; -extern PROTOBUF_INTERNAL_EXPORT_protobuf_sentencepiece_5fmodel_2eproto ::google::protobuf::internal::SCCInfo<1> scc_info_SelfTestData; -} // namespace protobuf_sentencepiece_5fmodel_2eproto +#include +extern PROTOBUF_INTERNAL_EXPORT_sentencepiece_5fmodel_2eproto ::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_ModelProto_SentencePiece_sentencepiece_5fmodel_2eproto; +extern PROTOBUF_INTERNAL_EXPORT_sentencepiece_5fmodel_2eproto ::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_NormalizerSpec_sentencepiece_5fmodel_2eproto; +extern PROTOBUF_INTERNAL_EXPORT_sentencepiece_5fmodel_2eproto ::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<1> scc_info_SelfTestData_sentencepiece_5fmodel_2eproto; +extern PROTOBUF_INTERNAL_EXPORT_sentencepiece_5fmodel_2eproto ::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_SelfTestData_Sample_sentencepiece_5fmodel_2eproto; +extern PROTOBUF_INTERNAL_EXPORT_sentencepiece_5fmodel_2eproto ::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_TrainerSpec_sentencepiece_5fmodel_2eproto; namespace sentencepiece { class TrainerSpecDefaultTypeInternal { public: - ::google::protobuf::internal::ExplicitlyConstructed - _instance; + ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; } _TrainerSpec_default_instance_; class NormalizerSpecDefaultTypeInternal { public: - ::google::protobuf::internal::ExplicitlyConstructed - _instance; + 
::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; } _NormalizerSpec_default_instance_; class SelfTestData_SampleDefaultTypeInternal { public: - ::google::protobuf::internal::ExplicitlyConstructed - _instance; + ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; } _SelfTestData_Sample_default_instance_; class SelfTestDataDefaultTypeInternal { public: - ::google::protobuf::internal::ExplicitlyConstructed - _instance; + ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; } _SelfTestData_default_instance_; class ModelProto_SentencePieceDefaultTypeInternal { public: - ::google::protobuf::internal::ExplicitlyConstructed - _instance; + ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; } _ModelProto_SentencePiece_default_instance_; class ModelProtoDefaultTypeInternal { public: - ::google::protobuf::internal::ExplicitlyConstructed - _instance; + ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed _instance; } _ModelProto_default_instance_; } // namespace sentencepiece -namespace protobuf_sentencepiece_5fmodel_2eproto { -static void InitDefaultsTrainerSpec() { +static void InitDefaultsscc_info_ModelProto_sentencepiece_5fmodel_2eproto() { GOOGLE_PROTOBUF_VERIFY_VERSION; - ::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.DefaultConstruct(); - *::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get_mutable() = ::std::string("", 5); - ::google::protobuf::internal::OnShutdownDestroyString( - ::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get_mutable()); - ::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.DefaultConstruct(); - *::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get_mutable() = ::std::string("", 3); - ::google::protobuf::internal::OnShutdownDestroyString( - 
::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get_mutable()); - ::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.DefaultConstruct(); - *::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get_mutable() = ::std::string("", 4); - ::google::protobuf::internal::OnShutdownDestroyString( - ::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get_mutable()); - ::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.DefaultConstruct(); - *::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get_mutable() = ::std::string("", 5); - ::google::protobuf::internal::OnShutdownDestroyString( - ::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get_mutable()); - ::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.DefaultConstruct(); - *::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get_mutable() = ::std::string(" \342\201\207 ", 5); - ::google::protobuf::internal::OnShutdownDestroyString( - ::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get_mutable()); { - void* ptr = &::sentencepiece::_TrainerSpec_default_instance_; - new (ptr) ::sentencepiece::TrainerSpec(); - ::google::protobuf::internal::OnShutdownDestroyMessage(ptr); + void* ptr = &::sentencepiece::_ModelProto_default_instance_; + new (ptr) ::sentencepiece::ModelProto(); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); } - ::sentencepiece::TrainerSpec::InitAsDefaultInstance(); } -::google::protobuf::internal::SCCInfo<0> scc_info_TrainerSpec = - {{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 0, InitDefaultsTrainerSpec}, {}}; +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<4> 
scc_info_ModelProto_sentencepiece_5fmodel_2eproto = + {{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 4, 0, InitDefaultsscc_info_ModelProto_sentencepiece_5fmodel_2eproto}, { + &scc_info_ModelProto_SentencePiece_sentencepiece_5fmodel_2eproto.base, + &scc_info_TrainerSpec_sentencepiece_5fmodel_2eproto.base, + &scc_info_NormalizerSpec_sentencepiece_5fmodel_2eproto.base, + &scc_info_SelfTestData_sentencepiece_5fmodel_2eproto.base,}}; -static void InitDefaultsNormalizerSpec() { +static void InitDefaultsscc_info_ModelProto_SentencePiece_sentencepiece_5fmodel_2eproto() { GOOGLE_PROTOBUF_VERIFY_VERSION; { - void* ptr = &::sentencepiece::_NormalizerSpec_default_instance_; - new (ptr) ::sentencepiece::NormalizerSpec(); - ::google::protobuf::internal::OnShutdownDestroyMessage(ptr); + void* ptr = &::sentencepiece::_ModelProto_SentencePiece_default_instance_; + new (ptr) ::sentencepiece::ModelProto_SentencePiece(); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); } - ::sentencepiece::NormalizerSpec::InitAsDefaultInstance(); } -::google::protobuf::internal::SCCInfo<0> scc_info_NormalizerSpec = - {{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 0, InitDefaultsNormalizerSpec}, {}}; +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_ModelProto_SentencePiece_sentencepiece_5fmodel_2eproto = + {{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 0, 0, InitDefaultsscc_info_ModelProto_SentencePiece_sentencepiece_5fmodel_2eproto}, {}}; -static void InitDefaultsSelfTestData_Sample() { +static void InitDefaultsscc_info_NormalizerSpec_sentencepiece_5fmodel_2eproto() { GOOGLE_PROTOBUF_VERIFY_VERSION; { - void* ptr = &::sentencepiece::_SelfTestData_Sample_default_instance_; - new (ptr) ::sentencepiece::SelfTestData_Sample(); - ::google::protobuf::internal::OnShutdownDestroyMessage(ptr); + void* ptr = &::sentencepiece::_NormalizerSpec_default_instance_; + new (ptr) 
::sentencepiece::NormalizerSpec(); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); } - ::sentencepiece::SelfTestData_Sample::InitAsDefaultInstance(); } -::google::protobuf::internal::SCCInfo<0> scc_info_SelfTestData_Sample = - {{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 0, InitDefaultsSelfTestData_Sample}, {}}; +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_NormalizerSpec_sentencepiece_5fmodel_2eproto = + {{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 0, 0, InitDefaultsscc_info_NormalizerSpec_sentencepiece_5fmodel_2eproto}, {}}; -static void InitDefaultsSelfTestData() { +static void InitDefaultsscc_info_SelfTestData_sentencepiece_5fmodel_2eproto() { GOOGLE_PROTOBUF_VERIFY_VERSION; { void* ptr = &::sentencepiece::_SelfTestData_default_instance_; new (ptr) ::sentencepiece::SelfTestData(); - ::google::protobuf::internal::OnShutdownDestroyMessage(ptr); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); } - ::sentencepiece::SelfTestData::InitAsDefaultInstance(); } -::google::protobuf::internal::SCCInfo<1> scc_info_SelfTestData = - {{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 1, InitDefaultsSelfTestData}, { - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_SelfTestData_Sample.base,}}; +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<1> scc_info_SelfTestData_sentencepiece_5fmodel_2eproto = + {{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 1, 0, InitDefaultsscc_info_SelfTestData_sentencepiece_5fmodel_2eproto}, { + &scc_info_SelfTestData_Sample_sentencepiece_5fmodel_2eproto.base,}}; -static void InitDefaultsModelProto_SentencePiece() { +static void InitDefaultsscc_info_SelfTestData_Sample_sentencepiece_5fmodel_2eproto() { GOOGLE_PROTOBUF_VERIFY_VERSION; { - void* ptr = &::sentencepiece::_ModelProto_SentencePiece_default_instance_; - new (ptr) ::sentencepiece::ModelProto_SentencePiece(); - 
::google::protobuf::internal::OnShutdownDestroyMessage(ptr); + void* ptr = &::sentencepiece::_SelfTestData_Sample_default_instance_; + new (ptr) ::sentencepiece::SelfTestData_Sample(); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); } - ::sentencepiece::ModelProto_SentencePiece::InitAsDefaultInstance(); } -::google::protobuf::internal::SCCInfo<0> scc_info_ModelProto_SentencePiece = - {{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 0, InitDefaultsModelProto_SentencePiece}, {}}; +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_SelfTestData_Sample_sentencepiece_5fmodel_2eproto = + {{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 0, 0, InitDefaultsscc_info_SelfTestData_Sample_sentencepiece_5fmodel_2eproto}, {}}; -static void InitDefaultsModelProto() { +static void InitDefaultsscc_info_TrainerSpec_sentencepiece_5fmodel_2eproto() { GOOGLE_PROTOBUF_VERIFY_VERSION; { - void* ptr = &::sentencepiece::_ModelProto_default_instance_; - new (ptr) ::sentencepiece::ModelProto(); - ::google::protobuf::internal::OnShutdownDestroyMessage(ptr); + void* ptr = &::sentencepiece::_TrainerSpec_default_instance_; + new (ptr) ::sentencepiece::TrainerSpec(); + ::PROTOBUF_NAMESPACE_ID::internal::OnShutdownDestroyMessage(ptr); } - ::sentencepiece::ModelProto::InitAsDefaultInstance(); } -::google::protobuf::internal::SCCInfo<4> scc_info_ModelProto = - {{ATOMIC_VAR_INIT(::google::protobuf::internal::SCCInfoBase::kUninitialized), 4, InitDefaultsModelProto}, { - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_ModelProto_SentencePiece.base, - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_TrainerSpec.base, - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_NormalizerSpec.base, - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_SelfTestData.base,}}; +::PROTOBUF_NAMESPACE_ID::internal::SCCInfo<0> scc_info_TrainerSpec_sentencepiece_5fmodel_2eproto = + 
{{ATOMIC_VAR_INIT(::PROTOBUF_NAMESPACE_ID::internal::SCCInfoBase::kUninitialized), 0, 0, InitDefaultsscc_info_TrainerSpec_sentencepiece_5fmodel_2eproto}, {}}; -void InitDefaults() { - ::google::protobuf::internal::InitSCC(&scc_info_TrainerSpec.base); - ::google::protobuf::internal::InitSCC(&scc_info_NormalizerSpec.base); - ::google::protobuf::internal::InitSCC(&scc_info_SelfTestData_Sample.base); - ::google::protobuf::internal::InitSCC(&scc_info_SelfTestData.base); - ::google::protobuf::internal::InitSCC(&scc_info_ModelProto_SentencePiece.base); - ::google::protobuf::internal::InitSCC(&scc_info_ModelProto.base); -} - -} // namespace protobuf_sentencepiece_5fmodel_2eproto namespace sentencepiece { bool TrainerSpec_ModelType_IsValid(int value) { switch (value) { @@ -188,15 +138,62 @@ bool TrainerSpec_ModelType_IsValid(int value) { } } -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const TrainerSpec_ModelType TrainerSpec::UNIGRAM; -const TrainerSpec_ModelType TrainerSpec::BPE; -const TrainerSpec_ModelType TrainerSpec::WORD; -const TrainerSpec_ModelType TrainerSpec::CHAR; -const TrainerSpec_ModelType TrainerSpec::ModelType_MIN; -const TrainerSpec_ModelType TrainerSpec::ModelType_MAX; -const int TrainerSpec::ModelType_ARRAYSIZE; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +static ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed TrainerSpec_ModelType_strings[4] = {}; + +static const char TrainerSpec_ModelType_names[] = + "BPE" + "CHAR" + "UNIGRAM" + "WORD"; + +static const ::PROTOBUF_NAMESPACE_ID::internal::EnumEntry TrainerSpec_ModelType_entries[] = { + { {TrainerSpec_ModelType_names + 0, 3}, 2 }, + { {TrainerSpec_ModelType_names + 3, 4}, 4 }, + { {TrainerSpec_ModelType_names + 7, 7}, 1 }, + { {TrainerSpec_ModelType_names + 14, 4}, 3 }, +}; + +static const int TrainerSpec_ModelType_entries_by_number[] = { + 2, // 1 -> UNIGRAM + 0, // 2 -> BPE + 3, // 3 -> WORD + 1, // 4 -> CHAR +}; + +const std::string& TrainerSpec_ModelType_Name( + TrainerSpec_ModelType 
value) { + static const bool dummy = + ::PROTOBUF_NAMESPACE_ID::internal::InitializeEnumStrings( + TrainerSpec_ModelType_entries, + TrainerSpec_ModelType_entries_by_number, + 4, TrainerSpec_ModelType_strings); + (void) dummy; + int idx = ::PROTOBUF_NAMESPACE_ID::internal::LookUpEnumName( + TrainerSpec_ModelType_entries, + TrainerSpec_ModelType_entries_by_number, + 4, value); + return idx == -1 ? ::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString() : + TrainerSpec_ModelType_strings[idx].get(); +} +bool TrainerSpec_ModelType_Parse( + ::PROTOBUF_NAMESPACE_ID::ConstStringParam name, TrainerSpec_ModelType* value) { + int int_value; + bool success = ::PROTOBUF_NAMESPACE_ID::internal::LookUpEnumValue( + TrainerSpec_ModelType_entries, 4, name, &int_value); + if (success) { + *value = static_cast(int_value); + } + return success; +} +#if (__cplusplus < 201703) && (!defined(_MSC_VER) || _MSC_VER >= 1900) +constexpr TrainerSpec_ModelType TrainerSpec::UNIGRAM; +constexpr TrainerSpec_ModelType TrainerSpec::BPE; +constexpr TrainerSpec_ModelType TrainerSpec::WORD; +constexpr TrainerSpec_ModelType TrainerSpec::CHAR; +constexpr TrainerSpec_ModelType TrainerSpec::ModelType_MIN; +constexpr TrainerSpec_ModelType TrainerSpec::ModelType_MAX; +constexpr int TrainerSpec::ModelType_ARRAYSIZE; +#endif // (__cplusplus < 201703) && (!defined(_MSC_VER) || _MSC_VER >= 1900) bool ModelProto_SentencePiece_Type_IsValid(int value) { switch (value) { case 1: @@ -211,118 +208,270 @@ bool ModelProto_SentencePiece_Type_IsValid(int value) { } } -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const ModelProto_SentencePiece_Type ModelProto_SentencePiece::NORMAL; -const ModelProto_SentencePiece_Type ModelProto_SentencePiece::UNKNOWN; -const ModelProto_SentencePiece_Type ModelProto_SentencePiece::CONTROL; -const ModelProto_SentencePiece_Type ModelProto_SentencePiece::USER_DEFINED; -const ModelProto_SentencePiece_Type ModelProto_SentencePiece::BYTE; -const ModelProto_SentencePiece_Type 
ModelProto_SentencePiece::UNUSED; -const ModelProto_SentencePiece_Type ModelProto_SentencePiece::Type_MIN; -const ModelProto_SentencePiece_Type ModelProto_SentencePiece::Type_MAX; -const int ModelProto_SentencePiece::Type_ARRAYSIZE; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +static ::PROTOBUF_NAMESPACE_ID::internal::ExplicitlyConstructed ModelProto_SentencePiece_Type_strings[6] = {}; + +static const char ModelProto_SentencePiece_Type_names[] = + "BYTE" + "CONTROL" + "NORMAL" + "UNKNOWN" + "UNUSED" + "USER_DEFINED"; + +static const ::PROTOBUF_NAMESPACE_ID::internal::EnumEntry ModelProto_SentencePiece_Type_entries[] = { + { {ModelProto_SentencePiece_Type_names + 0, 4}, 6 }, + { {ModelProto_SentencePiece_Type_names + 4, 7}, 3 }, + { {ModelProto_SentencePiece_Type_names + 11, 6}, 1 }, + { {ModelProto_SentencePiece_Type_names + 17, 7}, 2 }, + { {ModelProto_SentencePiece_Type_names + 24, 6}, 5 }, + { {ModelProto_SentencePiece_Type_names + 30, 12}, 4 }, +}; + +static const int ModelProto_SentencePiece_Type_entries_by_number[] = { + 2, // 1 -> NORMAL + 3, // 2 -> UNKNOWN + 1, // 3 -> CONTROL + 5, // 4 -> USER_DEFINED + 4, // 5 -> UNUSED + 0, // 6 -> BYTE +}; + +const std::string& ModelProto_SentencePiece_Type_Name( + ModelProto_SentencePiece_Type value) { + static const bool dummy = + ::PROTOBUF_NAMESPACE_ID::internal::InitializeEnumStrings( + ModelProto_SentencePiece_Type_entries, + ModelProto_SentencePiece_Type_entries_by_number, + 6, ModelProto_SentencePiece_Type_strings); + (void) dummy; + int idx = ::PROTOBUF_NAMESPACE_ID::internal::LookUpEnumName( + ModelProto_SentencePiece_Type_entries, + ModelProto_SentencePiece_Type_entries_by_number, + 6, value); + return idx == -1 ? 
::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString() : + ModelProto_SentencePiece_Type_strings[idx].get(); +} +bool ModelProto_SentencePiece_Type_Parse( + ::PROTOBUF_NAMESPACE_ID::ConstStringParam name, ModelProto_SentencePiece_Type* value) { + int int_value; + bool success = ::PROTOBUF_NAMESPACE_ID::internal::LookUpEnumValue( + ModelProto_SentencePiece_Type_entries, 6, name, &int_value); + if (success) { + *value = static_cast(int_value); + } + return success; +} +#if (__cplusplus < 201703) && (!defined(_MSC_VER) || _MSC_VER >= 1900) +constexpr ModelProto_SentencePiece_Type ModelProto_SentencePiece::NORMAL; +constexpr ModelProto_SentencePiece_Type ModelProto_SentencePiece::UNKNOWN; +constexpr ModelProto_SentencePiece_Type ModelProto_SentencePiece::CONTROL; +constexpr ModelProto_SentencePiece_Type ModelProto_SentencePiece::USER_DEFINED; +constexpr ModelProto_SentencePiece_Type ModelProto_SentencePiece::BYTE; +constexpr ModelProto_SentencePiece_Type ModelProto_SentencePiece::UNUSED; +constexpr ModelProto_SentencePiece_Type ModelProto_SentencePiece::Type_MIN; +constexpr ModelProto_SentencePiece_Type ModelProto_SentencePiece::Type_MAX; +constexpr int ModelProto_SentencePiece::Type_ARRAYSIZE; +#endif // (__cplusplus < 201703) && (!defined(_MSC_VER) || _MSC_VER >= 1900) // =================================================================== -void TrainerSpec::InitAsDefaultInstance() { -} -::google::protobuf::internal::ExplicitlyConstructed<::std::string> TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_; -::google::protobuf::internal::ExplicitlyConstructed<::std::string> TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_; -::google::protobuf::internal::ExplicitlyConstructed<::std::string> TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_; -::google::protobuf::internal::ExplicitlyConstructed<::std::string> TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_; 
-::google::protobuf::internal::ExplicitlyConstructed<::std::string> TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_; -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int TrainerSpec::kInputFieldNumber; -const int TrainerSpec::kInputFormatFieldNumber; -const int TrainerSpec::kModelPrefixFieldNumber; -const int TrainerSpec::kModelTypeFieldNumber; -const int TrainerSpec::kVocabSizeFieldNumber; -const int TrainerSpec::kAcceptLanguageFieldNumber; -const int TrainerSpec::kSelfTestSampleSizeFieldNumber; -const int TrainerSpec::kCharacterCoverageFieldNumber; -const int TrainerSpec::kInputSentenceSizeFieldNumber; -const int TrainerSpec::kShuffleInputSentenceFieldNumber; -const int TrainerSpec::kMiningSentenceSizeFieldNumber; -const int TrainerSpec::kTrainingSentenceSizeFieldNumber; -const int TrainerSpec::kSeedSentencepieceSizeFieldNumber; -const int TrainerSpec::kShrinkingFactorFieldNumber; -const int TrainerSpec::kMaxSentenceLengthFieldNumber; -const int TrainerSpec::kNumThreadsFieldNumber; -const int TrainerSpec::kNumSubIterationsFieldNumber; -const int TrainerSpec::kMaxSentencepieceLengthFieldNumber; -const int TrainerSpec::kSplitByUnicodeScriptFieldNumber; -const int TrainerSpec::kSplitByNumberFieldNumber; -const int TrainerSpec::kSplitByWhitespaceFieldNumber; -const int TrainerSpec::kTreatWhitespaceAsSuffixFieldNumber; -const int TrainerSpec::kSplitDigitsFieldNumber; -const int TrainerSpec::kControlSymbolsFieldNumber; -const int TrainerSpec::kUserDefinedSymbolsFieldNumber; -const int TrainerSpec::kRequiredCharsFieldNumber; -const int TrainerSpec::kByteFallbackFieldNumber; -const int TrainerSpec::kVocabularyOutputPieceScoreFieldNumber; -const int TrainerSpec::kHardVocabLimitFieldNumber; -const int TrainerSpec::kUseAllVocabFieldNumber; -const int TrainerSpec::kUnkIdFieldNumber; -const int TrainerSpec::kBosIdFieldNumber; -const int TrainerSpec::kEosIdFieldNumber; -const int TrainerSpec::kPadIdFieldNumber; -const int 
TrainerSpec::kUnkPieceFieldNumber; -const int TrainerSpec::kBosPieceFieldNumber; -const int TrainerSpec::kEosPieceFieldNumber; -const int TrainerSpec::kPadPieceFieldNumber; -const int TrainerSpec::kUnkSurfaceFieldNumber; -const int TrainerSpec::kTrainExtremelyLargeCorpusFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 - -TrainerSpec::TrainerSpec() - : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { - ::google::protobuf::internal::InitSCC( - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_TrainerSpec.base); +class TrainerSpec::_Internal { + public: + using HasBits = decltype(std::declval()._has_bits_); + static void set_has_input_format(HasBits* has_bits) { + (*has_bits)[0] |= 2u; + } + static void set_has_model_prefix(HasBits* has_bits) { + (*has_bits)[0] |= 1u; + } + static void set_has_model_type(HasBits* has_bits) { + (*has_bits)[0] |= 8388608u; + } + static void set_has_vocab_size(HasBits* has_bits) { + (*has_bits)[0] |= 16777216u; + } + static void set_has_self_test_sample_size(HasBits* has_bits) { + (*has_bits)[0] |= 512u; + } + static void set_has_enable_differential_privacy(HasBits* has_bits) { + (*has_bits)[0] |= 8192u; + } + static void set_has_differential_privacy_noise_level(HasBits* has_bits) { + (*has_bits)[0] |= 2097152u; + } + static void set_has_differential_privacy_clipping_threshold(HasBits* has_bits) { + (*has_bits)[0] |= 4194304u; + } + static void set_has_character_coverage(HasBits* has_bits) { + (*has_bits)[0] |= 33554432u; + } + static void set_has_input_sentence_size(HasBits* has_bits) { + (*has_bits)[0] |= 2048u; + } + static void set_has_shuffle_input_sentence(HasBits* has_bits) { + (*has_bits)[1] |= 1u; + } + static void set_has_mining_sentence_size(HasBits* has_bits) { + (*has_bits)[0] |= 1024u; + } + static void set_has_training_sentence_size(HasBits* has_bits) { + (*has_bits)[0] |= 4096u; + } + static void set_has_seed_sentencepiece_size(HasBits* has_bits) { + (*has_bits)[0] |= 67108864u; + } + static 
void set_has_shrinking_factor(HasBits* has_bits) { + (*has_bits)[0] |= 134217728u; + } + static void set_has_max_sentence_length(HasBits* has_bits) { + (*has_bits)[0] |= 1073741824u; + } + static void set_has_num_threads(HasBits* has_bits) { + (*has_bits)[0] |= 268435456u; + } + static void set_has_num_sub_iterations(HasBits* has_bits) { + (*has_bits)[0] |= 536870912u; + } + static void set_has_max_sentencepiece_length(HasBits* has_bits) { + (*has_bits)[0] |= 2147483648u; + } + static void set_has_split_by_unicode_script(HasBits* has_bits) { + (*has_bits)[1] |= 2u; + } + static void set_has_split_by_number(HasBits* has_bits) { + (*has_bits)[1] |= 4u; + } + static void set_has_split_by_whitespace(HasBits* has_bits) { + (*has_bits)[1] |= 8u; + } + static void set_has_treat_whitespace_as_suffix(HasBits* has_bits) { + (*has_bits)[0] |= 16384u; + } + static void set_has_allow_whitespace_only_pieces(HasBits* has_bits) { + (*has_bits)[0] |= 32768u; + } + static void set_has_split_digits(HasBits* has_bits) { + (*has_bits)[0] |= 65536u; + } + static void set_has_pretokenization_delimiter(HasBits* has_bits) { + (*has_bits)[0] |= 256u; + } + static void set_has_required_chars(HasBits* has_bits) { + (*has_bits)[0] |= 4u; + } + static void set_has_byte_fallback(HasBits* has_bits) { + (*has_bits)[0] |= 131072u; + } + static void set_has_vocabulary_output_piece_score(HasBits* has_bits) { + (*has_bits)[1] |= 16u; + } + static void set_has_hard_vocab_limit(HasBits* has_bits) { + (*has_bits)[1] |= 32u; + } + static void set_has_use_all_vocab(HasBits* has_bits) { + (*has_bits)[0] |= 262144u; + } + static void set_has_unk_id(HasBits* has_bits) { + (*has_bits)[0] |= 1048576u; + } + static void set_has_bos_id(HasBits* has_bits) { + (*has_bits)[1] |= 64u; + } + static void set_has_eos_id(HasBits* has_bits) { + (*has_bits)[1] |= 128u; + } + static void set_has_pad_id(HasBits* has_bits) { + (*has_bits)[1] |= 256u; + } + static void set_has_unk_piece(HasBits* has_bits) { + (*has_bits)[0] |= 
16u; + } + static void set_has_bos_piece(HasBits* has_bits) { + (*has_bits)[0] |= 32u; + } + static void set_has_eos_piece(HasBits* has_bits) { + (*has_bits)[0] |= 64u; + } + static void set_has_pad_piece(HasBits* has_bits) { + (*has_bits)[0] |= 128u; + } + static void set_has_unk_surface(HasBits* has_bits) { + (*has_bits)[0] |= 8u; + } + static void set_has_train_extremely_large_corpus(HasBits* has_bits) { + (*has_bits)[0] |= 524288u; + } +}; + +const ::PROTOBUF_NAMESPACE_ID::internal::LazyString TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_{{{"", 5}}, {nullptr}}; +const ::PROTOBUF_NAMESPACE_ID::internal::LazyString TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_{{{"", 3}}, {nullptr}}; +const ::PROTOBUF_NAMESPACE_ID::internal::LazyString TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_{{{"", 4}}, {nullptr}}; +const ::PROTOBUF_NAMESPACE_ID::internal::LazyString TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_{{{"", 5}}, {nullptr}}; +const ::PROTOBUF_NAMESPACE_ID::internal::LazyString TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_{{{" \342\201\207 ", 5}}, {nullptr}}; +TrainerSpec::TrainerSpec(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::MessageLite(arena), + _extensions_(arena), + input_(arena), + accept_language_(arena), + control_symbols_(arena), + user_defined_symbols_(arena) { SharedCtor(); - // @@protoc_insertion_point(constructor:sentencepiece.TrainerSpec) + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:sentencepiece.TrainerSpec) } TrainerSpec::TrainerSpec(const TrainerSpec& from) - : ::google::protobuf::MessageLite(), - _internal_metadata_(NULL), + : ::PROTOBUF_NAMESPACE_ID::MessageLite(), _has_bits_(from._has_bits_), input_(from.input_), accept_language_(from.accept_language_), control_symbols_(from.control_symbols_), user_defined_symbols_(from.user_defined_symbols_) { - 
_internal_metadata_.MergeFrom(from._internal_metadata_); + _internal_metadata_.MergeFrom(from._internal_metadata_); _extensions_.MergeFrom(from._extensions_); - model_prefix_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_model_prefix()) { - model_prefix_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.model_prefix_); - } - input_format_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_input_format()) { - input_format_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.input_format_); - } - required_chars_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_required_chars()) { - required_chars_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.required_chars_); - } - unk_surface_.UnsafeSetDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get()); - if (from.has_unk_surface()) { - unk_surface_.AssignWithDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get(), from.unk_surface_); - } - unk_piece_.UnsafeSetDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get()); - if (from.has_unk_piece()) { - unk_piece_.AssignWithDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get(), from.unk_piece_); - } - bos_piece_.UnsafeSetDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get()); - if (from.has_bos_piece()) { - bos_piece_.AssignWithDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get(), from.bos_piece_); - } - eos_piece_.UnsafeSetDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get()); - if (from.has_eos_piece()) { - 
eos_piece_.AssignWithDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get(), from.eos_piece_); - } - pad_piece_.UnsafeSetDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get()); - if (from.has_pad_piece()) { - pad_piece_.AssignWithDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get(), from.pad_piece_); + model_prefix_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_model_prefix()) { + model_prefix_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_model_prefix(), + GetArena()); + } + input_format_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_input_format()) { + input_format_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_input_format(), + GetArena()); + } + required_chars_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_required_chars()) { + required_chars_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_required_chars(), + GetArena()); + } + unk_surface_.UnsafeSetDefault(nullptr); + if (from._internal_has_unk_surface()) { + unk_surface_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, from._internal_unk_surface(), + GetArena()); + } + unk_piece_.UnsafeSetDefault(nullptr); + if (from._internal_has_unk_piece()) { + unk_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, from._internal_unk_piece(), + GetArena()); + } + bos_piece_.UnsafeSetDefault(nullptr); + if (from._internal_has_bos_piece()) { + bos_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, from._internal_bos_piece(), + GetArena()); + } + eos_piece_.UnsafeSetDefault(nullptr); + if 
(from._internal_has_eos_piece()) { + eos_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, from._internal_eos_piece(), + GetArena()); + } + pad_piece_.UnsafeSetDefault(nullptr); + if (from._internal_has_pad_piece()) { + pad_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, from._internal_pad_piece(), + GetArena()); + } + pretokenization_delimiter_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_pretokenization_delimiter()) { + pretokenization_delimiter_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_pretokenization_delimiter(), + GetArena()); } ::memcpy(&self_test_sample_size_, &from.self_test_sample_size_, static_cast(reinterpret_cast(&pad_id_) - @@ -331,17 +480,20 @@ TrainerSpec::TrainerSpec(const TrainerSpec& from) } void TrainerSpec::SharedCtor() { - model_prefix_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - input_format_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - required_chars_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - unk_surface_.UnsafeSetDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get()); - unk_piece_.UnsafeSetDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get()); - bos_piece_.UnsafeSetDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get()); - eos_piece_.UnsafeSetDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get()); - pad_piece_.UnsafeSetDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get()); - ::memset(&self_test_sample_size_, 0, static_cast( - reinterpret_cast(&train_extremely_large_corpus_) - - reinterpret_cast(&self_test_sample_size_)) 
+ sizeof(train_extremely_large_corpus_)); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_TrainerSpec_sentencepiece_5fmodel_2eproto.base); + model_prefix_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + input_format_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + required_chars_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + unk_surface_.UnsafeSetDefault(nullptr); + unk_piece_.UnsafeSetDefault(nullptr); + bos_piece_.UnsafeSetDefault(nullptr); + eos_piece_.UnsafeSetDefault(nullptr); + pad_piece_.UnsafeSetDefault(nullptr); + pretokenization_delimiter_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + ::memset(reinterpret_cast(this) + static_cast( + reinterpret_cast(&self_test_sample_size_) - reinterpret_cast(this)), + 0, static_cast(reinterpret_cast(&differential_privacy_clipping_threshold_) - + reinterpret_cast(&self_test_sample_size_)) + sizeof(differential_privacy_clipping_threshold_)); model_type_ = 1; vocab_size_ = 8000; character_coverage_ = 0.9995f; @@ -365,31 +517,40 @@ void TrainerSpec::SharedCtor() { TrainerSpec::~TrainerSpec() { // @@protoc_insertion_point(destructor:sentencepiece.TrainerSpec) SharedDtor(); + _internal_metadata_.Delete(); } void TrainerSpec::SharedDtor() { - model_prefix_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - input_format_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - required_chars_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - unk_surface_.DestroyNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get()); - unk_piece_.DestroyNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get()); - 
bos_piece_.DestroyNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get()); - eos_piece_.DestroyNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get()); - pad_piece_.DestroyNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get()); + GOOGLE_DCHECK(GetArena() == nullptr); + model_prefix_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + input_format_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + required_chars_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + unk_surface_.DestroyNoArena(nullptr); + unk_piece_.DestroyNoArena(nullptr); + bos_piece_.DestroyNoArena(nullptr); + eos_piece_.DestroyNoArena(nullptr); + pad_piece_.DestroyNoArena(nullptr); + pretokenization_delimiter_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); } +void TrainerSpec::ArenaDtor(void* object) { + TrainerSpec* _this = reinterpret_cast< TrainerSpec* >(object); + (void)_this; +} +void TrainerSpec::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} void TrainerSpec::SetCachedSize(int size) const { _cached_size_.Set(size); } const TrainerSpec& TrainerSpec::default_instance() { - ::google::protobuf::internal::InitSCC(&protobuf_sentencepiece_5fmodel_2eproto::scc_info_TrainerSpec.base); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_TrainerSpec_sentencepiece_5fmodel_2eproto.base); return *internal_default_instance(); } void TrainerSpec::Clear() { // @@protoc_insertion_point(message_clear_start:sentencepiece.TrainerSpec) - ::google::protobuf::uint32 cached_has_bits = 0; + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; @@ -399,885 +560,787 @@ void TrainerSpec::Clear() { control_symbols_.Clear(); 
user_defined_symbols_.Clear(); cached_has_bits = _has_bits_[0]; - if (cached_has_bits & 255u) { + if (cached_has_bits & 0x000000ffu) { if (cached_has_bits & 0x00000001u) { - model_prefix_.ClearNonDefaultToEmptyNoArena(); + model_prefix_.ClearNonDefaultToEmpty(); } if (cached_has_bits & 0x00000002u) { - input_format_.ClearNonDefaultToEmptyNoArena(); + input_format_.ClearNonDefaultToEmpty(); } if (cached_has_bits & 0x00000004u) { - required_chars_.ClearNonDefaultToEmptyNoArena(); + required_chars_.ClearNonDefaultToEmpty(); } if (cached_has_bits & 0x00000008u) { - unk_surface_.UnsafeMutablePointer()->assign(*&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get()); - } + unk_surface_.ClearToDefault(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_, GetArena()); + } if (cached_has_bits & 0x00000010u) { - unk_piece_.UnsafeMutablePointer()->assign(*&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get()); - } + unk_piece_.ClearToDefault(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_, GetArena()); + } if (cached_has_bits & 0x00000020u) { - bos_piece_.UnsafeMutablePointer()->assign(*&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get()); - } + bos_piece_.ClearToDefault(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_, GetArena()); + } if (cached_has_bits & 0x00000040u) { - eos_piece_.UnsafeMutablePointer()->assign(*&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get()); - } + eos_piece_.ClearToDefault(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_, GetArena()); + } if (cached_has_bits & 0x00000080u) { - pad_piece_.UnsafeMutablePointer()->assign(*&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get()); - } + 
pad_piece_.ClearToDefault(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_, GetArena()); + } + } + if (cached_has_bits & 0x00000100u) { + pretokenization_delimiter_.ClearNonDefaultToEmpty(); } - if (cached_has_bits & 65280u) { + if (cached_has_bits & 0x0000fe00u) { ::memset(&self_test_sample_size_, 0, static_cast( - reinterpret_cast(&use_all_vocab_) - - reinterpret_cast(&self_test_sample_size_)) + sizeof(use_all_vocab_)); + reinterpret_cast(&allow_whitespace_only_pieces_) - + reinterpret_cast(&self_test_sample_size_)) + sizeof(allow_whitespace_only_pieces_)); } - if (cached_has_bits & 16711680u) { - ::memset(&unk_id_, 0, static_cast( - reinterpret_cast(&train_extremely_large_corpus_) - - reinterpret_cast(&unk_id_)) + sizeof(train_extremely_large_corpus_)); + if (cached_has_bits & 0x00ff0000u) { + ::memset(&split_digits_, 0, static_cast( + reinterpret_cast(&differential_privacy_clipping_threshold_) - + reinterpret_cast(&split_digits_)) + sizeof(differential_privacy_clipping_threshold_)); model_type_ = 1; + } + if (cached_has_bits & 0xff000000u) { vocab_size_ = 8000; character_coverage_ = 0.9995f; seed_sentencepiece_size_ = 1000000; shrinking_factor_ = 0.75f; num_threads_ = 16; - } - if (cached_has_bits & 4278190080u) { num_sub_iterations_ = 2; max_sentence_length_ = 4192; max_sentencepiece_length_ = 16; + } + cached_has_bits = _has_bits_[1]; + if (cached_has_bits & 0x000000ffu) { shuffle_input_sentence_ = true; split_by_unicode_script_ = true; split_by_number_ = true; split_by_whitespace_ = true; vocabulary_output_piece_score_ = true; - } - cached_has_bits = _has_bits_[1]; - if (cached_has_bits & 15u) { hard_vocab_limit_ = true; bos_id_ = 1; eos_id_ = 2; - pad_id_ = -1; } + pad_id_ = -1; _has_bits_.Clear(); - _internal_metadata_.Clear(); -} - -bool TrainerSpec::MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - 
::google::protobuf::uint32 tag; - ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter( - &_internal_metadata_); - ::google::protobuf::io::StringOutputStream unknown_fields_output( - unknown_fields_setter.buffer()); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_output, false); - // @@protoc_insertion_point(parse_start:sentencepiece.TrainerSpec) - for (;;) { - ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(16383u); - tag = p.first; - if (!p.second) goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // repeated string input = 1; - case 1: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u /* 10 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->add_input())); - } else { - goto handle_unusual; - } - break; - } + _internal_metadata_.Clear(); +} +const char* TrainerSpec::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // repeated string input = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + ptr -= 1; + do { + ptr += 1; + auto str = _internal_add_input(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + if (!ctx->DataAvailable(ptr)) break; + } while (::PROTOBUF_NAMESPACE_ID::internal::ExpectTag<10>(ptr)); + } else goto handle_unusual; + continue; // optional string model_prefix = 2; - case 2: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(18u /* 18 & 0xFF */)) { - 
DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_model_prefix())); - } else { - goto handle_unusual; - } - break; - } - + case 2: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 18)) { + auto str = _internal_mutable_model_prefix(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; - case 3: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(24u /* 24 & 0xFF */)) { - int value; - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - int, ::google::protobuf::internal::WireFormatLite::TYPE_ENUM>( - input, &value))); - if (::sentencepiece::TrainerSpec_ModelType_IsValid(value)) { - set_model_type(static_cast< ::sentencepiece::TrainerSpec_ModelType >(value)); + case 3: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 24)) { + ::PROTOBUF_NAMESPACE_ID::uint64 val = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + if (PROTOBUF_PREDICT_TRUE(::sentencepiece::TrainerSpec_ModelType_IsValid(val))) { + _internal_set_model_type(static_cast<::sentencepiece::TrainerSpec_ModelType>(val)); } else { - unknown_fields_stream.WriteVarint32(24u); - unknown_fields_stream.WriteVarint32( - static_cast< ::google::protobuf::uint32>(value)); + ::PROTOBUF_NAMESPACE_ID::internal::WriteVarint(3, val, mutable_unknown_fields()); } - } else { - goto handle_unusual; - } - break; - } - + } else goto handle_unusual; + continue; // optional int32 vocab_size = 4 [default = 8000]; - case 4: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(32u /* 32 & 0xFF */)) { - set_has_vocab_size(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, 
::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &vocab_size_))); - } else { - goto handle_unusual; - } - break; - } - + case 4: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 32)) { + _Internal::set_has_vocab_size(&_has_bits_); + vocab_size_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // repeated string accept_language = 5; - case 5: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(42u /* 42 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->add_accept_language())); - } else { - goto handle_unusual; - } - break; - } - + case 5: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 42)) { + ptr -= 1; + do { + ptr += 1; + auto str = _internal_add_accept_language(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + if (!ctx->DataAvailable(ptr)) break; + } while (::PROTOBUF_NAMESPACE_ID::internal::ExpectTag<42>(ptr)); + } else goto handle_unusual; + continue; // optional int32 self_test_sample_size = 6 [default = 0]; - case 6: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(48u /* 48 & 0xFF */)) { - set_has_self_test_sample_size(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &self_test_sample_size_))); - } else { - goto handle_unusual; - } - break; - } - + case 6: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 48)) { + _Internal::set_has_self_test_sample_size(&_has_bits_); + self_test_sample_size_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional string input_format = 7; - case 7: { - if (static_cast< 
::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(58u /* 58 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_input_format())); - } else { - goto handle_unusual; - } - break; - } - + case 7: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 58)) { + auto str = _internal_mutable_input_format(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional float character_coverage = 10 [default = 0.9995]; - case 10: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(85u /* 85 & 0xFF */)) { - set_has_character_coverage(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &character_coverage_))); - } else { - goto handle_unusual; - } - break; - } - - // optional int32 input_sentence_size = 11 [default = 0]; - case 11: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(88u /* 88 & 0xFF */)) { - set_has_input_sentence_size(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &input_sentence_size_))); - } else { - goto handle_unusual; - } - break; - } - + case 10: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 85)) { + _Internal::set_has_character_coverage(&_has_bits_); + character_coverage_ = ::PROTOBUF_NAMESPACE_ID::internal::UnalignedLoad(ptr); + ptr += sizeof(float); + } else goto handle_unusual; + continue; + // optional uint64 input_sentence_size = 11 [default = 0]; + case 11: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 88)) { + _Internal::set_has_input_sentence_size(&_has_bits_); + input_sentence_size_ = 
::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional int32 mining_sentence_size = 12 [deprecated = true]; - case 12: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(96u /* 96 & 0xFF */)) { - set_has_mining_sentence_size(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &mining_sentence_size_))); - } else { - goto handle_unusual; - } - break; - } - + case 12: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 96)) { + _Internal::set_has_mining_sentence_size(&_has_bits_); + mining_sentence_size_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional int32 training_sentence_size = 13 [deprecated = true]; - case 13: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(104u /* 104 & 0xFF */)) { - set_has_training_sentence_size(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &training_sentence_size_))); - } else { - goto handle_unusual; - } - break; - } - + case 13: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 104)) { + _Internal::set_has_training_sentence_size(&_has_bits_); + training_sentence_size_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional int32 seed_sentencepiece_size = 14 [default = 1000000]; - case 14: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(112u /* 112 & 0xFF */)) { - set_has_seed_sentencepiece_size(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, 
::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &seed_sentencepiece_size_))); - } else { - goto handle_unusual; - } - break; - } - + case 14: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 112)) { + _Internal::set_has_seed_sentencepiece_size(&_has_bits_); + seed_sentencepiece_size_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional float shrinking_factor = 15 [default = 0.75]; - case 15: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(125u /* 125 & 0xFF */)) { - set_has_shrinking_factor(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &shrinking_factor_))); - } else { - goto handle_unusual; - } - break; - } - + case 15: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 125)) { + _Internal::set_has_shrinking_factor(&_has_bits_); + shrinking_factor_ = ::PROTOBUF_NAMESPACE_ID::internal::UnalignedLoad(ptr); + ptr += sizeof(float); + } else goto handle_unusual; + continue; // optional int32 num_threads = 16 [default = 16]; - case 16: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(128u /* 128 & 0xFF */)) { - set_has_num_threads(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &num_threads_))); - } else { - goto handle_unusual; - } - break; - } - + case 16: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 128)) { + _Internal::set_has_num_threads(&_has_bits_); + num_threads_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional int32 num_sub_iterations = 17 [default = 2]; - case 17: { - if (static_cast< 
::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(136u /* 136 & 0xFF */)) { - set_has_num_sub_iterations(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &num_sub_iterations_))); - } else { - goto handle_unusual; - } - break; - } - + case 17: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 136)) { + _Internal::set_has_num_sub_iterations(&_has_bits_); + num_sub_iterations_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional int32 max_sentence_length = 18 [default = 4192]; - case 18: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(144u /* 144 & 0xFF */)) { - set_has_max_sentence_length(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &max_sentence_length_))); - } else { - goto handle_unusual; - } - break; - } - + case 18: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 144)) { + _Internal::set_has_max_sentence_length(&_has_bits_); + max_sentence_length_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool shuffle_input_sentence = 19 [default = true]; - case 19: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(152u /* 152 & 0xFF */)) { - set_has_shuffle_input_sentence(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &shuffle_input_sentence_))); - } else { - goto handle_unusual; - } - break; - } - + case 19: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 152)) { + 
_Internal::set_has_shuffle_input_sentence(&_has_bits_); + shuffle_input_sentence_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional int32 max_sentencepiece_length = 20 [default = 16]; - case 20: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(160u /* 160 & 0xFF */)) { - set_has_max_sentencepiece_length(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &max_sentencepiece_length_))); - } else { - goto handle_unusual; - } - break; - } - + case 20: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 160)) { + _Internal::set_has_max_sentencepiece_length(&_has_bits_); + max_sentencepiece_length_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool split_by_unicode_script = 21 [default = true]; - case 21: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(168u /* 168 & 0xFF */)) { - set_has_split_by_unicode_script(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &split_by_unicode_script_))); - } else { - goto handle_unusual; - } - break; - } - + case 21: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 168)) { + _Internal::set_has_split_by_unicode_script(&_has_bits_); + split_by_unicode_script_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool split_by_whitespace = 22 [default = true]; - case 22: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(176u /* 176 & 0xFF */)) { - set_has_split_by_whitespace(); - 
DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &split_by_whitespace_))); - } else { - goto handle_unusual; - } - break; - } - + case 22: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 176)) { + _Internal::set_has_split_by_whitespace(&_has_bits_); + split_by_whitespace_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool split_by_number = 23 [default = true]; - case 23: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(184u /* 184 & 0xFF */)) { - set_has_split_by_number(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &split_by_number_))); - } else { - goto handle_unusual; - } - break; - } - + case 23: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 184)) { + _Internal::set_has_split_by_number(&_has_bits_); + split_by_number_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool treat_whitespace_as_suffix = 24 [default = false]; - case 24: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(192u /* 192 & 0xFF */)) { - set_has_treat_whitespace_as_suffix(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &treat_whitespace_as_suffix_))); - } else { - goto handle_unusual; - } - break; - } - + case 24: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 192)) { + _Internal::set_has_treat_whitespace_as_suffix(&_has_bits_); + treat_whitespace_as_suffix_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + 
continue; // optional bool split_digits = 25 [default = false]; - case 25: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(200u /* 200 & 0xFF */)) { - set_has_split_digits(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &split_digits_))); - } else { - goto handle_unusual; - } - break; - } - + case 25: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 200)) { + _Internal::set_has_split_digits(&_has_bits_); + split_digits_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; + // optional bool allow_whitespace_only_pieces = 26 [default = false]; + case 26: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 208)) { + _Internal::set_has_allow_whitespace_only_pieces(&_has_bits_); + allow_whitespace_only_pieces_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // repeated string control_symbols = 30; - case 30: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(242u /* 242 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->add_control_symbols())); - } else { - goto handle_unusual; - } - break; - } - + case 30: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 242)) { + ptr -= 2; + do { + ptr += 2; + auto str = _internal_add_control_symbols(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + if (!ctx->DataAvailable(ptr)) break; + } while (::PROTOBUF_NAMESPACE_ID::internal::ExpectTag<242>(ptr)); + } else goto handle_unusual; + continue; // repeated string user_defined_symbols = 31; - case 31: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< 
::google::protobuf::uint8>(250u /* 250 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->add_user_defined_symbols())); - } else { - goto handle_unusual; - } - break; - } - + case 31: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 250)) { + ptr -= 2; + do { + ptr += 2; + auto str = _internal_add_user_defined_symbols(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + if (!ctx->DataAvailable(ptr)) break; + } while (::PROTOBUF_NAMESPACE_ID::internal::ExpectTag<250>(ptr)); + } else goto handle_unusual; + continue; // optional bool vocabulary_output_piece_score = 32 [default = true]; - case 32: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(0u /* 256 & 0xFF */)) { - set_has_vocabulary_output_piece_score(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &vocabulary_output_piece_score_))); - } else { - goto handle_unusual; - } - break; - } - + case 32: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 0)) { + _Internal::set_has_vocabulary_output_piece_score(&_has_bits_); + vocabulary_output_piece_score_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool hard_vocab_limit = 33 [default = true]; - case 33: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(8u /* 264 & 0xFF */)) { - set_has_hard_vocab_limit(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &hard_vocab_limit_))); - } else { - goto handle_unusual; - } - break; - } - + case 33: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 8)) { + 
_Internal::set_has_hard_vocab_limit(&_has_bits_); + hard_vocab_limit_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool use_all_vocab = 34 [default = false]; - case 34: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(16u /* 272 & 0xFF */)) { - set_has_use_all_vocab(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &use_all_vocab_))); - } else { - goto handle_unusual; - } - break; - } - + case 34: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 16)) { + _Internal::set_has_use_all_vocab(&_has_bits_); + use_all_vocab_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool byte_fallback = 35 [default = false]; - case 35: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(24u /* 280 & 0xFF */)) { - set_has_byte_fallback(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &byte_fallback_))); - } else { - goto handle_unusual; - } - break; - } - + case 35: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 24)) { + _Internal::set_has_byte_fallback(&_has_bits_); + byte_fallback_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional string required_chars = 36; - case 36: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(34u /* 290 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_required_chars())); - } else { - goto handle_unusual; - } - break; - } - + case 36: + if 
(PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 34)) { + auto str = _internal_mutable_required_chars(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional int32 unk_id = 40 [default = 0]; - case 40: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(64u /* 320 & 0xFF */)) { - set_has_unk_id(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &unk_id_))); - } else { - goto handle_unusual; - } - break; - } - + case 40: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 64)) { + _Internal::set_has_unk_id(&_has_bits_); + unk_id_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional int32 bos_id = 41 [default = 1]; - case 41: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(72u /* 328 & 0xFF */)) { - set_has_bos_id(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &bos_id_))); - } else { - goto handle_unusual; - } - break; - } - + case 41: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 72)) { + _Internal::set_has_bos_id(&_has_bits_); + bos_id_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional int32 eos_id = 42 [default = 2]; - case 42: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(80u /* 336 & 0xFF */)) { - set_has_eos_id(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, 
::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &eos_id_))); - } else { - goto handle_unusual; - } - break; - } - + case 42: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 80)) { + _Internal::set_has_eos_id(&_has_bits_); + eos_id_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional int32 pad_id = 43 [default = -1]; - case 43: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(88u /* 344 & 0xFF */)) { - set_has_pad_id(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - ::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>( - input, &pad_id_))); - } else { - goto handle_unusual; - } - break; - } - + case 43: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 88)) { + _Internal::set_has_pad_id(&_has_bits_); + pad_id_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional string unk_surface = 44 [default = " \342\201\207 "]; - case 44: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(98u /* 354 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_unk_surface())); - } else { - goto handle_unusual; - } - break; - } - + case 44: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 98)) { + auto str = _internal_mutable_unk_surface(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional string unk_piece = 45 [default = ""]; - case 45: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(106u /* 362 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, 
this->mutable_unk_piece())); - } else { - goto handle_unusual; - } - break; - } - + case 45: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 106)) { + auto str = _internal_mutable_unk_piece(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional string bos_piece = 46 [default = ""]; - case 46: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(114u /* 370 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_bos_piece())); - } else { - goto handle_unusual; - } - break; - } - + case 46: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 114)) { + auto str = _internal_mutable_bos_piece(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional string eos_piece = 47 [default = ""]; - case 47: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(122u /* 378 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_eos_piece())); - } else { - goto handle_unusual; - } - break; - } - + case 47: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 122)) { + auto str = _internal_mutable_eos_piece(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional string pad_piece = 48 [default = ""]; - case 48: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(130u /* 386 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_pad_piece())); - } else { - goto handle_unusual; - } - break; - } - + case 48: + if 
(PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 130)) { + auto str = _internal_mutable_pad_piece(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool train_extremely_large_corpus = 49 [default = false]; - case 49: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(136u /* 392 & 0xFF */)) { - set_has_train_extremely_large_corpus(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &train_extremely_large_corpus_))); - } else { - goto handle_unusual; - } - break; - } - + case 49: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 136)) { + _Internal::set_has_train_extremely_large_corpus(&_has_bits_); + train_extremely_large_corpus_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; + // optional bool enable_differential_privacy = 50 [default = false]; + case 50: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 144)) { + _Internal::set_has_enable_differential_privacy(&_has_bits_); + enable_differential_privacy_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; + // optional float differential_privacy_noise_level = 51 [default = 0]; + case 51: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 157)) { + _Internal::set_has_differential_privacy_noise_level(&_has_bits_); + differential_privacy_noise_level_ = ::PROTOBUF_NAMESPACE_ID::internal::UnalignedLoad<float>(ptr); + ptr += sizeof(float); + } else goto handle_unusual; + continue; + // optional uint64 differential_privacy_clipping_threshold = 52 [default = 0]; + case 52: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 
160)) { + _Internal::set_has_differential_privacy_clipping_threshold(&_has_bits_); + differential_privacy_clipping_threshold_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; + // optional string pretokenization_delimiter = 53 [default = ""]; + case 53: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 170)) { + auto str = _internal_mutable_pretokenization_delimiter(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; default: { handle_unusual: - if (tag == 0) { + if ((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); goto success; } - if ((1600u <= tag)) { - DO_(_extensions_.ParseField(tag, input, - internal_default_instance(), - &unknown_fields_stream)); - continue; - } - DO_(::google::protobuf::internal::WireFormatLite::SkipField( - input, tag, &unknown_fields_stream)); - break; + if ((1600u <= tag)) { + ptr = _extensions_.ParseField(tag, ptr, + internal_default_instance(), &_internal_metadata_, ctx); + CHK_(ptr != nullptr); + continue; } - } - } + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields<std::string>(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; + } + } // switch + } // while success: - // @@protoc_insertion_point(parse_success:sentencepiece.TrainerSpec) - return true; + return ptr; failure: - // @@protoc_insertion_point(parse_failure:sentencepiece.TrainerSpec) - return false; -#undef DO_ + ptr = nullptr; + goto success; +#undef CHK_ } -void TrainerSpec::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:sentencepiece.TrainerSpec) - ::google::protobuf::uint32 cached_has_bits = 0; +::PROTOBUF_NAMESPACE_ID::uint8* TrainerSpec::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // 
@@protoc_insertion_point(serialize_to_array_start:sentencepiece.TrainerSpec) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; // repeated string input = 1; - for (int i = 0, n = this->input_size(); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteString( - 1, this->input(i), output); + for (int i = 0, n = this->_internal_input_size(); i < n; i++) { + const auto& s = this->_internal_input(i); + target = stream->WriteString(1, s, target); } cached_has_bits = _has_bits_[0]; // optional string model_prefix = 2; if (cached_has_bits & 0x00000001u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 2, this->model_prefix(), output); + target = stream->WriteStringMaybeAliased( + 2, this->_internal_model_prefix(), target); } // optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; - if (cached_has_bits & 0x00040000u) { - ::google::protobuf::internal::WireFormatLite::WriteEnum( - 3, this->model_type(), output); + if (cached_has_bits & 0x00800000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteEnumToArray( + 3, this->_internal_model_type(), target); } // optional int32 vocab_size = 4 [default = 8000]; - if (cached_has_bits & 0x00080000u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(4, this->vocab_size(), output); + if (cached_has_bits & 0x01000000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(4, this->_internal_vocab_size(), target); } // repeated string accept_language = 5; - for (int i = 0, n = this->accept_language_size(); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteString( - 5, this->accept_language(i), output); + for (int i = 0, n = this->_internal_accept_language_size(); i < n; i++) { + const auto& s = this->_internal_accept_language(i); + target = stream->WriteString(5, s, target); } // 
optional int32 self_test_sample_size = 6 [default = 0]; - if (cached_has_bits & 0x00000100u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(6, this->self_test_sample_size(), output); + if (cached_has_bits & 0x00000200u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(6, this->_internal_self_test_sample_size(), target); } // optional string input_format = 7; if (cached_has_bits & 0x00000002u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 7, this->input_format(), output); + target = stream->WriteStringMaybeAliased( + 7, this->_internal_input_format(), target); } // optional float character_coverage = 10 [default = 0.9995]; - if (cached_has_bits & 0x00100000u) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(10, this->character_coverage(), output); + if (cached_has_bits & 0x02000000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteFloatToArray(10, this->_internal_character_coverage(), target); } - // optional int32 input_sentence_size = 11 [default = 0]; - if (cached_has_bits & 0x00000200u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(11, this->input_sentence_size(), output); + // optional uint64 input_sentence_size = 11 [default = 0]; + if (cached_has_bits & 0x00000800u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteUInt64ToArray(11, this->_internal_input_sentence_size(), target); } // optional int32 mining_sentence_size = 12 [deprecated = true]; if (cached_has_bits & 0x00000400u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(12, this->mining_sentence_size(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(12, this->_internal_mining_sentence_size(), target); } // optional int32 training_sentence_size = 13 
[deprecated = true]; - if (cached_has_bits & 0x00000800u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(13, this->training_sentence_size(), output); + if (cached_has_bits & 0x00001000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(13, this->_internal_training_sentence_size(), target); } // optional int32 seed_sentencepiece_size = 14 [default = 1000000]; - if (cached_has_bits & 0x00200000u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(14, this->seed_sentencepiece_size(), output); + if (cached_has_bits & 0x04000000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(14, this->_internal_seed_sentencepiece_size(), target); } // optional float shrinking_factor = 15 [default = 0.75]; - if (cached_has_bits & 0x00400000u) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(15, this->shrinking_factor(), output); + if (cached_has_bits & 0x08000000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteFloatToArray(15, this->_internal_shrinking_factor(), target); } // optional int32 num_threads = 16 [default = 16]; - if (cached_has_bits & 0x00800000u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(16, this->num_threads(), output); + if (cached_has_bits & 0x10000000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(16, this->_internal_num_threads(), target); } // optional int32 num_sub_iterations = 17 [default = 2]; - if (cached_has_bits & 0x01000000u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(17, this->num_sub_iterations(), output); + if (cached_has_bits & 0x20000000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(17, this->_internal_num_sub_iterations(), 
target); } // optional int32 max_sentence_length = 18 [default = 4192]; - if (cached_has_bits & 0x02000000u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(18, this->max_sentence_length(), output); + if (cached_has_bits & 0x40000000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(18, this->_internal_max_sentence_length(), target); } + cached_has_bits = _has_bits_[1]; // optional bool shuffle_input_sentence = 19 [default = true]; - if (cached_has_bits & 0x08000000u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(19, this->shuffle_input_sentence(), output); + if (cached_has_bits & 0x00000001u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(19, this->_internal_shuffle_input_sentence(), target); } + cached_has_bits = _has_bits_[0]; // optional int32 max_sentencepiece_length = 20 [default = 16]; - if (cached_has_bits & 0x04000000u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(20, this->max_sentencepiece_length(), output); + if (cached_has_bits & 0x80000000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(20, this->_internal_max_sentencepiece_length(), target); } + cached_has_bits = _has_bits_[1]; // optional bool split_by_unicode_script = 21 [default = true]; - if (cached_has_bits & 0x10000000u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(21, this->split_by_unicode_script(), output); + if (cached_has_bits & 0x00000002u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(21, this->_internal_split_by_unicode_script(), target); } // optional bool split_by_whitespace = 22 [default = true]; - if (cached_has_bits & 0x40000000u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(22, this->split_by_whitespace(), output); 
+ if (cached_has_bits & 0x00000008u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(22, this->_internal_split_by_whitespace(), target); } // optional bool split_by_number = 23 [default = true]; - if (cached_has_bits & 0x20000000u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(23, this->split_by_number(), output); + if (cached_has_bits & 0x00000004u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(23, this->_internal_split_by_number(), target); } + cached_has_bits = _has_bits_[0]; // optional bool treat_whitespace_as_suffix = 24 [default = false]; - if (cached_has_bits & 0x00001000u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(24, this->treat_whitespace_as_suffix(), output); + if (cached_has_bits & 0x00004000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(24, this->_internal_treat_whitespace_as_suffix(), target); } // optional bool split_digits = 25 [default = false]; - if (cached_has_bits & 0x00002000u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(25, this->split_digits(), output); + if (cached_has_bits & 0x00010000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(25, this->_internal_split_digits(), target); + } + + // optional bool allow_whitespace_only_pieces = 26 [default = false]; + if (cached_has_bits & 0x00008000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(26, this->_internal_allow_whitespace_only_pieces(), target); } // repeated string control_symbols = 30; - for (int i = 0, n = this->control_symbols_size(); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteString( - 30, this->control_symbols(i), output); + for (int i = 0, n = 
this->_internal_control_symbols_size(); i < n; i++) { + const auto& s = this->_internal_control_symbols(i); + target = stream->WriteString(30, s, target); } // repeated string user_defined_symbols = 31; - for (int i = 0, n = this->user_defined_symbols_size(); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteString( - 31, this->user_defined_symbols(i), output); + for (int i = 0, n = this->_internal_user_defined_symbols_size(); i < n; i++) { + const auto& s = this->_internal_user_defined_symbols(i); + target = stream->WriteString(31, s, target); } + cached_has_bits = _has_bits_[1]; // optional bool vocabulary_output_piece_score = 32 [default = true]; - if (cached_has_bits & 0x80000000u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(32, this->vocabulary_output_piece_score(), output); + if (cached_has_bits & 0x00000010u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(32, this->_internal_vocabulary_output_piece_score(), target); } - cached_has_bits = _has_bits_[1]; // optional bool hard_vocab_limit = 33 [default = true]; - if (cached_has_bits & 0x00000001u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(33, this->hard_vocab_limit(), output); + if (cached_has_bits & 0x00000020u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(33, this->_internal_hard_vocab_limit(), target); } cached_has_bits = _has_bits_[0]; // optional bool use_all_vocab = 34 [default = false]; - if (cached_has_bits & 0x00008000u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(34, this->use_all_vocab(), output); + if (cached_has_bits & 0x00040000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(34, this->_internal_use_all_vocab(), target); } // optional bool byte_fallback = 35 [default = false]; - if (cached_has_bits & 
0x00004000u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(35, this->byte_fallback(), output); + if (cached_has_bits & 0x00020000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(35, this->_internal_byte_fallback(), target); } // optional string required_chars = 36; if (cached_has_bits & 0x00000004u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 36, this->required_chars(), output); + target = stream->WriteStringMaybeAliased( + 36, this->_internal_required_chars(), target); } // optional int32 unk_id = 40 [default = 0]; - if (cached_has_bits & 0x00010000u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(40, this->unk_id(), output); + if (cached_has_bits & 0x00100000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(40, this->_internal_unk_id(), target); } cached_has_bits = _has_bits_[1]; // optional int32 bos_id = 41 [default = 1]; - if (cached_has_bits & 0x00000002u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(41, this->bos_id(), output); + if (cached_has_bits & 0x00000040u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(41, this->_internal_bos_id(), target); } // optional int32 eos_id = 42 [default = 2]; - if (cached_has_bits & 0x00000004u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(42, this->eos_id(), output); + if (cached_has_bits & 0x00000080u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(42, this->_internal_eos_id(), target); } // optional int32 pad_id = 43 [default = -1]; - if (cached_has_bits & 0x00000008u) { - ::google::protobuf::internal::WireFormatLite::WriteInt32(43, this->pad_id(), output); + if (cached_has_bits & 0x00000100u) { + target = stream->EnsureSpace(target); 
+ target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteInt32ToArray(43, this->_internal_pad_id(), target); } cached_has_bits = _has_bits_[0]; // optional string unk_surface = 44 [default = " \342\201\207 "]; if (cached_has_bits & 0x00000008u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 44, this->unk_surface(), output); + target = stream->WriteStringMaybeAliased( + 44, this->_internal_unk_surface(), target); } // optional string unk_piece = 45 [default = ""]; if (cached_has_bits & 0x00000010u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 45, this->unk_piece(), output); + target = stream->WriteStringMaybeAliased( + 45, this->_internal_unk_piece(), target); } // optional string bos_piece = 46 [default = ""]; if (cached_has_bits & 0x00000020u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 46, this->bos_piece(), output); + target = stream->WriteStringMaybeAliased( + 46, this->_internal_bos_piece(), target); } // optional string eos_piece = 47 [default = ""]; if (cached_has_bits & 0x00000040u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 47, this->eos_piece(), output); + target = stream->WriteStringMaybeAliased( + 47, this->_internal_eos_piece(), target); } // optional string pad_piece = 48 [default = ""]; if (cached_has_bits & 0x00000080u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 48, this->pad_piece(), output); + target = stream->WriteStringMaybeAliased( + 48, this->_internal_pad_piece(), target); } // optional bool train_extremely_large_corpus = 49 [default = false]; - if (cached_has_bits & 0x00020000u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(49, this->train_extremely_large_corpus(), output); + if (cached_has_bits & 0x00080000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(49, 
this->_internal_train_extremely_large_corpus(), target); + } + + // optional bool enable_differential_privacy = 50 [default = false]; + if (cached_has_bits & 0x00002000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(50, this->_internal_enable_differential_privacy(), target); + } + + // optional float differential_privacy_noise_level = 51 [default = 0]; + if (cached_has_bits & 0x00200000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteFloatToArray(51, this->_internal_differential_privacy_noise_level(), target); + } + + // optional uint64 differential_privacy_clipping_threshold = 52 [default = 0]; + if (cached_has_bits & 0x00400000u) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteUInt64ToArray(52, this->_internal_differential_privacy_clipping_threshold(), target); + } + + // optional string pretokenization_delimiter = 53 [default = ""]; + if (cached_has_bits & 0x00000100u) { + target = stream->WriteStringMaybeAliased( + 53, this->_internal_pretokenization_delimiter(), target); } // Extension range [200, 536870912) - _extensions_.SerializeWithCachedSizes( - 200, 536870912, output); + target = _extensions_._InternalSerialize( + 200, 536870912, target, stream); - output->WriteRaw(_internal_metadata_.unknown_fields().data(), - static_cast(_internal_metadata_.unknown_fields().size())); - // @@protoc_insertion_point(serialize_end:sentencepiece.TrainerSpec) + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = stream->WriteRaw(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).data(), + static_cast(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size()), target); + } + // @@protoc_insertion_point(serialize_to_array_end:sentencepiece.TrainerSpec) + return target; } size_t 
TrainerSpec::ByteSizeLong() const { @@ -1286,291 +1349,328 @@ size_t TrainerSpec::ByteSizeLong() const { total_size += _extensions_.ByteSize(); - total_size += _internal_metadata_.unknown_fields().size(); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; // repeated string input = 1; total_size += 1 * - ::google::protobuf::internal::FromIntSize(this->input_size()); - for (int i = 0, n = this->input_size(); i < n; i++) { - total_size += ::google::protobuf::internal::WireFormatLite::StringSize( - this->input(i)); + ::PROTOBUF_NAMESPACE_ID::internal::FromIntSize(input_.size()); + for (int i = 0, n = input_.size(); i < n; i++) { + total_size += ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + input_.Get(i)); } // repeated string accept_language = 5; total_size += 1 * - ::google::protobuf::internal::FromIntSize(this->accept_language_size()); - for (int i = 0, n = this->accept_language_size(); i < n; i++) { - total_size += ::google::protobuf::internal::WireFormatLite::StringSize( - this->accept_language(i)); + ::PROTOBUF_NAMESPACE_ID::internal::FromIntSize(accept_language_.size()); + for (int i = 0, n = accept_language_.size(); i < n; i++) { + total_size += ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + accept_language_.Get(i)); } // repeated string control_symbols = 30; total_size += 2 * - ::google::protobuf::internal::FromIntSize(this->control_symbols_size()); - for (int i = 0, n = this->control_symbols_size(); i < n; i++) { - total_size += ::google::protobuf::internal::WireFormatLite::StringSize( - this->control_symbols(i)); + ::PROTOBUF_NAMESPACE_ID::internal::FromIntSize(control_symbols_.size()); + for (int i = 0, n = control_symbols_.size(); i < n; i++) { + total_size += ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + control_symbols_.Get(i)); } // repeated string user_defined_symbols = 31; total_size += 2 * - 
::google::protobuf::internal::FromIntSize(this->user_defined_symbols_size()); - for (int i = 0, n = this->user_defined_symbols_size(); i < n; i++) { - total_size += ::google::protobuf::internal::WireFormatLite::StringSize( - this->user_defined_symbols(i)); + ::PROTOBUF_NAMESPACE_ID::internal::FromIntSize(user_defined_symbols_.size()); + for (int i = 0, n = user_defined_symbols_.size(); i < n; i++) { + total_size += ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + user_defined_symbols_.Get(i)); } - if (_has_bits_[0 / 32] & 255u) { + cached_has_bits = _has_bits_[0]; + if (cached_has_bits & 0x000000ffu) { // optional string model_prefix = 2; - if (has_model_prefix()) { + if (cached_has_bits & 0x00000001u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->model_prefix()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_model_prefix()); } // optional string input_format = 7; - if (has_input_format()) { + if (cached_has_bits & 0x00000002u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->input_format()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_input_format()); } // optional string required_chars = 36; - if (has_required_chars()) { + if (cached_has_bits & 0x00000004u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->required_chars()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_required_chars()); } // optional string unk_surface = 44 [default = " \342\201\207 "]; - if (has_unk_surface()) { + if (cached_has_bits & 0x00000008u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->unk_surface()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_unk_surface()); } // optional string unk_piece = 45 [default = ""]; - if (has_unk_piece()) { + if (cached_has_bits & 
0x00000010u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->unk_piece()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_unk_piece()); } // optional string bos_piece = 46 [default = ""]; - if (has_bos_piece()) { + if (cached_has_bits & 0x00000020u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->bos_piece()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_bos_piece()); } // optional string eos_piece = 47 [default = ""]; - if (has_eos_piece()) { + if (cached_has_bits & 0x00000040u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->eos_piece()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_eos_piece()); } // optional string pad_piece = 48 [default = ""]; - if (has_pad_piece()) { + if (cached_has_bits & 0x00000080u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->pad_piece()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_pad_piece()); } } - if (_has_bits_[8 / 32] & 65280u) { + if (cached_has_bits & 0x0000ff00u) { + // optional string pretokenization_delimiter = 53 [default = ""]; + if (cached_has_bits & 0x00000100u) { + total_size += 2 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_pretokenization_delimiter()); + } + // optional int32 self_test_sample_size = 6 [default = 0]; - if (has_self_test_sample_size()) { + if (cached_has_bits & 0x00000200u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->self_test_sample_size()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_self_test_sample_size()); } - // optional int32 input_sentence_size = 11 [default = 0]; - if (has_input_sentence_size()) { + // optional int32 mining_sentence_size = 12 [deprecated = true]; + if 
(cached_has_bits & 0x00000400u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->input_sentence_size()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_mining_sentence_size()); } - // optional int32 mining_sentence_size = 12 [deprecated = true]; - if (has_mining_sentence_size()) { + // optional uint64 input_sentence_size = 11 [default = 0]; + if (cached_has_bits & 0x00000800u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->mining_sentence_size()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::UInt64Size( + this->_internal_input_sentence_size()); } // optional int32 training_sentence_size = 13 [deprecated = true]; - if (has_training_sentence_size()) { + if (cached_has_bits & 0x00001000u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->training_sentence_size()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_training_sentence_size()); + } + + // optional bool enable_differential_privacy = 50 [default = false]; + if (cached_has_bits & 0x00002000u) { + total_size += 2 + 1; } // optional bool treat_whitespace_as_suffix = 24 [default = false]; - if (has_treat_whitespace_as_suffix()) { + if (cached_has_bits & 0x00004000u) { total_size += 2 + 1; } + // optional bool allow_whitespace_only_pieces = 26 [default = false]; + if (cached_has_bits & 0x00008000u) { + total_size += 2 + 1; + } + + } + if (cached_has_bits & 0x00ff0000u) { // optional bool split_digits = 25 [default = false]; - if (has_split_digits()) { + if (cached_has_bits & 0x00010000u) { total_size += 2 + 1; } // optional bool byte_fallback = 35 [default = false]; - if (has_byte_fallback()) { + if (cached_has_bits & 0x00020000u) { total_size += 2 + 1; } // optional bool use_all_vocab = 34 [default = false]; - if (has_use_all_vocab()) { + if (cached_has_bits & 0x00040000u) { + total_size += 2 + 1; + } + + // optional bool 
train_extremely_large_corpus = 49 [default = false]; + if (cached_has_bits & 0x00080000u) { total_size += 2 + 1; } - } - if (_has_bits_[16 / 32] & 16711680u) { // optional int32 unk_id = 40 [default = 0]; - if (has_unk_id()) { + if (cached_has_bits & 0x00100000u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->unk_id()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_unk_id()); } - // optional bool train_extremely_large_corpus = 49 [default = false]; - if (has_train_extremely_large_corpus()) { - total_size += 2 + 1; + // optional float differential_privacy_noise_level = 51 [default = 0]; + if (cached_has_bits & 0x00200000u) { + total_size += 2 + 4; + } + + // optional uint64 differential_privacy_clipping_threshold = 52 [default = 0]; + if (cached_has_bits & 0x00400000u) { + total_size += 2 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::UInt64Size( + this->_internal_differential_privacy_clipping_threshold()); } // optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; - if (has_model_type()) { + if (cached_has_bits & 0x00800000u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::EnumSize(this->model_type()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::EnumSize(this->_internal_model_type()); } + } + if (cached_has_bits & 0xff000000u) { // optional int32 vocab_size = 4 [default = 8000]; - if (has_vocab_size()) { + if (cached_has_bits & 0x01000000u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->vocab_size()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_vocab_size()); } // optional float character_coverage = 10 [default = 0.9995]; - if (has_character_coverage()) { + if (cached_has_bits & 0x02000000u) { total_size += 1 + 4; } // optional int32 seed_sentencepiece_size = 14 [default = 1000000]; - if (has_seed_sentencepiece_size()) { + if (cached_has_bits & 
0x04000000u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->seed_sentencepiece_size()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_seed_sentencepiece_size()); } // optional float shrinking_factor = 15 [default = 0.75]; - if (has_shrinking_factor()) { + if (cached_has_bits & 0x08000000u) { total_size += 1 + 4; } // optional int32 num_threads = 16 [default = 16]; - if (has_num_threads()) { + if (cached_has_bits & 0x10000000u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->num_threads()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_num_threads()); } - } - if (_has_bits_[24 / 32] & 4278190080u) { // optional int32 num_sub_iterations = 17 [default = 2]; - if (has_num_sub_iterations()) { + if (cached_has_bits & 0x20000000u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->num_sub_iterations()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_num_sub_iterations()); } // optional int32 max_sentence_length = 18 [default = 4192]; - if (has_max_sentence_length()) { + if (cached_has_bits & 0x40000000u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->max_sentence_length()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_max_sentence_length()); } // optional int32 max_sentencepiece_length = 20 [default = 16]; - if (has_max_sentencepiece_length()) { + if (cached_has_bits & 0x80000000u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->max_sentencepiece_length()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_max_sentencepiece_length()); } + } + cached_has_bits = _has_bits_[1]; + if (cached_has_bits & 0x000000ffu) { // optional bool shuffle_input_sentence = 19 [default = true]; - if 
(has_shuffle_input_sentence()) { + if (cached_has_bits & 0x00000001u) { total_size += 2 + 1; } // optional bool split_by_unicode_script = 21 [default = true]; - if (has_split_by_unicode_script()) { + if (cached_has_bits & 0x00000002u) { total_size += 2 + 1; } // optional bool split_by_number = 23 [default = true]; - if (has_split_by_number()) { + if (cached_has_bits & 0x00000004u) { total_size += 2 + 1; } // optional bool split_by_whitespace = 22 [default = true]; - if (has_split_by_whitespace()) { + if (cached_has_bits & 0x00000008u) { total_size += 2 + 1; } // optional bool vocabulary_output_piece_score = 32 [default = true]; - if (has_vocabulary_output_piece_score()) { + if (cached_has_bits & 0x00000010u) { total_size += 2 + 1; } - } - if (_has_bits_[32 / 32] & 15u) { // optional bool hard_vocab_limit = 33 [default = true]; - if (has_hard_vocab_limit()) { + if (cached_has_bits & 0x00000020u) { total_size += 2 + 1; } // optional int32 bos_id = 41 [default = 1]; - if (has_bos_id()) { + if (cached_has_bits & 0x00000040u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->bos_id()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_bos_id()); } // optional int32 eos_id = 42 [default = 2]; - if (has_eos_id()) { + if (cached_has_bits & 0x00000080u) { total_size += 2 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->eos_id()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_eos_id()); } - // optional int32 pad_id = 43 [default = -1]; - if (has_pad_id()) { - total_size += 2 + - ::google::protobuf::internal::WireFormatLite::Int32Size( - this->pad_id()); - } + } + // optional int32 pad_id = 43 [default = -1]; + if (cached_has_bits & 0x00000100u) { + total_size += 2 + + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::Int32Size( + this->_internal_pad_id()); + } + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + total_size += 
_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size(); } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; } void TrainerSpec::CheckTypeAndMergeFrom( - const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) { + MergeFrom(*::PROTOBUF_NAMESPACE_ID::internal::DownCast( + &from)); } void TrainerSpec::MergeFrom(const TrainerSpec& from) { // @@protoc_insertion_point(class_specific_merge_from_start:sentencepiece.TrainerSpec) GOOGLE_DCHECK_NE(&from, this); _extensions_.MergeFrom(from._extensions_); - _internal_metadata_.MergeFrom(from._internal_metadata_); - ::google::protobuf::uint32 cached_has_bits = 0; + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; input_.MergeFrom(from.input_); @@ -1578,137 +1678,144 @@ void TrainerSpec::MergeFrom(const TrainerSpec& from) { control_symbols_.MergeFrom(from.control_symbols_); user_defined_symbols_.MergeFrom(from.user_defined_symbols_); cached_has_bits = from._has_bits_[0]; - if (cached_has_bits & 255u) { + if (cached_has_bits & 0x000000ffu) { if (cached_has_bits & 0x00000001u) { - set_has_model_prefix(); - model_prefix_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.model_prefix_); + _internal_set_model_prefix(from._internal_model_prefix()); } if (cached_has_bits & 0x00000002u) { - set_has_input_format(); - input_format_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.input_format_); + _internal_set_input_format(from._internal_input_format()); } if (cached_has_bits & 0x00000004u) { - set_has_required_chars(); - 
required_chars_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.required_chars_); + _internal_set_required_chars(from._internal_required_chars()); } if (cached_has_bits & 0x00000008u) { - set_has_unk_surface(); - unk_surface_.AssignWithDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get(), from.unk_surface_); + _internal_set_unk_surface(from._internal_unk_surface()); } if (cached_has_bits & 0x00000010u) { - set_has_unk_piece(); - unk_piece_.AssignWithDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get(), from.unk_piece_); + _internal_set_unk_piece(from._internal_unk_piece()); } if (cached_has_bits & 0x00000020u) { - set_has_bos_piece(); - bos_piece_.AssignWithDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get(), from.bos_piece_); + _internal_set_bos_piece(from._internal_bos_piece()); } if (cached_has_bits & 0x00000040u) { - set_has_eos_piece(); - eos_piece_.AssignWithDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get(), from.eos_piece_); + _internal_set_eos_piece(from._internal_eos_piece()); } if (cached_has_bits & 0x00000080u) { - set_has_pad_piece(); - pad_piece_.AssignWithDefault(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get(), from.pad_piece_); + _internal_set_pad_piece(from._internal_pad_piece()); } } - if (cached_has_bits & 65280u) { + if (cached_has_bits & 0x0000ff00u) { if (cached_has_bits & 0x00000100u) { - self_test_sample_size_ = from.self_test_sample_size_; + _internal_set_pretokenization_delimiter(from._internal_pretokenization_delimiter()); } if (cached_has_bits & 0x00000200u) { - input_sentence_size_ = from.input_sentence_size_; + self_test_sample_size_ = from.self_test_sample_size_; } if (cached_has_bits & 0x00000400u) { mining_sentence_size_ = from.mining_sentence_size_; } 
if (cached_has_bits & 0x00000800u) { - training_sentence_size_ = from.training_sentence_size_; + input_sentence_size_ = from.input_sentence_size_; } if (cached_has_bits & 0x00001000u) { - treat_whitespace_as_suffix_ = from.treat_whitespace_as_suffix_; + training_sentence_size_ = from.training_sentence_size_; } if (cached_has_bits & 0x00002000u) { - split_digits_ = from.split_digits_; + enable_differential_privacy_ = from.enable_differential_privacy_; } if (cached_has_bits & 0x00004000u) { - byte_fallback_ = from.byte_fallback_; + treat_whitespace_as_suffix_ = from.treat_whitespace_as_suffix_; } if (cached_has_bits & 0x00008000u) { - use_all_vocab_ = from.use_all_vocab_; + allow_whitespace_only_pieces_ = from.allow_whitespace_only_pieces_; } _has_bits_[0] |= cached_has_bits; } - if (cached_has_bits & 16711680u) { + if (cached_has_bits & 0x00ff0000u) { if (cached_has_bits & 0x00010000u) { - unk_id_ = from.unk_id_; + split_digits_ = from.split_digits_; } if (cached_has_bits & 0x00020000u) { - train_extremely_large_corpus_ = from.train_extremely_large_corpus_; + byte_fallback_ = from.byte_fallback_; } if (cached_has_bits & 0x00040000u) { - model_type_ = from.model_type_; + use_all_vocab_ = from.use_all_vocab_; } if (cached_has_bits & 0x00080000u) { - vocab_size_ = from.vocab_size_; + train_extremely_large_corpus_ = from.train_extremely_large_corpus_; } if (cached_has_bits & 0x00100000u) { - character_coverage_ = from.character_coverage_; + unk_id_ = from.unk_id_; } if (cached_has_bits & 0x00200000u) { - seed_sentencepiece_size_ = from.seed_sentencepiece_size_; + differential_privacy_noise_level_ = from.differential_privacy_noise_level_; } if (cached_has_bits & 0x00400000u) { - shrinking_factor_ = from.shrinking_factor_; + differential_privacy_clipping_threshold_ = from.differential_privacy_clipping_threshold_; } if (cached_has_bits & 0x00800000u) { - num_threads_ = from.num_threads_; + model_type_ = from.model_type_; } _has_bits_[0] |= cached_has_bits; } - if 
(cached_has_bits & 4278190080u) { + if (cached_has_bits & 0xff000000u) { if (cached_has_bits & 0x01000000u) { - num_sub_iterations_ = from.num_sub_iterations_; + vocab_size_ = from.vocab_size_; } if (cached_has_bits & 0x02000000u) { - max_sentence_length_ = from.max_sentence_length_; + character_coverage_ = from.character_coverage_; } if (cached_has_bits & 0x04000000u) { - max_sentencepiece_length_ = from.max_sentencepiece_length_; + seed_sentencepiece_size_ = from.seed_sentencepiece_size_; } if (cached_has_bits & 0x08000000u) { - shuffle_input_sentence_ = from.shuffle_input_sentence_; + shrinking_factor_ = from.shrinking_factor_; } if (cached_has_bits & 0x10000000u) { - split_by_unicode_script_ = from.split_by_unicode_script_; + num_threads_ = from.num_threads_; } if (cached_has_bits & 0x20000000u) { - split_by_number_ = from.split_by_number_; + num_sub_iterations_ = from.num_sub_iterations_; } if (cached_has_bits & 0x40000000u) { - split_by_whitespace_ = from.split_by_whitespace_; + max_sentence_length_ = from.max_sentence_length_; } if (cached_has_bits & 0x80000000u) { - vocabulary_output_piece_score_ = from.vocabulary_output_piece_score_; + max_sentencepiece_length_ = from.max_sentencepiece_length_; } _has_bits_[0] |= cached_has_bits; } cached_has_bits = from._has_bits_[1]; - if (cached_has_bits & 15u) { + if (cached_has_bits & 0x000000ffu) { if (cached_has_bits & 0x00000001u) { - hard_vocab_limit_ = from.hard_vocab_limit_; + shuffle_input_sentence_ = from.shuffle_input_sentence_; } if (cached_has_bits & 0x00000002u) { - bos_id_ = from.bos_id_; + split_by_unicode_script_ = from.split_by_unicode_script_; } if (cached_has_bits & 0x00000004u) { - eos_id_ = from.eos_id_; + split_by_number_ = from.split_by_number_; } if (cached_has_bits & 0x00000008u) { - pad_id_ = from.pad_id_; + split_by_whitespace_ = from.split_by_whitespace_; + } + if (cached_has_bits & 0x00000010u) { + vocabulary_output_piece_score_ = from.vocabulary_output_piece_score_; + } + if 
(cached_has_bits & 0x00000020u) { + hard_vocab_limit_ = from.hard_vocab_limit_; + } + if (cached_has_bits & 0x00000040u) { + bos_id_ = from.bos_id_; + } + if (cached_has_bits & 0x00000080u) { + eos_id_ = from.eos_id_; } _has_bits_[1] |= cached_has_bits; } + if (cached_has_bits & 0x00000100u) { + _internal_set_pad_id(from._internal_pad_id()); + } } void TrainerSpec::CopyFrom(const TrainerSpec& from) { @@ -1726,42 +1833,31 @@ bool TrainerSpec::IsInitialized() const { return true; } -void TrainerSpec::Swap(TrainerSpec* other) { - if (other == this) return; - InternalSwap(other); -} void TrainerSpec::InternalSwap(TrainerSpec* other) { using std::swap; - input_.InternalSwap(CastToBase(&other->input_)); - accept_language_.InternalSwap(CastToBase(&other->accept_language_)); - control_symbols_.InternalSwap(CastToBase(&other->control_symbols_)); - user_defined_symbols_.InternalSwap(CastToBase(&other->user_defined_symbols_)); - model_prefix_.Swap(&other->model_prefix_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - input_format_.Swap(&other->input_format_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - required_chars_.Swap(&other->required_chars_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - unk_surface_.Swap(&other->unk_surface_, &::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get(), - GetArenaNoVirtual()); - unk_piece_.Swap(&other->unk_piece_, &::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get(), - GetArenaNoVirtual()); - bos_piece_.Swap(&other->bos_piece_, &::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get(), - GetArenaNoVirtual()); - eos_piece_.Swap(&other->eos_piece_, &::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get(), - GetArenaNoVirtual()); - pad_piece_.Swap(&other->pad_piece_, 
&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get(), - GetArenaNoVirtual()); - swap(self_test_sample_size_, other->self_test_sample_size_); - swap(input_sentence_size_, other->input_sentence_size_); - swap(mining_sentence_size_, other->mining_sentence_size_); - swap(training_sentence_size_, other->training_sentence_size_); - swap(treat_whitespace_as_suffix_, other->treat_whitespace_as_suffix_); - swap(split_digits_, other->split_digits_); - swap(byte_fallback_, other->byte_fallback_); - swap(use_all_vocab_, other->use_all_vocab_); - swap(unk_id_, other->unk_id_); - swap(train_extremely_large_corpus_, other->train_extremely_large_corpus_); + _extensions_.Swap(&other->_extensions_); + _internal_metadata_.Swap(&other->_internal_metadata_); + swap(_has_bits_[0], other->_has_bits_[0]); + swap(_has_bits_[1], other->_has_bits_[1]); + input_.InternalSwap(&other->input_); + accept_language_.InternalSwap(&other->accept_language_); + control_symbols_.InternalSwap(&other->control_symbols_); + user_defined_symbols_.InternalSwap(&other->user_defined_symbols_); + model_prefix_.Swap(&other->model_prefix_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + input_format_.Swap(&other->input_format_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + required_chars_.Swap(&other->required_chars_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + unk_surface_.Swap(&other->unk_surface_, nullptr, GetArena()); + unk_piece_.Swap(&other->unk_piece_, nullptr, GetArena()); + bos_piece_.Swap(&other->bos_piece_, nullptr, GetArena()); + eos_piece_.Swap(&other->eos_piece_, nullptr, GetArena()); + pad_piece_.Swap(&other->pad_piece_, nullptr, GetArena()); + pretokenization_delimiter_.Swap(&other->pretokenization_delimiter_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + ::PROTOBUF_NAMESPACE_ID::internal::memswap< + 
PROTOBUF_FIELD_OFFSET(TrainerSpec, differential_privacy_clipping_threshold_) + + sizeof(TrainerSpec::differential_privacy_clipping_threshold_) + - PROTOBUF_FIELD_OFFSET(TrainerSpec, self_test_sample_size_)>( + reinterpret_cast(&self_test_sample_size_), + reinterpret_cast(&other->self_test_sample_size_)); swap(model_type_, other->model_type_); swap(vocab_size_, other->vocab_size_); swap(character_coverage_, other->character_coverage_); @@ -1780,56 +1876,70 @@ void TrainerSpec::InternalSwap(TrainerSpec* other) { swap(bos_id_, other->bos_id_); swap(eos_id_, other->eos_id_); swap(pad_id_, other->pad_id_); - swap(_has_bits_[0], other->_has_bits_[0]); - swap(_has_bits_[1], other->_has_bits_[1]); - _internal_metadata_.Swap(&other->_internal_metadata_); - _extensions_.Swap(&other->_extensions_); } -::std::string TrainerSpec::GetTypeName() const { +std::string TrainerSpec::GetTypeName() const { return "sentencepiece.TrainerSpec"; } // =================================================================== -void NormalizerSpec::InitAsDefaultInstance() { -} -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int NormalizerSpec::kNameFieldNumber; -const int NormalizerSpec::kPrecompiledCharsmapFieldNumber; -const int NormalizerSpec::kAddDummyPrefixFieldNumber; -const int NormalizerSpec::kRemoveExtraWhitespacesFieldNumber; -const int NormalizerSpec::kEscapeWhitespacesFieldNumber; -const int NormalizerSpec::kNormalizationRuleTsvFieldNumber; -const int NormalizerSpec::kEncodeCaseFieldNumber; -const int NormalizerSpec::kDecodeCaseFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 - -NormalizerSpec::NormalizerSpec() - : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { - ::google::protobuf::internal::InitSCC( - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_NormalizerSpec.base); +class NormalizerSpec::_Internal { + public: + using HasBits = decltype(std::declval()._has_bits_); + static void set_has_name(HasBits* has_bits) { + (*has_bits)[0] |= 1u; + } + static 
void set_has_precompiled_charsmap(HasBits* has_bits) { + (*has_bits)[0] |= 2u; + } + static void set_has_add_dummy_prefix(HasBits* has_bits) { + (*has_bits)[0] |= 32u; + } + static void set_has_remove_extra_whitespaces(HasBits* has_bits) { + (*has_bits)[0] |= 64u; + } + static void set_has_escape_whitespaces(HasBits* has_bits) { + (*has_bits)[0] |= 128u; + } + static void set_has_normalization_rule_tsv(HasBits* has_bits) { + (*has_bits)[0] |= 4u; + } + static void set_has_encode_case(HasBits* has_bits) { + (*has_bits)[0] |= 8u; + } + static void set_has_decode_case(HasBits* has_bits) { + (*has_bits)[0] |= 16u; + } +}; + +NormalizerSpec::NormalizerSpec(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::MessageLite(arena), + _extensions_(arena) { SharedCtor(); - // @@protoc_insertion_point(constructor:sentencepiece.NormalizerSpec) + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:sentencepiece.NormalizerSpec) } NormalizerSpec::NormalizerSpec(const NormalizerSpec& from) - : ::google::protobuf::MessageLite(), - _internal_metadata_(NULL), + : ::PROTOBUF_NAMESPACE_ID::MessageLite(), _has_bits_(from._has_bits_) { - _internal_metadata_.MergeFrom(from._internal_metadata_); + _internal_metadata_.MergeFrom(from._internal_metadata_); _extensions_.MergeFrom(from._extensions_); - name_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_name()) { - name_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.name_); + name_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_name()) { + name_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_name(), + GetArena()); } - precompiled_charsmap_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_precompiled_charsmap()) { - 
precompiled_charsmap_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.precompiled_charsmap_); + precompiled_charsmap_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_precompiled_charsmap()) { + precompiled_charsmap_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_precompiled_charsmap(), + GetArena()); } - normalization_rule_tsv_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_normalization_rule_tsv()) { - normalization_rule_tsv_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.normalization_rule_tsv_); + normalization_rule_tsv_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_normalization_rule_tsv()) { + normalization_rule_tsv_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_normalization_rule_tsv(), + GetArena()); } ::memcpy(&encode_case_, &from.encode_case_, static_cast(reinterpret_cast(&escape_whitespaces_) - @@ -1838,11 +1948,13 @@ NormalizerSpec::NormalizerSpec(const NormalizerSpec& from) } void NormalizerSpec::SharedCtor() { - name_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - precompiled_charsmap_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - normalization_rule_tsv_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - ::memset(&encode_case_, 0, static_cast( - reinterpret_cast(&decode_case_) - + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_NormalizerSpec_sentencepiece_5fmodel_2eproto.base); + name_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + precompiled_charsmap_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + 
normalization_rule_tsv_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + ::memset(reinterpret_cast(this) + static_cast( + reinterpret_cast(&encode_case_) - reinterpret_cast(this)), + 0, static_cast(reinterpret_cast(&decode_case_) - reinterpret_cast(&encode_case_)) + sizeof(decode_case_)); add_dummy_prefix_ = true; remove_extra_whitespaces_ = true; @@ -1852,259 +1964,228 @@ void NormalizerSpec::SharedCtor() { NormalizerSpec::~NormalizerSpec() { // @@protoc_insertion_point(destructor:sentencepiece.NormalizerSpec) SharedDtor(); + _internal_metadata_.Delete(); } void NormalizerSpec::SharedDtor() { - name_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - precompiled_charsmap_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - normalization_rule_tsv_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + GOOGLE_DCHECK(GetArena() == nullptr); + name_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + precompiled_charsmap_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + normalization_rule_tsv_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); } +void NormalizerSpec::ArenaDtor(void* object) { + NormalizerSpec* _this = reinterpret_cast< NormalizerSpec* >(object); + (void)_this; +} +void NormalizerSpec::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} void NormalizerSpec::SetCachedSize(int size) const { _cached_size_.Set(size); } const NormalizerSpec& NormalizerSpec::default_instance() { - ::google::protobuf::internal::InitSCC(&protobuf_sentencepiece_5fmodel_2eproto::scc_info_NormalizerSpec.base); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_NormalizerSpec_sentencepiece_5fmodel_2eproto.base); return *internal_default_instance(); } void NormalizerSpec::Clear() { // 
@@protoc_insertion_point(message_clear_start:sentencepiece.NormalizerSpec) - ::google::protobuf::uint32 cached_has_bits = 0; + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; _extensions_.Clear(); cached_has_bits = _has_bits_[0]; - if (cached_has_bits & 7u) { + if (cached_has_bits & 0x00000007u) { if (cached_has_bits & 0x00000001u) { - name_.ClearNonDefaultToEmptyNoArena(); + name_.ClearNonDefaultToEmpty(); } if (cached_has_bits & 0x00000002u) { - precompiled_charsmap_.ClearNonDefaultToEmptyNoArena(); + precompiled_charsmap_.ClearNonDefaultToEmpty(); } if (cached_has_bits & 0x00000004u) { - normalization_rule_tsv_.ClearNonDefaultToEmptyNoArena(); + normalization_rule_tsv_.ClearNonDefaultToEmpty(); } } ::memset(&encode_case_, 0, static_cast( reinterpret_cast(&decode_case_) - reinterpret_cast(&encode_case_)) + sizeof(decode_case_)); - if (cached_has_bits & 224u) { + if (cached_has_bits & 0x000000e0u) { add_dummy_prefix_ = true; remove_extra_whitespaces_ = true; escape_whitespaces_ = true; } _has_bits_.Clear(); - _internal_metadata_.Clear(); -} - -bool NormalizerSpec::MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - ::google::protobuf::uint32 tag; - ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter( - &_internal_metadata_); - ::google::protobuf::io::StringOutputStream unknown_fields_output( - unknown_fields_setter.buffer()); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_output, false); - // @@protoc_insertion_point(parse_start:sentencepiece.NormalizerSpec) - for (;;) { - ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); - tag = p.first; - if (!p.second) goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // 
optional string name = 1; - case 1: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u /* 10 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_name())); - } else { - goto handle_unusual; - } - break; - } + _internal_metadata_.Clear(); +} +const char* NormalizerSpec::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + _Internal::HasBits has_bits{}; + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // optional string name = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + auto str = _internal_mutable_name(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bytes precompiled_charsmap = 2; - case 2: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(18u /* 18 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadBytes( - input, this->mutable_precompiled_charsmap())); - } else { - goto handle_unusual; - } - break; - } - + case 2: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 18)) { + auto str = _internal_mutable_precompiled_charsmap(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool add_dummy_prefix = 3 [default = true]; - case 3: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(24u /* 24 & 0xFF */)) { - set_has_add_dummy_prefix(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, 
::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &add_dummy_prefix_))); - } else { - goto handle_unusual; - } - break; - } - + case 3: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 24)) { + _Internal::set_has_add_dummy_prefix(&has_bits); + add_dummy_prefix_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool remove_extra_whitespaces = 4 [default = true]; - case 4: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(32u /* 32 & 0xFF */)) { - set_has_remove_extra_whitespaces(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &remove_extra_whitespaces_))); - } else { - goto handle_unusual; - } - break; - } - + case 4: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 32)) { + _Internal::set_has_remove_extra_whitespaces(&has_bits); + remove_extra_whitespaces_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool escape_whitespaces = 5 [default = true]; - case 5: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(40u /* 40 & 0xFF */)) { - set_has_escape_whitespaces(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &escape_whitespaces_))); - } else { - goto handle_unusual; - } - break; - } - + case 5: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 40)) { + _Internal::set_has_escape_whitespaces(&has_bits); + escape_whitespaces_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional string normalization_rule_tsv = 6; - case 6: { - if (static_cast< 
::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(50u /* 50 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_normalization_rule_tsv())); - } else { - goto handle_unusual; - } - break; - } - + case 6: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 50)) { + auto str = _internal_mutable_normalization_rule_tsv(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool encode_case = 7 [default = false]; - case 7: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(56u /* 56 & 0xFF */)) { - set_has_encode_case(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &encode_case_))); - } else { - goto handle_unusual; - } - break; - } - + case 7: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 56)) { + _Internal::set_has_encode_case(&has_bits); + encode_case_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional bool decode_case = 8 [default = false]; - case 8: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(64u /* 64 & 0xFF */)) { - set_has_decode_case(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - bool, ::google::protobuf::internal::WireFormatLite::TYPE_BOOL>( - input, &decode_case_))); - } else { - goto handle_unusual; - } - break; - } - + case 8: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 64)) { + _Internal::set_has_decode_case(&has_bits); + decode_case_ = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; default: { handle_unusual: - if (tag == 0) { + if 
((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); goto success; } - if ((1600u <= tag)) { - DO_(_extensions_.ParseField(tag, input, - internal_default_instance(), - &unknown_fields_stream)); - continue; - } - DO_(::google::protobuf::internal::WireFormatLite::SkipField( - input, tag, &unknown_fields_stream)); - break; + if ((1600u <= tag)) { + ptr = _extensions_.ParseField(tag, ptr, + internal_default_instance(), &_internal_metadata_, ctx); + CHK_(ptr != nullptr); + continue; } - } - } + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; + } + } // switch + } // while success: - // @@protoc_insertion_point(parse_success:sentencepiece.NormalizerSpec) - return true; + _has_bits_.Or(has_bits); + return ptr; failure: - // @@protoc_insertion_point(parse_failure:sentencepiece.NormalizerSpec) - return false; -#undef DO_ + ptr = nullptr; + goto success; +#undef CHK_ } -void NormalizerSpec::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:sentencepiece.NormalizerSpec) - ::google::protobuf::uint32 cached_has_bits = 0; +::PROTOBUF_NAMESPACE_ID::uint8* NormalizerSpec::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:sentencepiece.NormalizerSpec) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; cached_has_bits = _has_bits_[0]; // optional string name = 1; if (cached_has_bits & 0x00000001u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 1, this->name(), output); + target = stream->WriteStringMaybeAliased( + 1, this->_internal_name(), target); } // optional bytes precompiled_charsmap = 2; if (cached_has_bits & 0x00000002u) { - ::google::protobuf::internal::WireFormatLite::WriteBytesMaybeAliased( - 2, this->precompiled_charsmap(), 
output); + target = stream->WriteBytesMaybeAliased( + 2, this->_internal_precompiled_charsmap(), target); } // optional bool add_dummy_prefix = 3 [default = true]; if (cached_has_bits & 0x00000020u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(3, this->add_dummy_prefix(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(3, this->_internal_add_dummy_prefix(), target); } // optional bool remove_extra_whitespaces = 4 [default = true]; if (cached_has_bits & 0x00000040u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(4, this->remove_extra_whitespaces(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(4, this->_internal_remove_extra_whitespaces(), target); } // optional bool escape_whitespaces = 5 [default = true]; if (cached_has_bits & 0x00000080u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(5, this->escape_whitespaces(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(5, this->_internal_escape_whitespaces(), target); } // optional string normalization_rule_tsv = 6; if (cached_has_bits & 0x00000004u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 6, this->normalization_rule_tsv(), output); + target = stream->WriteStringMaybeAliased( + 6, this->_internal_normalization_rule_tsv(), target); } // optional bool encode_case = 7 [default = false]; if (cached_has_bits & 0x00000008u) { - ::google::protobuf::internal::WireFormatLite::WriteBool(7, this->encode_case(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(7, this->_internal_encode_case(), target); } // optional bool decode_case = 8 [default = false]; if (cached_has_bits & 0x00000010u) { - 
::google::protobuf::internal::WireFormatLite::WriteBool(8, this->decode_case(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteBoolToArray(8, this->_internal_decode_case(), target); } // Extension range [200, 536870912) - _extensions_.SerializeWithCachedSizes( - 200, 536870912, output); + target = _extensions_._InternalSerialize( + 200, 536870912, target, stream); - output->WriteRaw(_internal_metadata_.unknown_fields().data(), - static_cast(_internal_metadata_.unknown_fields().size())); - // @@protoc_insertion_point(serialize_end:sentencepiece.NormalizerSpec) + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = stream->WriteRaw(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).data(), + static_cast(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size()), target); + } + // @@protoc_insertion_point(serialize_to_array_end:sentencepiece.NormalizerSpec) + return target; } size_t NormalizerSpec::ByteSizeLong() const { @@ -2113,87 +2194,91 @@ size_t NormalizerSpec::ByteSizeLong() const { total_size += _extensions_.ByteSize(); - total_size += _internal_metadata_.unknown_fields().size(); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; - if (_has_bits_[0 / 32] & 255u) { + cached_has_bits = _has_bits_[0]; + if (cached_has_bits & 0x000000ffu) { // optional string name = 1; - if (has_name()) { + if (cached_has_bits & 0x00000001u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->name()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_name()); } // optional bytes precompiled_charsmap = 2; - if (has_precompiled_charsmap()) { + if (cached_has_bits & 0x00000002u) { total_size += 1 + - 
::google::protobuf::internal::WireFormatLite::BytesSize( - this->precompiled_charsmap()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::BytesSize( + this->_internal_precompiled_charsmap()); } // optional string normalization_rule_tsv = 6; - if (has_normalization_rule_tsv()) { + if (cached_has_bits & 0x00000004u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->normalization_rule_tsv()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_normalization_rule_tsv()); } // optional bool encode_case = 7 [default = false]; - if (has_encode_case()) { + if (cached_has_bits & 0x00000008u) { total_size += 1 + 1; } // optional bool decode_case = 8 [default = false]; - if (has_decode_case()) { + if (cached_has_bits & 0x00000010u) { total_size += 1 + 1; } // optional bool add_dummy_prefix = 3 [default = true]; - if (has_add_dummy_prefix()) { + if (cached_has_bits & 0x00000020u) { total_size += 1 + 1; } // optional bool remove_extra_whitespaces = 4 [default = true]; - if (has_remove_extra_whitespaces()) { + if (cached_has_bits & 0x00000040u) { total_size += 1 + 1; } // optional bool escape_whitespaces = 5 [default = true]; - if (has_escape_whitespaces()) { + if (cached_has_bits & 0x00000080u) { total_size += 1 + 1; } } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + total_size += _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size(); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; } void NormalizerSpec::CheckTypeAndMergeFrom( - const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) { + MergeFrom(*::PROTOBUF_NAMESPACE_ID::internal::DownCast( + &from)); } void 
NormalizerSpec::MergeFrom(const NormalizerSpec& from) { // @@protoc_insertion_point(class_specific_merge_from_start:sentencepiece.NormalizerSpec) GOOGLE_DCHECK_NE(&from, this); _extensions_.MergeFrom(from._extensions_); - _internal_metadata_.MergeFrom(from._internal_metadata_); - ::google::protobuf::uint32 cached_has_bits = 0; + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; cached_has_bits = from._has_bits_[0]; - if (cached_has_bits & 255u) { + if (cached_has_bits & 0x000000ffu) { if (cached_has_bits & 0x00000001u) { - set_has_name(); - name_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.name_); + _internal_set_name(from._internal_name()); } if (cached_has_bits & 0x00000002u) { - set_has_precompiled_charsmap(); - precompiled_charsmap_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.precompiled_charsmap_); + _internal_set_precompiled_charsmap(from._internal_precompiled_charsmap()); } if (cached_has_bits & 0x00000004u) { - set_has_normalization_rule_tsv(); - normalization_rule_tsv_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.normalization_rule_tsv_); + _internal_set_normalization_rule_tsv(from._internal_normalization_rule_tsv()); } if (cached_has_bits & 0x00000008u) { encode_case_ = from.encode_case_; @@ -2229,240 +2314,245 @@ bool NormalizerSpec::IsInitialized() const { return true; } -void NormalizerSpec::Swap(NormalizerSpec* other) { - if (other == this) return; - InternalSwap(other); -} void NormalizerSpec::InternalSwap(NormalizerSpec* other) { using std::swap; - name_.Swap(&other->name_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - precompiled_charsmap_.Swap(&other->precompiled_charsmap_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - 
normalization_rule_tsv_.Swap(&other->normalization_rule_tsv_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - swap(encode_case_, other->encode_case_); - swap(decode_case_, other->decode_case_); + _extensions_.Swap(&other->_extensions_); + _internal_metadata_.Swap(&other->_internal_metadata_); + swap(_has_bits_[0], other->_has_bits_[0]); + name_.Swap(&other->name_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + precompiled_charsmap_.Swap(&other->precompiled_charsmap_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + normalization_rule_tsv_.Swap(&other->normalization_rule_tsv_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + ::PROTOBUF_NAMESPACE_ID::internal::memswap< + PROTOBUF_FIELD_OFFSET(NormalizerSpec, decode_case_) + + sizeof(NormalizerSpec::decode_case_) + - PROTOBUF_FIELD_OFFSET(NormalizerSpec, encode_case_)>( + reinterpret_cast(&encode_case_), + reinterpret_cast(&other->encode_case_)); swap(add_dummy_prefix_, other->add_dummy_prefix_); swap(remove_extra_whitespaces_, other->remove_extra_whitespaces_); swap(escape_whitespaces_, other->escape_whitespaces_); - swap(_has_bits_[0], other->_has_bits_[0]); - _internal_metadata_.Swap(&other->_internal_metadata_); - _extensions_.Swap(&other->_extensions_); } -::std::string NormalizerSpec::GetTypeName() const { +std::string NormalizerSpec::GetTypeName() const { return "sentencepiece.NormalizerSpec"; } // =================================================================== -void SelfTestData_Sample::InitAsDefaultInstance() { -} -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int SelfTestData_Sample::kInputFieldNumber; -const int SelfTestData_Sample::kExpectedFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +class SelfTestData_Sample::_Internal { + public: + using HasBits = decltype(std::declval()._has_bits_); + static void set_has_input(HasBits* has_bits) { + 
(*has_bits)[0] |= 1u; + } + static void set_has_expected(HasBits* has_bits) { + (*has_bits)[0] |= 2u; + } +}; -SelfTestData_Sample::SelfTestData_Sample() - : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { - ::google::protobuf::internal::InitSCC( - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_SelfTestData_Sample.base); +SelfTestData_Sample::SelfTestData_Sample(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::MessageLite(arena) { SharedCtor(); - // @@protoc_insertion_point(constructor:sentencepiece.SelfTestData.Sample) + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:sentencepiece.SelfTestData.Sample) } SelfTestData_Sample::SelfTestData_Sample(const SelfTestData_Sample& from) - : ::google::protobuf::MessageLite(), - _internal_metadata_(NULL), + : ::PROTOBUF_NAMESPACE_ID::MessageLite(), _has_bits_(from._has_bits_) { - _internal_metadata_.MergeFrom(from._internal_metadata_); - input_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_input()) { - input_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.input_); + _internal_metadata_.MergeFrom(from._internal_metadata_); + input_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_input()) { + input_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_input(), + GetArena()); } - expected_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_expected()) { - expected_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.expected_); + expected_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_expected()) { + expected_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_expected(), + GetArena()); } // 
@@protoc_insertion_point(copy_constructor:sentencepiece.SelfTestData.Sample) } void SelfTestData_Sample::SharedCtor() { - input_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - expected_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_SelfTestData_Sample_sentencepiece_5fmodel_2eproto.base); + input_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + expected_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); } SelfTestData_Sample::~SelfTestData_Sample() { // @@protoc_insertion_point(destructor:sentencepiece.SelfTestData.Sample) SharedDtor(); + _internal_metadata_.Delete(); } void SelfTestData_Sample::SharedDtor() { - input_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - expected_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + GOOGLE_DCHECK(GetArena() == nullptr); + input_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + expected_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); } +void SelfTestData_Sample::ArenaDtor(void* object) { + SelfTestData_Sample* _this = reinterpret_cast< SelfTestData_Sample* >(object); + (void)_this; +} +void SelfTestData_Sample::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} void SelfTestData_Sample::SetCachedSize(int size) const { _cached_size_.Set(size); } const SelfTestData_Sample& SelfTestData_Sample::default_instance() { - ::google::protobuf::internal::InitSCC(&protobuf_sentencepiece_5fmodel_2eproto::scc_info_SelfTestData_Sample.base); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_SelfTestData_Sample_sentencepiece_5fmodel_2eproto.base); return *internal_default_instance(); } void SelfTestData_Sample::Clear() { // @@protoc_insertion_point(message_clear_start:sentencepiece.SelfTestData.Sample) 
- ::google::protobuf::uint32 cached_has_bits = 0; + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; cached_has_bits = _has_bits_[0]; - if (cached_has_bits & 3u) { + if (cached_has_bits & 0x00000003u) { if (cached_has_bits & 0x00000001u) { - input_.ClearNonDefaultToEmptyNoArena(); + input_.ClearNonDefaultToEmpty(); } if (cached_has_bits & 0x00000002u) { - expected_.ClearNonDefaultToEmptyNoArena(); + expected_.ClearNonDefaultToEmpty(); } } _has_bits_.Clear(); - _internal_metadata_.Clear(); -} - -bool SelfTestData_Sample::MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - ::google::protobuf::uint32 tag; - ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter( - &_internal_metadata_); - ::google::protobuf::io::StringOutputStream unknown_fields_output( - unknown_fields_setter.buffer()); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_output, false); - // @@protoc_insertion_point(parse_start:sentencepiece.SelfTestData.Sample) - for (;;) { - ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); - tag = p.first; - if (!p.second) goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // optional string input = 1; - case 1: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u /* 10 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_input())); - } else { - goto handle_unusual; - } - break; - } + _internal_metadata_.Clear(); +} +const char* SelfTestData_Sample::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + _Internal::HasBits 
has_bits{}; + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // optional string input = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + auto str = _internal_mutable_input(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional string expected = 2; - case 2: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(18u /* 18 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_expected())); - } else { - goto handle_unusual; - } - break; - } - + case 2: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 18)) { + auto str = _internal_mutable_expected(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; default: { handle_unusual: - if (tag == 0) { + if ((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); goto success; } - DO_(::google::protobuf::internal::WireFormatLite::SkipField( - input, tag, &unknown_fields_stream)); - break; + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; } - } - } + } // switch + } // while success: - // @@protoc_insertion_point(parse_success:sentencepiece.SelfTestData.Sample) - return true; + _has_bits_.Or(has_bits); + return ptr; failure: - // @@protoc_insertion_point(parse_failure:sentencepiece.SelfTestData.Sample) - return false; -#undef DO_ + ptr = nullptr; + goto success; +#undef CHK_ } -void SelfTestData_Sample::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:sentencepiece.SelfTestData.Sample) - 
::google::protobuf::uint32 cached_has_bits = 0; +::PROTOBUF_NAMESPACE_ID::uint8* SelfTestData_Sample::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:sentencepiece.SelfTestData.Sample) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; cached_has_bits = _has_bits_[0]; // optional string input = 1; if (cached_has_bits & 0x00000001u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 1, this->input(), output); + target = stream->WriteStringMaybeAliased( + 1, this->_internal_input(), target); } // optional string expected = 2; if (cached_has_bits & 0x00000002u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 2, this->expected(), output); + target = stream->WriteStringMaybeAliased( + 2, this->_internal_expected(), target); } - output->WriteRaw(_internal_metadata_.unknown_fields().data(), - static_cast(_internal_metadata_.unknown_fields().size())); - // @@protoc_insertion_point(serialize_end:sentencepiece.SelfTestData.Sample) + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = stream->WriteRaw(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).data(), + static_cast(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size()), target); + } + // @@protoc_insertion_point(serialize_to_array_end:sentencepiece.SelfTestData.Sample) + return target; } size_t SelfTestData_Sample::ByteSizeLong() const { // @@protoc_insertion_point(message_byte_size_start:sentencepiece.SelfTestData.Sample) size_t total_size = 0; - total_size += _internal_metadata_.unknown_fields().size(); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; - if (_has_bits_[0 / 32] & 3u) { + cached_has_bits 
= _has_bits_[0]; + if (cached_has_bits & 0x00000003u) { // optional string input = 1; - if (has_input()) { + if (cached_has_bits & 0x00000001u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->input()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_input()); } // optional string expected = 2; - if (has_expected()) { + if (cached_has_bits & 0x00000002u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->expected()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_expected()); } } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + total_size += _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size(); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; } void SelfTestData_Sample::CheckTypeAndMergeFrom( - const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) { + MergeFrom(*::PROTOBUF_NAMESPACE_ID::internal::DownCast( + &from)); } void SelfTestData_Sample::MergeFrom(const SelfTestData_Sample& from) { // @@protoc_insertion_point(class_specific_merge_from_start:sentencepiece.SelfTestData.Sample) GOOGLE_DCHECK_NE(&from, this); - _internal_metadata_.MergeFrom(from._internal_metadata_); - ::google::protobuf::uint32 cached_has_bits = 0; + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; cached_has_bits = from._has_bits_[0]; - if (cached_has_bits & 3u) { + if (cached_has_bits & 0x00000003u) { if (cached_has_bits & 0x00000001u) { - set_has_input(); - input_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), 
from.input_); + _internal_set_input(from._internal_input()); } if (cached_has_bits & 0x00000002u) { - set_has_expected(); - expected_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.expected_); + _internal_set_expected(from._internal_expected()); } } } @@ -2478,158 +2568,152 @@ bool SelfTestData_Sample::IsInitialized() const { return true; } -void SelfTestData_Sample::Swap(SelfTestData_Sample* other) { - if (other == this) return; - InternalSwap(other); -} void SelfTestData_Sample::InternalSwap(SelfTestData_Sample* other) { using std::swap; - input_.Swap(&other->input_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); - expected_.Swap(&other->expected_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); + _internal_metadata_.Swap(&other->_internal_metadata_); swap(_has_bits_[0], other->_has_bits_[0]); - _internal_metadata_.Swap(&other->_internal_metadata_); + input_.Swap(&other->input_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); + expected_.Swap(&other->expected_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -::std::string SelfTestData_Sample::GetTypeName() const { +std::string SelfTestData_Sample::GetTypeName() const { return "sentencepiece.SelfTestData.Sample"; } // =================================================================== -void SelfTestData::InitAsDefaultInstance() { -} -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int SelfTestData::kSamplesFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +class SelfTestData::_Internal { + public: +}; -SelfTestData::SelfTestData() - : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { - ::google::protobuf::internal::InitSCC( - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_SelfTestData.base); +SelfTestData::SelfTestData(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::MessageLite(arena), 
+ _extensions_(arena), + samples_(arena) { SharedCtor(); - // @@protoc_insertion_point(constructor:sentencepiece.SelfTestData) + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:sentencepiece.SelfTestData) } SelfTestData::SelfTestData(const SelfTestData& from) - : ::google::protobuf::MessageLite(), - _internal_metadata_(NULL), - _has_bits_(from._has_bits_), + : ::PROTOBUF_NAMESPACE_ID::MessageLite(), samples_(from.samples_) { - _internal_metadata_.MergeFrom(from._internal_metadata_); + _internal_metadata_.MergeFrom(from._internal_metadata_); _extensions_.MergeFrom(from._extensions_); // @@protoc_insertion_point(copy_constructor:sentencepiece.SelfTestData) } void SelfTestData::SharedCtor() { + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_SelfTestData_sentencepiece_5fmodel_2eproto.base); } SelfTestData::~SelfTestData() { // @@protoc_insertion_point(destructor:sentencepiece.SelfTestData) SharedDtor(); + _internal_metadata_.Delete(); } void SelfTestData::SharedDtor() { + GOOGLE_DCHECK(GetArena() == nullptr); } +void SelfTestData::ArenaDtor(void* object) { + SelfTestData* _this = reinterpret_cast< SelfTestData* >(object); + (void)_this; +} +void SelfTestData::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} void SelfTestData::SetCachedSize(int size) const { _cached_size_.Set(size); } const SelfTestData& SelfTestData::default_instance() { - ::google::protobuf::internal::InitSCC(&protobuf_sentencepiece_5fmodel_2eproto::scc_info_SelfTestData.base); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_SelfTestData_sentencepiece_5fmodel_2eproto.base); return *internal_default_instance(); } void SelfTestData::Clear() { // @@protoc_insertion_point(message_clear_start:sentencepiece.SelfTestData) - ::google::protobuf::uint32 cached_has_bits = 0; + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; _extensions_.Clear(); samples_.Clear(); - 
_has_bits_.Clear(); - _internal_metadata_.Clear(); -} - -bool SelfTestData::MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - ::google::protobuf::uint32 tag; - ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter( - &_internal_metadata_); - ::google::protobuf::io::StringOutputStream unknown_fields_output( - unknown_fields_setter.buffer()); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_output, false); - // @@protoc_insertion_point(parse_start:sentencepiece.SelfTestData) - for (;;) { - ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); - tag = p.first; - if (!p.second) goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // repeated .sentencepiece.SelfTestData.Sample samples = 1; - case 1: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u /* 10 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessage( - input, add_samples())); - } else { - goto handle_unusual; - } - break; - } + _internal_metadata_.Clear(); +} +const char* SelfTestData::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // repeated .sentencepiece.SelfTestData.Sample samples = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + ptr -= 1; + do { + ptr += 1; + ptr = ctx->ParseMessage(_internal_add_samples(), ptr); + CHK_(ptr); + if (!ctx->DataAvailable(ptr)) break; + } while (::PROTOBUF_NAMESPACE_ID::internal::ExpectTag<10>(ptr)); + } else goto handle_unusual; 
+ continue; default: { handle_unusual: - if (tag == 0) { + if ((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); goto success; } - if ((1600u <= tag)) { - DO_(_extensions_.ParseField(tag, input, - internal_default_instance(), - &unknown_fields_stream)); - continue; - } - DO_(::google::protobuf::internal::WireFormatLite::SkipField( - input, tag, &unknown_fields_stream)); - break; + if ((1600u <= tag)) { + ptr = _extensions_.ParseField(tag, ptr, + internal_default_instance(), &_internal_metadata_, ctx); + CHK_(ptr != nullptr); + continue; } - } - } + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; + } + } // switch + } // while success: - // @@protoc_insertion_point(parse_success:sentencepiece.SelfTestData) - return true; + return ptr; failure: - // @@protoc_insertion_point(parse_failure:sentencepiece.SelfTestData) - return false; -#undef DO_ + ptr = nullptr; + goto success; +#undef CHK_ } -void SelfTestData::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:sentencepiece.SelfTestData) - ::google::protobuf::uint32 cached_has_bits = 0; +::PROTOBUF_NAMESPACE_ID::uint8* SelfTestData::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:sentencepiece.SelfTestData) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; // repeated .sentencepiece.SelfTestData.Sample samples = 1; for (unsigned int i = 0, - n = static_cast(this->samples_size()); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteMessage( - 1, - this->samples(static_cast(i)), - output); + n = static_cast(this->_internal_samples_size()); i < n; i++) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage(1, 
this->_internal_samples(i), target, stream); } // Extension range [200, 536870912) - _extensions_.SerializeWithCachedSizes( - 200, 536870912, output); + target = _extensions_._InternalSerialize( + 200, 536870912, target, stream); - output->WriteRaw(_internal_metadata_.unknown_fields().data(), - static_cast(_internal_metadata_.unknown_fields().size())); - // @@protoc_insertion_point(serialize_end:sentencepiece.SelfTestData) + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = stream->WriteRaw(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).data(), + static_cast(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size()), target); + } + // @@protoc_insertion_point(serialize_to_array_end:sentencepiece.SelfTestData) + return target; } size_t SelfTestData::ByteSizeLong() const { @@ -2638,35 +2722,37 @@ size_t SelfTestData::ByteSizeLong() const { total_size += _extensions_.ByteSize(); - total_size += _internal_metadata_.unknown_fields().size(); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; // repeated .sentencepiece.SelfTestData.Sample samples = 1; - { - unsigned int count = static_cast(this->samples_size()); - total_size += 1UL * count; - for (unsigned int i = 0; i < count; i++) { - total_size += - ::google::protobuf::internal::WireFormatLite::MessageSize( - this->samples(static_cast(i))); - } + total_size += 1UL * this->_internal_samples_size(); + for (const auto& msg : this->samples_) { + total_size += + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize(msg); } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + total_size += _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size(); + } + int cached_size = 
::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; } void SelfTestData::CheckTypeAndMergeFrom( - const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) { + MergeFrom(*::PROTOBUF_NAMESPACE_ID::internal::DownCast( + &from)); } void SelfTestData::MergeFrom(const SelfTestData& from) { // @@protoc_insertion_point(class_specific_merge_from_start:sentencepiece.SelfTestData) GOOGLE_DCHECK_NE(&from, this); _extensions_.MergeFrom(from._extensions_); - _internal_metadata_.MergeFrom(from._internal_metadata_); - ::google::protobuf::uint32 cached_has_bits = 0; + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; samples_.MergeFrom(from.samples_); @@ -2687,49 +2773,50 @@ bool SelfTestData::IsInitialized() const { return true; } -void SelfTestData::Swap(SelfTestData* other) { - if (other == this) return; - InternalSwap(other); -} void SelfTestData::InternalSwap(SelfTestData* other) { using std::swap; - CastToBase(&samples_)->InternalSwap(CastToBase(&other->samples_)); - swap(_has_bits_[0], other->_has_bits_[0]); - _internal_metadata_.Swap(&other->_internal_metadata_); _extensions_.Swap(&other->_extensions_); + _internal_metadata_.Swap(&other->_internal_metadata_); + samples_.InternalSwap(&other->samples_); } -::std::string SelfTestData::GetTypeName() const { +std::string SelfTestData::GetTypeName() const { return "sentencepiece.SelfTestData"; } // =================================================================== -void ModelProto_SentencePiece::InitAsDefaultInstance() { -} -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int ModelProto_SentencePiece::kPieceFieldNumber; -const int ModelProto_SentencePiece::kScoreFieldNumber; -const int ModelProto_SentencePiece::kTypeFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 +class 
ModelProto_SentencePiece::_Internal { + public: + using HasBits = decltype(std::declval()._has_bits_); + static void set_has_piece(HasBits* has_bits) { + (*has_bits)[0] |= 1u; + } + static void set_has_score(HasBits* has_bits) { + (*has_bits)[0] |= 2u; + } + static void set_has_type(HasBits* has_bits) { + (*has_bits)[0] |= 4u; + } +}; -ModelProto_SentencePiece::ModelProto_SentencePiece() - : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { - ::google::protobuf::internal::InitSCC( - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_ModelProto_SentencePiece.base); +ModelProto_SentencePiece::ModelProto_SentencePiece(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::MessageLite(arena), + _extensions_(arena) { SharedCtor(); - // @@protoc_insertion_point(constructor:sentencepiece.ModelProto.SentencePiece) + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:sentencepiece.ModelProto.SentencePiece) } ModelProto_SentencePiece::ModelProto_SentencePiece(const ModelProto_SentencePiece& from) - : ::google::protobuf::MessageLite(), - _internal_metadata_(NULL), + : ::PROTOBUF_NAMESPACE_ID::MessageLite(), _has_bits_(from._has_bits_) { - _internal_metadata_.MergeFrom(from._internal_metadata_); + _internal_metadata_.MergeFrom(from._internal_metadata_); _extensions_.MergeFrom(from._extensions_); - piece_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - if (from.has_piece()) { - piece_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.piece_); + piece_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); + if (from._internal_has_piece()) { + piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, from._internal_piece(), + GetArena()); } ::memcpy(&score_, &from.score_, static_cast(reinterpret_cast(&type_) - @@ -2738,7 +2825,8 @@ ModelProto_SentencePiece::ModelProto_SentencePiece(const ModelProto_SentencePiec } 
void ModelProto_SentencePiece::SharedCtor() { - piece_.UnsafeSetDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_ModelProto_SentencePiece_sentencepiece_5fmodel_2eproto.base); + piece_.UnsafeSetDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); score_ = 0; type_ = 1; } @@ -2746,160 +2834,149 @@ void ModelProto_SentencePiece::SharedCtor() { ModelProto_SentencePiece::~ModelProto_SentencePiece() { // @@protoc_insertion_point(destructor:sentencepiece.ModelProto.SentencePiece) SharedDtor(); + _internal_metadata_.Delete(); } void ModelProto_SentencePiece::SharedDtor() { - piece_.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + GOOGLE_DCHECK(GetArena() == nullptr); + piece_.DestroyNoArena(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited()); } +void ModelProto_SentencePiece::ArenaDtor(void* object) { + ModelProto_SentencePiece* _this = reinterpret_cast< ModelProto_SentencePiece* >(object); + (void)_this; +} +void ModelProto_SentencePiece::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} void ModelProto_SentencePiece::SetCachedSize(int size) const { _cached_size_.Set(size); } const ModelProto_SentencePiece& ModelProto_SentencePiece::default_instance() { - ::google::protobuf::internal::InitSCC(&protobuf_sentencepiece_5fmodel_2eproto::scc_info_ModelProto_SentencePiece.base); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_ModelProto_SentencePiece_sentencepiece_5fmodel_2eproto.base); return *internal_default_instance(); } void ModelProto_SentencePiece::Clear() { // @@protoc_insertion_point(message_clear_start:sentencepiece.ModelProto.SentencePiece) - ::google::protobuf::uint32 cached_has_bits = 0; + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; _extensions_.Clear(); cached_has_bits = _has_bits_[0]; if (cached_has_bits & 
0x00000001u) { - piece_.ClearNonDefaultToEmptyNoArena(); + piece_.ClearNonDefaultToEmpty(); } - if (cached_has_bits & 6u) { + if (cached_has_bits & 0x00000006u) { score_ = 0; type_ = 1; } _has_bits_.Clear(); - _internal_metadata_.Clear(); -} - -bool ModelProto_SentencePiece::MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - ::google::protobuf::uint32 tag; - ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter( - &_internal_metadata_); - ::google::protobuf::io::StringOutputStream unknown_fields_output( - unknown_fields_setter.buffer()); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_output, false); - // @@protoc_insertion_point(parse_start:sentencepiece.ModelProto.SentencePiece) - for (;;) { - ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); - tag = p.first; - if (!p.second) goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // optional string piece = 1; - case 1: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u /* 10 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadString( - input, this->mutable_piece())); - } else { - goto handle_unusual; - } - break; - } + _internal_metadata_.Clear(); +} +const char* ModelProto_SentencePiece::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + _Internal::HasBits has_bits{}; + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = ::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // optional string piece = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + auto str = 
_internal_mutable_piece(); + ptr = ::PROTOBUF_NAMESPACE_ID::internal::InlineGreedyStringParser(str, ptr, ctx); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional float score = 2; - case 2: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(21u /* 21 & 0xFF */)) { - set_has_score(); - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>( - input, &score_))); - } else { - goto handle_unusual; - } - break; - } - + case 2: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 21)) { + _Internal::set_has_score(&has_bits); + score_ = ::PROTOBUF_NAMESPACE_ID::internal::UnalignedLoad(ptr); + ptr += sizeof(float); + } else goto handle_unusual; + continue; // optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL]; - case 3: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(24u /* 24 & 0xFF */)) { - int value; - DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive< - int, ::google::protobuf::internal::WireFormatLite::TYPE_ENUM>( - input, &value))); - if (::sentencepiece::ModelProto_SentencePiece_Type_IsValid(value)) { - set_type(static_cast< ::sentencepiece::ModelProto_SentencePiece_Type >(value)); + case 3: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 24)) { + ::PROTOBUF_NAMESPACE_ID::uint64 val = ::PROTOBUF_NAMESPACE_ID::internal::ReadVarint64(&ptr); + CHK_(ptr); + if (PROTOBUF_PREDICT_TRUE(::sentencepiece::ModelProto_SentencePiece_Type_IsValid(val))) { + _internal_set_type(static_cast<::sentencepiece::ModelProto_SentencePiece_Type>(val)); } else { - unknown_fields_stream.WriteVarint32(24u); - unknown_fields_stream.WriteVarint32( - static_cast< ::google::protobuf::uint32>(value)); + ::PROTOBUF_NAMESPACE_ID::internal::WriteVarint(3, val, mutable_unknown_fields()); } - } else { - goto 
handle_unusual; - } - break; - } - + } else goto handle_unusual; + continue; default: { handle_unusual: - if (tag == 0) { + if ((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); goto success; } - if ((1600u <= tag)) { - DO_(_extensions_.ParseField(tag, input, - internal_default_instance(), - &unknown_fields_stream)); - continue; - } - DO_(::google::protobuf::internal::WireFormatLite::SkipField( - input, tag, &unknown_fields_stream)); - break; + if ((1600u <= tag)) { + ptr = _extensions_.ParseField(tag, ptr, + internal_default_instance(), &_internal_metadata_, ctx); + CHK_(ptr != nullptr); + continue; } - } - } + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; + } + } // switch + } // while success: - // @@protoc_insertion_point(parse_success:sentencepiece.ModelProto.SentencePiece) - return true; + _has_bits_.Or(has_bits); + return ptr; failure: - // @@protoc_insertion_point(parse_failure:sentencepiece.ModelProto.SentencePiece) - return false; -#undef DO_ + ptr = nullptr; + goto success; +#undef CHK_ } -void ModelProto_SentencePiece::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:sentencepiece.ModelProto.SentencePiece) - ::google::protobuf::uint32 cached_has_bits = 0; +::PROTOBUF_NAMESPACE_ID::uint8* ModelProto_SentencePiece::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:sentencepiece.ModelProto.SentencePiece) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; cached_has_bits = _has_bits_[0]; // optional string piece = 1; if (cached_has_bits & 0x00000001u) { - ::google::protobuf::internal::WireFormatLite::WriteStringMaybeAliased( - 1, this->piece(), output); + target = stream->WriteStringMaybeAliased( + 1, this->_internal_piece(), target); 
} // optional float score = 2; if (cached_has_bits & 0x00000002u) { - ::google::protobuf::internal::WireFormatLite::WriteFloat(2, this->score(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteFloatToArray(2, this->_internal_score(), target); } // optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL]; if (cached_has_bits & 0x00000004u) { - ::google::protobuf::internal::WireFormatLite::WriteEnum( - 3, this->type(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::WriteEnumToArray( + 3, this->_internal_type(), target); } // Extension range [200, 536870912) - _extensions_.SerializeWithCachedSizes( - 200, 536870912, output); + target = _extensions_._InternalSerialize( + 200, 536870912, target, stream); - output->WriteRaw(_internal_metadata_.unknown_fields().data(), - static_cast(_internal_metadata_.unknown_fields().size())); - // @@protoc_insertion_point(serialize_end:sentencepiece.ModelProto.SentencePiece) + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = stream->WriteRaw(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).data(), + static_cast(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size()), target); + } + // @@protoc_insertion_point(serialize_to_array_end:sentencepiece.ModelProto.SentencePiece) + return target; } size_t ModelProto_SentencePiece::ByteSizeLong() const { @@ -2908,51 +2985,57 @@ size_t ModelProto_SentencePiece::ByteSizeLong() const { total_size += _extensions_.ByteSize(); - total_size += _internal_metadata_.unknown_fields().size(); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; - if (_has_bits_[0 / 32] & 7u) { + cached_has_bits = _has_bits_[0]; + if (cached_has_bits & 0x00000007u) { // 
optional string piece = 1; - if (has_piece()) { + if (cached_has_bits & 0x00000001u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::StringSize( - this->piece()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::StringSize( + this->_internal_piece()); } // optional float score = 2; - if (has_score()) { + if (cached_has_bits & 0x00000002u) { total_size += 1 + 4; } // optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL]; - if (has_type()) { + if (cached_has_bits & 0x00000004u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::EnumSize(this->type()); + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::EnumSize(this->_internal_type()); } } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + total_size += _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size(); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; } void ModelProto_SentencePiece::CheckTypeAndMergeFrom( - const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) { + MergeFrom(*::PROTOBUF_NAMESPACE_ID::internal::DownCast( + &from)); } void ModelProto_SentencePiece::MergeFrom(const ModelProto_SentencePiece& from) { // @@protoc_insertion_point(class_specific_merge_from_start:sentencepiece.ModelProto.SentencePiece) GOOGLE_DCHECK_NE(&from, this); _extensions_.MergeFrom(from._extensions_); - _internal_metadata_.MergeFrom(from._internal_metadata_); - ::google::protobuf::uint32 cached_has_bits = 0; + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; cached_has_bits = from._has_bits_[0]; - if (cached_has_bits & 7u) { + if (cached_has_bits & 0x00000007u) { 
if (cached_has_bits & 0x00000001u) { - set_has_piece(); - piece_.AssignWithDefault(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from.piece_); + _internal_set_piece(from._internal_piece()); } if (cached_has_bits & 0x00000002u) { score_ = from.score_; @@ -2979,290 +3062,299 @@ bool ModelProto_SentencePiece::IsInitialized() const { return true; } -void ModelProto_SentencePiece::Swap(ModelProto_SentencePiece* other) { - if (other == this) return; - InternalSwap(other); -} void ModelProto_SentencePiece::InternalSwap(ModelProto_SentencePiece* other) { using std::swap; - piece_.Swap(&other->piece_, &::google::protobuf::internal::GetEmptyStringAlreadyInited(), - GetArenaNoVirtual()); + _extensions_.Swap(&other->_extensions_); + _internal_metadata_.Swap(&other->_internal_metadata_); + swap(_has_bits_[0], other->_has_bits_[0]); + piece_.Swap(&other->piece_, &::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); swap(score_, other->score_); swap(type_, other->type_); - swap(_has_bits_[0], other->_has_bits_[0]); - _internal_metadata_.Swap(&other->_internal_metadata_); - _extensions_.Swap(&other->_extensions_); } -::std::string ModelProto_SentencePiece::GetTypeName() const { +std::string ModelProto_SentencePiece::GetTypeName() const { return "sentencepiece.ModelProto.SentencePiece"; } // =================================================================== -void ModelProto::InitAsDefaultInstance() { - ::sentencepiece::_ModelProto_default_instance_._instance.get_mutable()->trainer_spec_ = const_cast< ::sentencepiece::TrainerSpec*>( - ::sentencepiece::TrainerSpec::internal_default_instance()); - ::sentencepiece::_ModelProto_default_instance_._instance.get_mutable()->normalizer_spec_ = const_cast< ::sentencepiece::NormalizerSpec*>( - ::sentencepiece::NormalizerSpec::internal_default_instance()); - ::sentencepiece::_ModelProto_default_instance_._instance.get_mutable()->self_test_data_ = const_cast< ::sentencepiece::SelfTestData*>( - 
::sentencepiece::SelfTestData::internal_default_instance()); - ::sentencepiece::_ModelProto_default_instance_._instance.get_mutable()->denormalizer_spec_ = const_cast< ::sentencepiece::NormalizerSpec*>( - ::sentencepiece::NormalizerSpec::internal_default_instance()); -} -#if !defined(_MSC_VER) || _MSC_VER >= 1900 -const int ModelProto::kPiecesFieldNumber; -const int ModelProto::kTrainerSpecFieldNumber; -const int ModelProto::kNormalizerSpecFieldNumber; -const int ModelProto::kSelfTestDataFieldNumber; -const int ModelProto::kDenormalizerSpecFieldNumber; -#endif // !defined(_MSC_VER) || _MSC_VER >= 1900 - -ModelProto::ModelProto() - : ::google::protobuf::MessageLite(), _internal_metadata_(NULL) { - ::google::protobuf::internal::InitSCC( - &protobuf_sentencepiece_5fmodel_2eproto::scc_info_ModelProto.base); +class ModelProto::_Internal { + public: + using HasBits = decltype(std::declval()._has_bits_); + static const ::sentencepiece::TrainerSpec& trainer_spec(const ModelProto* msg); + static void set_has_trainer_spec(HasBits* has_bits) { + (*has_bits)[0] |= 1u; + } + static const ::sentencepiece::NormalizerSpec& normalizer_spec(const ModelProto* msg); + static void set_has_normalizer_spec(HasBits* has_bits) { + (*has_bits)[0] |= 2u; + } + static const ::sentencepiece::SelfTestData& self_test_data(const ModelProto* msg); + static void set_has_self_test_data(HasBits* has_bits) { + (*has_bits)[0] |= 4u; + } + static const ::sentencepiece::NormalizerSpec& denormalizer_spec(const ModelProto* msg); + static void set_has_denormalizer_spec(HasBits* has_bits) { + (*has_bits)[0] |= 8u; + } +}; + +const ::sentencepiece::TrainerSpec& +ModelProto::_Internal::trainer_spec(const ModelProto* msg) { + return *msg->trainer_spec_; +} +const ::sentencepiece::NormalizerSpec& +ModelProto::_Internal::normalizer_spec(const ModelProto* msg) { + return *msg->normalizer_spec_; +} +const ::sentencepiece::SelfTestData& +ModelProto::_Internal::self_test_data(const ModelProto* msg) { + return 
*msg->self_test_data_; +} +const ::sentencepiece::NormalizerSpec& +ModelProto::_Internal::denormalizer_spec(const ModelProto* msg) { + return *msg->denormalizer_spec_; +} +ModelProto::ModelProto(::PROTOBUF_NAMESPACE_ID::Arena* arena) + : ::PROTOBUF_NAMESPACE_ID::MessageLite(arena), + _extensions_(arena), + pieces_(arena) { SharedCtor(); - // @@protoc_insertion_point(constructor:sentencepiece.ModelProto) + RegisterArenaDtor(arena); + // @@protoc_insertion_point(arena_constructor:sentencepiece.ModelProto) } ModelProto::ModelProto(const ModelProto& from) - : ::google::protobuf::MessageLite(), - _internal_metadata_(NULL), + : ::PROTOBUF_NAMESPACE_ID::MessageLite(), _has_bits_(from._has_bits_), pieces_(from.pieces_) { - _internal_metadata_.MergeFrom(from._internal_metadata_); + _internal_metadata_.MergeFrom(from._internal_metadata_); _extensions_.MergeFrom(from._extensions_); - if (from.has_trainer_spec()) { + if (from._internal_has_trainer_spec()) { trainer_spec_ = new ::sentencepiece::TrainerSpec(*from.trainer_spec_); } else { - trainer_spec_ = NULL; + trainer_spec_ = nullptr; } - if (from.has_normalizer_spec()) { + if (from._internal_has_normalizer_spec()) { normalizer_spec_ = new ::sentencepiece::NormalizerSpec(*from.normalizer_spec_); } else { - normalizer_spec_ = NULL; + normalizer_spec_ = nullptr; } - if (from.has_self_test_data()) { + if (from._internal_has_self_test_data()) { self_test_data_ = new ::sentencepiece::SelfTestData(*from.self_test_data_); } else { - self_test_data_ = NULL; + self_test_data_ = nullptr; } - if (from.has_denormalizer_spec()) { + if (from._internal_has_denormalizer_spec()) { denormalizer_spec_ = new ::sentencepiece::NormalizerSpec(*from.denormalizer_spec_); } else { - denormalizer_spec_ = NULL; + denormalizer_spec_ = nullptr; } // @@protoc_insertion_point(copy_constructor:sentencepiece.ModelProto) } void ModelProto::SharedCtor() { - ::memset(&trainer_spec_, 0, static_cast( - reinterpret_cast(&denormalizer_spec_) - + 
::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&scc_info_ModelProto_sentencepiece_5fmodel_2eproto.base); + ::memset(reinterpret_cast(this) + static_cast( + reinterpret_cast(&trainer_spec_) - reinterpret_cast(this)), + 0, static_cast(reinterpret_cast(&denormalizer_spec_) - reinterpret_cast(&trainer_spec_)) + sizeof(denormalizer_spec_)); } ModelProto::~ModelProto() { // @@protoc_insertion_point(destructor:sentencepiece.ModelProto) SharedDtor(); + _internal_metadata_.Delete(); } void ModelProto::SharedDtor() { + GOOGLE_DCHECK(GetArena() == nullptr); if (this != internal_default_instance()) delete trainer_spec_; if (this != internal_default_instance()) delete normalizer_spec_; if (this != internal_default_instance()) delete self_test_data_; if (this != internal_default_instance()) delete denormalizer_spec_; } +void ModelProto::ArenaDtor(void* object) { + ModelProto* _this = reinterpret_cast< ModelProto* >(object); + (void)_this; +} +void ModelProto::RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena*) { +} void ModelProto::SetCachedSize(int size) const { _cached_size_.Set(size); } const ModelProto& ModelProto::default_instance() { - ::google::protobuf::internal::InitSCC(&protobuf_sentencepiece_5fmodel_2eproto::scc_info_ModelProto.base); + ::PROTOBUF_NAMESPACE_ID::internal::InitSCC(&::scc_info_ModelProto_sentencepiece_5fmodel_2eproto.base); return *internal_default_instance(); } void ModelProto::Clear() { // @@protoc_insertion_point(message_clear_start:sentencepiece.ModelProto) - ::google::protobuf::uint32 cached_has_bits = 0; + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; // Prevent compiler warnings about cached_has_bits being unused (void) cached_has_bits; _extensions_.Clear(); pieces_.Clear(); cached_has_bits = _has_bits_[0]; - if (cached_has_bits & 15u) { + if (cached_has_bits & 0x0000000fu) { if (cached_has_bits & 0x00000001u) { - GOOGLE_DCHECK(trainer_spec_ != NULL); + GOOGLE_DCHECK(trainer_spec_ != nullptr); trainer_spec_->Clear(); } if (cached_has_bits & 
0x00000002u) { - GOOGLE_DCHECK(normalizer_spec_ != NULL); + GOOGLE_DCHECK(normalizer_spec_ != nullptr); normalizer_spec_->Clear(); } if (cached_has_bits & 0x00000004u) { - GOOGLE_DCHECK(self_test_data_ != NULL); + GOOGLE_DCHECK(self_test_data_ != nullptr); self_test_data_->Clear(); } if (cached_has_bits & 0x00000008u) { - GOOGLE_DCHECK(denormalizer_spec_ != NULL); + GOOGLE_DCHECK(denormalizer_spec_ != nullptr); denormalizer_spec_->Clear(); } } _has_bits_.Clear(); - _internal_metadata_.Clear(); -} - -bool ModelProto::MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) { -#define DO_(EXPRESSION) if (!GOOGLE_PREDICT_TRUE(EXPRESSION)) goto failure - ::google::protobuf::uint32 tag; - ::google::protobuf::internal::LiteUnknownFieldSetter unknown_fields_setter( - &_internal_metadata_); - ::google::protobuf::io::StringOutputStream unknown_fields_output( - unknown_fields_setter.buffer()); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_output, false); - // @@protoc_insertion_point(parse_start:sentencepiece.ModelProto) - for (;;) { - ::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(127u); - tag = p.first; - if (!p.second) goto handle_unusual; - switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) { - // repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; - case 1: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(10u /* 10 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessage( - input, add_pieces())); - } else { - goto handle_unusual; - } - break; - } + _internal_metadata_.Clear(); +} +const char* ModelProto::_InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) { +#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure + _Internal::HasBits has_bits{}; + while (!ctx->Done(&ptr)) { + ::PROTOBUF_NAMESPACE_ID::uint32 tag; + ptr = 
::PROTOBUF_NAMESPACE_ID::internal::ReadTag(ptr, &tag); + CHK_(ptr); + switch (tag >> 3) { + // repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; + case 1: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 10)) { + ptr -= 1; + do { + ptr += 1; + ptr = ctx->ParseMessage(_internal_add_pieces(), ptr); + CHK_(ptr); + if (!ctx->DataAvailable(ptr)) break; + } while (::PROTOBUF_NAMESPACE_ID::internal::ExpectTag<10>(ptr)); + } else goto handle_unusual; + continue; // optional .sentencepiece.TrainerSpec trainer_spec = 2; - case 2: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(18u /* 18 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessage( - input, mutable_trainer_spec())); - } else { - goto handle_unusual; - } - break; - } - + case 2: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 18)) { + ptr = ctx->ParseMessage(_internal_mutable_trainer_spec(), ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional .sentencepiece.NormalizerSpec normalizer_spec = 3; - case 3: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(26u /* 26 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessage( - input, mutable_normalizer_spec())); - } else { - goto handle_unusual; - } - break; - } - + case 3: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 26)) { + ptr = ctx->ParseMessage(_internal_mutable_normalizer_spec(), ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional .sentencepiece.SelfTestData self_test_data = 4; - case 4: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(34u /* 34 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessage( - input, mutable_self_test_data())); - } else { - goto handle_unusual; - } - break; - } - + case 4: + if 
(PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 34)) { + ptr = ctx->ParseMessage(_internal_mutable_self_test_data(), ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; // optional .sentencepiece.NormalizerSpec denormalizer_spec = 5; - case 5: { - if (static_cast< ::google::protobuf::uint8>(tag) == - static_cast< ::google::protobuf::uint8>(42u /* 42 & 0xFF */)) { - DO_(::google::protobuf::internal::WireFormatLite::ReadMessage( - input, mutable_denormalizer_spec())); - } else { - goto handle_unusual; - } - break; - } - + case 5: + if (PROTOBUF_PREDICT_TRUE(static_cast<::PROTOBUF_NAMESPACE_ID::uint8>(tag) == 42)) { + ptr = ctx->ParseMessage(_internal_mutable_denormalizer_spec(), ptr); + CHK_(ptr); + } else goto handle_unusual; + continue; default: { handle_unusual: - if (tag == 0) { + if ((tag & 7) == 4 || tag == 0) { + ctx->SetLastTag(tag); goto success; } - if ((1600u <= tag)) { - DO_(_extensions_.ParseField(tag, input, - internal_default_instance(), - &unknown_fields_stream)); - continue; - } - DO_(::google::protobuf::internal::WireFormatLite::SkipField( - input, tag, &unknown_fields_stream)); - break; + if ((1600u <= tag)) { + ptr = _extensions_.ParseField(tag, ptr, + internal_default_instance(), &_internal_metadata_, ctx); + CHK_(ptr != nullptr); + continue; } - } - } + ptr = UnknownFieldParse(tag, + _internal_metadata_.mutable_unknown_fields(), + ptr, ctx); + CHK_(ptr != nullptr); + continue; + } + } // switch + } // while success: - // @@protoc_insertion_point(parse_success:sentencepiece.ModelProto) - return true; + _has_bits_.Or(has_bits); + return ptr; failure: - // @@protoc_insertion_point(parse_failure:sentencepiece.ModelProto) - return false; -#undef DO_ + ptr = nullptr; + goto success; +#undef CHK_ } -void ModelProto::SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const { - // @@protoc_insertion_point(serialize_start:sentencepiece.ModelProto) - ::google::protobuf::uint32 cached_has_bits 
= 0; +::PROTOBUF_NAMESPACE_ID::uint8* ModelProto::_InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const { + // @@protoc_insertion_point(serialize_to_array_start:sentencepiece.ModelProto) + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; // repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; for (unsigned int i = 0, - n = static_cast(this->pieces_size()); i < n; i++) { - ::google::protobuf::internal::WireFormatLite::WriteMessage( - 1, - this->pieces(static_cast(i)), - output); + n = static_cast(this->_internal_pieces_size()); i < n; i++) { + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage(1, this->_internal_pieces(i), target, stream); } cached_has_bits = _has_bits_[0]; // optional .sentencepiece.TrainerSpec trainer_spec = 2; if (cached_has_bits & 0x00000001u) { - ::google::protobuf::internal::WireFormatLite::WriteMessage( - 2, this->_internal_trainer_spec(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage( + 2, _Internal::trainer_spec(this), target, stream); } // optional .sentencepiece.NormalizerSpec normalizer_spec = 3; if (cached_has_bits & 0x00000002u) { - ::google::protobuf::internal::WireFormatLite::WriteMessage( - 3, this->_internal_normalizer_spec(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage( + 3, _Internal::normalizer_spec(this), target, stream); } // optional .sentencepiece.SelfTestData self_test_data = 4; if (cached_has_bits & 0x00000004u) { - ::google::protobuf::internal::WireFormatLite::WriteMessage( - 4, this->_internal_self_test_data(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage( + 4, 
_Internal::self_test_data(this), target, stream); } // optional .sentencepiece.NormalizerSpec denormalizer_spec = 5; if (cached_has_bits & 0x00000008u) { - ::google::protobuf::internal::WireFormatLite::WriteMessage( - 5, this->_internal_denormalizer_spec(), output); + target = stream->EnsureSpace(target); + target = ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite:: + InternalWriteMessage( + 5, _Internal::denormalizer_spec(this), target, stream); } // Extension range [200, 536870912) - _extensions_.SerializeWithCachedSizes( - 200, 536870912, output); + target = _extensions_._InternalSerialize( + 200, 536870912, target, stream); - output->WriteRaw(_internal_metadata_.unknown_fields().data(), - static_cast(_internal_metadata_.unknown_fields().size())); - // @@protoc_insertion_point(serialize_end:sentencepiece.ModelProto) + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + target = stream->WriteRaw(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).data(), + static_cast(_internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size()), target); + } + // @@protoc_insertion_point(serialize_to_array_end:sentencepiece.ModelProto) + return target; } size_t ModelProto::ByteSizeLong() const { @@ -3271,81 +3363,84 @@ size_t ModelProto::ByteSizeLong() const { total_size += _extensions_.ByteSize(); - total_size += _internal_metadata_.unknown_fields().size(); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; + // Prevent compiler warnings about cached_has_bits being unused + (void) cached_has_bits; // repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; - { - unsigned int count = static_cast(this->pieces_size()); - total_size += 1UL * count; - for (unsigned int i = 0; i < count; i++) { - total_size += - ::google::protobuf::internal::WireFormatLite::MessageSize( - this->pieces(static_cast(i))); - } + total_size += 1UL * this->_internal_pieces_size(); + for (const auto& msg : 
this->pieces_) { + total_size += + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize(msg); } - if (_has_bits_[0 / 32] & 15u) { + cached_has_bits = _has_bits_[0]; + if (cached_has_bits & 0x0000000fu) { // optional .sentencepiece.TrainerSpec trainer_spec = 2; - if (has_trainer_spec()) { + if (cached_has_bits & 0x00000001u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::MessageSize( + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize( *trainer_spec_); } // optional .sentencepiece.NormalizerSpec normalizer_spec = 3; - if (has_normalizer_spec()) { + if (cached_has_bits & 0x00000002u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::MessageSize( + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize( *normalizer_spec_); } // optional .sentencepiece.SelfTestData self_test_data = 4; - if (has_self_test_data()) { + if (cached_has_bits & 0x00000004u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::MessageSize( + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize( *self_test_data_); } // optional .sentencepiece.NormalizerSpec denormalizer_spec = 5; - if (has_denormalizer_spec()) { + if (cached_has_bits & 0x00000008u) { total_size += 1 + - ::google::protobuf::internal::WireFormatLite::MessageSize( + ::PROTOBUF_NAMESPACE_ID::internal::WireFormatLite::MessageSize( *denormalizer_spec_); } } - int cached_size = ::google::protobuf::internal::ToCachedSize(total_size); + if (PROTOBUF_PREDICT_FALSE(_internal_metadata_.have_unknown_fields())) { + total_size += _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString).size(); + } + int cached_size = ::PROTOBUF_NAMESPACE_ID::internal::ToCachedSize(total_size); SetCachedSize(cached_size); return total_size; } void ModelProto::CheckTypeAndMergeFrom( - const ::google::protobuf::MessageLite& from) { - MergeFrom(*::google::protobuf::down_cast(&from)); + const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) { + 
MergeFrom(*::PROTOBUF_NAMESPACE_ID::internal::DownCast( + &from)); } void ModelProto::MergeFrom(const ModelProto& from) { // @@protoc_insertion_point(class_specific_merge_from_start:sentencepiece.ModelProto) GOOGLE_DCHECK_NE(&from, this); _extensions_.MergeFrom(from._extensions_); - _internal_metadata_.MergeFrom(from._internal_metadata_); - ::google::protobuf::uint32 cached_has_bits = 0; + _internal_metadata_.MergeFrom(from._internal_metadata_); + ::PROTOBUF_NAMESPACE_ID::uint32 cached_has_bits = 0; (void) cached_has_bits; pieces_.MergeFrom(from.pieces_); cached_has_bits = from._has_bits_[0]; - if (cached_has_bits & 15u) { + if (cached_has_bits & 0x0000000fu) { if (cached_has_bits & 0x00000001u) { - mutable_trainer_spec()->::sentencepiece::TrainerSpec::MergeFrom(from.trainer_spec()); + _internal_mutable_trainer_spec()->::sentencepiece::TrainerSpec::MergeFrom(from._internal_trainer_spec()); } if (cached_has_bits & 0x00000002u) { - mutable_normalizer_spec()->::sentencepiece::NormalizerSpec::MergeFrom(from.normalizer_spec()); + _internal_mutable_normalizer_spec()->::sentencepiece::NormalizerSpec::MergeFrom(from._internal_normalizer_spec()); } if (cached_has_bits & 0x00000004u) { - mutable_self_test_data()->::sentencepiece::SelfTestData::MergeFrom(from.self_test_data()); + _internal_mutable_self_test_data()->::sentencepiece::SelfTestData::MergeFrom(from._internal_self_test_data()); } if (cached_has_bits & 0x00000008u) { - mutable_denormalizer_spec()->::sentencepiece::NormalizerSpec::MergeFrom(from.denormalizer_spec()); + _internal_mutable_denormalizer_spec()->::sentencepiece::NormalizerSpec::MergeFrom(from._internal_denormalizer_spec()); } } } @@ -3362,66 +3457,63 @@ bool ModelProto::IsInitialized() const { return false; } - if (!::google::protobuf::internal::AllAreInitialized(this->pieces())) return false; - if (has_trainer_spec()) { - if (!this->trainer_spec_->IsInitialized()) return false; + if (!::PROTOBUF_NAMESPACE_ID::internal::AllAreInitialized(pieces_)) return 
false; + if (_internal_has_trainer_spec()) { + if (!trainer_spec_->IsInitialized()) return false; } - if (has_normalizer_spec()) { - if (!this->normalizer_spec_->IsInitialized()) return false; + if (_internal_has_normalizer_spec()) { + if (!normalizer_spec_->IsInitialized()) return false; } - if (has_self_test_data()) { - if (!this->self_test_data_->IsInitialized()) return false; + if (_internal_has_self_test_data()) { + if (!self_test_data_->IsInitialized()) return false; } - if (has_denormalizer_spec()) { - if (!this->denormalizer_spec_->IsInitialized()) return false; + if (_internal_has_denormalizer_spec()) { + if (!denormalizer_spec_->IsInitialized()) return false; } return true; } -void ModelProto::Swap(ModelProto* other) { - if (other == this) return; - InternalSwap(other); -} void ModelProto::InternalSwap(ModelProto* other) { using std::swap; - CastToBase(&pieces_)->InternalSwap(CastToBase(&other->pieces_)); - swap(trainer_spec_, other->trainer_spec_); - swap(normalizer_spec_, other->normalizer_spec_); - swap(self_test_data_, other->self_test_data_); - swap(denormalizer_spec_, other->denormalizer_spec_); - swap(_has_bits_[0], other->_has_bits_[0]); - _internal_metadata_.Swap(&other->_internal_metadata_); _extensions_.Swap(&other->_extensions_); + _internal_metadata_.Swap(&other->_internal_metadata_); + swap(_has_bits_[0], other->_has_bits_[0]); + pieces_.InternalSwap(&other->pieces_); + ::PROTOBUF_NAMESPACE_ID::internal::memswap< + PROTOBUF_FIELD_OFFSET(ModelProto, denormalizer_spec_) + + sizeof(ModelProto::denormalizer_spec_) + - PROTOBUF_FIELD_OFFSET(ModelProto, trainer_spec_)>( + reinterpret_cast(&trainer_spec_), + reinterpret_cast(&other->trainer_spec_)); } -::std::string ModelProto::GetTypeName() const { +std::string ModelProto::GetTypeName() const { return "sentencepiece.ModelProto"; } // @@protoc_insertion_point(namespace_scope) } // namespace sentencepiece -namespace google { -namespace protobuf { -template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE 
::sentencepiece::TrainerSpec* Arena::CreateMaybeMessage< ::sentencepiece::TrainerSpec >(Arena* arena) { - return Arena::CreateInternal< ::sentencepiece::TrainerSpec >(arena); +PROTOBUF_NAMESPACE_OPEN +template<> PROTOBUF_NOINLINE ::sentencepiece::TrainerSpec* Arena::CreateMaybeMessage< ::sentencepiece::TrainerSpec >(Arena* arena) { + return Arena::CreateMessageInternal< ::sentencepiece::TrainerSpec >(arena); } -template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::sentencepiece::NormalizerSpec* Arena::CreateMaybeMessage< ::sentencepiece::NormalizerSpec >(Arena* arena) { - return Arena::CreateInternal< ::sentencepiece::NormalizerSpec >(arena); +template<> PROTOBUF_NOINLINE ::sentencepiece::NormalizerSpec* Arena::CreateMaybeMessage< ::sentencepiece::NormalizerSpec >(Arena* arena) { + return Arena::CreateMessageInternal< ::sentencepiece::NormalizerSpec >(arena); } -template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::sentencepiece::SelfTestData_Sample* Arena::CreateMaybeMessage< ::sentencepiece::SelfTestData_Sample >(Arena* arena) { - return Arena::CreateInternal< ::sentencepiece::SelfTestData_Sample >(arena); +template<> PROTOBUF_NOINLINE ::sentencepiece::SelfTestData_Sample* Arena::CreateMaybeMessage< ::sentencepiece::SelfTestData_Sample >(Arena* arena) { + return Arena::CreateMessageInternal< ::sentencepiece::SelfTestData_Sample >(arena); } -template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::sentencepiece::SelfTestData* Arena::CreateMaybeMessage< ::sentencepiece::SelfTestData >(Arena* arena) { - return Arena::CreateInternal< ::sentencepiece::SelfTestData >(arena); +template<> PROTOBUF_NOINLINE ::sentencepiece::SelfTestData* Arena::CreateMaybeMessage< ::sentencepiece::SelfTestData >(Arena* arena) { + return Arena::CreateMessageInternal< ::sentencepiece::SelfTestData >(arena); } -template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::sentencepiece::ModelProto_SentencePiece* Arena::CreateMaybeMessage< ::sentencepiece::ModelProto_SentencePiece >(Arena* arena) { - return 
Arena::CreateInternal< ::sentencepiece::ModelProto_SentencePiece >(arena); +template<> PROTOBUF_NOINLINE ::sentencepiece::ModelProto_SentencePiece* Arena::CreateMaybeMessage< ::sentencepiece::ModelProto_SentencePiece >(Arena* arena) { + return Arena::CreateMessageInternal< ::sentencepiece::ModelProto_SentencePiece >(arena); } -template<> GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE ::sentencepiece::ModelProto* Arena::CreateMaybeMessage< ::sentencepiece::ModelProto >(Arena* arena) { - return Arena::CreateInternal< ::sentencepiece::ModelProto >(arena); +template<> PROTOBUF_NOINLINE ::sentencepiece::ModelProto* Arena::CreateMaybeMessage< ::sentencepiece::ModelProto >(Arena* arena) { + return Arena::CreateMessageInternal< ::sentencepiece::ModelProto >(arena); } -} // namespace protobuf -} // namespace google +PROTOBUF_NAMESPACE_CLOSE // @@protoc_insertion_point(global_scope) +#include diff --git a/src/builtin_pb/sentencepiece_model.pb.h b/src/builtin_pb/sentencepiece_model.pb.h index 01cecc1f..583c6aa9 100644 --- a/src/builtin_pb/sentencepiece_model.pb.h +++ b/src/builtin_pb/sentencepiece_model.pb.h @@ -1,49 +1,56 @@ // Generated by the protocol buffer compiler. DO NOT EDIT! // source: sentencepiece_model.proto -#ifndef PROTOBUF_INCLUDED_sentencepiece_5fmodel_2eproto -#define PROTOBUF_INCLUDED_sentencepiece_5fmodel_2eproto +#ifndef GOOGLE_PROTOBUF_INCLUDED_sentencepiece_5fmodel_2eproto +#define GOOGLE_PROTOBUF_INCLUDED_sentencepiece_5fmodel_2eproto +#include #include -#include - -#if GOOGLE_PROTOBUF_VERSION < 3006001 +#include +#if PROTOBUF_VERSION < 3014000 #error This file was generated by a newer version of protoc which is -#error incompatible with your Protocol Buffer headers. Please update +#error incompatible with your Protocol Buffer headers. Please update #error your headers. 
#endif -#if 3006001 < GOOGLE_PROTOBUF_MIN_PROTOC_VERSION +#if 3014000 < PROTOBUF_MIN_PROTOC_VERSION #error This file was generated by an older version of protoc which is -#error incompatible with your Protocol Buffer headers. Please +#error incompatible with your Protocol Buffer headers. Please #error regenerate this file with a newer version of protoc. #endif +#include #include #include #include #include #include -#include #include #include #include // IWYU pragma: export #include // IWYU pragma: export #include // @@protoc_insertion_point(includes) -#define PROTOBUF_INTERNAL_EXPORT_protobuf_sentencepiece_5fmodel_2eproto +#include +#define PROTOBUF_INTERNAL_EXPORT_sentencepiece_5fmodel_2eproto +PROTOBUF_NAMESPACE_OPEN +namespace internal { +class AnyMetadata; +} // namespace internal +PROTOBUF_NAMESPACE_CLOSE -namespace protobuf_sentencepiece_5fmodel_2eproto { // Internal implementation detail -- do not use these members. -struct TableStruct { - static const ::google::protobuf::internal::ParseTableField entries[]; - static const ::google::protobuf::internal::AuxillaryParseTableField aux[]; - static const ::google::protobuf::internal::ParseTable schema[6]; - static const ::google::protobuf::internal::FieldMetadata field_metadata[]; - static const ::google::protobuf::internal::SerializationTable serialization_table[]; - static const ::google::protobuf::uint32 offsets[]; +struct TableStruct_sentencepiece_5fmodel_2eproto { + static const ::PROTOBUF_NAMESPACE_ID::internal::ParseTableField entries[] + PROTOBUF_SECTION_VARIABLE(protodesc_cold); + static const ::PROTOBUF_NAMESPACE_ID::internal::AuxiliaryParseTableField aux[] + PROTOBUF_SECTION_VARIABLE(protodesc_cold); + static const ::PROTOBUF_NAMESPACE_ID::internal::ParseTable schema[6] + PROTOBUF_SECTION_VARIABLE(protodesc_cold); + static const ::PROTOBUF_NAMESPACE_ID::internal::FieldMetadata field_metadata[]; + static const ::PROTOBUF_NAMESPACE_ID::internal::SerializationTable serialization_table[]; + static const 
::PROTOBUF_NAMESPACE_ID::uint32 offsets[]; }; -} // namespace protobuf_sentencepiece_5fmodel_2eproto namespace sentencepiece { class ModelProto; class ModelProtoDefaultTypeInternal; @@ -64,30 +71,38 @@ class TrainerSpec; class TrainerSpecDefaultTypeInternal; extern TrainerSpecDefaultTypeInternal _TrainerSpec_default_instance_; } // namespace sentencepiece -namespace google { -namespace protobuf { +PROTOBUF_NAMESPACE_OPEN template<> ::sentencepiece::ModelProto* Arena::CreateMaybeMessage<::sentencepiece::ModelProto>(Arena*); template<> ::sentencepiece::ModelProto_SentencePiece* Arena::CreateMaybeMessage<::sentencepiece::ModelProto_SentencePiece>(Arena*); template<> ::sentencepiece::NormalizerSpec* Arena::CreateMaybeMessage<::sentencepiece::NormalizerSpec>(Arena*); template<> ::sentencepiece::SelfTestData* Arena::CreateMaybeMessage<::sentencepiece::SelfTestData>(Arena*); template<> ::sentencepiece::SelfTestData_Sample* Arena::CreateMaybeMessage<::sentencepiece::SelfTestData_Sample>(Arena*); template<> ::sentencepiece::TrainerSpec* Arena::CreateMaybeMessage<::sentencepiece::TrainerSpec>(Arena*); -} // namespace protobuf -} // namespace google +PROTOBUF_NAMESPACE_CLOSE namespace sentencepiece { -enum TrainerSpec_ModelType { +enum TrainerSpec_ModelType : int { TrainerSpec_ModelType_UNIGRAM = 1, TrainerSpec_ModelType_BPE = 2, TrainerSpec_ModelType_WORD = 3, TrainerSpec_ModelType_CHAR = 4 }; bool TrainerSpec_ModelType_IsValid(int value); -const TrainerSpec_ModelType TrainerSpec_ModelType_ModelType_MIN = TrainerSpec_ModelType_UNIGRAM; -const TrainerSpec_ModelType TrainerSpec_ModelType_ModelType_MAX = TrainerSpec_ModelType_CHAR; -const int TrainerSpec_ModelType_ModelType_ARRAYSIZE = TrainerSpec_ModelType_ModelType_MAX + 1; - -enum ModelProto_SentencePiece_Type { +constexpr TrainerSpec_ModelType TrainerSpec_ModelType_ModelType_MIN = TrainerSpec_ModelType_UNIGRAM; +constexpr TrainerSpec_ModelType TrainerSpec_ModelType_ModelType_MAX = TrainerSpec_ModelType_CHAR; +constexpr int 
TrainerSpec_ModelType_ModelType_ARRAYSIZE = TrainerSpec_ModelType_ModelType_MAX + 1; + +const std::string& TrainerSpec_ModelType_Name(TrainerSpec_ModelType value); +template +inline const std::string& TrainerSpec_ModelType_Name(T enum_t_value) { + static_assert(::std::is_same::value || + ::std::is_integral::value, + "Incorrect type passed to function TrainerSpec_ModelType_Name."); + return TrainerSpec_ModelType_Name(static_cast(enum_t_value)); +} +bool TrainerSpec_ModelType_Parse( + ::PROTOBUF_NAMESPACE_ID::ConstStringParam name, TrainerSpec_ModelType* value); +enum ModelProto_SentencePiece_Type : int { ModelProto_SentencePiece_Type_NORMAL = 1, ModelProto_SentencePiece_Type_UNKNOWN = 2, ModelProto_SentencePiece_Type_CONTROL = 3, @@ -96,48 +111,56 @@ enum ModelProto_SentencePiece_Type { ModelProto_SentencePiece_Type_UNUSED = 5 }; bool ModelProto_SentencePiece_Type_IsValid(int value); -const ModelProto_SentencePiece_Type ModelProto_SentencePiece_Type_Type_MIN = ModelProto_SentencePiece_Type_NORMAL; -const ModelProto_SentencePiece_Type ModelProto_SentencePiece_Type_Type_MAX = ModelProto_SentencePiece_Type_BYTE; -const int ModelProto_SentencePiece_Type_Type_ARRAYSIZE = ModelProto_SentencePiece_Type_Type_MAX + 1; - +constexpr ModelProto_SentencePiece_Type ModelProto_SentencePiece_Type_Type_MIN = ModelProto_SentencePiece_Type_NORMAL; +constexpr ModelProto_SentencePiece_Type ModelProto_SentencePiece_Type_Type_MAX = ModelProto_SentencePiece_Type_BYTE; +constexpr int ModelProto_SentencePiece_Type_Type_ARRAYSIZE = ModelProto_SentencePiece_Type_Type_MAX + 1; + +const std::string& ModelProto_SentencePiece_Type_Name(ModelProto_SentencePiece_Type value); +template +inline const std::string& ModelProto_SentencePiece_Type_Name(T enum_t_value) { + static_assert(::std::is_same::value || + ::std::is_integral::value, + "Incorrect type passed to function ModelProto_SentencePiece_Type_Name."); + return ModelProto_SentencePiece_Type_Name(static_cast(enum_t_value)); +} +bool 
ModelProto_SentencePiece_Type_Parse( + ::PROTOBUF_NAMESPACE_ID::ConstStringParam name, ModelProto_SentencePiece_Type* value); // =================================================================== -class TrainerSpec : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.TrainerSpec) */ { +class TrainerSpec PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.TrainerSpec) */ { public: - TrainerSpec(); + inline TrainerSpec() : TrainerSpec(nullptr) {} virtual ~TrainerSpec(); TrainerSpec(const TrainerSpec& from); - - inline TrainerSpec& operator=(const TrainerSpec& from) { - CopyFrom(from); - return *this; - } - #if LANG_CXX11 TrainerSpec(TrainerSpec&& from) noexcept : TrainerSpec() { *this = ::std::move(from); } + inline TrainerSpec& operator=(const TrainerSpec& from) { + CopyFrom(from); + return *this; + } inline TrainerSpec& operator=(TrainerSpec&& from) noexcept { - if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) { + if (GetArena() == from.GetArena()) { if (this != &from) InternalSwap(&from); } else { CopyFrom(from); } return *this; } - #endif - inline const ::std::string& unknown_fields() const { - return _internal_metadata_.unknown_fields(); + + inline const std::string& unknown_fields() const { + return _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString); } - inline ::std::string* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); + inline std::string* mutable_unknown_fields() { + return _internal_metadata_.mutable_unknown_fields(); } static const TrainerSpec& default_instance(); - static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY static inline const TrainerSpec* internal_default_instance() { return reinterpret_cast( &_TrainerSpec_default_instance_); @@ -145,654 +168,937 @@ class TrainerSpec : public ::google::protobuf::MessageLite /* @@protoc_insertion static constexpr 
int kIndexInFileMessages = 0; - void Swap(TrainerSpec* other); friend void swap(TrainerSpec& a, TrainerSpec& b) { a.Swap(&b); } + inline void Swap(TrainerSpec* other) { + if (other == this) return; + if (GetArena() == other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(TrainerSpec* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } // implements Message ---------------------------------------------- inline TrainerSpec* New() const final { - return CreateMaybeMessage(NULL); + return CreateMaybeMessage(nullptr); } - TrainerSpec* New(::google::protobuf::Arena* arena) const final { + TrainerSpec* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { return CreateMaybeMessage(arena); } - void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + void CheckTypeAndMergeFrom(const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) final; void CopyFrom(const TrainerSpec& from); void MergeFrom(const TrainerSpec& from); - void Clear() final; + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; bool IsInitialized() const final; size_t ByteSizeLong() const final; - bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) final; - void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; void DiscardUnknownFields(); int GetCachedSize() const final { return _cached_size_.Get(); } private: - void SharedCtor(); - void SharedDtor(); + inline void SharedCtor(); + inline void SharedDtor(); void SetCachedSize(int size) const; void InternalSwap(TrainerSpec* other); - private: - 
inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return NULL; - } - inline void* MaybeArenaPtr() const { - return NULL; + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "sentencepiece.TrainerSpec"; } + protected: + explicit TrainerSpec(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); public: - ::std::string GetTypeName() const final; + std::string GetTypeName() const final; // nested types ---------------------------------------------------- typedef TrainerSpec_ModelType ModelType; - static const ModelType UNIGRAM = + static constexpr ModelType UNIGRAM = TrainerSpec_ModelType_UNIGRAM; - static const ModelType BPE = + static constexpr ModelType BPE = TrainerSpec_ModelType_BPE; - static const ModelType WORD = + static constexpr ModelType WORD = TrainerSpec_ModelType_WORD; - static const ModelType CHAR = + static constexpr ModelType CHAR = TrainerSpec_ModelType_CHAR; static inline bool ModelType_IsValid(int value) { return TrainerSpec_ModelType_IsValid(value); } - static const ModelType ModelType_MIN = + static constexpr ModelType ModelType_MIN = TrainerSpec_ModelType_ModelType_MIN; - static const ModelType ModelType_MAX = + static constexpr ModelType ModelType_MAX = TrainerSpec_ModelType_ModelType_MAX; - static const int ModelType_ARRAYSIZE = + static constexpr int ModelType_ARRAYSIZE = TrainerSpec_ModelType_ModelType_ARRAYSIZE; + template + static inline const std::string& ModelType_Name(T enum_t_value) { + static_assert(::std::is_same::value || + ::std::is_integral::value, + "Incorrect type passed to function ModelType_Name."); + return TrainerSpec_ModelType_Name(enum_t_value); + } + static inline bool ModelType_Parse(::PROTOBUF_NAMESPACE_ID::ConstStringParam name, + ModelType* value) { + return TrainerSpec_ModelType_Parse(name, value); + } // 
accessors ------------------------------------------------------- + enum : int { + kInputFieldNumber = 1, + kAcceptLanguageFieldNumber = 5, + kControlSymbolsFieldNumber = 30, + kUserDefinedSymbolsFieldNumber = 31, + kModelPrefixFieldNumber = 2, + kInputFormatFieldNumber = 7, + kRequiredCharsFieldNumber = 36, + kUnkSurfaceFieldNumber = 44, + kUnkPieceFieldNumber = 45, + kBosPieceFieldNumber = 46, + kEosPieceFieldNumber = 47, + kPadPieceFieldNumber = 48, + kPretokenizationDelimiterFieldNumber = 53, + kSelfTestSampleSizeFieldNumber = 6, + kMiningSentenceSizeFieldNumber = 12, + kInputSentenceSizeFieldNumber = 11, + kTrainingSentenceSizeFieldNumber = 13, + kEnableDifferentialPrivacyFieldNumber = 50, + kTreatWhitespaceAsSuffixFieldNumber = 24, + kAllowWhitespaceOnlyPiecesFieldNumber = 26, + kSplitDigitsFieldNumber = 25, + kByteFallbackFieldNumber = 35, + kUseAllVocabFieldNumber = 34, + kTrainExtremelyLargeCorpusFieldNumber = 49, + kUnkIdFieldNumber = 40, + kDifferentialPrivacyNoiseLevelFieldNumber = 51, + kDifferentialPrivacyClippingThresholdFieldNumber = 52, + kModelTypeFieldNumber = 3, + kVocabSizeFieldNumber = 4, + kCharacterCoverageFieldNumber = 10, + kSeedSentencepieceSizeFieldNumber = 14, + kShrinkingFactorFieldNumber = 15, + kNumThreadsFieldNumber = 16, + kNumSubIterationsFieldNumber = 17, + kMaxSentenceLengthFieldNumber = 18, + kMaxSentencepieceLengthFieldNumber = 20, + kShuffleInputSentenceFieldNumber = 19, + kSplitByUnicodeScriptFieldNumber = 21, + kSplitByNumberFieldNumber = 23, + kSplitByWhitespaceFieldNumber = 22, + kVocabularyOutputPieceScoreFieldNumber = 32, + kHardVocabLimitFieldNumber = 33, + kBosIdFieldNumber = 41, + kEosIdFieldNumber = 42, + kPadIdFieldNumber = 43, + }; // repeated string input = 1; int input_size() const; + private: + int _internal_input_size() const; + public: void clear_input(); - static const int kInputFieldNumber = 1; - const ::std::string& input(int index) const; - ::std::string* mutable_input(int index); - void set_input(int 
index, const ::std::string& value); - #if LANG_CXX11 - void set_input(int index, ::std::string&& value); - #endif + const std::string& input(int index) const; + std::string* mutable_input(int index); + void set_input(int index, const std::string& value); + void set_input(int index, std::string&& value); void set_input(int index, const char* value); void set_input(int index, const char* value, size_t size); - ::std::string* add_input(); - void add_input(const ::std::string& value); - #if LANG_CXX11 - void add_input(::std::string&& value); - #endif + std::string* add_input(); + void add_input(const std::string& value); + void add_input(std::string&& value); void add_input(const char* value); void add_input(const char* value, size_t size); - const ::google::protobuf::RepeatedPtrField< ::std::string>& input() const; - ::google::protobuf::RepeatedPtrField< ::std::string>* mutable_input(); + const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField& input() const; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField* mutable_input(); + private: + const std::string& _internal_input(int index) const; + std::string* _internal_add_input(); + public: // repeated string accept_language = 5; int accept_language_size() const; + private: + int _internal_accept_language_size() const; + public: void clear_accept_language(); - static const int kAcceptLanguageFieldNumber = 5; - const ::std::string& accept_language(int index) const; - ::std::string* mutable_accept_language(int index); - void set_accept_language(int index, const ::std::string& value); - #if LANG_CXX11 - void set_accept_language(int index, ::std::string&& value); - #endif + const std::string& accept_language(int index) const; + std::string* mutable_accept_language(int index); + void set_accept_language(int index, const std::string& value); + void set_accept_language(int index, std::string&& value); void set_accept_language(int index, const char* value); void set_accept_language(int index, const char* value, size_t size); - ::std::string* 
add_accept_language(); - void add_accept_language(const ::std::string& value); - #if LANG_CXX11 - void add_accept_language(::std::string&& value); - #endif + std::string* add_accept_language(); + void add_accept_language(const std::string& value); + void add_accept_language(std::string&& value); void add_accept_language(const char* value); void add_accept_language(const char* value, size_t size); - const ::google::protobuf::RepeatedPtrField< ::std::string>& accept_language() const; - ::google::protobuf::RepeatedPtrField< ::std::string>* mutable_accept_language(); + const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField& accept_language() const; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField* mutable_accept_language(); + private: + const std::string& _internal_accept_language(int index) const; + std::string* _internal_add_accept_language(); + public: // repeated string control_symbols = 30; int control_symbols_size() const; + private: + int _internal_control_symbols_size() const; + public: void clear_control_symbols(); - static const int kControlSymbolsFieldNumber = 30; - const ::std::string& control_symbols(int index) const; - ::std::string* mutable_control_symbols(int index); - void set_control_symbols(int index, const ::std::string& value); - #if LANG_CXX11 - void set_control_symbols(int index, ::std::string&& value); - #endif + const std::string& control_symbols(int index) const; + std::string* mutable_control_symbols(int index); + void set_control_symbols(int index, const std::string& value); + void set_control_symbols(int index, std::string&& value); void set_control_symbols(int index, const char* value); void set_control_symbols(int index, const char* value, size_t size); - ::std::string* add_control_symbols(); - void add_control_symbols(const ::std::string& value); - #if LANG_CXX11 - void add_control_symbols(::std::string&& value); - #endif + std::string* add_control_symbols(); + void add_control_symbols(const std::string& value); + void 
add_control_symbols(std::string&& value); void add_control_symbols(const char* value); void add_control_symbols(const char* value, size_t size); - const ::google::protobuf::RepeatedPtrField< ::std::string>& control_symbols() const; - ::google::protobuf::RepeatedPtrField< ::std::string>* mutable_control_symbols(); + const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField& control_symbols() const; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField* mutable_control_symbols(); + private: + const std::string& _internal_control_symbols(int index) const; + std::string* _internal_add_control_symbols(); + public: // repeated string user_defined_symbols = 31; int user_defined_symbols_size() const; + private: + int _internal_user_defined_symbols_size() const; + public: void clear_user_defined_symbols(); - static const int kUserDefinedSymbolsFieldNumber = 31; - const ::std::string& user_defined_symbols(int index) const; - ::std::string* mutable_user_defined_symbols(int index); - void set_user_defined_symbols(int index, const ::std::string& value); - #if LANG_CXX11 - void set_user_defined_symbols(int index, ::std::string&& value); - #endif + const std::string& user_defined_symbols(int index) const; + std::string* mutable_user_defined_symbols(int index); + void set_user_defined_symbols(int index, const std::string& value); + void set_user_defined_symbols(int index, std::string&& value); void set_user_defined_symbols(int index, const char* value); void set_user_defined_symbols(int index, const char* value, size_t size); - ::std::string* add_user_defined_symbols(); - void add_user_defined_symbols(const ::std::string& value); - #if LANG_CXX11 - void add_user_defined_symbols(::std::string&& value); - #endif + std::string* add_user_defined_symbols(); + void add_user_defined_symbols(const std::string& value); + void add_user_defined_symbols(std::string&& value); void add_user_defined_symbols(const char* value); void add_user_defined_symbols(const char* value, size_t size); - const 
::google::protobuf::RepeatedPtrField< ::std::string>& user_defined_symbols() const; - ::google::protobuf::RepeatedPtrField< ::std::string>* mutable_user_defined_symbols(); + const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField& user_defined_symbols() const; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField* mutable_user_defined_symbols(); + private: + const std::string& _internal_user_defined_symbols(int index) const; + std::string* _internal_add_user_defined_symbols(); + public: // optional string model_prefix = 2; bool has_model_prefix() const; + private: + bool _internal_has_model_prefix() const; + public: void clear_model_prefix(); - static const int kModelPrefixFieldNumber = 2; - const ::std::string& model_prefix() const; - void set_model_prefix(const ::std::string& value); - #if LANG_CXX11 - void set_model_prefix(::std::string&& value); - #endif + const std::string& model_prefix() const; + void set_model_prefix(const std::string& value); + void set_model_prefix(std::string&& value); void set_model_prefix(const char* value); void set_model_prefix(const char* value, size_t size); - ::std::string* mutable_model_prefix(); - ::std::string* release_model_prefix(); - void set_allocated_model_prefix(::std::string* model_prefix); + std::string* mutable_model_prefix(); + std::string* release_model_prefix(); + void set_allocated_model_prefix(std::string* model_prefix); + private: + const std::string& _internal_model_prefix() const; + void _internal_set_model_prefix(const std::string& value); + std::string* _internal_mutable_model_prefix(); + public: // optional string input_format = 7; bool has_input_format() const; + private: + bool _internal_has_input_format() const; + public: void clear_input_format(); - static const int kInputFormatFieldNumber = 7; - const ::std::string& input_format() const; - void set_input_format(const ::std::string& value); - #if LANG_CXX11 - void set_input_format(::std::string&& value); - #endif + const std::string& input_format() const; + void 
set_input_format(const std::string& value); + void set_input_format(std::string&& value); void set_input_format(const char* value); void set_input_format(const char* value, size_t size); - ::std::string* mutable_input_format(); - ::std::string* release_input_format(); - void set_allocated_input_format(::std::string* input_format); + std::string* mutable_input_format(); + std::string* release_input_format(); + void set_allocated_input_format(std::string* input_format); + private: + const std::string& _internal_input_format() const; + void _internal_set_input_format(const std::string& value); + std::string* _internal_mutable_input_format(); + public: // optional string required_chars = 36; bool has_required_chars() const; + private: + bool _internal_has_required_chars() const; + public: void clear_required_chars(); - static const int kRequiredCharsFieldNumber = 36; - const ::std::string& required_chars() const; - void set_required_chars(const ::std::string& value); - #if LANG_CXX11 - void set_required_chars(::std::string&& value); - #endif + const std::string& required_chars() const; + void set_required_chars(const std::string& value); + void set_required_chars(std::string&& value); void set_required_chars(const char* value); void set_required_chars(const char* value, size_t size); - ::std::string* mutable_required_chars(); - ::std::string* release_required_chars(); - void set_allocated_required_chars(::std::string* required_chars); + std::string* mutable_required_chars(); + std::string* release_required_chars(); + void set_allocated_required_chars(std::string* required_chars); + private: + const std::string& _internal_required_chars() const; + void _internal_set_required_chars(const std::string& value); + std::string* _internal_mutable_required_chars(); + public: // optional string unk_surface = 44 [default = " \342\201\207 "]; bool has_unk_surface() const; + private: + bool _internal_has_unk_surface() const; + public: void clear_unk_surface(); - static const int 
kUnkSurfaceFieldNumber = 44; - const ::std::string& unk_surface() const; - void set_unk_surface(const ::std::string& value); - #if LANG_CXX11 - void set_unk_surface(::std::string&& value); - #endif + const std::string& unk_surface() const; + void set_unk_surface(const std::string& value); + void set_unk_surface(std::string&& value); void set_unk_surface(const char* value); void set_unk_surface(const char* value, size_t size); - ::std::string* mutable_unk_surface(); - ::std::string* release_unk_surface(); - void set_allocated_unk_surface(::std::string* unk_surface); + std::string* mutable_unk_surface(); + std::string* release_unk_surface(); + void set_allocated_unk_surface(std::string* unk_surface); + private: + const std::string& _internal_unk_surface() const; + void _internal_set_unk_surface(const std::string& value); + std::string* _internal_mutable_unk_surface(); + public: // optional string unk_piece = 45 [default = ""]; bool has_unk_piece() const; + private: + bool _internal_has_unk_piece() const; + public: void clear_unk_piece(); - static const int kUnkPieceFieldNumber = 45; - const ::std::string& unk_piece() const; - void set_unk_piece(const ::std::string& value); - #if LANG_CXX11 - void set_unk_piece(::std::string&& value); - #endif + const std::string& unk_piece() const; + void set_unk_piece(const std::string& value); + void set_unk_piece(std::string&& value); void set_unk_piece(const char* value); void set_unk_piece(const char* value, size_t size); - ::std::string* mutable_unk_piece(); - ::std::string* release_unk_piece(); - void set_allocated_unk_piece(::std::string* unk_piece); + std::string* mutable_unk_piece(); + std::string* release_unk_piece(); + void set_allocated_unk_piece(std::string* unk_piece); + private: + const std::string& _internal_unk_piece() const; + void _internal_set_unk_piece(const std::string& value); + std::string* _internal_mutable_unk_piece(); + public: // optional string bos_piece = 46 [default = ""]; bool has_bos_piece() const; + 
private: + bool _internal_has_bos_piece() const; + public: void clear_bos_piece(); - static const int kBosPieceFieldNumber = 46; - const ::std::string& bos_piece() const; - void set_bos_piece(const ::std::string& value); - #if LANG_CXX11 - void set_bos_piece(::std::string&& value); - #endif + const std::string& bos_piece() const; + void set_bos_piece(const std::string& value); + void set_bos_piece(std::string&& value); void set_bos_piece(const char* value); void set_bos_piece(const char* value, size_t size); - ::std::string* mutable_bos_piece(); - ::std::string* release_bos_piece(); - void set_allocated_bos_piece(::std::string* bos_piece); + std::string* mutable_bos_piece(); + std::string* release_bos_piece(); + void set_allocated_bos_piece(std::string* bos_piece); + private: + const std::string& _internal_bos_piece() const; + void _internal_set_bos_piece(const std::string& value); + std::string* _internal_mutable_bos_piece(); + public: // optional string eos_piece = 47 [default = ""]; bool has_eos_piece() const; + private: + bool _internal_has_eos_piece() const; + public: void clear_eos_piece(); - static const int kEosPieceFieldNumber = 47; - const ::std::string& eos_piece() const; - void set_eos_piece(const ::std::string& value); - #if LANG_CXX11 - void set_eos_piece(::std::string&& value); - #endif + const std::string& eos_piece() const; + void set_eos_piece(const std::string& value); + void set_eos_piece(std::string&& value); void set_eos_piece(const char* value); void set_eos_piece(const char* value, size_t size); - ::std::string* mutable_eos_piece(); - ::std::string* release_eos_piece(); - void set_allocated_eos_piece(::std::string* eos_piece); + std::string* mutable_eos_piece(); + std::string* release_eos_piece(); + void set_allocated_eos_piece(std::string* eos_piece); + private: + const std::string& _internal_eos_piece() const; + void _internal_set_eos_piece(const std::string& value); + std::string* _internal_mutable_eos_piece(); + public: // optional 
string pad_piece = 48 [default = ""]; bool has_pad_piece() const; + private: + bool _internal_has_pad_piece() const; + public: void clear_pad_piece(); - static const int kPadPieceFieldNumber = 48; - const ::std::string& pad_piece() const; - void set_pad_piece(const ::std::string& value); - #if LANG_CXX11 - void set_pad_piece(::std::string&& value); - #endif + const std::string& pad_piece() const; + void set_pad_piece(const std::string& value); + void set_pad_piece(std::string&& value); void set_pad_piece(const char* value); void set_pad_piece(const char* value, size_t size); - ::std::string* mutable_pad_piece(); - ::std::string* release_pad_piece(); - void set_allocated_pad_piece(::std::string* pad_piece); + std::string* mutable_pad_piece(); + std::string* release_pad_piece(); + void set_allocated_pad_piece(std::string* pad_piece); + private: + const std::string& _internal_pad_piece() const; + void _internal_set_pad_piece(const std::string& value); + std::string* _internal_mutable_pad_piece(); + public: + + // optional string pretokenization_delimiter = 53 [default = ""]; + bool has_pretokenization_delimiter() const; + private: + bool _internal_has_pretokenization_delimiter() const; + public: + void clear_pretokenization_delimiter(); + const std::string& pretokenization_delimiter() const; + void set_pretokenization_delimiter(const std::string& value); + void set_pretokenization_delimiter(std::string&& value); + void set_pretokenization_delimiter(const char* value); + void set_pretokenization_delimiter(const char* value, size_t size); + std::string* mutable_pretokenization_delimiter(); + std::string* release_pretokenization_delimiter(); + void set_allocated_pretokenization_delimiter(std::string* pretokenization_delimiter); + private: + const std::string& _internal_pretokenization_delimiter() const; + void _internal_set_pretokenization_delimiter(const std::string& value); + std::string* _internal_mutable_pretokenization_delimiter(); + public: // optional int32 
self_test_sample_size = 6 [default = 0]; bool has_self_test_sample_size() const; + private: + bool _internal_has_self_test_sample_size() const; + public: void clear_self_test_sample_size(); - static const int kSelfTestSampleSizeFieldNumber = 6; - ::google::protobuf::int32 self_test_sample_size() const; - void set_self_test_sample_size(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::int32 self_test_sample_size() const; + void set_self_test_sample_size(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_self_test_sample_size() const; + void _internal_set_self_test_sample_size(::PROTOBUF_NAMESPACE_ID::int32 value); + public: + + // optional int32 mining_sentence_size = 12 [deprecated = true]; + PROTOBUF_DEPRECATED bool has_mining_sentence_size() const; + private: + bool _internal_has_mining_sentence_size() const; + public: + PROTOBUF_DEPRECATED void clear_mining_sentence_size(); + PROTOBUF_DEPRECATED ::PROTOBUF_NAMESPACE_ID::int32 mining_sentence_size() const; + PROTOBUF_DEPRECATED void set_mining_sentence_size(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_mining_sentence_size() const; + void _internal_set_mining_sentence_size(::PROTOBUF_NAMESPACE_ID::int32 value); + public: - // optional int32 input_sentence_size = 11 [default = 0]; + // optional uint64 input_sentence_size = 11 [default = 0]; bool has_input_sentence_size() const; + private: + bool _internal_has_input_sentence_size() const; + public: void clear_input_sentence_size(); - static const int kInputSentenceSizeFieldNumber = 11; - ::google::protobuf::int32 input_sentence_size() const; - void set_input_sentence_size(::google::protobuf::int32 value); - - // optional int32 mining_sentence_size = 12 [deprecated = true]; - GOOGLE_PROTOBUF_DEPRECATED_ATTR bool has_mining_sentence_size() const; - GOOGLE_PROTOBUF_DEPRECATED_ATTR void clear_mining_sentence_size(); - GOOGLE_PROTOBUF_DEPRECATED_ATTR static const int 
kMiningSentenceSizeFieldNumber = 12; - GOOGLE_PROTOBUF_DEPRECATED_ATTR ::google::protobuf::int32 mining_sentence_size() const; - GOOGLE_PROTOBUF_DEPRECATED_ATTR void set_mining_sentence_size(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::uint64 input_sentence_size() const; + void set_input_sentence_size(::PROTOBUF_NAMESPACE_ID::uint64 value); + private: + ::PROTOBUF_NAMESPACE_ID::uint64 _internal_input_sentence_size() const; + void _internal_set_input_sentence_size(::PROTOBUF_NAMESPACE_ID::uint64 value); + public: // optional int32 training_sentence_size = 13 [deprecated = true]; - GOOGLE_PROTOBUF_DEPRECATED_ATTR bool has_training_sentence_size() const; - GOOGLE_PROTOBUF_DEPRECATED_ATTR void clear_training_sentence_size(); - GOOGLE_PROTOBUF_DEPRECATED_ATTR static const int kTrainingSentenceSizeFieldNumber = 13; - GOOGLE_PROTOBUF_DEPRECATED_ATTR ::google::protobuf::int32 training_sentence_size() const; - GOOGLE_PROTOBUF_DEPRECATED_ATTR void set_training_sentence_size(::google::protobuf::int32 value); + PROTOBUF_DEPRECATED bool has_training_sentence_size() const; + private: + bool _internal_has_training_sentence_size() const; + public: + PROTOBUF_DEPRECATED void clear_training_sentence_size(); + PROTOBUF_DEPRECATED ::PROTOBUF_NAMESPACE_ID::int32 training_sentence_size() const; + PROTOBUF_DEPRECATED void set_training_sentence_size(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_training_sentence_size() const; + void _internal_set_training_sentence_size(::PROTOBUF_NAMESPACE_ID::int32 value); + public: + + // optional bool enable_differential_privacy = 50 [default = false]; + bool has_enable_differential_privacy() const; + private: + bool _internal_has_enable_differential_privacy() const; + public: + void clear_enable_differential_privacy(); + bool enable_differential_privacy() const; + void set_enable_differential_privacy(bool value); + private: + bool _internal_enable_differential_privacy() const; + void 
_internal_set_enable_differential_privacy(bool value); + public: // optional bool treat_whitespace_as_suffix = 24 [default = false]; bool has_treat_whitespace_as_suffix() const; + private: + bool _internal_has_treat_whitespace_as_suffix() const; + public: void clear_treat_whitespace_as_suffix(); - static const int kTreatWhitespaceAsSuffixFieldNumber = 24; bool treat_whitespace_as_suffix() const; void set_treat_whitespace_as_suffix(bool value); + private: + bool _internal_treat_whitespace_as_suffix() const; + void _internal_set_treat_whitespace_as_suffix(bool value); + public: + + // optional bool allow_whitespace_only_pieces = 26 [default = false]; + bool has_allow_whitespace_only_pieces() const; + private: + bool _internal_has_allow_whitespace_only_pieces() const; + public: + void clear_allow_whitespace_only_pieces(); + bool allow_whitespace_only_pieces() const; + void set_allow_whitespace_only_pieces(bool value); + private: + bool _internal_allow_whitespace_only_pieces() const; + void _internal_set_allow_whitespace_only_pieces(bool value); + public: // optional bool split_digits = 25 [default = false]; bool has_split_digits() const; + private: + bool _internal_has_split_digits() const; + public: void clear_split_digits(); - static const int kSplitDigitsFieldNumber = 25; bool split_digits() const; void set_split_digits(bool value); + private: + bool _internal_split_digits() const; + void _internal_set_split_digits(bool value); + public: // optional bool byte_fallback = 35 [default = false]; bool has_byte_fallback() const; + private: + bool _internal_has_byte_fallback() const; + public: void clear_byte_fallback(); - static const int kByteFallbackFieldNumber = 35; bool byte_fallback() const; void set_byte_fallback(bool value); + private: + bool _internal_byte_fallback() const; + void _internal_set_byte_fallback(bool value); + public: // optional bool use_all_vocab = 34 [default = false]; bool has_use_all_vocab() const; + private: + bool _internal_has_use_all_vocab() 
const; + public: void clear_use_all_vocab(); - static const int kUseAllVocabFieldNumber = 34; bool use_all_vocab() const; void set_use_all_vocab(bool value); - - // optional int32 unk_id = 40 [default = 0]; - bool has_unk_id() const; - void clear_unk_id(); - static const int kUnkIdFieldNumber = 40; - ::google::protobuf::int32 unk_id() const; - void set_unk_id(::google::protobuf::int32 value); + private: + bool _internal_use_all_vocab() const; + void _internal_set_use_all_vocab(bool value); + public: // optional bool train_extremely_large_corpus = 49 [default = false]; bool has_train_extremely_large_corpus() const; + private: + bool _internal_has_train_extremely_large_corpus() const; + public: void clear_train_extremely_large_corpus(); - static const int kTrainExtremelyLargeCorpusFieldNumber = 49; bool train_extremely_large_corpus() const; void set_train_extremely_large_corpus(bool value); + private: + bool _internal_train_extremely_large_corpus() const; + void _internal_set_train_extremely_large_corpus(bool value); + public: + + // optional int32 unk_id = 40 [default = 0]; + bool has_unk_id() const; + private: + bool _internal_has_unk_id() const; + public: + void clear_unk_id(); + ::PROTOBUF_NAMESPACE_ID::int32 unk_id() const; + void set_unk_id(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_unk_id() const; + void _internal_set_unk_id(::PROTOBUF_NAMESPACE_ID::int32 value); + public: + + // optional float differential_privacy_noise_level = 51 [default = 0]; + bool has_differential_privacy_noise_level() const; + private: + bool _internal_has_differential_privacy_noise_level() const; + public: + void clear_differential_privacy_noise_level(); + float differential_privacy_noise_level() const; + void set_differential_privacy_noise_level(float value); + private: + float _internal_differential_privacy_noise_level() const; + void _internal_set_differential_privacy_noise_level(float value); + public: + + // optional uint64 
differential_privacy_clipping_threshold = 52 [default = 0]; + bool has_differential_privacy_clipping_threshold() const; + private: + bool _internal_has_differential_privacy_clipping_threshold() const; + public: + void clear_differential_privacy_clipping_threshold(); + ::PROTOBUF_NAMESPACE_ID::uint64 differential_privacy_clipping_threshold() const; + void set_differential_privacy_clipping_threshold(::PROTOBUF_NAMESPACE_ID::uint64 value); + private: + ::PROTOBUF_NAMESPACE_ID::uint64 _internal_differential_privacy_clipping_threshold() const; + void _internal_set_differential_privacy_clipping_threshold(::PROTOBUF_NAMESPACE_ID::uint64 value); + public: // optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; bool has_model_type() const; + private: + bool _internal_has_model_type() const; + public: void clear_model_type(); - static const int kModelTypeFieldNumber = 3; ::sentencepiece::TrainerSpec_ModelType model_type() const; void set_model_type(::sentencepiece::TrainerSpec_ModelType value); + private: + ::sentencepiece::TrainerSpec_ModelType _internal_model_type() const; + void _internal_set_model_type(::sentencepiece::TrainerSpec_ModelType value); + public: // optional int32 vocab_size = 4 [default = 8000]; bool has_vocab_size() const; + private: + bool _internal_has_vocab_size() const; + public: void clear_vocab_size(); - static const int kVocabSizeFieldNumber = 4; - ::google::protobuf::int32 vocab_size() const; - void set_vocab_size(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::int32 vocab_size() const; + void set_vocab_size(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_vocab_size() const; + void _internal_set_vocab_size(::PROTOBUF_NAMESPACE_ID::int32 value); + public: // optional float character_coverage = 10 [default = 0.9995]; bool has_character_coverage() const; + private: + bool _internal_has_character_coverage() const; + public: void clear_character_coverage(); - static 
const int kCharacterCoverageFieldNumber = 10; float character_coverage() const; void set_character_coverage(float value); + private: + float _internal_character_coverage() const; + void _internal_set_character_coverage(float value); + public: // optional int32 seed_sentencepiece_size = 14 [default = 1000000]; bool has_seed_sentencepiece_size() const; + private: + bool _internal_has_seed_sentencepiece_size() const; + public: void clear_seed_sentencepiece_size(); - static const int kSeedSentencepieceSizeFieldNumber = 14; - ::google::protobuf::int32 seed_sentencepiece_size() const; - void set_seed_sentencepiece_size(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::int32 seed_sentencepiece_size() const; + void set_seed_sentencepiece_size(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_seed_sentencepiece_size() const; + void _internal_set_seed_sentencepiece_size(::PROTOBUF_NAMESPACE_ID::int32 value); + public: // optional float shrinking_factor = 15 [default = 0.75]; bool has_shrinking_factor() const; + private: + bool _internal_has_shrinking_factor() const; + public: void clear_shrinking_factor(); - static const int kShrinkingFactorFieldNumber = 15; float shrinking_factor() const; void set_shrinking_factor(float value); + private: + float _internal_shrinking_factor() const; + void _internal_set_shrinking_factor(float value); + public: // optional int32 num_threads = 16 [default = 16]; bool has_num_threads() const; + private: + bool _internal_has_num_threads() const; + public: void clear_num_threads(); - static const int kNumThreadsFieldNumber = 16; - ::google::protobuf::int32 num_threads() const; - void set_num_threads(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::int32 num_threads() const; + void set_num_threads(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_num_threads() const; + void _internal_set_num_threads(::PROTOBUF_NAMESPACE_ID::int32 value); + 
public: // optional int32 num_sub_iterations = 17 [default = 2]; bool has_num_sub_iterations() const; + private: + bool _internal_has_num_sub_iterations() const; + public: void clear_num_sub_iterations(); - static const int kNumSubIterationsFieldNumber = 17; - ::google::protobuf::int32 num_sub_iterations() const; - void set_num_sub_iterations(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::int32 num_sub_iterations() const; + void set_num_sub_iterations(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_num_sub_iterations() const; + void _internal_set_num_sub_iterations(::PROTOBUF_NAMESPACE_ID::int32 value); + public: // optional int32 max_sentence_length = 18 [default = 4192]; bool has_max_sentence_length() const; + private: + bool _internal_has_max_sentence_length() const; + public: void clear_max_sentence_length(); - static const int kMaxSentenceLengthFieldNumber = 18; - ::google::protobuf::int32 max_sentence_length() const; - void set_max_sentence_length(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::int32 max_sentence_length() const; + void set_max_sentence_length(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_max_sentence_length() const; + void _internal_set_max_sentence_length(::PROTOBUF_NAMESPACE_ID::int32 value); + public: // optional int32 max_sentencepiece_length = 20 [default = 16]; bool has_max_sentencepiece_length() const; + private: + bool _internal_has_max_sentencepiece_length() const; + public: void clear_max_sentencepiece_length(); - static const int kMaxSentencepieceLengthFieldNumber = 20; - ::google::protobuf::int32 max_sentencepiece_length() const; - void set_max_sentencepiece_length(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::int32 max_sentencepiece_length() const; + void set_max_sentencepiece_length(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 
_internal_max_sentencepiece_length() const; + void _internal_set_max_sentencepiece_length(::PROTOBUF_NAMESPACE_ID::int32 value); + public: // optional bool shuffle_input_sentence = 19 [default = true]; bool has_shuffle_input_sentence() const; + private: + bool _internal_has_shuffle_input_sentence() const; + public: void clear_shuffle_input_sentence(); - static const int kShuffleInputSentenceFieldNumber = 19; bool shuffle_input_sentence() const; void set_shuffle_input_sentence(bool value); + private: + bool _internal_shuffle_input_sentence() const; + void _internal_set_shuffle_input_sentence(bool value); + public: // optional bool split_by_unicode_script = 21 [default = true]; bool has_split_by_unicode_script() const; + private: + bool _internal_has_split_by_unicode_script() const; + public: void clear_split_by_unicode_script(); - static const int kSplitByUnicodeScriptFieldNumber = 21; bool split_by_unicode_script() const; void set_split_by_unicode_script(bool value); + private: + bool _internal_split_by_unicode_script() const; + void _internal_set_split_by_unicode_script(bool value); + public: // optional bool split_by_number = 23 [default = true]; bool has_split_by_number() const; + private: + bool _internal_has_split_by_number() const; + public: void clear_split_by_number(); - static const int kSplitByNumberFieldNumber = 23; bool split_by_number() const; void set_split_by_number(bool value); + private: + bool _internal_split_by_number() const; + void _internal_set_split_by_number(bool value); + public: // optional bool split_by_whitespace = 22 [default = true]; bool has_split_by_whitespace() const; + private: + bool _internal_has_split_by_whitespace() const; + public: void clear_split_by_whitespace(); - static const int kSplitByWhitespaceFieldNumber = 22; bool split_by_whitespace() const; void set_split_by_whitespace(bool value); + private: + bool _internal_split_by_whitespace() const; + void _internal_set_split_by_whitespace(bool value); + public: // optional 
bool vocabulary_output_piece_score = 32 [default = true]; bool has_vocabulary_output_piece_score() const; + private: + bool _internal_has_vocabulary_output_piece_score() const; + public: void clear_vocabulary_output_piece_score(); - static const int kVocabularyOutputPieceScoreFieldNumber = 32; bool vocabulary_output_piece_score() const; void set_vocabulary_output_piece_score(bool value); + private: + bool _internal_vocabulary_output_piece_score() const; + void _internal_set_vocabulary_output_piece_score(bool value); + public: // optional bool hard_vocab_limit = 33 [default = true]; bool has_hard_vocab_limit() const; + private: + bool _internal_has_hard_vocab_limit() const; + public: void clear_hard_vocab_limit(); - static const int kHardVocabLimitFieldNumber = 33; bool hard_vocab_limit() const; void set_hard_vocab_limit(bool value); + private: + bool _internal_hard_vocab_limit() const; + void _internal_set_hard_vocab_limit(bool value); + public: // optional int32 bos_id = 41 [default = 1]; bool has_bos_id() const; + private: + bool _internal_has_bos_id() const; + public: void clear_bos_id(); - static const int kBosIdFieldNumber = 41; - ::google::protobuf::int32 bos_id() const; - void set_bos_id(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::int32 bos_id() const; + void set_bos_id(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_bos_id() const; + void _internal_set_bos_id(::PROTOBUF_NAMESPACE_ID::int32 value); + public: // optional int32 eos_id = 42 [default = 2]; bool has_eos_id() const; + private: + bool _internal_has_eos_id() const; + public: void clear_eos_id(); - static const int kEosIdFieldNumber = 42; - ::google::protobuf::int32 eos_id() const; - void set_eos_id(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::int32 eos_id() const; + void set_eos_id(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_eos_id() const; + void 
_internal_set_eos_id(::PROTOBUF_NAMESPACE_ID::int32 value); + public: // optional int32 pad_id = 43 [default = -1]; bool has_pad_id() const; + private: + bool _internal_has_pad_id() const; + public: void clear_pad_id(); - static const int kPadIdFieldNumber = 43; - ::google::protobuf::int32 pad_id() const; - void set_pad_id(::google::protobuf::int32 value); + ::PROTOBUF_NAMESPACE_ID::int32 pad_id() const; + void set_pad_id(::PROTOBUF_NAMESPACE_ID::int32 value); + private: + ::PROTOBUF_NAMESPACE_ID::int32 _internal_pad_id() const; + void _internal_set_pad_id(::PROTOBUF_NAMESPACE_ID::int32 value); + public: GOOGLE_PROTOBUF_EXTENSION_ACCESSORS(TrainerSpec) // @@protoc_insertion_point(class_scope:sentencepiece.TrainerSpec) private: - void set_has_input_format(); - void clear_has_input_format(); - void set_has_model_prefix(); - void clear_has_model_prefix(); - void set_has_model_type(); - void clear_has_model_type(); - void set_has_vocab_size(); - void clear_has_vocab_size(); - void set_has_self_test_sample_size(); - void clear_has_self_test_sample_size(); - void set_has_character_coverage(); - void clear_has_character_coverage(); - void set_has_input_sentence_size(); - void clear_has_input_sentence_size(); - void set_has_shuffle_input_sentence(); - void clear_has_shuffle_input_sentence(); - void set_has_mining_sentence_size(); - void clear_has_mining_sentence_size(); - void set_has_training_sentence_size(); - void clear_has_training_sentence_size(); - void set_has_seed_sentencepiece_size(); - void clear_has_seed_sentencepiece_size(); - void set_has_shrinking_factor(); - void clear_has_shrinking_factor(); - void set_has_max_sentence_length(); - void clear_has_max_sentence_length(); - void set_has_num_threads(); - void clear_has_num_threads(); - void set_has_num_sub_iterations(); - void clear_has_num_sub_iterations(); - void set_has_max_sentencepiece_length(); - void clear_has_max_sentencepiece_length(); - void set_has_split_by_unicode_script(); - void 
clear_has_split_by_unicode_script(); - void set_has_split_by_number(); - void clear_has_split_by_number(); - void set_has_split_by_whitespace(); - void clear_has_split_by_whitespace(); - void set_has_treat_whitespace_as_suffix(); - void clear_has_treat_whitespace_as_suffix(); - void set_has_split_digits(); - void clear_has_split_digits(); - void set_has_required_chars(); - void clear_has_required_chars(); - void set_has_byte_fallback(); - void clear_has_byte_fallback(); - void set_has_vocabulary_output_piece_score(); - void clear_has_vocabulary_output_piece_score(); - void set_has_hard_vocab_limit(); - void clear_has_hard_vocab_limit(); - void set_has_use_all_vocab(); - void clear_has_use_all_vocab(); - void set_has_unk_id(); - void clear_has_unk_id(); - void set_has_bos_id(); - void clear_has_bos_id(); - void set_has_eos_id(); - void clear_has_eos_id(); - void set_has_pad_id(); - void clear_has_pad_id(); - void set_has_unk_piece(); - void clear_has_unk_piece(); - void set_has_bos_piece(); - void clear_has_bos_piece(); - void set_has_eos_piece(); - void clear_has_eos_piece(); - void set_has_pad_piece(); - void clear_has_pad_piece(); - void set_has_unk_surface(); - void clear_has_unk_surface(); - void set_has_train_extremely_large_corpus(); - void clear_has_train_extremely_large_corpus(); - - ::google::protobuf::internal::ExtensionSet _extensions_; - - ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; - ::google::protobuf::internal::HasBits<2> _has_bits_; - ::google::protobuf::RepeatedPtrField< ::std::string> input_; - ::google::protobuf::RepeatedPtrField< ::std::string> accept_language_; - ::google::protobuf::RepeatedPtrField< ::std::string> control_symbols_; - ::google::protobuf::RepeatedPtrField< ::std::string> user_defined_symbols_; - ::google::protobuf::internal::ArenaStringPtr model_prefix_; - ::google::protobuf::internal::ArenaStringPtr input_format_; - ::google::protobuf::internal::ArenaStringPtr required_chars_; - public: - 
static ::google::protobuf::internal::ExplicitlyConstructed< ::std::string> _i_give_permission_to_break_this_code_default_unk_surface_; - private: - ::google::protobuf::internal::ArenaStringPtr unk_surface_; - public: - static ::google::protobuf::internal::ExplicitlyConstructed< ::std::string> _i_give_permission_to_break_this_code_default_unk_piece_; - private: - ::google::protobuf::internal::ArenaStringPtr unk_piece_; - public: - static ::google::protobuf::internal::ExplicitlyConstructed< ::std::string> _i_give_permission_to_break_this_code_default_bos_piece_; - private: - ::google::protobuf::internal::ArenaStringPtr bos_piece_; - public: - static ::google::protobuf::internal::ExplicitlyConstructed< ::std::string> _i_give_permission_to_break_this_code_default_eos_piece_; - private: - ::google::protobuf::internal::ArenaStringPtr eos_piece_; - public: - static ::google::protobuf::internal::ExplicitlyConstructed< ::std::string> _i_give_permission_to_break_this_code_default_pad_piece_; - private: - ::google::protobuf::internal::ArenaStringPtr pad_piece_; - ::google::protobuf::int32 self_test_sample_size_; - ::google::protobuf::int32 input_sentence_size_; - ::google::protobuf::int32 mining_sentence_size_; - ::google::protobuf::int32 training_sentence_size_; + class _Internal; + + ::PROTOBUF_NAMESPACE_ID::internal::ExtensionSet _extensions_; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::internal::HasBits<2> _has_bits_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField input_; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField accept_language_; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField control_symbols_; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField user_defined_symbols_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr model_prefix_; + 
::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr input_format_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr required_chars_; + static const ::PROTOBUF_NAMESPACE_ID::internal::LazyString _i_give_permission_to_break_this_code_default_unk_surface_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr unk_surface_; + static const ::PROTOBUF_NAMESPACE_ID::internal::LazyString _i_give_permission_to_break_this_code_default_unk_piece_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr unk_piece_; + static const ::PROTOBUF_NAMESPACE_ID::internal::LazyString _i_give_permission_to_break_this_code_default_bos_piece_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr bos_piece_; + static const ::PROTOBUF_NAMESPACE_ID::internal::LazyString _i_give_permission_to_break_this_code_default_eos_piece_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr eos_piece_; + static const ::PROTOBUF_NAMESPACE_ID::internal::LazyString _i_give_permission_to_break_this_code_default_pad_piece_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr pad_piece_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr pretokenization_delimiter_; + ::PROTOBUF_NAMESPACE_ID::int32 self_test_sample_size_; + ::PROTOBUF_NAMESPACE_ID::int32 mining_sentence_size_; + ::PROTOBUF_NAMESPACE_ID::uint64 input_sentence_size_; + ::PROTOBUF_NAMESPACE_ID::int32 training_sentence_size_; + bool enable_differential_privacy_; bool treat_whitespace_as_suffix_; + bool allow_whitespace_only_pieces_; bool split_digits_; bool byte_fallback_; bool use_all_vocab_; - ::google::protobuf::int32 unk_id_; bool train_extremely_large_corpus_; + ::PROTOBUF_NAMESPACE_ID::int32 unk_id_; + float differential_privacy_noise_level_; + ::PROTOBUF_NAMESPACE_ID::uint64 differential_privacy_clipping_threshold_; int model_type_; - ::google::protobuf::int32 vocab_size_; + ::PROTOBUF_NAMESPACE_ID::int32 vocab_size_; float character_coverage_; - ::google::protobuf::int32 seed_sentencepiece_size_; + ::PROTOBUF_NAMESPACE_ID::int32 
seed_sentencepiece_size_; float shrinking_factor_; - ::google::protobuf::int32 num_threads_; - ::google::protobuf::int32 num_sub_iterations_; - ::google::protobuf::int32 max_sentence_length_; - ::google::protobuf::int32 max_sentencepiece_length_; + ::PROTOBUF_NAMESPACE_ID::int32 num_threads_; + ::PROTOBUF_NAMESPACE_ID::int32 num_sub_iterations_; + ::PROTOBUF_NAMESPACE_ID::int32 max_sentence_length_; + ::PROTOBUF_NAMESPACE_ID::int32 max_sentencepiece_length_; bool shuffle_input_sentence_; bool split_by_unicode_script_; bool split_by_number_; bool split_by_whitespace_; bool vocabulary_output_piece_score_; bool hard_vocab_limit_; - ::google::protobuf::int32 bos_id_; - ::google::protobuf::int32 eos_id_; - ::google::protobuf::int32 pad_id_; - mutable ::google::protobuf::internal::CachedSize _cached_size_; - friend struct ::protobuf_sentencepiece_5fmodel_2eproto::TableStruct; + ::PROTOBUF_NAMESPACE_ID::int32 bos_id_; + ::PROTOBUF_NAMESPACE_ID::int32 eos_id_; + ::PROTOBUF_NAMESPACE_ID::int32 pad_id_; + friend struct ::TableStruct_sentencepiece_5fmodel_2eproto; }; // ------------------------------------------------------------------- -class NormalizerSpec : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.NormalizerSpec) */ { +class NormalizerSpec PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.NormalizerSpec) */ { public: - NormalizerSpec(); + inline NormalizerSpec() : NormalizerSpec(nullptr) {} virtual ~NormalizerSpec(); NormalizerSpec(const NormalizerSpec& from); - - inline NormalizerSpec& operator=(const NormalizerSpec& from) { - CopyFrom(from); - return *this; - } - #if LANG_CXX11 NormalizerSpec(NormalizerSpec&& from) noexcept : NormalizerSpec() { *this = ::std::move(from); } + inline NormalizerSpec& operator=(const NormalizerSpec& from) { + CopyFrom(from); + return *this; + } inline NormalizerSpec& operator=(NormalizerSpec&& from) noexcept { - if 
(GetArenaNoVirtual() == from.GetArenaNoVirtual()) { + if (GetArena() == from.GetArena()) { if (this != &from) InternalSwap(&from); } else { CopyFrom(from); } return *this; } - #endif - inline const ::std::string& unknown_fields() const { - return _internal_metadata_.unknown_fields(); + + inline const std::string& unknown_fields() const { + return _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString); } - inline ::std::string* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); + inline std::string* mutable_unknown_fields() { + return _internal_metadata_.mutable_unknown_fields(); } static const NormalizerSpec& default_instance(); - static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY static inline const NormalizerSpec* internal_default_instance() { return reinterpret_cast( &_NormalizerSpec_default_instance_); @@ -800,208 +1106,261 @@ class NormalizerSpec : public ::google::protobuf::MessageLite /* @@protoc_insert static constexpr int kIndexInFileMessages = 1; - void Swap(NormalizerSpec* other); friend void swap(NormalizerSpec& a, NormalizerSpec& b) { a.Swap(&b); } + inline void Swap(NormalizerSpec* other) { + if (other == this) return; + if (GetArena() == other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(NormalizerSpec* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } // implements Message ---------------------------------------------- inline NormalizerSpec* New() const final { - return CreateMaybeMessage(NULL); + return CreateMaybeMessage(nullptr); } - NormalizerSpec* New(::google::protobuf::Arena* arena) const final { + NormalizerSpec* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { return CreateMaybeMessage(arena); } - void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + void 
CheckTypeAndMergeFrom(const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) final; void CopyFrom(const NormalizerSpec& from); void MergeFrom(const NormalizerSpec& from); - void Clear() final; + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; bool IsInitialized() const final; size_t ByteSizeLong() const final; - bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) final; - void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; void DiscardUnknownFields(); int GetCachedSize() const final { return _cached_size_.Get(); } private: - void SharedCtor(); - void SharedDtor(); + inline void SharedCtor(); + inline void SharedDtor(); void SetCachedSize(int size) const; void InternalSwap(NormalizerSpec* other); - private: - inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return NULL; - } - inline void* MaybeArenaPtr() const { - return NULL; + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "sentencepiece.NormalizerSpec"; } + protected: + explicit NormalizerSpec(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); public: - ::std::string GetTypeName() const final; + std::string GetTypeName() const final; // nested types ---------------------------------------------------- // accessors ------------------------------------------------------- + enum : int { + kNameFieldNumber = 1, + kPrecompiledCharsmapFieldNumber = 2, + kNormalizationRuleTsvFieldNumber = 6, + kEncodeCaseFieldNumber = 7, + kDecodeCaseFieldNumber = 8, + 
kAddDummyPrefixFieldNumber = 3, + kRemoveExtraWhitespacesFieldNumber = 4, + kEscapeWhitespacesFieldNumber = 5, + }; // optional string name = 1; bool has_name() const; + private: + bool _internal_has_name() const; + public: void clear_name(); - static const int kNameFieldNumber = 1; - const ::std::string& name() const; - void set_name(const ::std::string& value); - #if LANG_CXX11 - void set_name(::std::string&& value); - #endif + const std::string& name() const; + void set_name(const std::string& value); + void set_name(std::string&& value); void set_name(const char* value); void set_name(const char* value, size_t size); - ::std::string* mutable_name(); - ::std::string* release_name(); - void set_allocated_name(::std::string* name); + std::string* mutable_name(); + std::string* release_name(); + void set_allocated_name(std::string* name); + private: + const std::string& _internal_name() const; + void _internal_set_name(const std::string& value); + std::string* _internal_mutable_name(); + public: // optional bytes precompiled_charsmap = 2; bool has_precompiled_charsmap() const; + private: + bool _internal_has_precompiled_charsmap() const; + public: void clear_precompiled_charsmap(); - static const int kPrecompiledCharsmapFieldNumber = 2; - const ::std::string& precompiled_charsmap() const; - void set_precompiled_charsmap(const ::std::string& value); - #if LANG_CXX11 - void set_precompiled_charsmap(::std::string&& value); - #endif + const std::string& precompiled_charsmap() const; + void set_precompiled_charsmap(const std::string& value); + void set_precompiled_charsmap(std::string&& value); void set_precompiled_charsmap(const char* value); void set_precompiled_charsmap(const void* value, size_t size); - ::std::string* mutable_precompiled_charsmap(); - ::std::string* release_precompiled_charsmap(); - void set_allocated_precompiled_charsmap(::std::string* precompiled_charsmap); + std::string* mutable_precompiled_charsmap(); + std::string* 
release_precompiled_charsmap(); + void set_allocated_precompiled_charsmap(std::string* precompiled_charsmap); + private: + const std::string& _internal_precompiled_charsmap() const; + void _internal_set_precompiled_charsmap(const std::string& value); + std::string* _internal_mutable_precompiled_charsmap(); + public: // optional string normalization_rule_tsv = 6; bool has_normalization_rule_tsv() const; + private: + bool _internal_has_normalization_rule_tsv() const; + public: void clear_normalization_rule_tsv(); - static const int kNormalizationRuleTsvFieldNumber = 6; - const ::std::string& normalization_rule_tsv() const; - void set_normalization_rule_tsv(const ::std::string& value); - #if LANG_CXX11 - void set_normalization_rule_tsv(::std::string&& value); - #endif + const std::string& normalization_rule_tsv() const; + void set_normalization_rule_tsv(const std::string& value); + void set_normalization_rule_tsv(std::string&& value); void set_normalization_rule_tsv(const char* value); void set_normalization_rule_tsv(const char* value, size_t size); - ::std::string* mutable_normalization_rule_tsv(); - ::std::string* release_normalization_rule_tsv(); - void set_allocated_normalization_rule_tsv(::std::string* normalization_rule_tsv); + std::string* mutable_normalization_rule_tsv(); + std::string* release_normalization_rule_tsv(); + void set_allocated_normalization_rule_tsv(std::string* normalization_rule_tsv); + private: + const std::string& _internal_normalization_rule_tsv() const; + void _internal_set_normalization_rule_tsv(const std::string& value); + std::string* _internal_mutable_normalization_rule_tsv(); + public: // optional bool encode_case = 7 [default = false]; bool has_encode_case() const; + private: + bool _internal_has_encode_case() const; + public: void clear_encode_case(); - static const int kEncodeCaseFieldNumber = 7; bool encode_case() const; void set_encode_case(bool value); + private: + bool _internal_encode_case() const; + void 
_internal_set_encode_case(bool value); + public: // optional bool decode_case = 8 [default = false]; bool has_decode_case() const; + private: + bool _internal_has_decode_case() const; + public: void clear_decode_case(); - static const int kDecodeCaseFieldNumber = 8; bool decode_case() const; void set_decode_case(bool value); + private: + bool _internal_decode_case() const; + void _internal_set_decode_case(bool value); + public: // optional bool add_dummy_prefix = 3 [default = true]; bool has_add_dummy_prefix() const; + private: + bool _internal_has_add_dummy_prefix() const; + public: void clear_add_dummy_prefix(); - static const int kAddDummyPrefixFieldNumber = 3; bool add_dummy_prefix() const; void set_add_dummy_prefix(bool value); + private: + bool _internal_add_dummy_prefix() const; + void _internal_set_add_dummy_prefix(bool value); + public: // optional bool remove_extra_whitespaces = 4 [default = true]; bool has_remove_extra_whitespaces() const; + private: + bool _internal_has_remove_extra_whitespaces() const; + public: void clear_remove_extra_whitespaces(); - static const int kRemoveExtraWhitespacesFieldNumber = 4; bool remove_extra_whitespaces() const; void set_remove_extra_whitespaces(bool value); + private: + bool _internal_remove_extra_whitespaces() const; + void _internal_set_remove_extra_whitespaces(bool value); + public: // optional bool escape_whitespaces = 5 [default = true]; bool has_escape_whitespaces() const; + private: + bool _internal_has_escape_whitespaces() const; + public: void clear_escape_whitespaces(); - static const int kEscapeWhitespacesFieldNumber = 5; bool escape_whitespaces() const; void set_escape_whitespaces(bool value); + private: + bool _internal_escape_whitespaces() const; + void _internal_set_escape_whitespaces(bool value); + public: GOOGLE_PROTOBUF_EXTENSION_ACCESSORS(NormalizerSpec) // @@protoc_insertion_point(class_scope:sentencepiece.NormalizerSpec) private: - void set_has_name(); - void clear_has_name(); - void 
set_has_precompiled_charsmap(); - void clear_has_precompiled_charsmap(); - void set_has_add_dummy_prefix(); - void clear_has_add_dummy_prefix(); - void set_has_remove_extra_whitespaces(); - void clear_has_remove_extra_whitespaces(); - void set_has_escape_whitespaces(); - void clear_has_escape_whitespaces(); - void set_has_normalization_rule_tsv(); - void clear_has_normalization_rule_tsv(); - void set_has_encode_case(); - void clear_has_encode_case(); - void set_has_decode_case(); - void clear_has_decode_case(); - - ::google::protobuf::internal::ExtensionSet _extensions_; - - ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; - ::google::protobuf::internal::HasBits<1> _has_bits_; - mutable ::google::protobuf::internal::CachedSize _cached_size_; - ::google::protobuf::internal::ArenaStringPtr name_; - ::google::protobuf::internal::ArenaStringPtr precompiled_charsmap_; - ::google::protobuf::internal::ArenaStringPtr normalization_rule_tsv_; + class _Internal; + + ::PROTOBUF_NAMESPACE_ID::internal::ExtensionSet _extensions_; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::internal::HasBits<1> _has_bits_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr name_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr precompiled_charsmap_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr normalization_rule_tsv_; bool encode_case_; bool decode_case_; bool add_dummy_prefix_; bool remove_extra_whitespaces_; bool escape_whitespaces_; - friend struct ::protobuf_sentencepiece_5fmodel_2eproto::TableStruct; + friend struct ::TableStruct_sentencepiece_5fmodel_2eproto; }; // ------------------------------------------------------------------- -class SelfTestData_Sample : public ::google::protobuf::MessageLite /* 
@@protoc_insertion_point(class_definition:sentencepiece.SelfTestData.Sample) */ { +class SelfTestData_Sample PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.SelfTestData.Sample) */ { public: - SelfTestData_Sample(); + inline SelfTestData_Sample() : SelfTestData_Sample(nullptr) {} virtual ~SelfTestData_Sample(); SelfTestData_Sample(const SelfTestData_Sample& from); - - inline SelfTestData_Sample& operator=(const SelfTestData_Sample& from) { - CopyFrom(from); - return *this; - } - #if LANG_CXX11 SelfTestData_Sample(SelfTestData_Sample&& from) noexcept : SelfTestData_Sample() { *this = ::std::move(from); } + inline SelfTestData_Sample& operator=(const SelfTestData_Sample& from) { + CopyFrom(from); + return *this; + } inline SelfTestData_Sample& operator=(SelfTestData_Sample&& from) noexcept { - if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) { + if (GetArena() == from.GetArena()) { if (this != &from) InternalSwap(&from); } else { CopyFrom(from); } return *this; } - #endif - inline const ::std::string& unknown_fields() const { - return _internal_metadata_.unknown_fields(); + + inline const std::string& unknown_fields() const { + return _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString); } - inline ::std::string* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); + inline std::string* mutable_unknown_fields() { + return _internal_metadata_.mutable_unknown_fields(); } static const SelfTestData_Sample& default_instance(); - static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY static inline const SelfTestData_Sample* internal_default_instance() { return reinterpret_cast( &_SelfTestData_Sample_default_instance_); @@ -1009,137 +1368,161 @@ class SelfTestData_Sample : public ::google::protobuf::MessageLite /* @@protoc_i static constexpr int kIndexInFileMessages = 2; - void Swap(SelfTestData_Sample* other); friend void 
swap(SelfTestData_Sample& a, SelfTestData_Sample& b) { a.Swap(&b); } + inline void Swap(SelfTestData_Sample* other) { + if (other == this) return; + if (GetArena() == other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(SelfTestData_Sample* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } // implements Message ---------------------------------------------- inline SelfTestData_Sample* New() const final { - return CreateMaybeMessage(NULL); + return CreateMaybeMessage(nullptr); } - SelfTestData_Sample* New(::google::protobuf::Arena* arena) const final { + SelfTestData_Sample* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { return CreateMaybeMessage(arena); } - void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + void CheckTypeAndMergeFrom(const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) final; void CopyFrom(const SelfTestData_Sample& from); void MergeFrom(const SelfTestData_Sample& from); - void Clear() final; + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; bool IsInitialized() const final; size_t ByteSizeLong() const final; - bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) final; - void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; void DiscardUnknownFields(); int GetCachedSize() const final { return _cached_size_.Get(); } private: - void SharedCtor(); - void SharedDtor(); + inline void SharedCtor(); + inline void SharedDtor(); void SetCachedSize(int size) const; void InternalSwap(SelfTestData_Sample* other); - private: 
- inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return NULL; - } - inline void* MaybeArenaPtr() const { - return NULL; + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "sentencepiece.SelfTestData.Sample"; } + protected: + explicit SelfTestData_Sample(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); public: - ::std::string GetTypeName() const final; + std::string GetTypeName() const final; // nested types ---------------------------------------------------- // accessors ------------------------------------------------------- + enum : int { + kInputFieldNumber = 1, + kExpectedFieldNumber = 2, + }; // optional string input = 1; bool has_input() const; + private: + bool _internal_has_input() const; + public: void clear_input(); - static const int kInputFieldNumber = 1; - const ::std::string& input() const; - void set_input(const ::std::string& value); - #if LANG_CXX11 - void set_input(::std::string&& value); - #endif + const std::string& input() const; + void set_input(const std::string& value); + void set_input(std::string&& value); void set_input(const char* value); void set_input(const char* value, size_t size); - ::std::string* mutable_input(); - ::std::string* release_input(); - void set_allocated_input(::std::string* input); + std::string* mutable_input(); + std::string* release_input(); + void set_allocated_input(std::string* input); + private: + const std::string& _internal_input() const; + void _internal_set_input(const std::string& value); + std::string* _internal_mutable_input(); + public: // optional string expected = 2; bool has_expected() const; + private: + bool _internal_has_expected() const; + public: void clear_expected(); - static const int kExpectedFieldNumber = 2; - const ::std::string& expected() const; - void set_expected(const 
::std::string& value); - #if LANG_CXX11 - void set_expected(::std::string&& value); - #endif + const std::string& expected() const; + void set_expected(const std::string& value); + void set_expected(std::string&& value); void set_expected(const char* value); void set_expected(const char* value, size_t size); - ::std::string* mutable_expected(); - ::std::string* release_expected(); - void set_allocated_expected(::std::string* expected); + std::string* mutable_expected(); + std::string* release_expected(); + void set_allocated_expected(std::string* expected); + private: + const std::string& _internal_expected() const; + void _internal_set_expected(const std::string& value); + std::string* _internal_mutable_expected(); + public: // @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData.Sample) private: - void set_has_input(); - void clear_has_input(); - void set_has_expected(); - void clear_has_expected(); - - ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; - ::google::protobuf::internal::HasBits<1> _has_bits_; - mutable ::google::protobuf::internal::CachedSize _cached_size_; - ::google::protobuf::internal::ArenaStringPtr input_; - ::google::protobuf::internal::ArenaStringPtr expected_; - friend struct ::protobuf_sentencepiece_5fmodel_2eproto::TableStruct; + class _Internal; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::internal::HasBits<1> _has_bits_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr input_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr expected_; + friend struct ::TableStruct_sentencepiece_5fmodel_2eproto; }; // ------------------------------------------------------------------- -class SelfTestData : public ::google::protobuf::MessageLite /* 
@@protoc_insertion_point(class_definition:sentencepiece.SelfTestData) */ { +class SelfTestData PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.SelfTestData) */ { public: - SelfTestData(); + inline SelfTestData() : SelfTestData(nullptr) {} virtual ~SelfTestData(); SelfTestData(const SelfTestData& from); - - inline SelfTestData& operator=(const SelfTestData& from) { - CopyFrom(from); - return *this; - } - #if LANG_CXX11 SelfTestData(SelfTestData&& from) noexcept : SelfTestData() { *this = ::std::move(from); } + inline SelfTestData& operator=(const SelfTestData& from) { + CopyFrom(from); + return *this; + } inline SelfTestData& operator=(SelfTestData&& from) noexcept { - if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) { + if (GetArena() == from.GetArena()) { if (this != &from) InternalSwap(&from); } else { CopyFrom(from); } return *this; } - #endif - inline const ::std::string& unknown_fields() const { - return _internal_metadata_.unknown_fields(); + + inline const std::string& unknown_fields() const { + return _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString); } - inline ::std::string* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); + inline std::string* mutable_unknown_fields() { + return _internal_metadata_.mutable_unknown_fields(); } static const SelfTestData& default_instance(); - static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY static inline const SelfTestData* internal_default_instance() { return reinterpret_cast( &_SelfTestData_default_instance_); @@ -1147,50 +1530,63 @@ class SelfTestData : public ::google::protobuf::MessageLite /* @@protoc_insertio static constexpr int kIndexInFileMessages = 3; - void Swap(SelfTestData* other); friend void swap(SelfTestData& a, SelfTestData& b) { a.Swap(&b); } + inline void Swap(SelfTestData* other) { + if (other == this) return; + if (GetArena() == 
other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(SelfTestData* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } // implements Message ---------------------------------------------- inline SelfTestData* New() const final { - return CreateMaybeMessage(NULL); + return CreateMaybeMessage(nullptr); } - SelfTestData* New(::google::protobuf::Arena* arena) const final { + SelfTestData* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { return CreateMaybeMessage(arena); } - void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + void CheckTypeAndMergeFrom(const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) final; void CopyFrom(const SelfTestData& from); void MergeFrom(const SelfTestData& from); - void Clear() final; + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; bool IsInitialized() const final; size_t ByteSizeLong() const final; - bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) final; - void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; void DiscardUnknownFields(); int GetCachedSize() const final { return _cached_size_.Get(); } private: - void SharedCtor(); - void SharedDtor(); + inline void SharedCtor(); + inline void SharedDtor(); void SetCachedSize(int size) const; void InternalSwap(SelfTestData* other); - private: - inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return NULL; - } - inline void* MaybeArenaPtr() const { - return NULL; + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static 
::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "sentencepiece.SelfTestData"; } + protected: + explicit SelfTestData(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); public: - ::std::string GetTypeName() const final; + std::string GetTypeName() const final; // nested types ---------------------------------------------------- @@ -1198,68 +1594,77 @@ class SelfTestData : public ::google::protobuf::MessageLite /* @@protoc_insertio // accessors ------------------------------------------------------- + enum : int { + kSamplesFieldNumber = 1, + }; // repeated .sentencepiece.SelfTestData.Sample samples = 1; int samples_size() const; + private: + int _internal_samples_size() const; + public: void clear_samples(); - static const int kSamplesFieldNumber = 1; ::sentencepiece::SelfTestData_Sample* mutable_samples(int index); - ::google::protobuf::RepeatedPtrField< ::sentencepiece::SelfTestData_Sample >* + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SelfTestData_Sample >* mutable_samples(); + private: + const ::sentencepiece::SelfTestData_Sample& _internal_samples(int index) const; + ::sentencepiece::SelfTestData_Sample* _internal_add_samples(); + public: const ::sentencepiece::SelfTestData_Sample& samples(int index) const; ::sentencepiece::SelfTestData_Sample* add_samples(); - const ::google::protobuf::RepeatedPtrField< ::sentencepiece::SelfTestData_Sample >& + const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SelfTestData_Sample >& samples() const; GOOGLE_PROTOBUF_EXTENSION_ACCESSORS(SelfTestData) // @@protoc_insertion_point(class_scope:sentencepiece.SelfTestData) private: + class _Internal; - ::google::protobuf::internal::ExtensionSet _extensions_; + ::PROTOBUF_NAMESPACE_ID::internal::ExtensionSet _extensions_; - ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; - 
::google::protobuf::internal::HasBits<1> _has_bits_; - mutable ::google::protobuf::internal::CachedSize _cached_size_; - ::google::protobuf::RepeatedPtrField< ::sentencepiece::SelfTestData_Sample > samples_; - friend struct ::protobuf_sentencepiece_5fmodel_2eproto::TableStruct; + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SelfTestData_Sample > samples_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + friend struct ::TableStruct_sentencepiece_5fmodel_2eproto; }; // ------------------------------------------------------------------- -class ModelProto_SentencePiece : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.ModelProto.SentencePiece) */ { +class ModelProto_SentencePiece PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.ModelProto.SentencePiece) */ { public: - ModelProto_SentencePiece(); + inline ModelProto_SentencePiece() : ModelProto_SentencePiece(nullptr) {} virtual ~ModelProto_SentencePiece(); ModelProto_SentencePiece(const ModelProto_SentencePiece& from); - - inline ModelProto_SentencePiece& operator=(const ModelProto_SentencePiece& from) { - CopyFrom(from); - return *this; - } - #if LANG_CXX11 ModelProto_SentencePiece(ModelProto_SentencePiece&& from) noexcept : ModelProto_SentencePiece() { *this = ::std::move(from); } + inline ModelProto_SentencePiece& operator=(const ModelProto_SentencePiece& from) { + CopyFrom(from); + return *this; + } inline ModelProto_SentencePiece& operator=(ModelProto_SentencePiece&& from) noexcept { - if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) { + if (GetArena() == from.GetArena()) { if (this != &from) InternalSwap(&from); } else { CopyFrom(from); } return *this; } - #endif - inline const ::std::string& 
unknown_fields() const { - return _internal_metadata_.unknown_fields(); + + inline const std::string& unknown_fields() const { + return _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString); } - inline ::std::string* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); + inline std::string* mutable_unknown_fields() { + return _internal_metadata_.mutable_unknown_fields(); } static const ModelProto_SentencePiece& default_instance(); - static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY static inline const ModelProto_SentencePiece* internal_default_instance() { return reinterpret_cast( &_ModelProto_SentencePiece_default_instance_); @@ -1267,165 +1672,206 @@ class ModelProto_SentencePiece : public ::google::protobuf::MessageLite /* @@pro static constexpr int kIndexInFileMessages = 4; - void Swap(ModelProto_SentencePiece* other); friend void swap(ModelProto_SentencePiece& a, ModelProto_SentencePiece& b) { a.Swap(&b); } + inline void Swap(ModelProto_SentencePiece* other) { + if (other == this) return; + if (GetArena() == other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(ModelProto_SentencePiece* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } // implements Message ---------------------------------------------- inline ModelProto_SentencePiece* New() const final { - return CreateMaybeMessage(NULL); + return CreateMaybeMessage(nullptr); } - ModelProto_SentencePiece* New(::google::protobuf::Arena* arena) const final { + ModelProto_SentencePiece* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { return CreateMaybeMessage(arena); } - void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + void CheckTypeAndMergeFrom(const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) final; void CopyFrom(const ModelProto_SentencePiece& 
from); void MergeFrom(const ModelProto_SentencePiece& from); - void Clear() final; + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; bool IsInitialized() const final; size_t ByteSizeLong() const final; - bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) final; - void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; void DiscardUnknownFields(); int GetCachedSize() const final { return _cached_size_.Get(); } private: - void SharedCtor(); - void SharedDtor(); + inline void SharedCtor(); + inline void SharedDtor(); void SetCachedSize(int size) const; void InternalSwap(ModelProto_SentencePiece* other); - private: - inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return NULL; - } - inline void* MaybeArenaPtr() const { - return NULL; + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "sentencepiece.ModelProto.SentencePiece"; } + protected: + explicit ModelProto_SentencePiece(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); public: - ::std::string GetTypeName() const final; + std::string GetTypeName() const final; // nested types ---------------------------------------------------- typedef ModelProto_SentencePiece_Type Type; - static const Type NORMAL = + static constexpr Type NORMAL = ModelProto_SentencePiece_Type_NORMAL; - static const Type UNKNOWN = + static constexpr Type UNKNOWN = ModelProto_SentencePiece_Type_UNKNOWN; - static const Type CONTROL = + static constexpr Type CONTROL = 
ModelProto_SentencePiece_Type_CONTROL; - static const Type USER_DEFINED = + static constexpr Type USER_DEFINED = ModelProto_SentencePiece_Type_USER_DEFINED; - static const Type BYTE = + static constexpr Type BYTE = ModelProto_SentencePiece_Type_BYTE; - static const Type UNUSED = + static constexpr Type UNUSED = ModelProto_SentencePiece_Type_UNUSED; static inline bool Type_IsValid(int value) { return ModelProto_SentencePiece_Type_IsValid(value); } - static const Type Type_MIN = + static constexpr Type Type_MIN = ModelProto_SentencePiece_Type_Type_MIN; - static const Type Type_MAX = + static constexpr Type Type_MAX = ModelProto_SentencePiece_Type_Type_MAX; - static const int Type_ARRAYSIZE = + static constexpr int Type_ARRAYSIZE = ModelProto_SentencePiece_Type_Type_ARRAYSIZE; + template + static inline const std::string& Type_Name(T enum_t_value) { + static_assert(::std::is_same::value || + ::std::is_integral::value, + "Incorrect type passed to function Type_Name."); + return ModelProto_SentencePiece_Type_Name(enum_t_value); + } + static inline bool Type_Parse(::PROTOBUF_NAMESPACE_ID::ConstStringParam name, + Type* value) { + return ModelProto_SentencePiece_Type_Parse(name, value); + } // accessors ------------------------------------------------------- + enum : int { + kPieceFieldNumber = 1, + kScoreFieldNumber = 2, + kTypeFieldNumber = 3, + }; // optional string piece = 1; bool has_piece() const; + private: + bool _internal_has_piece() const; + public: void clear_piece(); - static const int kPieceFieldNumber = 1; - const ::std::string& piece() const; - void set_piece(const ::std::string& value); - #if LANG_CXX11 - void set_piece(::std::string&& value); - #endif + const std::string& piece() const; + void set_piece(const std::string& value); + void set_piece(std::string&& value); void set_piece(const char* value); void set_piece(const char* value, size_t size); - ::std::string* mutable_piece(); - ::std::string* release_piece(); - void 
set_allocated_piece(::std::string* piece); + std::string* mutable_piece(); + std::string* release_piece(); + void set_allocated_piece(std::string* piece); + private: + const std::string& _internal_piece() const; + void _internal_set_piece(const std::string& value); + std::string* _internal_mutable_piece(); + public: // optional float score = 2; bool has_score() const; + private: + bool _internal_has_score() const; + public: void clear_score(); - static const int kScoreFieldNumber = 2; float score() const; void set_score(float value); + private: + float _internal_score() const; + void _internal_set_score(float value); + public: // optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL]; bool has_type() const; + private: + bool _internal_has_type() const; + public: void clear_type(); - static const int kTypeFieldNumber = 3; ::sentencepiece::ModelProto_SentencePiece_Type type() const; void set_type(::sentencepiece::ModelProto_SentencePiece_Type value); + private: + ::sentencepiece::ModelProto_SentencePiece_Type _internal_type() const; + void _internal_set_type(::sentencepiece::ModelProto_SentencePiece_Type value); + public: GOOGLE_PROTOBUF_EXTENSION_ACCESSORS(ModelProto_SentencePiece) // @@protoc_insertion_point(class_scope:sentencepiece.ModelProto.SentencePiece) private: - void set_has_piece(); - void clear_has_piece(); - void set_has_score(); - void clear_has_score(); - void set_has_type(); - void clear_has_type(); - - ::google::protobuf::internal::ExtensionSet _extensions_; - - ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; - ::google::protobuf::internal::HasBits<1> _has_bits_; - mutable ::google::protobuf::internal::CachedSize _cached_size_; - ::google::protobuf::internal::ArenaStringPtr piece_; + class _Internal; + + ::PROTOBUF_NAMESPACE_ID::internal::ExtensionSet _extensions_; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef 
void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::internal::HasBits<1> _has_bits_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr piece_; float score_; int type_; - friend struct ::protobuf_sentencepiece_5fmodel_2eproto::TableStruct; + friend struct ::TableStruct_sentencepiece_5fmodel_2eproto; }; // ------------------------------------------------------------------- -class ModelProto : public ::google::protobuf::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.ModelProto) */ { +class ModelProto PROTOBUF_FINAL : + public ::PROTOBUF_NAMESPACE_ID::MessageLite /* @@protoc_insertion_point(class_definition:sentencepiece.ModelProto) */ { public: - ModelProto(); + inline ModelProto() : ModelProto(nullptr) {} virtual ~ModelProto(); ModelProto(const ModelProto& from); - - inline ModelProto& operator=(const ModelProto& from) { - CopyFrom(from); - return *this; - } - #if LANG_CXX11 ModelProto(ModelProto&& from) noexcept : ModelProto() { *this = ::std::move(from); } + inline ModelProto& operator=(const ModelProto& from) { + CopyFrom(from); + return *this; + } inline ModelProto& operator=(ModelProto&& from) noexcept { - if (GetArenaNoVirtual() == from.GetArenaNoVirtual()) { + if (GetArena() == from.GetArena()) { if (this != &from) InternalSwap(&from); } else { CopyFrom(from); } return *this; } - #endif - inline const ::std::string& unknown_fields() const { - return _internal_metadata_.unknown_fields(); + + inline const std::string& unknown_fields() const { + return _internal_metadata_.unknown_fields(::PROTOBUF_NAMESPACE_ID::internal::GetEmptyString); } - inline ::std::string* mutable_unknown_fields() { - return _internal_metadata_.mutable_unknown_fields(); + inline std::string* mutable_unknown_fields() { + return _internal_metadata_.mutable_unknown_fields(); } static const ModelProto& default_instance(); - static void InitAsDefaultInstance(); // FOR INTERNAL USE ONLY static 
inline const ModelProto* internal_default_instance() { return reinterpret_cast( &_ModelProto_default_instance_); @@ -1433,50 +1879,63 @@ class ModelProto : public ::google::protobuf::MessageLite /* @@protoc_insertion_ static constexpr int kIndexInFileMessages = 5; - void Swap(ModelProto* other); friend void swap(ModelProto& a, ModelProto& b) { a.Swap(&b); } + inline void Swap(ModelProto* other) { + if (other == this) return; + if (GetArena() == other->GetArena()) { + InternalSwap(other); + } else { + ::PROTOBUF_NAMESPACE_ID::internal::GenericSwap(this, other); + } + } + void UnsafeArenaSwap(ModelProto* other) { + if (other == this) return; + GOOGLE_DCHECK(GetArena() == other->GetArena()); + InternalSwap(other); + } // implements Message ---------------------------------------------- inline ModelProto* New() const final { - return CreateMaybeMessage(NULL); + return CreateMaybeMessage(nullptr); } - ModelProto* New(::google::protobuf::Arena* arena) const final { + ModelProto* New(::PROTOBUF_NAMESPACE_ID::Arena* arena) const final { return CreateMaybeMessage(arena); } - void CheckTypeAndMergeFrom(const ::google::protobuf::MessageLite& from) + void CheckTypeAndMergeFrom(const ::PROTOBUF_NAMESPACE_ID::MessageLite& from) final; void CopyFrom(const ModelProto& from); void MergeFrom(const ModelProto& from); - void Clear() final; + PROTOBUF_ATTRIBUTE_REINITIALIZES void Clear() final; bool IsInitialized() const final; size_t ByteSizeLong() const final; - bool MergePartialFromCodedStream( - ::google::protobuf::io::CodedInputStream* input) final; - void SerializeWithCachedSizes( - ::google::protobuf::io::CodedOutputStream* output) const final; + const char* _InternalParse(const char* ptr, ::PROTOBUF_NAMESPACE_ID::internal::ParseContext* ctx) final; + ::PROTOBUF_NAMESPACE_ID::uint8* _InternalSerialize( + ::PROTOBUF_NAMESPACE_ID::uint8* target, ::PROTOBUF_NAMESPACE_ID::io::EpsCopyOutputStream* stream) const final; void DiscardUnknownFields(); int GetCachedSize() const final { 
return _cached_size_.Get(); } private: - void SharedCtor(); - void SharedDtor(); + inline void SharedCtor(); + inline void SharedDtor(); void SetCachedSize(int size) const; void InternalSwap(ModelProto* other); - private: - inline ::google::protobuf::Arena* GetArenaNoVirtual() const { - return NULL; - } - inline void* MaybeArenaPtr() const { - return NULL; + friend class ::PROTOBUF_NAMESPACE_ID::internal::AnyMetadata; + static ::PROTOBUF_NAMESPACE_ID::StringPiece FullMessageName() { + return "sentencepiece.ModelProto"; } + protected: + explicit ModelProto(::PROTOBUF_NAMESPACE_ID::Arena* arena); + private: + static void ArenaDtor(void* object); + inline void RegisterArenaDtor(::PROTOBUF_NAMESPACE_ID::Arena* arena); public: - ::std::string GetTypeName() const final; + std::string GetTypeName() const final; // nested types ---------------------------------------------------- @@ -1484,89 +1943,121 @@ class ModelProto : public ::google::protobuf::MessageLite /* @@protoc_insertion_ // accessors ------------------------------------------------------- + enum : int { + kPiecesFieldNumber = 1, + kTrainerSpecFieldNumber = 2, + kNormalizerSpecFieldNumber = 3, + kSelfTestDataFieldNumber = 4, + kDenormalizerSpecFieldNumber = 5, + }; // repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; int pieces_size() const; + private: + int _internal_pieces_size() const; + public: void clear_pieces(); - static const int kPiecesFieldNumber = 1; ::sentencepiece::ModelProto_SentencePiece* mutable_pieces(int index); - ::google::protobuf::RepeatedPtrField< ::sentencepiece::ModelProto_SentencePiece >* + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::ModelProto_SentencePiece >* mutable_pieces(); + private: + const ::sentencepiece::ModelProto_SentencePiece& _internal_pieces(int index) const; + ::sentencepiece::ModelProto_SentencePiece* _internal_add_pieces(); + public: const ::sentencepiece::ModelProto_SentencePiece& pieces(int index) const; 
::sentencepiece::ModelProto_SentencePiece* add_pieces(); - const ::google::protobuf::RepeatedPtrField< ::sentencepiece::ModelProto_SentencePiece >& + const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::ModelProto_SentencePiece >& pieces() const; // optional .sentencepiece.TrainerSpec trainer_spec = 2; bool has_trainer_spec() const; - void clear_trainer_spec(); - static const int kTrainerSpecFieldNumber = 2; private: - const ::sentencepiece::TrainerSpec& _internal_trainer_spec() const; + bool _internal_has_trainer_spec() const; public: + void clear_trainer_spec(); const ::sentencepiece::TrainerSpec& trainer_spec() const; ::sentencepiece::TrainerSpec* release_trainer_spec(); ::sentencepiece::TrainerSpec* mutable_trainer_spec(); void set_allocated_trainer_spec(::sentencepiece::TrainerSpec* trainer_spec); + private: + const ::sentencepiece::TrainerSpec& _internal_trainer_spec() const; + ::sentencepiece::TrainerSpec* _internal_mutable_trainer_spec(); + public: + void unsafe_arena_set_allocated_trainer_spec( + ::sentencepiece::TrainerSpec* trainer_spec); + ::sentencepiece::TrainerSpec* unsafe_arena_release_trainer_spec(); // optional .sentencepiece.NormalizerSpec normalizer_spec = 3; bool has_normalizer_spec() const; - void clear_normalizer_spec(); - static const int kNormalizerSpecFieldNumber = 3; private: - const ::sentencepiece::NormalizerSpec& _internal_normalizer_spec() const; + bool _internal_has_normalizer_spec() const; public: + void clear_normalizer_spec(); const ::sentencepiece::NormalizerSpec& normalizer_spec() const; ::sentencepiece::NormalizerSpec* release_normalizer_spec(); ::sentencepiece::NormalizerSpec* mutable_normalizer_spec(); void set_allocated_normalizer_spec(::sentencepiece::NormalizerSpec* normalizer_spec); + private: + const ::sentencepiece::NormalizerSpec& _internal_normalizer_spec() const; + ::sentencepiece::NormalizerSpec* _internal_mutable_normalizer_spec(); + public: + void unsafe_arena_set_allocated_normalizer_spec( + 
::sentencepiece::NormalizerSpec* normalizer_spec); + ::sentencepiece::NormalizerSpec* unsafe_arena_release_normalizer_spec(); // optional .sentencepiece.SelfTestData self_test_data = 4; bool has_self_test_data() const; - void clear_self_test_data(); - static const int kSelfTestDataFieldNumber = 4; private: - const ::sentencepiece::SelfTestData& _internal_self_test_data() const; + bool _internal_has_self_test_data() const; public: + void clear_self_test_data(); const ::sentencepiece::SelfTestData& self_test_data() const; ::sentencepiece::SelfTestData* release_self_test_data(); ::sentencepiece::SelfTestData* mutable_self_test_data(); void set_allocated_self_test_data(::sentencepiece::SelfTestData* self_test_data); + private: + const ::sentencepiece::SelfTestData& _internal_self_test_data() const; + ::sentencepiece::SelfTestData* _internal_mutable_self_test_data(); + public: + void unsafe_arena_set_allocated_self_test_data( + ::sentencepiece::SelfTestData* self_test_data); + ::sentencepiece::SelfTestData* unsafe_arena_release_self_test_data(); // optional .sentencepiece.NormalizerSpec denormalizer_spec = 5; bool has_denormalizer_spec() const; - void clear_denormalizer_spec(); - static const int kDenormalizerSpecFieldNumber = 5; private: - const ::sentencepiece::NormalizerSpec& _internal_denormalizer_spec() const; + bool _internal_has_denormalizer_spec() const; public: + void clear_denormalizer_spec(); const ::sentencepiece::NormalizerSpec& denormalizer_spec() const; ::sentencepiece::NormalizerSpec* release_denormalizer_spec(); ::sentencepiece::NormalizerSpec* mutable_denormalizer_spec(); void set_allocated_denormalizer_spec(::sentencepiece::NormalizerSpec* denormalizer_spec); + private: + const ::sentencepiece::NormalizerSpec& _internal_denormalizer_spec() const; + ::sentencepiece::NormalizerSpec* _internal_mutable_denormalizer_spec(); + public: + void unsafe_arena_set_allocated_denormalizer_spec( + ::sentencepiece::NormalizerSpec* denormalizer_spec); + 
::sentencepiece::NormalizerSpec* unsafe_arena_release_denormalizer_spec(); GOOGLE_PROTOBUF_EXTENSION_ACCESSORS(ModelProto) // @@protoc_insertion_point(class_scope:sentencepiece.ModelProto) private: - void set_has_trainer_spec(); - void clear_has_trainer_spec(); - void set_has_normalizer_spec(); - void clear_has_normalizer_spec(); - void set_has_self_test_data(); - void clear_has_self_test_data(); - void set_has_denormalizer_spec(); - void clear_has_denormalizer_spec(); - - ::google::protobuf::internal::ExtensionSet _extensions_; - - ::google::protobuf::internal::InternalMetadataWithArenaLite _internal_metadata_; - ::google::protobuf::internal::HasBits<1> _has_bits_; - mutable ::google::protobuf::internal::CachedSize _cached_size_; - ::google::protobuf::RepeatedPtrField< ::sentencepiece::ModelProto_SentencePiece > pieces_; + class _Internal; + + ::PROTOBUF_NAMESPACE_ID::internal::ExtensionSet _extensions_; + + template friend class ::PROTOBUF_NAMESPACE_ID::Arena::InternalHelper; + typedef void InternalArenaConstructable_; + typedef void DestructorSkippable_; + ::PROTOBUF_NAMESPACE_ID::internal::HasBits<1> _has_bits_; + mutable ::PROTOBUF_NAMESPACE_ID::internal::CachedSize _cached_size_; + ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::ModelProto_SentencePiece > pieces_; ::sentencepiece::TrainerSpec* trainer_spec_; ::sentencepiece::NormalizerSpec* normalizer_spec_; ::sentencepiece::SelfTestData* self_test_data_; ::sentencepiece::NormalizerSpec* denormalizer_spec_; - friend struct ::protobuf_sentencepiece_5fmodel_2eproto::TableStruct; + friend struct ::TableStruct_sentencepiece_5fmodel_2eproto; }; // =================================================================== @@ -1580,32 +2071,40 @@ class ModelProto : public ::google::protobuf::MessageLite /* @@protoc_insertion_ // TrainerSpec // repeated string input = 1; -inline int TrainerSpec::input_size() const { +inline int TrainerSpec::_internal_input_size() const { return input_.size(); } +inline int 
TrainerSpec::input_size() const { + return _internal_input_size(); +} inline void TrainerSpec::clear_input() { input_.Clear(); } -inline const ::std::string& TrainerSpec::input(int index) const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.input) +inline std::string* TrainerSpec::add_input() { + // @@protoc_insertion_point(field_add_mutable:sentencepiece.TrainerSpec.input) + return _internal_add_input(); +} +inline const std::string& TrainerSpec::_internal_input(int index) const { return input_.Get(index); } -inline ::std::string* TrainerSpec::mutable_input(int index) { +inline const std::string& TrainerSpec::input(int index) const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.input) + return _internal_input(index); +} +inline std::string* TrainerSpec::mutable_input(int index) { // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.input) return input_.Mutable(index); } -inline void TrainerSpec::set_input(int index, const ::std::string& value) { +inline void TrainerSpec::set_input(int index, const std::string& value) { // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.input) input_.Mutable(index)->assign(value); } -#if LANG_CXX11 -inline void TrainerSpec::set_input(int index, ::std::string&& value) { +inline void TrainerSpec::set_input(int index, std::string&& value) { // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.input) input_.Mutable(index)->assign(std::move(value)); } -#endif inline void TrainerSpec::set_input(int index, const char* value) { - GOOGLE_DCHECK(value != NULL); + GOOGLE_DCHECK(value != nullptr); input_.Mutable(index)->assign(value); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.input) } @@ -1614,22 +2113,19 @@ inline void TrainerSpec::set_input(int index, const char* value, size_t size) { reinterpret_cast(value), size); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.input) } -inline ::std::string* 
TrainerSpec::add_input() { - // @@protoc_insertion_point(field_add_mutable:sentencepiece.TrainerSpec.input) +inline std::string* TrainerSpec::_internal_add_input() { return input_.Add(); } -inline void TrainerSpec::add_input(const ::std::string& value) { +inline void TrainerSpec::add_input(const std::string& value) { input_.Add()->assign(value); // @@protoc_insertion_point(field_add:sentencepiece.TrainerSpec.input) } -#if LANG_CXX11 -inline void TrainerSpec::add_input(::std::string&& value) { +inline void TrainerSpec::add_input(std::string&& value) { input_.Add(std::move(value)); // @@protoc_insertion_point(field_add:sentencepiece.TrainerSpec.input) } -#endif inline void TrainerSpec::add_input(const char* value) { - GOOGLE_DCHECK(value != NULL); + GOOGLE_DCHECK(value != nullptr); input_.Add()->assign(value); // @@protoc_insertion_point(field_add_char:sentencepiece.TrainerSpec.input) } @@ -1637,225 +2133,255 @@ inline void TrainerSpec::add_input(const char* value, size_t size) { input_.Add()->assign(reinterpret_cast(value), size); // @@protoc_insertion_point(field_add_pointer:sentencepiece.TrainerSpec.input) } -inline const ::google::protobuf::RepeatedPtrField< ::std::string>& +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField& TrainerSpec::input() const { // @@protoc_insertion_point(field_list:sentencepiece.TrainerSpec.input) return input_; } -inline ::google::protobuf::RepeatedPtrField< ::std::string>* +inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField* TrainerSpec::mutable_input() { // @@protoc_insertion_point(field_mutable_list:sentencepiece.TrainerSpec.input) return &input_; } // optional string input_format = 7; -inline bool TrainerSpec::has_input_format() const { - return (_has_bits_[0] & 0x00000002u) != 0; -} -inline void TrainerSpec::set_has_input_format() { - _has_bits_[0] |= 0x00000002u; +inline bool TrainerSpec::_internal_has_input_format() const { + bool value = (_has_bits_[0] & 0x00000002u) != 0; + return value; } -inline void 
TrainerSpec::clear_has_input_format() { - _has_bits_[0] &= ~0x00000002u; +inline bool TrainerSpec::has_input_format() const { + return _internal_has_input_format(); } inline void TrainerSpec::clear_input_format() { - input_format_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_input_format(); + input_format_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000002u; } -inline const ::std::string& TrainerSpec::input_format() const { +inline const std::string& TrainerSpec::input_format() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.input_format) - return input_format_.GetNoArena(); + return _internal_input_format(); } -inline void TrainerSpec::set_input_format(const ::std::string& value) { - set_has_input_format(); - input_format_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void TrainerSpec::set_input_format(const std::string& value) { + _internal_set_input_format(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.input_format) } -#if LANG_CXX11 -inline void TrainerSpec::set_input_format(::std::string&& value) { - set_has_input_format(); - input_format_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* TrainerSpec::mutable_input_format() { + // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.input_format) + return _internal_mutable_input_format(); +} +inline const std::string& TrainerSpec::_internal_input_format() const { + return input_format_.Get(); +} +inline void TrainerSpec::_internal_set_input_format(const std::string& value) { + _has_bits_[0] |= 0x00000002u; + input_format_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void TrainerSpec::set_input_format(std::string&& value) { + _has_bits_[0] |= 0x00000002u; + input_format_.Set( + 
::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.TrainerSpec.input_format) } -#endif inline void TrainerSpec::set_input_format(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_input_format(); - input_format_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000002u; + input_format_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.input_format) } -inline void TrainerSpec::set_input_format(const char* value, size_t size) { - set_has_input_format(); - input_format_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void TrainerSpec::set_input_format(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000002u; + input_format_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.input_format) } -inline ::std::string* TrainerSpec::mutable_input_format() { - set_has_input_format(); - // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.input_format) - return input_format_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* TrainerSpec::_internal_mutable_input_format() { + _has_bits_[0] |= 0x00000002u; + return input_format_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* TrainerSpec::release_input_format() { +inline std::string* TrainerSpec::release_input_format() { // @@protoc_insertion_point(field_release:sentencepiece.TrainerSpec.input_format) - if 
(!has_input_format()) { - return NULL; + if (!_internal_has_input_format()) { + return nullptr; } - clear_has_input_format(); - return input_format_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000002u; + return input_format_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void TrainerSpec::set_allocated_input_format(::std::string* input_format) { - if (input_format != NULL) { - set_has_input_format(); +inline void TrainerSpec::set_allocated_input_format(std::string* input_format) { + if (input_format != nullptr) { + _has_bits_[0] |= 0x00000002u; } else { - clear_has_input_format(); + _has_bits_[0] &= ~0x00000002u; } - input_format_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), input_format); + input_format_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), input_format, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.TrainerSpec.input_format) } // optional string model_prefix = 2; -inline bool TrainerSpec::has_model_prefix() const { - return (_has_bits_[0] & 0x00000001u) != 0; +inline bool TrainerSpec::_internal_has_model_prefix() const { + bool value = (_has_bits_[0] & 0x00000001u) != 0; + return value; } -inline void TrainerSpec::set_has_model_prefix() { - _has_bits_[0] |= 0x00000001u; -} -inline void TrainerSpec::clear_has_model_prefix() { - _has_bits_[0] &= ~0x00000001u; +inline bool TrainerSpec::has_model_prefix() const { + return _internal_has_model_prefix(); } inline void TrainerSpec::clear_model_prefix() { - model_prefix_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_model_prefix(); + model_prefix_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000001u; } -inline const ::std::string& TrainerSpec::model_prefix() const { +inline const std::string& TrainerSpec::model_prefix() const { // 
@@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.model_prefix) - return model_prefix_.GetNoArena(); + return _internal_model_prefix(); } -inline void TrainerSpec::set_model_prefix(const ::std::string& value) { - set_has_model_prefix(); - model_prefix_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void TrainerSpec::set_model_prefix(const std::string& value) { + _internal_set_model_prefix(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.model_prefix) } -#if LANG_CXX11 -inline void TrainerSpec::set_model_prefix(::std::string&& value) { - set_has_model_prefix(); - model_prefix_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* TrainerSpec::mutable_model_prefix() { + // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.model_prefix) + return _internal_mutable_model_prefix(); +} +inline const std::string& TrainerSpec::_internal_model_prefix() const { + return model_prefix_.Get(); +} +inline void TrainerSpec::_internal_set_model_prefix(const std::string& value) { + _has_bits_[0] |= 0x00000001u; + model_prefix_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void TrainerSpec::set_model_prefix(std::string&& value) { + _has_bits_[0] |= 0x00000001u; + model_prefix_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.TrainerSpec.model_prefix) } -#endif inline void TrainerSpec::set_model_prefix(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_model_prefix(); - model_prefix_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000001u; + model_prefix_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), 
GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.model_prefix) } -inline void TrainerSpec::set_model_prefix(const char* value, size_t size) { - set_has_model_prefix(); - model_prefix_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void TrainerSpec::set_model_prefix(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000001u; + model_prefix_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.model_prefix) } -inline ::std::string* TrainerSpec::mutable_model_prefix() { - set_has_model_prefix(); - // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.model_prefix) - return model_prefix_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* TrainerSpec::_internal_mutable_model_prefix() { + _has_bits_[0] |= 0x00000001u; + return model_prefix_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* TrainerSpec::release_model_prefix() { +inline std::string* TrainerSpec::release_model_prefix() { // @@protoc_insertion_point(field_release:sentencepiece.TrainerSpec.model_prefix) - if (!has_model_prefix()) { - return NULL; + if (!_internal_has_model_prefix()) { + return nullptr; } - clear_has_model_prefix(); - return model_prefix_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000001u; + return model_prefix_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void TrainerSpec::set_allocated_model_prefix(::std::string* model_prefix) { - if (model_prefix != NULL) { - set_has_model_prefix(); +inline void TrainerSpec::set_allocated_model_prefix(std::string* model_prefix) { + if 
(model_prefix != nullptr) { + _has_bits_[0] |= 0x00000001u; } else { - clear_has_model_prefix(); + _has_bits_[0] &= ~0x00000001u; } - model_prefix_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), model_prefix); + model_prefix_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), model_prefix, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.TrainerSpec.model_prefix) } // optional .sentencepiece.TrainerSpec.ModelType model_type = 3 [default = UNIGRAM]; -inline bool TrainerSpec::has_model_type() const { - return (_has_bits_[0] & 0x00040000u) != 0; -} -inline void TrainerSpec::set_has_model_type() { - _has_bits_[0] |= 0x00040000u; +inline bool TrainerSpec::_internal_has_model_type() const { + bool value = (_has_bits_[0] & 0x00800000u) != 0; + return value; } -inline void TrainerSpec::clear_has_model_type() { - _has_bits_[0] &= ~0x00040000u; +inline bool TrainerSpec::has_model_type() const { + return _internal_has_model_type(); } inline void TrainerSpec::clear_model_type() { model_type_ = 1; - clear_has_model_type(); + _has_bits_[0] &= ~0x00800000u; +} +inline ::sentencepiece::TrainerSpec_ModelType TrainerSpec::_internal_model_type() const { + return static_cast< ::sentencepiece::TrainerSpec_ModelType >(model_type_); } inline ::sentencepiece::TrainerSpec_ModelType TrainerSpec::model_type() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.model_type) - return static_cast< ::sentencepiece::TrainerSpec_ModelType >(model_type_); + return _internal_model_type(); } -inline void TrainerSpec::set_model_type(::sentencepiece::TrainerSpec_ModelType value) { +inline void TrainerSpec::_internal_set_model_type(::sentencepiece::TrainerSpec_ModelType value) { assert(::sentencepiece::TrainerSpec_ModelType_IsValid(value)); - set_has_model_type(); + _has_bits_[0] |= 0x00800000u; model_type_ = value; +} +inline void 
TrainerSpec::set_model_type(::sentencepiece::TrainerSpec_ModelType value) { + _internal_set_model_type(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.model_type) } // optional int32 vocab_size = 4 [default = 8000]; -inline bool TrainerSpec::has_vocab_size() const { - return (_has_bits_[0] & 0x00080000u) != 0; -} -inline void TrainerSpec::set_has_vocab_size() { - _has_bits_[0] |= 0x00080000u; +inline bool TrainerSpec::_internal_has_vocab_size() const { + bool value = (_has_bits_[0] & 0x01000000u) != 0; + return value; } -inline void TrainerSpec::clear_has_vocab_size() { - _has_bits_[0] &= ~0x00080000u; +inline bool TrainerSpec::has_vocab_size() const { + return _internal_has_vocab_size(); } inline void TrainerSpec::clear_vocab_size() { vocab_size_ = 8000; - clear_has_vocab_size(); + _has_bits_[0] &= ~0x01000000u; } -inline ::google::protobuf::int32 TrainerSpec::vocab_size() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.vocab_size) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_vocab_size() const { return vocab_size_; } -inline void TrainerSpec::set_vocab_size(::google::protobuf::int32 value) { - set_has_vocab_size(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::vocab_size() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.vocab_size) + return _internal_vocab_size(); +} +inline void TrainerSpec::_internal_set_vocab_size(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[0] |= 0x01000000u; vocab_size_ = value; +} +inline void TrainerSpec::set_vocab_size(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_vocab_size(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.vocab_size) } // repeated string accept_language = 5; -inline int TrainerSpec::accept_language_size() const { +inline int TrainerSpec::_internal_accept_language_size() const { return accept_language_.size(); } +inline int TrainerSpec::accept_language_size() const { + return 
_internal_accept_language_size(); +} inline void TrainerSpec::clear_accept_language() { accept_language_.Clear(); } -inline const ::std::string& TrainerSpec::accept_language(int index) const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.accept_language) +inline std::string* TrainerSpec::add_accept_language() { + // @@protoc_insertion_point(field_add_mutable:sentencepiece.TrainerSpec.accept_language) + return _internal_add_accept_language(); +} +inline const std::string& TrainerSpec::_internal_accept_language(int index) const { return accept_language_.Get(index); } -inline ::std::string* TrainerSpec::mutable_accept_language(int index) { +inline const std::string& TrainerSpec::accept_language(int index) const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.accept_language) + return _internal_accept_language(index); +} +inline std::string* TrainerSpec::mutable_accept_language(int index) { // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.accept_language) return accept_language_.Mutable(index); } -inline void TrainerSpec::set_accept_language(int index, const ::std::string& value) { +inline void TrainerSpec::set_accept_language(int index, const std::string& value) { // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.accept_language) accept_language_.Mutable(index)->assign(value); } -#if LANG_CXX11 -inline void TrainerSpec::set_accept_language(int index, ::std::string&& value) { +inline void TrainerSpec::set_accept_language(int index, std::string&& value) { // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.accept_language) accept_language_.Mutable(index)->assign(std::move(value)); } -#endif inline void TrainerSpec::set_accept_language(int index, const char* value) { - GOOGLE_DCHECK(value != NULL); + GOOGLE_DCHECK(value != nullptr); accept_language_.Mutable(index)->assign(value); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.accept_language) } @@ -1864,22 +2390,19 
@@ inline void TrainerSpec::set_accept_language(int index, const char* value, size_ reinterpret_cast(value), size); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.accept_language) } -inline ::std::string* TrainerSpec::add_accept_language() { - // @@protoc_insertion_point(field_add_mutable:sentencepiece.TrainerSpec.accept_language) +inline std::string* TrainerSpec::_internal_add_accept_language() { return accept_language_.Add(); } -inline void TrainerSpec::add_accept_language(const ::std::string& value) { +inline void TrainerSpec::add_accept_language(const std::string& value) { accept_language_.Add()->assign(value); // @@protoc_insertion_point(field_add:sentencepiece.TrainerSpec.accept_language) } -#if LANG_CXX11 -inline void TrainerSpec::add_accept_language(::std::string&& value) { +inline void TrainerSpec::add_accept_language(std::string&& value) { accept_language_.Add(std::move(value)); // @@protoc_insertion_point(field_add:sentencepiece.TrainerSpec.accept_language) } -#endif inline void TrainerSpec::add_accept_language(const char* value) { - GOOGLE_DCHECK(value != NULL); + GOOGLE_DCHECK(value != nullptr); accept_language_.Add()->assign(value); // @@protoc_insertion_point(field_add_char:sentencepiece.TrainerSpec.accept_language) } @@ -1887,452 +2410,713 @@ inline void TrainerSpec::add_accept_language(const char* value, size_t size) { accept_language_.Add()->assign(reinterpret_cast(value), size); // @@protoc_insertion_point(field_add_pointer:sentencepiece.TrainerSpec.accept_language) } -inline const ::google::protobuf::RepeatedPtrField< ::std::string>& +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField& TrainerSpec::accept_language() const { // @@protoc_insertion_point(field_list:sentencepiece.TrainerSpec.accept_language) return accept_language_; } -inline ::google::protobuf::RepeatedPtrField< ::std::string>* +inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField* TrainerSpec::mutable_accept_language() { // 
@@protoc_insertion_point(field_mutable_list:sentencepiece.TrainerSpec.accept_language) return &accept_language_; } // optional int32 self_test_sample_size = 6 [default = 0]; -inline bool TrainerSpec::has_self_test_sample_size() const { - return (_has_bits_[0] & 0x00000100u) != 0; -} -inline void TrainerSpec::set_has_self_test_sample_size() { - _has_bits_[0] |= 0x00000100u; +inline bool TrainerSpec::_internal_has_self_test_sample_size() const { + bool value = (_has_bits_[0] & 0x00000200u) != 0; + return value; } -inline void TrainerSpec::clear_has_self_test_sample_size() { - _has_bits_[0] &= ~0x00000100u; +inline bool TrainerSpec::has_self_test_sample_size() const { + return _internal_has_self_test_sample_size(); } inline void TrainerSpec::clear_self_test_sample_size() { self_test_sample_size_ = 0; - clear_has_self_test_sample_size(); + _has_bits_[0] &= ~0x00000200u; } -inline ::google::protobuf::int32 TrainerSpec::self_test_sample_size() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.self_test_sample_size) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_self_test_sample_size() const { return self_test_sample_size_; } -inline void TrainerSpec::set_self_test_sample_size(::google::protobuf::int32 value) { - set_has_self_test_sample_size(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::self_test_sample_size() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.self_test_sample_size) + return _internal_self_test_sample_size(); +} +inline void TrainerSpec::_internal_set_self_test_sample_size(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[0] |= 0x00000200u; self_test_sample_size_ = value; +} +inline void TrainerSpec::set_self_test_sample_size(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_self_test_sample_size(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.self_test_sample_size) } -// optional float character_coverage = 10 [default = 0.9995]; -inline bool 
TrainerSpec::has_character_coverage() const { - return (_has_bits_[0] & 0x00100000u) != 0; +// optional bool enable_differential_privacy = 50 [default = false]; +inline bool TrainerSpec::_internal_has_enable_differential_privacy() const { + bool value = (_has_bits_[0] & 0x00002000u) != 0; + return value; } -inline void TrainerSpec::set_has_character_coverage() { - _has_bits_[0] |= 0x00100000u; +inline bool TrainerSpec::has_enable_differential_privacy() const { + return _internal_has_enable_differential_privacy(); } -inline void TrainerSpec::clear_has_character_coverage() { - _has_bits_[0] &= ~0x00100000u; +inline void TrainerSpec::clear_enable_differential_privacy() { + enable_differential_privacy_ = false; + _has_bits_[0] &= ~0x00002000u; } -inline void TrainerSpec::clear_character_coverage() { - character_coverage_ = 0.9995f; - clear_has_character_coverage(); +inline bool TrainerSpec::_internal_enable_differential_privacy() const { + return enable_differential_privacy_; } -inline float TrainerSpec::character_coverage() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.character_coverage) - return character_coverage_; +inline bool TrainerSpec::enable_differential_privacy() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.enable_differential_privacy) + return _internal_enable_differential_privacy(); } -inline void TrainerSpec::set_character_coverage(float value) { - set_has_character_coverage(); - character_coverage_ = value; - // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.character_coverage) +inline void TrainerSpec::_internal_set_enable_differential_privacy(bool value) { + _has_bits_[0] |= 0x00002000u; + enable_differential_privacy_ = value; +} +inline void TrainerSpec::set_enable_differential_privacy(bool value) { + _internal_set_enable_differential_privacy(value); + // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.enable_differential_privacy) } -// optional int32 
input_sentence_size = 11 [default = 0]; -inline bool TrainerSpec::has_input_sentence_size() const { - return (_has_bits_[0] & 0x00000200u) != 0; +// optional float differential_privacy_noise_level = 51 [default = 0]; +inline bool TrainerSpec::_internal_has_differential_privacy_noise_level() const { + bool value = (_has_bits_[0] & 0x00200000u) != 0; + return value; } -inline void TrainerSpec::set_has_input_sentence_size() { - _has_bits_[0] |= 0x00000200u; +inline bool TrainerSpec::has_differential_privacy_noise_level() const { + return _internal_has_differential_privacy_noise_level(); } -inline void TrainerSpec::clear_has_input_sentence_size() { - _has_bits_[0] &= ~0x00000200u; +inline void TrainerSpec::clear_differential_privacy_noise_level() { + differential_privacy_noise_level_ = 0; + _has_bits_[0] &= ~0x00200000u; } -inline void TrainerSpec::clear_input_sentence_size() { - input_sentence_size_ = 0; - clear_has_input_sentence_size(); +inline float TrainerSpec::_internal_differential_privacy_noise_level() const { + return differential_privacy_noise_level_; } -inline ::google::protobuf::int32 TrainerSpec::input_sentence_size() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.input_sentence_size) - return input_sentence_size_; +inline float TrainerSpec::differential_privacy_noise_level() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.differential_privacy_noise_level) + return _internal_differential_privacy_noise_level(); } -inline void TrainerSpec::set_input_sentence_size(::google::protobuf::int32 value) { - set_has_input_sentence_size(); - input_sentence_size_ = value; - // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.input_sentence_size) +inline void TrainerSpec::_internal_set_differential_privacy_noise_level(float value) { + _has_bits_[0] |= 0x00200000u; + differential_privacy_noise_level_ = value; +} +inline void TrainerSpec::set_differential_privacy_noise_level(float value) { + 
_internal_set_differential_privacy_noise_level(value); + // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.differential_privacy_noise_level) } -// optional bool shuffle_input_sentence = 19 [default = true]; -inline bool TrainerSpec::has_shuffle_input_sentence() const { - return (_has_bits_[0] & 0x08000000u) != 0; +// optional uint64 differential_privacy_clipping_threshold = 52 [default = 0]; +inline bool TrainerSpec::_internal_has_differential_privacy_clipping_threshold() const { + bool value = (_has_bits_[0] & 0x00400000u) != 0; + return value; } -inline void TrainerSpec::set_has_shuffle_input_sentence() { - _has_bits_[0] |= 0x08000000u; +inline bool TrainerSpec::has_differential_privacy_clipping_threshold() const { + return _internal_has_differential_privacy_clipping_threshold(); } -inline void TrainerSpec::clear_has_shuffle_input_sentence() { - _has_bits_[0] &= ~0x08000000u; +inline void TrainerSpec::clear_differential_privacy_clipping_threshold() { + differential_privacy_clipping_threshold_ = PROTOBUF_ULONGLONG(0); + _has_bits_[0] &= ~0x00400000u; +} +inline ::PROTOBUF_NAMESPACE_ID::uint64 TrainerSpec::_internal_differential_privacy_clipping_threshold() const { + return differential_privacy_clipping_threshold_; +} +inline ::PROTOBUF_NAMESPACE_ID::uint64 TrainerSpec::differential_privacy_clipping_threshold() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.differential_privacy_clipping_threshold) + return _internal_differential_privacy_clipping_threshold(); +} +inline void TrainerSpec::_internal_set_differential_privacy_clipping_threshold(::PROTOBUF_NAMESPACE_ID::uint64 value) { + _has_bits_[0] |= 0x00400000u; + differential_privacy_clipping_threshold_ = value; +} +inline void TrainerSpec::set_differential_privacy_clipping_threshold(::PROTOBUF_NAMESPACE_ID::uint64 value) { + _internal_set_differential_privacy_clipping_threshold(value); + // 
@@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.differential_privacy_clipping_threshold) +} + +// optional float character_coverage = 10 [default = 0.9995]; +inline bool TrainerSpec::_internal_has_character_coverage() const { + bool value = (_has_bits_[0] & 0x02000000u) != 0; + return value; +} +inline bool TrainerSpec::has_character_coverage() const { + return _internal_has_character_coverage(); +} +inline void TrainerSpec::clear_character_coverage() { + character_coverage_ = 0.9995f; + _has_bits_[0] &= ~0x02000000u; +} +inline float TrainerSpec::_internal_character_coverage() const { + return character_coverage_; +} +inline float TrainerSpec::character_coverage() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.character_coverage) + return _internal_character_coverage(); +} +inline void TrainerSpec::_internal_set_character_coverage(float value) { + _has_bits_[0] |= 0x02000000u; + character_coverage_ = value; +} +inline void TrainerSpec::set_character_coverage(float value) { + _internal_set_character_coverage(value); + // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.character_coverage) +} + +// optional uint64 input_sentence_size = 11 [default = 0]; +inline bool TrainerSpec::_internal_has_input_sentence_size() const { + bool value = (_has_bits_[0] & 0x00000800u) != 0; + return value; +} +inline bool TrainerSpec::has_input_sentence_size() const { + return _internal_has_input_sentence_size(); +} +inline void TrainerSpec::clear_input_sentence_size() { + input_sentence_size_ = PROTOBUF_ULONGLONG(0); + _has_bits_[0] &= ~0x00000800u; +} +inline ::PROTOBUF_NAMESPACE_ID::uint64 TrainerSpec::_internal_input_sentence_size() const { + return input_sentence_size_; +} +inline ::PROTOBUF_NAMESPACE_ID::uint64 TrainerSpec::input_sentence_size() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.input_sentence_size) + return _internal_input_sentence_size(); +} +inline void 
TrainerSpec::_internal_set_input_sentence_size(::PROTOBUF_NAMESPACE_ID::uint64 value) { + _has_bits_[0] |= 0x00000800u; + input_sentence_size_ = value; +} +inline void TrainerSpec::set_input_sentence_size(::PROTOBUF_NAMESPACE_ID::uint64 value) { + _internal_set_input_sentence_size(value); + // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.input_sentence_size) +} + +// optional bool shuffle_input_sentence = 19 [default = true]; +inline bool TrainerSpec::_internal_has_shuffle_input_sentence() const { + bool value = (_has_bits_[1] & 0x00000001u) != 0; + return value; +} +inline bool TrainerSpec::has_shuffle_input_sentence() const { + return _internal_has_shuffle_input_sentence(); } inline void TrainerSpec::clear_shuffle_input_sentence() { shuffle_input_sentence_ = true; - clear_has_shuffle_input_sentence(); + _has_bits_[1] &= ~0x00000001u; +} +inline bool TrainerSpec::_internal_shuffle_input_sentence() const { + return shuffle_input_sentence_; } inline bool TrainerSpec::shuffle_input_sentence() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.shuffle_input_sentence) - return shuffle_input_sentence_; + return _internal_shuffle_input_sentence(); } -inline void TrainerSpec::set_shuffle_input_sentence(bool value) { - set_has_shuffle_input_sentence(); +inline void TrainerSpec::_internal_set_shuffle_input_sentence(bool value) { + _has_bits_[1] |= 0x00000001u; shuffle_input_sentence_ = value; +} +inline void TrainerSpec::set_shuffle_input_sentence(bool value) { + _internal_set_shuffle_input_sentence(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.shuffle_input_sentence) } // optional int32 mining_sentence_size = 12 [deprecated = true]; -inline bool TrainerSpec::has_mining_sentence_size() const { - return (_has_bits_[0] & 0x00000400u) != 0; -} -inline void TrainerSpec::set_has_mining_sentence_size() { - _has_bits_[0] |= 0x00000400u; +inline bool TrainerSpec::_internal_has_mining_sentence_size() const { + bool value 
= (_has_bits_[0] & 0x00000400u) != 0; + return value; } -inline void TrainerSpec::clear_has_mining_sentence_size() { - _has_bits_[0] &= ~0x00000400u; +inline bool TrainerSpec::has_mining_sentence_size() const { + return _internal_has_mining_sentence_size(); } inline void TrainerSpec::clear_mining_sentence_size() { mining_sentence_size_ = 0; - clear_has_mining_sentence_size(); + _has_bits_[0] &= ~0x00000400u; } -inline ::google::protobuf::int32 TrainerSpec::mining_sentence_size() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.mining_sentence_size) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_mining_sentence_size() const { return mining_sentence_size_; } -inline void TrainerSpec::set_mining_sentence_size(::google::protobuf::int32 value) { - set_has_mining_sentence_size(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::mining_sentence_size() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.mining_sentence_size) + return _internal_mining_sentence_size(); +} +inline void TrainerSpec::_internal_set_mining_sentence_size(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[0] |= 0x00000400u; mining_sentence_size_ = value; +} +inline void TrainerSpec::set_mining_sentence_size(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_mining_sentence_size(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.mining_sentence_size) } // optional int32 training_sentence_size = 13 [deprecated = true]; -inline bool TrainerSpec::has_training_sentence_size() const { - return (_has_bits_[0] & 0x00000800u) != 0; +inline bool TrainerSpec::_internal_has_training_sentence_size() const { + bool value = (_has_bits_[0] & 0x00001000u) != 0; + return value; } -inline void TrainerSpec::set_has_training_sentence_size() { - _has_bits_[0] |= 0x00000800u; -} -inline void TrainerSpec::clear_has_training_sentence_size() { - _has_bits_[0] &= ~0x00000800u; +inline bool TrainerSpec::has_training_sentence_size() 
const { + return _internal_has_training_sentence_size(); } inline void TrainerSpec::clear_training_sentence_size() { training_sentence_size_ = 0; - clear_has_training_sentence_size(); + _has_bits_[0] &= ~0x00001000u; } -inline ::google::protobuf::int32 TrainerSpec::training_sentence_size() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.training_sentence_size) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_training_sentence_size() const { return training_sentence_size_; } -inline void TrainerSpec::set_training_sentence_size(::google::protobuf::int32 value) { - set_has_training_sentence_size(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::training_sentence_size() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.training_sentence_size) + return _internal_training_sentence_size(); +} +inline void TrainerSpec::_internal_set_training_sentence_size(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[0] |= 0x00001000u; training_sentence_size_ = value; +} +inline void TrainerSpec::set_training_sentence_size(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_training_sentence_size(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.training_sentence_size) } // optional int32 seed_sentencepiece_size = 14 [default = 1000000]; -inline bool TrainerSpec::has_seed_sentencepiece_size() const { - return (_has_bits_[0] & 0x00200000u) != 0; +inline bool TrainerSpec::_internal_has_seed_sentencepiece_size() const { + bool value = (_has_bits_[0] & 0x04000000u) != 0; + return value; } -inline void TrainerSpec::set_has_seed_sentencepiece_size() { - _has_bits_[0] |= 0x00200000u; -} -inline void TrainerSpec::clear_has_seed_sentencepiece_size() { - _has_bits_[0] &= ~0x00200000u; +inline bool TrainerSpec::has_seed_sentencepiece_size() const { + return _internal_has_seed_sentencepiece_size(); } inline void TrainerSpec::clear_seed_sentencepiece_size() { seed_sentencepiece_size_ = 1000000; - 
clear_has_seed_sentencepiece_size(); + _has_bits_[0] &= ~0x04000000u; } -inline ::google::protobuf::int32 TrainerSpec::seed_sentencepiece_size() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.seed_sentencepiece_size) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_seed_sentencepiece_size() const { return seed_sentencepiece_size_; } -inline void TrainerSpec::set_seed_sentencepiece_size(::google::protobuf::int32 value) { - set_has_seed_sentencepiece_size(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::seed_sentencepiece_size() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.seed_sentencepiece_size) + return _internal_seed_sentencepiece_size(); +} +inline void TrainerSpec::_internal_set_seed_sentencepiece_size(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[0] |= 0x04000000u; seed_sentencepiece_size_ = value; +} +inline void TrainerSpec::set_seed_sentencepiece_size(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_seed_sentencepiece_size(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.seed_sentencepiece_size) } // optional float shrinking_factor = 15 [default = 0.75]; -inline bool TrainerSpec::has_shrinking_factor() const { - return (_has_bits_[0] & 0x00400000u) != 0; +inline bool TrainerSpec::_internal_has_shrinking_factor() const { + bool value = (_has_bits_[0] & 0x08000000u) != 0; + return value; } -inline void TrainerSpec::set_has_shrinking_factor() { - _has_bits_[0] |= 0x00400000u; -} -inline void TrainerSpec::clear_has_shrinking_factor() { - _has_bits_[0] &= ~0x00400000u; +inline bool TrainerSpec::has_shrinking_factor() const { + return _internal_has_shrinking_factor(); } inline void TrainerSpec::clear_shrinking_factor() { shrinking_factor_ = 0.75f; - clear_has_shrinking_factor(); + _has_bits_[0] &= ~0x08000000u; +} +inline float TrainerSpec::_internal_shrinking_factor() const { + return shrinking_factor_; } inline float TrainerSpec::shrinking_factor() 
const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.shrinking_factor) - return shrinking_factor_; + return _internal_shrinking_factor(); } -inline void TrainerSpec::set_shrinking_factor(float value) { - set_has_shrinking_factor(); +inline void TrainerSpec::_internal_set_shrinking_factor(float value) { + _has_bits_[0] |= 0x08000000u; shrinking_factor_ = value; +} +inline void TrainerSpec::set_shrinking_factor(float value) { + _internal_set_shrinking_factor(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.shrinking_factor) } // optional int32 max_sentence_length = 18 [default = 4192]; -inline bool TrainerSpec::has_max_sentence_length() const { - return (_has_bits_[0] & 0x02000000u) != 0; -} -inline void TrainerSpec::set_has_max_sentence_length() { - _has_bits_[0] |= 0x02000000u; +inline bool TrainerSpec::_internal_has_max_sentence_length() const { + bool value = (_has_bits_[0] & 0x40000000u) != 0; + return value; } -inline void TrainerSpec::clear_has_max_sentence_length() { - _has_bits_[0] &= ~0x02000000u; +inline bool TrainerSpec::has_max_sentence_length() const { + return _internal_has_max_sentence_length(); } inline void TrainerSpec::clear_max_sentence_length() { max_sentence_length_ = 4192; - clear_has_max_sentence_length(); + _has_bits_[0] &= ~0x40000000u; } -inline ::google::protobuf::int32 TrainerSpec::max_sentence_length() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.max_sentence_length) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_max_sentence_length() const { return max_sentence_length_; } -inline void TrainerSpec::set_max_sentence_length(::google::protobuf::int32 value) { - set_has_max_sentence_length(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::max_sentence_length() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.max_sentence_length) + return _internal_max_sentence_length(); +} +inline void 
TrainerSpec::_internal_set_max_sentence_length(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[0] |= 0x40000000u; max_sentence_length_ = value; +} +inline void TrainerSpec::set_max_sentence_length(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_max_sentence_length(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.max_sentence_length) } // optional int32 num_threads = 16 [default = 16]; -inline bool TrainerSpec::has_num_threads() const { - return (_has_bits_[0] & 0x00800000u) != 0; -} -inline void TrainerSpec::set_has_num_threads() { - _has_bits_[0] |= 0x00800000u; +inline bool TrainerSpec::_internal_has_num_threads() const { + bool value = (_has_bits_[0] & 0x10000000u) != 0; + return value; } -inline void TrainerSpec::clear_has_num_threads() { - _has_bits_[0] &= ~0x00800000u; +inline bool TrainerSpec::has_num_threads() const { + return _internal_has_num_threads(); } inline void TrainerSpec::clear_num_threads() { num_threads_ = 16; - clear_has_num_threads(); + _has_bits_[0] &= ~0x10000000u; } -inline ::google::protobuf::int32 TrainerSpec::num_threads() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.num_threads) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_num_threads() const { return num_threads_; } -inline void TrainerSpec::set_num_threads(::google::protobuf::int32 value) { - set_has_num_threads(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::num_threads() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.num_threads) + return _internal_num_threads(); +} +inline void TrainerSpec::_internal_set_num_threads(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[0] |= 0x10000000u; num_threads_ = value; +} +inline void TrainerSpec::set_num_threads(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_num_threads(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.num_threads) } // optional int32 num_sub_iterations = 17 [default = 2]; -inline 
bool TrainerSpec::has_num_sub_iterations() const { - return (_has_bits_[0] & 0x01000000u) != 0; -} -inline void TrainerSpec::set_has_num_sub_iterations() { - _has_bits_[0] |= 0x01000000u; +inline bool TrainerSpec::_internal_has_num_sub_iterations() const { + bool value = (_has_bits_[0] & 0x20000000u) != 0; + return value; } -inline void TrainerSpec::clear_has_num_sub_iterations() { - _has_bits_[0] &= ~0x01000000u; +inline bool TrainerSpec::has_num_sub_iterations() const { + return _internal_has_num_sub_iterations(); } inline void TrainerSpec::clear_num_sub_iterations() { num_sub_iterations_ = 2; - clear_has_num_sub_iterations(); + _has_bits_[0] &= ~0x20000000u; } -inline ::google::protobuf::int32 TrainerSpec::num_sub_iterations() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.num_sub_iterations) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_num_sub_iterations() const { return num_sub_iterations_; } -inline void TrainerSpec::set_num_sub_iterations(::google::protobuf::int32 value) { - set_has_num_sub_iterations(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::num_sub_iterations() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.num_sub_iterations) + return _internal_num_sub_iterations(); +} +inline void TrainerSpec::_internal_set_num_sub_iterations(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[0] |= 0x20000000u; num_sub_iterations_ = value; +} +inline void TrainerSpec::set_num_sub_iterations(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_num_sub_iterations(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.num_sub_iterations) } // optional int32 max_sentencepiece_length = 20 [default = 16]; -inline bool TrainerSpec::has_max_sentencepiece_length() const { - return (_has_bits_[0] & 0x04000000u) != 0; +inline bool TrainerSpec::_internal_has_max_sentencepiece_length() const { + bool value = (_has_bits_[0] & 0x80000000u) != 0; + return value; } -inline void 
TrainerSpec::set_has_max_sentencepiece_length() { - _has_bits_[0] |= 0x04000000u; -} -inline void TrainerSpec::clear_has_max_sentencepiece_length() { - _has_bits_[0] &= ~0x04000000u; +inline bool TrainerSpec::has_max_sentencepiece_length() const { + return _internal_has_max_sentencepiece_length(); } inline void TrainerSpec::clear_max_sentencepiece_length() { max_sentencepiece_length_ = 16; - clear_has_max_sentencepiece_length(); + _has_bits_[0] &= ~0x80000000u; } -inline ::google::protobuf::int32 TrainerSpec::max_sentencepiece_length() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.max_sentencepiece_length) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_max_sentencepiece_length() const { return max_sentencepiece_length_; } -inline void TrainerSpec::set_max_sentencepiece_length(::google::protobuf::int32 value) { - set_has_max_sentencepiece_length(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::max_sentencepiece_length() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.max_sentencepiece_length) + return _internal_max_sentencepiece_length(); +} +inline void TrainerSpec::_internal_set_max_sentencepiece_length(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[0] |= 0x80000000u; max_sentencepiece_length_ = value; +} +inline void TrainerSpec::set_max_sentencepiece_length(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_max_sentencepiece_length(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.max_sentencepiece_length) } // optional bool split_by_unicode_script = 21 [default = true]; -inline bool TrainerSpec::has_split_by_unicode_script() const { - return (_has_bits_[0] & 0x10000000u) != 0; -} -inline void TrainerSpec::set_has_split_by_unicode_script() { - _has_bits_[0] |= 0x10000000u; +inline bool TrainerSpec::_internal_has_split_by_unicode_script() const { + bool value = (_has_bits_[1] & 0x00000002u) != 0; + return value; } -inline void 
TrainerSpec::clear_has_split_by_unicode_script() { - _has_bits_[0] &= ~0x10000000u; +inline bool TrainerSpec::has_split_by_unicode_script() const { + return _internal_has_split_by_unicode_script(); } inline void TrainerSpec::clear_split_by_unicode_script() { split_by_unicode_script_ = true; - clear_has_split_by_unicode_script(); + _has_bits_[1] &= ~0x00000002u; +} +inline bool TrainerSpec::_internal_split_by_unicode_script() const { + return split_by_unicode_script_; } inline bool TrainerSpec::split_by_unicode_script() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.split_by_unicode_script) - return split_by_unicode_script_; + return _internal_split_by_unicode_script(); } -inline void TrainerSpec::set_split_by_unicode_script(bool value) { - set_has_split_by_unicode_script(); +inline void TrainerSpec::_internal_set_split_by_unicode_script(bool value) { + _has_bits_[1] |= 0x00000002u; split_by_unicode_script_ = value; +} +inline void TrainerSpec::set_split_by_unicode_script(bool value) { + _internal_set_split_by_unicode_script(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.split_by_unicode_script) } // optional bool split_by_number = 23 [default = true]; -inline bool TrainerSpec::has_split_by_number() const { - return (_has_bits_[0] & 0x20000000u) != 0; +inline bool TrainerSpec::_internal_has_split_by_number() const { + bool value = (_has_bits_[1] & 0x00000004u) != 0; + return value; } -inline void TrainerSpec::set_has_split_by_number() { - _has_bits_[0] |= 0x20000000u; -} -inline void TrainerSpec::clear_has_split_by_number() { - _has_bits_[0] &= ~0x20000000u; +inline bool TrainerSpec::has_split_by_number() const { + return _internal_has_split_by_number(); } inline void TrainerSpec::clear_split_by_number() { split_by_number_ = true; - clear_has_split_by_number(); + _has_bits_[1] &= ~0x00000004u; +} +inline bool TrainerSpec::_internal_split_by_number() const { + return split_by_number_; } inline bool 
TrainerSpec::split_by_number() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.split_by_number) - return split_by_number_; + return _internal_split_by_number(); } -inline void TrainerSpec::set_split_by_number(bool value) { - set_has_split_by_number(); +inline void TrainerSpec::_internal_set_split_by_number(bool value) { + _has_bits_[1] |= 0x00000004u; split_by_number_ = value; +} +inline void TrainerSpec::set_split_by_number(bool value) { + _internal_set_split_by_number(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.split_by_number) } // optional bool split_by_whitespace = 22 [default = true]; -inline bool TrainerSpec::has_split_by_whitespace() const { - return (_has_bits_[0] & 0x40000000u) != 0; -} -inline void TrainerSpec::set_has_split_by_whitespace() { - _has_bits_[0] |= 0x40000000u; +inline bool TrainerSpec::_internal_has_split_by_whitespace() const { + bool value = (_has_bits_[1] & 0x00000008u) != 0; + return value; } -inline void TrainerSpec::clear_has_split_by_whitespace() { - _has_bits_[0] &= ~0x40000000u; +inline bool TrainerSpec::has_split_by_whitespace() const { + return _internal_has_split_by_whitespace(); } inline void TrainerSpec::clear_split_by_whitespace() { split_by_whitespace_ = true; - clear_has_split_by_whitespace(); + _has_bits_[1] &= ~0x00000008u; +} +inline bool TrainerSpec::_internal_split_by_whitespace() const { + return split_by_whitespace_; } inline bool TrainerSpec::split_by_whitespace() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.split_by_whitespace) - return split_by_whitespace_; + return _internal_split_by_whitespace(); } -inline void TrainerSpec::set_split_by_whitespace(bool value) { - set_has_split_by_whitespace(); +inline void TrainerSpec::_internal_set_split_by_whitespace(bool value) { + _has_bits_[1] |= 0x00000008u; split_by_whitespace_ = value; +} +inline void TrainerSpec::set_split_by_whitespace(bool value) { + 
_internal_set_split_by_whitespace(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.split_by_whitespace) } // optional bool treat_whitespace_as_suffix = 24 [default = false]; -inline bool TrainerSpec::has_treat_whitespace_as_suffix() const { - return (_has_bits_[0] & 0x00001000u) != 0; -} -inline void TrainerSpec::set_has_treat_whitespace_as_suffix() { - _has_bits_[0] |= 0x00001000u; +inline bool TrainerSpec::_internal_has_treat_whitespace_as_suffix() const { + bool value = (_has_bits_[0] & 0x00004000u) != 0; + return value; } -inline void TrainerSpec::clear_has_treat_whitespace_as_suffix() { - _has_bits_[0] &= ~0x00001000u; +inline bool TrainerSpec::has_treat_whitespace_as_suffix() const { + return _internal_has_treat_whitespace_as_suffix(); } inline void TrainerSpec::clear_treat_whitespace_as_suffix() { treat_whitespace_as_suffix_ = false; - clear_has_treat_whitespace_as_suffix(); + _has_bits_[0] &= ~0x00004000u; +} +inline bool TrainerSpec::_internal_treat_whitespace_as_suffix() const { + return treat_whitespace_as_suffix_; } inline bool TrainerSpec::treat_whitespace_as_suffix() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.treat_whitespace_as_suffix) - return treat_whitespace_as_suffix_; + return _internal_treat_whitespace_as_suffix(); } -inline void TrainerSpec::set_treat_whitespace_as_suffix(bool value) { - set_has_treat_whitespace_as_suffix(); +inline void TrainerSpec::_internal_set_treat_whitespace_as_suffix(bool value) { + _has_bits_[0] |= 0x00004000u; treat_whitespace_as_suffix_ = value; +} +inline void TrainerSpec::set_treat_whitespace_as_suffix(bool value) { + _internal_set_treat_whitespace_as_suffix(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.treat_whitespace_as_suffix) } -// optional bool split_digits = 25 [default = false]; -inline bool TrainerSpec::has_split_digits() const { - return (_has_bits_[0] & 0x00002000u) != 0; +// optional bool allow_whitespace_only_pieces = 26 
[default = false]; +inline bool TrainerSpec::_internal_has_allow_whitespace_only_pieces() const { + bool value = (_has_bits_[0] & 0x00008000u) != 0; + return value; } -inline void TrainerSpec::set_has_split_digits() { - _has_bits_[0] |= 0x00002000u; +inline bool TrainerSpec::has_allow_whitespace_only_pieces() const { + return _internal_has_allow_whitespace_only_pieces(); } -inline void TrainerSpec::clear_has_split_digits() { - _has_bits_[0] &= ~0x00002000u; +inline void TrainerSpec::clear_allow_whitespace_only_pieces() { + allow_whitespace_only_pieces_ = false; + _has_bits_[0] &= ~0x00008000u; +} +inline bool TrainerSpec::_internal_allow_whitespace_only_pieces() const { + return allow_whitespace_only_pieces_; +} +inline bool TrainerSpec::allow_whitespace_only_pieces() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.allow_whitespace_only_pieces) + return _internal_allow_whitespace_only_pieces(); +} +inline void TrainerSpec::_internal_set_allow_whitespace_only_pieces(bool value) { + _has_bits_[0] |= 0x00008000u; + allow_whitespace_only_pieces_ = value; +} +inline void TrainerSpec::set_allow_whitespace_only_pieces(bool value) { + _internal_set_allow_whitespace_only_pieces(value); + // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.allow_whitespace_only_pieces) +} + +// optional bool split_digits = 25 [default = false]; +inline bool TrainerSpec::_internal_has_split_digits() const { + bool value = (_has_bits_[0] & 0x00010000u) != 0; + return value; +} +inline bool TrainerSpec::has_split_digits() const { + return _internal_has_split_digits(); } inline void TrainerSpec::clear_split_digits() { split_digits_ = false; - clear_has_split_digits(); + _has_bits_[0] &= ~0x00010000u; +} +inline bool TrainerSpec::_internal_split_digits() const { + return split_digits_; } inline bool TrainerSpec::split_digits() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.split_digits) - return split_digits_; + return 
_internal_split_digits(); } -inline void TrainerSpec::set_split_digits(bool value) { - set_has_split_digits(); +inline void TrainerSpec::_internal_set_split_digits(bool value) { + _has_bits_[0] |= 0x00010000u; split_digits_ = value; +} +inline void TrainerSpec::set_split_digits(bool value) { + _internal_set_split_digits(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.split_digits) } +// optional string pretokenization_delimiter = 53 [default = ""]; +inline bool TrainerSpec::_internal_has_pretokenization_delimiter() const { + bool value = (_has_bits_[0] & 0x00000100u) != 0; + return value; +} +inline bool TrainerSpec::has_pretokenization_delimiter() const { + return _internal_has_pretokenization_delimiter(); +} +inline void TrainerSpec::clear_pretokenization_delimiter() { + pretokenization_delimiter_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000100u; +} +inline const std::string& TrainerSpec::pretokenization_delimiter() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.pretokenization_delimiter) + return _internal_pretokenization_delimiter(); +} +inline void TrainerSpec::set_pretokenization_delimiter(const std::string& value) { + _internal_set_pretokenization_delimiter(value); + // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.pretokenization_delimiter) +} +inline std::string* TrainerSpec::mutable_pretokenization_delimiter() { + // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.pretokenization_delimiter) + return _internal_mutable_pretokenization_delimiter(); +} +inline const std::string& TrainerSpec::_internal_pretokenization_delimiter() const { + return pretokenization_delimiter_.Get(); +} +inline void TrainerSpec::_internal_set_pretokenization_delimiter(const std::string& value) { + _has_bits_[0] |= 0x00000100u; + pretokenization_delimiter_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void 
TrainerSpec::set_pretokenization_delimiter(std::string&& value) { + _has_bits_[0] |= 0x00000100u; + pretokenization_delimiter_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); + // @@protoc_insertion_point(field_set_rvalue:sentencepiece.TrainerSpec.pretokenization_delimiter) +} +inline void TrainerSpec::set_pretokenization_delimiter(const char* value) { + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000100u; + pretokenization_delimiter_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); + // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.pretokenization_delimiter) +} +inline void TrainerSpec::set_pretokenization_delimiter(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000100u; + pretokenization_delimiter_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); + // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.pretokenization_delimiter) +} +inline std::string* TrainerSpec::_internal_mutable_pretokenization_delimiter() { + _has_bits_[0] |= 0x00000100u; + return pretokenization_delimiter_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); +} +inline std::string* TrainerSpec::release_pretokenization_delimiter() { + // @@protoc_insertion_point(field_release:sentencepiece.TrainerSpec.pretokenization_delimiter) + if (!_internal_has_pretokenization_delimiter()) { + return nullptr; + } + _has_bits_[0] &= ~0x00000100u; + return pretokenization_delimiter_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); +} +inline void TrainerSpec::set_allocated_pretokenization_delimiter(std::string* pretokenization_delimiter) { + if (pretokenization_delimiter != nullptr) { + _has_bits_[0] |= 0x00000100u; + } else { + _has_bits_[0] &= ~0x00000100u; + } + 
pretokenization_delimiter_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), pretokenization_delimiter, + GetArena()); + // @@protoc_insertion_point(field_set_allocated:sentencepiece.TrainerSpec.pretokenization_delimiter) +} + // repeated string control_symbols = 30; -inline int TrainerSpec::control_symbols_size() const { +inline int TrainerSpec::_internal_control_symbols_size() const { return control_symbols_.size(); } +inline int TrainerSpec::control_symbols_size() const { + return _internal_control_symbols_size(); +} inline void TrainerSpec::clear_control_symbols() { control_symbols_.Clear(); } -inline const ::std::string& TrainerSpec::control_symbols(int index) const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.control_symbols) +inline std::string* TrainerSpec::add_control_symbols() { + // @@protoc_insertion_point(field_add_mutable:sentencepiece.TrainerSpec.control_symbols) + return _internal_add_control_symbols(); +} +inline const std::string& TrainerSpec::_internal_control_symbols(int index) const { return control_symbols_.Get(index); } -inline ::std::string* TrainerSpec::mutable_control_symbols(int index) { +inline const std::string& TrainerSpec::control_symbols(int index) const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.control_symbols) + return _internal_control_symbols(index); +} +inline std::string* TrainerSpec::mutable_control_symbols(int index) { // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.control_symbols) return control_symbols_.Mutable(index); } -inline void TrainerSpec::set_control_symbols(int index, const ::std::string& value) { +inline void TrainerSpec::set_control_symbols(int index, const std::string& value) { // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.control_symbols) control_symbols_.Mutable(index)->assign(value); } -#if LANG_CXX11 -inline void TrainerSpec::set_control_symbols(int index, ::std::string&& value) { +inline 
void TrainerSpec::set_control_symbols(int index, std::string&& value) { // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.control_symbols) control_symbols_.Mutable(index)->assign(std::move(value)); } -#endif inline void TrainerSpec::set_control_symbols(int index, const char* value) { - GOOGLE_DCHECK(value != NULL); + GOOGLE_DCHECK(value != nullptr); control_symbols_.Mutable(index)->assign(value); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.control_symbols) } @@ -2341,22 +3125,19 @@ inline void TrainerSpec::set_control_symbols(int index, const char* value, size_ reinterpret_cast(value), size); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.control_symbols) } -inline ::std::string* TrainerSpec::add_control_symbols() { - // @@protoc_insertion_point(field_add_mutable:sentencepiece.TrainerSpec.control_symbols) +inline std::string* TrainerSpec::_internal_add_control_symbols() { return control_symbols_.Add(); } -inline void TrainerSpec::add_control_symbols(const ::std::string& value) { +inline void TrainerSpec::add_control_symbols(const std::string& value) { control_symbols_.Add()->assign(value); // @@protoc_insertion_point(field_add:sentencepiece.TrainerSpec.control_symbols) } -#if LANG_CXX11 -inline void TrainerSpec::add_control_symbols(::std::string&& value) { +inline void TrainerSpec::add_control_symbols(std::string&& value) { control_symbols_.Add(std::move(value)); // @@protoc_insertion_point(field_add:sentencepiece.TrainerSpec.control_symbols) } -#endif inline void TrainerSpec::add_control_symbols(const char* value) { - GOOGLE_DCHECK(value != NULL); + GOOGLE_DCHECK(value != nullptr); control_symbols_.Add()->assign(value); // @@protoc_insertion_point(field_add_char:sentencepiece.TrainerSpec.control_symbols) } @@ -2364,44 +3145,52 @@ inline void TrainerSpec::add_control_symbols(const char* value, size_t size) { control_symbols_.Add()->assign(reinterpret_cast(value), size); // 
@@protoc_insertion_point(field_add_pointer:sentencepiece.TrainerSpec.control_symbols) } -inline const ::google::protobuf::RepeatedPtrField< ::std::string>& +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField& TrainerSpec::control_symbols() const { // @@protoc_insertion_point(field_list:sentencepiece.TrainerSpec.control_symbols) return control_symbols_; } -inline ::google::protobuf::RepeatedPtrField< ::std::string>* +inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField* TrainerSpec::mutable_control_symbols() { // @@protoc_insertion_point(field_mutable_list:sentencepiece.TrainerSpec.control_symbols) return &control_symbols_; } // repeated string user_defined_symbols = 31; -inline int TrainerSpec::user_defined_symbols_size() const { +inline int TrainerSpec::_internal_user_defined_symbols_size() const { return user_defined_symbols_.size(); } +inline int TrainerSpec::user_defined_symbols_size() const { + return _internal_user_defined_symbols_size(); +} inline void TrainerSpec::clear_user_defined_symbols() { user_defined_symbols_.Clear(); } -inline const ::std::string& TrainerSpec::user_defined_symbols(int index) const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.user_defined_symbols) +inline std::string* TrainerSpec::add_user_defined_symbols() { + // @@protoc_insertion_point(field_add_mutable:sentencepiece.TrainerSpec.user_defined_symbols) + return _internal_add_user_defined_symbols(); +} +inline const std::string& TrainerSpec::_internal_user_defined_symbols(int index) const { return user_defined_symbols_.Get(index); } -inline ::std::string* TrainerSpec::mutable_user_defined_symbols(int index) { +inline const std::string& TrainerSpec::user_defined_symbols(int index) const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.user_defined_symbols) + return _internal_user_defined_symbols(index); +} +inline std::string* TrainerSpec::mutable_user_defined_symbols(int index) { // 
@@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.user_defined_symbols) return user_defined_symbols_.Mutable(index); } -inline void TrainerSpec::set_user_defined_symbols(int index, const ::std::string& value) { +inline void TrainerSpec::set_user_defined_symbols(int index, const std::string& value) { // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.user_defined_symbols) user_defined_symbols_.Mutable(index)->assign(value); } -#if LANG_CXX11 -inline void TrainerSpec::set_user_defined_symbols(int index, ::std::string&& value) { +inline void TrainerSpec::set_user_defined_symbols(int index, std::string&& value) { // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.user_defined_symbols) user_defined_symbols_.Mutable(index)->assign(std::move(value)); } -#endif inline void TrainerSpec::set_user_defined_symbols(int index, const char* value) { - GOOGLE_DCHECK(value != NULL); + GOOGLE_DCHECK(value != nullptr); user_defined_symbols_.Mutable(index)->assign(value); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.user_defined_symbols) } @@ -2410,22 +3199,19 @@ inline void TrainerSpec::set_user_defined_symbols(int index, const char* value, reinterpret_cast(value), size); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.user_defined_symbols) } -inline ::std::string* TrainerSpec::add_user_defined_symbols() { - // @@protoc_insertion_point(field_add_mutable:sentencepiece.TrainerSpec.user_defined_symbols) +inline std::string* TrainerSpec::_internal_add_user_defined_symbols() { return user_defined_symbols_.Add(); } -inline void TrainerSpec::add_user_defined_symbols(const ::std::string& value) { +inline void TrainerSpec::add_user_defined_symbols(const std::string& value) { user_defined_symbols_.Add()->assign(value); // @@protoc_insertion_point(field_add:sentencepiece.TrainerSpec.user_defined_symbols) } -#if LANG_CXX11 -inline void TrainerSpec::add_user_defined_symbols(::std::string&& value) { +inline 
void TrainerSpec::add_user_defined_symbols(std::string&& value) { user_defined_symbols_.Add(std::move(value)); // @@protoc_insertion_point(field_add:sentencepiece.TrainerSpec.user_defined_symbols) } -#endif inline void TrainerSpec::add_user_defined_symbols(const char* value) { - GOOGLE_DCHECK(value != NULL); + GOOGLE_DCHECK(value != nullptr); user_defined_symbols_.Add()->assign(value); // @@protoc_insertion_point(field_add_char:sentencepiece.TrainerSpec.user_defined_symbols) } @@ -2433,626 +3219,709 @@ inline void TrainerSpec::add_user_defined_symbols(const char* value, size_t size user_defined_symbols_.Add()->assign(reinterpret_cast(value), size); // @@protoc_insertion_point(field_add_pointer:sentencepiece.TrainerSpec.user_defined_symbols) } -inline const ::google::protobuf::RepeatedPtrField< ::std::string>& +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField& TrainerSpec::user_defined_symbols() const { // @@protoc_insertion_point(field_list:sentencepiece.TrainerSpec.user_defined_symbols) return user_defined_symbols_; } -inline ::google::protobuf::RepeatedPtrField< ::std::string>* +inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField* TrainerSpec::mutable_user_defined_symbols() { // @@protoc_insertion_point(field_mutable_list:sentencepiece.TrainerSpec.user_defined_symbols) return &user_defined_symbols_; } // optional string required_chars = 36; -inline bool TrainerSpec::has_required_chars() const { - return (_has_bits_[0] & 0x00000004u) != 0; -} -inline void TrainerSpec::set_has_required_chars() { - _has_bits_[0] |= 0x00000004u; +inline bool TrainerSpec::_internal_has_required_chars() const { + bool value = (_has_bits_[0] & 0x00000004u) != 0; + return value; } -inline void TrainerSpec::clear_has_required_chars() { - _has_bits_[0] &= ~0x00000004u; +inline bool TrainerSpec::has_required_chars() const { + return _internal_has_required_chars(); } inline void TrainerSpec::clear_required_chars() { - 
required_chars_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_required_chars(); + required_chars_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000004u; } -inline const ::std::string& TrainerSpec::required_chars() const { +inline const std::string& TrainerSpec::required_chars() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.required_chars) - return required_chars_.GetNoArena(); + return _internal_required_chars(); } -inline void TrainerSpec::set_required_chars(const ::std::string& value) { - set_has_required_chars(); - required_chars_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void TrainerSpec::set_required_chars(const std::string& value) { + _internal_set_required_chars(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.required_chars) } -#if LANG_CXX11 -inline void TrainerSpec::set_required_chars(::std::string&& value) { - set_has_required_chars(); - required_chars_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* TrainerSpec::mutable_required_chars() { + // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.required_chars) + return _internal_mutable_required_chars(); +} +inline const std::string& TrainerSpec::_internal_required_chars() const { + return required_chars_.Get(); +} +inline void TrainerSpec::_internal_set_required_chars(const std::string& value) { + _has_bits_[0] |= 0x00000004u; + required_chars_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void TrainerSpec::set_required_chars(std::string&& value) { + _has_bits_[0] |= 0x00000004u; + required_chars_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.TrainerSpec.required_chars) } -#endif inline void 
TrainerSpec::set_required_chars(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_required_chars(); - required_chars_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000004u; + required_chars_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.required_chars) } -inline void TrainerSpec::set_required_chars(const char* value, size_t size) { - set_has_required_chars(); - required_chars_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void TrainerSpec::set_required_chars(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000004u; + required_chars_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.required_chars) } -inline ::std::string* TrainerSpec::mutable_required_chars() { - set_has_required_chars(); - // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.required_chars) - return required_chars_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* TrainerSpec::_internal_mutable_required_chars() { + _has_bits_[0] |= 0x00000004u; + return required_chars_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* TrainerSpec::release_required_chars() { +inline std::string* TrainerSpec::release_required_chars() { // @@protoc_insertion_point(field_release:sentencepiece.TrainerSpec.required_chars) - if (!has_required_chars()) { - return NULL; + if (!_internal_has_required_chars()) { + return nullptr; } - clear_has_required_chars(); - return 
required_chars_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000004u; + return required_chars_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void TrainerSpec::set_allocated_required_chars(::std::string* required_chars) { - if (required_chars != NULL) { - set_has_required_chars(); +inline void TrainerSpec::set_allocated_required_chars(std::string* required_chars) { + if (required_chars != nullptr) { + _has_bits_[0] |= 0x00000004u; } else { - clear_has_required_chars(); + _has_bits_[0] &= ~0x00000004u; } - required_chars_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), required_chars); + required_chars_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), required_chars, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.TrainerSpec.required_chars) } // optional bool byte_fallback = 35 [default = false]; -inline bool TrainerSpec::has_byte_fallback() const { - return (_has_bits_[0] & 0x00004000u) != 0; -} -inline void TrainerSpec::set_has_byte_fallback() { - _has_bits_[0] |= 0x00004000u; +inline bool TrainerSpec::_internal_has_byte_fallback() const { + bool value = (_has_bits_[0] & 0x00020000u) != 0; + return value; } -inline void TrainerSpec::clear_has_byte_fallback() { - _has_bits_[0] &= ~0x00004000u; +inline bool TrainerSpec::has_byte_fallback() const { + return _internal_has_byte_fallback(); } inline void TrainerSpec::clear_byte_fallback() { byte_fallback_ = false; - clear_has_byte_fallback(); + _has_bits_[0] &= ~0x00020000u; +} +inline bool TrainerSpec::_internal_byte_fallback() const { + return byte_fallback_; } inline bool TrainerSpec::byte_fallback() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.byte_fallback) - return byte_fallback_; + return _internal_byte_fallback(); } -inline void TrainerSpec::set_byte_fallback(bool 
value) { - set_has_byte_fallback(); +inline void TrainerSpec::_internal_set_byte_fallback(bool value) { + _has_bits_[0] |= 0x00020000u; byte_fallback_ = value; +} +inline void TrainerSpec::set_byte_fallback(bool value) { + _internal_set_byte_fallback(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.byte_fallback) } // optional bool vocabulary_output_piece_score = 32 [default = true]; -inline bool TrainerSpec::has_vocabulary_output_piece_score() const { - return (_has_bits_[0] & 0x80000000u) != 0; +inline bool TrainerSpec::_internal_has_vocabulary_output_piece_score() const { + bool value = (_has_bits_[1] & 0x00000010u) != 0; + return value; } -inline void TrainerSpec::set_has_vocabulary_output_piece_score() { - _has_bits_[0] |= 0x80000000u; -} -inline void TrainerSpec::clear_has_vocabulary_output_piece_score() { - _has_bits_[0] &= ~0x80000000u; +inline bool TrainerSpec::has_vocabulary_output_piece_score() const { + return _internal_has_vocabulary_output_piece_score(); } inline void TrainerSpec::clear_vocabulary_output_piece_score() { vocabulary_output_piece_score_ = true; - clear_has_vocabulary_output_piece_score(); + _has_bits_[1] &= ~0x00000010u; +} +inline bool TrainerSpec::_internal_vocabulary_output_piece_score() const { + return vocabulary_output_piece_score_; } inline bool TrainerSpec::vocabulary_output_piece_score() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.vocabulary_output_piece_score) - return vocabulary_output_piece_score_; + return _internal_vocabulary_output_piece_score(); } -inline void TrainerSpec::set_vocabulary_output_piece_score(bool value) { - set_has_vocabulary_output_piece_score(); +inline void TrainerSpec::_internal_set_vocabulary_output_piece_score(bool value) { + _has_bits_[1] |= 0x00000010u; vocabulary_output_piece_score_ = value; +} +inline void TrainerSpec::set_vocabulary_output_piece_score(bool value) { + _internal_set_vocabulary_output_piece_score(value); // 
@@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.vocabulary_output_piece_score) } // optional bool hard_vocab_limit = 33 [default = true]; -inline bool TrainerSpec::has_hard_vocab_limit() const { - return (_has_bits_[1] & 0x00000001u) != 0; -} -inline void TrainerSpec::set_has_hard_vocab_limit() { - _has_bits_[1] |= 0x00000001u; +inline bool TrainerSpec::_internal_has_hard_vocab_limit() const { + bool value = (_has_bits_[1] & 0x00000020u) != 0; + return value; } -inline void TrainerSpec::clear_has_hard_vocab_limit() { - _has_bits_[1] &= ~0x00000001u; +inline bool TrainerSpec::has_hard_vocab_limit() const { + return _internal_has_hard_vocab_limit(); } inline void TrainerSpec::clear_hard_vocab_limit() { hard_vocab_limit_ = true; - clear_has_hard_vocab_limit(); + _has_bits_[1] &= ~0x00000020u; +} +inline bool TrainerSpec::_internal_hard_vocab_limit() const { + return hard_vocab_limit_; } inline bool TrainerSpec::hard_vocab_limit() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.hard_vocab_limit) - return hard_vocab_limit_; + return _internal_hard_vocab_limit(); } -inline void TrainerSpec::set_hard_vocab_limit(bool value) { - set_has_hard_vocab_limit(); +inline void TrainerSpec::_internal_set_hard_vocab_limit(bool value) { + _has_bits_[1] |= 0x00000020u; hard_vocab_limit_ = value; +} +inline void TrainerSpec::set_hard_vocab_limit(bool value) { + _internal_set_hard_vocab_limit(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.hard_vocab_limit) } // optional bool use_all_vocab = 34 [default = false]; -inline bool TrainerSpec::has_use_all_vocab() const { - return (_has_bits_[0] & 0x00008000u) != 0; -} -inline void TrainerSpec::set_has_use_all_vocab() { - _has_bits_[0] |= 0x00008000u; +inline bool TrainerSpec::_internal_has_use_all_vocab() const { + bool value = (_has_bits_[0] & 0x00040000u) != 0; + return value; } -inline void TrainerSpec::clear_has_use_all_vocab() { - _has_bits_[0] &= ~0x00008000u; +inline bool 
TrainerSpec::has_use_all_vocab() const { + return _internal_has_use_all_vocab(); } inline void TrainerSpec::clear_use_all_vocab() { use_all_vocab_ = false; - clear_has_use_all_vocab(); + _has_bits_[0] &= ~0x00040000u; +} +inline bool TrainerSpec::_internal_use_all_vocab() const { + return use_all_vocab_; } inline bool TrainerSpec::use_all_vocab() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.use_all_vocab) - return use_all_vocab_; + return _internal_use_all_vocab(); } -inline void TrainerSpec::set_use_all_vocab(bool value) { - set_has_use_all_vocab(); +inline void TrainerSpec::_internal_set_use_all_vocab(bool value) { + _has_bits_[0] |= 0x00040000u; use_all_vocab_ = value; +} +inline void TrainerSpec::set_use_all_vocab(bool value) { + _internal_set_use_all_vocab(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.use_all_vocab) } // optional int32 unk_id = 40 [default = 0]; -inline bool TrainerSpec::has_unk_id() const { - return (_has_bits_[0] & 0x00010000u) != 0; -} -inline void TrainerSpec::set_has_unk_id() { - _has_bits_[0] |= 0x00010000u; +inline bool TrainerSpec::_internal_has_unk_id() const { + bool value = (_has_bits_[0] & 0x00100000u) != 0; + return value; } -inline void TrainerSpec::clear_has_unk_id() { - _has_bits_[0] &= ~0x00010000u; +inline bool TrainerSpec::has_unk_id() const { + return _internal_has_unk_id(); } inline void TrainerSpec::clear_unk_id() { unk_id_ = 0; - clear_has_unk_id(); + _has_bits_[0] &= ~0x00100000u; } -inline ::google::protobuf::int32 TrainerSpec::unk_id() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.unk_id) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_unk_id() const { return unk_id_; } -inline void TrainerSpec::set_unk_id(::google::protobuf::int32 value) { - set_has_unk_id(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::unk_id() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.unk_id) + return 
_internal_unk_id(); +} +inline void TrainerSpec::_internal_set_unk_id(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[0] |= 0x00100000u; unk_id_ = value; +} +inline void TrainerSpec::set_unk_id(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_unk_id(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.unk_id) } // optional int32 bos_id = 41 [default = 1]; -inline bool TrainerSpec::has_bos_id() const { - return (_has_bits_[1] & 0x00000002u) != 0; -} -inline void TrainerSpec::set_has_bos_id() { - _has_bits_[1] |= 0x00000002u; +inline bool TrainerSpec::_internal_has_bos_id() const { + bool value = (_has_bits_[1] & 0x00000040u) != 0; + return value; } -inline void TrainerSpec::clear_has_bos_id() { - _has_bits_[1] &= ~0x00000002u; +inline bool TrainerSpec::has_bos_id() const { + return _internal_has_bos_id(); } inline void TrainerSpec::clear_bos_id() { bos_id_ = 1; - clear_has_bos_id(); + _has_bits_[1] &= ~0x00000040u; } -inline ::google::protobuf::int32 TrainerSpec::bos_id() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.bos_id) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_bos_id() const { return bos_id_; } -inline void TrainerSpec::set_bos_id(::google::protobuf::int32 value) { - set_has_bos_id(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::bos_id() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.bos_id) + return _internal_bos_id(); +} +inline void TrainerSpec::_internal_set_bos_id(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[1] |= 0x00000040u; bos_id_ = value; +} +inline void TrainerSpec::set_bos_id(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_bos_id(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.bos_id) } // optional int32 eos_id = 42 [default = 2]; -inline bool TrainerSpec::has_eos_id() const { - return (_has_bits_[1] & 0x00000004u) != 0; -} -inline void TrainerSpec::set_has_eos_id() { - _has_bits_[1] |= 
0x00000004u; +inline bool TrainerSpec::_internal_has_eos_id() const { + bool value = (_has_bits_[1] & 0x00000080u) != 0; + return value; } -inline void TrainerSpec::clear_has_eos_id() { - _has_bits_[1] &= ~0x00000004u; +inline bool TrainerSpec::has_eos_id() const { + return _internal_has_eos_id(); } inline void TrainerSpec::clear_eos_id() { eos_id_ = 2; - clear_has_eos_id(); + _has_bits_[1] &= ~0x00000080u; } -inline ::google::protobuf::int32 TrainerSpec::eos_id() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.eos_id) +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_eos_id() const { return eos_id_; } -inline void TrainerSpec::set_eos_id(::google::protobuf::int32 value) { - set_has_eos_id(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::eos_id() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.eos_id) + return _internal_eos_id(); +} +inline void TrainerSpec::_internal_set_eos_id(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[1] |= 0x00000080u; eos_id_ = value; +} +inline void TrainerSpec::set_eos_id(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_eos_id(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.eos_id) } // optional int32 pad_id = 43 [default = -1]; -inline bool TrainerSpec::has_pad_id() const { - return (_has_bits_[1] & 0x00000008u) != 0; +inline bool TrainerSpec::_internal_has_pad_id() const { + bool value = (_has_bits_[1] & 0x00000100u) != 0; + return value; } -inline void TrainerSpec::set_has_pad_id() { - _has_bits_[1] |= 0x00000008u; -} -inline void TrainerSpec::clear_has_pad_id() { - _has_bits_[1] &= ~0x00000008u; +inline bool TrainerSpec::has_pad_id() const { + return _internal_has_pad_id(); } inline void TrainerSpec::clear_pad_id() { pad_id_ = -1; - clear_has_pad_id(); + _has_bits_[1] &= ~0x00000100u; } -inline ::google::protobuf::int32 TrainerSpec::pad_id() const { - // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.pad_id) 
+inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::_internal_pad_id() const { return pad_id_; } -inline void TrainerSpec::set_pad_id(::google::protobuf::int32 value) { - set_has_pad_id(); +inline ::PROTOBUF_NAMESPACE_ID::int32 TrainerSpec::pad_id() const { + // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.pad_id) + return _internal_pad_id(); +} +inline void TrainerSpec::_internal_set_pad_id(::PROTOBUF_NAMESPACE_ID::int32 value) { + _has_bits_[1] |= 0x00000100u; pad_id_ = value; +} +inline void TrainerSpec::set_pad_id(::PROTOBUF_NAMESPACE_ID::int32 value) { + _internal_set_pad_id(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.pad_id) } // optional string unk_piece = 45 [default = ""]; -inline bool TrainerSpec::has_unk_piece() const { - return (_has_bits_[0] & 0x00000010u) != 0; -} -inline void TrainerSpec::set_has_unk_piece() { - _has_bits_[0] |= 0x00000010u; +inline bool TrainerSpec::_internal_has_unk_piece() const { + bool value = (_has_bits_[0] & 0x00000010u) != 0; + return value; } -inline void TrainerSpec::clear_has_unk_piece() { - _has_bits_[0] &= ~0x00000010u; +inline bool TrainerSpec::has_unk_piece() const { + return _internal_has_unk_piece(); } inline void TrainerSpec::clear_unk_piece() { - unk_piece_.ClearToDefaultNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get()); - clear_has_unk_piece(); + unk_piece_.ClearToDefault(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_, GetArena()); + _has_bits_[0] &= ~0x00000010u; } -inline const ::std::string& TrainerSpec::unk_piece() const { +inline const std::string& TrainerSpec::unk_piece() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.unk_piece) - return unk_piece_.GetNoArena(); + if (unk_piece_.IsDefault(nullptr)) return _i_give_permission_to_break_this_code_default_unk_piece_.get(); + return _internal_unk_piece(); } -inline void TrainerSpec::set_unk_piece(const 
::std::string& value) { - set_has_unk_piece(); - unk_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get(), value); +inline void TrainerSpec::set_unk_piece(const std::string& value) { + _internal_set_unk_piece(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.unk_piece) } -#if LANG_CXX11 -inline void TrainerSpec::set_unk_piece(::std::string&& value) { - set_has_unk_piece(); - unk_piece_.SetNoArena( - &::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get(), ::std::move(value)); +inline std::string* TrainerSpec::mutable_unk_piece() { + // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.unk_piece) + return _internal_mutable_unk_piece(); +} +inline const std::string& TrainerSpec::_internal_unk_piece() const { + return unk_piece_.Get(); +} +inline void TrainerSpec::_internal_set_unk_piece(const std::string& value) { + _has_bits_[0] |= 0x00000010u; + unk_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, value, GetArena()); +} +inline void TrainerSpec::set_unk_piece(std::string&& value) { + _has_bits_[0] |= 0x00000010u; + unk_piece_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.TrainerSpec.unk_piece) } -#endif inline void TrainerSpec::set_unk_piece(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_unk_piece(); - unk_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000010u; + unk_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.unk_piece) } -inline void TrainerSpec::set_unk_piece(const char* value, 
size_t size) { - set_has_unk_piece(); - unk_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get(), - ::std::string(reinterpret_cast(value), size)); +inline void TrainerSpec::set_unk_piece(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000010u; + unk_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.unk_piece) } -inline ::std::string* TrainerSpec::mutable_unk_piece() { - set_has_unk_piece(); - // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.unk_piece) - return unk_piece_.MutableNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get()); +inline std::string* TrainerSpec::_internal_mutable_unk_piece() { + _has_bits_[0] |= 0x00000010u; + return unk_piece_.Mutable(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_, GetArena()); } -inline ::std::string* TrainerSpec::release_unk_piece() { +inline std::string* TrainerSpec::release_unk_piece() { // @@protoc_insertion_point(field_release:sentencepiece.TrainerSpec.unk_piece) - if (!has_unk_piece()) { - return NULL; + if (!_internal_has_unk_piece()) { + return nullptr; } - clear_has_unk_piece(); - return unk_piece_.ReleaseNonDefaultNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get()); + _has_bits_[0] &= ~0x00000010u; + return unk_piece_.ReleaseNonDefault(nullptr, GetArena()); } -inline void TrainerSpec::set_allocated_unk_piece(::std::string* unk_piece) { - if (unk_piece != NULL) { - set_has_unk_piece(); +inline void TrainerSpec::set_allocated_unk_piece(std::string* unk_piece) { + if (unk_piece != nullptr) { + _has_bits_[0] |= 0x00000010u; } else { - clear_has_unk_piece(); + _has_bits_[0] &= ~0x00000010u; } - 
unk_piece_.SetAllocatedNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_piece_.get(), unk_piece); + unk_piece_.SetAllocated(nullptr, unk_piece, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.TrainerSpec.unk_piece) } // optional string bos_piece = 46 [default = ""]; -inline bool TrainerSpec::has_bos_piece() const { - return (_has_bits_[0] & 0x00000020u) != 0; -} -inline void TrainerSpec::set_has_bos_piece() { - _has_bits_[0] |= 0x00000020u; +inline bool TrainerSpec::_internal_has_bos_piece() const { + bool value = (_has_bits_[0] & 0x00000020u) != 0; + return value; } -inline void TrainerSpec::clear_has_bos_piece() { - _has_bits_[0] &= ~0x00000020u; +inline bool TrainerSpec::has_bos_piece() const { + return _internal_has_bos_piece(); } inline void TrainerSpec::clear_bos_piece() { - bos_piece_.ClearToDefaultNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get()); - clear_has_bos_piece(); + bos_piece_.ClearToDefault(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_, GetArena()); + _has_bits_[0] &= ~0x00000020u; } -inline const ::std::string& TrainerSpec::bos_piece() const { +inline const std::string& TrainerSpec::bos_piece() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.bos_piece) - return bos_piece_.GetNoArena(); + if (bos_piece_.IsDefault(nullptr)) return _i_give_permission_to_break_this_code_default_bos_piece_.get(); + return _internal_bos_piece(); } -inline void TrainerSpec::set_bos_piece(const ::std::string& value) { - set_has_bos_piece(); - bos_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get(), value); +inline void TrainerSpec::set_bos_piece(const std::string& value) { + _internal_set_bos_piece(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.bos_piece) } -#if LANG_CXX11 -inline void 
TrainerSpec::set_bos_piece(::std::string&& value) { - set_has_bos_piece(); - bos_piece_.SetNoArena( - &::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get(), ::std::move(value)); +inline std::string* TrainerSpec::mutable_bos_piece() { + // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.bos_piece) + return _internal_mutable_bos_piece(); +} +inline const std::string& TrainerSpec::_internal_bos_piece() const { + return bos_piece_.Get(); +} +inline void TrainerSpec::_internal_set_bos_piece(const std::string& value) { + _has_bits_[0] |= 0x00000020u; + bos_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, value, GetArena()); +} +inline void TrainerSpec::set_bos_piece(std::string&& value) { + _has_bits_[0] |= 0x00000020u; + bos_piece_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.TrainerSpec.bos_piece) } -#endif inline void TrainerSpec::set_bos_piece(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_bos_piece(); - bos_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000020u; + bos_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.bos_piece) } -inline void TrainerSpec::set_bos_piece(const char* value, size_t size) { - set_has_bos_piece(); - bos_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get(), - ::std::string(reinterpret_cast(value), size)); +inline void TrainerSpec::set_bos_piece(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000020u; + 
bos_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.bos_piece) } -inline ::std::string* TrainerSpec::mutable_bos_piece() { - set_has_bos_piece(); - // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.bos_piece) - return bos_piece_.MutableNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get()); +inline std::string* TrainerSpec::_internal_mutable_bos_piece() { + _has_bits_[0] |= 0x00000020u; + return bos_piece_.Mutable(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_, GetArena()); } -inline ::std::string* TrainerSpec::release_bos_piece() { +inline std::string* TrainerSpec::release_bos_piece() { // @@protoc_insertion_point(field_release:sentencepiece.TrainerSpec.bos_piece) - if (!has_bos_piece()) { - return NULL; + if (!_internal_has_bos_piece()) { + return nullptr; } - clear_has_bos_piece(); - return bos_piece_.ReleaseNonDefaultNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get()); + _has_bits_[0] &= ~0x00000020u; + return bos_piece_.ReleaseNonDefault(nullptr, GetArena()); } -inline void TrainerSpec::set_allocated_bos_piece(::std::string* bos_piece) { - if (bos_piece != NULL) { - set_has_bos_piece(); +inline void TrainerSpec::set_allocated_bos_piece(std::string* bos_piece) { + if (bos_piece != nullptr) { + _has_bits_[0] |= 0x00000020u; } else { - clear_has_bos_piece(); + _has_bits_[0] &= ~0x00000020u; } - bos_piece_.SetAllocatedNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_bos_piece_.get(), bos_piece); + bos_piece_.SetAllocated(nullptr, bos_piece, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.TrainerSpec.bos_piece) } // optional string eos_piece = 47 [default = ""]; -inline bool 
TrainerSpec::has_eos_piece() const { - return (_has_bits_[0] & 0x00000040u) != 0; -} -inline void TrainerSpec::set_has_eos_piece() { - _has_bits_[0] |= 0x00000040u; +inline bool TrainerSpec::_internal_has_eos_piece() const { + bool value = (_has_bits_[0] & 0x00000040u) != 0; + return value; } -inline void TrainerSpec::clear_has_eos_piece() { - _has_bits_[0] &= ~0x00000040u; +inline bool TrainerSpec::has_eos_piece() const { + return _internal_has_eos_piece(); } inline void TrainerSpec::clear_eos_piece() { - eos_piece_.ClearToDefaultNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get()); - clear_has_eos_piece(); + eos_piece_.ClearToDefault(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_, GetArena()); + _has_bits_[0] &= ~0x00000040u; } -inline const ::std::string& TrainerSpec::eos_piece() const { +inline const std::string& TrainerSpec::eos_piece() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.eos_piece) - return eos_piece_.GetNoArena(); + if (eos_piece_.IsDefault(nullptr)) return _i_give_permission_to_break_this_code_default_eos_piece_.get(); + return _internal_eos_piece(); } -inline void TrainerSpec::set_eos_piece(const ::std::string& value) { - set_has_eos_piece(); - eos_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get(), value); +inline void TrainerSpec::set_eos_piece(const std::string& value) { + _internal_set_eos_piece(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.eos_piece) } -#if LANG_CXX11 -inline void TrainerSpec::set_eos_piece(::std::string&& value) { - set_has_eos_piece(); - eos_piece_.SetNoArena( - &::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get(), ::std::move(value)); +inline std::string* TrainerSpec::mutable_eos_piece() { + // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.eos_piece) + return 
_internal_mutable_eos_piece(); +} +inline const std::string& TrainerSpec::_internal_eos_piece() const { + return eos_piece_.Get(); +} +inline void TrainerSpec::_internal_set_eos_piece(const std::string& value) { + _has_bits_[0] |= 0x00000040u; + eos_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, value, GetArena()); +} +inline void TrainerSpec::set_eos_piece(std::string&& value) { + _has_bits_[0] |= 0x00000040u; + eos_piece_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.TrainerSpec.eos_piece) } -#endif inline void TrainerSpec::set_eos_piece(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_eos_piece(); - eos_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000040u; + eos_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.eos_piece) } -inline void TrainerSpec::set_eos_piece(const char* value, size_t size) { - set_has_eos_piece(); - eos_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get(), - ::std::string(reinterpret_cast(value), size)); +inline void TrainerSpec::set_eos_piece(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000040u; + eos_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.eos_piece) } -inline ::std::string* TrainerSpec::mutable_eos_piece() { - set_has_eos_piece(); - // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.eos_piece) - return 
eos_piece_.MutableNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get()); +inline std::string* TrainerSpec::_internal_mutable_eos_piece() { + _has_bits_[0] |= 0x00000040u; + return eos_piece_.Mutable(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_, GetArena()); } -inline ::std::string* TrainerSpec::release_eos_piece() { +inline std::string* TrainerSpec::release_eos_piece() { // @@protoc_insertion_point(field_release:sentencepiece.TrainerSpec.eos_piece) - if (!has_eos_piece()) { - return NULL; + if (!_internal_has_eos_piece()) { + return nullptr; } - clear_has_eos_piece(); - return eos_piece_.ReleaseNonDefaultNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get()); + _has_bits_[0] &= ~0x00000040u; + return eos_piece_.ReleaseNonDefault(nullptr, GetArena()); } -inline void TrainerSpec::set_allocated_eos_piece(::std::string* eos_piece) { - if (eos_piece != NULL) { - set_has_eos_piece(); +inline void TrainerSpec::set_allocated_eos_piece(std::string* eos_piece) { + if (eos_piece != nullptr) { + _has_bits_[0] |= 0x00000040u; } else { - clear_has_eos_piece(); + _has_bits_[0] &= ~0x00000040u; } - eos_piece_.SetAllocatedNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_eos_piece_.get(), eos_piece); + eos_piece_.SetAllocated(nullptr, eos_piece, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.TrainerSpec.eos_piece) } // optional string pad_piece = 48 [default = ""]; -inline bool TrainerSpec::has_pad_piece() const { - return (_has_bits_[0] & 0x00000080u) != 0; -} -inline void TrainerSpec::set_has_pad_piece() { - _has_bits_[0] |= 0x00000080u; +inline bool TrainerSpec::_internal_has_pad_piece() const { + bool value = (_has_bits_[0] & 0x00000080u) != 0; + return value; } -inline void TrainerSpec::clear_has_pad_piece() { - _has_bits_[0] &= ~0x00000080u; +inline bool 
TrainerSpec::has_pad_piece() const { + return _internal_has_pad_piece(); } inline void TrainerSpec::clear_pad_piece() { - pad_piece_.ClearToDefaultNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get()); - clear_has_pad_piece(); + pad_piece_.ClearToDefault(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_, GetArena()); + _has_bits_[0] &= ~0x00000080u; } -inline const ::std::string& TrainerSpec::pad_piece() const { +inline const std::string& TrainerSpec::pad_piece() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.pad_piece) - return pad_piece_.GetNoArena(); + if (pad_piece_.IsDefault(nullptr)) return _i_give_permission_to_break_this_code_default_pad_piece_.get(); + return _internal_pad_piece(); } -inline void TrainerSpec::set_pad_piece(const ::std::string& value) { - set_has_pad_piece(); - pad_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get(), value); +inline void TrainerSpec::set_pad_piece(const std::string& value) { + _internal_set_pad_piece(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.pad_piece) } -#if LANG_CXX11 -inline void TrainerSpec::set_pad_piece(::std::string&& value) { - set_has_pad_piece(); - pad_piece_.SetNoArena( - &::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get(), ::std::move(value)); +inline std::string* TrainerSpec::mutable_pad_piece() { + // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.pad_piece) + return _internal_mutable_pad_piece(); +} +inline const std::string& TrainerSpec::_internal_pad_piece() const { + return pad_piece_.Get(); +} +inline void TrainerSpec::_internal_set_pad_piece(const std::string& value) { + _has_bits_[0] |= 0x00000080u; + pad_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, value, GetArena()); +} +inline void 
TrainerSpec::set_pad_piece(std::string&& value) { + _has_bits_[0] |= 0x00000080u; + pad_piece_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.TrainerSpec.pad_piece) } -#endif inline void TrainerSpec::set_pad_piece(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_pad_piece(); - pad_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000080u; + pad_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.pad_piece) } -inline void TrainerSpec::set_pad_piece(const char* value, size_t size) { - set_has_pad_piece(); - pad_piece_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get(), - ::std::string(reinterpret_cast(value), size)); +inline void TrainerSpec::set_pad_piece(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000080u; + pad_piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.pad_piece) } -inline ::std::string* TrainerSpec::mutable_pad_piece() { - set_has_pad_piece(); - // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.pad_piece) - return pad_piece_.MutableNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get()); +inline std::string* TrainerSpec::_internal_mutable_pad_piece() { + _has_bits_[0] |= 0x00000080u; + return pad_piece_.Mutable(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_, GetArena()); } -inline ::std::string* 
TrainerSpec::release_pad_piece() { +inline std::string* TrainerSpec::release_pad_piece() { // @@protoc_insertion_point(field_release:sentencepiece.TrainerSpec.pad_piece) - if (!has_pad_piece()) { - return NULL; + if (!_internal_has_pad_piece()) { + return nullptr; } - clear_has_pad_piece(); - return pad_piece_.ReleaseNonDefaultNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get()); + _has_bits_[0] &= ~0x00000080u; + return pad_piece_.ReleaseNonDefault(nullptr, GetArena()); } -inline void TrainerSpec::set_allocated_pad_piece(::std::string* pad_piece) { - if (pad_piece != NULL) { - set_has_pad_piece(); +inline void TrainerSpec::set_allocated_pad_piece(std::string* pad_piece) { + if (pad_piece != nullptr) { + _has_bits_[0] |= 0x00000080u; } else { - clear_has_pad_piece(); + _has_bits_[0] &= ~0x00000080u; } - pad_piece_.SetAllocatedNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_pad_piece_.get(), pad_piece); + pad_piece_.SetAllocated(nullptr, pad_piece, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.TrainerSpec.pad_piece) } // optional string unk_surface = 44 [default = " \342\201\207 "]; -inline bool TrainerSpec::has_unk_surface() const { - return (_has_bits_[0] & 0x00000008u) != 0; -} -inline void TrainerSpec::set_has_unk_surface() { - _has_bits_[0] |= 0x00000008u; +inline bool TrainerSpec::_internal_has_unk_surface() const { + bool value = (_has_bits_[0] & 0x00000008u) != 0; + return value; } -inline void TrainerSpec::clear_has_unk_surface() { - _has_bits_[0] &= ~0x00000008u; +inline bool TrainerSpec::has_unk_surface() const { + return _internal_has_unk_surface(); } inline void TrainerSpec::clear_unk_surface() { - unk_surface_.ClearToDefaultNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get()); - clear_has_unk_surface(); + 
unk_surface_.ClearToDefault(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_, GetArena()); + _has_bits_[0] &= ~0x00000008u; } -inline const ::std::string& TrainerSpec::unk_surface() const { +inline const std::string& TrainerSpec::unk_surface() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.unk_surface) - return unk_surface_.GetNoArena(); + if (unk_surface_.IsDefault(nullptr)) return _i_give_permission_to_break_this_code_default_unk_surface_.get(); + return _internal_unk_surface(); } -inline void TrainerSpec::set_unk_surface(const ::std::string& value) { - set_has_unk_surface(); - unk_surface_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get(), value); +inline void TrainerSpec::set_unk_surface(const std::string& value) { + _internal_set_unk_surface(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.unk_surface) } -#if LANG_CXX11 -inline void TrainerSpec::set_unk_surface(::std::string&& value) { - set_has_unk_surface(); - unk_surface_.SetNoArena( - &::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get(), ::std::move(value)); +inline std::string* TrainerSpec::mutable_unk_surface() { + // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.unk_surface) + return _internal_mutable_unk_surface(); +} +inline const std::string& TrainerSpec::_internal_unk_surface() const { + return unk_surface_.Get(); +} +inline void TrainerSpec::_internal_set_unk_surface(const std::string& value) { + _has_bits_[0] |= 0x00000008u; + unk_surface_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, value, GetArena()); +} +inline void TrainerSpec::set_unk_surface(std::string&& value) { + _has_bits_[0] |= 0x00000008u; + unk_surface_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::move(value), GetArena()); // 
@@protoc_insertion_point(field_set_rvalue:sentencepiece.TrainerSpec.unk_surface) } -#endif inline void TrainerSpec::set_unk_surface(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_unk_surface(); - unk_surface_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000008u; + unk_surface_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.TrainerSpec.unk_surface) } -inline void TrainerSpec::set_unk_surface(const char* value, size_t size) { - set_has_unk_surface(); - unk_surface_.SetNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get(), - ::std::string(reinterpret_cast(value), size)); +inline void TrainerSpec::set_unk_surface(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000008u; + unk_surface_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::NonEmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.TrainerSpec.unk_surface) } -inline ::std::string* TrainerSpec::mutable_unk_surface() { - set_has_unk_surface(); - // @@protoc_insertion_point(field_mutable:sentencepiece.TrainerSpec.unk_surface) - return unk_surface_.MutableNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get()); +inline std::string* TrainerSpec::_internal_mutable_unk_surface() { + _has_bits_[0] |= 0x00000008u; + return unk_surface_.Mutable(::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_, GetArena()); } -inline ::std::string* TrainerSpec::release_unk_surface() { +inline std::string* TrainerSpec::release_unk_surface() { // @@protoc_insertion_point(field_release:sentencepiece.TrainerSpec.unk_surface) - if 
(!has_unk_surface()) { - return NULL; + if (!_internal_has_unk_surface()) { + return nullptr; } - clear_has_unk_surface(); - return unk_surface_.ReleaseNonDefaultNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get()); + _has_bits_[0] &= ~0x00000008u; + return unk_surface_.ReleaseNonDefault(nullptr, GetArena()); } -inline void TrainerSpec::set_allocated_unk_surface(::std::string* unk_surface) { - if (unk_surface != NULL) { - set_has_unk_surface(); +inline void TrainerSpec::set_allocated_unk_surface(std::string* unk_surface) { + if (unk_surface != nullptr) { + _has_bits_[0] |= 0x00000008u; } else { - clear_has_unk_surface(); + _has_bits_[0] &= ~0x00000008u; } - unk_surface_.SetAllocatedNoArena(&::sentencepiece::TrainerSpec::_i_give_permission_to_break_this_code_default_unk_surface_.get(), unk_surface); + unk_surface_.SetAllocated(nullptr, unk_surface, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.TrainerSpec.unk_surface) } // optional bool train_extremely_large_corpus = 49 [default = false]; -inline bool TrainerSpec::has_train_extremely_large_corpus() const { - return (_has_bits_[0] & 0x00020000u) != 0; -} -inline void TrainerSpec::set_has_train_extremely_large_corpus() { - _has_bits_[0] |= 0x00020000u; +inline bool TrainerSpec::_internal_has_train_extremely_large_corpus() const { + bool value = (_has_bits_[0] & 0x00080000u) != 0; + return value; } -inline void TrainerSpec::clear_has_train_extremely_large_corpus() { - _has_bits_[0] &= ~0x00020000u; +inline bool TrainerSpec::has_train_extremely_large_corpus() const { + return _internal_has_train_extremely_large_corpus(); } inline void TrainerSpec::clear_train_extremely_large_corpus() { train_extremely_large_corpus_ = false; - clear_has_train_extremely_large_corpus(); + _has_bits_[0] &= ~0x00080000u; +} +inline bool TrainerSpec::_internal_train_extremely_large_corpus() const { + return train_extremely_large_corpus_; } inline bool 
TrainerSpec::train_extremely_large_corpus() const { // @@protoc_insertion_point(field_get:sentencepiece.TrainerSpec.train_extremely_large_corpus) - return train_extremely_large_corpus_; + return _internal_train_extremely_large_corpus(); } -inline void TrainerSpec::set_train_extremely_large_corpus(bool value) { - set_has_train_extremely_large_corpus(); +inline void TrainerSpec::_internal_set_train_extremely_large_corpus(bool value) { + _has_bits_[0] |= 0x00080000u; train_extremely_large_corpus_ = value; +} +inline void TrainerSpec::set_train_extremely_large_corpus(bool value) { + _internal_set_train_extremely_large_corpus(value); // @@protoc_insertion_point(field_set:sentencepiece.TrainerSpec.train_extremely_large_corpus) } @@ -3061,320 +3930,361 @@ inline void TrainerSpec::set_train_extremely_large_corpus(bool value) { // NormalizerSpec // optional string name = 1; -inline bool NormalizerSpec::has_name() const { - return (_has_bits_[0] & 0x00000001u) != 0; +inline bool NormalizerSpec::_internal_has_name() const { + bool value = (_has_bits_[0] & 0x00000001u) != 0; + return value; } -inline void NormalizerSpec::set_has_name() { - _has_bits_[0] |= 0x00000001u; -} -inline void NormalizerSpec::clear_has_name() { - _has_bits_[0] &= ~0x00000001u; +inline bool NormalizerSpec::has_name() const { + return _internal_has_name(); } inline void NormalizerSpec::clear_name() { - name_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_name(); + name_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000001u; } -inline const ::std::string& NormalizerSpec::name() const { +inline const std::string& NormalizerSpec::name() const { // @@protoc_insertion_point(field_get:sentencepiece.NormalizerSpec.name) - return name_.GetNoArena(); + return _internal_name(); } -inline void NormalizerSpec::set_name(const ::std::string& value) { - set_has_name(); - name_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void 
NormalizerSpec::set_name(const std::string& value) { + _internal_set_name(value); // @@protoc_insertion_point(field_set:sentencepiece.NormalizerSpec.name) } -#if LANG_CXX11 -inline void NormalizerSpec::set_name(::std::string&& value) { - set_has_name(); - name_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* NormalizerSpec::mutable_name() { + // @@protoc_insertion_point(field_mutable:sentencepiece.NormalizerSpec.name) + return _internal_mutable_name(); +} +inline const std::string& NormalizerSpec::_internal_name() const { + return name_.Get(); +} +inline void NormalizerSpec::_internal_set_name(const std::string& value) { + _has_bits_[0] |= 0x00000001u; + name_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void NormalizerSpec::set_name(std::string&& value) { + _has_bits_[0] |= 0x00000001u; + name_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.NormalizerSpec.name) } -#endif inline void NormalizerSpec::set_name(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_name(); - name_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000001u; + name_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.NormalizerSpec.name) } -inline void NormalizerSpec::set_name(const char* value, size_t size) { - set_has_name(); - name_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void NormalizerSpec::set_name(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000001u; + 
name_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.NormalizerSpec.name) } -inline ::std::string* NormalizerSpec::mutable_name() { - set_has_name(); - // @@protoc_insertion_point(field_mutable:sentencepiece.NormalizerSpec.name) - return name_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* NormalizerSpec::_internal_mutable_name() { + _has_bits_[0] |= 0x00000001u; + return name_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* NormalizerSpec::release_name() { +inline std::string* NormalizerSpec::release_name() { // @@protoc_insertion_point(field_release:sentencepiece.NormalizerSpec.name) - if (!has_name()) { - return NULL; + if (!_internal_has_name()) { + return nullptr; } - clear_has_name(); - return name_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000001u; + return name_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void NormalizerSpec::set_allocated_name(::std::string* name) { - if (name != NULL) { - set_has_name(); +inline void NormalizerSpec::set_allocated_name(std::string* name) { + if (name != nullptr) { + _has_bits_[0] |= 0x00000001u; } else { - clear_has_name(); + _has_bits_[0] &= ~0x00000001u; } - name_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), name); + name_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), name, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.NormalizerSpec.name) } // optional bytes precompiled_charsmap = 2; -inline bool NormalizerSpec::has_precompiled_charsmap() const { - return (_has_bits_[0] & 0x00000002u) != 0; -} -inline void 
NormalizerSpec::set_has_precompiled_charsmap() { - _has_bits_[0] |= 0x00000002u; +inline bool NormalizerSpec::_internal_has_precompiled_charsmap() const { + bool value = (_has_bits_[0] & 0x00000002u) != 0; + return value; } -inline void NormalizerSpec::clear_has_precompiled_charsmap() { - _has_bits_[0] &= ~0x00000002u; +inline bool NormalizerSpec::has_precompiled_charsmap() const { + return _internal_has_precompiled_charsmap(); } inline void NormalizerSpec::clear_precompiled_charsmap() { - precompiled_charsmap_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_precompiled_charsmap(); + precompiled_charsmap_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000002u; } -inline const ::std::string& NormalizerSpec::precompiled_charsmap() const { +inline const std::string& NormalizerSpec::precompiled_charsmap() const { // @@protoc_insertion_point(field_get:sentencepiece.NormalizerSpec.precompiled_charsmap) - return precompiled_charsmap_.GetNoArena(); + return _internal_precompiled_charsmap(); } -inline void NormalizerSpec::set_precompiled_charsmap(const ::std::string& value) { - set_has_precompiled_charsmap(); - precompiled_charsmap_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void NormalizerSpec::set_precompiled_charsmap(const std::string& value) { + _internal_set_precompiled_charsmap(value); // @@protoc_insertion_point(field_set:sentencepiece.NormalizerSpec.precompiled_charsmap) } -#if LANG_CXX11 -inline void NormalizerSpec::set_precompiled_charsmap(::std::string&& value) { - set_has_precompiled_charsmap(); - precompiled_charsmap_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* NormalizerSpec::mutable_precompiled_charsmap() { + // @@protoc_insertion_point(field_mutable:sentencepiece.NormalizerSpec.precompiled_charsmap) + return _internal_mutable_precompiled_charsmap(); +} +inline const std::string& 
NormalizerSpec::_internal_precompiled_charsmap() const { + return precompiled_charsmap_.Get(); +} +inline void NormalizerSpec::_internal_set_precompiled_charsmap(const std::string& value) { + _has_bits_[0] |= 0x00000002u; + precompiled_charsmap_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void NormalizerSpec::set_precompiled_charsmap(std::string&& value) { + _has_bits_[0] |= 0x00000002u; + precompiled_charsmap_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.NormalizerSpec.precompiled_charsmap) } -#endif inline void NormalizerSpec::set_precompiled_charsmap(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_precompiled_charsmap(); - precompiled_charsmap_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000002u; + precompiled_charsmap_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.NormalizerSpec.precompiled_charsmap) } -inline void NormalizerSpec::set_precompiled_charsmap(const void* value, size_t size) { - set_has_precompiled_charsmap(); - precompiled_charsmap_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void NormalizerSpec::set_precompiled_charsmap(const void* value, + size_t size) { + _has_bits_[0] |= 0x00000002u; + precompiled_charsmap_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.NormalizerSpec.precompiled_charsmap) } -inline ::std::string* NormalizerSpec::mutable_precompiled_charsmap() { - set_has_precompiled_charsmap(); - // 
@@protoc_insertion_point(field_mutable:sentencepiece.NormalizerSpec.precompiled_charsmap) - return precompiled_charsmap_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* NormalizerSpec::_internal_mutable_precompiled_charsmap() { + _has_bits_[0] |= 0x00000002u; + return precompiled_charsmap_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* NormalizerSpec::release_precompiled_charsmap() { +inline std::string* NormalizerSpec::release_precompiled_charsmap() { // @@protoc_insertion_point(field_release:sentencepiece.NormalizerSpec.precompiled_charsmap) - if (!has_precompiled_charsmap()) { - return NULL; + if (!_internal_has_precompiled_charsmap()) { + return nullptr; } - clear_has_precompiled_charsmap(); - return precompiled_charsmap_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000002u; + return precompiled_charsmap_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void NormalizerSpec::set_allocated_precompiled_charsmap(::std::string* precompiled_charsmap) { - if (precompiled_charsmap != NULL) { - set_has_precompiled_charsmap(); +inline void NormalizerSpec::set_allocated_precompiled_charsmap(std::string* precompiled_charsmap) { + if (precompiled_charsmap != nullptr) { + _has_bits_[0] |= 0x00000002u; } else { - clear_has_precompiled_charsmap(); + _has_bits_[0] &= ~0x00000002u; } - precompiled_charsmap_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), precompiled_charsmap); + precompiled_charsmap_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), precompiled_charsmap, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.NormalizerSpec.precompiled_charsmap) } // optional bool add_dummy_prefix = 3 [default = true]; -inline bool 
NormalizerSpec::has_add_dummy_prefix() const { - return (_has_bits_[0] & 0x00000020u) != 0; +inline bool NormalizerSpec::_internal_has_add_dummy_prefix() const { + bool value = (_has_bits_[0] & 0x00000020u) != 0; + return value; } -inline void NormalizerSpec::set_has_add_dummy_prefix() { - _has_bits_[0] |= 0x00000020u; -} -inline void NormalizerSpec::clear_has_add_dummy_prefix() { - _has_bits_[0] &= ~0x00000020u; +inline bool NormalizerSpec::has_add_dummy_prefix() const { + return _internal_has_add_dummy_prefix(); } inline void NormalizerSpec::clear_add_dummy_prefix() { add_dummy_prefix_ = true; - clear_has_add_dummy_prefix(); + _has_bits_[0] &= ~0x00000020u; +} +inline bool NormalizerSpec::_internal_add_dummy_prefix() const { + return add_dummy_prefix_; } inline bool NormalizerSpec::add_dummy_prefix() const { // @@protoc_insertion_point(field_get:sentencepiece.NormalizerSpec.add_dummy_prefix) - return add_dummy_prefix_; + return _internal_add_dummy_prefix(); } -inline void NormalizerSpec::set_add_dummy_prefix(bool value) { - set_has_add_dummy_prefix(); +inline void NormalizerSpec::_internal_set_add_dummy_prefix(bool value) { + _has_bits_[0] |= 0x00000020u; add_dummy_prefix_ = value; +} +inline void NormalizerSpec::set_add_dummy_prefix(bool value) { + _internal_set_add_dummy_prefix(value); // @@protoc_insertion_point(field_set:sentencepiece.NormalizerSpec.add_dummy_prefix) } // optional bool remove_extra_whitespaces = 4 [default = true]; -inline bool NormalizerSpec::has_remove_extra_whitespaces() const { - return (_has_bits_[0] & 0x00000040u) != 0; -} -inline void NormalizerSpec::set_has_remove_extra_whitespaces() { - _has_bits_[0] |= 0x00000040u; +inline bool NormalizerSpec::_internal_has_remove_extra_whitespaces() const { + bool value = (_has_bits_[0] & 0x00000040u) != 0; + return value; } -inline void NormalizerSpec::clear_has_remove_extra_whitespaces() { - _has_bits_[0] &= ~0x00000040u; +inline bool NormalizerSpec::has_remove_extra_whitespaces() const { + 
return _internal_has_remove_extra_whitespaces(); } inline void NormalizerSpec::clear_remove_extra_whitespaces() { remove_extra_whitespaces_ = true; - clear_has_remove_extra_whitespaces(); + _has_bits_[0] &= ~0x00000040u; +} +inline bool NormalizerSpec::_internal_remove_extra_whitespaces() const { + return remove_extra_whitespaces_; } inline bool NormalizerSpec::remove_extra_whitespaces() const { // @@protoc_insertion_point(field_get:sentencepiece.NormalizerSpec.remove_extra_whitespaces) - return remove_extra_whitespaces_; + return _internal_remove_extra_whitespaces(); } -inline void NormalizerSpec::set_remove_extra_whitespaces(bool value) { - set_has_remove_extra_whitespaces(); +inline void NormalizerSpec::_internal_set_remove_extra_whitespaces(bool value) { + _has_bits_[0] |= 0x00000040u; remove_extra_whitespaces_ = value; +} +inline void NormalizerSpec::set_remove_extra_whitespaces(bool value) { + _internal_set_remove_extra_whitespaces(value); // @@protoc_insertion_point(field_set:sentencepiece.NormalizerSpec.remove_extra_whitespaces) } // optional bool escape_whitespaces = 5 [default = true]; -inline bool NormalizerSpec::has_escape_whitespaces() const { - return (_has_bits_[0] & 0x00000080u) != 0; -} -inline void NormalizerSpec::set_has_escape_whitespaces() { - _has_bits_[0] |= 0x00000080u; +inline bool NormalizerSpec::_internal_has_escape_whitespaces() const { + bool value = (_has_bits_[0] & 0x00000080u) != 0; + return value; } -inline void NormalizerSpec::clear_has_escape_whitespaces() { - _has_bits_[0] &= ~0x00000080u; +inline bool NormalizerSpec::has_escape_whitespaces() const { + return _internal_has_escape_whitespaces(); } inline void NormalizerSpec::clear_escape_whitespaces() { escape_whitespaces_ = true; - clear_has_escape_whitespaces(); + _has_bits_[0] &= ~0x00000080u; +} +inline bool NormalizerSpec::_internal_escape_whitespaces() const { + return escape_whitespaces_; } inline bool NormalizerSpec::escape_whitespaces() const { // 
@@protoc_insertion_point(field_get:sentencepiece.NormalizerSpec.escape_whitespaces) - return escape_whitespaces_; + return _internal_escape_whitespaces(); } -inline void NormalizerSpec::set_escape_whitespaces(bool value) { - set_has_escape_whitespaces(); +inline void NormalizerSpec::_internal_set_escape_whitespaces(bool value) { + _has_bits_[0] |= 0x00000080u; escape_whitespaces_ = value; +} +inline void NormalizerSpec::set_escape_whitespaces(bool value) { + _internal_set_escape_whitespaces(value); // @@protoc_insertion_point(field_set:sentencepiece.NormalizerSpec.escape_whitespaces) } // optional string normalization_rule_tsv = 6; -inline bool NormalizerSpec::has_normalization_rule_tsv() const { - return (_has_bits_[0] & 0x00000004u) != 0; -} -inline void NormalizerSpec::set_has_normalization_rule_tsv() { - _has_bits_[0] |= 0x00000004u; +inline bool NormalizerSpec::_internal_has_normalization_rule_tsv() const { + bool value = (_has_bits_[0] & 0x00000004u) != 0; + return value; } -inline void NormalizerSpec::clear_has_normalization_rule_tsv() { - _has_bits_[0] &= ~0x00000004u; +inline bool NormalizerSpec::has_normalization_rule_tsv() const { + return _internal_has_normalization_rule_tsv(); } inline void NormalizerSpec::clear_normalization_rule_tsv() { - normalization_rule_tsv_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_normalization_rule_tsv(); + normalization_rule_tsv_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000004u; } -inline const ::std::string& NormalizerSpec::normalization_rule_tsv() const { +inline const std::string& NormalizerSpec::normalization_rule_tsv() const { // @@protoc_insertion_point(field_get:sentencepiece.NormalizerSpec.normalization_rule_tsv) - return normalization_rule_tsv_.GetNoArena(); + return _internal_normalization_rule_tsv(); } -inline void NormalizerSpec::set_normalization_rule_tsv(const ::std::string& value) { - set_has_normalization_rule_tsv(); - 
normalization_rule_tsv_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void NormalizerSpec::set_normalization_rule_tsv(const std::string& value) { + _internal_set_normalization_rule_tsv(value); // @@protoc_insertion_point(field_set:sentencepiece.NormalizerSpec.normalization_rule_tsv) } -#if LANG_CXX11 -inline void NormalizerSpec::set_normalization_rule_tsv(::std::string&& value) { - set_has_normalization_rule_tsv(); - normalization_rule_tsv_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* NormalizerSpec::mutable_normalization_rule_tsv() { + // @@protoc_insertion_point(field_mutable:sentencepiece.NormalizerSpec.normalization_rule_tsv) + return _internal_mutable_normalization_rule_tsv(); +} +inline const std::string& NormalizerSpec::_internal_normalization_rule_tsv() const { + return normalization_rule_tsv_.Get(); +} +inline void NormalizerSpec::_internal_set_normalization_rule_tsv(const std::string& value) { + _has_bits_[0] |= 0x00000004u; + normalization_rule_tsv_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void NormalizerSpec::set_normalization_rule_tsv(std::string&& value) { + _has_bits_[0] |= 0x00000004u; + normalization_rule_tsv_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.NormalizerSpec.normalization_rule_tsv) } -#endif inline void NormalizerSpec::set_normalization_rule_tsv(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_normalization_rule_tsv(); - normalization_rule_tsv_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000004u; + normalization_rule_tsv_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), 
GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.NormalizerSpec.normalization_rule_tsv) } -inline void NormalizerSpec::set_normalization_rule_tsv(const char* value, size_t size) { - set_has_normalization_rule_tsv(); - normalization_rule_tsv_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void NormalizerSpec::set_normalization_rule_tsv(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000004u; + normalization_rule_tsv_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.NormalizerSpec.normalization_rule_tsv) } -inline ::std::string* NormalizerSpec::mutable_normalization_rule_tsv() { - set_has_normalization_rule_tsv(); - // @@protoc_insertion_point(field_mutable:sentencepiece.NormalizerSpec.normalization_rule_tsv) - return normalization_rule_tsv_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* NormalizerSpec::_internal_mutable_normalization_rule_tsv() { + _has_bits_[0] |= 0x00000004u; + return normalization_rule_tsv_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* NormalizerSpec::release_normalization_rule_tsv() { +inline std::string* NormalizerSpec::release_normalization_rule_tsv() { // @@protoc_insertion_point(field_release:sentencepiece.NormalizerSpec.normalization_rule_tsv) - if (!has_normalization_rule_tsv()) { - return NULL; + if (!_internal_has_normalization_rule_tsv()) { + return nullptr; } - clear_has_normalization_rule_tsv(); - return normalization_rule_tsv_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000004u; + return normalization_rule_tsv_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), 
GetArena()); } -inline void NormalizerSpec::set_allocated_normalization_rule_tsv(::std::string* normalization_rule_tsv) { - if (normalization_rule_tsv != NULL) { - set_has_normalization_rule_tsv(); +inline void NormalizerSpec::set_allocated_normalization_rule_tsv(std::string* normalization_rule_tsv) { + if (normalization_rule_tsv != nullptr) { + _has_bits_[0] |= 0x00000004u; } else { - clear_has_normalization_rule_tsv(); + _has_bits_[0] &= ~0x00000004u; } - normalization_rule_tsv_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), normalization_rule_tsv); + normalization_rule_tsv_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), normalization_rule_tsv, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.NormalizerSpec.normalization_rule_tsv) } // optional bool encode_case = 7 [default = false]; -inline bool NormalizerSpec::has_encode_case() const { - return (_has_bits_[0] & 0x00000008u) != 0; -} -inline void NormalizerSpec::set_has_encode_case() { - _has_bits_[0] |= 0x00000008u; +inline bool NormalizerSpec::_internal_has_encode_case() const { + bool value = (_has_bits_[0] & 0x00000008u) != 0; + return value; } -inline void NormalizerSpec::clear_has_encode_case() { - _has_bits_[0] &= ~0x00000008u; +inline bool NormalizerSpec::has_encode_case() const { + return _internal_has_encode_case(); } inline void NormalizerSpec::clear_encode_case() { encode_case_ = false; - clear_has_encode_case(); + _has_bits_[0] &= ~0x00000008u; +} +inline bool NormalizerSpec::_internal_encode_case() const { + return encode_case_; } inline bool NormalizerSpec::encode_case() const { // @@protoc_insertion_point(field_get:sentencepiece.NormalizerSpec.encode_case) - return encode_case_; + return _internal_encode_case(); } -inline void NormalizerSpec::set_encode_case(bool value) { - set_has_encode_case(); +inline void NormalizerSpec::_internal_set_encode_case(bool value) { + _has_bits_[0] |= 0x00000008u; 
encode_case_ = value; +} +inline void NormalizerSpec::set_encode_case(bool value) { + _internal_set_encode_case(value); // @@protoc_insertion_point(field_set:sentencepiece.NormalizerSpec.encode_case) } // optional bool decode_case = 8 [default = false]; -inline bool NormalizerSpec::has_decode_case() const { - return (_has_bits_[0] & 0x00000010u) != 0; -} -inline void NormalizerSpec::set_has_decode_case() { - _has_bits_[0] |= 0x00000010u; +inline bool NormalizerSpec::_internal_has_decode_case() const { + bool value = (_has_bits_[0] & 0x00000010u) != 0; + return value; } -inline void NormalizerSpec::clear_has_decode_case() { - _has_bits_[0] &= ~0x00000010u; +inline bool NormalizerSpec::has_decode_case() const { + return _internal_has_decode_case(); } inline void NormalizerSpec::clear_decode_case() { decode_case_ = false; - clear_has_decode_case(); + _has_bits_[0] &= ~0x00000010u; +} +inline bool NormalizerSpec::_internal_decode_case() const { + return decode_case_; } inline bool NormalizerSpec::decode_case() const { // @@protoc_insertion_point(field_get:sentencepiece.NormalizerSpec.decode_case) - return decode_case_; + return _internal_decode_case(); } -inline void NormalizerSpec::set_decode_case(bool value) { - set_has_decode_case(); +inline void NormalizerSpec::_internal_set_decode_case(bool value) { + _has_bits_[0] |= 0x00000010u; decode_case_ = value; +} +inline void NormalizerSpec::set_decode_case(bool value) { + _internal_set_decode_case(value); // @@protoc_insertion_point(field_set:sentencepiece.NormalizerSpec.decode_case) } @@ -3383,134 +4293,148 @@ inline void NormalizerSpec::set_decode_case(bool value) { // SelfTestData_Sample // optional string input = 1; -inline bool SelfTestData_Sample::has_input() const { - return (_has_bits_[0] & 0x00000001u) != 0; -} -inline void SelfTestData_Sample::set_has_input() { - _has_bits_[0] |= 0x00000001u; +inline bool SelfTestData_Sample::_internal_has_input() const { + bool value = (_has_bits_[0] & 0x00000001u) != 0; + 
return value; } -inline void SelfTestData_Sample::clear_has_input() { - _has_bits_[0] &= ~0x00000001u; +inline bool SelfTestData_Sample::has_input() const { + return _internal_has_input(); } inline void SelfTestData_Sample::clear_input() { - input_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_input(); + input_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000001u; } -inline const ::std::string& SelfTestData_Sample::input() const { +inline const std::string& SelfTestData_Sample::input() const { // @@protoc_insertion_point(field_get:sentencepiece.SelfTestData.Sample.input) - return input_.GetNoArena(); + return _internal_input(); } -inline void SelfTestData_Sample::set_input(const ::std::string& value) { - set_has_input(); - input_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void SelfTestData_Sample::set_input(const std::string& value) { + _internal_set_input(value); // @@protoc_insertion_point(field_set:sentencepiece.SelfTestData.Sample.input) } -#if LANG_CXX11 -inline void SelfTestData_Sample::set_input(::std::string&& value) { - set_has_input(); - input_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* SelfTestData_Sample::mutable_input() { + // @@protoc_insertion_point(field_mutable:sentencepiece.SelfTestData.Sample.input) + return _internal_mutable_input(); +} +inline const std::string& SelfTestData_Sample::_internal_input() const { + return input_.Get(); +} +inline void SelfTestData_Sample::_internal_set_input(const std::string& value) { + _has_bits_[0] |= 0x00000001u; + input_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void SelfTestData_Sample::set_input(std::string&& value) { + _has_bits_[0] |= 0x00000001u; + input_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // 
@@protoc_insertion_point(field_set_rvalue:sentencepiece.SelfTestData.Sample.input) } -#endif inline void SelfTestData_Sample::set_input(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_input(); - input_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000001u; + input_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.SelfTestData.Sample.input) } -inline void SelfTestData_Sample::set_input(const char* value, size_t size) { - set_has_input(); - input_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void SelfTestData_Sample::set_input(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000001u; + input_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.SelfTestData.Sample.input) } -inline ::std::string* SelfTestData_Sample::mutable_input() { - set_has_input(); - // @@protoc_insertion_point(field_mutable:sentencepiece.SelfTestData.Sample.input) - return input_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* SelfTestData_Sample::_internal_mutable_input() { + _has_bits_[0] |= 0x00000001u; + return input_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* SelfTestData_Sample::release_input() { +inline std::string* SelfTestData_Sample::release_input() { // @@protoc_insertion_point(field_release:sentencepiece.SelfTestData.Sample.input) - if (!has_input()) { - return NULL; + if (!_internal_has_input()) { + return nullptr; } - clear_has_input(); - return 
input_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000001u; + return input_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void SelfTestData_Sample::set_allocated_input(::std::string* input) { - if (input != NULL) { - set_has_input(); +inline void SelfTestData_Sample::set_allocated_input(std::string* input) { + if (input != nullptr) { + _has_bits_[0] |= 0x00000001u; } else { - clear_has_input(); + _has_bits_[0] &= ~0x00000001u; } - input_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), input); + input_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), input, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.SelfTestData.Sample.input) } // optional string expected = 2; -inline bool SelfTestData_Sample::has_expected() const { - return (_has_bits_[0] & 0x00000002u) != 0; -} -inline void SelfTestData_Sample::set_has_expected() { - _has_bits_[0] |= 0x00000002u; +inline bool SelfTestData_Sample::_internal_has_expected() const { + bool value = (_has_bits_[0] & 0x00000002u) != 0; + return value; } -inline void SelfTestData_Sample::clear_has_expected() { - _has_bits_[0] &= ~0x00000002u; +inline bool SelfTestData_Sample::has_expected() const { + return _internal_has_expected(); } inline void SelfTestData_Sample::clear_expected() { - expected_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_expected(); + expected_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000002u; } -inline const ::std::string& SelfTestData_Sample::expected() const { +inline const std::string& SelfTestData_Sample::expected() const { // @@protoc_insertion_point(field_get:sentencepiece.SelfTestData.Sample.expected) - return expected_.GetNoArena(); + return _internal_expected(); } -inline void SelfTestData_Sample::set_expected(const ::std::string& 
value) { - set_has_expected(); - expected_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void SelfTestData_Sample::set_expected(const std::string& value) { + _internal_set_expected(value); // @@protoc_insertion_point(field_set:sentencepiece.SelfTestData.Sample.expected) } -#if LANG_CXX11 -inline void SelfTestData_Sample::set_expected(::std::string&& value) { - set_has_expected(); - expected_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* SelfTestData_Sample::mutable_expected() { + // @@protoc_insertion_point(field_mutable:sentencepiece.SelfTestData.Sample.expected) + return _internal_mutable_expected(); +} +inline const std::string& SelfTestData_Sample::_internal_expected() const { + return expected_.Get(); +} +inline void SelfTestData_Sample::_internal_set_expected(const std::string& value) { + _has_bits_[0] |= 0x00000002u; + expected_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void SelfTestData_Sample::set_expected(std::string&& value) { + _has_bits_[0] |= 0x00000002u; + expected_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.SelfTestData.Sample.expected) } -#endif inline void SelfTestData_Sample::set_expected(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_expected(); - expected_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000002u; + expected_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.SelfTestData.Sample.expected) } -inline void SelfTestData_Sample::set_expected(const char* value, size_t size) { - set_has_expected(); - 
expected_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void SelfTestData_Sample::set_expected(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000002u; + expected_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.SelfTestData.Sample.expected) } -inline ::std::string* SelfTestData_Sample::mutable_expected() { - set_has_expected(); - // @@protoc_insertion_point(field_mutable:sentencepiece.SelfTestData.Sample.expected) - return expected_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* SelfTestData_Sample::_internal_mutable_expected() { + _has_bits_[0] |= 0x00000002u; + return expected_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* SelfTestData_Sample::release_expected() { +inline std::string* SelfTestData_Sample::release_expected() { // @@protoc_insertion_point(field_release:sentencepiece.SelfTestData.Sample.expected) - if (!has_expected()) { - return NULL; + if (!_internal_has_expected()) { + return nullptr; } - clear_has_expected(); - return expected_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000002u; + return expected_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void SelfTestData_Sample::set_allocated_expected(::std::string* expected) { - if (expected != NULL) { - set_has_expected(); +inline void SelfTestData_Sample::set_allocated_expected(std::string* expected) { + if (expected != nullptr) { + _has_bits_[0] |= 0x00000002u; } else { - clear_has_expected(); + _has_bits_[0] &= ~0x00000002u; } - expected_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), 
expected); + expected_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), expected, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.SelfTestData.Sample.expected) } @@ -3519,9 +4443,12 @@ inline void SelfTestData_Sample::set_allocated_expected(::std::string* expected) // SelfTestData // repeated .sentencepiece.SelfTestData.Sample samples = 1; -inline int SelfTestData::samples_size() const { +inline int SelfTestData::_internal_samples_size() const { return samples_.size(); } +inline int SelfTestData::samples_size() const { + return _internal_samples_size(); +} inline void SelfTestData::clear_samples() { samples_.Clear(); } @@ -3529,20 +4456,26 @@ inline ::sentencepiece::SelfTestData_Sample* SelfTestData::mutable_samples(int i // @@protoc_insertion_point(field_mutable:sentencepiece.SelfTestData.samples) return samples_.Mutable(index); } -inline ::google::protobuf::RepeatedPtrField< ::sentencepiece::SelfTestData_Sample >* +inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SelfTestData_Sample >* SelfTestData::mutable_samples() { // @@protoc_insertion_point(field_mutable_list:sentencepiece.SelfTestData.samples) return &samples_; } +inline const ::sentencepiece::SelfTestData_Sample& SelfTestData::_internal_samples(int index) const { + return samples_.Get(index); +} inline const ::sentencepiece::SelfTestData_Sample& SelfTestData::samples(int index) const { // @@protoc_insertion_point(field_get:sentencepiece.SelfTestData.samples) - return samples_.Get(index); + return _internal_samples(index); +} +inline ::sentencepiece::SelfTestData_Sample* SelfTestData::_internal_add_samples() { + return samples_.Add(); } inline ::sentencepiece::SelfTestData_Sample* SelfTestData::add_samples() { // @@protoc_insertion_point(field_add:sentencepiece.SelfTestData.samples) - return samples_.Add(); + return _internal_add_samples(); } -inline const ::google::protobuf::RepeatedPtrField< ::sentencepiece::SelfTestData_Sample 
>& +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::SelfTestData_Sample >& SelfTestData::samples() const { // @@protoc_insertion_point(field_list:sentencepiece.SelfTestData.samples) return samples_; @@ -3553,117 +4486,132 @@ SelfTestData::samples() const { // ModelProto_SentencePiece // optional string piece = 1; -inline bool ModelProto_SentencePiece::has_piece() const { - return (_has_bits_[0] & 0x00000001u) != 0; +inline bool ModelProto_SentencePiece::_internal_has_piece() const { + bool value = (_has_bits_[0] & 0x00000001u) != 0; + return value; } -inline void ModelProto_SentencePiece::set_has_piece() { - _has_bits_[0] |= 0x00000001u; -} -inline void ModelProto_SentencePiece::clear_has_piece() { - _has_bits_[0] &= ~0x00000001u; +inline bool ModelProto_SentencePiece::has_piece() const { + return _internal_has_piece(); } inline void ModelProto_SentencePiece::clear_piece() { - piece_.ClearToEmptyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); - clear_has_piece(); + piece_.ClearToEmpty(); + _has_bits_[0] &= ~0x00000001u; } -inline const ::std::string& ModelProto_SentencePiece::piece() const { +inline const std::string& ModelProto_SentencePiece::piece() const { // @@protoc_insertion_point(field_get:sentencepiece.ModelProto.SentencePiece.piece) - return piece_.GetNoArena(); + return _internal_piece(); } -inline void ModelProto_SentencePiece::set_piece(const ::std::string& value) { - set_has_piece(); - piece_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), value); +inline void ModelProto_SentencePiece::set_piece(const std::string& value) { + _internal_set_piece(value); // @@protoc_insertion_point(field_set:sentencepiece.ModelProto.SentencePiece.piece) } -#if LANG_CXX11 -inline void ModelProto_SentencePiece::set_piece(::std::string&& value) { - set_has_piece(); - piece_.SetNoArena( - &::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::move(value)); +inline std::string* 
ModelProto_SentencePiece::mutable_piece() { + // @@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.SentencePiece.piece) + return _internal_mutable_piece(); +} +inline const std::string& ModelProto_SentencePiece::_internal_piece() const { + return piece_.Get(); +} +inline void ModelProto_SentencePiece::_internal_set_piece(const std::string& value) { + _has_bits_[0] |= 0x00000001u; + piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, value, GetArena()); +} +inline void ModelProto_SentencePiece::set_piece(std::string&& value) { + _has_bits_[0] |= 0x00000001u; + piece_.Set( + ::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::move(value), GetArena()); // @@protoc_insertion_point(field_set_rvalue:sentencepiece.ModelProto.SentencePiece.piece) } -#endif inline void ModelProto_SentencePiece::set_piece(const char* value) { - GOOGLE_DCHECK(value != NULL); - set_has_piece(); - piece_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), ::std::string(value)); + GOOGLE_DCHECK(value != nullptr); + _has_bits_[0] |= 0x00000001u; + piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string(value), GetArena()); // @@protoc_insertion_point(field_set_char:sentencepiece.ModelProto.SentencePiece.piece) } -inline void ModelProto_SentencePiece::set_piece(const char* value, size_t size) { - set_has_piece(); - piece_.SetNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), - ::std::string(reinterpret_cast(value), size)); +inline void ModelProto_SentencePiece::set_piece(const char* value, + size_t size) { + _has_bits_[0] |= 0x00000001u; + piece_.Set(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, ::std::string( + reinterpret_cast(value), size), GetArena()); // @@protoc_insertion_point(field_set_pointer:sentencepiece.ModelProto.SentencePiece.piece) } -inline ::std::string* ModelProto_SentencePiece::mutable_piece() { - set_has_piece(); - // 
@@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.SentencePiece.piece) - return piece_.MutableNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); +inline std::string* ModelProto_SentencePiece::_internal_mutable_piece() { + _has_bits_[0] |= 0x00000001u; + return piece_.Mutable(::PROTOBUF_NAMESPACE_ID::internal::ArenaStringPtr::EmptyDefault{}, GetArena()); } -inline ::std::string* ModelProto_SentencePiece::release_piece() { +inline std::string* ModelProto_SentencePiece::release_piece() { // @@protoc_insertion_point(field_release:sentencepiece.ModelProto.SentencePiece.piece) - if (!has_piece()) { - return NULL; + if (!_internal_has_piece()) { + return nullptr; } - clear_has_piece(); - return piece_.ReleaseNonDefaultNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); + _has_bits_[0] &= ~0x00000001u; + return piece_.ReleaseNonDefault(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), GetArena()); } -inline void ModelProto_SentencePiece::set_allocated_piece(::std::string* piece) { - if (piece != NULL) { - set_has_piece(); +inline void ModelProto_SentencePiece::set_allocated_piece(std::string* piece) { + if (piece != nullptr) { + _has_bits_[0] |= 0x00000001u; } else { - clear_has_piece(); + _has_bits_[0] &= ~0x00000001u; } - piece_.SetAllocatedNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), piece); + piece_.SetAllocated(&::PROTOBUF_NAMESPACE_ID::internal::GetEmptyStringAlreadyInited(), piece, + GetArena()); // @@protoc_insertion_point(field_set_allocated:sentencepiece.ModelProto.SentencePiece.piece) } // optional float score = 2; -inline bool ModelProto_SentencePiece::has_score() const { - return (_has_bits_[0] & 0x00000002u) != 0; -} -inline void ModelProto_SentencePiece::set_has_score() { - _has_bits_[0] |= 0x00000002u; +inline bool ModelProto_SentencePiece::_internal_has_score() const { + bool value = (_has_bits_[0] & 0x00000002u) != 0; + return value; } -inline void 
ModelProto_SentencePiece::clear_has_score() { - _has_bits_[0] &= ~0x00000002u; +inline bool ModelProto_SentencePiece::has_score() const { + return _internal_has_score(); } inline void ModelProto_SentencePiece::clear_score() { score_ = 0; - clear_has_score(); + _has_bits_[0] &= ~0x00000002u; +} +inline float ModelProto_SentencePiece::_internal_score() const { + return score_; } inline float ModelProto_SentencePiece::score() const { // @@protoc_insertion_point(field_get:sentencepiece.ModelProto.SentencePiece.score) - return score_; + return _internal_score(); } -inline void ModelProto_SentencePiece::set_score(float value) { - set_has_score(); +inline void ModelProto_SentencePiece::_internal_set_score(float value) { + _has_bits_[0] |= 0x00000002u; score_ = value; +} +inline void ModelProto_SentencePiece::set_score(float value) { + _internal_set_score(value); // @@protoc_insertion_point(field_set:sentencepiece.ModelProto.SentencePiece.score) } // optional .sentencepiece.ModelProto.SentencePiece.Type type = 3 [default = NORMAL]; -inline bool ModelProto_SentencePiece::has_type() const { - return (_has_bits_[0] & 0x00000004u) != 0; -} -inline void ModelProto_SentencePiece::set_has_type() { - _has_bits_[0] |= 0x00000004u; +inline bool ModelProto_SentencePiece::_internal_has_type() const { + bool value = (_has_bits_[0] & 0x00000004u) != 0; + return value; } -inline void ModelProto_SentencePiece::clear_has_type() { - _has_bits_[0] &= ~0x00000004u; +inline bool ModelProto_SentencePiece::has_type() const { + return _internal_has_type(); } inline void ModelProto_SentencePiece::clear_type() { type_ = 1; - clear_has_type(); + _has_bits_[0] &= ~0x00000004u; +} +inline ::sentencepiece::ModelProto_SentencePiece_Type ModelProto_SentencePiece::_internal_type() const { + return static_cast< ::sentencepiece::ModelProto_SentencePiece_Type >(type_); } inline ::sentencepiece::ModelProto_SentencePiece_Type ModelProto_SentencePiece::type() const { // 
@@protoc_insertion_point(field_get:sentencepiece.ModelProto.SentencePiece.type) - return static_cast< ::sentencepiece::ModelProto_SentencePiece_Type >(type_); + return _internal_type(); } -inline void ModelProto_SentencePiece::set_type(::sentencepiece::ModelProto_SentencePiece_Type value) { +inline void ModelProto_SentencePiece::_internal_set_type(::sentencepiece::ModelProto_SentencePiece_Type value) { assert(::sentencepiece::ModelProto_SentencePiece_Type_IsValid(value)); - set_has_type(); + _has_bits_[0] |= 0x00000004u; type_ = value; +} +inline void ModelProto_SentencePiece::set_type(::sentencepiece::ModelProto_SentencePiece_Type value) { + _internal_set_type(value); // @@protoc_insertion_point(field_set:sentencepiece.ModelProto.SentencePiece.type) } @@ -3672,9 +4620,12 @@ inline void ModelProto_SentencePiece::set_type(::sentencepiece::ModelProto_Sente // ModelProto // repeated .sentencepiece.ModelProto.SentencePiece pieces = 1; -inline int ModelProto::pieces_size() const { +inline int ModelProto::_internal_pieces_size() const { return pieces_.size(); } +inline int ModelProto::pieces_size() const { + return _internal_pieces_size(); +} inline void ModelProto::clear_pieces() { pieces_.Clear(); } @@ -3682,252 +4633,358 @@ inline ::sentencepiece::ModelProto_SentencePiece* ModelProto::mutable_pieces(int // @@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.pieces) return pieces_.Mutable(index); } -inline ::google::protobuf::RepeatedPtrField< ::sentencepiece::ModelProto_SentencePiece >* +inline ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::ModelProto_SentencePiece >* ModelProto::mutable_pieces() { // @@protoc_insertion_point(field_mutable_list:sentencepiece.ModelProto.pieces) return &pieces_; } +inline const ::sentencepiece::ModelProto_SentencePiece& ModelProto::_internal_pieces(int index) const { + return pieces_.Get(index); +} inline const ::sentencepiece::ModelProto_SentencePiece& ModelProto::pieces(int index) const { // 
@@protoc_insertion_point(field_get:sentencepiece.ModelProto.pieces) - return pieces_.Get(index); + return _internal_pieces(index); +} +inline ::sentencepiece::ModelProto_SentencePiece* ModelProto::_internal_add_pieces() { + return pieces_.Add(); } inline ::sentencepiece::ModelProto_SentencePiece* ModelProto::add_pieces() { // @@protoc_insertion_point(field_add:sentencepiece.ModelProto.pieces) - return pieces_.Add(); + return _internal_add_pieces(); } -inline const ::google::protobuf::RepeatedPtrField< ::sentencepiece::ModelProto_SentencePiece >& +inline const ::PROTOBUF_NAMESPACE_ID::RepeatedPtrField< ::sentencepiece::ModelProto_SentencePiece >& ModelProto::pieces() const { // @@protoc_insertion_point(field_list:sentencepiece.ModelProto.pieces) return pieces_; } // optional .sentencepiece.TrainerSpec trainer_spec = 2; -inline bool ModelProto::has_trainer_spec() const { - return (_has_bits_[0] & 0x00000001u) != 0; -} -inline void ModelProto::set_has_trainer_spec() { - _has_bits_[0] |= 0x00000001u; +inline bool ModelProto::_internal_has_trainer_spec() const { + bool value = (_has_bits_[0] & 0x00000001u) != 0; + PROTOBUF_ASSUME(!value || trainer_spec_ != nullptr); + return value; } -inline void ModelProto::clear_has_trainer_spec() { - _has_bits_[0] &= ~0x00000001u; +inline bool ModelProto::has_trainer_spec() const { + return _internal_has_trainer_spec(); } inline void ModelProto::clear_trainer_spec() { - if (trainer_spec_ != NULL) trainer_spec_->Clear(); - clear_has_trainer_spec(); + if (trainer_spec_ != nullptr) trainer_spec_->Clear(); + _has_bits_[0] &= ~0x00000001u; } inline const ::sentencepiece::TrainerSpec& ModelProto::_internal_trainer_spec() const { - return *trainer_spec_; + const ::sentencepiece::TrainerSpec* p = trainer_spec_; + return p != nullptr ? 
*p : reinterpret_cast( + ::sentencepiece::_TrainerSpec_default_instance_); } inline const ::sentencepiece::TrainerSpec& ModelProto::trainer_spec() const { - const ::sentencepiece::TrainerSpec* p = trainer_spec_; // @@protoc_insertion_point(field_get:sentencepiece.ModelProto.trainer_spec) - return p != NULL ? *p : *reinterpret_cast( - &::sentencepiece::_TrainerSpec_default_instance_); + return _internal_trainer_spec(); +} +inline void ModelProto::unsafe_arena_set_allocated_trainer_spec( + ::sentencepiece::TrainerSpec* trainer_spec) { + if (GetArena() == nullptr) { + delete reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(trainer_spec_); + } + trainer_spec_ = trainer_spec; + if (trainer_spec) { + _has_bits_[0] |= 0x00000001u; + } else { + _has_bits_[0] &= ~0x00000001u; + } + // @@protoc_insertion_point(field_unsafe_arena_set_allocated:sentencepiece.ModelProto.trainer_spec) } inline ::sentencepiece::TrainerSpec* ModelProto::release_trainer_spec() { + _has_bits_[0] &= ~0x00000001u; + ::sentencepiece::TrainerSpec* temp = trainer_spec_; + trainer_spec_ = nullptr; + if (GetArena() != nullptr) { + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + } + return temp; +} +inline ::sentencepiece::TrainerSpec* ModelProto::unsafe_arena_release_trainer_spec() { // @@protoc_insertion_point(field_release:sentencepiece.ModelProto.trainer_spec) - clear_has_trainer_spec(); + _has_bits_[0] &= ~0x00000001u; ::sentencepiece::TrainerSpec* temp = trainer_spec_; - trainer_spec_ = NULL; + trainer_spec_ = nullptr; return temp; } -inline ::sentencepiece::TrainerSpec* ModelProto::mutable_trainer_spec() { - set_has_trainer_spec(); - if (trainer_spec_ == NULL) { - auto* p = CreateMaybeMessage<::sentencepiece::TrainerSpec>(GetArenaNoVirtual()); +inline ::sentencepiece::TrainerSpec* ModelProto::_internal_mutable_trainer_spec() { + _has_bits_[0] |= 0x00000001u; + if (trainer_spec_ == nullptr) { + auto* p = CreateMaybeMessage<::sentencepiece::TrainerSpec>(GetArena()); 
trainer_spec_ = p; } - // @@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.trainer_spec) return trainer_spec_; } +inline ::sentencepiece::TrainerSpec* ModelProto::mutable_trainer_spec() { + // @@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.trainer_spec) + return _internal_mutable_trainer_spec(); +} inline void ModelProto::set_allocated_trainer_spec(::sentencepiece::TrainerSpec* trainer_spec) { - ::google::protobuf::Arena* message_arena = GetArenaNoVirtual(); - if (message_arena == NULL) { + ::PROTOBUF_NAMESPACE_ID::Arena* message_arena = GetArena(); + if (message_arena == nullptr) { delete trainer_spec_; } if (trainer_spec) { - ::google::protobuf::Arena* submessage_arena = NULL; + ::PROTOBUF_NAMESPACE_ID::Arena* submessage_arena = + ::PROTOBUF_NAMESPACE_ID::Arena::GetArena(trainer_spec); if (message_arena != submessage_arena) { - trainer_spec = ::google::protobuf::internal::GetOwnedMessage( + trainer_spec = ::PROTOBUF_NAMESPACE_ID::internal::GetOwnedMessage( message_arena, trainer_spec, submessage_arena); } - set_has_trainer_spec(); + _has_bits_[0] |= 0x00000001u; } else { - clear_has_trainer_spec(); + _has_bits_[0] &= ~0x00000001u; } trainer_spec_ = trainer_spec; // @@protoc_insertion_point(field_set_allocated:sentencepiece.ModelProto.trainer_spec) } // optional .sentencepiece.NormalizerSpec normalizer_spec = 3; -inline bool ModelProto::has_normalizer_spec() const { - return (_has_bits_[0] & 0x00000002u) != 0; -} -inline void ModelProto::set_has_normalizer_spec() { - _has_bits_[0] |= 0x00000002u; +inline bool ModelProto::_internal_has_normalizer_spec() const { + bool value = (_has_bits_[0] & 0x00000002u) != 0; + PROTOBUF_ASSUME(!value || normalizer_spec_ != nullptr); + return value; } -inline void ModelProto::clear_has_normalizer_spec() { - _has_bits_[0] &= ~0x00000002u; +inline bool ModelProto::has_normalizer_spec() const { + return _internal_has_normalizer_spec(); } inline void ModelProto::clear_normalizer_spec() { - if 
(normalizer_spec_ != NULL) normalizer_spec_->Clear(); - clear_has_normalizer_spec(); + if (normalizer_spec_ != nullptr) normalizer_spec_->Clear(); + _has_bits_[0] &= ~0x00000002u; } inline const ::sentencepiece::NormalizerSpec& ModelProto::_internal_normalizer_spec() const { - return *normalizer_spec_; + const ::sentencepiece::NormalizerSpec* p = normalizer_spec_; + return p != nullptr ? *p : reinterpret_cast( + ::sentencepiece::_NormalizerSpec_default_instance_); } inline const ::sentencepiece::NormalizerSpec& ModelProto::normalizer_spec() const { - const ::sentencepiece::NormalizerSpec* p = normalizer_spec_; // @@protoc_insertion_point(field_get:sentencepiece.ModelProto.normalizer_spec) - return p != NULL ? *p : *reinterpret_cast( - &::sentencepiece::_NormalizerSpec_default_instance_); + return _internal_normalizer_spec(); +} +inline void ModelProto::unsafe_arena_set_allocated_normalizer_spec( + ::sentencepiece::NormalizerSpec* normalizer_spec) { + if (GetArena() == nullptr) { + delete reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(normalizer_spec_); + } + normalizer_spec_ = normalizer_spec; + if (normalizer_spec) { + _has_bits_[0] |= 0x00000002u; + } else { + _has_bits_[0] &= ~0x00000002u; + } + // @@protoc_insertion_point(field_unsafe_arena_set_allocated:sentencepiece.ModelProto.normalizer_spec) } inline ::sentencepiece::NormalizerSpec* ModelProto::release_normalizer_spec() { + _has_bits_[0] &= ~0x00000002u; + ::sentencepiece::NormalizerSpec* temp = normalizer_spec_; + normalizer_spec_ = nullptr; + if (GetArena() != nullptr) { + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + } + return temp; +} +inline ::sentencepiece::NormalizerSpec* ModelProto::unsafe_arena_release_normalizer_spec() { // @@protoc_insertion_point(field_release:sentencepiece.ModelProto.normalizer_spec) - clear_has_normalizer_spec(); + _has_bits_[0] &= ~0x00000002u; ::sentencepiece::NormalizerSpec* temp = normalizer_spec_; - normalizer_spec_ = NULL; + 
normalizer_spec_ = nullptr; return temp; } -inline ::sentencepiece::NormalizerSpec* ModelProto::mutable_normalizer_spec() { - set_has_normalizer_spec(); - if (normalizer_spec_ == NULL) { - auto* p = CreateMaybeMessage<::sentencepiece::NormalizerSpec>(GetArenaNoVirtual()); +inline ::sentencepiece::NormalizerSpec* ModelProto::_internal_mutable_normalizer_spec() { + _has_bits_[0] |= 0x00000002u; + if (normalizer_spec_ == nullptr) { + auto* p = CreateMaybeMessage<::sentencepiece::NormalizerSpec>(GetArena()); normalizer_spec_ = p; } - // @@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.normalizer_spec) return normalizer_spec_; } +inline ::sentencepiece::NormalizerSpec* ModelProto::mutable_normalizer_spec() { + // @@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.normalizer_spec) + return _internal_mutable_normalizer_spec(); +} inline void ModelProto::set_allocated_normalizer_spec(::sentencepiece::NormalizerSpec* normalizer_spec) { - ::google::protobuf::Arena* message_arena = GetArenaNoVirtual(); - if (message_arena == NULL) { + ::PROTOBUF_NAMESPACE_ID::Arena* message_arena = GetArena(); + if (message_arena == nullptr) { delete normalizer_spec_; } if (normalizer_spec) { - ::google::protobuf::Arena* submessage_arena = NULL; + ::PROTOBUF_NAMESPACE_ID::Arena* submessage_arena = + ::PROTOBUF_NAMESPACE_ID::Arena::GetArena(normalizer_spec); if (message_arena != submessage_arena) { - normalizer_spec = ::google::protobuf::internal::GetOwnedMessage( + normalizer_spec = ::PROTOBUF_NAMESPACE_ID::internal::GetOwnedMessage( message_arena, normalizer_spec, submessage_arena); } - set_has_normalizer_spec(); + _has_bits_[0] |= 0x00000002u; } else { - clear_has_normalizer_spec(); + _has_bits_[0] &= ~0x00000002u; } normalizer_spec_ = normalizer_spec; // @@protoc_insertion_point(field_set_allocated:sentencepiece.ModelProto.normalizer_spec) } // optional .sentencepiece.SelfTestData self_test_data = 4; -inline bool ModelProto::has_self_test_data() const { - return 
(_has_bits_[0] & 0x00000004u) != 0; -} -inline void ModelProto::set_has_self_test_data() { - _has_bits_[0] |= 0x00000004u; +inline bool ModelProto::_internal_has_self_test_data() const { + bool value = (_has_bits_[0] & 0x00000004u) != 0; + PROTOBUF_ASSUME(!value || self_test_data_ != nullptr); + return value; } -inline void ModelProto::clear_has_self_test_data() { - _has_bits_[0] &= ~0x00000004u; +inline bool ModelProto::has_self_test_data() const { + return _internal_has_self_test_data(); } inline void ModelProto::clear_self_test_data() { - if (self_test_data_ != NULL) self_test_data_->Clear(); - clear_has_self_test_data(); + if (self_test_data_ != nullptr) self_test_data_->Clear(); + _has_bits_[0] &= ~0x00000004u; } inline const ::sentencepiece::SelfTestData& ModelProto::_internal_self_test_data() const { - return *self_test_data_; + const ::sentencepiece::SelfTestData* p = self_test_data_; + return p != nullptr ? *p : reinterpret_cast( + ::sentencepiece::_SelfTestData_default_instance_); } inline const ::sentencepiece::SelfTestData& ModelProto::self_test_data() const { - const ::sentencepiece::SelfTestData* p = self_test_data_; // @@protoc_insertion_point(field_get:sentencepiece.ModelProto.self_test_data) - return p != NULL ? 
*p : *reinterpret_cast( - &::sentencepiece::_SelfTestData_default_instance_); + return _internal_self_test_data(); +} +inline void ModelProto::unsafe_arena_set_allocated_self_test_data( + ::sentencepiece::SelfTestData* self_test_data) { + if (GetArena() == nullptr) { + delete reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(self_test_data_); + } + self_test_data_ = self_test_data; + if (self_test_data) { + _has_bits_[0] |= 0x00000004u; + } else { + _has_bits_[0] &= ~0x00000004u; + } + // @@protoc_insertion_point(field_unsafe_arena_set_allocated:sentencepiece.ModelProto.self_test_data) } inline ::sentencepiece::SelfTestData* ModelProto::release_self_test_data() { + _has_bits_[0] &= ~0x00000004u; + ::sentencepiece::SelfTestData* temp = self_test_data_; + self_test_data_ = nullptr; + if (GetArena() != nullptr) { + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + } + return temp; +} +inline ::sentencepiece::SelfTestData* ModelProto::unsafe_arena_release_self_test_data() { // @@protoc_insertion_point(field_release:sentencepiece.ModelProto.self_test_data) - clear_has_self_test_data(); + _has_bits_[0] &= ~0x00000004u; ::sentencepiece::SelfTestData* temp = self_test_data_; - self_test_data_ = NULL; + self_test_data_ = nullptr; return temp; } -inline ::sentencepiece::SelfTestData* ModelProto::mutable_self_test_data() { - set_has_self_test_data(); - if (self_test_data_ == NULL) { - auto* p = CreateMaybeMessage<::sentencepiece::SelfTestData>(GetArenaNoVirtual()); +inline ::sentencepiece::SelfTestData* ModelProto::_internal_mutable_self_test_data() { + _has_bits_[0] |= 0x00000004u; + if (self_test_data_ == nullptr) { + auto* p = CreateMaybeMessage<::sentencepiece::SelfTestData>(GetArena()); self_test_data_ = p; } - // @@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.self_test_data) return self_test_data_; } +inline ::sentencepiece::SelfTestData* ModelProto::mutable_self_test_data() { + // 
@@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.self_test_data) + return _internal_mutable_self_test_data(); +} inline void ModelProto::set_allocated_self_test_data(::sentencepiece::SelfTestData* self_test_data) { - ::google::protobuf::Arena* message_arena = GetArenaNoVirtual(); - if (message_arena == NULL) { + ::PROTOBUF_NAMESPACE_ID::Arena* message_arena = GetArena(); + if (message_arena == nullptr) { delete self_test_data_; } if (self_test_data) { - ::google::protobuf::Arena* submessage_arena = NULL; + ::PROTOBUF_NAMESPACE_ID::Arena* submessage_arena = + ::PROTOBUF_NAMESPACE_ID::Arena::GetArena(self_test_data); if (message_arena != submessage_arena) { - self_test_data = ::google::protobuf::internal::GetOwnedMessage( + self_test_data = ::PROTOBUF_NAMESPACE_ID::internal::GetOwnedMessage( message_arena, self_test_data, submessage_arena); } - set_has_self_test_data(); + _has_bits_[0] |= 0x00000004u; } else { - clear_has_self_test_data(); + _has_bits_[0] &= ~0x00000004u; } self_test_data_ = self_test_data; // @@protoc_insertion_point(field_set_allocated:sentencepiece.ModelProto.self_test_data) } // optional .sentencepiece.NormalizerSpec denormalizer_spec = 5; -inline bool ModelProto::has_denormalizer_spec() const { - return (_has_bits_[0] & 0x00000008u) != 0; -} -inline void ModelProto::set_has_denormalizer_spec() { - _has_bits_[0] |= 0x00000008u; +inline bool ModelProto::_internal_has_denormalizer_spec() const { + bool value = (_has_bits_[0] & 0x00000008u) != 0; + PROTOBUF_ASSUME(!value || denormalizer_spec_ != nullptr); + return value; } -inline void ModelProto::clear_has_denormalizer_spec() { - _has_bits_[0] &= ~0x00000008u; +inline bool ModelProto::has_denormalizer_spec() const { + return _internal_has_denormalizer_spec(); } inline void ModelProto::clear_denormalizer_spec() { - if (denormalizer_spec_ != NULL) denormalizer_spec_->Clear(); - clear_has_denormalizer_spec(); + if (denormalizer_spec_ != nullptr) denormalizer_spec_->Clear(); + 
_has_bits_[0] &= ~0x00000008u; } inline const ::sentencepiece::NormalizerSpec& ModelProto::_internal_denormalizer_spec() const { - return *denormalizer_spec_; + const ::sentencepiece::NormalizerSpec* p = denormalizer_spec_; + return p != nullptr ? *p : reinterpret_cast( + ::sentencepiece::_NormalizerSpec_default_instance_); } inline const ::sentencepiece::NormalizerSpec& ModelProto::denormalizer_spec() const { - const ::sentencepiece::NormalizerSpec* p = denormalizer_spec_; // @@protoc_insertion_point(field_get:sentencepiece.ModelProto.denormalizer_spec) - return p != NULL ? *p : *reinterpret_cast( - &::sentencepiece::_NormalizerSpec_default_instance_); + return _internal_denormalizer_spec(); +} +inline void ModelProto::unsafe_arena_set_allocated_denormalizer_spec( + ::sentencepiece::NormalizerSpec* denormalizer_spec) { + if (GetArena() == nullptr) { + delete reinterpret_cast<::PROTOBUF_NAMESPACE_ID::MessageLite*>(denormalizer_spec_); + } + denormalizer_spec_ = denormalizer_spec; + if (denormalizer_spec) { + _has_bits_[0] |= 0x00000008u; + } else { + _has_bits_[0] &= ~0x00000008u; + } + // @@protoc_insertion_point(field_unsafe_arena_set_allocated:sentencepiece.ModelProto.denormalizer_spec) } inline ::sentencepiece::NormalizerSpec* ModelProto::release_denormalizer_spec() { + _has_bits_[0] &= ~0x00000008u; + ::sentencepiece::NormalizerSpec* temp = denormalizer_spec_; + denormalizer_spec_ = nullptr; + if (GetArena() != nullptr) { + temp = ::PROTOBUF_NAMESPACE_ID::internal::DuplicateIfNonNull(temp); + } + return temp; +} +inline ::sentencepiece::NormalizerSpec* ModelProto::unsafe_arena_release_denormalizer_spec() { // @@protoc_insertion_point(field_release:sentencepiece.ModelProto.denormalizer_spec) - clear_has_denormalizer_spec(); + _has_bits_[0] &= ~0x00000008u; ::sentencepiece::NormalizerSpec* temp = denormalizer_spec_; - denormalizer_spec_ = NULL; + denormalizer_spec_ = nullptr; return temp; } -inline ::sentencepiece::NormalizerSpec* 
ModelProto::mutable_denormalizer_spec() { - set_has_denormalizer_spec(); - if (denormalizer_spec_ == NULL) { - auto* p = CreateMaybeMessage<::sentencepiece::NormalizerSpec>(GetArenaNoVirtual()); +inline ::sentencepiece::NormalizerSpec* ModelProto::_internal_mutable_denormalizer_spec() { + _has_bits_[0] |= 0x00000008u; + if (denormalizer_spec_ == nullptr) { + auto* p = CreateMaybeMessage<::sentencepiece::NormalizerSpec>(GetArena()); denormalizer_spec_ = p; } - // @@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.denormalizer_spec) return denormalizer_spec_; } +inline ::sentencepiece::NormalizerSpec* ModelProto::mutable_denormalizer_spec() { + // @@protoc_insertion_point(field_mutable:sentencepiece.ModelProto.denormalizer_spec) + return _internal_mutable_denormalizer_spec(); +} inline void ModelProto::set_allocated_denormalizer_spec(::sentencepiece::NormalizerSpec* denormalizer_spec) { - ::google::protobuf::Arena* message_arena = GetArenaNoVirtual(); - if (message_arena == NULL) { + ::PROTOBUF_NAMESPACE_ID::Arena* message_arena = GetArena(); + if (message_arena == nullptr) { delete denormalizer_spec_; } if (denormalizer_spec) { - ::google::protobuf::Arena* submessage_arena = NULL; + ::PROTOBUF_NAMESPACE_ID::Arena* submessage_arena = + ::PROTOBUF_NAMESPACE_ID::Arena::GetArena(denormalizer_spec); if (message_arena != submessage_arena) { - denormalizer_spec = ::google::protobuf::internal::GetOwnedMessage( + denormalizer_spec = ::PROTOBUF_NAMESPACE_ID::internal::GetOwnedMessage( message_arena, denormalizer_spec, submessage_arena); } - set_has_denormalizer_spec(); + _has_bits_[0] |= 0x00000008u; } else { - clear_has_denormalizer_spec(); + _has_bits_[0] &= ~0x00000008u; } denormalizer_spec_ = denormalizer_spec; // @@protoc_insertion_point(field_set_allocated:sentencepiece.ModelProto.denormalizer_spec) @@ -3951,15 +5008,14 @@ inline void ModelProto::set_allocated_denormalizer_spec(::sentencepiece::Normali } // namespace sentencepiece -namespace google { 
-namespace protobuf { +PROTOBUF_NAMESPACE_OPEN template <> struct is_proto_enum< ::sentencepiece::TrainerSpec_ModelType> : ::std::true_type {}; template <> struct is_proto_enum< ::sentencepiece::ModelProto_SentencePiece_Type> : ::std::true_type {}; -} // namespace protobuf -} // namespace google +PROTOBUF_NAMESPACE_CLOSE // @@protoc_insertion_point(global_scope) -#endif // PROTOBUF_INCLUDED_sentencepiece_5fmodel_2eproto +#include +#endif // GOOGLE_PROTOBUF_INCLUDED_GOOGLE_PROTOBUF_INCLUDED_sentencepiece_5fmodel_2eproto diff --git a/src/common.h b/src/common.h index af0b1c2c..b38b3f7b 100644 --- a/src/common.h +++ b/src/common.h @@ -26,7 +26,7 @@ #include #include "config.h" -#include "third_party/absl/flags/flag.h" +#include "third_party/absl/strings/string_view.h" #if defined(_WIN32) && !defined(__CYGWIN__) #define OS_WIN @@ -51,19 +51,6 @@ typedef uint32_t char32; typedef uint32_t uint32; typedef uint64_t uint64; -static constexpr uint8 kuint8max = ((uint8)0xFF); -static constexpr uint16 kuint16max = ((uint16)0xFFFF); -static constexpr uint32 kuint32max = ((uint32)0xFFFFFFFF); -static constexpr uint64 kuint64max = ((uint64)(0xFFFFFFFFFFFFFFFF)); -static constexpr int8 kint8min = ((int8)~0x7F); -static constexpr int8 kint8max = ((int8)0x7F); -static constexpr int16 kint16min = ((int16)~0x7FFF); -static constexpr int16 kint16max = ((int16)0x7FFF); -static constexpr int32 kint32min = ((int32)~0x7FFFFFFF); -static constexpr int32 kint32max = ((int32)0x7FFFFFFF); -static constexpr int64 kint64min = ((int64)(~0x7FFFFFFFFFFFFFFF)); -static constexpr int64 kint64max = ((int64)(0x7FFFFFFFFFFFFFFF)); - static constexpr uint32 kUnicodeError = 0xFFFD; #if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE) @@ -82,14 +69,29 @@ char (&ArraySizeHelper(const T (&array)[N]))[N]; #define arraysize(array) (sizeof(ArraySizeHelper(array))) +#if defined(_FREEBSD) +#include +#endif +#if !defined(__APPLE__) && !defined(_WIN32) && !defined(_FREEBSD) +#include +#if BYTE_ORDER == 
__BIG_ENDIAN +#define IS_BIG_ENDIAN +#endif +#endif + namespace sentencepiece { #ifdef OS_WIN namespace win32 { -std::wstring Utf8ToWide(const std::string &input); -std::string WideToUtf8(const std::wstring &input); +std::wstring Utf8ToWide(const absl::string_view input); } // namespace win32 #endif +#ifdef IS_BIG_ENDIAN +namespace util { +inline uint32 Swap32(uint32 x) { return __builtin_bswap32(x); } +} // namespace util +#endif + namespace error { void Abort(); @@ -112,15 +114,6 @@ class Die { private: bool die_; }; - -template -T &&CheckNotNull(const char *file, int line, const char *exprtext, T &&t) { - if (t == nullptr) { - std::cerr << file << "(" << line << ") " << exprtext; - Abort(); - } - return std::forward(t); -} } // namespace error namespace logging { @@ -132,6 +125,9 @@ enum LogSeverity { LOG_SEVERITY_SIZE = 4, }; +int GetMinLogLevel(); +void SetMinLogLevel(int v); + inline const char *BaseName(const char *path) { #ifdef OS_WIN const char *p = strrchr(path, '\\'); @@ -144,10 +140,8 @@ inline const char *BaseName(const char *path) { } // namespace logging } // namespace sentencepiece -ABSL_DECLARE_FLAG(int32, minloglevel); - #define LOG(severity) \ - (absl::GetFlag(FLAGS_minloglevel) > \ + (::sentencepiece::logging::GetMinLogLevel() > \ ::sentencepiece::logging::LOG_##severity) \ ? 
0 \ : ::sentencepiece::error::Die( \ @@ -171,10 +165,6 @@ ABSL_DECLARE_FLAG(int32, minloglevel); #define CHECK_LE(a, b) CHECK((a) <= (b)) #define CHECK_GT(a, b) CHECK((a) > (b)) #define CHECK_LT(a, b) CHECK((a) < (b)) -#define CHECK_NOTNULL(val) \ - ::sentencepiece::error::CheckNotNull( \ - ::sentencepiece::logging::BaseName(__FILE__), __LINE__, \ - "'" #val "' Must be non NULL", (val)) #define FRIEND_TEST(a, b) friend class a##_Test_##b; diff --git a/src/compile_charsmap_main.cc b/src/compile_charsmap_main.cc index 7386c59e..88f2d12d 100644 --- a/src/compile_charsmap_main.cc +++ b/src/compile_charsmap_main.cc @@ -156,6 +156,7 @@ struct BinaryBlob { } // namespace sentencepiece int main(int argc, char **argv) { + sentencepiece::ScopedResourceDestructor cleaner; sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, true); const std::vector> data; for (const auto &p : kRuleList) { @@ -176,10 +178,14 @@ int main(int argc, char **argv) { // Write Header. std::string index; CHECK_OK(Builder::CompileCharsMap(normalized_map, &index)); - data.emplace_back(p.first, index); // Write TSV file. CHECK_OK(Builder::SaveCharsMap(p.first + ".tsv", normalized_map)); + + // Do not make NFKD map as it is optionally created. + if (p.first.find("nfkd") != std::string::npos) continue; + + data.emplace_back(p.first, index); } if (absl::GetFlag(FLAGS_output_precompiled_header)) { diff --git a/src/error.cc b/src/error.cc index f2ea7ed1..d3792dc3 100644 --- a/src/error.cc +++ b/src/error.cc @@ -13,9 +13,20 @@ // limitations under the License.! #include + #include "common.h" +#include "init.h" #include "sentencepiece_processor.h" +#ifdef _USE_EXTERNAL_ABSL +// Naive workaround to define minloglevel on external absl package. +// We want to define them in other cc file. 
+#include "third_party/absl/flags/flag.h" +#include "third_party/absl/flags/parse.h" +ABSL_FLAG(int32, minloglevel, 0, + "Messages logged at a lower level than this don't actually."); +#endif + namespace sentencepiece { namespace error { int gTestCounter = 0; @@ -25,6 +36,7 @@ void Abort() { SetTestCounter(2); } else { std::cerr << "Program terminated with an unrecoverable error." << std::endl; + ShutdownLibrary(); exit(-1); } } @@ -33,6 +45,7 @@ void Exit(int code) { if (GetTestCounter() == 1) { SetTestCounter(2); } else { + ShutdownLibrary(); exit(code); } } @@ -51,15 +64,10 @@ struct Status::Rep { std::string error_message; }; -Status::Status(StatusCode code, const char* error_message) : rep_(new Rep) { - rep_->code = code; - rep_->error_message = error_message; -} - -Status::Status(StatusCode code, const std::string& error_message) +Status::Status(StatusCode code, absl::string_view error_message) : rep_(new Rep) { rep_->code = code; - rep_->error_message = error_message; + rep_->error_message = std::string(error_message); } Status::Status(const Status& s) diff --git a/src/freelist.h b/src/freelist.h index f4461f3b..8038048a 100644 --- a/src/freelist.h +++ b/src/freelist.h @@ -46,6 +46,13 @@ class FreeList { // Returns the number of allocated elements. size_t size() const { return chunk_size_ * chunk_index_ + element_index_; } + void swap(FreeList& other) { + std::swap(freelist_, other.freelist_); + std::swap(element_index_, other.element_index_); + std::swap(chunk_index_, other.chunk_index_); + std::swap(chunk_size_, other.chunk_size_); + } + // Returns the element as an array. 
T* operator[](size_t index) const { return freelist_[index / chunk_size_] + index % chunk_size_; @@ -76,7 +83,7 @@ class FreeList { // The last element is stored at freelist_[chunk_index_][element_index_] size_t element_index_ = 0; size_t chunk_index_ = 0; - const size_t chunk_size_ = 0; + size_t chunk_size_ = 0; // Do not modify except in swap() }; } // namespace model } // namespace sentencepiece diff --git a/src/freelist_test.cc b/src/freelist_test.cc index 9eb41a04..4c6c99ed 100644 --- a/src/freelist_test.cc +++ b/src/freelist_test.cc @@ -30,17 +30,20 @@ TEST(FreeListTest, BasicTest) { *n = i; } - EXPECT_EQ(kSize, l.size()); + FreeList l2(3); // Test swap() + l.swap(l2); + + EXPECT_EQ(kSize, l2.size()); for (size_t i = 0; i < kSize; ++i) { - EXPECT_EQ(i, *l[i]); + EXPECT_EQ(i, *l2[i]); } - l.Free(); - EXPECT_EQ(0, l.size()); + l2.Free(); + EXPECT_EQ(0, l2.size()); // Zero-initialized after `Free`. for (size_t i = 0; i < kSize; ++i) { - int *n = l.Allocate(); + int *n = l2.Allocate(); EXPECT_EQ(0, *n); } } diff --git a/src/init.h b/src/init.h index a569c226..6ae047e8 100644 --- a/src/init.h +++ b/src/init.h @@ -15,9 +15,46 @@ #ifndef INIT_H_ #define INIT_H_ +#include "common.h" +#include "third_party/absl/flags/flag.h" +#include "third_party/absl/flags/parse.h" + +#ifdef _USE_EXTERNAL_PROTOBUF +#include "google/protobuf/message_lite.h" +#else +#include "third_party/protobuf-lite/google/protobuf/message_lite.h" +#endif + +ABSL_DECLARE_FLAG(int32, minloglevel); + namespace sentencepiece { -void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, - bool remvoe_flags = true); +inline void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, + bool remove_arg = true) { + const auto unused_args = absl::ParseCommandLine(*argc, *argv); + + if (remove_arg) { + char **argv_val = *argv; + *argv = argv_val = argv_val + *argc - unused_args.size(); + std::copy(unused_args.begin(), unused_args.end(), argv_val); + *argc = 
static_cast(unused_args.size()); + } + + logging::SetMinLogLevel(absl::GetFlag(FLAGS_minloglevel)); +} + +inline void ShutdownLibrary() { + google::protobuf::ShutdownProtobufLibrary(); +#ifdef HAS_ABSL_CLEANUP_FLAGS + absl::CleanupFlags(); +#endif +} + +class ScopedResourceDestructor { + public: + ScopedResourceDestructor() {} + ~ScopedResourceDestructor() { ShutdownLibrary(); } +}; + } // namespace sentencepiece #endif // INIT_H_ diff --git a/src/init_test.cc b/src/init_test.cc index 9007bec7..e5cd2e40 100644 --- a/src/init_test.cc +++ b/src/init_test.cc @@ -19,8 +19,8 @@ ABSL_FLAG(int32, int32_f, 10, "int32_flags"); ABSL_FLAG(bool, bool_f, false, "bool_flags"); -ABSL_FLAG(int64, int64_f, 20, "int64_flags"); -ABSL_FLAG(uint64, uint64_f, 30, "uint64_flags"); +ABSL_FLAG(int64, int64_f, 9223372036854775807LL, "int64_flags"); +ABSL_FLAG(uint64, uint64_f, 18446744073709551615ULL, "uint64_flags"); ABSL_FLAG(double, double_f, 40.0, "double_flags"); ABSL_FLAG(std::string, string_f, "str", "string_flags"); @@ -33,8 +33,8 @@ namespace absl { TEST(FlagsTest, DefaultValueTest) { EXPECT_EQ(10, absl::GetFlag(FLAGS_int32_f)); EXPECT_EQ(false, absl::GetFlag(FLAGS_bool_f)); - EXPECT_EQ(20, absl::GetFlag(FLAGS_int64_f)); - EXPECT_EQ(30, absl::GetFlag(FLAGS_uint64_f)); + EXPECT_EQ(9223372036854775807LL, absl::GetFlag(FLAGS_int64_f)); + EXPECT_EQ(18446744073709551615ULL, absl::GetFlag(FLAGS_uint64_f)); EXPECT_EQ(40.0, absl::GetFlag(FLAGS_double_f)); EXPECT_EQ("str", absl::GetFlag(FLAGS_string_f)); } @@ -87,6 +87,8 @@ TEST(FlagsTest, ParseCommandLineFlagsTest3) { EXPECT_EQ(1, argc); } +#ifndef _USE_EXTERNAL_ABSL + TEST(FlagsTest, ParseCommandLineFlagsHelpTest) { const char *kFlags[] = {"program", "--help"}; int argc = arraysize(kFlags); @@ -141,4 +143,5 @@ TEST(FlagsTest, ParseCommandLineFlagsEmptyIntArgs) { char **argv = const_cast(kFlags); EXPECT_DEATH(ParseCommandLineFlags(kFlags[0], &argc, &argv), ); } +#endif // _USE_EXTERNAL_ABSL } // namespace absl diff --git 
a/src/model_interface.cc b/src/model_interface.cc index ea5d0e75..c49be1eb 100644 --- a/src/model_interface.cc +++ b/src/model_interface.cc @@ -134,32 +134,53 @@ void ModelInterface::InitializePieces() { } std::vector SplitIntoWords(absl::string_view text, - bool treat_whitespace_as_suffix) { + bool treat_ws_as_suffix, + bool allow_ws_only_pieces) { const char *begin = text.data(); const char *end = text.data() + text.size(); // Space symbol (U+2581) const absl::string_view kSpaceSymbol = "\xe2\x96\x81"; + bool in_ws_sequence = false; std::vector result; - if (treat_whitespace_as_suffix) { + if (treat_ws_as_suffix) { // put ws tokens at the end of non-ws sequences. if (begin < end) result.emplace_back(begin, 0); while (begin < end) { const int mblen = std::min(string_util::OneCharLen(begin), end - begin); const bool is_ws = absl::string_view(begin, mblen) == kSpaceSymbol; + + if (is_ws) { // keep track of sequences consecutive ws tokens. + in_ws_sequence = true; + } else if (in_ws_sequence) { + if (allow_ws_only_pieces) result.emplace_back(begin, 0); + + in_ws_sequence = false; + } + result.back() = absl::string_view(result.back().data(), result.back().size() + mblen); begin += mblen; - if (begin < end && is_ws) result.emplace_back(begin, 0); + + if (begin < end && is_ws && !allow_ws_only_pieces) + result.emplace_back(begin, 0); } } else { while (begin < end) { const int mblen = std::min(string_util::OneCharLen(begin), end - begin); + bool is_ws = absl::string_view(begin, mblen) == kSpaceSymbol; + + // if is whitespace (and not in sequence if allow_ws_only_pieces is True) if (begin == text.data() || - absl::string_view(begin, mblen) == kSpaceSymbol) + (is_ws && (!in_ws_sequence || !allow_ws_only_pieces))) { result.emplace_back(begin, 0); // add empty string piece. 
+ in_ws_sequence = true; + } + + if (in_ws_sequence && !is_ws) in_ws_sequence = false; + result.back() = absl::string_view(result.back().data(), result.back().size() + mblen); begin += mblen; diff --git a/src/model_interface.h b/src/model_interface.h index 75cbb233..06e92430 100644 --- a/src/model_interface.h +++ b/src/model_interface.h @@ -33,8 +33,9 @@ namespace sentencepiece { // "_this_is_a_pen" => ["_this", "_is", "_a", "_pen"] -std::vector SplitIntoWords(absl::string_view text, - bool add_ws_as_suffix = false); +std::vector SplitIntoWords( + absl::string_view text, bool treat_ws_as_suffix = false, + bool allow_ws_only_pieces = false); // Converts byte (0-255) to piece (e.g., 58 -> "<0x3A>"). std::string ByteToPiece(unsigned char c); @@ -52,8 +53,8 @@ class ModelProto; // Given a normalized string, returns a sequence of sentence pieces with ids. class ModelInterface { public: - using PieceToIdMap = absl::flat_hash_map; + using PieceToIdMap = absl::flat_hash_map; + // string_util::string_view_hash>; absl::string_view unk_piece() const; absl::string_view bos_piece() const; @@ -76,19 +77,6 @@ class ModelInterface { return matcher_.get(); } - // Sets the encoder version. Currently only unigram has an optimized encoder. - // The optimized version is always used by default if there is one, so - // normally users do not need to call this function. This function is provided - // just in case that a user want to manually choose which encoder version to - // use. - virtual util::Status SetEncoderVersion(EncoderVersion encoder_version) { - encoder_version_ = encoder_version; - return util::OkStatus(); - } - - // Returns the current encoder version in use. - virtual EncoderVersion GetEncoderVersion() const { return encoder_version_; } - // Given a normalized string, returns a sequence of sentence pieces with ids. // The concatenation of pieces must be the same as `normalized`. 
virtual EncodeResult Encode(absl::string_view normalized) const = 0; @@ -106,12 +94,41 @@ class ModelInterface { return EncodeResult(); } + // Sample `samples` many tokenisations from the segmentation lattice + // If `wor` is true, the samples are taken without replacement, and the scores + // are the inclusion probabilities of the elements in the sample; otherwise + // the samples are taken with replacement and the scores are the log-probs of + // sample elements + // If `include_best` is true, the best tokenisation is always included in the + // sample, and the remaining elements are sampled excluding the best. + virtual NBestEncodeResult SampleEncodeAndScore(absl::string_view normalized, + float alpha, int samples, + bool wor, + bool include_best) const { + LOG(ERROR) << "Not implemented."; + return {{EncodeResult(), 0.0}}; + } + + // Calculates the entropy of the segmentation lattice with inverse temperature + // `alpha`. Uses a novel dynamic program to calculate the entropy. + virtual float CalculateEntropy(absl::string_view normalized, + float alpha) const { + LOG(ERROR) << "Not implemented."; + return 0.0; + } + // Return true if SampleEncode returns a valid result. virtual bool IsSampleEncodeAvailable() const { return false; } // Return true if NBestEncode returns a valid result. virtual bool IsNBestEncodeAvailable() const { return false; } + // Return true if SampleEncodeAndScore returns a valid result. + virtual bool IsSampleEncodeAndScoreAvailable() const { return false; } + + // Return true if CalculateEntropy returns a valid result. + virtual bool IsCalculateEntropyAvailable() const { return false; } + // Returns the vocab id of `piece`. // Returns UNK(0) if `piece` is unknown virtual int PieceToId(absl::string_view piece) const; @@ -124,7 +141,10 @@ class ModelInterface { // Returns the size of sentence pieces, which is the same // as the size of vocabulary for NMT. 
- virtual int GetPieceSize() const { return model_proto_->pieces_size(); } + virtual int GetPieceSize() const { + if (!model_proto_) return 0; + return model_proto_->pieces_size(); + } // Returns the score of `id`. // Score represents a log probability of the piece. @@ -222,10 +242,6 @@ class ModelInterface { // unknown id. int unk_id_ = 0; - // The encoder version. Currently it is only effective for unigram model but - // ignored by other models. - EncoderVersion encoder_version_ = EncoderVersion::kOptimized; - // status. util::Status status_; }; diff --git a/src/model_interface_test.cc b/src/model_interface_test.cc index f5ee492b..09e41d34 100644 --- a/src/model_interface_test.cc +++ b/src/model_interface_test.cc @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License.! -#include "model_factory.h" #include "model_interface.h" + +#include "model_factory.h" #include "testharness.h" #include "third_party/absl/container/flat_hash_map.h" #include "util.h" @@ -412,6 +413,50 @@ TEST(ModelInterfaceTest, SplitIntoWordsSuffixTest) { } } +TEST(ModelInterfaceTest, SplitIntoWordsWhiteSpaceOnly) { + { + const auto v = + SplitIntoWords("this" WS "is" WS "a" WS "pen" WS, true, true); + EXPECT_EQ(4, v.size()); + EXPECT_EQ("this" WS, v[0]); + EXPECT_EQ("is" WS, v[1]); + EXPECT_EQ("a" WS, v[2]); + EXPECT_EQ("pen" WS, v[3]); + } + + { + const auto v = SplitIntoWords(WS WS WS "a", false, true); + EXPECT_EQ(1, v.size()); + EXPECT_EQ(WS WS WS "a", v[0]); + } + + { + const auto v = SplitIntoWords("a" WS WS WS, true, true); + EXPECT_EQ(1, v.size()); + EXPECT_EQ("a" WS WS WS, v[0]); + } + + { + const auto v = SplitIntoWords(WS WS, true, true); + EXPECT_EQ(1, v.size()); + EXPECT_EQ(WS WS, v[0]); + } + + { + const auto v = SplitIntoWords(WS WS "a" WS, true, true); + EXPECT_EQ(2, v.size()); + EXPECT_EQ(WS WS, v[0]); + EXPECT_EQ("a" WS, v[1]); + } + + { + const auto v = SplitIntoWords(WS WS "a" WS, false, true); + EXPECT_EQ(2, 
v.size()); + EXPECT_EQ(WS WS "a", v[0]); + EXPECT_EQ(WS, v[1]); + } +} + TEST(ModelInterfaceTest, ByteToPieceTest) { EXPECT_EQ(ByteToPiece(0), "<0x00>"); EXPECT_EQ(ByteToPiece(1), "<0x01>"); @@ -437,22 +482,6 @@ TEST(ModelInterfaceTest, PieceToByteTest) { EXPECT_EQ(PieceToByte("a"), -1); } -TEST(ModelInterfaceTest, SetEncoderVersion) { - for (const auto type : kModelTypes) { - ModelProto model_proto = MakeBaseModelProto(type); - AddPiece(&model_proto, "a"); - AddPiece(&model_proto, "b"); - auto model = ModelFactory::Create(model_proto); - - // Verify the default encoder version. - EXPECT_EQ(EncoderVersion::kOptimized, model->GetEncoderVersion()); - - // Set the encoder version to original and verify. - EXPECT_TRUE(model->SetEncoderVersion(EncoderVersion::kOriginal).ok()); - EXPECT_EQ(EncoderVersion::kOriginal, model->GetEncoderVersion()); - } -} - TEST(ModelInterfaceTest, VerifyOutputsEquivalent) { for (const auto type : kModelTypes) { ModelProto model_proto = MakeBaseModelProto(type); diff --git a/src/normalizer.cc b/src/normalizer.cc index 6d697e79..e9f68c69 100644 --- a/src/normalizer.cc +++ b/src/normalizer.cc @@ -48,9 +48,7 @@ Normalizer::~Normalizer() {} void Normalizer::Init() { absl::string_view index = spec_->precompiled_charsmap(); - if (index.empty()) { - LOG(INFO) << "precompiled_charsmap is empty. 
use identity normalization."; - } else { + if (!index.empty()) { absl::string_view trie_blob, normalized; #ifdef IS_BIG_ENDIAN status_ = DecodePrecompiledCharsMap(index, &trie_blob, &normalized, @@ -279,14 +277,14 @@ std::string Normalizer::EncodePrecompiledCharsMap( std::string blob; blob.append(string_util::EncodePOD(trie_blob.size())); blob.append(trie_blob.data(), trie_blob.size()); - blob.append(normalized.data(), normalized.size()); #ifdef IS_BIG_ENDIAN uint32 *data = reinterpret_cast(const_cast(blob.data())); - for (int i = 0; i <= trie_blob.size() / 4; ++i) - data[i] = util::Swap32(data[i]); + for (int i = 0; i < blob.size() / 4; ++i) data[i] = util::Swap32(data[i]); #endif + blob.append(normalized.data(), normalized.size()); + return blob; } @@ -295,7 +293,6 @@ util::Status Normalizer::DecodePrecompiledCharsMap( absl::string_view blob, absl::string_view *trie_blob, absl::string_view *normalized, std::string *buffer) { uint32 trie_blob_size = 0; - if (blob.size() <= sizeof(trie_blob_size) || !string_util::DecodePOD( absl::string_view(blob.data(), sizeof(trie_blob_size)), @@ -307,15 +304,17 @@ util::Status Normalizer::DecodePrecompiledCharsMap( trie_blob_size = util::Swap32(trie_blob_size); #endif - if (trie_blob_size >= blob.size()) + if (trie_blob_size >= blob.size()) { return util::InternalError("Trie data size exceeds the input blob size."); + } blob.remove_prefix(sizeof(trie_blob_size)); #ifdef IS_BIG_ENDIAN + CHECK_OR_RETURN(buffer); buffer->assign(blob.data(), trie_blob_size); uint32 *data = reinterpret_cast(const_cast(buffer->data())); - for (int i = 0; i < trie_blob_size / 4; ++i) data[i] = util::Swap32(data[i]); + for (int i = 0; i < buffer->size() / 4; ++i) data[i] = util::Swap32(data[i]); *trie_blob = absl::string_view(buffer->data(), trie_blob_size); #else *trie_blob = absl::string_view(blob.data(), trie_blob_size); diff --git a/src/normalizer.h b/src/normalizer.h index 34d9d09e..467aea75 100644 --- a/src/normalizer.h +++ b/src/normalizer.h @@ 
-22,7 +22,6 @@ #include #include "common.h" -#include "util.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" #include "third_party/absl/strings/string_view.h" diff --git a/src/normalizer_test.cc b/src/normalizer_test.cc index 585e8f44..6c402bf3 100644 --- a/src/normalizer_test.cc +++ b/src/normalizer_test.cc @@ -12,11 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License.! -#include "normalizer.h" - #include #include "builder.h" +#include "normalizer.h" #include "sentencepiece_trainer.h" #include "testharness.h" #include "util.h" diff --git a/src/pretokenizer_for_training.cc b/src/pretokenizer_for_training.cc index 049658eb..d4f492c5 100644 --- a/src/pretokenizer_for_training.cc +++ b/src/pretokenizer_for_training.cc @@ -11,9 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License.! +#include "pretokenizer_for_training.h" + #include -#include "pretokenizer_for_training.h" #include "third_party/absl/strings/str_replace.h" namespace sentencepiece { @@ -24,10 +25,9 @@ namespace { // defined them explicitly to avoid the dependency to trainier_interface. // Currently, we have no separated build rules. const char kWSStr[] = "\xe2\x96\x81"; -const char kUPPBoundaryStr[] = "\t"; } // namespace -std::string PretokenizerForTrainingInterface::PreTokenize( +std::vector PretokenizerForTrainingInterface::PreTokenize( absl::string_view text) const { return Postprocess(Tokenize(Preprocess(text))); } @@ -40,14 +40,17 @@ std::string PretokenizerForTrainingInterface::Preprocess( } // static -std::string PretokenizerForTrainingInterface::Postprocess( +std::vector PretokenizerForTrainingInterface::Postprocess( const SentencePieceText &spt) { // Inserts kUPPBoundaryStr before/after of token boundaries. 
+ std::vector result; std::string output; + int prev = 0; for (const auto &piece : spt.pieces()) { if (prev == piece.begin() && piece.begin() != 0) { - output += kUPPBoundaryStr; + result.push_back(output); + output.clear(); } else { output.append(piece.begin() - prev, ' '); } @@ -55,8 +58,11 @@ std::string PretokenizerForTrainingInterface::Postprocess( prev = piece.end(); } - // Restores kWSStr. - return absl::StrReplaceAll(output, {{" ", kWSStr}}); + if (!output.empty()) result.push_back(output); + + for (auto &w : result) w = absl::StrReplaceAll(w, {{" ", kWSStr}}); + + return result; } } // namespace pretokenizer diff --git a/src/pretokenizer_for_training.h b/src/pretokenizer_for_training.h index 2d3bc827..fa54f95c 100644 --- a/src/pretokenizer_for_training.h +++ b/src/pretokenizer_for_training.h @@ -44,7 +44,7 @@ class PretokenizerForTrainingInterface { // segmentation: piece[0] = {0, 1}, piece[1] = {2, 6}, // piece[2] = {7, 15}, piece[3] = {15, 20} // output: I love sentencepiece. - std::string PreTokenize(absl::string_view text) const; + std::vector PreTokenize(absl::string_view text) const; // Returns pre-tokenized result. // Note that the pre-tokenized constraint is specified with the @@ -54,7 +54,7 @@ class PretokenizerForTrainingInterface { private: static std::string Preprocess(absl::string_view text); - static std::string Postprocess(const SentencePieceText &spt); + static std::vector Postprocess(const SentencePieceText &spt); }; } // namespace pretokenizer diff --git a/src/pretokenizer_for_training_test.cc b/src/pretokenizer_for_training_test.cc index 80f4787c..99db0c51 100644 --- a/src/pretokenizer_for_training_test.cc +++ b/src/pretokenizer_for_training_test.cc @@ -12,8 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License.! 
#include "pretokenizer_for_training.h" + #include "testharness.h" #include "third_party/absl/strings/str_cat.h" +#include "third_party/absl/strings/str_join.h" +#include "third_party/absl/strings/str_split.h" #include "trainer_interface.h" namespace sentencepiece { @@ -64,9 +67,11 @@ TEST(PretokenizerForTrainingTest, BaseTest) { mock.SetOutput(spt); - EXPECT_EQ(absl::StrCat("I", TrainerInterface::kWSStr, "love", - TrainerInterface::kWSStr, "sentence\tpiece"), - mock.PreTokenize("I love sentencepiece")); + const auto expected = + absl::StrCat("I", TrainerInterface::kWSStr, "love", + TrainerInterface::kWSStr, "sentence||||piece"); + EXPECT_EQ(expected, + absl::StrJoin(mock.PreTokenize("I love sentencepiece"), "||||")); } { @@ -94,7 +99,9 @@ TEST(PretokenizerForTrainingTest, BaseTest) { mock.SetOutput(spt); - EXPECT_EQ("これ\tは\tペン\tです", mock.PreTokenize("これはペンです")); + const auto expected = "これ||||は||||ペン||||です"; + EXPECT_EQ(expected, + absl::StrJoin(mock.PreTokenize("これはペンです"), "||||")); } } diff --git a/src/sentencepiece_model.proto b/src/sentencepiece_model.proto index 8575f3f2..31d6eea7 100644 --- a/src/sentencepiece_model.proto +++ b/src/sentencepiece_model.proto @@ -20,6 +20,7 @@ option optimize_for = LITE_RUNTIME; package sentencepiece; // TrainerSpec encodes a various parameters for SentencePiece training. +// Next id: 54 message TrainerSpec { /////////////////////////////////////////////////////////////////// // General parameters @@ -62,6 +63,16 @@ message TrainerSpec { // Size of self-test samples, which are encoded in the model file. optional int32 self_test_sample_size = 6 [default = 0]; + // Whether to use DP version of sentencepiece. Use it with TSV input format + // (requires precomputed word tab counts to work). + optional bool enable_differential_privacy = 50 [default = false]; + // Set these parameters if you need DP version of sentencepiece. + // std of noise to add. 
+ optional float differential_privacy_noise_level = 51 [default = 0.0]; + // Clipping threshold to apply after adding noise. All the words with + // frequency less than this value are dropped. + optional uint64 differential_privacy_clipping_threshold = 52 [default = 0]; + /////////////////////////////////////////////////////////////////// // Training parameters. // @@ -74,7 +85,7 @@ message TrainerSpec { // Maximum size of sentences the trainer loads from `input` parameter. // Trainer simply loads the `input` files in sequence. // It is better to shuffle the input corpus randomly. - optional int32 input_sentence_size = 11 [default = 0]; + optional uint64 input_sentence_size = 11 [default = 0]; optional bool shuffle_input_sentence = 19 [default = true]; // Maximum size of sentences to make seed sentence pieces. @@ -139,9 +150,20 @@ message TrainerSpec { // of sentence. optional bool treat_whitespace_as_suffix = 24 [default = false]; + // Allows pieces that only contain whitespaces instead of appearing only as + // prefix or suffix of other pieces. + optional bool allow_whitespace_only_pieces = 26 [default = false]; + // Split all digits (0-9) into separate pieces. optional bool split_digits = 25 [default = false]; + // Defines the pre-tokenization delimiter. + // When specified, no pieces crossing this delimiter is not included + // in the vocab. Then the delimiter string is virtually ignored + // during the training. This field can allows constraints on the vocabulary + // selection. Note that this field is available on unigram mode. 
+ optional string pretokenization_delimiter = 53 [ default = ""]; + /////////////////////////////////////////////////////////////////// // Vocabulary management // diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc index 91df9f31..c71f4c31 100644 --- a/src/sentencepiece_processor.cc +++ b/src/sentencepiece_processor.cc @@ -45,8 +45,168 @@ const char kSpaceSymbol[] = "\xe2\x96\x81"; // since this character can be useful both for user and // developer. We can easily figure out that is emitted. const char kDefaultUnknownSymbol[] = " \xE2\x81\x87 "; + +// REPLACEMENT CHARACTER (U+FFFD) in UTF-8. +const char kReplacementCharacter[] = "\xef\xbf\xbd"; + +std::vector ToPieceArray(const std::vector &v) { + std::vector out(v.size()); + for (int i = 0; i < v.size(); ++i) out[i] = v[i]; + return out; +} + +void ConvertToUnicodeSpansInternal(SentencePieceText *spt) { + if (spt == nullptr || spt->text().empty()) return; + + std::vector utf8_to_unicode(spt->text().size() + 1, 0); + absl::string_view str = spt->text(); + size_t prev = 0; + int ulen = 0; + while (!str.empty()) { + const size_t mblen = std::max(1, string_util::OneCharLen(str.data())); + for (int i = prev; i < prev + mblen; ++i) { + utf8_to_unicode[i] = ulen; + } + ++ulen; + prev += mblen; + str.remove_prefix(mblen); + } + utf8_to_unicode[prev] = ulen; + + auto clip = [&](int s) { + return std::min(std::max(0, s), utf8_to_unicode.size() - 1); + }; + + for (auto &piece : *(spt->mutable_pieces())) { + piece.set_begin(utf8_to_unicode[clip(piece.begin())]); + piece.set_end(utf8_to_unicode[clip(piece.end())]); + } +} + } // namespace +ImmutableSentencePieceText::ImmutableSentencePieceText() + : spt_(&SentencePieceText::default_instance()) {} + +ImmutableSentencePieceText::ImmutableSentencePieceText( + const SentencePieceText &spt) + : spt_(&spt) {} + +ImmutableSentencePieceText::~ImmutableSentencePieceText() {} + +ImmutableSentencePieceText_ImmutableSentencePiece:: + 
ImmutableSentencePieceText_ImmutableSentencePiece() + : sp_(&SentencePieceText_SentencePiece::default_instance()) {} + +ImmutableSentencePieceText_ImmutableSentencePiece:: + ImmutableSentencePieceText_ImmutableSentencePiece( + const SentencePieceText_SentencePiece &sp) + : sp_(&sp) {} + +const std::string &ImmutableSentencePieceText_ImmutableSentencePiece::piece() + const { + return sp_->piece(); +} + +const std::string &ImmutableSentencePieceText_ImmutableSentencePiece::surface() + const { + return sp_->surface(); +} + +uint32_t ImmutableSentencePieceText_ImmutableSentencePiece::id() const { + return sp_->id(); +} + +uint32_t ImmutableSentencePieceText_ImmutableSentencePiece::begin() const { + return sp_->begin(); +} + +uint32_t ImmutableSentencePieceText_ImmutableSentencePiece::end() const { + return sp_->end(); +} + +std::vector +ImmutableSentencePieceText::pieces() const { + std::vector pieces( + spt_->pieces_size()); + for (int i = 0; i < spt_->pieces_size(); ++i) + pieces[i] = + ImmutableSentencePieceText_ImmutableSentencePiece(spt_->pieces(i)); + return pieces; +} + +size_t ImmutableSentencePieceText::pieces_size() const { + return spt_->pieces_size(); +} + +ImmutableSentencePieceText_ImmutableSentencePiece +ImmutableSentencePieceText::pieces(int index) const { + return ImmutableSentencePieceText_ImmutableSentencePiece(spt_->pieces(index)); +} + +const std::string &ImmutableSentencePieceText::text() const { + return spt_->text(); +} + +float ImmutableSentencePieceText::score() const { + return spt_ ? 
spt_->score() : 0.0; +} + +SentencePieceText *ImmutableSentencePieceText::mutable_proto() { + if (rep_ == nullptr) { + rep_ = std::make_shared(); + spt_ = rep_.get(); + } + return rep_.get(); +} + +void ImmutableSentencePieceText::ConvertToUnicodeSpans() { + ConvertToUnicodeSpansInternal(mutable_proto()); +} + +util::bytes ImmutableSentencePieceText::SerializeAsString() const { + return spt_->SerializeAsString(); +} + +ImmutableNBestSentencePieceText::ImmutableNBestSentencePieceText() {} +ImmutableNBestSentencePieceText::~ImmutableNBestSentencePieceText() {} + +size_t ImmutableNBestSentencePieceText::nbests_size() const { + return rep_ ? rep_->nbests_size() : 0; +} + +ImmutableSentencePieceText ImmutableNBestSentencePieceText::nbests( + int index) const { + return ImmutableSentencePieceText(rep_->nbests(index)); +} + +std::vector +ImmutableNBestSentencePieceText::nbests() const { + if (rep_ == nullptr) return {}; + std::vector nbests(rep_->nbests_size()); + for (int i = 0; i < rep_->nbests_size(); ++i) + nbests[i] = ImmutableSentencePieceText(rep_->nbests(i)); + return nbests; +} + +NBestSentencePieceText *ImmutableNBestSentencePieceText::mutable_proto() { + if (rep_ == nullptr) { + rep_ = std::make_shared(); + } + return rep_.get(); +} + +void ImmutableNBestSentencePieceText::ConvertToUnicodeSpans() { + if (!mutable_proto()) return; + for (auto &spt : *(mutable_proto()->mutable_nbests())) { + ConvertToUnicodeSpansInternal(&spt); + } +} + +util::bytes ImmutableNBestSentencePieceText::SerializeAsString() const { + return rep_ ? 
rep_->SerializeAsString() : ""; +} + SentencePieceProcessor::SentencePieceProcessor() {} SentencePieceProcessor::~SentencePieceProcessor() {} @@ -78,7 +238,6 @@ util::Status SentencePieceProcessor::Load( std::unique_ptr model_proto) { model_proto_ = std::move(model_proto); model_ = ModelFactory::Create(*model_proto_); - normalizer_ = absl::make_unique( model_proto_->normalizer_spec(), model_proto_->trainer_spec()); @@ -117,15 +276,6 @@ util::Status SentencePieceProcessor::Load( return util::OkStatus(); } -util::Status SentencePieceProcessor::SetEncoderVersion( - EncoderVersion encoder_version) { - return model_->SetEncoderVersion(encoder_version); -} - -EncoderVersion SentencePieceProcessor::GetEncoderVersion() const { - return model_->GetEncoderVersion(); -} - util::Status SentencePieceProcessor::SetEncodeExtraOptions( absl::string_view extra_options) { return ParseExtraOptions(extra_options, &encode_extra_options_); @@ -145,7 +295,7 @@ util::Status SentencePieceProcessor::status() const { } util::Status SentencePieceProcessor::SetVocabulary( - const std::vector &valid_vocab) { + const std::vector &valid_vocab) { RETURN_IF_ERROR(status()); // TODO(taku): supports vocabulary constraint in BPE model. 
@@ -153,7 +303,8 @@ util::Status SentencePieceProcessor::SetVocabulary( CHECK_OR_RETURN(type == TrainerSpec::UNIGRAM || type == TrainerSpec::BPE) << "Vocabulary constraint is only enabled in subword units."; - const std::set vocab(valid_vocab.begin(), valid_vocab.end()); + const std::set vocab(valid_vocab.begin(), + valid_vocab.end()); for (int i = 0; i < model_proto_->pieces_size(); ++i) { auto *piece = model_proto_->mutable_pieces(i); @@ -206,7 +357,7 @@ util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename, } } - return SetVocabulary(vocab); + return SetVocabulary(ToPieceArray(vocab)); } #define CHECK_OR_RETURN_STATUS_STL(container) \ @@ -249,6 +400,12 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input, util::Status SentencePieceProcessor::Decode( const std::vector &pieces, std::string *detokenized) const { + return Decode(ToPieceArray(pieces), detokenized); +} + +util::Status SentencePieceProcessor::Decode( + const std::vector &pieces, + std::string *detokenized) const { CHECK_OR_RETURN_STATUS_STL(detokenized); SentencePieceText spt; @@ -333,6 +490,56 @@ util::Status SentencePieceProcessor::SampleEncode(absl::string_view input, return util::OkStatus(); } +util::Status SentencePieceProcessor::SampleEncodeAndScore( + absl::string_view input, int num_samples, float alpha, bool wor, + bool include_best, + std::vector, float>> *pieces) const { + CHECK_OR_RETURN_STATUS_STL(pieces); + + NBestSentencePieceText spt; + RETURN_IF_ERROR( + SampleEncodeAndScore(input, num_samples, alpha, wor, include_best, &spt)); + + pieces->clear(); + pieces->reserve(spt.nbests_size()); + + for (const auto &nbest : spt.nbests()) { + std::vector result; + result.reserve(nbest.pieces_size()); + for (const auto &sp : nbest.pieces()) { + result.emplace_back(sp.piece()); + } + pieces->emplace_back(result, nbest.score()); + } + + return util::OkStatus(); +} + +util::Status SentencePieceProcessor::SampleEncodeAndScore( + absl::string_view input, int 
num_samples, float alpha, bool wor, + bool include_best, + std::vector, float>> *ids) const { + CHECK_OR_RETURN_STATUS_STL(ids); + + NBestSentencePieceText spt; + RETURN_IF_ERROR( + SampleEncodeAndScore(input, num_samples, alpha, wor, include_best, &spt)); + + ids->clear(); + ids->reserve(spt.nbests_size()); + + for (const auto &nbest : spt.nbests()) { + std::vector result; + result.reserve(nbest.pieces_size()); + for (const auto &sp : nbest.pieces()) { + result.emplace_back(sp.id()); + } + ids->emplace_back(result, nbest.score()); + } + + return util::OkStatus(); +} + util::Status SentencePieceProcessor::PopulateSentencePieceText( absl::string_view input, absl::string_view normalized, const std::vector &norm_to_orig, const EncodeResult &result, @@ -503,26 +710,75 @@ util::Status SentencePieceProcessor::SampleEncode( return util::OkStatus(); } +util::Status SentencePieceProcessor::SampleEncodeAndScore( + absl::string_view input, int samples, float alpha, bool wor, + bool include_best, NBestSentencePieceText *samples_spt) const { + CHECK_OR_RETURN(model_->IsSampleEncodeAndScoreAvailable()) + << "SampleEncodeAndScore is not available for the current model."; + std::string normalized; + std::vector norm_to_orig; + RETURN_IF_ERROR(normalizer_->Normalize(input, &normalized, &norm_to_orig)); + + const auto results = model_->SampleEncodeAndScore(normalized, alpha, samples, + wor, include_best); + CHECK_OR_RETURN(!results.empty()) + << "SampleEncodeAndScore returns empty result."; + + for (const auto &result : results) { + auto *spt = samples_spt->add_nbests(); + spt->set_score(result.second); + RETURN_IF_ERROR(PopulateSentencePieceText(input, normalized, norm_to_orig, + result.first, spt)); + } + + return util::OkStatus(); +} + +util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input, + float alpha, + float *entropy) const { + CHECK_OR_RETURN(model_->IsCalculateEntropyAvailable()) + << "CalculateEntropy is not available for the current model."; + 
std::string normalized; + std::vector norm_to_orig; + RETURN_IF_ERROR(normalizer_->Normalize(input, &normalized, &norm_to_orig)); + + *entropy = model_->CalculateEntropy(normalized, alpha); + return util::OkStatus(); +} + util::Status SentencePieceProcessor::Decode( const std::vector &pieces, SentencePieceText *spt) const { + return Decode(ToPieceArray(pieces), spt); +} + +util::Status SentencePieceProcessor::Decode( + const std::vector &pieces, + SentencePieceText *spt) const { CHECK_OR_RETURN_STATUS_PROTO(spt); const char *unk_surface = kDefaultUnknownSymbol; if (model_proto_ && model_proto_->trainer_spec().has_unk_surface()) unk_surface = model_proto_->trainer_spec().unk_surface().c_str(); - auto DecodeSentencePiece = [&](absl::string_view piece, int id, - bool is_bos_ws, bool is_eos_ws) -> std::string { - if (IsControl(id)) { // , - return ""; // invisible symbol. + // Returns decoded piece and a boolean indicating if the function has consumed + // a bos whitespace token (a piece starting with a kSpaceSymbol). This is used + // to strip only the first whitespace token from the decoded sequence for + // add_dummy_prefix. + auto DecodeSentencePiece = + [&](absl::string_view piece, int id, + bool is_bos_ws, bool is_eos_ws) -> std::pair { + if (IsControl(id)) { // , + return std::make_pair("", false); // invisible symbol. } else if (IsUnknown(id)) { if (IdToPiece(id) == piece) { // - return unk_surface; + return std::make_pair(unk_surface, false); } else { // return piece when piece is not . 
- return std::string(piece); + return std::make_pair(std::string(piece), false); } } + bool has_bos_ws = false; // whether the token starts with a kSpaceSymbol if(!model_proto_ || !model_proto_->has_trainer_spec() || !model_proto_->trainer_spec().treat_whitespace_as_suffix()) { if(is_bos_ws && @@ -532,8 +788,14 @@ util::Status SentencePieceProcessor::Decode( model_proto_->normalizer_spec().remove_extra_whitespaces())))) { // Consume if the current position is bos and // piece starts with kSpaceSymbol. - absl::ConsumePrefix(&piece, kSpaceSymbol); + has_bos_ws = absl::ConsumePrefix(&piece, kSpaceSymbol); + + if (model_proto_ && + model_proto_->normalizer_spec().remove_extra_whitespaces()) { + // if we are removing extra whitespace, we remove all leading whitespace + has_bos_ws = false; } + } } else { if(is_eos_ws && (!model_proto_ || @@ -547,69 +809,101 @@ util::Status SentencePieceProcessor::Decode( } } - return absl::StrReplaceAll(piece, {{kSpaceSymbol, " "}}); + return std::make_pair(absl::StrReplaceAll(piece, {{kSpaceSymbol, " "}}), + has_bos_ws); }; - for (const std::string &w : pieces) { + for (absl::string_view w : pieces) { auto *sp = spt->add_pieces(); - sp->set_piece(w); + sp->mutable_piece()->assign(w.data(), w.size()); sp->set_id(PieceToId(w)); } RETURN_IF_ERROR(ApplyExtraOptions(decode_extra_options_, spt)); std::string *text = spt->mutable_text(); - auto SetSurface = [&](int index, const std::string &surface) { + auto SetSurface = [&](int index, absl::string_view surface) { auto *sp = spt->mutable_pieces(index); - sp->set_surface(surface); + sp->set_surface(std::string(surface)); sp->set_begin(text->size()); sp->set_end(text->size() + surface.size()); - *text += surface; + absl::StrAppend(text, surface); }; - auto ProcessBytePieces = [&](int begin, int end) -> util::Status { - if (begin < end) { - // Constructs byte sequence. 
- std::string bytes; - for (int i = begin; i < end; ++i) { - const auto &sp = spt->pieces(i); - const int byte = PieceToByte(sp.piece()); - CHECK_LE_OR_RETURN(0, byte); - bytes.append(1, byte); - } - // Decodes byte sequence as UTF-8 and encodes the result into UTF-8 bytes - // again. - int i = begin; - for (const char32 uc : - string_util::UTF8ToUnicodeText(absl::string_view(bytes))) { - if (uc == kUnicodeError) { - // Invalid UTF-8 bytes are mapped to REPLACEMENT CHARACTER (U+FFFD). - SetSurface(i++, string_util::UnicodeCharToUTF8(kUnicodeError)); - } else { - const std::string utf8 = string_util::UnicodeCharToUTF8(uc); - for (int j = 0; j < utf8.size(); j++) { - // The last byte piece holds the surface of the original unknown - // character. The other byte pieces hold an empty string as - // surface. - if (j == utf8.size() - 1) { - SetSurface(i++, utf8); - } else { - SetSurface(i++, ""); - } + + auto ProcessBytePieces = [&](int token_index_begin, + int token_index_end) -> util::Status { + if (token_index_begin >= token_index_end) { + return util::OkStatus(); + } + + // Constructs byte sequence. + std::string bytes; + for (int i = token_index_begin; i < token_index_end; ++i) { + const auto &sp = spt->pieces(i); + const int byte = PieceToByte(sp.piece()); + CHECK_LE_OR_RETURN(0, byte); + bytes.append(1, byte); + } + + // Set surfaces of `bytes` for each Unicode character. + int offset = 0; + const int bytes_len = bytes.size(); + while (offset < bytes_len) { + // Consume `bytes` by one Unicode character. + size_t consumed; // Number of bytes consumed in this iteration. + const bool is_valid = string_util::IsValidDecodeUTF8( + absl::string_view(bytes).substr(offset), &consumed); + + // Set surfaces of the consumed byte pieces. + const int token_index = token_index_begin + offset; + + if (!is_valid) { + // The byte piece at `token_index` is structurally invalid. Map it to + // REPLACEMENT CHARACTER (U+FFFD). 
+ CHECK_EQ_OR_RETURN(consumed, 1); + SetSurface(token_index, kReplacementCharacter); + } else { + const absl::string_view utf8 = + absl::string_view(bytes).substr(offset, consumed); + for (int j = 0; j < consumed; j++) { + // The last byte piece holds the surface of the original unknown + // character. The other byte pieces hold an empty string as + // surface. + if (j == consumed - 1) { + SetSurface(token_index + j, utf8); + } else { + SetSurface(token_index + j, ""); } } } - CHECK_EQ_OR_RETURN(i, end); + offset += consumed; } + CHECK_EQ_OR_RETURN(token_index_begin + offset, token_index_end); + return util::OkStatus(); }; + int byte_start = 0; + bool is_bos_ws = true; // whether we expect a bos ws token to consume. + bool bos_ws_seen = false; + std::string decoded; + for (int i = 0; i < spt->pieces_size(); ++i) { const auto &sp = spt->pieces(i); if (!IsByte(sp.id())) { RETURN_IF_ERROR(ProcessBytePieces(byte_start, i)); + + // if we have seen a bos_ws token or any non-empty token + if (bos_ws_seen || !text->empty()) is_bos_ws = false; + byte_start = i + 1; - bool is_eos_space = i == spt->pieces_size() - 1; - SetSurface(i, DecodeSentencePiece(sp.piece(), sp.id(), text->empty(), is_eos_space)); + bool is_eos_ws = i == spt->pieces_size() - 1; + + std::tie(decoded, bos_ws_seen) = + DecodeSentencePiece(sp.piece(), sp.id(), is_bos_ws, is_eos_ws); + + SetSurface(i, decoded); + } } RETURN_IF_ERROR(ProcessBytePieces(byte_start, spt->pieces_size())); @@ -670,48 +964,18 @@ util::Status SentencePieceProcessor::Decode( util::Status SentencePieceProcessor::Decode(const std::vector &ids, SentencePieceText *spt) const { std::vector pieces; + const int num_pieces = GetPieceSize(); pieces.reserve(ids.size()); for (const int id : ids) { + if (id < 0 || id >= num_pieces) { + return util::Status(util::StatusCode::kOutOfRange, + absl::StrCat("Invalid id: ", id)); + } pieces.emplace_back(IdToPiece(id)); } return Decode(pieces, spt); } -std::string 
SentencePieceProcessor::EncodeAsSerializedProto( - absl::string_view input) const { - SentencePieceText spt; - if (!Encode(input, &spt).ok()) return ""; - return spt.SerializeAsString(); -} - -std::string SentencePieceProcessor::SampleEncodeAsSerializedProto( - absl::string_view input, int nbest_size, float alpha) const { - SentencePieceText spt; - if (!SampleEncode(input, nbest_size, alpha, &spt).ok()) return ""; - return spt.SerializeAsString(); -} - -std::string SentencePieceProcessor::NBestEncodeAsSerializedProto( - absl::string_view input, int nbest_size) const { - NBestSentencePieceText spt; - if (!NBestEncode(input, nbest_size, &spt).ok()) return ""; - return spt.SerializeAsString(); -} - -std::string SentencePieceProcessor::DecodePiecesAsSerializedProto( - const std::vector &pieces) const { - SentencePieceText spt; - if (!Decode(pieces, &spt).ok()) return ""; - return spt.SerializeAsString(); -} - -std::string SentencePieceProcessor::DecodeIdsAsSerializedProto( - const std::vector &ids) const { - SentencePieceText spt; - if (!Decode(ids, &spt).ok()) return ""; - return spt.SerializeAsString(); -} - #define CHECK_STATUS_OR_RETURN_DEFAULT(value) \ if (!status().ok()) { \ LOG(ERROR) << status().message() << "\nReturns default value " << value; \ @@ -810,6 +1074,15 @@ util::Status SentencePieceProcessor::ApplyExtraOptions( piece->set_piece(model_->bos_piece().data(), model_->bos_piece().size()); } break; + case UNK_PIECE: { + for (int i = 0; i < spt->pieces_size(); ++i) { + auto *piece = spt->mutable_pieces(i); + if (IsUnknown(piece->id())) { + piece->set_piece(model_->unk_piece().data(), + model_->unk_piece().size()); + } + } + } break; default: return util::InternalError("unknown extra_option type."); } @@ -832,7 +1105,9 @@ util::Status SentencePieceProcessor::ParseExtraOptions( static std::map extra_option_map = {{"bos", SentencePieceProcessor::BOS}, {"eos", SentencePieceProcessor::EOS}, - {"reverse", SentencePieceProcessor::REVERSE}}; + {"reverse", 
SentencePieceProcessor::REVERSE}, + {"unk", SentencePieceProcessor::UNK_PIECE}, + {"unk_piece", SentencePieceProcessor::UNK_PIECE}}; for (const auto &s : absl::StrSplit(extra_option, ":")) { const auto it = extra_option_map.find(s); CHECK_OR_RETURN(it != extra_option_map.end()) @@ -870,8 +1145,13 @@ std::string SentencePieceProcessor::serialized_model_proto() const { return model_proto_ ? model_proto_->SerializeAsString() : ""; } -namespace io { +// Set seed value of random generator. +// Do not set static_cast(-1), +// as this seed is reserved for initializing from +// std::random_device. +void SetRandomGeneratorSeed(unsigned int seed); +namespace io { util::Status LoadModelProto(absl::string_view filename, ModelProto *model_proto) { if (filename.empty()) { diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h index 72279207..14b1e8cd 100644 --- a/src/sentencepiece_processor.h +++ b/src/sentencepiece_processor.h @@ -18,40 +18,17 @@ #include #include #include +#include #include #include -#if defined(_USE_INTERNAL_STRING_VIEW) -#include "third_party/absl/strings/string_view.h" -#elif defined(_USE_TF_STRING_VIEW) -#include "absl/strings/string_view.h" -#else -// Minimum absl::string_view class that is used only for -// the argument of public APIs. 
+#ifndef SWIG namespace absl { -class string_view { - public: - string_view() : ptr_(nullptr), length_(0) {} - string_view(const std::string &str) : ptr_(str.data()), length_(str.size()) {} - string_view(const char *str) : ptr_(str), length_(std::strlen(str)) {} - string_view(const char *data, size_t len) : ptr_(data), length_(len) {} - - const char *data() const { return ptr_; } - size_t size() const { return length_; } - - private: - const char *ptr_ = nullptr; - size_t length_ = 0; -}; +using std::string_view; } // namespace absl -#endif - -namespace sentencepiece { - -#ifndef SWIG -using EncodeResult = std::vector>; #endif // SWIG +namespace sentencepiece { namespace util { enum class StatusCode : int { @@ -78,8 +55,7 @@ class Status { public: Status(); ~Status(); - Status(StatusCode code, const char *error_message); - Status(StatusCode code, const std::string &error_message); + Status(StatusCode code, absl::string_view error_message); Status(const Status &s); void operator=(const Status &s); bool operator==(const Status &s) const; @@ -126,17 +102,17 @@ class Status { // sp.Load("//path/to/model"); // // vector sps; -// sp.Encode("hello world.", &sps); +// sp.Encode("hello world.", &sps).IgnoreError(); // // vector ids; -// sp.Encode("hello world.", &ids); +// sp.Encode("hello world.", &ids).IgnoreError(); // // string detok; // sp.Decode(sps, &detok); -// CHECK_EQ("hello world.", detok); +// CHECK_EQ("hello world.", detok).IgnoreError(); // // sp.Decode(ids, &detok); -// CHECK_EQ("hello world.", detok); +// CHECK_EQ("hello world.", detok).IgnoreError(); // // We can also use SentencePieceText which manages the byte-offsets // between user input (output) and internal sentence pieces. @@ -163,20 +139,100 @@ namespace normalizer { class Normalizer; } // namespace normalizer -// Defines the multiple versions of encoder within each model. Currently only -// the Unigram model has an optimized encoder. 
-enum class EncoderVersion { - kOptimized, // The optimized encoder (default). - kOriginal // The original encoder (user may choose to fall back to this - // just in case). -}; - +#ifndef SWIGGO namespace util { // Redefine std::string for serialized_proto interface as Python's string is // a Unicode string. We can enforce the return value to be raw byte sequence // with SWIG's typemap. using bytes = std::string; } // namespace util +#endif // SWIGGO + +class NBestSentencePieceText; +class ModelInterface; +class SentencePieceText; +class SentencePieceText_SentencePiece; + +// Wrapper class of SentencePieceText +// This wrapper only allows an immutable access to the proto and +// hides the actual implementation of protobuf. +// See sentencepiece.proto for the details of this class. +class ImmutableSentencePieceText_ImmutableSentencePiece { + public: + ImmutableSentencePieceText_ImmutableSentencePiece(); + ~ImmutableSentencePieceText_ImmutableSentencePiece() = default; + + const std::string &piece() const; + const std::string &surface() const; + uint32_t id() const; + uint32_t begin() const; + uint32_t end() const; + + friend class ImmutableSentencePieceText; + + private: + explicit ImmutableSentencePieceText_ImmutableSentencePiece( + const SentencePieceText_SentencePiece &sp); + const SentencePieceText_SentencePiece *sp_ = nullptr; +}; + +class ImmutableSentencePieceText { + public: + ImmutableSentencePieceText(); + virtual ~ImmutableSentencePieceText(); + + std::vector pieces() const; + + size_t pieces_size() const; + ImmutableSentencePieceText_ImmutableSentencePiece pieces(int index) const; + + const std::string &text() const; + float score() const; + + util::bytes SerializeAsString() const; + + // Returns the actual mutable proto. + // Do not use this outside of SentencePieceProcessor, as + // it returns the raw pointer managed by the shared_ptr. + SentencePieceText *mutable_proto(); + + // Converts the utf8 byte spans into Unicode char span. 
+ void ConvertToUnicodeSpans(); + + friend class ImmutableNBestSentencePieceText; + + private: + explicit ImmutableSentencePieceText(const SentencePieceText &spt); + const SentencePieceText *spt_ = nullptr; + std::shared_ptr rep_; +}; + +// Wrapper class of SentencePieceText +// This wrapper only allows an immutable access to the proto and +// hides the actual implementation of protobuf. +// See sentencepiece.proto for the details of this class. +class ImmutableNBestSentencePieceText { + public: + ImmutableNBestSentencePieceText(); + virtual ~ImmutableNBestSentencePieceText(); + + std::vector nbests() const; + + size_t nbests_size() const; + ImmutableSentencePieceText nbests(int index) const; + + util::bytes SerializeAsString() const; + + // Returns the actual mutable proto. + // Do not use this outside of SentencePieceProcessor, as + // it returns the raw pointer managed by the shared_ptr. + NBestSentencePieceText *mutable_proto(); + + void ConvertToUnicodeSpans(); + + private: + std::shared_ptr rep_; +}; class SentencePieceProcessor { public: @@ -220,7 +276,7 @@ class SentencePieceProcessor { // Restricts the vocabulary set. // The input sentences are encoded into the tokens in `valid_vocab`. virtual util::Status SetVocabulary( - const std::vector &valid_vocab); + const std::vector &valid_vocab); // Reverts the vocabulary restriction. virtual util::Status ResetVocabulary(); @@ -232,7 +288,7 @@ class SentencePieceProcessor { int threshold); ////////////////////////////////////////////////////////////// - // Simple API. + // Simple Encode and Decode API. // // Given a UTF8 input, encodes it into a sequence of sentence pieces. virtual util::Status Encode(absl::string_view input, @@ -246,20 +302,17 @@ class SentencePieceProcessor { virtual util::Status Decode(const std::vector &pieces, std::string *detokenized) const; + // Given a sequence of pieces, decodes it into a detokenized output. 
+ virtual util::Status Decode(const std::vector &pieces, + std::string *detokenized) const; + // Given a sequence of ids, decodes it into a detokenized output. virtual util::Status Decode(const std::vector &ids, std::string *detokenized) const; - // Sets the encoder version. Normally users do not need to call this function. - // But they can call this fucntion just in case if they want to fall back to - // the original encoder. - virtual util::Status SetEncoderVersion(EncoderVersion encoder_version); - - // Returns the current encoder version in use. - virtual EncoderVersion GetEncoderVersion() const; - ////////////////////////////////////////////////////////////// // NBest API. + // // Same as Encode, but returns nbest results. virtual util::Status NBestEncode( absl::string_view input, int nbest_size, @@ -271,24 +324,24 @@ class SentencePieceProcessor { ////////////////////////////////////////////////////////////// // Sampling API. + // // Unigram and BPE support sampling mode. // - Unigram (--model_type=unigram): - // When `nbest_size` is positive value, approximately samples one - // segmentation from nbest candidates. When `nbest_size` is negative value, - // samples one segmentation from the hypotheses (Lattice) according to the - // generation probabilities using forward-filtering and backward-sampling - // algorithm. `alpha` is a smoothing parameter. The best segmentation - // (Viterbi segmentation) is more likely sampled when setting larger - // alpha. When alpha is 0.0, one segmentation is uniformly sampled from the - // nbest or lattice. - // `nbest_size` and `alpha` correspond to parameters `l` and `alpha` + // `nbest_size`: When `nbest_size` is positive value, approximately samples + // one segmentation from nbest candidates. When `nbest_size` is negative + // value, samples one segmentation from the hypotheses (Lattice) according to + // the generation probabilities using forward-filtering and backward-sampling + // algorithm. 
+ // `alpha`: Smoothing parameter (inverse temperature). The best segmentation + // (Viterbi segmentation) is more likely sampled when setting larger alpha. + // When alpha is 0.0, one segmentation is uniformly sampled from the nbest or + // lattice. `nbest_size` and `alpha` correspond to parameters `l` and `alpha` // in https://arxiv.org/abs/1804.10959 (nbest_size < 0 means l = infinity) // // - BPE (--model_type=bpe): - // `alpha` is the dropout probability `p` of bpe merge operations - // in https://arxiv.org/abs/1910.13267 - // Nbest-based sampling is not supported so nbest_size parameter is ignored in - // BPE. + // `alpha`: The dropout probability `p` of bpe merge operations in + // https://arxiv.org/abs/1910.13267 Nbest-based sampling is not supported so + // nbest_size parameter is ignored in BPE. virtual util::Status SampleEncode(absl::string_view input, int nbest_size, float alpha, std::vector *pieces) const; @@ -297,48 +350,105 @@ class SentencePieceProcessor { virtual util::Status SampleEncode(absl::string_view input, int nbest_size, float alpha, std::vector *ids) const; + ////////////////////////////////////////////////////////////// + // SampleEncodeAndScore API. + // + // Sample `samples` many tokenisations from the segmentation lattice. + // These methods are only available in model_type=unigram. + // + // `alpha`: smoothing parameter (inverse temperature). The same as `alpha` in + // `Sample` method. + // 'wor`: If `wor` is true, the samples are taken without replacement, and the + // scores are the inclusion probabilities of the elements in the sample; + // otherwise the samples are taken with replacement and the scores are the + // log-probs of sample elements + // `include_best`: If `include_best` is true, the best tokenisation is always + // included in the sample, and the remaining elements are sampled excluding + // the best. 
+ virtual util::Status SampleEncodeAndScore( + absl::string_view input, int num_samples, float alpha, bool wor, + bool include_best, + std::vector, float>> *pieces) const; + + // Same as above, but returns a sequence of ids. + virtual util::Status SampleEncodeAndScore( + absl::string_view input, int num_samples, float alpha, bool wor, + bool include_best, + std::vector, float>> *ids) const; + + ////////////////////////////////////////////////////////////// + // Entropy API. + // + // This only available in model_type=unigram. + // Calculate entropy of possible tokenisations + virtual util::Status CalculateEntropy(absl::string_view input, float alpha, + float *entropy) const; + ////////////////////////////////////////////////////////////// // Advanced API returning SentencePieceText, which manages // utf8-byte alignments between user-input/detokenized text // and internal sentencepiece sequence. // // Given a UTF8 input, encodes it into SentencePieceText. + // + // When using these APIs, sentencepiece.pb.h header files must be included. + // We can also use ImutableSentencePieceText as follows. + // + // ImmutableSentencePieceText spt; + // Encode("hello", spt.mutable_proto()).IgnoreError(); + // std::cout << spt.pieces_size() << std::endl; virtual util::Status Encode(absl::string_view input, SentencePieceText *spt) const; - // Same as above, but returns NBestSentencePieceText. virtual util::Status NBestEncode(absl::string_view input, int nbest_size, NBestSentencePieceText *nbest_spt) const; - // Same as above, but samples one segmentation from the hypotheses - // (Lattice). virtual util::Status SampleEncode(absl::string_view input, int nbest_size, float alpha, SentencePieceText *spt) const; - // Given a sequence of pieces, decodes it into SentencePieceText. 
+ virtual util::Status SampleEncodeAndScore( + absl::string_view input, int num_samples, float alpha, bool wor, + bool include_best, NBestSentencePieceText *samples_spt) const; + + // DEPRECATED: Remove this API and use std::vector virtual util::Status Decode(const std::vector &pieces, SentencePieceText *spt) const; - // Given a sequence of ids, decodes it into SentencePieceText. - virtual util::Status Decode(const std::vector &ids, + virtual util::Status Decode(const std::vector &pieces, SentencePieceText *spt) const; - ////////////////////////////////////////////////////////////// - // Handy methods that return the result directly. - // These functions ignore internal errors. + virtual util::Status Decode(const std::vector &ids, + SentencePieceText *spt) const; #ifdef SWIG -#define DEFINE_SPP_DIRECT_FUNC_IMPL(FuncName, OutType, ...) \ - OutType output; \ - const auto _status = FuncName(__VA_ARGS__, &output); \ - if (!_status.ok()) throw _status; \ - return output; +#define SPP_SWIG_CHECK_AND_THROW \ + if (!status.ok()) throw status; #else +#define SPP_SWIG_CHECK_AND_THROW \ + if (!status.ok()) { \ + } +#endif // SWIG + #define DEFINE_SPP_DIRECT_FUNC_IMPL(FuncName, OutType, ...) \ OutType output; \ - FuncName(__VA_ARGS__, &output).IgnoreError(); \ + const auto status = FuncName(__VA_ARGS__, &output); \ + SPP_SWIG_CHECK_AND_THROW; \ + return output; + +#define DEFINE_SPP_SERIALIZED_PROTO_IMPL(FuncName, OutType, ...) \ + OutType output; \ + const auto status = FuncName(__VA_ARGS__, output.mutable_proto()); \ + SPP_SWIG_CHECK_AND_THROW; \ + return output.SerializeAsString(); + +#define DEFINE_SPP_IMMUTABLE_PROTO_IMPL(FuncName, OutType, ...) \ + OutType output; \ + const auto status = FuncName(__VA_ARGS__, output.mutable_proto()); \ + SPP_SWIG_CHECK_AND_THROW; \ return output; -#endif + ////////////////////////////////////////////////////////////// + // Handy methods that return the result directly. + // These functions ignore internal errors. 
virtual std::vector EncodeAsPieces( absl::string_view input) const { DEFINE_SPP_DIRECT_FUNC_IMPL(Encode, std::vector, input); @@ -374,34 +484,135 @@ class SentencePieceProcessor { nbest_size, alpha); } + virtual std::vector, float>> + SampleEncodeAndScoreAsPieces(absl::string_view input, int num_samples, + float alpha, bool wor, bool include_best) const { + using _T = std::vector, float>>; + DEFINE_SPP_DIRECT_FUNC_IMPL(SampleEncodeAndScore, _T, input, num_samples, + alpha, wor, include_best); + } + + virtual std::vector, float>> + SampleEncodeAndScoreAsIds(absl::string_view input, int num_samples, + float alpha, bool wor, bool include_best) const { + using _T = std::vector, float>>; + DEFINE_SPP_DIRECT_FUNC_IMPL(SampleEncodeAndScore, _T, input, num_samples, + alpha, wor, include_best); + } + + // DEPRECATED: Remove this API and use std::vector virtual std::string DecodePieces( const std::vector &pieces) const { DEFINE_SPP_DIRECT_FUNC_IMPL(Decode, std::string, pieces); } + virtual std::string DecodePieces( + const std::vector &pieces) const { + DEFINE_SPP_DIRECT_FUNC_IMPL(Decode, std::string, pieces); + } + virtual std::string DecodeIds(const std::vector &ids) const { DEFINE_SPP_DIRECT_FUNC_IMPL(Decode, std::string, ids); } -#undef DEFINE_SPP_DIRECT_FUNC_IMPL + virtual float CalculateEntropy(absl::string_view text, float alpha) const { + DEFINE_SPP_DIRECT_FUNC_IMPL(CalculateEntropy, float, text, alpha); + } + ////////////////////////////////////////////////////////////// + // SerializedProto API. (DEPRECATED). Use ImmutableProto API. // They are used in Python interface. Returns serialized proto. // In python module, we can get access to the full Proto after // deserialzing the returned byte sequence. 
- virtual util::bytes EncodeAsSerializedProto(absl::string_view input) const; + virtual util::bytes EncodeAsSerializedProto(absl::string_view input) const { + DEFINE_SPP_SERIALIZED_PROTO_IMPL(Encode, ImmutableSentencePieceText, input); + } virtual util::bytes SampleEncodeAsSerializedProto(absl::string_view input, int nbest_size, - float alpha) const; + float alpha) const { + DEFINE_SPP_SERIALIZED_PROTO_IMPL(SampleEncode, ImmutableSentencePieceText, + input, nbest_size, alpha); + } virtual util::bytes NBestEncodeAsSerializedProto(absl::string_view input, - int nbest_size) const; + int nbest_size) const { + DEFINE_SPP_SERIALIZED_PROTO_IMPL( + NBestEncode, ImmutableNBestSentencePieceText, input, nbest_size); + } + + virtual util::bytes SampleEncodeAndScoreAsSerializedProto( + absl::string_view input, int num_samples, float alpha, bool wor, + bool include_best) const { + DEFINE_SPP_SERIALIZED_PROTO_IMPL(SampleEncodeAndScore, + ImmutableNBestSentencePieceText, input, + num_samples, alpha, wor, include_best); + } + + // TODO(taku): Remove this API and use std::vector + virtual util::bytes DecodePiecesAsSerializedProto( + const std::vector &pieces) const { + DEFINE_SPP_SERIALIZED_PROTO_IMPL(Decode, ImmutableSentencePieceText, + pieces); + } virtual util::bytes DecodePiecesAsSerializedProto( - const std::vector &pieces) const; + const std::vector &pieces) const { + DEFINE_SPP_SERIALIZED_PROTO_IMPL(Decode, ImmutableSentencePieceText, + pieces); + } virtual util::bytes DecodeIdsAsSerializedProto( - const std::vector &ids) const; + const std::vector &ids) const { + DEFINE_SPP_SERIALIZED_PROTO_IMPL(Decode, ImmutableSentencePieceText, ids); + } + + ////////////////////////////////////////////////////////////// + // ImmutableProto API. 
+ virtual ImmutableSentencePieceText EncodeAsImmutableProto( + absl::string_view input) const { + DEFINE_SPP_IMMUTABLE_PROTO_IMPL(Encode, ImmutableSentencePieceText, input); + } + + virtual ImmutableSentencePieceText SampleEncodeAsImmutableProto( + absl::string_view input, int nbest_size, float alpha) const { + DEFINE_SPP_IMMUTABLE_PROTO_IMPL(SampleEncode, ImmutableSentencePieceText, + input, nbest_size, alpha); + } + + virtual ImmutableNBestSentencePieceText NBestEncodeAsImmutableProto( + absl::string_view input, int nbest_size) const { + DEFINE_SPP_IMMUTABLE_PROTO_IMPL( + NBestEncode, ImmutableNBestSentencePieceText, input, nbest_size); + } + + virtual ImmutableNBestSentencePieceText SampleEncodeAndScoreAsImmutableProto( + absl::string_view input, int num_samples, float alpha, bool wor, + bool include_best) const { + DEFINE_SPP_IMMUTABLE_PROTO_IMPL(SampleEncodeAndScore, + ImmutableNBestSentencePieceText, input, + num_samples, alpha, wor, include_best); + } + + // TODO(taku): Remove this API and use std::vector + virtual ImmutableSentencePieceText DecodePiecesAsImmutableProto( + const std::vector &pieces) const { + DEFINE_SPP_IMMUTABLE_PROTO_IMPL(Decode, ImmutableSentencePieceText, pieces); + } + + virtual ImmutableSentencePieceText DecodePiecesAsImmutableProto( + const std::vector &pieces) const { + DEFINE_SPP_IMMUTABLE_PROTO_IMPL(Decode, ImmutableSentencePieceText, pieces); + } + + virtual ImmutableSentencePieceText DecodeIdsAsImmutableProto( + const std::vector &ids) const { + DEFINE_SPP_IMMUTABLE_PROTO_IMPL(Decode, ImmutableSentencePieceText, ids); + } + +#undef DEFINE_SPP_DIRECT_FUNC_IMPL +#undef DEFINE_SPP_SERIALIZED_PROTO_IMPL +#undef DEFINE_SPP_IMMUTABLE_PROTO_IMPL ////////////////////////////////////////////////////////////// // Vocabulary management methods. @@ -418,7 +629,8 @@ class SentencePieceProcessor { virtual const std::string &IdToPiece(int id) const; // Returns the score of `id`. 
- // Usually score is an emission log probability of unigram language model. + // Usually score is an emission log probability of unigram language + // model. virtual float GetScore(int id) const; // Returns true if `id` is unknown symbol. @@ -448,7 +660,6 @@ class SentencePieceProcessor { // Returns PAD () id. virtual int pad_id() const; -#ifndef SWIG ////////////////////////////////////////////////////////////// // Model management. // @@ -457,7 +668,6 @@ class SentencePieceProcessor { // Allows injection of a normalizer instance. `normalizer` is moved. void SetNormalizer(std::unique_ptr &&normalizer); -#endif // Returns immutable model proto. Useful to obtain extended // or experimental parameters encoded in model_proto. @@ -468,7 +678,7 @@ class SentencePieceProcessor { util::bytes serialized_model_proto() const; private: - enum ExtraOption { REVERSE, BOS, EOS }; + enum ExtraOption { REVERSE, BOS, EOS, UNK_PIECE }; util::Status ParseExtraOptions(absl::string_view extra_option, std::vector *extra_options) const; @@ -499,7 +709,6 @@ class SentencePieceProcessor { // std::random_device. void SetRandomGeneratorSeed(unsigned int seed); -#ifndef SWIG // IO related functions to absorb model formats. namespace io { // Loads `model_proto` from `filename`. @@ -514,6 +723,5 @@ util::Status LoadModelProto(absl::string_view, ModelProto *model_proto); // Saves `model_proto` as `filename`. util::Status SaveModelProto(absl::string_view, const ModelProto &model_proto); } // namespace io -#endif // SWIG } // namespace sentencepiece #endif // SENTENCEPIECE_PROCESSOR_H_ diff --git a/src/sentencepiece_processor_test.cc b/src/sentencepiece_processor_test.cc index 571dde4b..f05dc5d1 100644 --- a/src/sentencepiece_processor_test.cc +++ b/src/sentencepiece_processor_test.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License.! 
+#include "sentencepiece_processor.h" + #include #include "builder.h" @@ -20,7 +22,6 @@ #include "normalizer.h" #include "sentencepiece.pb.h" #include "sentencepiece_model.pb.h" -#include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" #include "testharness.h" #include "third_party/absl/container/flat_hash_map.h" @@ -551,10 +552,9 @@ TEST(SentencepieceProcessorTest, DecodeTest) { int GetPieceSize() const override { return 7; } int PieceToId(absl::string_view piece) const override { - static absl::flat_hash_map - kMap = {{"", 0}, {"", 1}, {"", 2}, {WS "ABC", 3}, - {WS "DE", 4}, {"F", 5}, {"G" WS "H", 6}}; + static absl::flat_hash_map kMap = { + {"", 0}, {"", 1}, {"", 2}, {WS "ABC", 3}, + {WS "DE", 4}, {"F", 5}, {"G" WS "H", 6}}; return port::FindWithDefault(kMap, piece, 0); } @@ -709,6 +709,86 @@ TEST(SentencepieceProcessorTest, DecodeTest) { } } +TEST(SentencepieceProcessorTest, DummyPrefixDecodeTest) { + class DecodeMockModel : public ModelInterface { + public: + EncodeResult Encode(absl::string_view normalized) const override { + return {}; + } + + int GetPieceSize() const override { return 7; } + + int PieceToId(absl::string_view piece) const override { + static absl::flat_hash_map kMap = { + {"", 0}, {"", 1}, {"", 2}, {WS "ABC", 3}, + {WS "DE", 4}, {"F", 5}, {"G" WS "H", 6}, {WS, 7}}; + return port::FindWithDefault(kMap, piece, 0); + } + + const std::string &IdToPiece(int id) const override { + static std::vector kMap = { + "", "", "", WS "ABC", WS "DE", "F", "G" WS "H", WS}; + return kMap[id]; + } + + bool IsUnknown(int id) const override { return (id == 0); } + + bool IsControl(int id) const override { return (id == 1 || id == 2); } + + bool IsByte(int id) const override { return false; } + + float GetScore(int id) const override { return 0.0; } + }; + + // start the sequence with a whitespace token + const std::vector input = { + "", WS, WS "ABC", "", WS "DE", "F", "G" WS "H", "I", ""}; + + { + SentencePieceProcessor sp; + auto proto = 
absl::make_unique(); + proto->mutable_trainer_spec()->set_unk_surface(""); + proto->mutable_normalizer_spec()->set_add_dummy_prefix(true); + proto->mutable_normalizer_spec()->set_remove_extra_whitespaces(false); + sp.Load(std::move(proto)).IgnoreError(); + + auto mock = absl::make_unique(); + sp.SetModel(std::move(mock)); + + const auto normalization_spec = MakeDefaultNormalizerSpec(); + sp.SetNormalizer( + absl::make_unique(normalization_spec)); + + SentencePieceText spt; + + EXPECT_TRUE(sp.Decode(input, &spt).ok()); + EXPECT_EQ(" ABC DEFG HI", spt.text()); + EXPECT_EQ(9, spt.pieces_size()); + } + + { + SentencePieceProcessor sp; + auto proto = absl::make_unique(); + proto->mutable_trainer_spec()->set_unk_surface(""); + proto->mutable_normalizer_spec()->set_add_dummy_prefix(true); + proto->mutable_normalizer_spec()->set_remove_extra_whitespaces(true); + sp.Load(std::move(proto)).IgnoreError(); + + auto mock = absl::make_unique(); + sp.SetModel(std::move(mock)); + + const auto normalization_spec = MakeDefaultNormalizerSpec(); + sp.SetNormalizer( + absl::make_unique(normalization_spec)); + + SentencePieceText spt; + + EXPECT_TRUE(sp.Decode(input, &spt).ok()); + EXPECT_EQ("ABC DEFG HI", spt.text()); + EXPECT_EQ(9, spt.pieces_size()); + } +} + TEST(SentencepieceProcessorTest, ByteFallbackDecodeTest) { class ByteFallbackDecodeMockModel : public ModelInterface { public: @@ -741,6 +821,8 @@ TEST(SentencepieceProcessorTest, ByteFallbackDecodeTest) { return kMap[id]; } + int GetPieceSize() const override { return 256; } + bool IsUnknown(int id) const override { return (id == 0); } bool IsControl(int id) const override { return (id == 1 || id == 2); } @@ -759,24 +841,39 @@ TEST(SentencepieceProcessorTest, ByteFallbackDecodeTest) { absl::make_unique(normalization_spec)); { - const std::vector input = {"", "A", "B", - // "あ" -> 0xE3 0x81 0x82 - "<0xE3>", "<0x81>", "<0x82>", - // "Z" -> 0x5A - "<0x5A>", - // "Ω" -> 0xCE 0xA9 - "<0xCE>", "<0xA9>", "C", - // Invalid UTF-8 bytes. 
- "<0xE0>", "<0x80>", - // "い" -> 0xE3 0x81 0x84 - "<0xE3>", "<0x81>", "<0x84>"}; + const std::vector input = { + "", + "A", + "B", + // "あ" -> 0xE3 0x81 0x82 + "<0xE3>", + "<0x81>", + "<0x82>", + // "Z" -> 0x5A + "<0x5A>", + // "Ω" -> 0xCE 0xA9 + "<0xCE>", + "<0xA9>", + "C", + // Invalid UTF-8 bytes. + "<0xE0>", + "<0x80>", + // "い" -> 0xE3 0x81 0x84 + "<0xE3>", + "<0x81>", + "<0x84>", + // REPLACEMENT CHARACTER as byte pieces. + "<0xEF>", + "<0xBF>", + "<0xBD>", + }; SentencePieceText spt; EXPECT_TRUE(sp.Decode(input, &spt).ok()); - EXPECT_EQ("ABあZΩC\xEF\xBF\xBD\xEF\xBF\xBDい", spt.text()); - EXPECT_EQ(15, spt.pieces_size()); + EXPECT_EQ("ABあZΩC\xEF\xBF\xBD\xEF\xBF\xBDい\xEF\xBF\xBD", spt.text()); + EXPECT_EQ(18, spt.pieces_size()); - for (int i = 0; i < 15; ++i) { + for (int i = 0; i < 18; ++i) { EXPECT_EQ(input[i], spt.pieces(i).piece()); } @@ -834,6 +931,16 @@ TEST(SentencepieceProcessorTest, ByteFallbackDecodeTest) { EXPECT_EQ(15, spt.pieces(13).end()); EXPECT_EQ(15, spt.pieces(14).begin()); EXPECT_EQ(18, spt.pieces(14).end()); + + EXPECT_EQ("", spt.pieces(15).surface()); + EXPECT_EQ("", spt.pieces(16).surface()); + EXPECT_EQ("\xEF\xBF\xBD", spt.pieces(17).surface()); + EXPECT_EQ(18, spt.pieces(15).begin()); + EXPECT_EQ(18, spt.pieces(15).end()); + EXPECT_EQ(18, spt.pieces(16).begin()); + EXPECT_EQ(18, spt.pieces(16).end()); + EXPECT_EQ(18, spt.pieces(17).begin()); + EXPECT_EQ(21, spt.pieces(17).end()); } } @@ -950,18 +1057,6 @@ TEST(SentencePieceProcessorTest, EndToEndTest) { EXPECT_EQ(2, sp.eos_id()); EXPECT_EQ(-1, sp.pad_id()); - { - // Verify the default encoder version. - EXPECT_EQ(EncoderVersion::kOptimized, sp.GetEncoderVersion()); - - // Set the encoder version to original and verify. - EXPECT_TRUE(sp.SetEncoderVersion(EncoderVersion::kOriginal).ok()); - EXPECT_EQ(EncoderVersion::kOriginal, sp.GetEncoderVersion()); - - // Set back to the default encoder version. 
- EXPECT_TRUE(sp.SetEncoderVersion(EncoderVersion::kOptimized).ok()); - } - { std::vector sps; const std::vector expected_str = {WS, "ab", "c"}; @@ -1137,6 +1232,13 @@ TEST(SentencePieceProcessorTest, EndToEndTest) { EXPECT_EQ("cba", output); } + // Out of range + { + std::string output; + const std::vector ids = {3, 4, 127}; + EXPECT_FALSE(sp.Decode(ids, &output).ok()); + } + { EXPECT_TRUE(sp.SetDecodeExtraOptions("bos:eos:reverse").ok()); @@ -1459,4 +1561,153 @@ TEST(SentencePieceProcessorTest, VocabularyTest) { EXPECT_FALSE(sp.IsUnused(6)); EXPECT_FALSE(sp.IsUnused(7)); } + +TEST(SentencePieceProcessorTest, ImmutableSentencePieceTextTest) { + ImmutableSentencePieceText spt; + EXPECT_TRUE(spt.text().empty()); + EXPECT_EQ(spt.score(), 0.0); + EXPECT_TRUE(spt.SerializeAsString().empty()); + + auto *v = spt.mutable_proto(); + + v->set_text("hello world"); + v->set_score(1.0); + for (int i = 0; i < 10; ++i) { + auto *p = v->add_pieces(); + p->set_surface(absl::StrCat("surface_", i)); + p->set_piece(absl::StrCat("surface_", i)); + p->set_id(i); + p->set_begin(i + 10); + p->set_end(i + 20); + } + + EXPECT_EQ(v->pieces_size(), spt.pieces_size()); + for (int i = 0; i < spt.pieces_size(); ++i) { + EXPECT_EQ(v->pieces(i).surface(), spt.pieces(i).surface()); + EXPECT_EQ(v->pieces(i).piece(), spt.pieces(i).piece()); + EXPECT_EQ(v->pieces(i).id(), spt.pieces(i).id()); + EXPECT_EQ(v->pieces(i).begin(), spt.pieces(i).begin()); + EXPECT_EQ(v->pieces(i).end(), spt.pieces(i).end()); + } + + auto check_proto = [&v](const ImmutableSentencePieceText &s) { + int n = 0; + for (auto &p : s.pieces()) { + EXPECT_EQ(v->pieces(n).surface(), p.surface()); + EXPECT_EQ(v->pieces(n).piece(), p.piece()); + EXPECT_EQ(v->pieces(n).id(), p.id()); + EXPECT_EQ(v->pieces(n).begin(), p.begin()); + EXPECT_EQ(v->pieces(n).end(), p.end()); + ++n; + } + EXPECT_EQ(v->text(), s.text()); + EXPECT_EQ(v->score(), s.score()); + EXPECT_EQ(v->SerializeAsString(), s.SerializeAsString()); + }; + + // test copy. 
+ const auto spt2 = spt; + check_proto(spt2); + + // test assign. + const ImmutableSentencePieceText spt3(spt); + check_proto(spt3); + + // default piece. + const ImmutableSentencePieceText_ImmutableSentencePiece piece; + EXPECT_TRUE(piece.surface().empty()); + EXPECT_TRUE(piece.piece().empty()); + EXPECT_EQ(piece.begin(), 0); + EXPECT_EQ(piece.end(), 0); + EXPECT_EQ(piece.id(), 0); +} + +TEST(SentencePieceProcessorTest, ImmutableNBestSentencePieceTextTest) { + ImmutableNBestSentencePieceText spt; + EXPECT_EQ(spt.nbests_size(), 0); + EXPECT_TRUE(spt.SerializeAsString().empty()); + + auto *v = spt.mutable_proto(); + + for (int i = 0; i < 10; ++i) { + auto *p = v->add_nbests(); + p->set_text(absl::StrCat("text_", i)); + p->set_score(2.0 * i); + } + + auto check_proto = [&v](const ImmutableNBestSentencePieceText &s) { + EXPECT_EQ(v->nbests_size(), s.nbests_size()); + for (int i = 0; i < v->nbests_size(); ++i) { + EXPECT_EQ(v->nbests(i).text(), s.nbests(i).text()); + EXPECT_EQ(v->nbests(i).score(), s.nbests(i).score()); + } + EXPECT_EQ(v->SerializeAsString(), s.SerializeAsString()); + }; + + check_proto(spt); + + // test copy. + const auto spt2 = spt; + check_proto(spt2); + + // test assign. 
+ const ImmutableNBestSentencePieceText spt3(spt); + check_proto(spt3); +} + +TEST(SentencePieceProcessorTest, ConvertToUnicodeSpansTest) { + auto make_spt = [&](const std::vector &tokens) { + ImmutableSentencePieceText ispt; + auto *spt = ispt.mutable_proto(); + int prev = 0; + std::string text; + for (const auto &tok : tokens) { + auto *piece = spt->add_pieces(); + piece->set_surface(tok); + piece->set_piece(tok); + piece->set_begin(prev); + piece->set_end(prev + tok.size()); + prev += tok.size(); + text += tok; + } + spt->set_text(text); + ispt.ConvertToUnicodeSpans(); + return ispt; + }; + + { + const auto spt = make_spt({"hello", "_world", "."}); + EXPECT_EQ(spt.pieces_size(), 3); + EXPECT_EQ(spt.pieces(0).begin(), 0); + EXPECT_EQ(spt.pieces(0).end(), 5); + EXPECT_EQ(spt.pieces(1).begin(), 5); + EXPECT_EQ(spt.pieces(1).end(), 11); + EXPECT_EQ(spt.pieces(2).begin(), 11); + EXPECT_EQ(spt.pieces(2).end(), 12); + } + + { + const auto spt = make_spt({"これは", "test", "です"}); + EXPECT_EQ(spt.pieces_size(), 3); + EXPECT_EQ(spt.pieces(0).begin(), 0); + EXPECT_EQ(spt.pieces(0).end(), 3); + EXPECT_EQ(spt.pieces(1).begin(), 3); + EXPECT_EQ(spt.pieces(1).end(), 7); + + EXPECT_EQ(spt.pieces(2).begin(), 7); + EXPECT_EQ(spt.pieces(2).end(), 9); + } + + { + const auto spt = make_spt({"いABは", "にほCD", "へと"}); + EXPECT_EQ(spt.pieces_size(), 3); + EXPECT_EQ(spt.pieces(0).begin(), 0); + EXPECT_EQ(spt.pieces(0).end(), 4); + EXPECT_EQ(spt.pieces(1).begin(), 4); + EXPECT_EQ(spt.pieces(1).end(), 8); + EXPECT_EQ(spt.pieces(2).begin(), 8); + EXPECT_EQ(spt.pieces(2).end(), 10); + } +} + } // namespace sentencepiece diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc index cb825a10..869312b3 100644 --- a/src/sentencepiece_trainer.cc +++ b/src/sentencepiece_trainer.cc @@ -31,8 +31,6 @@ #include "trainer_factory.h" #include "util.h" -ABSL_DECLARE_FLAG(int, minloglevel); - namespace sentencepiece { namespace { static constexpr char kDefaultNormalizerName[] = "nmt_nfkc"; @@ 
-112,7 +110,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( for (auto arg : absl::StrSplit(args, " ")) { absl::ConsumePrefix(&arg, "--"); std::string key, value; - const auto pos = arg.find("="); + const auto pos = arg.find('='); if (pos == absl::string_view::npos) { key = std::string(arg); } else { @@ -151,7 +149,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( } else if (key == "minloglevel") { int v = 0; CHECK_OR_RETURN(absl::SimpleAtoi(value, &v)); - absl::SetFlag(&FLAGS_minloglevel, v); + logging::SetMinLogLevel(v); continue; } else if(key == "encode_unicode_case") { bool encode_unicode_case; diff --git a/src/sentencepiece_trainer.h b/src/sentencepiece_trainer.h index bb74ab93..b4af6f0e 100644 --- a/src/sentencepiece_trainer.h +++ b/src/sentencepiece_trainer.h @@ -129,12 +129,12 @@ class SentencePieceTrainer { // with comma-separated values. `field_name` must not be a nested message. // The body of these functions are automatically generated with // data/gen_spec_parser.pl - static util::Status SetProtoField(const std::string &name, - const std::string &value, + static util::Status SetProtoField(absl::string_view name, + absl::string_view value, TrainerSpec *message); - static util::Status SetProtoField(const std::string &name, - const std::string &value, + static util::Status SetProtoField(absl::string_view name, + absl::string_view value, NormalizerSpec *message); // Populates model type from string representation, e.g., "bpe". 
diff --git a/src/spec_parser.h b/src/spec_parser.h index 7aff0fd9..6a19c0a9 100644 --- a/src/spec_parser.h +++ b/src/spec_parser.h @@ -25,10 +25,10 @@ namespace sentencepiece { -#define PARSE_STRING(param_name) \ - if (name == #param_name) { \ - message->set_##param_name(value); \ - return util::OkStatus(); \ +#define PARSE_STRING(param_name) \ + if (name == #param_name) { \ + message->set_##param_name(std::string(value)); \ + return util::OkStatus(); \ } #define PARSE_REPEATED_STRING(param_name) \ @@ -55,6 +55,16 @@ namespace sentencepiece { return util::OkStatus(); \ } +#define PARSE_UINT64(param_name) \ + if (name == #param_name) { \ + uint64 v; \ + if (!string_util::lexical_cast(value, &v)) \ + return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ + << "cannot parse \"" << value << "\" as int."; \ + message->set_##param_name(v); \ + return util::OkStatus(); \ + } + #define PARSE_DOUBLE(param_name) \ if (name == #param_name) { \ double v; \ @@ -134,7 +144,9 @@ inline std::string PrintProto(const TrainerSpec &message, PRINT_PARAM(split_by_number); PRINT_PARAM(split_by_whitespace); PRINT_PARAM(split_digits); + PRINT_PARAM(pretokenization_delimiter); PRINT_PARAM(treat_whitespace_as_suffix); + PRINT_PARAM(allow_whitespace_only_pieces); PRINT_REPEATED_STRING(control_symbols); PRINT_REPEATED_STRING(user_defined_symbols); PRINT_PARAM(required_chars); @@ -152,6 +164,9 @@ inline std::string PrintProto(const TrainerSpec &message, PRINT_PARAM(eos_piece); PRINT_PARAM(pad_piece); PRINT_PARAM(unk_surface); + PRINT_PARAM(enable_differential_privacy); + PRINT_PARAM(differential_privacy_noise_level); + PRINT_PARAM(differential_privacy_clipping_threshold); os << "}\n"; @@ -177,8 +192,8 @@ inline std::string PrintProto(const NormalizerSpec &message, return os.str(); } -util::Status SentencePieceTrainer::SetProtoField(const std::string &name, - const std::string &value, +util::Status SentencePieceTrainer::SetProtoField(absl::string_view name, + absl::string_view 
value, TrainerSpec *message) { CHECK_OR_RETURN(message); @@ -198,7 +213,7 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name, PARSE_REPEATED_STRING(accept_language); PARSE_INT32(self_test_sample_size); PARSE_DOUBLE(character_coverage); - PARSE_INT32(input_sentence_size); + PARSE_UINT64(input_sentence_size); PARSE_BOOL(shuffle_input_sentence); PARSE_INT32(seed_sentencepiece_size); PARSE_DOUBLE(shrinking_factor); @@ -210,7 +225,9 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name, PARSE_BOOL(split_by_number); PARSE_BOOL(split_by_whitespace); PARSE_BOOL(split_digits); + PARSE_STRING(pretokenization_delimiter); PARSE_BOOL(treat_whitespace_as_suffix); + PARSE_BOOL(allow_whitespace_only_pieces); PARSE_REPEATED_STRING(control_symbols); PARSE_REPEATED_STRING(user_defined_symbols); PARSE_STRING(required_chars); @@ -228,13 +245,16 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name, PARSE_STRING(eos_piece); PARSE_STRING(pad_piece); PARSE_STRING(unk_surface); + PARSE_BOOL(enable_differential_privacy); + PARSE_DOUBLE(differential_privacy_noise_level); + PARSE_UINT64(differential_privacy_clipping_threshold); return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) << "unknown field name \"" << name << "\" in TrainerSpec."; } -util::Status SentencePieceTrainer::SetProtoField(const std::string &name, - const std::string &value, +util::Status SentencePieceTrainer::SetProtoField(absl::string_view name, + absl::string_view value, NormalizerSpec *message) { CHECK_OR_RETURN(message); diff --git a/src/spm_decode_main.cc b/src/spm_decode_main.cc index 32cb382b..bc49bd38 100644 --- a/src/spm_decode_main.cc +++ b/src/spm_decode_main.cc @@ -34,6 +34,7 @@ ABSL_FLAG(std::string, extra_options, "", "':' separated encoder extra options, e.g., \"reverse:bos:eos\""); int main(int argc, char *argv[]) { + sentencepiece::ScopedResourceDestructor cleaner; sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, 
true); std::vector rest_args; @@ -64,6 +65,7 @@ int main(int argc, char *argv[]) { auto ToIds = [&](const std::vector &pieces) { std::vector ids; + ids.reserve(pieces.size()); for (const auto &s : pieces) { ids.push_back(atoi(s.c_str())); } diff --git a/src/spm_encode_main.cc b/src/spm_encode_main.cc index f151ecfc..2fbb850f 100644 --- a/src/spm_encode_main.cc +++ b/src/spm_encode_main.cc @@ -28,16 +28,17 @@ #include "trainer_interface.h" ABSL_FLAG(std::string, model, "", "model file name"); -ABSL_FLAG(std::string, output_format, "piece", - "choose from piece, id, proto, nbest_piece, nbest_id, nbest_proto, " - "sample_piece, sample_id or sample_proto."); +ABSL_FLAG( + std::string, output_format, "piece", + "choose from piece, id, proto, nbest_piece, nbest_id, or nbest_proto"); ABSL_FLAG(std::string, input, "", "input filename"); ABSL_FLAG(std::string, output, "", "output filename"); ABSL_FLAG(std::string, extra_options, "", "':' separated encoder extra options, e.g., \"reverse:bos:eos\""); ABSL_FLAG(int32, nbest_size, 10, "NBest size"); ABSL_FLAG(double, alpha, 0.5, "Smoothing parameter for sampling mode."); -ABSL_FLAG(int32, random_seed, -1, "Seed value for random generator."); +ABSL_FLAG(uint32, random_seed, static_cast(-1), + "Seed value for random generator."); // Piece restriction with vocabulary file. 
// https://github.com/rsennrich/subword-nmt#best-practice-advice-for-byte-pair-encoding-in-nmt @@ -50,6 +51,7 @@ ABSL_FLAG(bool, generate_vocabulary, false, "Generates vocabulary file instead of segmentation"); int main(int argc, char *argv[]) { + sentencepiece::ScopedResourceDestructor cleaner; sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, true); std::vector rest_args; @@ -61,8 +63,9 @@ int main(int argc, char *argv[]) { rest_args.push_back(absl::GetFlag(FLAGS_input)); } - if (absl::GetFlag(FLAGS_random_seed) != -1) + if (absl::GetFlag(FLAGS_random_seed) != -1) { sentencepiece::SetRandomGeneratorSeed(absl::GetFlag(FLAGS_random_seed)); + } if (rest_args.empty()) rest_args.push_back(""); // empty means that reading from stdin. @@ -90,13 +93,13 @@ int main(int argc, char *argv[]) { absl::flat_hash_map vocab; sentencepiece::SentencePieceText spt; sentencepiece::NBestSentencePieceText nbest_spt; - std::function process; + std::function process; const int nbest_size = absl::GetFlag(FLAGS_nbest_size); const float alpha = absl::GetFlag(FLAGS_alpha); if (absl::GetFlag(FLAGS_generate_vocabulary)) { - process = [&](const std::string &line) { + process = [&](absl::string_view line) { CHECK_OK(sp.Encode(line, &spt)); for (const auto &piece : spt.pieces()) { if (!sp.IsUnknown(piece.id()) && !sp.IsControl(piece.id())) @@ -104,47 +107,47 @@ int main(int argc, char *argv[]) { } }; } else if (absl::GetFlag(FLAGS_output_format) == "piece") { - process = [&](const std::string &line) { + process = [&](absl::string_view line) { CHECK_OK(sp.Encode(line, &sps)); output->WriteLine(absl::StrJoin(sps, " ")); }; } else if (absl::GetFlag(FLAGS_output_format) == "id") { - process = [&](const std::string &line) { + process = [&](absl::string_view line) { CHECK_OK(sp.Encode(line, &ids)); output->WriteLine(absl::StrJoin(ids, " ")); }; } else if (absl::GetFlag(FLAGS_output_format) == "proto") { - process = [&](const std::string &line) { CHECK_OK(sp.Encode(line, &spt)); }; + process = 
[&](absl::string_view line) { CHECK_OK(sp.Encode(line, &spt)); }; } else if (absl::GetFlag(FLAGS_output_format) == "sample_piece") { - process = [&](const std::string &line) { + process = [&](absl::string_view line) { CHECK_OK(sp.SampleEncode(line, nbest_size, alpha, &sps)); output->WriteLine(absl::StrJoin(sps, " ")); }; } else if (absl::GetFlag(FLAGS_output_format) == "sample_id") { - process = [&](const std::string &line) { + process = [&](absl::string_view line) { CHECK_OK(sp.SampleEncode(line, nbest_size, alpha, &ids)); output->WriteLine(absl::StrJoin(ids, " ")); }; } else if (absl::GetFlag(FLAGS_output_format) == "sample_proto") { - process = [&](const std::string &line) { + process = [&](absl::string_view line) { CHECK_OK(sp.SampleEncode(line, nbest_size, alpha, &spt)); }; } else if (absl::GetFlag(FLAGS_output_format) == "nbest_piece") { - process = [&](const std::string &line) { + process = [&](absl::string_view line) { CHECK_OK(sp.NBestEncode(line, nbest_size, &nbest_sps)); for (const auto &result : nbest_sps) { output->WriteLine(absl::StrJoin(result, " ")); } }; } else if (absl::GetFlag(FLAGS_output_format) == "nbest_id") { - process = [&](const std::string &line) { + process = [&](absl::string_view line) { CHECK_OK(sp.NBestEncode(line, nbest_size, &nbest_ids)); for (const auto &result : nbest_ids) { output->WriteLine(absl::StrJoin(result, " ")); } }; } else if (absl::GetFlag(FLAGS_output_format) == "nbest_proto") { - process = [&](const std::string &line) { + process = [&](absl::string_view line) { CHECK_OK(sp.NBestEncode(line, nbest_size, &nbest_spt)); }; } else { diff --git a/src/spm_export_vocab_main.cc b/src/spm_export_vocab_main.cc index b5d93cb0..e5b97dfe 100644 --- a/src/spm_export_vocab_main.cc +++ b/src/spm_export_vocab_main.cc @@ -1,11 +1,10 @@ - - // Copyright 2016 Google Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at -// n// http://www.apache.org/licenses/LICENSE-2.0 +// +// http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, @@ -29,6 +28,7 @@ ABSL_FLAG(std::string, output_format, "vocab", "and scores, syms outputs pieces and indices."); int main(int argc, char *argv[]) { + sentencepiece::ScopedResourceDestructor cleaner; sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, true); sentencepiece::SentencePieceProcessor sp; diff --git a/src/spm_normalize_main.cc b/src/spm_normalize_main.cc index 96da3606..39f3ef90 100644 --- a/src/spm_normalize_main.cc +++ b/src/spm_normalize_main.cc @@ -46,6 +46,7 @@ using sentencepiece::normalizer::Builder; using sentencepiece::normalizer::Normalizer; int main(int argc, char *argv[]) { + sentencepiece::ScopedResourceDestructor cleaner; sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, true); std::vector rest_args; diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc index c33fd052..8a9890aa 100644 --- a/src/spm_train_main.cc +++ b/src/spm_train_main.cc @@ -47,7 +47,8 @@ ABSL_FLAG(int32, self_test_sample_size, "the size of self test samples"); ABSL_FLAG(double, character_coverage, kDefaultTrainerSpec.character_coverage(), "character coverage to determine the minimum symbols"); -ABSL_FLAG(int32, input_sentence_size, kDefaultTrainerSpec.input_sentence_size(), +ABSL_FLAG(std::uint64_t, input_sentence_size, + kDefaultTrainerSpec.input_sentence_size(), "maximum size of sentences the trainer loads"); ABSL_FLAG(bool, shuffle_input_sentence, kDefaultTrainerSpec.shuffle_input_sentence(), @@ -76,9 +77,15 @@ ABSL_FLAG(bool, split_by_whitespace, kDefaultTrainerSpec.split_by_whitespace(), "use a white space to split sentence pieces"); ABSL_FLAG(bool, split_digits, kDefaultTrainerSpec.split_digits(), "split all digits (0-9) into separate pieces"); 
+ABSL_FLAG(std::string, pretokenization_delimiter, + kDefaultTrainerSpec.pretokenization_delimiter(), + "specifies the delimiter of pre-tokenization"); ABSL_FLAG(bool, treat_whitespace_as_suffix, kDefaultTrainerSpec.treat_whitespace_as_suffix(), "treat whitespace marker as suffix instead of prefix."); +ABSL_FLAG(bool, allow_whitespace_only_pieces, + kDefaultTrainerSpec.allow_whitespace_only_pieces(), + "allow pieces that only contain (consecutive) whitespace tokens"); ABSL_FLAG(std::string, control_symbols, "", "comma separated list of control symbols"); ABSL_FLAG(std::string, control_symbols_file, "", @@ -140,9 +147,23 @@ ABSL_FLAG(std::string, unk_surface, kDefaultTrainerSpec.unk_surface(), ABSL_FLAG(bool, train_extremely_large_corpus, kDefaultTrainerSpec.train_extremely_large_corpus(), "Increase bit depth for unigram tokenization."); -ABSL_FLAG(int32, random_seed, -1, "Seed value for random generator."); +ABSL_FLAG(uint32, random_seed, static_cast(-1), + "Seed value for random generator."); + +// DP related. +ABSL_FLAG(bool, enable_differential_privacy, false, + "Whether to add DP while training. 
Currently supported only by " + "UNIGRAM model."); + +ABSL_FLAG(float, differential_privacy_noise_level, 0.0f, + "Amount of noise to add for" + " DP"); +ABSL_FLAG(std::uint64_t, differential_privacy_clipping_threshold, 0, + "Threshold for" + " clipping the counts for DP"); int main(int argc, char *argv[]) { + sentencepiece::ScopedResourceDestructor cleaner; sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, true); sentencepiece::TrainerSpec trainer_spec; @@ -152,8 +173,9 @@ int main(int argc, char *argv[]) { CHECK(!absl::GetFlag(FLAGS_input).empty()); CHECK(!absl::GetFlag(FLAGS_model_prefix).empty()); - if (absl::GetFlag(FLAGS_random_seed) != -1) + if (absl::GetFlag(FLAGS_random_seed) != -1) { sentencepiece::SetRandomGeneratorSeed(absl::GetFlag(FLAGS_random_seed)); + } auto load_lines = [](absl::string_view filename) { std::vector lines; @@ -211,8 +233,10 @@ int main(int argc, char *argv[]) { SetTrainerSpecFromFlag(split_by_whitespace); SetTrainerSpecFromFlag(split_by_number); SetTrainerSpecFromFlag(split_digits); + SetTrainerSpecFromFlag(pretokenization_delimiter); SetTrainerSpecFromFlag(byte_fallback); SetTrainerSpecFromFlag(treat_whitespace_as_suffix); + SetTrainerSpecFromFlag(allow_whitespace_only_pieces); SetTrainerSpecFromFlag(hard_vocab_limit); SetTrainerSpecFromFlag(use_all_vocab); SetTrainerSpecFromFlag(unk_id); @@ -231,6 +255,10 @@ int main(int argc, char *argv[]) { SetRepeatedTrainerSpecFromFlag(control_symbols); SetRepeatedTrainerSpecFromFlag(user_defined_symbols); SetTrainerSpecFromFlag(train_extremely_large_corpus); + // DP related. 
+ SetTrainerSpecFromFlag(enable_differential_privacy); + SetTrainerSpecFromFlag(differential_privacy_noise_level); + SetTrainerSpecFromFlag(differential_privacy_clipping_threshold); SetRepeatedTrainerSpecFromFile(control_symbols); SetRepeatedTrainerSpecFromFile(user_defined_symbols); diff --git a/src/test_main.cc b/src/test_main.cc index b3170e27..38c978d0 100644 --- a/src/test_main.cc +++ b/src/test_main.cc @@ -24,6 +24,7 @@ ABSL_FLAG(std::string, test_srcdir, "../data", "Data directory."); ABSL_FLAG(std::string, test_tmpdir, "test_tmp", "Temporary directory."); int main(int argc, char **argv) { + sentencepiece::ScopedResourceDestructor cleaner; sentencepiece::ParseCommandLineFlags(argv[0], &argc, &argv, true); sentencepiece::test::RunAllTests(); return 0; diff --git a/src/testharness.cc b/src/testharness.cc index e852d3f5..f6b1efee 100644 --- a/src/testharness.cc +++ b/src/testharness.cc @@ -26,8 +26,6 @@ #include #include "common.h" -#include "init.h" -#include "third_party/absl/flags/flag.h" #include "third_party/absl/strings/str_cat.h" #include "util.h" diff --git a/src/testharness.h b/src/testharness.h index 193ec74b..9879b06e 100644 --- a/src/testharness.h +++ b/src/testharness.h @@ -21,8 +21,8 @@ #include #include "common.h" -#include "init.h" #include "third_party/absl/flags/flag.h" +#include "third_party/absl/flags/parse.h" #include "third_party/absl/strings/string_view.h" ABSL_DECLARE_FLAG(std::string, test_tmpdir); diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc index 31659cb9..db15a82a 100644 --- a/src/trainer_interface.cc +++ b/src/trainer_interface.cc @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License.! 
+#include "trainer_interface.h" + +#include #include #include #include @@ -27,12 +30,13 @@ #include "sentencepiece_trainer.h" #include "third_party/absl/container/flat_hash_map.h" #include "third_party/absl/memory/memory.h" +#include "third_party/absl/random/distributions.h" +#include "third_party/absl/random/random.h" #include "third_party/absl/strings/numbers.h" #include "third_party/absl/strings/str_cat.h" #include "third_party/absl/strings/str_format.h" #include "third_party/absl/strings/str_join.h" #include "third_party/absl/strings/str_split.h" -#include "trainer_interface.h" #include "unicode_script.h" #include "util.h" @@ -63,7 +67,7 @@ util::Status VerifySpec(const TrainerSpec &trainer_spec) { CHECK_RANGE(trainer_spec.character_coverage(), 0.98, 1.0); CHECK_RANGE(trainer_spec.max_sentencepiece_length(), 1, 512); CHECK_RANGE(trainer_spec.num_sub_iterations(), 1, 10); - CHECK_RANGE(trainer_spec.num_threads(), 1, 128); + CHECK_RANGE(trainer_spec.num_threads(), 1, 1024); CHECK_RANGE(trainer_spec.self_test_sample_size(), 0, 1000); CHECK_RANGE(trainer_spec.shrinking_factor(), 0.5, 0.95); CHECK_RANGE(trainer_spec.max_sentence_length(), 10, 1073741824); @@ -77,14 +81,20 @@ util::Status VerifySpec(const TrainerSpec &trainer_spec) { CHECK_OR_RETURN(!trainer_spec.eos_piece().empty()); CHECK_OR_RETURN(!trainer_spec.pad_piece().empty()); - if (SentencePieceTrainer::GetPretokenizerForTraining()) { - CHECK_EQ_OR_RETURN(TrainerSpec::UNIGRAM, trainer_spec.model_type()) - << "PretokenizerForTraining is only supported in UNIGRAM mode."; + if (SentencePieceTrainer::GetPretokenizerForTraining() || + !trainer_spec.pretokenization_delimiter().empty()) { + CHECK_OR_RETURN(trainer_spec.model_type() == TrainerSpec::UNIGRAM || + trainer_spec.model_type() == TrainerSpec::BPE) + << "PretokenizerForTraining is only supported in UNIGRAM or BPE mode."; } return util::OkStatus(); } +bool is_unicode_decimal_number(char32 c) { + return (c >= 0x30 && c <= 0x39) || (c >= 0xff10 && c <= 
0xff19); +} + class SentenceSelector { public: using Sampler = random::ReservoirSampler; @@ -120,16 +130,14 @@ class SentenceSelector { } bool Add(const std::pair &sentence) { - if (spec_->input_sentence_size() <= 0) { + if (spec_->input_sentence_size() == 0) { sentences_->emplace_back(sentence); } else { if (spec_->shuffle_input_sentence()) { sampler_->Add(sentence); } else { sentences_->emplace_back(sentence); - if (sentences_->size() >= - static_cast(spec_->input_sentence_size())) - return false; + if (sentences_->size() >= spec_->input_sentence_size()) return false; } } @@ -211,9 +219,10 @@ bool TrainerInterface::IsValidSentencePiece( constexpr unicode_script::ScriptType kAnyType = static_cast(-1); - auto is_number = [](char32 c) { return (c >= 0x30 && c <= 0x39); }; - unicode_script::ScriptType prev_script = kAnyType; + bool all_whitespace_piece = + std::all_of(sentencepiece.begin(), sentencepiece.end(), + [](char32 c) { return c == kWSChar; }); for (size_t pos = 0; pos < sentencepiece.size(); ++pos) { const char32 c = sentencepiece[pos]; @@ -223,7 +232,6 @@ bool TrainerInterface::IsValidSentencePiece( if (c == 0x0000) { // NULL is not allowed for Darts (TRIE). return false; } - // kUPPBoundaryChar is included when split_by_upp_for_training is true. if (c == kUPPBoundaryChar) { return false; } @@ -236,25 +244,30 @@ bool TrainerInterface::IsValidSentencePiece( } if (c == kWSChar) { - // Only allows whitespace to appear as a prefix of piece. + // Only allows whitespace to appear as a prefix of piece unless + // allow_whitespace_only_pieces is True. // When split_by_whitespace is false, we allow whitespaces to // appear in the middle, "foo_bar", but do not allow them // to appear as suffix, "foo_bar_". // Regardless of the setting of split_by_whitespace, // whitespace is treated as a prefix/infix of symbol or - // independent symbol. 
- if (trainer_spec_.treat_whitespace_as_suffix()) { - if ((trainer_spec_.split_by_whitespace() && - pos < sentencepiece.size() - 1) || - (!trainer_spec_.split_by_whitespace() && - pos < sentencepiece.size() - 1 && pos == 0)) { - return false; - } - } else { - if ((trainer_spec_.split_by_whitespace() && pos > 0) || - (!trainer_spec_.split_by_whitespace() && pos > 0 && - pos == sentencepiece.size() - 1)) { - return false; + // independent symbol, unless allow_whitespace_only_pieces() is true, + // in which case whitespace only pieces can occur. + if (!trainer_spec_.allow_whitespace_only_pieces() || + !all_whitespace_piece) { + if (trainer_spec_.treat_whitespace_as_suffix()) { + if ((trainer_spec_.split_by_whitespace() && + pos < sentencepiece.size() - 1) || + (!trainer_spec_.split_by_whitespace() && + pos < sentencepiece.size() - 1 && pos == 0)) { + return false; + } + } else { + if ((trainer_spec_.split_by_whitespace() && pos > 0) || + (!trainer_spec_.split_by_whitespace() && pos > 0 && + pos == sentencepiece.size() - 1)) { + return false; + } } } } else { @@ -264,13 +277,15 @@ bool TrainerInterface::IsValidSentencePiece( if (s == unicode_script::U_Hiragana || s == unicode_script::U_Katakana || c == 0x30FC) { // long vowel sound (Katakana) should be Katakana s = unicode_script::U_Han; + } else if (s == unicode_script::U_Inherited) { + s = prev_script; } - if (!trainer_spec_.split_by_number() && is_number(c)) { + if (!trainer_spec_.split_by_number() && is_unicode_decimal_number(c)) { s = kAnyType; } - if (trainer_spec_.split_digits() && is_number(c)) { + if (trainer_spec_.split_digits() && is_unicode_decimal_number(c)) { if (sentencepiece.size() > 1) return false; } @@ -287,6 +302,22 @@ bool TrainerInterface::IsValidSentencePiece( return true; } +template +void AddDPNoise(const TrainerSpec &trainer_spec, absl::SharedBitGen &generator, + T *to_update) { + if (trainer_spec.differential_privacy_noise_level() > 0) { + float random_num = absl::Gaussian( + generator, 0, 
trainer_spec.differential_privacy_noise_level()); + + *to_update = + std::round(std::max(0.f, random_num + static_cast(*to_update))); + } + // Clip anything below the clipping threshold to 0. + if (*to_update < trainer_spec.differential_privacy_clipping_threshold()) { + *to_update = 0; + } +} + util::Status TrainerInterface::LoadSentences() { RETURN_IF_ERROR(status()); CHECK_OR_RETURN(sentences_.empty()); @@ -378,6 +409,7 @@ util::Status TrainerInterface::LoadSentences() { LOG(INFO) << "Sampled " << sentences_.size() << " sentences from " << selector.total_size() << " sentences."; } + if (too_long_lines > 0) LOG(INFO) << "Skipped " << too_long_lines << " too long sentences."; if (self_test_samples_.size() > 0) @@ -421,6 +453,54 @@ util::Status TrainerInterface::LoadSentences() { } } + // If DP is required, add the noise/clip the input. + if (trainer_spec_.enable_differential_privacy()) { + if (trainer_spec_.input_format() != "tsv") { + LOG(ERROR) + << "Dp version will not work correctly with text input format."; + } + if (trainer_spec_.differential_privacy_noise_level() <= 0) { + LOG(WARNING) << "Private version with <=0 noise level will give " + "infinity epsilon guarantees."; + } + if (trainer_spec_.differential_privacy_clipping_threshold() <= 0) { + LOG(WARNING) << "Private version with <=0 clipping threshold will give " + "infinity epsilon guarantees."; + } + + // Add noise to all the sentences via threadpool. + + // This line is mainly for tests with small num of sentences. + const auto num_workers = + std::min(trainer_spec_.num_threads(), sentences_.size() - 1); + + { + auto pool = absl::make_unique(num_workers); + pool->StartWorkers(); + for (int n = 0; n < num_workers; ++n) { + pool->Schedule([&, n]() { + // One per thread generator. + absl::SharedBitGen generator; + for (size_t i = n; i < sentences_.size(); i += num_workers) { + AddDPNoise(trainer_spec_, generator, + &(sentences_[i].second)); + } + }); + } + } + + // Remove zero freq elements. 
+ const auto before_size = sentences_.size(); + auto it = std::remove_if(sentences_.begin(), sentences_.end(), + [](const Sentence &s) { return s.second <= 0; }); + const auto new_size = std::distance(sentences_.begin(), it); + const int num_erased = before_size - new_size; + sentences_.erase(it, sentences_.end()); + + LOG(INFO) << "DP noise resulted in " << 1.0 * num_erased / before_size + << " fraction of sentences removed."; + } + // Count character frequencies. int64 all_chars_count = 0; // A map from a character to {is_required_char, character count}. @@ -519,7 +599,8 @@ void TrainerInterface::SplitSentencesByWhitespace() { absl::flat_hash_map tokens; for (const auto &s : sentences_) { for (const auto &w : - SplitIntoWords(s.first, trainer_spec_.treat_whitespace_as_suffix())) { + SplitIntoWords(s.first, trainer_spec_.treat_whitespace_as_suffix(), + trainer_spec_.allow_whitespace_only_pieces())) { tokens[std::string(w)] += s.second; } } @@ -604,6 +685,7 @@ util::Status TrainerInterface::Serialize(ModelProto *model_proto) const { util::Status TrainerInterface::SaveModel(absl::string_view filename) const { LOG(INFO) << "Saving model: " << filename; ModelProto model_proto; + RETURN_IF_ERROR(Serialize(&model_proto)); auto output = filesystem::NewWritableFile(filename.data(), true); @@ -619,6 +701,14 @@ util::Status TrainerInterface::SaveVocab(absl::string_view filename) const { auto output = filesystem::NewWritableFile(filename); RETURN_IF_ERROR(output->status()); + for (const auto &piece : model_proto.pieces()) { + if (piece.piece().find_first_of(" \t\r\n") != std::string::npos) { + LOG(WARNING) << "The piece [" << piece.piece() + << "] contains escaped characters that break the format of " + << filename; + } + } + if (trainer_spec_.vocabulary_output_piece_score()) { for (const auto &piece : model_proto.pieces()) { std::ostringstream os; @@ -671,19 +761,19 @@ util::Status TrainerInterface::InitMetaPieces() { std::set dup; int id = 0; - auto insert_meta_symbol = 
[&id, &dup, this]( - const std::string &w, - ModelProto::SentencePiece::Type type) -> bool { + auto insert_meta_symbol = + [&id, &dup, this](const std::string &w, + ModelProto::SentencePiece::Type type) -> util::Status { if (!dup.insert(w).second) { - LOG(ERROR) << w << " is already defined."; - return false; + return util::InternalError(absl::StrCat( + w, " is already defined. duplicated symbols are not allowed.")); } if (w == trainer_spec_.unk_piece()) { - LOG(ERROR) << trainer_spec_.unk_piece() - << " must not be defined with --control_symbols and " - "--user_defined_symbols."; - return false; + return util::InternalError( + absl::StrCat(trainer_spec_.unk_piece(), + " must not be defined with --control_symbols and " + "--user_defined_symbols.")); } if (w == trainer_spec_.bos_piece() && trainer_spec_.bos_id() >= 0) { @@ -696,21 +786,22 @@ util::Status TrainerInterface::InitMetaPieces() { while (meta_pieces_.find(id) != meta_pieces_.end()) ++id; meta_pieces_[id] = std::make_pair(w, type); } - return true; + + return util::OkStatus(); }; for (const auto &w : trainer_spec_.control_symbols()) { - CHECK_OR_RETURN(insert_meta_symbol(w, ModelProto::SentencePiece::CONTROL)); + RETURN_IF_ERROR(insert_meta_symbol(w, ModelProto::SentencePiece::CONTROL)); } for (const auto &w : trainer_spec_.user_defined_symbols()) { - CHECK_OR_RETURN( + RETURN_IF_ERROR( insert_meta_symbol(w, ModelProto::SentencePiece::USER_DEFINED)); } if (trainer_spec_.byte_fallback()) { for (int i = 0; i < 256; ++i) { - CHECK_OR_RETURN( + RETURN_IF_ERROR( insert_meta_symbol(ByteToPiece(i), ModelProto::SentencePiece::BYTE)); } } diff --git a/src/trainer_interface.h b/src/trainer_interface.h index f66d59a4..8d625a95 100644 --- a/src/trainer_interface.h +++ b/src/trainer_interface.h @@ -107,16 +107,16 @@ class TrainerInterface { FRIEND_TEST(TrainerInterfaceTest, SerializeTest); FRIEND_TEST(TrainerInterfaceTest, CharactersTest); + // Loads all sentences from spec.input() or SentenceIterator. 
+ // It loads at most input_sentence_size sentences. + util::Status LoadSentences(); + protected: // Returns true if |piece| is valid sentence piece. // The result is affected by // max_sentencepiece_length, split_by_whiespace, split_by_unicode_script. bool IsValidSentencePiece(const string_util::UnicodeText &piece) const; - // Loads all sentences from spec.input() or SentenceIterator. - // It loads at most input_sentence_size sentences. - util::Status LoadSentences(); - // Splits all sentencecs by whitespaces and // replace the |sentences_| with tokenized string. // e.g., diff --git a/src/trainer_interface_test.cc b/src/trainer_interface_test.cc index c61c7ce0..feb970f2 100644 --- a/src/trainer_interface_test.cc +++ b/src/trainer_interface_test.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License.! +#include "trainer_interface.h" + #include #include "filesystem.h" #include "testharness.h" #include "third_party/absl/strings/str_cat.h" #include "third_party/absl/strings/str_format.h" -#include "trainer_interface.h" #include "util.h" namespace sentencepiece { @@ -72,15 +73,20 @@ TEST(TrainerInterfaceTest, IsValidSentencePieceTest) { EXPECT_FALSE(IsValid("F1")); EXPECT_FALSE(IsValid("1F")); EXPECT_FALSE(IsValid("1A2")); - EXPECT_TRUE(IsValid("$10")); // $ and 1 are both "common" script. + EXPECT_TRUE(IsValid("$10")); // $ and 1 are both "common" script. EXPECT_FALSE(IsValid("$ABC")); EXPECT_FALSE(IsValid("ab\tbc")); // "\t" is UPP boundary. EXPECT_FALSE(IsValid("ab cd")); EXPECT_FALSE(IsValid("\0\0")); EXPECT_FALSE(IsValid("\0")); + EXPECT_TRUE(IsValid("proteïni")); // Combining Diaeresis should inherit + // script from base character. + EXPECT_TRUE(IsValid("ثَبَّتَ")); // Arabic Fatha and Shadda should inherit script + // from base character. 
trainer_spec.set_split_by_whitespace(false); EXPECT_TRUE(IsValid(WS)); + EXPECT_TRUE(IsValid(WS WS WS "a")); EXPECT_TRUE(IsValid(WS "a")); EXPECT_FALSE(IsValid("a" WS)); EXPECT_FALSE(IsValid(WS "a" WS)); @@ -88,7 +94,17 @@ TEST(TrainerInterfaceTest, IsValidSentencePieceTest) { EXPECT_TRUE(IsValid(WS "a" WS "b")); EXPECT_TRUE(IsValid(WS "a" WS "b" WS "c")); EXPECT_FALSE(IsValid("a" WS "b" WS)); + EXPECT_FALSE(IsValid(WS WS)); + EXPECT_FALSE(IsValid(WS WS WS)); + + trainer_spec.set_allow_whitespace_only_pieces(true); + EXPECT_TRUE(IsValid(WS)); + EXPECT_TRUE(IsValid(WS WS)); + EXPECT_TRUE(IsValid(WS WS WS)); + EXPECT_TRUE(IsValid(WS WS "a")); + EXPECT_FALSE(IsValid("a" WS WS)); // suffix whitespace illegal without flag + trainer_spec.set_allow_whitespace_only_pieces(false); trainer_spec.set_split_by_unicode_script(false); EXPECT_TRUE(IsValid("あいう")); EXPECT_TRUE(IsValid("グーグル")); @@ -98,6 +114,26 @@ TEST(TrainerInterfaceTest, IsValidSentencePieceTest) { EXPECT_TRUE(IsValid("$10")); EXPECT_TRUE(IsValid("$ABC")); + trainer_spec.set_split_by_unicode_script(true); + trainer_spec.set_split_by_number(true); + EXPECT_FALSE(IsValid("F1")); + EXPECT_TRUE(IsValid("$10")); + + trainer_spec.set_split_by_unicode_script(true); + trainer_spec.set_split_by_number(false); + EXPECT_TRUE(IsValid("F1")); + EXPECT_TRUE(IsValid("$10")); + + trainer_spec.set_split_by_unicode_script(false); + trainer_spec.set_split_by_number(true); + EXPECT_TRUE(IsValid("F1")); + EXPECT_TRUE(IsValid("$10")); + + trainer_spec.set_split_by_unicode_script(false); + trainer_spec.set_split_by_number(false); + EXPECT_TRUE(IsValid("F1")); + EXPECT_TRUE(IsValid("$10")); + trainer_spec.set_max_sentencepiece_length(4); EXPECT_TRUE(IsValid("1234")); EXPECT_FALSE(IsValid("12345")); @@ -124,6 +160,15 @@ TEST(TrainerInterfaceTest, IsValidSentencePieceTest) { EXPECT_FALSE(IsValid(WS "a" WS "b")); EXPECT_FALSE(IsValid("a" WS "b" WS)); + trainer_spec.set_allow_whitespace_only_pieces(true); + EXPECT_TRUE(IsValid(WS)); + 
EXPECT_TRUE(IsValid(WS WS)); + EXPECT_FALSE(IsValid(WS "a" WS)); + EXPECT_FALSE(IsValid("a" WS "b")); + EXPECT_FALSE(IsValid(WS "a" WS "b")); + EXPECT_FALSE(IsValid("a" WS "b" WS)); + + trainer_spec.set_allow_whitespace_only_pieces(false); trainer_spec.set_split_by_whitespace(false); EXPECT_TRUE(IsValid(WS)); EXPECT_FALSE(IsValid(WS "a")); @@ -146,6 +191,12 @@ TEST(TrainerInterfaceTest, IsValidSentencePieceTest) { EXPECT_FALSE(IsValid("2007")); EXPECT_FALSE(IsValid("x1")); EXPECT_FALSE(IsValid("2x")); + // Fullwidth digits. + EXPECT_TRUE(IsValid("1")); + EXPECT_FALSE(IsValid("59")); + EXPECT_FALSE(IsValid("2007")); + EXPECT_FALSE(IsValid("*1")); + EXPECT_FALSE(IsValid("2*")); } TEST(TrainerInterfaceTest, OverrideSpecialPiecesTest) { diff --git a/src/unicode_script.h b/src/unicode_script.h index 917c49d2..67042c00 100644 --- a/src/unicode_script.h +++ b/src/unicode_script.h @@ -19,7 +19,7 @@ namespace sentencepiece { namespace unicode_script { -enum ScriptType { +enum ScriptType : int32_t { U_Adlam, U_Ahom, U_Anatolian_Hieroglyphs, diff --git a/src/unigram_model.cc b/src/unigram_model.cc index bd2d99b2..d9f1ce9d 100644 --- a/src/unigram_model.cc +++ b/src/unigram_model.cc @@ -12,19 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License.! +#include "unigram_model.h" + #include #include #include +#include #include #include #include #include #include +#include "third_party/absl/container/flat_hash_map.h" #include "third_party/absl/memory/memory.h" #include "third_party/absl/strings/str_split.h" #include "third_party/absl/strings/string_view.h" -#include "unigram_model.h" #include "util.h" namespace sentencepiece { @@ -55,6 +58,17 @@ inline float LogSumExp(float x, float y, bool init_mode) { return vmax + log(std::exp(static_cast(vmin - vmax)) + 1.0); } } + +// Returns a sample from a standard Gumbel distribution. 
+// If U ~ U[0, 1], -log(-log U) ~ G(0,1) +inline float Gumbel() { + const float kEpsilon = 1e-7; + auto *mt = random::GetRandomGenerator(); + std::uniform_real_distribution dis(0.0, 1.0); + float noise = -std::log(-(std::log(dis(*mt) + kEpsilon))); + + return noise; +} } // namespace Lattice::Lattice() : node_allocator_(kPreallocateLatticeNodeSize) {} @@ -145,7 +159,7 @@ Lattice::Node *Lattice::Insert(int pos, int length) { return node; } -std::vector Lattice::Viterbi() { +Lattice::LatticePathWithScore Lattice::Viterbi() { const int len = size(); for (int pos = 0; pos <= len; ++pos) { @@ -171,6 +185,7 @@ std::vector Lattice::Viterbi() { // backtrace std::vector results; + float score = begin_nodes(len)[0]->backtrace_score; for (Node *node = begin_nodes_[len][0]->prev; node->prev != nullptr; node = node->prev) { results.push_back(node); @@ -178,30 +193,33 @@ std::vector Lattice::Viterbi() { std::reverse(results.begin(), results.end()); - return results; -} + LatticePathWithScore retval = {results, score}; -float Lattice::PopulateMarginal(float freq, - std::vector *expected) const { - if (expected == nullptr) return 0.0; + return retval; +} +std::vector Lattice::ForwardAlgorithm(float inv_theta) const { const int len = size(); - - // alpha and beta (accumulative log prob) in Forward Backward. - // the index of alpha/beta is Node::node_id. 
std::vector alpha(node_allocator_.size(), 0.0); - std::vector beta(node_allocator_.size(), 0.0); for (int pos = 0; pos <= len; ++pos) { for (Node *rnode : begin_nodes_[pos]) { for (Node *lnode : end_nodes_[pos]) { - alpha[rnode->node_id] = LogSumExp(alpha[rnode->node_id], - lnode->score + alpha[lnode->node_id], - lnode == end_nodes_[pos][0]); + alpha[rnode->node_id] = + LogSumExp(alpha[rnode->node_id], + inv_theta * lnode->score + alpha[lnode->node_id], + lnode == end_nodes_[pos][0]); } } } + return alpha; +} + +std::vector Lattice::BackwardAlgorithm(float inv_theta) const { + const int len = size(); + std::vector beta(node_allocator_.size(), 0.0); + for (int pos = len; pos >= 0; --pos) { for (Node *lnode : end_nodes_[pos]) { for (Node *rnode : begin_nodes_[pos]) { @@ -212,6 +230,21 @@ float Lattice::PopulateMarginal(float freq, } } + return beta; +} + +float Lattice::PopulateMarginal(float freq, + std::vector *expected) const { + if (expected == nullptr) return 0.0; + + const int len = size(); + + // alpha and beta (accumulative log prob) in Forward Backward. + // the index of alpha/beta is Node::node_id. + + const auto alpha = ForwardAlgorithm(1.0); + const auto beta = BackwardAlgorithm(1.0); + const float Z = alpha[begin_nodes_[len][0]->node_id]; for (int pos = 0; pos < len; ++pos) { for (Node *node : begin_nodes_[pos]) { @@ -228,13 +261,97 @@ float Lattice::PopulateMarginal(float freq, return freq * Z; } -std::vector> Lattice::NBest(size_t nbest_size) { +float Lattice::CalculateEntropy(float inv_theta) const { + const int len = size(); + + // alpha[node_id] is the marginal prob of sequence up to start of node + // H is entropy of sequence + // the index of alpha/H is Node::node_id. 
+ std::vector H(node_allocator_.size(), 0.0); + + // Populate the forward marginals to get the normalising constant + const auto alpha = ForwardAlgorithm(inv_theta); + + // Now populate the forward entropies + for (int pos = 0; pos <= len; ++pos) { + for (Node *rnode : begin_nodes_[pos]) { + for (Node *lnode : end_nodes_[pos]) { + // Contribution each lnode makes = p(lnode) * (H(lnode) + log p(lnode)) + + // We have to normalise p(lnode) by the marginal contribution it makes + const float lnode_transition_prob = + ((inv_theta * lnode->score) + alpha[lnode->node_id] - + alpha[rnode->node_id]); + H[rnode->node_id] += std::exp(lnode_transition_prob) * + (H[lnode->node_id] + lnode_transition_prob); + } + } + } + + return -H[begin_nodes_[len][0]->node_id]; +} + +namespace { + +// The node structure to support A* algorithm in Lattice::NBest() +struct Hypothesis { + Lattice::Node *node; + Hypothesis *next; + float fx; // the priority to pop a new hypothesis from the priority queue. + float gx; // the sum of scores from EOS to the left-most node in x. +}; + +// Helper function for cloning a Hypothesis and the ones on their next paths. +// The graph structure is preserved. +// +// to_clone: the Hypothesis to clone. +// clone_map: mapping between the old pointers and the new pointers. +// allocator: allocate and own the cloned Hypothesis. +// +// Returns the cloned Hypothesis*. All Hypothesis on its "next" chain are also +// guaranteed to have been allocated via "allocator", and "clone_map" is updated +// with all new mappings. +Hypothesis *CloneHypAndDependents( + const Hypothesis *to_clone, + absl::flat_hash_map *clone_map, + model::FreeList *allocator) { + Hypothesis *cloned = nullptr; + Hypothesis **result_callback = &cloned; + + // Iteratively clone "to_clone" and its dependencies. + // The new pointer will be written back to *result_callback. + while (to_clone != nullptr) { + // If "to_clone" has already been cloned before, we just look up the result. 
+ auto iter = clone_map->find(to_clone); + if (iter != clone_map->end()) { + *result_callback = iter->second; + break; + } + + // Allocate a new Hypothesis and copy the values. + Hypothesis *new_hyp = allocator->Allocate(); + *new_hyp = *to_clone; + *result_callback = new_hyp; + clone_map->insert({to_clone, new_hyp}); + + // Move on to clone "to_clone->next". + to_clone = to_clone->next; + result_callback = &(new_hyp->next); + } + return cloned; +} + +} // namespace + +std::vector Lattice::NBest(size_t nbest_size, + bool sample, + float inv_theta) { if (nbest_size < 1) { LOG(WARNING) << "nbest_size >= 1. Returns empty result."; return {}; } - if (nbest_size == 1) { + if (nbest_size == 1 && !sample) { return {Viterbi()}; } @@ -243,17 +360,12 @@ std::vector> Lattice::NBest(size_t nbest_size) { // At each partial path x, compute f(x) as follows // f(x) = g(x) + h(x). // g(x): the sum of scores from EOS to the left-most node in x. + // for a complete hypothesis, g(hyp) is the score of the hypothesis. // h(x): a heuristic that estimates the largest score from x to BOS. // f(x): the priority to pop a new hypothesis from the priority queue. // // As left-to-right Viterbi search can tell the *exact* value of h(x), // we can obtain the exact n-best results with A*. - struct Hypothesis { - Node *node; - Hypothesis *next; - float fx; - float gx; - }; class HypothesisComparator { public: @@ -268,18 +380,29 @@ std::vector> Lattice::NBest(size_t nbest_size) { model::FreeList hypothesis_allocator(kPreallocatedHypothesisSize); Agenda agenda; - std::vector> results; + std::vector results; auto *eos = hypothesis_allocator.Allocate(); eos->node = eos_node(); eos->next = nullptr; - eos->fx = eos->node->score; - eos->gx = eos->node->score; - agenda.push(eos); + eos->gx = 0.0; - // Run Viterbi first to fill backtrace score. 
- Viterbi(); + std::vector alpha(node_allocator_.size(), 0.0); + if (sample) { + // Run forwards algorithm to get normalising constants + alpha = ForwardAlgorithm(inv_theta); + // f(eos) = Gumbel(0), as it is the perturbed score of the entire lattice. + eos->fx = Gumbel(); + } else { + // Run Viterbi first to fill backtrace score. + Viterbi(); + eos->fx = eos->node->backtrace_score; + } + agenda.push(eos); + + int shrink_count = 0; // Number of times agenda has shrunk. For logging only. + bool printed_memory_warning = false; // For logging only. while (!agenda.empty()) { auto *top = agenda.top(); agenda.pop(); @@ -289,61 +412,110 @@ std::vector> Lattice::NBest(size_t nbest_size) { if (node == bos_node()) { results.resize(results.size() + 1); for (auto *n = top->next; n->next != nullptr; n = n->next) { - results.back().push_back(n->node); + results.back().first.push_back(n->node); } + results.back().second = top->fx; if (results.size() == nbest_size) { break; } continue; } + const int end_nodes_size = end_nodes(node->pos).size(); + std::vector probs(end_nodes_size, 0.0); + std::vector perturbed_probs(end_nodes_size, 0.0); + std::vector adjusted_probs(end_nodes_size, 0.0); + const float Z = alpha[node->node_id]; + if (sample) { + float max_score = -1e8; + // Calculate the marginal and perturbed scores for stochastic search + for (int i = 0; i < end_nodes(node->pos).size(); i++) { + Node *lnode = end_nodes(node->pos)[i]; + // Calculate backwards transition score + probs[i] = + top->gx + alpha[lnode->node_id] + (inv_theta * lnode->score) - Z; + perturbed_probs[i] = probs[i] + Gumbel(); + if (perturbed_probs[i] > max_score) { + max_score = perturbed_probs[i]; + } + } + // Now constrain the sampled continuations to match the score of parent + for (int i = 0; i < adjusted_probs.size(); i++) { + // Use numerically stable version of truncated Gumbel: + // https://arxiv.org/pdf/1903.06059.pdf appendix B.3 + const float v = top->fx - perturbed_probs[i] + + 
std::log1p(-std::exp(perturbed_probs[i] - max_score)); + adjusted_probs[i] = top->fx - std::max(static_cast(0.0), v) - + std::log1p(std::exp(-std::abs(v))); + } + } + // Expands new node ending at node->pos - for (Node *lnode : end_nodes(node->pos)) { + for (int i = 0; i < end_nodes(node->pos).size(); i++) { + Node *lnode = end_nodes(node->pos)[i]; auto *hyp = hypothesis_allocator.Allocate(); hyp->node = lnode; - hyp->gx = lnode->score + top->gx; // just adds node->score - hyp->fx = - lnode->backtrace_score + top->gx; // backtrace_score is h(node). + if (sample) { + hyp->gx = probs[i]; + hyp->fx = adjusted_probs[i]; + } else { + hyp->gx = lnode->score + top->gx; // just adds node->score + hyp->fx = + lnode->backtrace_score + top->gx; // backtrace_score is h(node). + } hyp->next = top; agenda.push(hyp); } + static constexpr int kOneBillion = 1000000000; // 10^9. + if (hypothesis_allocator.size() >= kOneBillion) { + if (!printed_memory_warning) { + printed_memory_warning = true; + LOG(WARNING) << "Allocator size exceeds " << kOneBillion + << " with an example of length " << this->size(); + } + } + // When the input is too long or contains duplicated phrases, // `agenda` will get extremely big. Here we avoid this case by // dynamically shrinking the agenda. - constexpr int kMaxAgendaSize = 100000; + constexpr int kMaxAgendaSize = 10000; constexpr int kMinAgendaSize = 512; if (agenda.size() >= kMaxAgendaSize) { - LOG(WARNING) << "Too big agenda. shrinking"; // Keeps the top `kMinAgendaSize` hypothesis. Agenda new_agenda; + // Keeps the top hypothesis and the ones on their "next" paths. + model::FreeList new_allocator(kPreallocatedHypothesisSize); + // Map between old Hypothesis* and new Hypothesis*. + absl::flat_hash_map clone_map; + const int size = std::min(kMinAgendaSize, nbest_size * 10); + shrink_count++; + LOG(WARNING) << "Too big agenda size " << agenda.size() + << ". 
Shrinking (round " << shrink_count << ") down to " + << size << "."; for (int i = 0; i < size; ++i) { - new_agenda.push(agenda.top()); + const Hypothesis *top_hyp = agenda.top(); + Hypothesis *cloned_hyp = + CloneHypAndDependents(top_hyp, &clone_map, &new_allocator); + new_agenda.push(cloned_hyp); agenda.pop(); } agenda = std::move(new_agenda); + hypothesis_allocator.swap(new_allocator); } } return results; } -std::vector Lattice::Sample(float theta) { +std::vector Lattice::Sample(float inv_theta) { const int len = size(); if (len == 0) return {}; std::vector alpha(node_allocator_.size(), 0.0); - for (int pos = 0; pos <= len; ++pos) { - for (Node *rnode : begin_nodes_[pos]) { - for (Node *lnode : end_nodes_[pos]) { - alpha[rnode->node_id] = LogSumExp( - alpha[rnode->node_id], theta * lnode->score + alpha[lnode->node_id], - lnode == end_nodes_[pos][0]); - } - } - } + alpha = ForwardAlgorithm(inv_theta); auto *mt = random::GetRandomGenerator(); @@ -355,8 +527,8 @@ std::vector Lattice::Sample(float theta) { while (true) { probs.clear(); for (const Node *lnode : end_nodes_[node->pos]) { - probs.push_back(std::exp(static_cast(alpha[lnode->node_id] + - theta * lnode->score - Z))); + probs.push_back(std::exp(static_cast( + alpha[lnode->node_id] + inv_theta * lnode->score - Z))); } std::discrete_distribution dist(probs.begin(), probs.end()); node = end_nodes_[node->pos][dist(*mt)]; @@ -514,7 +686,7 @@ EncodeResult Model::Encode(absl::string_view normalized) const { PopulateNodes(&lattice); EncodeResult results; - for (const auto *node : lattice.Viterbi()) { + for (const auto *node : lattice.Viterbi().first) { results.emplace_back(node->piece, node->id); } @@ -529,26 +701,28 @@ NBestEncodeResult Model::NBestEncode(absl::string_view normalized, nbest_size = std::max(1, std::min(nbest_size, 1024)); + if (nbest_size <= 1) { + return {std::pair(Encode(normalized), 0.0)}; + } + Lattice lattice; lattice.SetSentence(normalized); PopulateNodes(&lattice); NBestEncodeResult 
nbest_results; - for (const auto &nbest : lattice.NBest(nbest_size)) { + for (const auto &nbest : lattice.NBest(nbest_size, false, 0.0)) { EncodeResult results; - float score = 0.0; - for (const auto *node : nbest) { - score += node->score; + for (const auto *node : nbest.first) { results.emplace_back(node->piece, node->id); } - nbest_results.emplace_back(results, score); + nbest_results.emplace_back(results, nbest.second); } return nbest_results; } EncodeResult Model::SampleEncode(absl::string_view normalized, - float theta) const { + float inv_theta) const { if (!status().ok() || normalized.empty()) { return {}; } @@ -558,13 +732,129 @@ EncodeResult Model::SampleEncode(absl::string_view normalized, PopulateNodes(&lattice); EncodeResult results; - for (const auto *node : lattice.Sample(theta)) { + for (const auto *node : lattice.Sample(inv_theta)) { results.emplace_back(node->piece, node->id); } return results; } +NBestEncodeResult Model::SampleEncodeAndScore(absl::string_view normalized, + float inv_theta, int samples, + bool wor, + bool include_best) const { + if (!status().ok() || normalized.empty()) { + return {}; + } + NBestEncodeResult results; + Lattice lattice; + lattice.SetSentence(normalized); + PopulateNodes(&lattice); + + const std::vector alpha = lattice.ForwardAlgorithm(inv_theta); + const float marginal = alpha[lattice.eos_node()->node_id]; + + if (include_best) { + if (!wor) { + LOG(ERROR) << "include_best not supported for wor false"; + return {}; + } + EncodeResult result; + const auto best_path = lattice.Viterbi(); + for (const auto *node : best_path.first) { + result.emplace_back(node->piece, node->id); + } + + // Inclusion probability if we always include the best is 1. 
+ results.emplace_back(result, 0.0); + } + + if (wor) { + // Draw k+1 samples as we need perturbed score of k+1th element + auto nbest_samples = lattice.NBest(samples + 1, true, inv_theta); + + if (include_best) { + std::vector> nbest_paths( + nbest_samples.size()); + for (int i = 0; i < nbest_samples.size(); i++) { + nbest_paths[i] = nbest_samples[i].first; + } + // Remove the best result from the samples if necessary + const auto best_path = lattice.Viterbi(); + + const int index_of_best = + (std::find(nbest_paths.begin(), nbest_paths.end(), best_path.first) - + nbest_paths.begin()); + + if (index_of_best != nbest_samples.size()) { + nbest_samples.erase(nbest_samples.begin() + index_of_best); + } else { + nbest_samples.pop_back(); + } + } + // We use the perturbed score of the k+1th element to calculate the + // inclusion probability. + const double kappa = static_cast(nbest_samples.back().second); + // Discard the last sample + nbest_samples.pop_back(); + for (const auto &nbest : nbest_samples) { + EncodeResult result; + float score = 0.0; + + for (const auto *node : nbest.first) { + score += (inv_theta * node->score); + result.emplace_back(node->piece, node->id); + } + + results.emplace_back(result, score - marginal); + } + + // Now calculate the inclusion probability + for (auto &it : results) { + // Only modify non best sample inclusion probabilities. + if (it.second != 0.0) { + const double x = it.second - kappa; + const double y = std::exp(x); + double inclusion_prob; + if (x <= -10) { + // Series expansion of the log Gumbel survival function up to eps. 
+ inclusion_prob = + x - (y / 2) + (std::pow(y, 2) / 24) - std::pow(y, 4) / 2880; + } else { + inclusion_prob = std::log(-std::expm1(-y)); + } + it.second = static_cast(inclusion_prob); + } + } + } else { + while (results.size() < samples) { + Lattice lattice; + lattice.SetSentence(normalized); + PopulateNodes(&lattice); + + float score = 0.0; + EncodeResult result; + const std::vector sample = lattice.Sample(inv_theta); + for (const auto *node : sample) { + result.emplace_back(node->piece, node->id); + score += (inv_theta * node->score); + } + results.emplace_back(result, score - marginal); + } + } + + return results; +} + +float Model::CalculateEntropy(absl::string_view normalized, + float inv_theta) const { + Lattice lattice; + lattice.SetSentence(normalized); + PopulateNodes(&lattice); + + return lattice.CalculateEntropy(inv_theta); +} + bool Model::VerifyOutputsEquivalent(absl::string_view expected, absl::string_view actual) const { auto compute_unigram_model_score = diff --git a/src/unigram_model.h b/src/unigram_model.h index 2f66a5f3..aa4f28f3 100644 --- a/src/unigram_model.h +++ b/src/unigram_model.h @@ -82,17 +82,28 @@ class Lattice { // After calling this method, The caller must set Node::score and Node::id. Node *Insert(int pos, int length); + using LatticePathWithScore = std::pair, float>; + // Returns Viterbi path. All nodes must be populated in advance. - std::vector Viterbi(); + LatticePathWithScore Viterbi(); + + // Runs forwards/backwards algorithm, returns vector with normalised + // transition probs. + std::vector ForwardAlgorithm(float theta) const; + std::vector BackwardAlgorithm(float theta) const; // Returns n-best results. - std::vector> NBest(size_t nbest_size); + std::vector NBest(size_t nbest_size, bool sample, + float theta); // Samples one path from the lattice according to the // generation probability (Product of piece probabilities). // `theta` is a smoothing parameter. 
std::vector Sample(float theta); + // Calculates the entropy of the lattice. + float CalculateEntropy(float theta) const; + // Populates marginal probability of every node in this lattice. // |freq| is the frequency of the sentence. // for (auto *node : all_nodes_) { @@ -127,8 +138,19 @@ class Model : public ModelInterface { EncodeResult SampleEncode(absl::string_view normalized, float theta) const override; + NBestEncodeResult SampleEncodeAndScore(absl::string_view normalized, + float theta, int samples, bool wor, + bool include_best) const override; + + float CalculateEntropy(absl::string_view normalized, + float theta) const override; + bool IsSampleEncodeAvailable() const override { return true; } + bool IsSampleEncodeAndScoreAvailable() const override { return true; } + + bool IsCalculateEntropyAvailable() const override { return true; } + bool IsNBestEncodeAvailable() const override { return true; } // Returns the minimum score in sentence pieces. @@ -151,6 +173,18 @@ class Model : public ModelInterface { bool VerifyOutputsEquivalent(absl::string_view expected, absl::string_view actual) const override; + enum EncoderVersion { + kOptimized, // The optimized encoder. + kOriginal // The original encoder. + }; + + void SetEncoderVersion(EncoderVersion encoder_version) { + encoder_version_ = encoder_version; + } + + // Returns the current encoder version in use. + EncoderVersion GetEncoderVersion() const { return encoder_version_; } + protected: // Builds a Trie index. void BuildTrie(std::vector> *pieces); @@ -173,6 +207,9 @@ class Model : public ModelInterface { // Maximum size of the return value of Trie, which corresponds // to the maximum size of shared common prefix in the sentence pieces. int trie_results_size_; + + // encoder version. 
+ EncoderVersion encoder_version_ = kOptimized; }; } // namespace unigram diff --git a/src/unigram_model_test.cc b/src/unigram_model_test.cc index dacec38d..bf22da30 100644 --- a/src/unigram_model_test.cc +++ b/src/unigram_model_test.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License.! +#include "unigram_model.h" + #include #include #include @@ -22,7 +24,6 @@ #include "testharness.h" #include "third_party/absl/strings/str_cat.h" #include "third_party/absl/strings/str_join.h" -#include "unigram_model.h" #include "util.h" namespace sentencepiece { @@ -161,11 +162,11 @@ TEST(LatticeTest, InsertTest) { TEST(LatticeTest, ViterbiFromIncompleteLatticeTest) { Lattice lattice; lattice.SetSentence("ABC"); - EXPECT_TRUE(lattice.Viterbi().empty()); + EXPECT_TRUE(lattice.Viterbi().first.empty()); // Still incomplete lattice.Insert(0, 1); - EXPECT_TRUE(lattice.Viterbi().empty()); + EXPECT_TRUE(lattice.Viterbi().first.empty()); lattice.Insert(1, 1); lattice.Insert(2, 1); @@ -198,16 +199,16 @@ TEST(LatticeTest, ViterbiTest) { InsertWithScore(&lattice, 0, 1, 0.0); // A InsertWithScore(&lattice, 1, 1, 0.0); // B InsertWithScore(&lattice, 2, 1, 0.0); // C - EXPECT_EQ("A B C", GetTokenized(lattice.Viterbi())); + EXPECT_EQ("A B C", GetTokenized(lattice.Viterbi().first)); InsertWithScore(&lattice, 0, 2, 2.0); // AB - EXPECT_EQ("AB C", GetTokenized(lattice.Viterbi())); + EXPECT_EQ("AB C", GetTokenized(lattice.Viterbi().first)); InsertWithScore(&lattice, 1, 2, 5.0); // BC - EXPECT_EQ("A BC", GetTokenized(lattice.Viterbi())); + EXPECT_EQ("A BC", GetTokenized(lattice.Viterbi().first)); InsertWithScore(&lattice, 0, 3, 10.0); // ABC - EXPECT_EQ("ABC", GetTokenized(lattice.Viterbi())); + EXPECT_EQ("ABC", GetTokenized(lattice.Viterbi().first)); } TEST(LatticeTest, NBestTest) { @@ -221,21 +222,175 @@ TEST(LatticeTest, NBestTest) { InsertWithScore(&lattice, 1, 2, 5.0); // BC InsertWithScore(&lattice, 0, 3, 10.0); // ABC - 
auto nbests = lattice.NBest(10); + auto nbests = lattice.NBest(10, false, 0.0); EXPECT_EQ(4, nbests.size()); - EXPECT_EQ("ABC", GetTokenized(nbests[0])); - EXPECT_EQ("A BC", GetTokenized(nbests[1])); - EXPECT_EQ("AB C", GetTokenized(nbests[2])); - EXPECT_EQ("A B C", GetTokenized(nbests[3])); + EXPECT_EQ("ABC", GetTokenized(nbests[0].first)); + EXPECT_EQ("A BC", GetTokenized(nbests[1].first)); + EXPECT_EQ("AB C", GetTokenized(nbests[2].first)); + EXPECT_EQ("A B C", GetTokenized(nbests[3].first)); - auto nbests0 = lattice.NBest(0); + auto nbests0 = lattice.NBest(0, false, 0.0); EXPECT_TRUE(nbests0.empty()); - auto nbests1 = lattice.NBest(1); + auto nbests1 = lattice.NBest(1, false, 0.0); EXPECT_EQ(nbests1.size(), 1); } +TEST(LatticeTest, NBestSampleTest) { + Lattice lattice; + lattice.SetSentence("ABC"); + + InsertWithScore(&lattice, 0, 1, 0.0); // A + InsertWithScore(&lattice, 1, 1, 0.0); // B + InsertWithScore(&lattice, 2, 1, 0.1); // C + InsertWithScore(&lattice, 0, 2, 0.2); // AB + InsertWithScore(&lattice, 1, 2, 0.5); // BC + InsertWithScore(&lattice, 0, 3, 1.0); // ABC + + // Calculate expected probabilities of each path + // Note that sampling without replacement affects the expected frequencies! 
+ const std::vector kInv_Theta = {0.0, 0.01, 0.5, 0.7, 1.0}; + for (const auto inv_theta : kInv_Theta) { + std::vector strings = {"ABC", "AB C", "A BC", "A B C"}; + std::map probs; + probs["ABC"] = std::exp(inv_theta * 1.0); + probs["AB C"] = std::exp(inv_theta * (0.2 + 0.1)); + probs["A BC"] = std::exp(inv_theta * (0.0 + 0.5)); + probs["A B C"] = std::exp(inv_theta * (0.0 + 0.0 + 0.1)); + + for (const auto &it : strings) { + EXPECT_EQ(1, probs.count(it)); + } + + double Z = 0.0; + for (const auto &it : probs) Z += it.second; + for (auto &it : probs) it.second /= Z; + + std::map, float> pair_probs; + for (const auto &first : strings) { + for (const auto &second : strings) { + if (first == second) { + pair_probs[std::make_pair(first, second)] = 0; + } else { + float first_prob = probs[first]; + float second_prob = probs[second] / (1 - first_prob); + pair_probs[std::make_pair(first, second)] = first_prob * second_prob; + } + } + } + + std::map inclusion_probs; + for (const auto &string : strings) { + float inclusion_prob = 0.0; + for (const auto &other_string : strings) { + inclusion_prob += pair_probs[std::make_pair(string, other_string)]; + } + for (const auto &other_string : strings) { + inclusion_prob += pair_probs[std::make_pair(other_string, string)]; + } + inclusion_probs[string] = inclusion_prob / 2; + } + + int kTrials = 10000; + + std::vector kNumSamples = {1, 2}; + + for (const auto num_samples : kNumSamples) { + std::map counts; + for (int i = 0; i < kTrials; i++) { + auto nbests = lattice.NBest(num_samples, true, inv_theta); + for (const auto &nbest : nbests) { + counts[GetTokenized(nbest.first)]++; + } + } + + EXPECT_EQ(inclusion_probs.size(), counts.size()); + // If we take multiple samples WOR, we have to use corrected probs. + std::map probs_to_use = + (num_samples == 1 ? 
probs : inclusion_probs); + + for (const auto &it : probs_to_use) { + EXPECT_NEAR(it.second, 1.0 * counts[it.first] / (kTrials * num_samples), + 0.02); + } + } + } +} + +TEST(LatticeTest, CalculateEntropyTest) { + Lattice lattice; + lattice.SetSentence("ABC"); + + InsertWithScore(&lattice, 0, 1, 0.0); // A + InsertWithScore(&lattice, 1, 1, 0.0); // B + InsertWithScore(&lattice, 2, 1, 0.1); // C + InsertWithScore(&lattice, 0, 2, 0.2); // AB + InsertWithScore(&lattice, 1, 2, 0.5); // BC + InsertWithScore(&lattice, 0, 3, 1.0); // ABC + + // Calculate expected probabilities of each path + const std::vector kInv_Theta = {0.0, 0.01, 0.5, 0.7, 1.0}; + for (const auto inv_theta : kInv_Theta) { + std::vector strings = {"ABC", "AB C", "A BC", "A B C"}; + std::map probs; + probs["ABC"] = std::exp(inv_theta * 1.0); + probs["AB C"] = std::exp(inv_theta * (0.2 + 0.1)); + probs["A BC"] = std::exp(inv_theta * (0.0 + 0.5)); + probs["A B C"] = std::exp(inv_theta * (0.0 + 0.0 + 0.1)); + + double Z = 0.0; + for (const auto &it : probs) Z += it.second; + for (auto &it : probs) it.second /= Z; + + for (const auto &it : strings) { + EXPECT_EQ(1, probs.count(it)); + } + float entropy = 0.0; + for (const auto &it : probs) { + entropy += (it.second * std::log(it.second)); + } + EXPECT_NEAR(-entropy, lattice.CalculateEntropy(inv_theta), 0.02); + } +} + +TEST(LatticeTest, ForwardAlgorithmTest) { + Lattice lattice; + lattice.SetSentence("ABC"); + + InsertWithScore(&lattice, 0, 1, 0.0); // A + InsertWithScore(&lattice, 1, 1, 0.0); // B + InsertWithScore(&lattice, 2, 1, 0.1); // C + InsertWithScore(&lattice, 0, 2, 0.2); // AB + InsertWithScore(&lattice, 1, 2, 0.5); // BC + InsertWithScore(&lattice, 0, 3, 1.0); // ABC + + const std::vector kInv_Theta = {0.0, 0.01, 0.5, 0.7, 1.0}; + for (const auto inv_theta : kInv_Theta) { + std::vector alpha = lattice.ForwardAlgorithm(inv_theta); + EXPECT_EQ(alpha.size(), 8); // 6 nodes, plus BOS, EOS + // only alpha[C], alpha[EOS] have non-zero alpha + for (int 
i : {0, 1, 2, 3}) { + for (const auto &node : lattice.begin_nodes(i)) { + if (i < 2) { + EXPECT_EQ(alpha[node->node_id], 0.0); + } else if (i == 2) { + float Z = std::log(std::exp(inv_theta * (0.0 + 0.0)) + + std::exp(inv_theta * 0.2)); + EXPECT_EQ(alpha[node->node_id], Z); + } else if (i == 3) { + float Z = + std::log(std::exp(inv_theta * (0.0 + 0.0 + 0.1)) + // A + B + C + std::exp(inv_theta * (0.2 + 0.1)) + // AB + C + std::exp(inv_theta * (0.0 + 0.5)) + // A + BC + std::exp(inv_theta * 1.0)); // ABC + EXPECT_EQ(Z, alpha[node->node_id]); + } + } + } + } +} + TEST(LatticeTest, PopulateMarginalTest) { Lattice lattice; lattice.SetSentence("ABC"); @@ -282,14 +437,14 @@ TEST(LatticeTest, SampleTest) { InsertWithScoreAndId(&lattice, 1, 2, 1.7, 4); // BC InsertWithScoreAndId(&lattice, 0, 3, 1.8, 5); // ABC - const std::vector kTheta = {0.0, 0.01, 0.5, 0.7, 1.0}; - for (int i = 0; i < kTheta.size(); ++i) { + const std::vector kInv_Theta = {0.0, 0.01, 0.5, 0.7, 1.0}; + for (int i = 0; i < kInv_Theta.size(); ++i) { std::map probs; // Expands all paths in the lattice. - probs["A B C"] = exp(kTheta[i] * (1.0 + 1.2 + 1.5)); // A B C - probs["AB C"] = exp(kTheta[i] * (1.6 + 1.5)); // AB C - probs["A BC"] = exp(kTheta[i] * (1.0 + 1.7)); // A BC - probs["ABC"] = exp(kTheta[i] * 1.8); // ABC + probs["A B C"] = exp(kInv_Theta[i] * (1.0 + 1.2 + 1.5)); // A B C + probs["AB C"] = exp(kInv_Theta[i] * (1.6 + 1.5)); // AB C + probs["A BC"] = exp(kInv_Theta[i] * (1.0 + 1.7)); // A BC + probs["ABC"] = exp(kInv_Theta[i] * 1.8); // ABC // Computes expected probabilities. double Z = 0.0; @@ -300,7 +455,7 @@ TEST(LatticeTest, SampleTest) { constexpr int kTrial = 100000; std::map freq; for (int n = 0; n < kTrial; ++n) { - freq[GetTokenized(lattice.Sample(kTheta[i]))]++; + freq[GetTokenized(lattice.Sample(kInv_Theta[i]))]++; } EXPECT_EQ(probs.size(), freq.size()); @@ -327,18 +482,18 @@ ModelProto MakeBaseModelProto() { } // Returns model protos in parameterized tests. 
-const std::vector &GetEncoderVersions() { - static const std::vector &v = - *new std::vector{EncoderVersion::kOptimized, - EncoderVersion::kOriginal}; +const std::vector &GetEncoderVersions() { + static const std::vector &v = + *new std::vector{Model::kOptimized, + Model::kOriginal}; return v; } -class UnigramModelTest : public test::TestWithParam { +class UnigramModelTest : public test::TestWithParam { protected: void SetUp() override { encoder_version_ = GetParam(); } void TearDown() override {} - EncoderVersion encoder_version_; + Model::EncoderVersion encoder_version_; }; void AddPiece(ModelProto *model_proto, const std::string &piece, @@ -361,6 +516,102 @@ TEST(UnigramModelTest, SetUnigramModelTest) { model.model_proto().SerializeAsString()); } +TEST(UnigramModelTest, SampleEncodeAndScoreTest) { + // Test whether inclusion probabilities are correct + ModelProto model_proto = MakeBaseModelProto(); + AddPiece(&model_proto, "A", 0.0); // 3 + AddPiece(&model_proto, "B", 0.0); // 4 + AddPiece(&model_proto, "C", 0.1); // 5 + AddPiece(&model_proto, "AB", 0.2); // 6 + AddPiece(&model_proto, "BC", 0.5); // 7 + AddPiece(&model_proto, "ABC", 1.0); // 8 + + Model model(model_proto); + + Lattice lattice; + lattice.SetSentence("ABC"); + model.PopulateNodes(&lattice); + + std::vector kInv_Theta = {0.0, 1.0}; + + for (const auto inv_theta : kInv_Theta) { + std::vector strings = {"ABC", "AB C", "A BC", "A B C"}; + std::map probs; + probs["ABC"] = std::exp(inv_theta * 1.0); + probs["AB C"] = std::exp(inv_theta * (0.2 + 0.1)); + probs["A BC"] = std::exp(inv_theta * (0.0 + 0.5)); + probs["A B C"] = std::exp(inv_theta * (0.0 + 0.0 + 0.1)); + + for (const auto &it : strings) { + EXPECT_EQ(1, probs.count(it)); + } + + double Z = 0.0; + for (const auto &it : probs) Z += it.second; + for (auto &it : probs) it.second /= Z; + + std::map, float> pair_probs; + for (const auto &first : strings) { + for (const auto &second : strings) { + if (first == second) { + 
pair_probs[std::make_pair(first, second)] = 0; + } else { + const float first_prob = probs[first]; + const float second_prob = probs[second] / (1 - first_prob); + pair_probs[std::make_pair(first, second)] = first_prob * second_prob; + } + } + } + + std::map inclusion_probs; + for (const auto &string : strings) { + float inclusion_prob = 0.0; + for (const auto &other_string : strings) { + inclusion_prob += pair_probs[std::make_pair(string, other_string)]; + } + for (const auto &other_string : strings) { + inclusion_prob += pair_probs[std::make_pair(other_string, string)]; + } + inclusion_probs[string] = inclusion_prob / 2; + } + std::vector kNumSamples = {1, 2}; + + for (const auto num_samples : kNumSamples) { + std::map counts; + std::map scores; + int kTrials = 50000; + for (int i = 0; i < kTrials; i++) { + NBestEncodeResult sample = model.SampleEncodeAndScore( + "ABC", inv_theta, num_samples, true, false); + + for (const auto &it : sample) { + std::vector tokens; + for (const auto &inner_it : it.first) { + tokens.push_back(std::string(inner_it.first)); + } + std::string sample_string = absl::StrJoin(tokens, " "); + counts[sample_string] += 1; + // use the fact that E(1_{i in sample} / score of i) = 1 + // see https://arxiv.org/pdf/1903.06059.pdf appendix D + scores[sample_string] += std::exp(-it.second); + } + } + + // Check that counts and probs are correct + std::map probs_to_use = + (num_samples == 1 ? 
probs : inclusion_probs); + + for (const auto &it : scores) Z += it.second; + for (const auto &it : probs_to_use) { + EXPECT_NEAR(it.second, 1.0 * counts[it.first] / (kTrials * num_samples), + 0.02); + // The expectation is quite loose, use a higher tolerance + EXPECT_NEAR(1.0, scores[it.first] / kTrials, 0.30); + } + } + } +} + TEST_P(UnigramModelTest, PieceToIdTest) { ModelProto model_proto = MakeBaseModelProto(); @@ -370,7 +621,7 @@ TEST_P(UnigramModelTest, PieceToIdTest) { AddPiece(&model_proto, "d", 0.4); Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); EXPECT_EQ(model_proto.SerializeAsString(), model.model_proto().SerializeAsString()); @@ -428,7 +679,7 @@ TEST_P(UnigramModelTest, PopulateNodesAllUnknownsTest) { ModelProto model_proto = MakeBaseModelProto(); AddPiece(&model_proto, "x"); Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); Lattice lattice; lattice.SetSentence("abc"); @@ -452,7 +703,7 @@ TEST_P(UnigramModelTest, PopulateNodesTest) { AddPiece(&model_proto, "bc", 0.4); // 6 Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); Lattice lattice; lattice.SetSentence("abc"); @@ -487,7 +738,7 @@ TEST_P(UnigramModelTest, PopulateNodesWithUnusedTest) { model_proto.mutable_pieces(6)->set_type(ModelProto::SentencePiece::UNUSED); Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); Lattice lattice; lattice.SetSentence("abc"); @@ -512,7 +763,7 @@ TEST_P(UnigramModelTest, ModelNBestTest) { AddPiece(&model_proto, "abc", 10.0); // 8 Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); auto nbest = model.NBestEncode("", 10); EXPECT_EQ(1, nbest.size()); @@ -551,7 
+802,7 @@ TEST_P(UnigramModelTest, EncodeTest) { ModelProto::SentencePiece::USER_DEFINED); Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); EncodeResult result; @@ -634,7 +885,7 @@ TEST_P(UnigramModelTest, EncodeWithUnusedTest) { // No unused. { Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); const auto result = model.Encode("abcd"); EXPECT_EQ(1, result.size()); EXPECT_EQ("abcd", result[0].first); @@ -643,7 +894,7 @@ TEST_P(UnigramModelTest, EncodeWithUnusedTest) { { model_proto.mutable_pieces(3)->set_type(ModelProto::SentencePiece::UNUSED); Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); const auto result = model.Encode("abcd"); EXPECT_EQ(2, result.size()); EXPECT_EQ("abc", result[0].first); @@ -654,7 +905,7 @@ TEST_P(UnigramModelTest, EncodeWithUnusedTest) { model_proto.mutable_pieces(3)->set_type(ModelProto::SentencePiece::UNUSED); model_proto.mutable_pieces(5)->set_type(ModelProto::SentencePiece::UNUSED); Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); const auto result = model.Encode("abcd"); EXPECT_EQ(2, result.size()); EXPECT_EQ("abc", result[0].first); @@ -668,7 +919,7 @@ TEST_P(UnigramModelTest, EncodeWithUnusedTest) { model_proto.mutable_pieces(4)->set_type(ModelProto::SentencePiece::UNUSED); model_proto.mutable_pieces(5)->set_type(ModelProto::SentencePiece::NORMAL); Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); const auto result = model.Encode("abcd"); EXPECT_EQ(2, result.size()); EXPECT_EQ("ab", result[0].first); @@ -688,7 +939,7 @@ TEST_P(UnigramModelTest, VerifyOutputsEquivalent) { AddPiece(&model_proto, "c", 2.0); // 9 
AddPiece(&model_proto, "d", 1.0); // 10 Model model(model_proto); - EXPECT_TRUE(model.SetEncoderVersion(encoder_version_).ok()); + model.SetEncoderVersion(encoder_version_); // Equivalent outputs. EXPECT_TRUE(model.VerifyOutputsEquivalent("", "")); EXPECT_TRUE(model.VerifyOutputsEquivalent("a b", "a b")); diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc index e5dc8c06..d58c4080 100644 --- a/src/unigram_model_trainer.cc +++ b/src/unigram_model_trainer.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License.! +#include "unigram_model_trainer.h" + #include #include #include @@ -27,15 +29,18 @@ #include "sentencepiece_trainer.h" #include "third_party/absl/container/flat_hash_map.h" #include "third_party/absl/memory/memory.h" +#include "third_party/absl/strings/str_replace.h" +#include "third_party/absl/strings/str_split.h" #include "third_party/esaxx/esa.hxx" // Suffix array library. #include "unicode_script.h" -#include "unigram_model_trainer.h" #include "util.h" namespace sentencepiece { namespace unigram { namespace { +constexpr char32 kSentenceBoundary = 0x0000; + double Digamma(double x) { double result = 0.0; for (; x < 7; ++x) result -= 1 / x; @@ -59,6 +64,40 @@ void ToLogProb(IT begin, IT end) { it->second = std::log(static_cast(it->second)) - logsum; } } + +template +class BoundedPriorityQueue { + public: + explicit BoundedPriorityQueue(size_t size) : size_(size) {} + ~BoundedPriorityQueue() = default; + + void push(T elem, int64 score) { + if (queue_.size() > 4 * size_) resize(); + if (sorted && queue_.size() >= size_ && queue_[size_ - 1].second > score) + return; + queue_.emplace_back(elem, score); + } + + const std::vector> &get() { + resize(); + return queue_; + } + + private: + void resize() { + std::sort(queue_.begin(), queue_.end(), [](const auto &p1, const auto &p2) { + return (p1.second > p2.second || + (p1.second == p2.second && p1.first < p2.first)); + }); + 
sorted = true; + if (queue_.size() > size_) queue_.resize(size_); + } + + bool sorted = false; + size_t size_ = 0; + std::vector> queue_; +}; + } // namespace TrainerModel::TrainerModel(const TrainerSpec &trainer_spec, @@ -95,9 +134,15 @@ void TrainerModel::SetSentencePieces(SentencePieces &&sentencepieces) { CHECK(status().ok()); } +TrainerModel::SentencePieces Trainer::MakeSeedSentencePieces() { + return trainer_spec_.train_extremely_large_corpus() + ? MakeSeedSentencePiecesInternal() + : MakeSeedSentencePiecesInternal(); +} + // Returns seed sentencepieces for EM training. template -TrainerModel::SentencePieces Trainer::MakeSeedSentencePieces() const { +TrainerModel::SentencePieces Trainer::MakeSeedSentencePiecesInternal() { CHECK(!sentences_.empty()); CHECK(!required_chars_.empty()); @@ -105,20 +150,59 @@ TrainerModel::SentencePieces Trainer::MakeSeedSentencePieces() const { // Pretokenizer is used as a constraint of piece extractions. const auto *pretokenizer = SentencePieceTrainer::GetPretokenizerForTraining(); + auto pretokenize_or_rewrite = [&](std::pair *w) { + if (pretokenizer) { + std::vector chars; + for (const auto &w : pretokenizer->PreTokenize(w->first)) { + for (const auto &c : string_util::UTF8ToUnicodeText(w)) { + chars.push_back(c); + } + chars.push_back(kSentenceBoundary); + } + return chars; + } else if (!trainer_spec_.pretokenization_delimiter().empty()) { + // When delimiter is specified, tokenize the input with the delimiter. + // For EM training, we assume that the delimiter doesn't exist and + // rewrite the original sentence. + std::vector chars; + absl::string_view delimiter = trainer_spec_.pretokenization_delimiter(); + for (const auto &w : absl::StrSplit(w->first, delimiter)) { + for (const auto &c : string_util::UTF8ToUnicodeText(w)) { + chars.push_back(c); + } + chars.push_back(kSentenceBoundary); + } + // Removes the delimiter. 
+ w->first = absl::StrReplaceAll(w->first, {{delimiter, ""}}); + return chars; + } + return string_util::UTF8ToUnicodeText(w->first); + }; + // Merges all sentences into one array with 0x0000 delimiter. std::vector array; absl::flat_hash_map all_chars; - constexpr char32 kSentenceBoundary = 0x0000; - for (const auto &w : sentences_) { - const auto ut = string_util::UTF8ToUnicodeText( - pretokenizer ? pretokenizer->PreTokenize(w.first) : w.first); + const bool is_tsv = trainer_spec_.input_format() == "tsv"; + + for (auto &w : sentences_) { + const auto ut = pretokenize_or_rewrite(&w); for (const auto &c : ut) { array.push_back(c); if (c != kUNKChar && c != kSentenceBoundary) { all_chars[string_util::UnicodeCharToUTF8(c)] += w.second; } } + array.push_back(kSentenceBoundary); // sentence boundary marker. + + // Naive workaround to over-sample the input. + // In TSV mode, the frequency field is not used to extract the seed piece. + // we can at least extract all pieces by copying the input because + // the occurrence gets at least larger than or equals to 2. + if (is_tsv) { + for (const auto &c : ut) array.push_back(c); + array.push_back(kSentenceBoundary); + } } CHECK_LE(array.size(), @@ -139,16 +223,18 @@ TrainerModel::SentencePieces Trainer::MakeSeedSentencePieces() const { CHECK_EQ(0, esaxx(array.begin(), SA.begin(), L.begin(), R.begin(), D.begin(), n, kAlphabetSize, node_num)); - LOG(INFO) << "Extracting frequent sub strings..."; - std::vector> substr_index; + LOG(INFO) << "Extracting frequent sub strings... 
node_num=" << node_num; + BoundedPriorityQueue queue( + static_cast(trainer_spec_.seed_sentencepiece_size())); + for (node_int_type i = 0; i < node_num; ++i) { const node_int_type offset = SA[L[i]]; const node_int_type len = D[i]; if (len <= 1) { continue; } - const char32 *begin = &array[0] + offset; - const char32 *end = &array[0] + offset + len; + const char32 *begin = &array[offset]; + const char32 *end = &array[offset + len]; // Skips if a substring contains a sentence boundary. if (std::find(begin, end, kSentenceBoundary) != end) { continue; @@ -161,7 +247,7 @@ TrainerModel::SentencePieces Trainer::MakeSeedSentencePieces() const { // character-wise coverage is the default score. const node_int_type freq = R[i] - L[i]; const node_int_type score = freq * len; - substr_index.emplace_back(i, score); + queue.push(i, score); } // all_chars must be included in the seed sentencepieces. @@ -170,20 +256,15 @@ TrainerModel::SentencePieces Trainer::MakeSeedSentencePieces() const { seed_sentencepieces.emplace_back(it); } - // Sort by the coverage of sub strings. - for (const auto &p : Sorted(substr_index)) { + for (const auto &p : queue.get()) { const node_int_type offset = SA[L[p.first]]; const node_int_type len = D[p.first]; CHECK_GT(len, 0); const char32 *begin = &array[offset]; const char32 *end = &array[offset + len]; const UnicodeText uw(begin, end); - CHECK(IsValidSentencePiece(uw)); // just in case. const std::string w = string_util::UnicodeTextToUTF8(uw); - if (seed_sentencepieces.size() == - static_cast(trainer_spec_.seed_sentencepiece_size())) { - break; - } + CHECK(IsValidSentencePiece(uw)); // just in case. 
CHECK(!port::ContainsKey(all_chars, w)); seed_sentencepieces.emplace_back(w, p.second); } @@ -222,7 +303,7 @@ std::vector Trainer::RunEStep(const TrainerModel &model, float *obj, lattice.SetSentence(w); model.PopulateNodes(&lattice); const float Z = lattice.PopulateMarginal(freq, &expected[n]); - ntokens[n] += lattice.Viterbi().size(); + ntokens[n] += lattice.Viterbi().first.size(); CHECK(!std::isnan(Z)) << "likelihood is NAN. Input sentence may be too long"; objs[n] -= Z / all_sentence_freq; @@ -296,17 +377,17 @@ TrainerModel::SentencePieces Trainer::PruneSentencePieces( const auto &w = sentencepieces[i]; lattice.SetSentence(w.first); model.PopulateNodes(&lattice); - const auto nbests = lattice.NBest(2); + const auto nbests = lattice.NBest(2, false, 0.0); if (nbests.size() == 1) { // No second-best result is found. always keep this sentencepiece. always_keep[i] = true; continue; - } else if (nbests[0].size() >= 2) { + } else if (nbests[0].first.size() >= 2) { // Can safely remove this sentencepiece if its Viterbi path is split. always_keep[i] = false; - } else if (nbests[0].size() == 1) { + } else if (nbests[0].first.size() == 1) { always_keep[i] = true; - for (const auto *node : nbests[1]) { + for (const auto *node : nbests[1].first) { alternatives[i].push_back(node->id); } } @@ -338,7 +419,7 @@ TrainerModel::SentencePieces Trainer::PruneSentencePieces( lattice.SetSentence(w.first); model.PopulateNodes(&lattice); vsums[n] += w.second; - for (const auto *node : lattice.Viterbi()) { + for (const auto *node : lattice.Viterbi().first) { if (node->id >= 0) { freqs[n][node->id] += w.second; inverteds[n][node->id].push_back(i); @@ -388,10 +469,10 @@ TrainerModel::SentencePieces Trainer::PruneSentencePieces( // After removing the sentencepiece[i], its frequency freq[i] is // re-assigned to alternatives. 
- // new_sum = current_sum - freq[i] + freq[i] * alternatives.size() - // = current_sum + freq[i] (alternatives - 1) + // new_sum = current_sum - freq[i] + freq[i] * alternatives[i].size() + // = current_sum + freq[i] * (alternatives[i] - 1) const float logsum_alt = std::log( - static_cast(sum + freq[i] * (alternatives.size() - 1))); + static_cast(sum + freq[i] * (alternatives[i].size() - 1))); // The frequencies of altenatives are increased by freq[i]. float logprob_alt = 0.0; @@ -473,13 +554,8 @@ util::Status Trainer::Train() { RETURN_IF_ERROR(model.status()); RETURN_IF_ERROR(LoadSentences()); - if (trainer_spec_.train_extremely_large_corpus()) { - auto seed_sentencepieces = MakeSeedSentencePieces(); - model.SetSentencePieces(std::move(seed_sentencepieces)); - } else { - auto seed_sentencepieces = MakeSeedSentencePieces(); - model.SetSentencePieces(std::move(seed_sentencepieces)); - } + auto seed_sentencepieces = MakeSeedSentencePieces(); + model.SetSentencePieces(std::move(seed_sentencepieces)); if (trainer_spec_.split_by_whitespace()) { SplitSentencesByWhitespace(); diff --git a/src/unigram_model_trainer.h b/src/unigram_model_trainer.h index 91fbeb47..c6562e65 100644 --- a/src/unigram_model_trainer.h +++ b/src/unigram_model_trainer.h @@ -68,6 +68,8 @@ class Trainer : public TrainerInterface { : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, denormalizer_spec) {} + TrainerModel::SentencePieces MakeSeedSentencePieces(); + util::Status Train() override; private: @@ -78,7 +80,7 @@ class Trainer : public TrainerInterface { // node_int_type should be of integer type (int32 or int64), // determined by train_extremely_large_corpus. template - TrainerModel::SentencePieces MakeSeedSentencePieces() const; + TrainerModel::SentencePieces MakeSeedSentencePiecesInternal(); // Executes the E step of EM and returns expected count. // The index of return array is the vocab id. 
diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc index ffe515e4..31da90ba 100644 --- a/src/unigram_model_trainer_test.cc +++ b/src/unigram_model_trainer_test.cc @@ -12,18 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License.! +#include "unigram_model_trainer.h" + +#include +#include + +#include "filesystem.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" #include "testharness.h" #include "third_party/absl/strings/str_cat.h" #include "third_party/absl/strings/str_join.h" -#include "unigram_model_trainer.h" #include "util.h" namespace sentencepiece { namespace unigram { -namespace { // Space symbol #define WS "\xe2\x96\x81" @@ -35,6 +39,117 @@ TEST(UnigramTrainerTest, TrainerModelTest) { EXPECT_EQ(EncodeResult(), model.Encode("test")); } +struct TrainerResult { + std::string sentence_pieces; + std::vector> seed_pieces_and_probs; +}; + +TrainerResult RunTrainer(const std::vector& input, int size, + const bool use_dp = false, const float dp_noise = 0.0, + const uint32 dp_clip = 0) { + const std::string input_file = + util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "input"); + const std::string model_prefix = + util::JoinPath(absl::GetFlag(FLAGS_test_tmpdir), "model"); + { + auto output = filesystem::NewWritableFile(input_file); + for (const auto& line : input) { + output->WriteLine(line); + } + } + + TrainerSpec trainer_spec; + trainer_spec.set_input_format("tsv"); + trainer_spec.set_model_type(TrainerSpec::UNIGRAM); + trainer_spec.add_input(input_file); + trainer_spec.set_vocab_size(size - 3); // remove , , + trainer_spec.set_model_prefix(model_prefix); + + trainer_spec.set_enable_differential_privacy(use_dp); + trainer_spec.set_differential_privacy_noise_level(dp_noise); + trainer_spec.set_differential_privacy_clipping_threshold(dp_clip); + + NormalizerSpec normalizer_spec; + 
normalizer_spec.set_name("identity"); + normalizer_spec.set_add_dummy_prefix(false); + + NormalizerSpec denormalizer_spec; + + std::vector> seed_pieces; + + { + Trainer trainer(trainer_spec, normalizer_spec, denormalizer_spec); + EXPECT_OK(trainer.LoadSentences()); + TrainerModel::SentencePieces res = trainer.MakeSeedSentencePieces(); + + for (const auto& piece : res) { + seed_pieces.emplace_back(piece.first, piece.second); + } + } + + std::vector pieces; + + { + Trainer trainer(trainer_spec, normalizer_spec, denormalizer_spec); + EXPECT_TRUE(trainer.Train().ok()); + + SentencePieceProcessor processor; + EXPECT_TRUE(processor.Load(model_prefix + ".model").ok()); + + const auto& model = processor.model_proto(); + + // remove , , + for (int i = 3; i < model.pieces_size(); ++i) { + pieces.emplace_back(model.pieces(i).piece()); + } + } + + TrainerResult res; + res.seed_pieces_and_probs = seed_pieces; + std::sort(pieces.begin(), pieces.end()); + res.sentence_pieces = absl::StrJoin(pieces, " "); + return res; +} + +TEST(UnigramTrainerTest, BasicTest) { + const auto& res = RunTrainer( + {"magnanimity \t 5", "Pineapple \t 6", "i have an apple and a pen \t 1", + "Overly \t 6", "Available \t 3"}, + 30); + + // Check seed pieces. + EXPECT_EQ(27, res.seed_pieces_and_probs.size()); + + // Check final pieces. + EXPECT_EQ("A O P a an apple b d e g h i l le m n p r t v ve y ▁ ▁an", + res.sentence_pieces); +} + +TEST(UnigramTrainerTest, BasicDPTest) { + // no noise, clipping. + { + const auto& res = RunTrainer( + {"magnanimity \t 5", "Pineapple \t 6", "i have an apple and a pen \t 1", + "Overly \t 6", "Available \t 5"}, + 22, true /*use_dp*/, 0 /*dp_noise*/, 4 /*dp_clipping*/); + + // Got 16 instead of 27 seeds. + EXPECT_EQ(16, res.seed_pieces_and_probs.size()); + + // And they are equiv to if the last sentence was not there. 
+ const auto& res_nodp = RunTrainer( + {"magnanimity \t 5", "Pineapple \t 6", "Overly \t 6", "Available \t 5"}, + 22); + + EXPECT_EQ(res.seed_pieces_and_probs, res_nodp.seed_pieces_and_probs); + + // Check final pieces. + EXPECT_EQ(res.sentence_pieces, res_nodp.sentence_pieces); + } +} + +namespace { + static constexpr char kTestInputData[] = "wagahaiwa_nekodearu.txt"; TEST(UnigramTrainerTest, EndToEndTest) { @@ -77,12 +192,13 @@ TEST(UnigramTrainerTest, EndToEndTest) { .ok()); // TODO(taku): Temporally disable this test on Windows. #ifndef OS_WIN - EXPECT_EQ(WS - " 吾輩 《 わが はい 》 は 猫 である 。 名前 はまだ 無い 。 " - "どこ で 生 れた か とん と 見当 《 けん とう 》 が つか ぬ 。 " - "何でも 薄 暗 い じめ じめ した 所で ニャーニャー " - "泣 い ていた 事 だけは 記憶 している 。", - absl::StrJoin(tok, " ")); + LOG(INFO) << "[" << absl::StrJoin(tok, " ") << std::endl; + EXPECT_EQ( + WS + " 吾輩 《 わが はい 》 は猫である 。 名前はまだ 無 い 。 どこ で 生 れた " + "か とん と 見当 《 けん とう 》 が つか ぬ 。 何でも 薄 暗 い じめ じめ " + "した 所で ニャーニャー 泣 い ていた 事 だけは 記憶 している 。", + absl::StrJoin(tok, " ")); #endif } diff --git a/src/util.cc b/src/util.cc index 58225ae2..538b00b2 100644 --- a/src/util.cc +++ b/src/util.cc @@ -14,22 +14,34 @@ #include "util.h" +#include #include namespace sentencepiece { + namespace { constexpr unsigned int kDefaultSeed = static_cast(-1); -static unsigned int g_seed = kDefaultSeed; +static std::atomic g_seed = kDefaultSeed; +static std::atomic g_minloglevel = 0; } // namespace void SetRandomGeneratorSeed(unsigned int seed) { - if (seed != kDefaultSeed) g_seed = seed; + if (seed != kDefaultSeed) g_seed.store(seed); } uint32 GetRandomGeneratorSeed() { - return g_seed == kDefaultSeed ? std::random_device{}() : g_seed; + try { + return g_seed == kDefaultSeed ? std::random_device{}() : g_seed.load(); + } catch (...) 
{ + return g_seed.load(); + } } +namespace logging { +int GetMinLogLevel() { return g_minloglevel.load(); } +void SetMinLogLevel(int v) { g_minloglevel.store(v); } +} // namespace logging + namespace string_util { // mblen sotres the number of bytes consumed after decoding. @@ -210,7 +222,6 @@ std::vector StrSplitAsCSV(absl::string_view text) { std::vector result; for (; str < eos; ++str) { - while (*str == ' ' || *str == '\t') ++str; if (*str == '"') { start = ++str; end = start; @@ -237,15 +248,17 @@ std::vector StrSplitAsCSV(absl::string_view text) { #ifdef OS_WIN namespace win32 { -std::wstring Utf8ToWide(const std::string &input) { - int output_length = - ::MultiByteToWideChar(CP_UTF8, 0, input.c_str(), -1, nullptr, 0); +std::wstring Utf8ToWide(absl::string_view input) { + int output_length = ::MultiByteToWideChar( + CP_UTF8, 0, input.data(), static_cast(input.size()), nullptr, 0); output_length = output_length <= 0 ? 0 : output_length - 1; if (output_length == 0) { return L""; } std::unique_ptr input_wide(new wchar_t[output_length + 1]); - const int result = ::MultiByteToWideChar(CP_UTF8, 0, input.c_str(), -1, + std::fill(input_wide.get(), input_wide.get() + output_length + 1, L'\0'); + const int result = ::MultiByteToWideChar(CP_UTF8, 0, input.data(), + static_cast(input.size()), input_wide.get(), output_length + 1); std::wstring output; if (result > 0) { @@ -253,24 +266,6 @@ std::wstring Utf8ToWide(const std::string &input) { } return output; } - -std::string WideToUtf8(const std::wstring &input) { - const int output_length = ::WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1, - nullptr, 0, nullptr, nullptr); - if (output_length == 0) { - return ""; - } - - std::unique_ptr input_encoded(new char[output_length + 1]); - const int result = - ::WideCharToMultiByte(CP_UTF8, 0, input.c_str(), -1, input_encoded.get(), - output_length + 1, nullptr, nullptr); - std::string output; - if (result > 0) { - output.assign(input_encoded.get()); - } - return output; -} } 
// namespace win32 #endif } // namespace sentencepiece diff --git a/src/util.h b/src/util.h index 673e8f6b..51102919 100644 --- a/src/util.h +++ b/src/util.h @@ -36,10 +36,6 @@ #include #endif -#if !defined(__APPLE__) && !defined(_WIN32) && BYTE_ORDER == __BIG_ENDIAN -#define IS_BIG_ENDIAN -#endif - namespace sentencepiece { template std::ostream &operator<<(std::ostream &out, const std::vector &v) { @@ -54,17 +50,6 @@ uint32 GetRandomGeneratorSeed(); // String utilities namespace string_util { -struct string_view_hash { - // DJB hash function. - inline size_t operator()(const absl::string_view &sp) const { - size_t hash = 5381; - for (size_t i = 0; i < sp.size(); ++i) { - hash = ((hash << 5) + hash) + sp[i]; - } - return hash; - } -}; - template inline bool lexical_cast(absl::string_view arg, Target *result) { std::stringstream ss; @@ -99,7 +84,6 @@ inline bool lexical_cast(absl::string_view arg, std::string *result) { template inline bool DecodePOD(absl::string_view str, T *result) { - CHECK_NOTNULL(result); if (sizeof(*result) != str.size()) { return false; } @@ -307,9 +291,9 @@ std::mt19937 *GetRandomGenerator(); template class ReservoirSampler { public: - explicit ReservoirSampler(std::vector *sampled, size_t size) + explicit ReservoirSampler(std::vector *sampled, uint64 size) : sampled_(sampled), size_(size), engine_(GetRandomGeneratorSeed()) {} - explicit ReservoirSampler(std::vector *sampled, size_t size, size_t seed) + explicit ReservoirSampler(std::vector *sampled, uint64 size, uint64 seed) : sampled_(sampled), size_(size), engine_(seed) {} virtual ~ReservoirSampler() {} @@ -320,18 +304,18 @@ class ReservoirSampler { if (sampled_->size() < size_) { sampled_->push_back(item); } else { - std::uniform_int_distribution dist(0, total_ - 1); - const size_t n = dist(engine_); + std::uniform_int_distribution dist(0, total_ - 1); + const uint64 n = dist(engine_); if (n < sampled_->size()) (*sampled_)[n] = item; } } - size_t total_size() const { return total_; } + 
uint64 total_size() const { return total_; } private: std::vector *sampled_ = nullptr; - size_t size_ = 0; - size_t total_ = 0; + uint64 size_ = 0; + uint64 total_ = 0; std::mt19937 engine_; }; @@ -417,10 +401,6 @@ class StatusBuilder { #define CHECK_GT_OR_RETURN(a, b) CHECK_OR_RETURN((a) > (b)) #define CHECK_LT_OR_RETURN(a, b) CHECK_OR_RETURN((a) < (b)) -#ifdef IS_BIG_ENDIAN -inline uint32 Swap32(uint32 x) { return __builtin_bswap32(x); } -#endif - } // namespace util namespace port { diff --git a/tensorflow/.gitignore b/tensorflow/.gitignore deleted file mode 100644 index 09189687..00000000 --- a/tensorflow/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -build/ -sdist/ -dist/ -tmp/ -*py[cod] diff --git a/tensorflow/README.md b/tensorflow/README.md deleted file mode 100644 index 2dd41744..00000000 --- a/tensorflow/README.md +++ /dev/null @@ -1,40 +0,0 @@ -# SentencePiece TensorFlow module - -## WARNING -tf_sentencepiece is s going to be deprecated in tensorflow 2.3.0. tf_sentencepiece for tensorflow 2.2.0x is the last release of tf_sentencepiece. Use [tensoflow_text](https://github.com/tensorflow/text) to run sentencepiece on tensorflow. - -Example -```Python -import tensorflow as tf -import tensorflow_text as text - -model = open('test_model.model', 'rb').read() -s1 = text.SentencepieceTokenizer(model=model) -print(s1.tokenize(['hello world'])) -print(s1.tokenize_with_offsets(['hello world'])) - -s2 = text.SentencepieceTokenizer(model=model, out_type=tf.dtypes.string) -print(s2.tokenize(['hello world'])) -print(s2.tokenize_with_offsets(['hello world'])) -``` - -## Introduction - -SentencePiece TensorFlow module implements the encode (text to id/piece) and decode (id/piece to text) operations which are executed lazily on top of TensorFlow's Session mechanism. This module allows to make an end-to-end training/inference computatation graph by directly feeding raw sentences with the tf.placeholder. 
-The SentencePiece model (model proto) is passed as an attribute of the TensorFlow operation -and embedded into the TensorFlow graph so the model and graph become purely self-contained. - -## Build and Install SentencePiece -For Linux (x64), macOS environment: - -``` -% pip install tf_sentencepiece -``` - -## Usage -Use pydoc to see the usage instruction -``` -% pydoc sentencepiece_processor_ops -``` - -[Sample code](https://colab.research.google.com/drive/1rQ0tgXmHv02sMO6VdTO0yYaTvc1Yv1yP) diff --git a/test.bat b/test.bat deleted file mode 100644 index 7c42097c..00000000 --- a/test.bat +++ /dev/null @@ -1,38 +0,0 @@ -set PLATFORM=%1 -if "%PLATFORM%"=="" set PLATFORM=x64 -set PLATFORM_PREFIX= -if "%PLATFORM%"=="x64" set PLATFORM_PREFIX=-x64 -set _CL_=/utf-8 -set PATH=c:\Program Files\Git\usr\bin;c:\MinGW\bin;%PATH% -set CURRENT_PATH=%~dp0 -set LIBRARY_PATH=%CURRENT_PATH%build\root - -mkdir build -cd build - -cmake .. -A %PLATFORM% -DSPM_BUILD_TEST=ON -DSPM_ENABLE_SHARED=OFF -DCMAKE_INSTALL_PREFIX=%LIBRARY_PATH% -cmake --build . --config Release --target install || goto :error -ctest -C Release || goto :error -cpack || goto :error - -cd ..\python -rem call :BuildPython C:\Python27%PLATFORM_PREFIX% -call :BuildPython C:\Python35%PLATFORM_PREFIX% -call :BuildPython C:\Python36%PLATFORM_PREFIX% -call :BuildPython C:\Python37%PLATFORM_PREFIX% -call :BuildPython C:\Python38%PLATFORM_PREFIX% -call :BuildPython C:\Python39%PLATFORM_PREFIX% -c:\Python38%PLATFORM_PREFIX%\python setup.py sdist || goto :error -exit - -:BuildPython -%1\python -m pip install wheel || goto :error -%1\python setup.py build || goto :error -%1\python setup.py bdist_wheel || goto :error -%1\python setup.py test || goto :error -rmdir /Q /S build -del /S *.pyd -exit /b - -:error -exit /b %errorlevel% diff --git a/test.sh b/test.sh deleted file mode 100755 index 89da0183..00000000 --- a/test.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/bin/sh - -# Copyright 2018 Google Inc. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License.! - -set -e # exit immediately on error -set -x # display all commands - -setup_ubuntu() { - export DEBIAN_FRONTEND=noninteractive - apt-get update - apt-get install -y build-essential cmake git pkg-config python3-pip - pip3 install --upgrade pip - - export PATH="/usr/local/bin:$PATH" - - . /etc/os-release - if [ "${VERSION_ID}" = "14.04" ]; then - apt-get install -y cmake3 python-dev - fi -} - -setup_debian() { - setup_ubuntu -} - -setup_fedora() { - dnf update -y - dnf install -y rpm-build gcc-c++ make cmake pkg-config python-pip python-devel -} - -build_generic() { - mkdir -p build - cd build - cmake .. -DSPM_BUILD_TEST=ON - make -j2 - make CTEST_OUTPUT_ON_FAILURE=1 test - make package_source - cd .. -} - -build_python() { - cd build - make install - cd .. - export LD_LIBRARY_PATH=/usr/local/lib:/usr/local/lib64:$LD_LIBRARY_PATH - export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:/usr/local/lib64/pkgconfig - ldconfig -v - cd python - python3 setup.py test - cd .. -} - -build_linux_gcc_coverall_ubuntu() { - setup_debian - apt-get install -y lcov - pip3 install cpp-coveralls - pip3 install 'requests[security]' - build_generic - build_python - mkdir -p build - cd build - cmake .. -DSPM_COVERAGE=ON - make -j2 - make coverage - coveralls --exclude-pattern '.*(include|usr|test|third_party|pb|_main).*' --gcov-options '\-lp' --gcov gcov - cd .. 
-} - -build_linux_gcc_ubuntu() { - setup_ubuntu - build_generic - build_python -} - -build_linux_gcc_ubuntu_i386() { - setup_ubuntu - build_generic - build_python -} - -build_linux_gcc_debian() { - setup_debian - build_generic - build_python -} - -build_linux_gcc_fedora() { - setup_fedora - build_generic - build_python -} - -build_linux_clang_ubuntu() { - setup_ubuntu - apt-get install -y clang - export CXX="clang++" CC="clang" - build_generic - rm -fr build -} - -build_osx() { - brew update - brew install protobuf || brew link --overwrite protobuf - brew link --overwrite python@2 - build_generic - cd build - make install - cd .. - cd python - python setup.py test - python setup.py clean - /usr/local/bin/python setup.py test - /usr/local/bin/python setup.py clean - cd .. -} - -run_docker() { - docker pull "$1" - docker run -e COVERALLS_REPO_TOKEN=${COVERALLS_REPO_TOKEN} --rm -ti --name travis-ci -v `pwd`:/sentencepiece -w /sentencepiece -td "$1" /bin/bash - docker exec travis-ci bash -c "./test.sh native $2" - docker stop travis-ci -} - -## main -if [ "$#" -ne 2 ]; then - echo "sh test.sh ." - echo "when is native, runs command natively without docker." 
- exit -fi - -if [ "$1" = "native" ]; then - eval "$2" -else - run_docker $1 $2 -fi diff --git a/third_party/absl/flags/flag.cc b/third_party/absl/flags/flag.cc index 09ff78f7..5d6642a9 100644 --- a/third_party/absl/flags/flag.cc +++ b/third_party/absl/flags/flag.cc @@ -61,8 +61,8 @@ struct FlagFunc { namespace { -using FlagMap = std::map; -using FlagList = std::vector; +using FlagMap = std::map>; +using FlagList = std::vector>; FlagMap *GetFlagMap() { static auto *flag_map = new FlagMap; @@ -111,7 +111,7 @@ std::string PrintHelp(const char *programname) { os << PACKAGE_STRING << "\n\n"; os << "Usage: " << programname << " [options] files\n\n"; - for (const auto *func : *GetFlagList()) { + for (auto func : *GetFlagList()) { os << " --" << func->name << " (" << func->help << ")"; os << " type: " << func->type << " default: " << func->default_value << '\n'; @@ -123,7 +123,7 @@ std::string PrintHelp(const char *programname) { } } // namespace -void RegisterFlag(const std::string &name, FlagFunc *func) { +void RegisterFlag(const std::string &name, std::shared_ptr func) { GetFlagList()->emplace_back(func); GetFlagMap()->emplace(name, func); } @@ -140,7 +140,7 @@ Flag::Flag(const char *name, const char *type, const char *help, func_->set_value = [this](const std::string &value) { this->set_value_as_str(value); }; - RegisterFlag(name, func_.get()); + RegisterFlag(name, func_); } template @@ -171,7 +171,9 @@ void Flag::set_value_as_str(const std::string &value_as_str) { template class Flag; template class Flag; +template class Flag; template class Flag; +template class Flag; template class Flag; template class Flag; template class Flag; @@ -217,4 +219,14 @@ std::vector ParseCommandLine(int argc, char *argv[]) { return output_args; } + +void CleanupFlags() { + static bool is_shutdown = false; + if (!is_shutdown) { + delete internal::GetFlagList(); + delete internal::GetFlagMap(); + is_shutdown = true; + } +} + } // namespace absl diff --git a/third_party/absl/flags/flag.h 
b/third_party/absl/flags/flag.h index f3bf71d9..c5223583 100644 --- a/third_party/absl/flags/flag.h +++ b/third_party/absl/flags/flag.h @@ -24,7 +24,8 @@ namespace absl { namespace internal { struct FlagFunc; -void RegisterFlag(const std::string &name, FlagFunc *func); +void RegisterFlag(const std::string &name, std::shared_ptr func); + } // namespace internal template @@ -39,7 +40,7 @@ class Flag { private: T value_; - std::unique_ptr func_; + std::shared_ptr func_; }; template @@ -53,7 +54,10 @@ void SetFlag(Flag *flag, const V &v) { flag->set_value(value); } -std::vector ParseCommandLine(int argc, char *argv[]); +#define HAS_ABSL_CLEANUP_FLAGS + +void CleanupFlags(); + } // namespace absl #define ABSL_FLAG(Type, name, defautl_value, help) \ diff --git a/third_party/absl/flags/parse.h b/third_party/absl/flags/parse.h new file mode 100644 index 00000000..6a06e633 --- /dev/null +++ b/third_party/absl/flags/parse.h @@ -0,0 +1,25 @@ +// Copyright 2016 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.! 
+ +#ifndef ABSL_FLAGS_PARSE_H_ +#define ABSL_FLAGS_PARSE_H_ + +#include + +namespace absl { + +std::vector ParseCommandLine(int argc, char *argv[]); +} // namespace absl + +#endif // ABSL_FLAGS_PARSE_H_ diff --git a/src/init.cc b/third_party/absl/random/distributions.h similarity index 53% rename from src/init.cc rename to third_party/absl/random/distributions.h index f1800c52..246ecb27 100644 --- a/src/init.cc +++ b/third_party/absl/random/distributions.h @@ -12,21 +12,20 @@ // See the License for the specific language governing permissions and // limitations under the License.! -#include "init.h" +#ifndef ABSL_CONTAINER_DISTRIBUTIONS_H_ +#define ABSL_CONTAINER_DISTRIBUTIONS_H_ -#include "third_party/absl/flags/flag.h" +#include -namespace sentencepiece { +#include "random.h" -void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, - bool remove_arg) { - const auto unused_args = absl::ParseCommandLine(*argc, *argv); +namespace absl { - if (remove_arg) { - char **argv_val = *argv; - *argv = argv_val = argv_val + *argc - unused_args.size(); - std::copy(unused_args.begin(), unused_args.end(), argv_val); - *argc = static_cast(unused_args.size()); - } +template +T Gaussian(SharedBitGen &generator, T mean, T stddev) { + std::normal_distribution<> dist(mean, stddev); + return dist(*generator.engine()); } -} // namespace sentencepiece +} // namespace absl + +#endif // ABSL_CONTAINER_DISTRIBUTIONS_H_ diff --git a/third_party/absl/random/random.h b/third_party/absl/random/random.h new file mode 100644 index 00000000..3c3a21ed --- /dev/null +++ b/third_party/absl/random/random.h @@ -0,0 +1,33 @@ +// Copyright 2016 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.! + +#ifndef ABSL_CONTAINER_RANDOM_H_ +#define ABSL_CONTAINER_RANDOM_H_ + +#include + +#include "../../../src/util.h" + +using sentencepiece::random::GetRandomGenerator; + +namespace absl { + +class SharedBitGen { + public: + std::mt19937 *engine() { return GetRandomGenerator(); } +}; + +} // namespace absl + +#endif // ABSL_CONTAINER_RANDOM_H_ diff --git a/third_party/absl/strings/str_cat.h b/third_party/absl/strings/str_cat.h index 7f3cce99..3c6c819f 100644 --- a/third_party/absl/strings/str_cat.h +++ b/third_party/absl/strings/str_cat.h @@ -35,14 +35,18 @@ inline std::string StrCat(absl::string_view str) { } template -inline std::string StrCat(absl::string_view first, const T &... rest) { +inline std::string StrCat(absl::string_view first, const T &...rest) { return StrCat(first) + StrCat(rest...); } template -inline std::string StrCat(int first, const T &... rest) { +inline std::string StrCat(int first, const T &...rest) { return StrCat(first) + StrCat(rest...); } +inline void StrAppend(std::string *base, absl::string_view str) { + base->append(str.data(), str.size()); +} + } // namespace absl #endif // ABSL_STRINGS_STR_CAT_H_ diff --git a/third_party/absl/strings/string_view.cc b/third_party/absl/strings/string_view.cc deleted file mode 100644 index dce208d2..00000000 --- a/third_party/absl/strings/string_view.cc +++ /dev/null @@ -1,267 +0,0 @@ -// Copyright 2017 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "third_party/absl/strings/string_view.h" - -#ifndef ABSL_HAVE_STD_STRING_VIEW - -#include -#include -#include -#include - -// #include "absl/strings/internal/memutil.h" - -namespace absl { - -namespace { -void WritePadding(std::ostream& o, size_t pad) { - char fill_buf[32]; - memset(fill_buf, o.fill(), sizeof(fill_buf)); - while (pad) { - size_t n = std::min(pad, sizeof(fill_buf)); - o.write(fill_buf, n); - pad -= n; - } -} - -class LookupTable { - public: - // For each character in wanted, sets the index corresponding - // to the ASCII code of that character. This is used by - // the find_.*_of methods below to tell whether or not a character is in - // the lookup table in constant time. 
- explicit LookupTable(string_view wanted) { - for (char c : wanted) { - table_[Index(c)] = true; - } - } - bool operator[](char c) const { return table_[Index(c)]; } - - private: - static unsigned char Index(char c) { return static_cast(c); } - bool table_[UCHAR_MAX + 1] = {}; -}; - -} // namespace - -std::ostream& operator<<(std::ostream& o, string_view piece) { - std::ostream::sentry sentry(o); - if (sentry) { - size_t lpad = 0; - size_t rpad = 0; - if (static_cast(o.width()) > piece.size()) { - size_t pad = o.width() - piece.size(); - if ((o.flags() & o.adjustfield) == o.left) { - rpad = pad; - } else { - lpad = pad; - } - } - if (lpad) WritePadding(o, lpad); - o.write(piece.data(), piece.size()); - if (rpad) WritePadding(o, rpad); - o.width(0); - } - return o; -} - -string_view::size_type string_view::copy(char* buf, size_type n, - size_type pos) const { - size_type ulen = length_; - assert(pos <= ulen); - size_type rlen = std::min(ulen - pos, n); - if (rlen > 0) { - const char* start = ptr_ + pos; - std::copy(start, start + rlen, buf); - } - return rlen; -} - -namespace { -const char* memmatch(const char* phaystack, size_t haylen, const char* pneedle, - size_t neelen) { - if (0 == neelen) { - return phaystack; // even if haylen is 0 - } - if (haylen < neelen) { - return nullptr; - } - const char* match; - const char* hayend = phaystack + haylen - neelen + 1; - while ((match = (const char*)(memchr(phaystack, pneedle[0], - hayend - phaystack)))) { - if (memcmp(match, pneedle, neelen) == 0) { - return match; - } else { - phaystack = match + 1; - } - } - return nullptr; -} -} // namespace - -string_view::size_type string_view::find(string_view s, size_type pos) const - noexcept { - if (empty() || pos > length_) { - if (empty() && pos == 0 && s.empty()) return 0; - return npos; - } - const char* result = memmatch(ptr_ + pos, length_ - pos, s.ptr_, s.length_); - return result ? 
result - ptr_ : npos; -} - -string_view::size_type string_view::find(char c, size_type pos) const noexcept { - if (empty() || pos >= length_) { - return npos; - } - const char* result = - static_cast(memchr(ptr_ + pos, c, length_ - pos)); - return result != nullptr ? result - ptr_ : npos; -} - -string_view::size_type string_view::rfind(string_view s, size_type pos) const - noexcept { - if (length_ < s.length_) return npos; - if (s.empty()) return std::min(length_, pos); - const char* last = ptr_ + std::min(length_ - s.length_, pos) + s.length_; - const char* result = std::find_end(ptr_, last, s.ptr_, s.ptr_ + s.length_); - return result != last ? result - ptr_ : npos; -} - -// Search range is [0..pos] inclusive. If pos == npos, search everything. -string_view::size_type string_view::rfind(char c, size_type pos) const - noexcept { - // Note: memrchr() is not available on Windows. - if (empty()) return npos; - for (size_type i = std::min(pos, length_ - 1);; --i) { - if (ptr_[i] == c) { - return i; - } - if (i == 0) break; - } - return npos; -} - -string_view::size_type string_view::find_first_of(string_view s, - size_type pos) const - noexcept { - if (empty() || s.empty()) { - return npos; - } - // Avoid the cost of LookupTable() for a single-character search. - if (s.length_ == 1) return find_first_of(s.ptr_[0], pos); - LookupTable tbl(s); - for (size_type i = pos; i < length_; ++i) { - if (tbl[ptr_[i]]) { - return i; - } - } - return npos; -} - -string_view::size_type string_view::find_first_not_of(string_view s, - size_type pos) const - noexcept { - if (empty()) return npos; - // Avoid the cost of LookupTable() for a single-character search. 
- if (s.length_ == 1) return find_first_not_of(s.ptr_[0], pos); - LookupTable tbl(s); - for (size_type i = pos; i < length_; ++i) { - if (!tbl[ptr_[i]]) { - return i; - } - } - return npos; -} - -string_view::size_type string_view::find_first_not_of(char c, - size_type pos) const - noexcept { - if (empty()) return npos; - for (; pos < length_; ++pos) { - if (ptr_[pos] != c) { - return pos; - } - } - return npos; -} - -string_view::size_type string_view::find_last_of(string_view s, - size_type pos) const noexcept { - if (empty() || s.empty()) return npos; - // Avoid the cost of LookupTable() for a single-character search. - if (s.length_ == 1) return find_last_of(s.ptr_[0], pos); - LookupTable tbl(s); - for (size_type i = std::min(pos, length_ - 1);; --i) { - if (tbl[ptr_[i]]) { - return i; - } - if (i == 0) break; - } - return npos; -} - -string_view::size_type string_view::find_last_not_of(string_view s, - size_type pos) const - noexcept { - if (empty()) return npos; - size_type i = std::min(pos, length_ - 1); - if (s.empty()) return i; - // Avoid the cost of LookupTable() for a single-character search. - if (s.length_ == 1) return find_last_not_of(s.ptr_[0], pos); - LookupTable tbl(s); - for (;; --i) { - if (!tbl[ptr_[i]]) { - return i; - } - if (i == 0) break; - } - return npos; -} - -string_view::size_type string_view::find_last_not_of(char c, - size_type pos) const - noexcept { - if (empty()) return npos; - size_type i = std::min(pos, length_ - 1); - for (;; --i) { - if (ptr_[i] != c) { - return i; - } - if (i == 0) break; - } - return npos; -} - -// MSVC has non-standard behavior that implicitly creates definitions for static -// const members. These implicit definitions conflict with explicit out-of-class -// member definitions that are required by the C++ standard, resulting in -// LNK1169 "multiply defined" errors at link time. __declspec(selectany) asks -// MSVC to choose only one definition for the symbol it decorates. 
See details -// at http://msdn.microsoft.com/en-us/library/34h23df8(v=vs.100).aspx -#ifdef _MSC_VER -#define ABSL_STRING_VIEW_SELECTANY __declspec(selectany) -#else -#define ABSL_STRING_VIEW_SELECTANY -#endif - -ABSL_STRING_VIEW_SELECTANY -constexpr string_view::size_type string_view::npos; -ABSL_STRING_VIEW_SELECTANY -constexpr string_view::size_type string_view::kMaxSize; - -} // namespace absl - -#endif // ABSL_HAVE_STD_STRING_VIEW diff --git a/third_party/absl/strings/string_view.h b/third_party/absl/strings/string_view.h index 68d46e3f..9bb8b1c3 100644 --- a/third_party/absl/strings/string_view.h +++ b/third_party/absl/strings/string_view.h @@ -28,518 +28,10 @@ #define ABSL_STRINGS_STRING_VIEW_H_ #include -// #include "absl/base/config.h" - -#ifdef ABSL_HAVE_STD_STRING_VIEW - #include namespace absl { using std::string_view; -} // namespace absl - -#else // ABSL_HAVE_STD_STRING_VIEW - -#include -#include -#include -#include -#include -#include -#include - -#ifdef __has_builtin -#define ABSL_HAVE_BUILTIN(x) __has_builtin(x) -#else -#define ABSL_HAVE_BUILTIN(x) 0 -#endif - -// #include "absl/base/internal/throw_delegate.h" -// #include "absl/base/macros.h" -// #include "absl/base/port.h" - -namespace absl { - -// absl::string_view -// -// A `string_view` provides a lightweight view into the std::string data -// provided by a `std::string`, double-quoted std::string literal, character -// array, or even another `string_view`. A `string_view` does *not* own the -// std::string to which it points, and that data cannot be modified through the -// view. -// -// You can use `string_view` as a function or method parameter anywhere a -// parameter can receive a double-quoted std::string literal, `const char*`, -// `std::string`, or another `absl::string_view` argument with no need to copy -// the std::string data. Systematic use of `string_view` within function -// arguments reduces data copies and `strlen()` calls. 
-// -// Because of its small size, prefer passing `string_view` by value: -// -// void MyFunction(absl::string_view arg); -// -// If circumstances require, you may also pass one by const reference: -// -// void MyFunction(const absl::string_view& arg); // not preferred -// -// Passing by value generates slightly smaller code for many architectures. -// -// In either case, the source data of the `string_view` must outlive the -// `string_view` itself. -// -// A `string_view` is also suitable for local variables if you know that the -// lifetime of the underlying object is longer than the lifetime of your -// `string_view` variable. However, beware of binding a `string_view` to a -// temporary value: -// -// // BAD use of string_view: lifetime problem -// absl::string_view sv = obj.ReturnAString(); -// -// // GOOD use of string_view: str outlives sv -// std::string str = obj.ReturnAString(); -// absl::string_view sv = str; -// -// Due to lifetime issues, a `string_view` is sometimes a poor choice for a -// return value and usually a poor choice for a data member. If you do use a -// `string_view` this way, it is your responsibility to ensure that the object -// pointed to by the `string_view` outlives the `string_view`. -// -// A `string_view` may represent a whole std::string or just part of a -// std::string. For example, when splitting a std::string, -// `std::vector` is a natural data type for the output. -// -// -// When constructed from a source which is nul-terminated, the `string_view` -// itself will not include the nul-terminator unless a specific size (including -// the nul) is passed to the constructor. As a result, common idioms that work -// on nul-terminated strings do not work on `string_view` objects. If you write -// code that scans a `string_view`, you must check its length rather than test -// for nul, for example. Note, however, that nuls may still be embedded within -// a `string_view` explicitly. 
-// -// You may create a null `string_view` in two ways: -// -// absl::string_view sv(); -// absl::string_view sv(nullptr, 0); -// -// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and -// `sv.empty() == true`. Also, if you create a `string_view` with a non-null -// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to -// signal an undefined value that is different from other `string_view` values -// in a similar fashion to how `const char* p1 = nullptr;` is different from -// `const char* p2 = "";`. However, in practice, it is not recommended to rely -// on this behavior. -// -// Be careful not to confuse a null `string_view` with an empty one. A null -// `string_view` is an empty `string_view`, but some empty `string_view`s are -// not null. Prefer checking for emptiness over checking for null. -// -// There are many ways to create an empty string_view: -// -// const char* nullcp = nullptr; -// // string_view.size() will return 0 in all cases. -// absl::string_view(); -// absl::string_view(nullcp, 0); -// absl::string_view(""); -// absl::string_view("", 0); -// absl::string_view("abcdef", 0); -// absl::string_view("abcdef" + 6, 0); -// -// All empty `string_view` objects whether null or not, are equal: -// -// absl::string_view() == absl::string_view("", 0) -// absl::string_view(nullptr, 0) == absl:: string_view("abcdef"+6, 0) -class string_view { - public: - using traits_type = std::char_traits; - using value_type = char; - using pointer = char*; - using const_pointer = const char*; - using reference = char&; - using const_reference = const char&; - using const_iterator = const char*; - using iterator = const_iterator; - using const_reverse_iterator = std::reverse_iterator; - using reverse_iterator = const_reverse_iterator; - using size_type = size_t; - using difference_type = std::ptrdiff_t; - - static constexpr size_type npos = static_cast(-1); - - // Null `string_view` constructor - constexpr string_view() noexcept : 
ptr_(nullptr), length_(0) {} - - // Implicit constructors - - template - string_view( // NOLINT(runtime/explicit) - const std::basic_string, Allocator>& - str) noexcept - : ptr_(str.data()), length_(CheckLengthInternal(str.size())) {} - - // Implicit constructor of a `string_view` from nul-terminated `str`. When - // accepting possibly null strings, use `absl::NullSafeStringView(str)` - // instead (see below). - constexpr string_view(const char* str) // NOLINT(runtime/explicit) - : ptr_(str), length_(CheckLengthInternal(StrLenInternal(str))) {} - - // Implicit constructor of a `string_view` from a `const char*` and length. - constexpr string_view(const char* data, size_type len) - : ptr_(data), length_(CheckLengthInternal(len)) {} - - // NOTE: Harmlessly omitted to work around gdb bug. - // constexpr string_view(const string_view&) noexcept = default; - // string_view& operator=(const string_view&) noexcept = default; - - // Iterators - - // string_view::begin() - // - // Returns an iterator pointing to the first character at the beginning of the - // `string_view`, or `end()` if the `string_view` is empty. - constexpr const_iterator begin() const noexcept { return ptr_; } - - // string_view::end() - // - // Returns an iterator pointing just beyond the last character at the end of - // the `string_view`. This iterator acts as a placeholder; attempting to - // access it results in undefined behavior. - constexpr const_iterator end() const noexcept { return ptr_ + length_; } - - // string_view::cbegin() - // - // Returns a const iterator pointing to the first character at the beginning - // of the `string_view`, or `end()` if the `string_view` is empty. - constexpr const_iterator cbegin() const noexcept { return begin(); } - - // string_view::cend() - // - // Returns a const iterator pointing just beyond the last character at the end - // of the `string_view`. This pointer acts as a placeholder; attempting to - // access its element results in undefined behavior. 
- constexpr const_iterator cend() const noexcept { return end(); } - - // string_view::rbegin() - // - // Returns a reverse iterator pointing to the last character at the end of the - // `string_view`, or `rend()` if the `string_view` is empty. - const_reverse_iterator rbegin() const noexcept { - return const_reverse_iterator(end()); - } - - // string_view::rend() - // - // Returns a reverse iterator pointing just before the first character at the - // beginning of the `string_view`. This pointer acts as a placeholder; - // attempting to access its element results in undefined behavior. - const_reverse_iterator rend() const noexcept { - return const_reverse_iterator(begin()); - } - - // string_view::crbegin() - // - // Returns a const reverse iterator pointing to the last character at the end - // of the `string_view`, or `crend()` if the `string_view` is empty. - const_reverse_iterator crbegin() const noexcept { return rbegin(); } - - // string_view::crend() - // - // Returns a const reverse iterator pointing just before the first character - // at the beginning of the `string_view`. This pointer acts as a placeholder; - // attempting to access its element results in undefined behavior. - const_reverse_iterator crend() const noexcept { return rend(); } - - // Capacity Utilities - - // string_view::size() - // - // Returns the number of characters in the `string_view`. - constexpr size_type size() const noexcept { return length_; } - - // string_view::length() - // - // Returns the number of characters in the `string_view`. Alias for `size()`. - constexpr size_type length() const noexcept { return size(); } - - // string_view::max_size() - // - // Returns the maximum number of characters the `string_view` can hold. - constexpr size_type max_size() const noexcept { return kMaxSize; } - - // string_view::empty() - // - // Checks if the `string_view` is empty (refers to no characters). 
- constexpr bool empty() const noexcept { return length_ == 0; } - - // std::string:view::operator[] - // - // Returns the ith element of an `string_view` using the array operator. - // Note that this operator does not perform any bounds checking. - constexpr const_reference operator[](size_type i) const { return ptr_[i]; } - - // string_view::front() - // - // Returns the first element of a `string_view`. - constexpr const_reference front() const { return ptr_[0]; } - - // string_view::back() - // - // Returns the last element of a `string_view`. - constexpr const_reference back() const { return ptr_[size() - 1]; } - - // string_view::data() - // - // Returns a pointer to the underlying character array (which is of course - // stored elsewhere). Note that `string_view::data()` may contain embedded nul - // characters, but the returned buffer may or may not be nul-terminated; - // therefore, do not pass `data()` to a routine that expects a nul-terminated - // std::string. - constexpr const_pointer data() const noexcept { return ptr_; } - - // Modifiers - - // string_view::remove_prefix() - // - // Removes the first `n` characters from the `string_view`. Note that the - // underlying std::string is not changed, only the view. - void remove_prefix(size_type n) { - assert(n <= length_); - ptr_ += n; - length_ -= n; - } - - // string_view::remove_suffix() - // - // Removes the last `n` characters from the `string_view`. Note that the - // underlying std::string is not changed, only the view. - void remove_suffix(size_type n) { - assert(n <= length_); - length_ -= n; - } - - // string_view::swap() - // - // Swaps this `string_view` with another `string_view`. - void swap(string_view& s) noexcept { - auto t = *this; - *this = s; - s = t; - } - - // Explicit conversion operators - - // Converts to `std::basic_string`. 
- template - explicit operator std::basic_string() const { - if (!data()) return {}; - return std::basic_string(data(), size()); - } - - // string_view::copy() - // - // Copies the contents of the `string_view` at offset `pos` and length `n` - // into `buf`. - size_type copy(char* buf, size_type n, size_type pos = 0) const; - - // string_view::substr() - // - // Returns a "substring" of the `string_view` (at offset `pos` and length - // `n`) as another string_view. This function throws `std::out_of_bounds` if - // `pos > size'. - string_view substr(size_type pos, size_type n = npos) const { - n = std::min(n, length_ - pos); - return string_view(ptr_ + pos, n); - } - - // string_view::compare() - // - // Performs a lexicographical comparison between the `string_view` and - // another `absl::string_view), returning -1 if `this` is less than, 0 if - // `this` is equal to, and 1 if `this` is greater than the passed std::string - // view. Note that in the case of data equality, a further comparison is made - // on the respective sizes of the two `string_view`s to determine which is - // smaller, equal, or greater. - int compare(string_view x) const noexcept { - auto min_length = std::min(length_, x.length_); - if (min_length > 0) { - int r = memcmp(ptr_, x.ptr_, min_length); - if (r < 0) return -1; - if (r > 0) return 1; - } - if (length_ < x.length_) return -1; - if (length_ > x.length_) return 1; - return 0; - } - - // Overload of `string_view::compare()` for comparing a substring of the - // 'string_view` and another `absl::string_view`. - int compare(size_type pos1, size_type count1, string_view v) const { - return substr(pos1, count1).compare(v); - } - - // Overload of `string_view::compare()` for comparing a substring of the - // `string_view` and a substring of another `absl::string_view`. 
- int compare(size_type pos1, size_type count1, string_view v, size_type pos2, - size_type count2) const { - return substr(pos1, count1).compare(v.substr(pos2, count2)); - } - - // Overload of `string_view::compare()` for comparing a `string_view` and a - // a different C-style std::string `s`. - int compare(const char* s) const { return compare(string_view(s)); } - - // Overload of `string_view::compare()` for comparing a substring of the - // `string_view` and a different std::string C-style std::string `s`. - int compare(size_type pos1, size_type count1, const char* s) const { - return substr(pos1, count1).compare(string_view(s)); - } - - // Overload of `string_view::compare()` for comparing a substring of the - // `string_view` and a substring of a different C-style std::string `s`. - int compare(size_type pos1, size_type count1, const char* s, - size_type count2) const { - return substr(pos1, count1).compare(string_view(s, count2)); - } - - // Find Utilities - - // string_view::find() - // - // Finds the first occurrence of the substring `s` within the `string_view`, - // returning the position of the first character's match, or `npos` if no - // match was found. - size_type find(string_view s, size_type pos = 0) const noexcept; - - // Overload of `string_view::find()` for finding the given character `c` - // within the `string_view`. - size_type find(char c, size_type pos = 0) const noexcept; - - // string_view::rfind() - // - // Finds the last occurrence of a substring `s` within the `string_view`, - // returning the position of the first character's match, or `npos` if no - // match was found. - size_type rfind(string_view s, size_type pos = npos) const noexcept; - - // Overload of `string_view::rfind()` for finding the last given character `c` - // within the `string_view`. 
- size_type rfind(char c, size_type pos = npos) const noexcept; - - // string_view::find_first_of() - // - // Finds the first occurrence of any of the characters in `s` within the - // `string_view`, returning the start position of the match, or `npos` if no - // match was found. - size_type find_first_of(string_view s, size_type pos = 0) const noexcept; - - // Overload of `string_view::find_first_of()` for finding a character `c` - // within the `string_view`. - size_type find_first_of(char c, size_type pos = 0) const noexcept { - return find(c, pos); - } - - // string_view::find_last_of() - // - // Finds the last occurrence of any of the characters in `s` within the - // `string_view`, returning the start position of the match, or `npos` if no - // match was found. - size_type find_last_of(string_view s, size_type pos = npos) const noexcept; - - // Overload of `string_view::find_last_of()` for finding a character `c` - // within the `string_view`. - size_type find_last_of(char c, size_type pos = npos) const noexcept { - return rfind(c, pos); - } - - // string_view::find_first_not_of() - // - // Finds the first occurrence of any of the characters not in `s` within the - // `string_view`, returning the start position of the first non-match, or - // `npos` if no non-match was found. - size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept; - - // Overload of `string_view::find_first_not_of()` for finding a character - // that is not `c` within the `string_view`. - size_type find_first_not_of(char c, size_type pos = 0) const noexcept; - - // string_view::find_last_not_of() - // - // Finds the last occurrence of any of the characters not in `s` within the - // `string_view`, returning the start position of the last non-match, or - // `npos` if no non-match was found. 
- size_type find_last_not_of(string_view s, - size_type pos = npos) const noexcept; - - // Overload of `string_view::find_last_not_of()` for finding a character - // that is not `c` within the `string_view`. - size_type find_last_not_of(char c, size_type pos = npos) const noexcept; - - private: - static constexpr size_type kMaxSize = - std::numeric_limits::max(); - - // check whether __builtin_strlen is provided by the compiler. - // GCC doesn't have __has_builtin() - // (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66970), - // but has __builtin_strlen according to - // https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html. -#if ABSL_HAVE_BUILTIN(__builtin_strlen) || \ - (defined(__GNUC__) && !defined(__clang__)) - static constexpr size_type StrLenInternal(const char* str) { - return str ? __builtin_strlen(str) : 0; - } -#else - static constexpr size_type StrLenInternal(const char* str) { - return str ? strlen(str) : 0; - } -#endif - - static constexpr size_type CheckLengthInternal(size_type len) { return len; } - - const char* ptr_; - size_type length_; -}; - -// This large function is defined inline so that in a fairly common case where -// one of the arguments is a literal, the compiler can elide a lot of the -// following comparisons. -inline bool operator==(string_view x, string_view y) noexcept { - auto len = x.size(); - if (len != y.size()) { - return false; - } - return x.data() == y.data() || len <= 0 || - memcmp(x.data(), y.data(), len) == 0; -} - -inline bool operator!=(string_view x, string_view y) noexcept { - return !(x == y); -} - -inline bool operator<(string_view x, string_view y) noexcept { - auto min_size = std::min(x.size(), y.size()); - const int r = min_size == 0 ? 
0 : memcmp(x.data(), y.data(), min_size); - return (r < 0) || (r == 0 && x.size() < y.size()); -} - -inline bool operator>(string_view x, string_view y) noexcept { return y < x; } - -inline bool operator<=(string_view x, string_view y) noexcept { - return !(y < x); -} - -inline bool operator>=(string_view x, string_view y) noexcept { - return !(x < y); -} - -// IO Insertion Operator -std::ostream& operator<<(std::ostream& o, string_view piece); - -} // namespace absl - -#endif // ABSL_HAVE_STD_STRING_VIEW - -namespace absl { // ClippedSubstr() // diff --git a/third_party/esaxx/sais.hxx b/third_party/esaxx/sais.hxx index f1702f8c..b9071c8f 100644 --- a/third_party/esaxx/sais.hxx +++ b/third_party/esaxx/sais.hxx @@ -179,7 +179,7 @@ typedef typename std::iterator_traits::value_type char_type; sort all the S-substrings */ if(fs < (maxthreads * k)) { index_type *C, *B; - if((C = new index_type[maxthreads * k]) == 0) { return -2; } + C = new index_type[maxthreads * k]; B = (1 < maxthreads) ? C + k : C; getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ #ifdef _OPENMP @@ -271,7 +271,7 @@ typedef typename std::iterator_traits::value_type char_type; /* stage 3: induce the result for the original problem */ if(fs < (maxthreads * k)) { index_type *B, *C; - if((C = new index_type[maxthreads * k]) == 0) { return -2; } + C = new index_type[maxthreads * k]; B = (1 < maxthreads) ? 
C + k : C; /* put all left-most S characters into their buckets */ getCounts(T, C, n, k); getBuckets(C, B, k, true); /* find ends of buckets */ diff --git a/third_party/protobuf-lite/arena.cc b/third_party/protobuf-lite/arena.cc index c117c9e5..414e0234 100644 --- a/third_party/protobuf-lite/arena.cc +++ b/third_party/protobuf-lite/arena.cc @@ -31,24 +31,30 @@ #include #include +#include #include +#include #ifdef ADDRESS_SANITIZER #include #endif // ADDRESS_SANITIZER -#include +#include -namespace google { static const size_t kMinCleanupListElements = 8; static const size_t kMaxCleanupListElements = 64; // 1kB on 64-bit. +namespace google { namespace protobuf { + +PROTOBUF_EXPORT /*static*/ void* (*const ArenaOptions::kDefaultBlockAlloc)( + size_t) = &::operator new; + namespace internal { -std::atomic ArenaImpl::lifecycle_id_generator_; +ArenaImpl::CacheAlignedLifecycleIdGenerator ArenaImpl::lifecycle_id_generator_; #if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL) ArenaImpl::ThreadCache& ArenaImpl::thread_cache() { static internal::ThreadLocalStorage* thread_cache_ = @@ -57,77 +63,205 @@ ArenaImpl::ThreadCache& ArenaImpl::thread_cache() { } #elif defined(PROTOBUF_USE_DLLS) ArenaImpl::ThreadCache& ArenaImpl::thread_cache() { - static GOOGLE_THREAD_LOCAL ThreadCache thread_cache_ = { -1, NULL }; + static PROTOBUF_THREAD_LOCAL ThreadCache thread_cache_ = { + 0, static_cast(-1), nullptr}; return thread_cache_; } #else -GOOGLE_THREAD_LOCAL ArenaImpl::ThreadCache ArenaImpl::thread_cache_ = {-1, NULL}; +PROTOBUF_THREAD_LOCAL ArenaImpl::ThreadCache ArenaImpl::thread_cache_ = { + 0, static_cast(-1), nullptr}; +#endif + +void ArenaFree(void* object, size_t size) { +#if defined(__GXX_DELETE_WITH_SIZE__) || defined(__cpp_sized_deallocation) + ::operator delete(object, size); +#else + (void)size; + ::operator delete(object); #endif +} + +ArenaImpl::ArenaImpl(const ArenaOptions& options) { + ArenaMetricsCollector* collector = nullptr; + bool record_allocs = false; + if 
(options.make_metrics_collector != nullptr) { + collector = (*options.make_metrics_collector)(); + record_allocs = (collector && collector->RecordAllocs()); + } + + // Get memory where we can store non-default options if needed. + // Use supplied initial_block if it is large enough. + size_t min_block_size = kOptionsSize + kBlockHeaderSize + kSerialArenaSize; + char* mem = options.initial_block; + size_t mem_size = options.initial_block_size; + GOOGLE_DCHECK_EQ(reinterpret_cast(mem) & 7, 0); + if (mem == nullptr || mem_size < min_block_size) { + // Supplied initial block is not big enough. + mem_size = std::max(min_block_size, options.start_block_size); + mem = reinterpret_cast((*options.block_alloc)(mem_size)); + } + + // Create the special block. + const bool special = true; + const bool user_owned = (mem == options.initial_block); + auto block = + new (mem) SerialArena::Block(mem_size, nullptr, special, user_owned); -void ArenaImpl::Init() { - lifecycle_id_ = - lifecycle_id_generator_.fetch_add(1, std::memory_order_relaxed); + // Options occupy the beginning of the initial block. + options_ = new (block->Pointer(block->pos())) Options; +#ifdef ADDRESS_SANITIZER + ASAN_UNPOISON_MEMORY_REGION(options_, kOptionsSize); +#endif // ADDRESS_SANITIZER + options_->start_block_size = options.start_block_size; + options_->max_block_size = options.max_block_size; + options_->block_alloc = options.block_alloc; + options_->block_dealloc = options.block_dealloc; + options_->metrics_collector = collector; + block->set_pos(block->pos() + kOptionsSize); + + Init(record_allocs); + SetInitialBlock(block); +} + +void ArenaImpl::Init(bool record_allocs) { + ThreadCache& tc = thread_cache(); + auto id = tc.next_lifecycle_id; + constexpr uint64 kInc = ThreadCache::kPerThreadIds * 2; + if (PROTOBUF_PREDICT_FALSE((id & (kInc - 1)) == 0)) { + if (sizeof(lifecycle_id_generator_.id) == 4) { + // 2^32 is dangerously low to guarantee uniqueness.
If we start doling out + // unique id's in ranges of kInc it's unacceptably low. In this case + // we increment by 1. The additional range of kPerThreadIds that are used + // per thread effectively pushes the overflow time from weeks to years + // of continuous running. + id = lifecycle_id_generator_.id.fetch_add(1, std::memory_order_relaxed) * + kInc; + } else { + id = + lifecycle_id_generator_.id.fetch_add(kInc, std::memory_order_relaxed); + } + } + tc.next_lifecycle_id = id + 2; + // We store "record_allocs" in the low bit of lifecycle_id_. + lifecycle_id_ = id | (record_allocs ? 1 : 0); hint_.store(nullptr, std::memory_order_relaxed); threads_.store(nullptr, std::memory_order_relaxed); + space_allocated_.store(0, std::memory_order_relaxed); +} - if (initial_block_) { - // Thread which calls Init() owns the first block. This allows the - // single-threaded case to allocate on the first block without having to - // perform atomic operations. - new (initial_block_) Block(options_.initial_block_size, NULL); - SerialArena* serial = - SerialArena::New(initial_block_, &thread_cache(), this); - serial->set_next(NULL); - threads_.store(serial, std::memory_order_relaxed); - space_allocated_.store(options_.initial_block_size, - std::memory_order_relaxed); - CacheSerialArena(serial); - } else { - space_allocated_.store(0, std::memory_order_relaxed); - } +void ArenaImpl::SetInitialBlock(SerialArena::Block* block) { + // Calling thread owns the first block. This allows the single-threaded case + // to allocate on the first block without having to perform atomic operations. + SerialArena* serial = SerialArena::New(block, &thread_cache(), this); + serial->set_next(NULL); + threads_.store(serial, std::memory_order_relaxed); + space_allocated_.store(block->size(), std::memory_order_relaxed); + CacheSerialArena(serial); } ArenaImpl::~ArenaImpl() { // Have to do this in a first pass, because some of the destructors might // refer to memory in other blocks.
CleanupList(); - FreeBlocks(); + + ArenaMetricsCollector* collector = nullptr; + auto deallocator = &ArenaFree; + if (options_) { + collector = options_->metrics_collector; + deallocator = options_->block_dealloc; + } + + PerBlock([deallocator](SerialArena::Block* b) { +#ifdef ADDRESS_SANITIZER + // This memory was provided by the underlying allocator as unpoisoned, so + // return it in an unpoisoned state. + ASAN_UNPOISON_MEMORY_REGION(b->Pointer(0), b->size()); +#endif // ADDRESS_SANITIZER + if (!b->user_owned()) { + (*deallocator)(b, b->size()); + } + }); + + if (collector) { + collector->OnDestroy(SpaceAllocated()); + } } uint64 ArenaImpl::Reset() { + if (options_ && options_->metrics_collector) { + options_->metrics_collector->OnReset(SpaceAllocated()); + } + // Have to do this in a first pass, because some of the destructors might // refer to memory in other blocks. CleanupList(); - uint64 space_allocated = FreeBlocks(); - Init(); + // Discard all blocks except the special block (if present). + uint64 space_allocated = 0; + SerialArena::Block* special_block = nullptr; + auto deallocator = (options_ ? options_->block_dealloc : &ArenaFree); + PerBlock( + [&space_allocated, &special_block, deallocator](SerialArena::Block* b) { + space_allocated += b->size(); +#ifdef ADDRESS_SANITIZER + // This memory was provided by the underlying allocator as unpoisoned, + // so return it in an unpoisoned state. + ASAN_UNPOISON_MEMORY_REGION(b->Pointer(0), b->size()); +#endif // ADDRESS_SANITIZER + if (!b->special()) { + (*deallocator)(b, b->size()); + } else { + // Prepare special block for reuse. + // Note: if options_ is present, it occupies the beginning of the + // block and therefore pos is advanced past it. + GOOGLE_DCHECK(special_block == nullptr); + special_block = b; + } + }); + + Init(record_allocs()); + if (special_block != nullptr) { + // next() should still be nullptr since we are using a stack discipline, but + // clear it anyway to reduce fragility. 
+ GOOGLE_DCHECK_EQ(special_block->next(), nullptr); + special_block->clear_next(); + special_block->set_pos(kBlockHeaderSize + (options_ ? kOptionsSize : 0)); + SetInitialBlock(special_block); + } return space_allocated; } -ArenaImpl::Block* ArenaImpl::NewBlock(Block* last_block, size_t min_bytes) { +std::pair ArenaImpl::NewBuffer(size_t last_size, + size_t min_bytes) { size_t size; - if (last_block) { + if (last_size != -1) { // Double the current block size, up to a limit. - size = std::min(2 * last_block->size(), options_.max_block_size); + auto max_size = options_ ? options_->max_block_size : kDefaultMaxBlockSize; + size = std::min(2 * last_size, max_size); } else { - size = options_.start_block_size; + size = options_ ? options_->start_block_size : kDefaultStartBlockSize; } // Verify that min_bytes + kBlockHeaderSize won't overflow. GOOGLE_CHECK_LE(min_bytes, std::numeric_limits::max() - kBlockHeaderSize); size = std::max(size, kBlockHeaderSize + min_bytes); - void* mem = options_.block_alloc(size); - Block* b = new (mem) Block(size, last_block); + void* mem = options_ ? (*options_->block_alloc)(size) : ::operator new(size); space_allocated_.fetch_add(size, std::memory_order_relaxed); - return b; + return {mem, size}; } -ArenaImpl::Block::Block(size_t size, Block* next) - : next_(next), pos_(kBlockHeaderSize), size_(size) {} +SerialArena::Block* SerialArena::NewBlock(SerialArena::Block* last_block, + size_t min_bytes, ArenaImpl* arena) { + void* mem; + size_t size; + std::tie(mem, size) = + arena->NewBuffer(last_block ? last_block->size() : -1, min_bytes); + Block* b = new (mem) Block(size, last_block, false, false); + return b; +} -GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE -void ArenaImpl::SerialArena::AddCleanupFallback(void* elem, - void (*cleanup)(void*)) { +PROTOBUF_NOINLINE +void SerialArena::AddCleanupFallback(void* elem, void (*cleanup)(void*)) { size_t size = cleanup_ ? 
cleanup_->size * 2 : kMinCleanupListElements; size = std::min(size, kMaxCleanupListElements); size_t bytes = internal::AlignUpTo8(CleanupChunk::SizeOf(size)); @@ -142,20 +276,10 @@ void ArenaImpl::SerialArena::AddCleanupFallback(void* elem, AddCleanup(elem, cleanup); } -GOOGLE_PROTOBUF_ATTRIBUTE_FUNC_ALIGN(32) -void* ArenaImpl::AllocateAligned(size_t n) { - SerialArena* arena; - if (GOOGLE_PREDICT_TRUE(GetSerialArenaFast(&arena))) { - return arena->AllocateAligned(n); - } else { - return AllocateAlignedFallback(n); - } -} - void* ArenaImpl::AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*)) { SerialArena* arena; - if (GOOGLE_PREDICT_TRUE(GetSerialArenaFast(&arena))) { + if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) { return arena->AllocateAlignedAndAddCleanup(n, cleanup); } else { return AllocateAlignedAndAddCleanupFallback(n, cleanup); @@ -164,66 +288,36 @@ void* ArenaImpl::AllocateAlignedAndAddCleanup(size_t n, void ArenaImpl::AddCleanup(void* elem, void (*cleanup)(void*)) { SerialArena* arena; - if (GOOGLE_PREDICT_TRUE(GetSerialArenaFast(&arena))) { + if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) { arena->AddCleanup(elem, cleanup); } else { return AddCleanupFallback(elem, cleanup); } } -GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE +PROTOBUF_NOINLINE void* ArenaImpl::AllocateAlignedFallback(size_t n) { - return GetSerialArena()->AllocateAligned(n); + return GetSerialArenaFallback(&thread_cache())->AllocateAligned(n); } -GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE +PROTOBUF_NOINLINE void* ArenaImpl::AllocateAlignedAndAddCleanupFallback(size_t n, void (*cleanup)(void*)) { - return GetSerialArena()->AllocateAlignedAndAddCleanup(n, cleanup); + return GetSerialArenaFallback( + &thread_cache())->AllocateAlignedAndAddCleanup(n, cleanup); } -GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE +PROTOBUF_NOINLINE void ArenaImpl::AddCleanupFallback(void* elem, void (*cleanup)(void*)) { - GetSerialArena()->AddCleanup(elem, cleanup); + 
GetSerialArenaFallback(&thread_cache())->AddCleanup(elem, cleanup); } -inline GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE -bool ArenaImpl::GetSerialArenaFast(ArenaImpl::SerialArena** arena) { - // If this thread already owns a block in this arena then try to use that. - // This fast path optimizes the case where multiple threads allocate from the - // same arena. - ThreadCache* tc = &thread_cache(); - if (GOOGLE_PREDICT_TRUE(tc->last_lifecycle_id_seen == lifecycle_id_)) { - *arena = tc->last_serial_arena; - return true; - } - - // Check whether we own the last accessed SerialArena on this arena. This - // fast path optimizes the case where a single thread uses multiple arenas. - SerialArena* serial = hint_.load(std::memory_order_acquire); - if (GOOGLE_PREDICT_TRUE(serial != NULL && serial->owner() == tc)) { - *arena = serial; - return true; - } - - return false; -} - -ArenaImpl::SerialArena* ArenaImpl::GetSerialArena() { - SerialArena* arena; - if (GOOGLE_PREDICT_TRUE(GetSerialArenaFast(&arena))) { - return arena; - } else { - return GetSerialArenaFallback(&thread_cache()); - } -} - -GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE -void* ArenaImpl::SerialArena::AllocateAlignedFallback(size_t n) { +PROTOBUF_NOINLINE +void* SerialArena::AllocateAlignedFallback(size_t n) { // Sync back to current's pos. head_->set_pos(head_->size() - (limit_ - ptr_)); - head_ = arena_->NewBlock(head_, n); + head_ = NewBlock(head_, n, arena_); ptr_ = head_->Pointer(head_->pos()); limit_ = head_->Pointer(head_->size()); @@ -241,13 +335,17 @@ uint64 ArenaImpl::SpaceAllocated() const { uint64 ArenaImpl::SpaceUsed() const { SerialArena* serial = threads_.load(std::memory_order_acquire); uint64 space_used = 0; - for ( ; serial; serial = serial->next()) { + for (; serial; serial = serial->next()) { space_used += serial->SpaceUsed(); } + // Remove the overhead of Options structure, if any. 
+ if (options_) { + space_used -= kOptionsSize; + } return space_used; } -uint64 ArenaImpl::SerialArena::SpaceUsed() const { +uint64 SerialArena::SpaceUsed() const { // Get current block's size from ptr_ (since we can't trust head_->pos(). uint64 space_used = ptr_ - head_->Pointer(kBlockHeaderSize); // Get subsequent block size from b->pos(). @@ -255,102 +353,51 @@ uint64 ArenaImpl::SerialArena::SpaceUsed() const { space_used += (b->pos() - kBlockHeaderSize); } // Remove the overhead of the SerialArena itself. - space_used -= kSerialArenaSize; + space_used -= ArenaImpl::kSerialArenaSize; return space_used; } -uint64 ArenaImpl::FreeBlocks() { - uint64 space_allocated = 0; - // By omitting an Acquire barrier we ensure that any user code that doesn't - // properly synchronize Reset() or the destructor will throw a TSAN warning. - SerialArena* serial = threads_.load(std::memory_order_relaxed); - - while (serial) { - // This is inside a block we are freeing, so we need to read it now. - SerialArena* next = serial->next(); - space_allocated += ArenaImpl::SerialArena::Free(serial, initial_block_, - options_.block_dealloc); - // serial is dead now. - serial = next; - } - - return space_allocated; -} - -uint64 ArenaImpl::SerialArena::Free(ArenaImpl::SerialArena* serial, - Block* initial_block, - void (*block_dealloc)(void*, size_t)) { - uint64 space_allocated = 0; - - // We have to be careful in this function, since we will be freeing the Block - // that contains this SerialArena. Be careful about accessing |serial|. - - for (Block* b = serial->head_; b; ) { - // This is inside the block we are freeing, so we need to read it now. - Block* next_block = b->next(); - space_allocated += (b->size()); - -#ifdef ADDRESS_SANITIZER - // This memory was provided by the underlying allocator as unpoisoned, so - // return it in an unpoisoned state. 
- ASAN_UNPOISON_MEMORY_REGION(b->Pointer(0), b->size()); -#endif // ADDRESS_SANITIZER - - if (b != initial_block) { - block_dealloc(b, b->size()); - } - - b = next_block; - } - - return space_allocated; -} - void ArenaImpl::CleanupList() { // By omitting an Acquire barrier we ensure that any user code that doesn't // properly synchronize Reset() or the destructor will throw a TSAN warning. SerialArena* serial = threads_.load(std::memory_order_relaxed); - for ( ; serial; serial = serial->next()) { + for (; serial; serial = serial->next()) { serial->CleanupList(); } } -void ArenaImpl::SerialArena::CleanupList() { +void SerialArena::CleanupList() { if (cleanup_ != NULL) { CleanupListFallback(); } } -void ArenaImpl::SerialArena::CleanupListFallback() { - // Cleanup newest chunk: ptrs give us length. +void SerialArena::CleanupListFallback() { + // The first chunk might be only partially full, so calculate its size + // from cleanup_ptr_. Subsequent chunks are always full, so use list->size. size_t n = cleanup_ptr_ - &cleanup_->nodes[0]; - CleanupNode* node = cleanup_ptr_; - for (size_t i = 0; i < n; i++) { - --node; - node->cleanup(node->elem); - } - - // Cleanup older chunks, which are known to be full. - CleanupChunk* list = cleanup_->next; - while (list) { - size_t n = list->size; - CleanupNode* node = &list->nodes[list->size]; - for (size_t i = 0; i < n; i++) { - --node; - node->cleanup(node->elem); + CleanupChunk* list = cleanup_; + while (true) { + CleanupNode* node = &list->nodes[0]; + // Cleanup newest elements first (allocated last). + for (size_t i = n; i > 0; i--) { + node[i - 1].cleanup(node[i - 1].elem); } list = list->next; + if (list == nullptr) { + break; + } + // All but the first chunk are always full. 
+ n = list->size; } } -ArenaImpl::SerialArena* ArenaImpl::SerialArena::New(Block* b, void* owner, - ArenaImpl* arena) { - GOOGLE_DCHECK_EQ(b->pos(), kBlockHeaderSize); // Should be a fresh block - GOOGLE_DCHECK_LE(kBlockHeaderSize + kSerialArenaSize, b->size()); - SerialArena* serial = - reinterpret_cast(b->Pointer(kBlockHeaderSize)); - b->set_pos(kBlockHeaderSize + kSerialArenaSize); +SerialArena* SerialArena::New(Block* b, void* owner, ArenaImpl* arena) { + auto pos = b->pos(); + GOOGLE_DCHECK_LE(pos + ArenaImpl::kSerialArenaSize, b->size()); + SerialArena* serial = reinterpret_cast(b->Pointer(pos)); + b->set_pos(pos + ArenaImpl::kSerialArenaSize); serial->arena_ = arena; serial->owner_ = owner; serial->head_ = b; @@ -362,11 +409,11 @@ ArenaImpl::SerialArena* ArenaImpl::SerialArena::New(Block* b, void* owner, return serial; } -GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE -ArenaImpl::SerialArena* ArenaImpl::GetSerialArenaFallback(void* me) { +PROTOBUF_NOINLINE +SerialArena* ArenaImpl::GetSerialArenaFallback(void* me) { // Look for this SerialArena in our linked list. SerialArena* serial = threads_.load(std::memory_order_acquire); - for ( ; serial; serial = serial->next()) { + for (; serial; serial = serial->next()) { if (serial->owner() == me) { break; } @@ -375,7 +422,7 @@ ArenaImpl::SerialArena* ArenaImpl::GetSerialArenaFallback(void* me) { if (!serial) { // This thread doesn't have any SerialArena, which also means it doesn't // have any blocks yet. So we'll allocate its first block now. 
- Block* b = NewBlock(NULL, kSerialArenaSize); + SerialArena::Block* b = SerialArena::NewBlock(NULL, kSerialArenaSize, this); serial = SerialArena::New(b, me, this); SerialArena* head = threads_.load(std::memory_order_relaxed); @@ -389,26 +436,13 @@ ArenaImpl::SerialArena* ArenaImpl::GetSerialArenaFallback(void* me) { return serial; } -} // namespace internal - -void Arena::CallDestructorHooks() { - uint64 space_allocated = impl_.SpaceAllocated(); - // Call the reset hook - if (on_arena_reset_ != NULL) { - on_arena_reset_(this, hooks_cookie_, space_allocated); - } +ArenaMetricsCollector::~ArenaMetricsCollector() {} - // Call the destruction hook - if (on_arena_destruction_ != NULL) { - on_arena_destruction_(this, hooks_cookie_, space_allocated); - } -} +} // namespace internal -void Arena::OnArenaAllocation(const std::type_info* allocated_type, - size_t n) const { - if (on_arena_allocation_ != NULL) { - on_arena_allocation_(allocated_type, n, hooks_cookie_); - } +PROTOBUF_FUNC_ALIGN(32) +void* Arena::AllocateAlignedNoHook(size_t n) { + return impl_.AllocateAligned(n); } } // namespace protobuf diff --git a/third_party/protobuf-lite/arenastring.cc b/third_party/protobuf-lite/arenastring.cc index 7f33a0c8..b5f48c53 100644 --- a/third_party/protobuf-lite/arenastring.cc +++ b/third_party/protobuf-lite/arenastring.cc @@ -28,15 +28,226 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// The ArenaString implementation is not included in the open-source release. Do -// not include this file in the distribution. 
- #include +#include +#include +#include +#include +#include +#include +#include +#include + +// clang-format off +#include +// clang-format on + namespace google { namespace protobuf { namespace internal { +const std::string& LazyString::Init() const { + static WrappedMutex mu{GOOGLE_PROTOBUF_LINKER_INITIALIZED}; + mu.Lock(); + const std::string* res = inited_.load(std::memory_order_acquire); + if (res == nullptr) { + auto init_value = init_value_; + res = ::new (static_cast(string_buf_)) + std::string(init_value.ptr, init_value.size); + inited_.store(res, std::memory_order_release); + } + mu.Unlock(); + return *res; +} + + +void ArenaStringPtr::Set(const std::string* default_value, + ConstStringParam value, ::google::protobuf::Arena* arena) { + if (IsDefault(default_value)) { + tagged_ptr_.Set(Arena::Create(arena, value)); + } else { + UnsafeMutablePointer()->assign(value.data(), value.length()); + } +} + +void ArenaStringPtr::Set(const std::string* default_value, std::string&& value, + ::google::protobuf::Arena* arena) { + if (IsDefault(default_value)) { + if (arena == nullptr) { + tagged_ptr_.Set(new std::string(std::move(value))); + } else { + tagged_ptr_.Set(Arena::Create(arena, std::move(value))); + } + } else if (IsDonatedString()) { + std::string* current = tagged_ptr_.Get(); + auto* s = new (current) std::string(std::move(value)); + arena->OwnDestructor(s); + tagged_ptr_.Set(s); + } else /* !IsDonatedString() */ { + *UnsafeMutablePointer() = std::move(value); + } +} + +void ArenaStringPtr::Set(EmptyDefault, ConstStringParam value, + ::google::protobuf::Arena* arena) { + Set(&GetEmptyStringAlreadyInited(), value, arena); +} + +void ArenaStringPtr::Set(EmptyDefault, std::string&& value, + ::google::protobuf::Arena* arena) { + Set(&GetEmptyStringAlreadyInited(), std::move(value), arena); +} + +void ArenaStringPtr::Set(NonEmptyDefault, ConstStringParam value, + ::google::protobuf::Arena* arena) { + Set(nullptr, value, arena); +} + +void 
ArenaStringPtr::Set(NonEmptyDefault, std::string&& value, + ::google::protobuf::Arena* arena) { + Set(nullptr, std::move(value), arena); +} + +std::string* ArenaStringPtr::Mutable(EmptyDefault, ::google::protobuf::Arena* arena) { + if (!IsDonatedString() && !IsDefault(&GetEmptyStringAlreadyInited())) { + return UnsafeMutablePointer(); + } else { + return MutableSlow(arena); + } +} + +std::string* ArenaStringPtr::Mutable(const LazyString& default_value, + ::google::protobuf::Arena* arena) { + if (!IsDonatedString() && !IsDefault(nullptr)) { + return UnsafeMutablePointer(); + } else { + return MutableSlow(arena, default_value); + } +} + +std::string* ArenaStringPtr::MutableNoCopy(const std::string* default_value, + ::google::protobuf::Arena* arena) { + if (!IsDonatedString() && !IsDefault(default_value)) { + return UnsafeMutablePointer(); + } else { + GOOGLE_DCHECK(IsDefault(default_value)); + // Allocate empty. The contents are not relevant. + std::string* new_string = Arena::Create(arena); + tagged_ptr_.Set(new_string); + return new_string; + } +} + +template +std::string* ArenaStringPtr::MutableSlow(::google::protobuf::Arena* arena, + const Lazy&... lazy_default) { + const std::string* const default_value = + sizeof...(Lazy) == 0 ? 
&GetEmptyStringAlreadyInited() : nullptr; + GOOGLE_DCHECK(IsDefault(default_value)); + std::string* new_string = + Arena::Create(arena, lazy_default.get()...); + tagged_ptr_.Set(new_string); + return new_string; +} + +std::string* ArenaStringPtr::Release(const std::string* default_value, + ::google::protobuf::Arena* arena) { + if (IsDefault(default_value)) { + return nullptr; + } else { + return ReleaseNonDefault(default_value, arena); + } +} + +std::string* ArenaStringPtr::ReleaseNonDefault(const std::string* default_value, + ::google::protobuf::Arena* arena) { + GOOGLE_DCHECK(!IsDefault(default_value)); + + if (!IsDonatedString()) { + std::string* released; + if (arena != nullptr) { + released = new std::string; + released->swap(*UnsafeMutablePointer()); + } else { + released = UnsafeMutablePointer(); + } + tagged_ptr_.Set(const_cast(default_value)); + return released; + } else /* IsDonatedString() */ { + GOOGLE_DCHECK(arena != nullptr); + std::string* released = new std::string(Get()); + tagged_ptr_.Set(const_cast(default_value)); + return released; + } +} + +void ArenaStringPtr::SetAllocated(const std::string* default_value, + std::string* value, ::google::protobuf::Arena* arena) { + // Release what we have first. + if (arena == nullptr && !IsDefault(default_value)) { + delete UnsafeMutablePointer(); + } + if (value == nullptr) { + tagged_ptr_.Set(const_cast(default_value)); + } else { +#ifdef NDEBUG + tagged_ptr_.Set(value); + if (arena != nullptr) { + arena->Own(value); + } +#else + // On debug builds, copy the string so the address differs. delete will + // fail if value was a stack-allocated temporary/etc., which would have + // failed when arena ran its cleanup list. 
+ std::string* new_value = Arena::Create(arena, *value); + delete value; + tagged_ptr_.Set(new_value); +#endif + } +} + +void ArenaStringPtr::Destroy(const std::string* default_value, + ::google::protobuf::Arena* arena) { + if (arena == nullptr) { + GOOGLE_DCHECK(!IsDonatedString()); + if (!IsDefault(default_value)) { + delete UnsafeMutablePointer(); + } + } +} + +void ArenaStringPtr::Destroy(EmptyDefault, ::google::protobuf::Arena* arena) { + Destroy(&GetEmptyStringAlreadyInited(), arena); +} + +void ArenaStringPtr::Destroy(NonEmptyDefault, ::google::protobuf::Arena* arena) { + Destroy(nullptr, arena); +} + +void ArenaStringPtr::ClearToEmpty() { + if (IsDefault(&GetEmptyStringAlreadyInited())) { + // Already set to default -- do nothing. + } else { + // Unconditionally mask away the tag. + // + // UpdateDonatedString uses assign when capacity is larger than the new + // value, which is trivially true in the donated string case. + // const_cast(PtrValue())->clear(); + tagged_ptr_.Get()->clear(); + } +} + +void ArenaStringPtr::ClearToDefault(const LazyString& default_value, + ::google::protobuf::Arena* arena) { + (void)arena; + if (IsDefault(nullptr)) { + // Already set to default -- do nothing. 
+ } else if (!IsDonatedString()) { + UnsafeMutablePointer()->assign(default_value.get()); + } +} + } // namespace internal } // namespace protobuf diff --git a/third_party/protobuf-lite/bytestream.cc b/third_party/protobuf-lite/bytestream.cc index f4af6a50..a0f298ed 100644 --- a/third_party/protobuf-lite/bytestream.cc +++ b/third_party/protobuf-lite/bytestream.cc @@ -33,6 +33,8 @@ #include #include +#include + namespace google { namespace protobuf { namespace strings { @@ -113,7 +115,7 @@ char* GrowingArrayByteSink::GetBuffer(size_t* nbytes) { ShrinkToFit(); char* b = buf_; *nbytes = size_; - buf_ = NULL; + buf_ = nullptr; size_ = capacity_ = 0; return b; } diff --git a/third_party/protobuf-lite/coded_stream.cc b/third_party/protobuf-lite/coded_stream.cc index 0851ff0c..59d86f98 100644 --- a/third_party/protobuf-lite/coded_stream.cc +++ b/third_party/protobuf-lite/coded_stream.cc @@ -38,17 +38,24 @@ // will not cross the end of the buffer, since we can avoid a lot // of branching in this case. 
-#include +#include + +#include + #include +#include #include -#include -#include -#include + #include #include +#include +#include +#include #include +#include + namespace google { namespace protobuf { namespace io { @@ -59,8 +66,8 @@ static const int kMaxVarintBytes = 10; static const int kMaxVarint32Bytes = 5; -inline bool NextNonEmpty(ZeroCopyInputStream* input, - const void** data, int* size) { +inline bool NextNonEmpty(ZeroCopyInputStream* input, const void** data, + int* size) { bool success; do { success = input->Next(data, size); @@ -82,10 +89,6 @@ CodedInputStream::~CodedInputStream() { int CodedInputStream::default_recursion_limit_ = 100; -void CodedOutputStream::EnableAliasing(bool enabled) { - aliasing_enabled_ = enabled && output_->AllowsAliasing(); -} - void CodedInputStream::BackUpInputToCurrentPosition() { int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_; if (backup_bytes > 0) { @@ -121,9 +124,9 @@ CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) { // security: byte_limit is possibly evil, so check for negative values // and overflow. Also check that the new requested limit is before the // previous limit; otherwise we continue to enforce the previous limit. - if (GOOGLE_PREDICT_TRUE(byte_limit >= 0 && - byte_limit <= INT_MAX - current_position && - byte_limit < current_limit_ - current_position)) { + if (PROTOBUF_PREDICT_TRUE(byte_limit >= 0 && + byte_limit <= INT_MAX - current_position && + byte_limit < current_limit_ - current_position)) { current_limit_ = current_position + byte_limit; RecomputeBufferLimits(); } @@ -187,11 +190,13 @@ int CodedInputStream::BytesUntilTotalBytesLimit() const { } void CodedInputStream::PrintTotalBytesLimitError() { - GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too " - "big (more than " << total_bytes_limit_ - << " bytes). 
To increase the limit (or to disable these " - "warnings), see CodedInputStream::SetTotalBytesLimit() " - "in google/protobuf/io/coded_stream.h."; + GOOGLE_LOG(ERROR) + << "A protocol message was rejected because it was too " + "big (more than " + << total_bytes_limit_ + << " bytes). To increase the limit (or to disable these " + "warnings), see CodedInputStream::SetTotalBytesLimit() " + "in third_party/protobuf/src/google/protobuf/io/coded_stream.h."; } bool CodedInputStream::SkipFallback(int count, int original_buffer_size) { @@ -234,15 +239,42 @@ bool CodedInputStream::GetDirectBufferPointer(const void** data, int* size) { } bool CodedInputStream::ReadRaw(void* buffer, int size) { - return InternalReadRawInline(buffer, size); + int current_buffer_size; + while ((current_buffer_size = BufferSize()) < size) { + // Reading past end of buffer. Copy what we have, then refresh. + memcpy(buffer, buffer_, current_buffer_size); + buffer = reinterpret_cast(buffer) + current_buffer_size; + size -= current_buffer_size; + Advance(current_buffer_size); + if (!Refresh()) return false; + } + + memcpy(buffer, buffer_, size); + Advance(size); + + return true; } -bool CodedInputStream::ReadString(string* buffer, int size) { +bool CodedInputStream::ReadString(std::string* buffer, int size) { if (size < 0) return false; // security: size is often user-supplied - return InternalReadStringInline(buffer, size); + + if (BufferSize() >= size) { + STLStringResizeUninitialized(buffer, size); + std::pair z = as_string_data(buffer); + if (z.second) { + // Oddly enough, memcpy() requires its first two args to be non-NULL even + // if we copy 0 bytes. So, we have ensured that z.first is non-NULL here. 
+ GOOGLE_DCHECK(z.first != NULL); + memcpy(z.first, buffer_, size); + Advance(size); + } + return true; + } + + return ReadStringFallback(buffer, size); } -bool CodedInputStream::ReadStringFallback(string* buffer, int size) { +bool CodedInputStream::ReadStringFallback(std::string* buffer, int size) { if (!buffer->empty()) { buffer->clear(); } @@ -312,14 +344,28 @@ bool CodedInputStream::ReadLittleEndian64Fallback(uint64* value) { namespace { +// Decodes varint64 with known size, N, and returns next pointer. Knowing N at +// compile time, compiler can generate optimal code. For example, instead of +// subtracting 0x80 at each iteration, it subtracts properly shifted mask once. +template +const uint8* DecodeVarint64KnownSize(const uint8* buffer, uint64* value) { + GOOGLE_DCHECK_GT(N, 0); + uint64 result = static_cast(buffer[N - 1]) << (7 * (N - 1)); + for (int i = 0, offset = 0; i < N - 1; i++, offset += 7) { + result += static_cast(buffer[i] - 0x80) << offset; + } + *value = result; + return buffer + N; +} + // Read a varint from the given buffer, write it to *value, and return a pair. // The first part of the pair is true iff the read was successful. The second // part is buffer + (number of bytes read). This function is always inlined, // so returning a pair is costless. -GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE -::std::pair ReadVarint32FromArray( - uint32 first_byte, const uint8* buffer, - uint32* value); +PROTOBUF_ALWAYS_INLINE +::std::pair ReadVarint32FromArray(uint32 first_byte, + const uint8* buffer, + uint32* value); inline ::std::pair ReadVarint32FromArray( uint32 first_byte, const uint8* buffer, uint32* value) { // Fast path: We have enough bytes left in the buffer to guarantee that @@ -330,71 +376,72 @@ inline ::std::pair ReadVarint32FromArray( uint32 b; uint32 result = first_byte - 0x80; ++ptr; // We just processed the first byte. Move on to the second. 
- b = *(ptr++); result += b << 7; if (!(b & 0x80)) goto done; + b = *(ptr++); + result += b << 7; + if (!(b & 0x80)) goto done; result -= 0x80 << 7; - b = *(ptr++); result += b << 14; if (!(b & 0x80)) goto done; + b = *(ptr++); + result += b << 14; + if (!(b & 0x80)) goto done; result -= 0x80 << 14; - b = *(ptr++); result += b << 21; if (!(b & 0x80)) goto done; + b = *(ptr++); + result += b << 21; + if (!(b & 0x80)) goto done; result -= 0x80 << 21; - b = *(ptr++); result += b << 28; if (!(b & 0x80)) goto done; + b = *(ptr++); + result += b << 28; + if (!(b & 0x80)) goto done; // "result -= 0x80 << 28" is irrevelant. // If the input is larger than 32 bits, we still need to read it all // and discard the high-order bits. for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) { - b = *(ptr++); if (!(b & 0x80)) goto done; + b = *(ptr++); + if (!(b & 0x80)) goto done; } // We have overrun the maximum size of a varint (10 bytes). Assume // the data is corrupt. return std::make_pair(false, ptr); - done: +done: *value = result; return std::make_pair(true, ptr); } -GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE::std::pair -ReadVarint64FromArray(const uint8* buffer, uint64* value); +PROTOBUF_ALWAYS_INLINE::std::pair ReadVarint64FromArray( + const uint8* buffer, uint64* value); inline ::std::pair ReadVarint64FromArray( const uint8* buffer, uint64* value) { - const uint8* ptr = buffer; - uint32 b; - - // Splitting into 32-bit pieces gives better performance on 32-bit - // processors. 
- uint32 part0 = 0, part1 = 0, part2 = 0; - - b = *(ptr++); part0 = b ; if (!(b & 0x80)) goto done; - part0 -= 0x80; - b = *(ptr++); part0 += b << 7; if (!(b & 0x80)) goto done; - part0 -= 0x80 << 7; - b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done; - part0 -= 0x80 << 14; - b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done; - part0 -= 0x80 << 21; - b = *(ptr++); part1 = b ; if (!(b & 0x80)) goto done; - part1 -= 0x80; - b = *(ptr++); part1 += b << 7; if (!(b & 0x80)) goto done; - part1 -= 0x80 << 7; - b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done; - part1 -= 0x80 << 14; - b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done; - part1 -= 0x80 << 21; - b = *(ptr++); part2 = b ; if (!(b & 0x80)) goto done; - part2 -= 0x80; - b = *(ptr++); part2 += b << 7; if (!(b & 0x80)) goto done; - // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0. - - // We have overrun the maximum size of a varint (10 bytes). Assume - // the data is corrupt. - return std::make_pair(false, ptr); + // Assumes varint64 is at least 2 bytes. + GOOGLE_DCHECK_GE(buffer[0], 128); + + const uint8* next; + if (buffer[1] < 128) { + next = DecodeVarint64KnownSize<2>(buffer, value); + } else if (buffer[2] < 128) { + next = DecodeVarint64KnownSize<3>(buffer, value); + } else if (buffer[3] < 128) { + next = DecodeVarint64KnownSize<4>(buffer, value); + } else if (buffer[4] < 128) { + next = DecodeVarint64KnownSize<5>(buffer, value); + } else if (buffer[5] < 128) { + next = DecodeVarint64KnownSize<6>(buffer, value); + } else if (buffer[6] < 128) { + next = DecodeVarint64KnownSize<7>(buffer, value); + } else if (buffer[7] < 128) { + next = DecodeVarint64KnownSize<8>(buffer, value); + } else if (buffer[8] < 128) { + next = DecodeVarint64KnownSize<9>(buffer, value); + } else if (buffer[9] < 128) { + next = DecodeVarint64KnownSize<10>(buffer, value); + } else { + // We have overrun the maximum size of a varint (10 bytes). Assume + // the data is corrupt. 
+ return std::make_pair(false, buffer + 11); + } - done: - *value = (static_cast(part0)) | - (static_cast(part1) << 28) | - (static_cast(part2) << 56); - return std::make_pair(true, ptr); + return std::make_pair(true, next); } } // namespace @@ -416,7 +463,7 @@ int64 CodedInputStream::ReadVarint32Fallback(uint32 first_byte_or_zero) { << "Caller should provide us with *buffer_ when buffer is non-empty"; uint32 temp; ::std::pair p = - ReadVarint32FromArray(first_byte_or_zero, buffer_, &temp); + ReadVarint32FromArray(first_byte_or_zero, buffer_, &temp); if (!p.first) return -1; buffer_ = p.second; return temp; @@ -619,156 +666,285 @@ bool CodedInputStream::Refresh() { // CodedOutputStream ================================================= -std::atomic CodedOutputStream::default_serialization_deterministic_{ - false}; +void EpsCopyOutputStream::EnableAliasing(bool enabled) { + aliasing_enabled_ = enabled && stream_->AllowsAliasing(); +} -CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output) - : CodedOutputStream(output, true) {} +int64 EpsCopyOutputStream::ByteCount(uint8* ptr) const { + // Calculate the current offset relative to the end of the stream buffer. + int delta = (end_ - ptr) + (buffer_end_ ? 0 : kSlopBytes); + return stream_->ByteCount() - delta; +} -CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output, - bool do_eager_refresh) - : output_(output), - buffer_(NULL), - buffer_size_(0), - total_bytes_(0), - had_error_(false), - aliasing_enabled_(false), - is_serialization_deterministic_(IsDefaultSerializationDeterministic()) { - if (do_eager_refresh) { - // Eagerly Refresh() so buffer space is immediately available. - Refresh(); - // The Refresh() may have failed. If the client doesn't write any data, - // though, don't consider this an error. If the client does write data, then - // another Refresh() will be attempted and it will set the error once again. 
- had_error_ = false; +// Flushes what's written out to the underlying ZeroCopyOutputStream buffers. +// Returns the size remaining in the buffer and sets buffer_end_ to the start +// of the remaining buffer, ie. [buffer_end_, buffer_end_ + return value) +int EpsCopyOutputStream::Flush(uint8* ptr) { + while (buffer_end_ && ptr > end_) { + int overrun = ptr - end_; + GOOGLE_DCHECK(!had_error_); + GOOGLE_DCHECK(overrun <= kSlopBytes); // NOLINT + ptr = Next() + overrun; + if (had_error_) return 0; } + int s; + if (buffer_end_) { + std::memcpy(buffer_end_, buffer_, ptr - buffer_); + buffer_end_ += ptr - buffer_; + s = end_ - ptr; + } else { + // The stream is writing directly in the ZeroCopyOutputStream buffer. + s = end_ + kSlopBytes - ptr; + buffer_end_ = ptr; + } + GOOGLE_DCHECK(s >= 0); // NOLINT + return s; } -CodedOutputStream::~CodedOutputStream() { - Trim(); +uint8* EpsCopyOutputStream::Trim(uint8* ptr) { + if (had_error_) return ptr; + int s = Flush(ptr); + if (s) stream_->BackUp(s); + // Reset to initial state (expecting new buffer) + buffer_end_ = end_ = buffer_; + return buffer_; } -void CodedOutputStream::Trim() { - if (buffer_size_ > 0) { - output_->BackUp(buffer_size_); - total_bytes_ -= buffer_size_; - buffer_size_ = 0; - buffer_ = NULL; - } + +uint8* EpsCopyOutputStream::FlushAndResetBuffer(uint8* ptr) { + if (had_error_) return buffer_; + int s = Flush(ptr); + if (had_error_) return buffer_; + return SetInitialBuffer(buffer_end_, s); } -bool CodedOutputStream::Skip(int count) { +bool EpsCopyOutputStream::Skip(int count, uint8** pp) { if (count < 0) return false; - - while (count > buffer_size_) { - count -= buffer_size_; - if (!Refresh()) return false; + if (had_error_) { + *pp = buffer_; + return false; } - - Advance(count); + int size = Flush(*pp); + if (had_error_) { + *pp = buffer_; + return false; + } + void* data = buffer_end_; + while (count > size) { + count -= size; + if (!stream_->Next(&data, &size)) { + *pp = Error(); + return false; + } + 
} + *pp = SetInitialBuffer(static_cast(data) + count, size - count); return true; } -bool CodedOutputStream::GetDirectBufferPointer(void** data, int* size) { - if (buffer_size_ == 0 && !Refresh()) return false; - - *data = buffer_; - *size = buffer_size_; +bool EpsCopyOutputStream::GetDirectBufferPointer(void** data, int* size, + uint8** pp) { + if (had_error_) { + *pp = buffer_; + return false; + } + *size = Flush(*pp); + if (had_error_) { + *pp = buffer_; + return false; + } + *data = buffer_end_; + while (*size == 0) { + if (!stream_->Next(data, size)) { + *pp = Error(); + return false; + } + } + *pp = SetInitialBuffer(*data, *size); return true; } -void CodedOutputStream::WriteRaw(const void* data, int size) { - while (buffer_size_ < size) { - memcpy(buffer_, data, buffer_size_); - size -= buffer_size_; - data = reinterpret_cast(data) + buffer_size_; - if (!Refresh()) return; +uint8* EpsCopyOutputStream::GetDirectBufferForNBytesAndAdvance(int size, + uint8** pp) { + if (had_error_) { + *pp = buffer_; + return nullptr; + } + int s = Flush(*pp); + if (had_error_) { + *pp = buffer_; + return nullptr; + } + if (s >= size) { + auto res = buffer_end_; + *pp = SetInitialBuffer(buffer_end_ + size, s - size); + return res; + } else { + *pp = SetInitialBuffer(buffer_end_, s); + return nullptr; } - - memcpy(buffer_, data, size); - Advance(size); -} - -uint8* CodedOutputStream::WriteRawToArray( - const void* data, int size, uint8* target) { - memcpy(target, data, size); - return target + size; } - -void CodedOutputStream::WriteAliasedRaw(const void* data, int size) { - if (size < buffer_size_ - ) { - WriteRaw(data, size); +uint8* EpsCopyOutputStream::Next() { + GOOGLE_DCHECK(!had_error_); // NOLINT + if (PROTOBUF_PREDICT_FALSE(stream_ == nullptr)) return Error(); + if (buffer_end_) { + // We're in the patch buffer and need to fill up the previous buffer. 
+ std::memcpy(buffer_end_, buffer_, end_ - buffer_); + uint8* ptr; + int size; + do { + void* data; + if (PROTOBUF_PREDICT_FALSE(!stream_->Next(&data, &size))) { + // Stream has an error, we use the patch buffer to continue to be + // able to write. + return Error(); + } + ptr = static_cast(data); + } while (size == 0); + if (PROTOBUF_PREDICT_TRUE(size > kSlopBytes)) { + std::memcpy(ptr, end_, kSlopBytes); + end_ = ptr + size - kSlopBytes; + buffer_end_ = nullptr; + return ptr; + } else { + GOOGLE_DCHECK(size > 0); // NOLINT + // Buffer to small + std::memmove(buffer_, end_, kSlopBytes); + buffer_end_ = ptr; + end_ = buffer_ + size; + return buffer_; + } } else { - Trim(); - - total_bytes_ += size; - had_error_ |= !output_->WriteAliasedRaw(data, size); + std::memcpy(buffer_, end_, kSlopBytes); + buffer_end_ = end_; + end_ = buffer_ + kSlopBytes; + return buffer_; } } -void CodedOutputStream::WriteLittleEndian32(uint32 value) { - uint8 bytes[sizeof(value)]; - - bool use_fast = buffer_size_ >= sizeof(value); - uint8* ptr = use_fast ? 
buffer_ : bytes; - - WriteLittleEndian32ToArray(value, ptr); +uint8* EpsCopyOutputStream::EnsureSpaceFallback(uint8* ptr) { + do { + if (PROTOBUF_PREDICT_FALSE(had_error_)) return buffer_; + int overrun = ptr - end_; + GOOGLE_DCHECK(overrun >= 0); // NOLINT + GOOGLE_DCHECK(overrun <= kSlopBytes); // NOLINT + ptr = Next() + overrun; + } while (ptr >= end_); + GOOGLE_DCHECK(ptr < end_); // NOLINT + return ptr; +} + +uint8* EpsCopyOutputStream::WriteRawFallback(const void* data, int size, + uint8* ptr) { + int s = GetSize(ptr); + while (s < size) { + std::memcpy(ptr, data, s); + size -= s; + data = static_cast(data) + s; + ptr = EnsureSpaceFallback(ptr + s); + s = GetSize(ptr); + } + std::memcpy(ptr, data, size); + return ptr + size; +} - if (use_fast) { - Advance(sizeof(value)); +uint8* EpsCopyOutputStream::WriteAliasedRaw(const void* data, int size, + uint8* ptr) { + if (size < GetSize(ptr) + ) { + return WriteRaw(data, size, ptr); } else { - WriteRaw(bytes, sizeof(value)); + ptr = Trim(ptr); + if (stream_->WriteAliasedRaw(data, size)) return ptr; + return Error(); } } -void CodedOutputStream::WriteLittleEndian64(uint64 value) { - uint8 bytes[sizeof(value)]; - - bool use_fast = buffer_size_ >= sizeof(value); - uint8* ptr = use_fast ? 
buffer_ : bytes; - - WriteLittleEndian64ToArray(value, ptr); - - if (use_fast) { - Advance(sizeof(value)); - } else { - WriteRaw(bytes, sizeof(value)); +#ifndef PROTOBUF_LITTLE_ENDIAN +uint8* EpsCopyOutputStream::WriteRawLittleEndian32(const void* data, int size, + uint8* ptr) { + auto p = static_cast(data); + auto end = p + size; + while (end - p >= kSlopBytes) { + ptr = EnsureSpace(ptr); + uint32 buffer[4]; + static_assert(sizeof(buffer) == kSlopBytes, "Buffer must be kSlopBytes"); + std::memcpy(buffer, p, kSlopBytes); + p += kSlopBytes; + for (auto x : buffer) + ptr = CodedOutputStream::WriteLittleEndian32ToArray(x, ptr); + } + while (p < end) { + ptr = EnsureSpace(ptr); + uint32 buffer; + std::memcpy(&buffer, p, 4); + p += 4; + ptr = CodedOutputStream::WriteLittleEndian32ToArray(buffer, ptr); + } + return ptr; +} + +uint8* EpsCopyOutputStream::WriteRawLittleEndian64(const void* data, int size, + uint8* ptr) { + auto p = static_cast(data); + auto end = p + size; + while (end - p >= kSlopBytes) { + ptr = EnsureSpace(ptr); + uint64 buffer[2]; + static_assert(sizeof(buffer) == kSlopBytes, "Buffer must be kSlopBytes"); + std::memcpy(buffer, p, kSlopBytes); + p += kSlopBytes; + for (auto x : buffer) + ptr = CodedOutputStream::WriteLittleEndian64ToArray(x, ptr); } + while (p < end) { + ptr = EnsureSpace(ptr); + uint64 buffer; + std::memcpy(&buffer, p, 8); + p += 8; + ptr = CodedOutputStream::WriteLittleEndian64ToArray(buffer, ptr); + } + return ptr; } +#endif + -void CodedOutputStream::WriteVarint32SlowPath(uint32 value) { - uint8 bytes[kMaxVarint32Bytes]; - uint8* target = &bytes[0]; - uint8* end = WriteVarint32ToArray(value, target); - int size = end - target; - WriteRaw(bytes, size); +uint8* EpsCopyOutputStream::WriteStringMaybeAliasedOutline(uint32 num, + const std::string& s, + uint8* ptr) { + ptr = EnsureSpace(ptr); + uint32 size = s.size(); + ptr = WriteLengthDelim(num, size, ptr); + return WriteRawMaybeAliased(s.data(), size, ptr); } -void 
CodedOutputStream::WriteVarint64SlowPath(uint64 value) { - uint8 bytes[kMaxVarintBytes]; - uint8* target = &bytes[0]; - uint8* end = WriteVarint64ToArray(value, target); - int size = end - target; - WriteRaw(bytes, size); +uint8* EpsCopyOutputStream::WriteStringOutline(uint32 num, const std::string& s, + uint8* ptr) { + ptr = EnsureSpace(ptr); + uint32 size = s.size(); + ptr = WriteLengthDelim(num, size, ptr); + return WriteRaw(s.data(), size, ptr); } -bool CodedOutputStream::Refresh() { - void* void_buffer; - if (output_->Next(&void_buffer, &buffer_size_)) { - buffer_ = reinterpret_cast(void_buffer); - total_bytes_ += buffer_size_; - return true; - } else { - buffer_ = NULL; - buffer_size_ = 0; - had_error_ = true; - return false; +std::atomic CodedOutputStream::default_serialization_deterministic_{ + false}; + +CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* stream, + bool do_eager_refresh) + : impl_(stream, IsDefaultSerializationDeterministic(), &cur_), + start_count_(stream->ByteCount()) { + if (do_eager_refresh) { + void* data; + int size; + if (!stream->Next(&data, &size) || size == 0) return; + cur_ = impl_.SetInitialBuffer(data, size); } } -uint8* CodedOutputStream::WriteStringWithSizeToArray(const string& str, +CodedOutputStream::~CodedOutputStream() { Trim(); } + + +uint8* CodedOutputStream::WriteStringWithSizeToArray(const std::string& str, uint8* target) { GOOGLE_DCHECK_LE(str.size(), kuint32max); target = WriteVarint32ToArray(str.size(), target); diff --git a/third_party/protobuf-lite/common.cc b/third_party/protobuf-lite/common.cc index 6544c6ed..bc150f56 100644 --- a/third_party/protobuf-lite/common.cc +++ b/third_party/protobuf-lite/common.cc @@ -30,20 +30,18 @@ // Author: kenton@google.com (Kenton Varda) -#include // TODO(gerbens) ideally remove this. 
#include -#include -#include -#include -#include -#include + +#include #include #include #include #include #ifdef _WIN32 +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN // We only need minimal includes +#endif #include #define snprintf _snprintf // see comment in strutil.cc #elif defined(HAVE_PTHREAD) @@ -55,6 +53,16 @@ #include #endif +#include +#include +#include +#include +#include +#include +#include + +#include + namespace google { namespace protobuf { @@ -88,7 +96,7 @@ void VerifyVersion(int headerVersion, } } -string VersionString(int version) { +std::string VersionString(int version) { int major = version / 1000000; int minor = (version / 1000) % 1000; int micro = version % 1000; @@ -119,7 +127,7 @@ namespace internal { #if defined(__ANDROID__) inline void DefaultLogHandler(LogLevel level, const char* filename, int line, - const string& message) { + const std::string& message) { if (level < GOOGLE_PROTOBUF_MIN_LOG_LEVEL) { return; } @@ -154,7 +162,7 @@ inline void DefaultLogHandler(LogLevel level, const char* filename, int line, #else void DefaultLogHandler(LogLevel level, const char* filename, int line, - const string& message) { + const std::string& message) { if (level < GOOGLE_PROTOBUF_MIN_LOG_LEVEL) { return; } @@ -169,29 +177,14 @@ void DefaultLogHandler(LogLevel level, const char* filename, int line, #endif void NullLogHandler(LogLevel /* level */, const char* /* filename */, - int /* line */, const string& /* message */) { + int /* line */, const std::string& /* message */) { // Nothing. 
} static LogHandler* log_handler_ = &DefaultLogHandler; -static int log_silencer_count_ = 0; - -static Mutex* log_silencer_count_mutex_ = NULL; -GOOGLE_PROTOBUF_DECLARE_ONCE(log_silencer_count_init_); +static std::atomic log_silencer_count_ = ATOMIC_VAR_INIT(0); -void DeleteLogSilencerCount() { - delete log_silencer_count_mutex_; - log_silencer_count_mutex_ = NULL; -} -void InitLogSilencerCount() { - log_silencer_count_mutex_ = new Mutex; - OnShutdown(&DeleteLogSilencerCount); -} -void InitLogSilencerCountOnce() { - GoogleOnceInit(&log_silencer_count_init_, &InitLogSilencerCount); -} - -LogMessage& LogMessage::operator<<(const string& value) { +LogMessage& LogMessage::operator<<(const std::string& value) { message_ += value; return *this; } @@ -206,8 +199,7 @@ LogMessage& LogMessage::operator<<(const StringPiece& value) { return *this; } -LogMessage& LogMessage::operator<<( - const ::google::protobuf::util::Status& status) { +LogMessage& LogMessage::operator<<(const util::Status& status) { message_ += status.ToString(); return *this; } @@ -243,8 +235,8 @@ DECLARE_STREAM_OPERATOR(long , "%ld") DECLARE_STREAM_OPERATOR(unsigned long, "%lu") DECLARE_STREAM_OPERATOR(double , "%g" ) DECLARE_STREAM_OPERATOR(void* , "%p" ) -DECLARE_STREAM_OPERATOR(long long , "%" GOOGLE_LL_FORMAT "d") -DECLARE_STREAM_OPERATOR(unsigned long long, "%" GOOGLE_LL_FORMAT "u") +DECLARE_STREAM_OPERATOR(long long , "%" PROTOBUF_LL_FORMAT "d") +DECLARE_STREAM_OPERATOR(unsigned long long, "%" PROTOBUF_LL_FORMAT "u") #undef DECLARE_STREAM_OPERATOR LogMessage::LogMessage(LogLevel level, const char* filename, int line) @@ -255,8 +247,6 @@ void LogMessage::Finish() { bool suppress = false; if (level_ != LOGLEVEL_FATAL) { - InitLogSilencerCountOnce(); - MutexLock lock(log_silencer_count_mutex_); suppress = log_silencer_count_ > 0; } @@ -282,9 +272,9 @@ void LogFinisher::operator=(LogMessage& other) { LogHandler* SetLogHandler(LogHandler* new_func) { LogHandler* old = internal::log_handler_; if (old == 
&internal::NullLogHandler) { - old = NULL; + old = nullptr; } - if (new_func == NULL) { + if (new_func == nullptr) { internal::log_handler_ = &internal::NullLogHandler; } else { internal::log_handler_ = new_func; @@ -293,14 +283,10 @@ LogHandler* SetLogHandler(LogHandler* new_func) { } LogSilencer::LogSilencer() { - internal::InitLogSilencerCountOnce(); - MutexLock lock(internal::log_silencer_count_mutex_); ++internal::log_silencer_count_; }; LogSilencer::~LogSilencer() { - internal::InitLogSilencerCountOnce(); - MutexLock lock(internal::log_silencer_count_mutex_); --internal::log_silencer_count_; }; @@ -318,7 +304,7 @@ void DoNothing() {} // // TODO(xiaofeng): PROTOBUF_LITTLE_ENDIAN is unfortunately defined in // google/protobuf/io/coded_stream.h and therefore can not be used here. -// Maybe move that macro definition here in the furture. +// Maybe move that macro definition here in the future. uint32 ghtonl(uint32 x) { union { uint32 result; @@ -331,52 +317,6 @@ uint32 ghtonl(uint32 x) { return result; } -// =================================================================== -// Shutdown support. 
- -namespace internal { - -typedef void OnShutdownFunc(); -struct ShutdownData { - ~ShutdownData() { - std::reverse(functions.begin(), functions.end()); - for (auto pair : functions) pair.first(pair.second); - } - - static ShutdownData* get() { - static auto* data = new ShutdownData; - return data; - } - - std::vector> functions; - Mutex mutex; -}; - -static void RunZeroArgFunc(const void* arg) { - reinterpret_cast(const_cast(arg))(); -} - -void OnShutdown(void (*func)()) { - OnShutdownRun(RunZeroArgFunc, reinterpret_cast(func)); -} - -void OnShutdownRun(void (*f)(const void*), const void* arg) { - auto shutdown_data = ShutdownData::get(); - MutexLock lock(&shutdown_data->mutex); - shutdown_data->functions.push_back(std::make_pair(f, arg)); -} - -} // namespace internal - -void ShutdownProtobufLibrary() { - // This function should be called only once, but accepts multiple calls. - static bool is_shutdown = false; - if (!is_shutdown) { - delete internal::ShutdownData::get(); - is_shutdown = true; - } -} - #if PROTOBUF_USE_EXCEPTIONS FatalException::~FatalException() throw() {} diff --git a/third_party/protobuf-lite/extension_set.cc b/third_party/protobuf-lite/extension_set.cc index cb205c4f..bfa1c42a 100644 --- a/third_party/protobuf-lite/extension_set.cc +++ b/third_party/protobuf-lite/extension_set.cc @@ -32,16 +32,23 @@ // Based on original Protocol Buffers design by // Sanjay Ghemawat, Jeff Dean, and others. -#include +#include + #include +#include #include #include -#include -#include +#include +#include #include -#include +#include +#include +#include #include #include +#include + +#include namespace google { namespace protobuf { @@ -69,38 +76,47 @@ inline bool is_packable(WireFormatLite::WireType type) { case WireFormatLite::WIRETYPE_END_GROUP: return false; - // Do not add a default statement. Let the compiler complain when someone - // adds a new wire type. + // Do not add a default statement. 
Let the compiler complain when someone + // adds a new wire type. } GOOGLE_LOG(FATAL) << "can't reach here."; return false; } // Registry stuff. -typedef hash_map, - ExtensionInfo> ExtensionRegistry; +struct ExtensionHasher { + std::size_t operator()(const std::pair& p) const { + return std::hash{}(p.first) ^ + std::hash{}(p.second); + } +}; + +typedef std::unordered_map, ExtensionInfo, + ExtensionHasher> + ExtensionRegistry; static const ExtensionRegistry* global_registry = nullptr; // This function is only called at startup, so there is no need for thread- // safety. -void Register(const MessageLite* containing_type, - int number, ExtensionInfo info) { +void Register(const MessageLite* containing_type, int number, + ExtensionInfo info) { static auto local_static_registry = OnShutdownDelete(new ExtensionRegistry); global_registry = local_static_registry; if (!InsertIfNotPresent(local_static_registry, - std::make_pair(containing_type, number), info)) { + std::make_pair(containing_type, number), info)) { GOOGLE_LOG(FATAL) << "Multiple extension registrations for type \"" - << containing_type->GetTypeName() - << "\", field number " << number << "."; + << containing_type->GetTypeName() << "\", field number " + << number << "."; } } -const ExtensionInfo* FindRegisteredExtension( - const MessageLite* containing_type, int number) { +const ExtensionInfo* FindRegisteredExtension(const MessageLite* containing_type, + int number) { return global_registry == nullptr - ? nullptr - : FindOrNull(*global_registry, std::make_pair(containing_type, number)); + ? 
nullptr + : FindOrNull(*global_registry, + std::make_pair(containing_type, number)); } } // namespace @@ -160,7 +176,7 @@ void ExtensionSet::RegisterMessageExtension(const MessageLite* containing_type, GOOGLE_CHECK(type == WireFormatLite::TYPE_MESSAGE || type == WireFormatLite::TYPE_GROUP); ExtensionInfo info(type, is_repeated, is_packed); - info.message_prototype = prototype; + info.message_info = {prototype}; Register(containing_type, number, info); } @@ -168,34 +184,42 @@ void ExtensionSet::RegisterMessageExtension(const MessageLite* containing_type, // =================================================================== // Constructors and basic methods. -ExtensionSet::ExtensionSet(::google::protobuf::Arena* arena) +ExtensionSet::ExtensionSet(Arena* arena) : arena_(arena), flat_capacity_(0), flat_size_(0), - map_{flat_capacity_ == 0 ? NULL - : ::google::protobuf::Arena::CreateArray( - arena_, flat_capacity_)} {} - -ExtensionSet::ExtensionSet() - : arena_(NULL), - flat_capacity_(0), - flat_size_(0), - map_{flat_capacity_ == 0 ? NULL - : ::google::protobuf::Arena::CreateArray( - arena_, flat_capacity_)} {} + map_{flat_capacity_ == 0 + ? NULL + : Arena::CreateArray(arena_, flat_capacity_)} {} ExtensionSet::~ExtensionSet() { // Deletes all allocated extensions. if (arena_ == NULL) { ForEach([](int /* number */, Extension& ext) { ext.Free(); }); - if (GOOGLE_PREDICT_FALSE(is_large())) { + if (PROTOBUF_PREDICT_FALSE(is_large())) { delete map_.large; } else { - delete[] map_.flat; + DeleteFlatMap(map_.flat, flat_capacity_); } } } +void ExtensionSet::DeleteFlatMap(const ExtensionSet::KeyValue* flat, + uint16 flat_capacity) { +#ifdef __cpp_sized_deallocation + // Arena::CreateArray already requires a trivially destructible type, but + // ensure this constraint is not violated in the future. 
+ static_assert(std::is_trivially_destructible::value, + "CreateArray requires a trivially destructible type"); + // A const-cast is needed, but this is safe as we are about to deallocate the + // array. + ::operator delete[](const_cast(flat), + sizeof(*flat) * flat_capacity); +#else // !__cpp_sized_deallocation + delete[] flat; +#endif // !__cpp_sized_deallocation +} + // Defined in extension_set_heavy.cc. // void ExtensionSet::AppendToList(const Descriptor* containing_type, // const DescriptorPool* pool, @@ -246,88 +270,88 @@ void ExtensionSet::ClearExtension(int number) { namespace { -enum Cardinality { - REPEATED, - OPTIONAL -}; +enum { REPEATED_FIELD, OPTIONAL_FIELD }; } // namespace -#define GOOGLE_DCHECK_TYPE(EXTENSION, LABEL, CPPTYPE) \ - GOOGLE_DCHECK_EQ((EXTENSION).is_repeated ? REPEATED : OPTIONAL, LABEL); \ +#define GOOGLE_DCHECK_TYPE(EXTENSION, LABEL, CPPTYPE) \ + GOOGLE_DCHECK_EQ((EXTENSION).is_repeated ? REPEATED_FIELD : OPTIONAL_FIELD, LABEL); \ GOOGLE_DCHECK_EQ(cpp_type((EXTENSION).type), WireFormatLite::CPPTYPE_##CPPTYPE) // ------------------------------------------------------------------- // Primitives -#define PRIMITIVE_ACCESSORS(UPPERCASE, LOWERCASE, CAMELCASE) \ - \ -LOWERCASE ExtensionSet::Get##CAMELCASE(int number, \ - LOWERCASE default_value) const { \ - const Extension* extension = FindOrNull(number); \ - if (extension == NULL || extension->is_cleared) { \ - return default_value; \ - } else { \ - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, UPPERCASE); \ - return extension->LOWERCASE##_value; \ - } \ -} \ - \ -void ExtensionSet::Set##CAMELCASE(int number, FieldType type, \ - LOWERCASE value, \ - const FieldDescriptor* descriptor) { \ - Extension* extension; \ - if (MaybeNewExtension(number, descriptor, &extension)) { \ - extension->type = type; \ - GOOGLE_DCHECK_EQ(cpp_type(extension->type), WireFormatLite::CPPTYPE_##UPPERCASE); \ - extension->is_repeated = false; \ - } else { \ - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, UPPERCASE); \ - } \ 
- extension->is_cleared = false; \ - extension->LOWERCASE##_value = value; \ -} \ - \ -LOWERCASE ExtensionSet::GetRepeated##CAMELCASE(int number, int index) const { \ - const Extension* extension = FindOrNull(number); \ - GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; \ - GOOGLE_DCHECK_TYPE(*extension, REPEATED, UPPERCASE); \ - return extension->repeated_##LOWERCASE##_value->Get(index); \ -} \ - \ -void ExtensionSet::SetRepeated##CAMELCASE( \ - int number, int index, LOWERCASE value) { \ - Extension* extension = FindOrNull(number); \ - GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; \ - GOOGLE_DCHECK_TYPE(*extension, REPEATED, UPPERCASE); \ - extension->repeated_##LOWERCASE##_value->Set(index, value); \ -} \ - \ -void ExtensionSet::Add##CAMELCASE(int number, FieldType type, \ - bool packed, LOWERCASE value, \ - const FieldDescriptor* descriptor) { \ - Extension* extension; \ - if (MaybeNewExtension(number, descriptor, &extension)) { \ - extension->type = type; \ - GOOGLE_DCHECK_EQ(cpp_type(extension->type), WireFormatLite::CPPTYPE_##UPPERCASE); \ - extension->is_repeated = true; \ - extension->is_packed = packed; \ - extension->repeated_##LOWERCASE##_value = \ - Arena::CreateMessage >(arena_); \ - } else { \ - GOOGLE_DCHECK_TYPE(*extension, REPEATED, UPPERCASE); \ - GOOGLE_DCHECK_EQ(extension->is_packed, packed); \ - } \ - extension->repeated_##LOWERCASE##_value->Add(value); \ -} - -PRIMITIVE_ACCESSORS( INT32, int32, Int32) -PRIMITIVE_ACCESSORS( INT64, int64, Int64) +#define PRIMITIVE_ACCESSORS(UPPERCASE, LOWERCASE, CAMELCASE) \ + \ + LOWERCASE ExtensionSet::Get##CAMELCASE(int number, LOWERCASE default_value) \ + const { \ + const Extension* extension = FindOrNull(number); \ + if (extension == NULL || extension->is_cleared) { \ + return default_value; \ + } else { \ + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, UPPERCASE); \ + return extension->LOWERCASE##_value; \ + } \ + } \ + \ + void 
ExtensionSet::Set##CAMELCASE(int number, FieldType type, \ + LOWERCASE value, \ + const FieldDescriptor* descriptor) { \ + Extension* extension; \ + if (MaybeNewExtension(number, descriptor, &extension)) { \ + extension->type = type; \ + GOOGLE_DCHECK_EQ(cpp_type(extension->type), \ + WireFormatLite::CPPTYPE_##UPPERCASE); \ + extension->is_repeated = false; \ + } else { \ + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, UPPERCASE); \ + } \ + extension->is_cleared = false; \ + extension->LOWERCASE##_value = value; \ + } \ + \ + LOWERCASE ExtensionSet::GetRepeated##CAMELCASE(int number, int index) \ + const { \ + const Extension* extension = FindOrNull(number); \ + GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; \ + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, UPPERCASE); \ + return extension->repeated_##LOWERCASE##_value->Get(index); \ + } \ + \ + void ExtensionSet::SetRepeated##CAMELCASE(int number, int index, \ + LOWERCASE value) { \ + Extension* extension = FindOrNull(number); \ + GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; \ + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, UPPERCASE); \ + extension->repeated_##LOWERCASE##_value->Set(index, value); \ + } \ + \ + void ExtensionSet::Add##CAMELCASE(int number, FieldType type, bool packed, \ + LOWERCASE value, \ + const FieldDescriptor* descriptor) { \ + Extension* extension; \ + if (MaybeNewExtension(number, descriptor, &extension)) { \ + extension->type = type; \ + GOOGLE_DCHECK_EQ(cpp_type(extension->type), \ + WireFormatLite::CPPTYPE_##UPPERCASE); \ + extension->is_repeated = true; \ + extension->is_packed = packed; \ + extension->repeated_##LOWERCASE##_value = \ + Arena::CreateMessage>(arena_); \ + } else { \ + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, UPPERCASE); \ + GOOGLE_DCHECK_EQ(extension->is_packed, packed); \ + } \ + extension->repeated_##LOWERCASE##_value->Add(value); \ + } + +PRIMITIVE_ACCESSORS(INT32, int32, Int32) 
+PRIMITIVE_ACCESSORS(INT64, int64, Int64) PRIMITIVE_ACCESSORS(UINT32, uint32, UInt32) PRIMITIVE_ACCESSORS(UINT64, uint64, UInt64) -PRIMITIVE_ACCESSORS( FLOAT, float, Float) +PRIMITIVE_ACCESSORS(FLOAT, float, Float) PRIMITIVE_ACCESSORS(DOUBLE, double, Double) -PRIMITIVE_ACCESSORS( BOOL, bool, Bool) +PRIMITIVE_ACCESSORS(BOOL, bool, Bool) #undef PRIMITIVE_ACCESSORS @@ -358,43 +382,43 @@ void* ExtensionSet::MutableRawRepeatedField(int number, FieldType field_type, static_cast(field_type))) { case WireFormatLite::CPPTYPE_INT32: extension->repeated_int32_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); break; case WireFormatLite::CPPTYPE_INT64: extension->repeated_int64_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); break; case WireFormatLite::CPPTYPE_UINT32: extension->repeated_uint32_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); break; case WireFormatLite::CPPTYPE_UINT64: extension->repeated_uint64_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); break; case WireFormatLite::CPPTYPE_DOUBLE: extension->repeated_double_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); break; case WireFormatLite::CPPTYPE_FLOAT: extension->repeated_float_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); break; case WireFormatLite::CPPTYPE_BOOL: extension->repeated_bool_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); break; case WireFormatLite::CPPTYPE_ENUM: extension->repeated_enum_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); break; case WireFormatLite::CPPTYPE_STRING: extension->repeated_string_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); break; case WireFormatLite::CPPTYPE_MESSAGE: extension->repeated_message_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); break; } } @@ -414,7 +438,6 @@ void* 
ExtensionSet::MutableRawRepeatedField(int number) { return extension->repeated_int32_value; } - // ------------------------------------------------------------------- // Enums @@ -424,7 +447,7 @@ int ExtensionSet::GetEnum(int number, int default_value) const { // Not present. Return the default value. return default_value; } else { - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, ENUM); + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, ENUM); return extension->enum_value; } } @@ -437,7 +460,7 @@ void ExtensionSet::SetEnum(int number, FieldType type, int value, GOOGLE_DCHECK_EQ(cpp_type(extension->type), WireFormatLite::CPPTYPE_ENUM); extension->is_repeated = false; } else { - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, ENUM); + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, ENUM); } extension->is_cleared = false; extension->enum_value = value; @@ -446,19 +469,18 @@ void ExtensionSet::SetEnum(int number, FieldType type, int value, int ExtensionSet::GetRepeatedEnum(int number, int index) const { const Extension* extension = FindOrNull(number); GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; - GOOGLE_DCHECK_TYPE(*extension, REPEATED, ENUM); + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, ENUM); return extension->repeated_enum_value->Get(index); } void ExtensionSet::SetRepeatedEnum(int number, int index, int value) { Extension* extension = FindOrNull(number); GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; - GOOGLE_DCHECK_TYPE(*extension, REPEATED, ENUM); + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, ENUM); extension->repeated_enum_value->Set(index, value); } -void ExtensionSet::AddEnum(int number, FieldType type, - bool packed, int value, +void ExtensionSet::AddEnum(int number, FieldType type, bool packed, int value, const FieldDescriptor* descriptor) { Extension* extension; if (MaybeNewExtension(number, descriptor, &extension)) { @@ -467,9 +489,9 @@ void ExtensionSet::AddEnum(int number, FieldType type, 
extension->is_repeated = true; extension->is_packed = packed; extension->repeated_enum_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); } else { - GOOGLE_DCHECK_TYPE(*extension, REPEATED, ENUM); + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, ENUM); GOOGLE_DCHECK_EQ(extension->is_packed, packed); } extension->repeated_enum_value->Add(value); @@ -478,49 +500,50 @@ void ExtensionSet::AddEnum(int number, FieldType type, // ------------------------------------------------------------------- // Strings -const string& ExtensionSet::GetString(int number, - const string& default_value) const { +const std::string& ExtensionSet::GetString( + int number, const std::string& default_value) const { const Extension* extension = FindOrNull(number); if (extension == NULL || extension->is_cleared) { // Not present. Return the default value. return default_value; } else { - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, STRING); + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, STRING); return *extension->string_value; } } -string* ExtensionSet::MutableString(int number, FieldType type, - const FieldDescriptor* descriptor) { +std::string* ExtensionSet::MutableString(int number, FieldType type, + const FieldDescriptor* descriptor) { Extension* extension; if (MaybeNewExtension(number, descriptor, &extension)) { extension->type = type; GOOGLE_DCHECK_EQ(cpp_type(extension->type), WireFormatLite::CPPTYPE_STRING); extension->is_repeated = false; - extension->string_value = Arena::Create(arena_); + extension->string_value = Arena::Create(arena_); } else { - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, STRING); + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, STRING); } extension->is_cleared = false; return extension->string_value; } -const string& ExtensionSet::GetRepeatedString(int number, int index) const { +const std::string& ExtensionSet::GetRepeatedString(int number, + int index) const { const Extension* extension = FindOrNull(number); GOOGLE_CHECK(extension != NULL) << 
"Index out-of-bounds (field is empty)."; - GOOGLE_DCHECK_TYPE(*extension, REPEATED, STRING); + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, STRING); return extension->repeated_string_value->Get(index); } -string* ExtensionSet::MutableRepeatedString(int number, int index) { +std::string* ExtensionSet::MutableRepeatedString(int number, int index) { Extension* extension = FindOrNull(number); GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; - GOOGLE_DCHECK_TYPE(*extension, REPEATED, STRING); + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, STRING); return extension->repeated_string_value->Mutable(index); } -string* ExtensionSet::AddString(int number, FieldType type, - const FieldDescriptor* descriptor) { +std::string* ExtensionSet::AddString(int number, FieldType type, + const FieldDescriptor* descriptor) { Extension* extension; if (MaybeNewExtension(number, descriptor, &extension)) { extension->type = type; @@ -528,9 +551,9 @@ string* ExtensionSet::AddString(int number, FieldType type, extension->is_repeated = true; extension->is_packed = false; extension->repeated_string_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); } else { - GOOGLE_DCHECK_TYPE(*extension, REPEATED, STRING); + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, STRING); } return extension->repeated_string_value->Add(); } @@ -545,7 +568,7 @@ const MessageLite& ExtensionSet::GetMessage( // Not present. Return the default value. 
return default_value; } else { - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, MESSAGE); + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, MESSAGE); if (extension->is_lazy) { return extension->lazymessage_value->GetMessage(default_value); } else { @@ -572,7 +595,7 @@ MessageLite* ExtensionSet::MutableMessage(int number, FieldType type, extension->is_cleared = false; return extension->message_value; } else { - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, MESSAGE); + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, MESSAGE); extension->is_cleared = false; if (extension->is_lazy) { return extension->lazymessage_value->MutableMessage(prototype); @@ -594,7 +617,7 @@ void ExtensionSet::SetAllocatedMessage(int number, FieldType type, ClearExtension(number); return; } - ::google::protobuf::Arena* message_arena = message->GetArena(); + Arena* message_arena = message->GetArena(); Extension* extension; if (MaybeNewExtension(number, descriptor, &extension)) { extension->type = type; @@ -611,7 +634,7 @@ void ExtensionSet::SetAllocatedMessage(int number, FieldType type, extension->message_value->CheckTypeAndMergeFrom(*message); } } else { - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, MESSAGE); + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, MESSAGE); if (extension->is_lazy) { extension->lazymessage_value->SetAllocatedMessage(message); } else { @@ -647,7 +670,7 @@ void ExtensionSet::UnsafeArenaSetAllocatedMessage( extension->is_lazy = false; extension->message_value = message; } else { - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, MESSAGE); + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, MESSAGE); if (extension->is_lazy) { extension->lazymessage_value->UnsafeArenaSetAllocatedMessage(message); } else { @@ -667,7 +690,7 @@ MessageLite* ExtensionSet::ReleaseMessage(int number, // Not present. Return NULL. 
return NULL; } else { - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, MESSAGE); + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, MESSAGE); MessageLite* ret = NULL; if (extension->is_lazy) { ret = extension->lazymessage_value->ReleaseMessage(prototype); @@ -696,7 +719,7 @@ MessageLite* ExtensionSet::UnsafeArenaReleaseMessage( // Not present. Return NULL. return NULL; } else { - GOOGLE_DCHECK_TYPE(*extension, OPTIONAL, MESSAGE); + GOOGLE_DCHECK_TYPE(*extension, OPTIONAL_FIELD, MESSAGE); MessageLite* ret = NULL; if (extension->is_lazy) { ret = extension->lazymessage_value->UnsafeArenaReleaseMessage(prototype); @@ -715,18 +738,18 @@ MessageLite* ExtensionSet::UnsafeArenaReleaseMessage( // MessageLite* ExtensionSet::ReleaseMessage(const FieldDescriptor* descriptor, // MessageFactory* factory); -const MessageLite& ExtensionSet::GetRepeatedMessage( - int number, int index) const { +const MessageLite& ExtensionSet::GetRepeatedMessage(int number, + int index) const { const Extension* extension = FindOrNull(number); GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; - GOOGLE_DCHECK_TYPE(*extension, REPEATED, MESSAGE); + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, MESSAGE); return extension->repeated_message_value->Get(index); } MessageLite* ExtensionSet::MutableRepeatedMessage(int number, int index) { Extension* extension = FindOrNull(number); GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; - GOOGLE_DCHECK_TYPE(*extension, REPEATED, MESSAGE); + GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, MESSAGE); return extension->repeated_message_value->Mutable(index); } @@ -739,17 +762,16 @@ MessageLite* ExtensionSet::AddMessage(int number, FieldType type, GOOGLE_DCHECK_EQ(cpp_type(extension->type), WireFormatLite::CPPTYPE_MESSAGE); extension->is_repeated = true; extension->repeated_message_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); } else { - GOOGLE_DCHECK_TYPE(*extension, REPEATED, MESSAGE); + 
GOOGLE_DCHECK_TYPE(*extension, REPEATED_FIELD, MESSAGE); } // RepeatedPtrField does not know how to Add() since it cannot // allocate an abstract object, so we have to be tricky. - MessageLite* result = - reinterpret_cast<::google::protobuf::internal::RepeatedPtrFieldBase*>( - extension->repeated_message_value) - ->AddFromCleared >(); + MessageLite* result = reinterpret_cast( + extension->repeated_message_value) + ->AddFromCleared>(); if (result == NULL) { result = prototype.New(arena_); extension->repeated_message_value->AddAllocated(result); @@ -769,7 +791,7 @@ void ExtensionSet::RemoveLast(int number) { GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; GOOGLE_DCHECK(extension->is_repeated); - switch(cpp_type(extension->type)) { + switch (cpp_type(extension->type)) { case WireFormatLite::CPPTYPE_INT32: extension->repeated_int32_value->RemoveLast(); break; @@ -816,7 +838,7 @@ void ExtensionSet::SwapElements(int number, int index1, int index2) { GOOGLE_CHECK(extension != NULL) << "Index out-of-bounds (field is empty)."; GOOGLE_DCHECK(extension->is_repeated); - switch(cpp_type(extension->type)) { + switch (cpp_type(extension->type)) { case WireFormatLite::CPPTYPE_INT32: extension->repeated_int32_value->SwapElements(index1, index2); break; @@ -879,8 +901,8 @@ size_t SizeOfUnion(ItX it_xs, ItX end_xs, ItY it_ys, ItY end_ys) { } // namespace void ExtensionSet::MergeFrom(const ExtensionSet& other) { - if (GOOGLE_PREDICT_TRUE(!is_large())) { - if (GOOGLE_PREDICT_TRUE(!other.is_large())) { + if (PROTOBUF_PREDICT_TRUE(!is_large())) { + if (PROTOBUF_PREDICT_TRUE(!other.is_large())) { GrowCapacity(SizeOfUnion(flat_begin(), flat_end(), other.flat_begin(), other.flat_end())); } else { @@ -895,11 +917,11 @@ void ExtensionSet::MergeFrom(const ExtensionSet& other) { } void ExtensionSet::InternalExtensionMergeFrom( - int number, const Extension& other_extension) { + int number, const Extension& other_extension) { if (other_extension.is_repeated) { 
Extension* extension; - bool is_new = MaybeNewExtension(number, other_extension.descriptor, - &extension); + bool is_new = + MaybeNewExtension(number, other_extension.descriptor, &extension); if (is_new) { // Extension did not already exist in set. extension->type = other_extension.type; @@ -912,31 +934,31 @@ void ExtensionSet::InternalExtensionMergeFrom( } switch (cpp_type(other_extension.type)) { -#define HANDLE_TYPE(UPPERCASE, LOWERCASE, REPEATED_TYPE) \ - case WireFormatLite::CPPTYPE_##UPPERCASE: \ - if (is_new) { \ - extension->repeated_##LOWERCASE##_value = \ - Arena::CreateMessage(arena_); \ - } \ - extension->repeated_##LOWERCASE##_value->MergeFrom( \ - *other_extension.repeated_##LOWERCASE##_value); \ - break; - - HANDLE_TYPE( INT32, int32, RepeatedField < int32>); - HANDLE_TYPE( INT64, int64, RepeatedField < int64>); - HANDLE_TYPE( UINT32, uint32, RepeatedField < uint32>); - HANDLE_TYPE( UINT64, uint64, RepeatedField < uint64>); - HANDLE_TYPE( FLOAT, float, RepeatedField < float>); - HANDLE_TYPE( DOUBLE, double, RepeatedField < double>); - HANDLE_TYPE( BOOL, bool, RepeatedField < bool>); - HANDLE_TYPE( ENUM, enum, RepeatedField < int>); - HANDLE_TYPE( STRING, string, RepeatedPtrField< string>); +#define HANDLE_TYPE(UPPERCASE, LOWERCASE, REPEATED_TYPE) \ + case WireFormatLite::CPPTYPE_##UPPERCASE: \ + if (is_new) { \ + extension->repeated_##LOWERCASE##_value = \ + Arena::CreateMessage(arena_); \ + } \ + extension->repeated_##LOWERCASE##_value->MergeFrom( \ + *other_extension.repeated_##LOWERCASE##_value); \ + break; + + HANDLE_TYPE(INT32, int32, RepeatedField); + HANDLE_TYPE(INT64, int64, RepeatedField); + HANDLE_TYPE(UINT32, uint32, RepeatedField); + HANDLE_TYPE(UINT64, uint64, RepeatedField); + HANDLE_TYPE(FLOAT, float, RepeatedField); + HANDLE_TYPE(DOUBLE, double, RepeatedField); + HANDLE_TYPE(BOOL, bool, RepeatedField); + HANDLE_TYPE(ENUM, enum, RepeatedField); + HANDLE_TYPE(STRING, string, RepeatedPtrField); #undef HANDLE_TYPE case 
WireFormatLite::CPPTYPE_MESSAGE: if (is_new) { extension->repeated_message_value = - Arena::CreateMessage >(arena_); + Arena::CreateMessage>(arena_); } // We can't call RepeatedPtrField::MergeFrom() because // it would attempt to allocate new objects. @@ -945,9 +967,9 @@ void ExtensionSet::InternalExtensionMergeFrom( for (int i = 0; i < other_repeated_message->size(); i++) { const MessageLite& other_message = other_repeated_message->Get(i); MessageLite* target = - reinterpret_cast<::google::protobuf::internal::RepeatedPtrFieldBase*>( + reinterpret_cast( extension->repeated_message_value) - ->AddFromCleared >(); + ->AddFromCleared>(); if (target == NULL) { target = other_message.New(arena_); extension->repeated_message_value->AddAllocated(target); @@ -959,32 +981,30 @@ void ExtensionSet::InternalExtensionMergeFrom( } else { if (!other_extension.is_cleared) { switch (cpp_type(other_extension.type)) { -#define HANDLE_TYPE(UPPERCASE, LOWERCASE, CAMELCASE) \ - case WireFormatLite::CPPTYPE_##UPPERCASE: \ - Set##CAMELCASE(number, other_extension.type, \ - other_extension.LOWERCASE##_value, \ - other_extension.descriptor); \ - break; - - HANDLE_TYPE( INT32, int32, Int32); - HANDLE_TYPE( INT64, int64, Int64); +#define HANDLE_TYPE(UPPERCASE, LOWERCASE, CAMELCASE) \ + case WireFormatLite::CPPTYPE_##UPPERCASE: \ + Set##CAMELCASE(number, other_extension.type, \ + other_extension.LOWERCASE##_value, \ + other_extension.descriptor); \ + break; + + HANDLE_TYPE(INT32, int32, Int32); + HANDLE_TYPE(INT64, int64, Int64); HANDLE_TYPE(UINT32, uint32, UInt32); HANDLE_TYPE(UINT64, uint64, UInt64); - HANDLE_TYPE( FLOAT, float, Float); + HANDLE_TYPE(FLOAT, float, Float); HANDLE_TYPE(DOUBLE, double, Double); - HANDLE_TYPE( BOOL, bool, Bool); - HANDLE_TYPE( ENUM, enum, Enum); + HANDLE_TYPE(BOOL, bool, Bool); + HANDLE_TYPE(ENUM, enum, Enum); #undef HANDLE_TYPE case WireFormatLite::CPPTYPE_STRING: - SetString(number, other_extension.type, - *other_extension.string_value, + SetString(number, 
other_extension.type, *other_extension.string_value, other_extension.descriptor); break; case WireFormatLite::CPPTYPE_MESSAGE: { Extension* extension; - bool is_new = MaybeNewExtension(number, - other_extension.descriptor, - &extension); + bool is_new = + MaybeNewExtension(number, other_extension.descriptor, &extension); if (is_new) { extension->type = other_extension.type; extension->is_packed = other_extension.is_packed; @@ -1004,7 +1024,7 @@ void ExtensionSet::InternalExtensionMergeFrom( } } else { GOOGLE_DCHECK_EQ(extension->type, other_extension.type); - GOOGLE_DCHECK_EQ(extension->is_packed,other_extension.is_packed); + GOOGLE_DCHECK_EQ(extension->is_packed, other_extension.is_packed); GOOGLE_DCHECK(!extension->is_repeated); if (other_extension.is_lazy) { if (extension->is_lazy) { @@ -1017,9 +1037,9 @@ void ExtensionSet::InternalExtensionMergeFrom( } } else { if (extension->is_lazy) { - extension->lazymessage_value->MutableMessage( - *other_extension.message_value)->CheckTypeAndMergeFrom( - *other_extension.message_value); + extension->lazymessage_value + ->MutableMessage(*other_extension.message_value) + ->CheckTypeAndMergeFrom(*other_extension.message_value); } else { extension->message_value->CheckTypeAndMergeFrom( *other_extension.message_value); @@ -1035,7 +1055,7 @@ void ExtensionSet::InternalExtensionMergeFrom( } void ExtensionSet::Swap(ExtensionSet* x) { - if (GetArenaNoVirtual() == x->GetArenaNoVirtual()) { + if (GetArena() == x->GetArena()) { using std::swap; swap(flat_capacity_, x->flat_capacity_); swap(flat_size_, x->flat_size_); @@ -1053,8 +1073,7 @@ void ExtensionSet::Swap(ExtensionSet* x) { } } -void ExtensionSet::SwapExtension(ExtensionSet* other, - int number) { +void ExtensionSet::SwapExtension(ExtensionSet* other, int number) { if (this == other) return; Extension* this_ext = FindOrNull(number); Extension* other_ext = other->FindOrNull(number); @@ -1064,7 +1083,7 @@ void ExtensionSet::SwapExtension(ExtensionSet* other, } if (this_ext != 
NULL && other_ext != NULL) { - if (GetArenaNoVirtual() == other->GetArenaNoVirtual()) { + if (GetArena() == other->GetArena()) { using std::swap; swap(*this_ext, *other_ext); } else { @@ -1085,7 +1104,7 @@ void ExtensionSet::SwapExtension(ExtensionSet* other, } if (this_ext == NULL) { - if (GetArenaNoVirtual() == other->GetArenaNoVirtual()) { + if (GetArena() == other->GetArena()) { *Insert(number).first = *other_ext; } else { InternalExtensionMergeFrom(number, *other_ext); @@ -1095,7 +1114,7 @@ void ExtensionSet::SwapExtension(ExtensionSet* other, } if (other_ext == NULL) { - if (GetArenaNoVirtual() == other->GetArenaNoVirtual()) { + if (GetArena() == other->GetArena()) { *other->Insert(number).first = *this_ext; } else { other->InternalExtensionMergeFrom(number, *this_ext); @@ -1108,7 +1127,7 @@ void ExtensionSet::SwapExtension(ExtensionSet* other, bool ExtensionSet::IsInitialized() const { // Extensions are never required. However, we need to check that all // embedded messages are initialized. 
- if (GOOGLE_PREDICT_FALSE(is_large())) { + if (PROTOBUF_PREDICT_FALSE(is_large())) { for (const auto& kv : *map_.large) { if (!kv.second.IsInitialized()) return false; } @@ -1120,9 +1139,11 @@ bool ExtensionSet::IsInitialized() const { return true; } -bool ExtensionSet::FindExtensionInfoFromTag( - uint32 tag, ExtensionFinder* extension_finder, int* field_number, - ExtensionInfo* extension, bool* was_packed_on_wire) { +bool ExtensionSet::FindExtensionInfoFromTag(uint32 tag, + ExtensionFinder* extension_finder, + int* field_number, + ExtensionInfo* extension, + bool* was_packed_on_wire) { *field_number = WireFormatLite::GetTagFieldNumber(tag); WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag); return FindExtensionInfoFromFieldNumber(wire_type, *field_number, @@ -1158,19 +1179,44 @@ bool ExtensionSet::ParseField(uint32 tag, io::CodedInputStream* input, int number; bool was_packed_on_wire; ExtensionInfo extension; - if (!FindExtensionInfoFromTag( - tag, extension_finder, &number, &extension, &was_packed_on_wire)) { + if (!FindExtensionInfoFromTag(tag, extension_finder, &number, &extension, + &was_packed_on_wire)) { return field_skipper->SkipField(input, tag); } else { - return ParseFieldWithExtensionInfo( - number, was_packed_on_wire, extension, input, field_skipper); + return ParseFieldWithExtensionInfo(number, was_packed_on_wire, extension, + input, field_skipper); + } +} + +const char* ExtensionSet::ParseField(uint64 tag, const char* ptr, + const MessageLite* containing_type, + internal::InternalMetadata* metadata, + internal::ParseContext* ctx) { + GeneratedExtensionFinder finder(containing_type); + int number = tag >> 3; + bool was_packed_on_wire; + ExtensionInfo extension; + if (!FindExtensionInfoFromFieldNumber(tag & 7, number, &finder, &extension, + &was_packed_on_wire)) { + return UnknownFieldParse( + tag, metadata->mutable_unknown_fields(), ptr, ctx); } + return ParseFieldWithExtensionInfo( + number, was_packed_on_wire, extension, 
metadata, ptr, ctx); +} + +const char* ExtensionSet::ParseMessageSetItem( + const char* ptr, const MessageLite* containing_type, + internal::InternalMetadata* metadata, internal::ParseContext* ctx) { + return ParseMessageSetItemTmpl(ptr, containing_type, + metadata, ctx); } -bool ExtensionSet::ParseFieldWithExtensionInfo( - int number, bool was_packed_on_wire, const ExtensionInfo& extension, - io::CodedInputStream* input, - FieldSkipper* field_skipper) { +bool ExtensionSet::ParseFieldWithExtensionInfo(int number, + bool was_packed_on_wire, + const ExtensionInfo& extension, + io::CodedInputStream* input, + FieldSkipper* field_skipper) { // Explicitly not read extension.is_packed, instead check whether the field // was encoded in packed form on the wire. if (was_packed_on_wire) { @@ -1179,39 +1225,40 @@ bool ExtensionSet::ParseFieldWithExtensionInfo( io::CodedInputStream::Limit limit = input->PushLimit(size); switch (extension.type) { -#define HANDLE_TYPE(UPPERCASE, CPP_CAMELCASE, CPP_LOWERCASE) \ - case WireFormatLite::TYPE_##UPPERCASE: \ - while (input->BytesUntilLimit() > 0) { \ - CPP_LOWERCASE value; \ - if (!WireFormatLite::ReadPrimitive< \ - CPP_LOWERCASE, WireFormatLite::TYPE_##UPPERCASE>( \ - input, &value)) return false; \ - Add##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, \ - extension.is_packed, value, \ - extension.descriptor); \ - } \ - break - - HANDLE_TYPE( INT32, Int32, int32); - HANDLE_TYPE( INT64, Int64, int64); - HANDLE_TYPE( UINT32, UInt32, uint32); - HANDLE_TYPE( UINT64, UInt64, uint64); - HANDLE_TYPE( SINT32, Int32, int32); - HANDLE_TYPE( SINT64, Int64, int64); - HANDLE_TYPE( FIXED32, UInt32, uint32); - HANDLE_TYPE( FIXED64, UInt64, uint64); - HANDLE_TYPE(SFIXED32, Int32, int32); - HANDLE_TYPE(SFIXED64, Int64, int64); - HANDLE_TYPE( FLOAT, Float, float); - HANDLE_TYPE( DOUBLE, Double, double); - HANDLE_TYPE( BOOL, Bool, bool); +#define HANDLE_TYPE(UPPERCASE, CPP_CAMELCASE, CPP_LOWERCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + 
while (input->BytesUntilLimit() > 0) { \ + CPP_LOWERCASE value; \ + if (!WireFormatLite::ReadPrimitive( \ + input, &value)) \ + return false; \ + Add##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, \ + extension.is_packed, value, extension.descriptor); \ + } \ + break + + HANDLE_TYPE(INT32, Int32, int32); + HANDLE_TYPE(INT64, Int64, int64); + HANDLE_TYPE(UINT32, UInt32, uint32); + HANDLE_TYPE(UINT64, UInt64, uint64); + HANDLE_TYPE(SINT32, Int32, int32); + HANDLE_TYPE(SINT64, Int64, int64); + HANDLE_TYPE(FIXED32, UInt32, uint32); + HANDLE_TYPE(FIXED64, UInt64, uint64); + HANDLE_TYPE(SFIXED32, Int32, int32); + HANDLE_TYPE(SFIXED64, Int64, int64); + HANDLE_TYPE(FLOAT, Float, float); + HANDLE_TYPE(DOUBLE, Double, double); + HANDLE_TYPE(BOOL, Bool, bool); #undef HANDLE_TYPE case WireFormatLite::TYPE_ENUM: while (input->BytesUntilLimit() > 0) { int value; if (!WireFormatLite::ReadPrimitive( - input, &value)) return false; + input, &value)) + return false; if (extension.enum_validity_check.func( extension.enum_validity_check.arg, value)) { AddEnum(number, WireFormatLite::TYPE_ENUM, extension.is_packed, @@ -1234,41 +1281,42 @@ bool ExtensionSet::ParseFieldWithExtensionInfo( input->PopLimit(limit); } else { switch (extension.type) { -#define HANDLE_TYPE(UPPERCASE, CPP_CAMELCASE, CPP_LOWERCASE) \ - case WireFormatLite::TYPE_##UPPERCASE: { \ - CPP_LOWERCASE value; \ - if (!WireFormatLite::ReadPrimitive< \ - CPP_LOWERCASE, WireFormatLite::TYPE_##UPPERCASE>( \ - input, &value)) return false; \ - if (extension.is_repeated) { \ - Add##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, \ - extension.is_packed, value, \ - extension.descriptor); \ - } else { \ - Set##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, value, \ - extension.descriptor); \ - } \ - } break - - HANDLE_TYPE( INT32, Int32, int32); - HANDLE_TYPE( INT64, Int64, int64); - HANDLE_TYPE( UINT32, UInt32, uint32); - HANDLE_TYPE( UINT64, UInt64, uint64); - HANDLE_TYPE( SINT32, Int32, int32); - 
HANDLE_TYPE( SINT64, Int64, int64); - HANDLE_TYPE( FIXED32, UInt32, uint32); - HANDLE_TYPE( FIXED64, UInt64, uint64); - HANDLE_TYPE(SFIXED32, Int32, int32); - HANDLE_TYPE(SFIXED64, Int64, int64); - HANDLE_TYPE( FLOAT, Float, float); - HANDLE_TYPE( DOUBLE, Double, double); - HANDLE_TYPE( BOOL, Bool, bool); +#define HANDLE_TYPE(UPPERCASE, CPP_CAMELCASE, CPP_LOWERCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: { \ + CPP_LOWERCASE value; \ + if (!WireFormatLite::ReadPrimitive( \ + input, &value)) \ + return false; \ + if (extension.is_repeated) { \ + Add##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, \ + extension.is_packed, value, extension.descriptor); \ + } else { \ + Set##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, value, \ + extension.descriptor); \ + } \ + } break + + HANDLE_TYPE(INT32, Int32, int32); + HANDLE_TYPE(INT64, Int64, int64); + HANDLE_TYPE(UINT32, UInt32, uint32); + HANDLE_TYPE(UINT64, UInt64, uint64); + HANDLE_TYPE(SINT32, Int32, int32); + HANDLE_TYPE(SINT64, Int64, int64); + HANDLE_TYPE(FIXED32, UInt32, uint32); + HANDLE_TYPE(FIXED64, UInt64, uint64); + HANDLE_TYPE(SFIXED32, Int32, int32); + HANDLE_TYPE(SFIXED64, Int64, int64); + HANDLE_TYPE(FLOAT, Float, float); + HANDLE_TYPE(DOUBLE, Double, double); + HANDLE_TYPE(BOOL, Bool, bool); #undef HANDLE_TYPE case WireFormatLite::TYPE_ENUM: { int value; if (!WireFormatLite::ReadPrimitive( - input, &value)) return false; + input, &value)) + return false; if (!extension.enum_validity_check.func( extension.enum_validity_check.arg, value)) { @@ -1284,40 +1332,50 @@ bool ExtensionSet::ParseFieldWithExtensionInfo( break; } - case WireFormatLite::TYPE_STRING: { - string* value = extension.is_repeated ? - AddString(number, WireFormatLite::TYPE_STRING, extension.descriptor) : - MutableString(number, WireFormatLite::TYPE_STRING, - extension.descriptor); + case WireFormatLite::TYPE_STRING: { + std::string* value = + extension.is_repeated + ? 
AddString(number, WireFormatLite::TYPE_STRING, + extension.descriptor) + : MutableString(number, WireFormatLite::TYPE_STRING, + extension.descriptor); if (!WireFormatLite::ReadString(input, value)) return false; break; } - case WireFormatLite::TYPE_BYTES: { - string* value = extension.is_repeated ? - AddString(number, WireFormatLite::TYPE_BYTES, extension.descriptor) : - MutableString(number, WireFormatLite::TYPE_BYTES, - extension.descriptor); + case WireFormatLite::TYPE_BYTES: { + std::string* value = + extension.is_repeated + ? AddString(number, WireFormatLite::TYPE_BYTES, + extension.descriptor) + : MutableString(number, WireFormatLite::TYPE_BYTES, + extension.descriptor); if (!WireFormatLite::ReadBytes(input, value)) return false; break; } case WireFormatLite::TYPE_GROUP: { - MessageLite* value = extension.is_repeated ? - AddMessage(number, WireFormatLite::TYPE_GROUP, - *extension.message_prototype, extension.descriptor) : - MutableMessage(number, WireFormatLite::TYPE_GROUP, - *extension.message_prototype, extension.descriptor); + MessageLite* value = + extension.is_repeated + ? AddMessage(number, WireFormatLite::TYPE_GROUP, + *extension.message_info.prototype, + extension.descriptor) + : MutableMessage(number, WireFormatLite::TYPE_GROUP, + *extension.message_info.prototype, + extension.descriptor); if (!WireFormatLite::ReadGroup(number, input, value)) return false; break; } case WireFormatLite::TYPE_MESSAGE: { - MessageLite* value = extension.is_repeated ? - AddMessage(number, WireFormatLite::TYPE_MESSAGE, - *extension.message_prototype, extension.descriptor) : - MutableMessage(number, WireFormatLite::TYPE_MESSAGE, - *extension.message_prototype, extension.descriptor); + MessageLite* value = + extension.is_repeated + ? 
AddMessage(number, WireFormatLite::TYPE_MESSAGE, + *extension.message_info.prototype, + extension.descriptor) + : MutableMessage(number, WireFormatLite::TYPE_MESSAGE, + *extension.message_info.prototype, + extension.descriptor); if (!WireFormatLite::ReadMessage(input, value)) return false; break; } @@ -1342,33 +1400,90 @@ bool ExtensionSet::ParseField(uint32 tag, io::CodedInputStream* input, return ParseField(tag, input, &finder, &skipper); } -// Defined in extension_set_heavy.cc. -// bool ExtensionSet::ParseField(uint32 tag, io::CodedInputStream* input, -// const MessageLite* containing_type, -// UnknownFieldSet* unknown_fields) +bool ExtensionSet::ParseMessageSetLite(io::CodedInputStream* input, + ExtensionFinder* extension_finder, + FieldSkipper* field_skipper) { + while (true) { + const uint32 tag = input->ReadTag(); + switch (tag) { + case 0: + return true; + case WireFormatLite::kMessageSetItemStartTag: + if (!ParseMessageSetItemLite(input, extension_finder, field_skipper)) { + return false; + } + break; + default: + if (!ParseField(tag, input, extension_finder, field_skipper)) { + return false; + } + break; + } + } +} -// Defined in extension_set_heavy.cc. 
-// bool ExtensionSet::ParseMessageSet(io::CodedInputStream* input, -// const MessageLite* containing_type, -// UnknownFieldSet* unknown_fields); - -void ExtensionSet::SerializeWithCachedSizes( - int start_field_number, int end_field_number, - io::CodedOutputStream* output) const { - if (GOOGLE_PREDICT_FALSE(is_large())) { +bool ExtensionSet::ParseMessageSetItemLite(io::CodedInputStream* input, + ExtensionFinder* extension_finder, + FieldSkipper* field_skipper) { + struct MSLite { + bool ParseField(int type_id, io::CodedInputStream* input) { + return me->ParseField( + WireFormatLite::WIRETYPE_LENGTH_DELIMITED + 8 * type_id, input, + extension_finder, field_skipper); + } + + bool SkipField(uint32 tag, io::CodedInputStream* input) { + return field_skipper->SkipField(input, tag); + } + + ExtensionSet* me; + ExtensionFinder* extension_finder; + FieldSkipper* field_skipper; + }; + + return ParseMessageSetItemImpl(input, + MSLite{this, extension_finder, field_skipper}); +} + +bool ExtensionSet::ParseMessageSet(io::CodedInputStream* input, + const MessageLite* containing_type, + std::string* unknown_fields) { + io::StringOutputStream zcis(unknown_fields); + io::CodedOutputStream output(&zcis); + CodedOutputStreamFieldSkipper skipper(&output); + GeneratedExtensionFinder finder(containing_type); + return ParseMessageSetLite(input, &finder, &skipper); +} + +uint8* ExtensionSet::_InternalSerialize(int start_field_number, + int end_field_number, uint8* target, + io::EpsCopyOutputStream* stream) const { + if (PROTOBUF_PREDICT_FALSE(is_large())) { const auto& end = map_.large->end(); for (auto it = map_.large->lower_bound(start_field_number); it != end && it->first < end_field_number; ++it) { - it->second.SerializeFieldWithCachedSizes(it->first, output); + target = it->second.InternalSerializeFieldWithCachedSizesToArray( + it->first, target, stream); } - return; + return target; } const KeyValue* end = flat_end(); for (const KeyValue* it = std::lower_bound( flat_begin(), end, 
start_field_number, KeyValue::FirstComparator()); it != end && it->first < end_field_number; ++it) { - it->second.SerializeFieldWithCachedSizes(it->first, output); + target = it->second.InternalSerializeFieldWithCachedSizesToArray( + it->first, target, stream); } + return target; +} + +uint8* ExtensionSet::InternalSerializeMessageSetWithCachedSizesToArray( + uint8* target, io::EpsCopyOutputStream* stream) const { + ForEach([&target, stream](int number, const Extension& ext) { + target = ext.InternalSerializeMessageSetItemWithCachedSizesToArray( + number, target, stream); + }); + return target; } size_t ExtensionSet::ByteSize() const { @@ -1397,20 +1512,20 @@ bool ExtensionSet::MaybeNewExtension(int number, void ExtensionSet::Extension::Clear() { if (is_repeated) { switch (cpp_type(type)) { -#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ - case WireFormatLite::CPPTYPE_##UPPERCASE: \ - repeated_##LOWERCASE##_value->Clear(); \ - break - - HANDLE_TYPE( INT32, int32); - HANDLE_TYPE( INT64, int64); - HANDLE_TYPE( UINT32, uint32); - HANDLE_TYPE( UINT64, uint64); - HANDLE_TYPE( FLOAT, float); - HANDLE_TYPE( DOUBLE, double); - HANDLE_TYPE( BOOL, bool); - HANDLE_TYPE( ENUM, enum); - HANDLE_TYPE( STRING, string); +#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ + case WireFormatLite::CPPTYPE_##UPPERCASE: \ + repeated_##LOWERCASE##_value->Clear(); \ + break + + HANDLE_TYPE(INT32, int32); + HANDLE_TYPE(INT64, int64); + HANDLE_TYPE(UINT32, uint32); + HANDLE_TYPE(UINT64, uint64); + HANDLE_TYPE(FLOAT, float); + HANDLE_TYPE(DOUBLE, double); + HANDLE_TYPE(BOOL, bool); + HANDLE_TYPE(ENUM, enum); + HANDLE_TYPE(STRING, string); HANDLE_TYPE(MESSAGE, message); #undef HANDLE_TYPE } @@ -1439,152 +1554,42 @@ void ExtensionSet::Extension::Clear() { } } -void ExtensionSet::Extension::SerializeFieldWithCachedSizes( - int number, - io::CodedOutputStream* output) const { - if (is_repeated) { - if (is_packed) { - if (cached_size == 0) return; - - WireFormatLite::WriteTag(number, - 
WireFormatLite::WIRETYPE_LENGTH_DELIMITED, output); - output->WriteVarint32(cached_size); - - switch (real_type(type)) { -#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ - case WireFormatLite::TYPE_##UPPERCASE: \ - for (int i = 0; i < repeated_##LOWERCASE##_value->size(); i++) { \ - WireFormatLite::Write##CAMELCASE##NoTag( \ - repeated_##LOWERCASE##_value->Get(i), output); \ - } \ - break - - HANDLE_TYPE( INT32, Int32, int32); - HANDLE_TYPE( INT64, Int64, int64); - HANDLE_TYPE( UINT32, UInt32, uint32); - HANDLE_TYPE( UINT64, UInt64, uint64); - HANDLE_TYPE( SINT32, SInt32, int32); - HANDLE_TYPE( SINT64, SInt64, int64); - HANDLE_TYPE( FIXED32, Fixed32, uint32); - HANDLE_TYPE( FIXED64, Fixed64, uint64); - HANDLE_TYPE(SFIXED32, SFixed32, int32); - HANDLE_TYPE(SFIXED64, SFixed64, int64); - HANDLE_TYPE( FLOAT, Float, float); - HANDLE_TYPE( DOUBLE, Double, double); - HANDLE_TYPE( BOOL, Bool, bool); - HANDLE_TYPE( ENUM, Enum, enum); -#undef HANDLE_TYPE - - case WireFormatLite::TYPE_STRING: - case WireFormatLite::TYPE_BYTES: - case WireFormatLite::TYPE_GROUP: - case WireFormatLite::TYPE_MESSAGE: - GOOGLE_LOG(FATAL) << "Non-primitive types can't be packed."; - break; - } - } else { - switch (real_type(type)) { -#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ - case WireFormatLite::TYPE_##UPPERCASE: \ - for (int i = 0; i < repeated_##LOWERCASE##_value->size(); i++) { \ - WireFormatLite::Write##CAMELCASE(number, \ - repeated_##LOWERCASE##_value->Get(i), output); \ - } \ - break - - HANDLE_TYPE( INT32, Int32, int32); - HANDLE_TYPE( INT64, Int64, int64); - HANDLE_TYPE( UINT32, UInt32, uint32); - HANDLE_TYPE( UINT64, UInt64, uint64); - HANDLE_TYPE( SINT32, SInt32, int32); - HANDLE_TYPE( SINT64, SInt64, int64); - HANDLE_TYPE( FIXED32, Fixed32, uint32); - HANDLE_TYPE( FIXED64, Fixed64, uint64); - HANDLE_TYPE(SFIXED32, SFixed32, int32); - HANDLE_TYPE(SFIXED64, SFixed64, int64); - HANDLE_TYPE( FLOAT, Float, float); - HANDLE_TYPE( DOUBLE, Double, double); - HANDLE_TYPE( 
BOOL, Bool, bool); - HANDLE_TYPE( STRING, String, string); - HANDLE_TYPE( BYTES, Bytes, string); - HANDLE_TYPE( ENUM, Enum, enum); - HANDLE_TYPE( GROUP, Group, message); - HANDLE_TYPE( MESSAGE, Message, message); -#undef HANDLE_TYPE - } - } - } else if (!is_cleared) { - switch (real_type(type)) { -#define HANDLE_TYPE(UPPERCASE, CAMELCASE, VALUE) \ - case WireFormatLite::TYPE_##UPPERCASE: \ - WireFormatLite::Write##CAMELCASE(number, VALUE, output); \ - break - - HANDLE_TYPE( INT32, Int32, int32_value); - HANDLE_TYPE( INT64, Int64, int64_value); - HANDLE_TYPE( UINT32, UInt32, uint32_value); - HANDLE_TYPE( UINT64, UInt64, uint64_value); - HANDLE_TYPE( SINT32, SInt32, int32_value); - HANDLE_TYPE( SINT64, SInt64, int64_value); - HANDLE_TYPE( FIXED32, Fixed32, uint32_value); - HANDLE_TYPE( FIXED64, Fixed64, uint64_value); - HANDLE_TYPE(SFIXED32, SFixed32, int32_value); - HANDLE_TYPE(SFIXED64, SFixed64, int64_value); - HANDLE_TYPE( FLOAT, Float, float_value); - HANDLE_TYPE( DOUBLE, Double, double_value); - HANDLE_TYPE( BOOL, Bool, bool_value); - HANDLE_TYPE( STRING, String, *string_value); - HANDLE_TYPE( BYTES, Bytes, *string_value); - HANDLE_TYPE( ENUM, Enum, enum_value); - HANDLE_TYPE( GROUP, Group, *message_value); -#undef HANDLE_TYPE - case WireFormatLite::TYPE_MESSAGE: - if (is_lazy) { - lazymessage_value->WriteMessage(number, output); - } else { - WireFormatLite::WriteMessage(number, *message_value, output); - } - break; - } - } -} - size_t ExtensionSet::Extension::ByteSize(int number) const { size_t result = 0; if (is_repeated) { if (is_packed) { switch (real_type(type)) { -#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ - case WireFormatLite::TYPE_##UPPERCASE: \ - for (int i = 0; i < repeated_##LOWERCASE##_value->size(); i++) { \ - result += WireFormatLite::CAMELCASE##Size( \ - repeated_##LOWERCASE##_value->Get(i)); \ - } \ - break - - HANDLE_TYPE( INT32, Int32, int32); - HANDLE_TYPE( INT64, Int64, int64); - HANDLE_TYPE( UINT32, UInt32, uint32); - 
HANDLE_TYPE( UINT64, UInt64, uint64); - HANDLE_TYPE( SINT32, SInt32, int32); - HANDLE_TYPE( SINT64, SInt64, int64); - HANDLE_TYPE( ENUM, Enum, enum); +#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + for (int i = 0; i < repeated_##LOWERCASE##_value->size(); i++) { \ + result += WireFormatLite::CAMELCASE##Size( \ + repeated_##LOWERCASE##_value->Get(i)); \ + } \ + break + + HANDLE_TYPE(INT32, Int32, int32); + HANDLE_TYPE(INT64, Int64, int64); + HANDLE_TYPE(UINT32, UInt32, uint32); + HANDLE_TYPE(UINT64, UInt64, uint64); + HANDLE_TYPE(SINT32, SInt32, int32); + HANDLE_TYPE(SINT64, SInt64, int64); + HANDLE_TYPE(ENUM, Enum, enum); #undef HANDLE_TYPE // Stuff with fixed size. -#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ - case WireFormatLite::TYPE_##UPPERCASE: \ - result += WireFormatLite::k##CAMELCASE##Size * \ - FromIntSize(repeated_##LOWERCASE##_value->size()); \ - break - HANDLE_TYPE( FIXED32, Fixed32, uint32); - HANDLE_TYPE( FIXED64, Fixed64, uint64); - HANDLE_TYPE(SFIXED32, SFixed32, int32); - HANDLE_TYPE(SFIXED64, SFixed64, int64); - HANDLE_TYPE( FLOAT, Float, float); - HANDLE_TYPE( DOUBLE, Double, double); - HANDLE_TYPE( BOOL, Bool, bool); +#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + result += WireFormatLite::k##CAMELCASE##Size * \ + FromIntSize(repeated_##LOWERCASE##_value->size()); \ + break + HANDLE_TYPE(FIXED32, Fixed32, uint32); + HANDLE_TYPE(FIXED64, Fixed64, uint64); + HANDLE_TYPE(SFIXED32, SFixed32, int32); + HANDLE_TYPE(SFIXED64, SFixed64, int64); + HANDLE_TYPE(FLOAT, Float, float); + HANDLE_TYPE(DOUBLE, Double, double); + HANDLE_TYPE(BOOL, Bool, bool); #undef HANDLE_TYPE case WireFormatLite::TYPE_STRING: @@ -1598,75 +1603,73 @@ size_t ExtensionSet::Extension::ByteSize(int number) const { cached_size = ToCachedSize(result); if (result > 0) { result += io::CodedOutputStream::VarintSize32(result); - result += 
io::CodedOutputStream::VarintSize32( - WireFormatLite::MakeTag(number, - WireFormatLite::WIRETYPE_LENGTH_DELIMITED)); + result += io::CodedOutputStream::VarintSize32(WireFormatLite::MakeTag( + number, WireFormatLite::WIRETYPE_LENGTH_DELIMITED)); } } else { size_t tag_size = WireFormatLite::TagSize(number, real_type(type)); switch (real_type(type)) { #define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ - case WireFormatLite::TYPE_##UPPERCASE: \ - result += tag_size * \ - FromIntSize(repeated_##LOWERCASE##_value->size()); \ - for (int i = 0; i < repeated_##LOWERCASE##_value->size(); i++) { \ - result += WireFormatLite::CAMELCASE##Size( \ - repeated_##LOWERCASE##_value->Get(i)); \ - } \ - break - - HANDLE_TYPE( INT32, Int32, int32); - HANDLE_TYPE( INT64, Int64, int64); - HANDLE_TYPE( UINT32, UInt32, uint32); - HANDLE_TYPE( UINT64, UInt64, uint64); - HANDLE_TYPE( SINT32, SInt32, int32); - HANDLE_TYPE( SINT64, SInt64, int64); - HANDLE_TYPE( STRING, String, string); - HANDLE_TYPE( BYTES, Bytes, string); - HANDLE_TYPE( ENUM, Enum, enum); - HANDLE_TYPE( GROUP, Group, message); - HANDLE_TYPE( MESSAGE, Message, message); + case WireFormatLite::TYPE_##UPPERCASE: \ + result += tag_size * FromIntSize(repeated_##LOWERCASE##_value->size()); \ + for (int i = 0; i < repeated_##LOWERCASE##_value->size(); i++) { \ + result += WireFormatLite::CAMELCASE##Size( \ + repeated_##LOWERCASE##_value->Get(i)); \ + } \ + break + + HANDLE_TYPE(INT32, Int32, int32); + HANDLE_TYPE(INT64, Int64, int64); + HANDLE_TYPE(UINT32, UInt32, uint32); + HANDLE_TYPE(UINT64, UInt64, uint64); + HANDLE_TYPE(SINT32, SInt32, int32); + HANDLE_TYPE(SINT64, SInt64, int64); + HANDLE_TYPE(STRING, String, string); + HANDLE_TYPE(BYTES, Bytes, string); + HANDLE_TYPE(ENUM, Enum, enum); + HANDLE_TYPE(GROUP, Group, message); + HANDLE_TYPE(MESSAGE, Message, message); #undef HANDLE_TYPE // Stuff with fixed size. 
-#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ - case WireFormatLite::TYPE_##UPPERCASE: \ - result += (tag_size + WireFormatLite::k##CAMELCASE##Size) * \ - FromIntSize(repeated_##LOWERCASE##_value->size()); \ - break - HANDLE_TYPE( FIXED32, Fixed32, uint32); - HANDLE_TYPE( FIXED64, Fixed64, uint64); - HANDLE_TYPE(SFIXED32, SFixed32, int32); - HANDLE_TYPE(SFIXED64, SFixed64, int64); - HANDLE_TYPE( FLOAT, Float, float); - HANDLE_TYPE( DOUBLE, Double, double); - HANDLE_TYPE( BOOL, Bool, bool); +#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + result += (tag_size + WireFormatLite::k##CAMELCASE##Size) * \ + FromIntSize(repeated_##LOWERCASE##_value->size()); \ + break + HANDLE_TYPE(FIXED32, Fixed32, uint32); + HANDLE_TYPE(FIXED64, Fixed64, uint64); + HANDLE_TYPE(SFIXED32, SFixed32, int32); + HANDLE_TYPE(SFIXED64, SFixed64, int64); + HANDLE_TYPE(FLOAT, Float, float); + HANDLE_TYPE(DOUBLE, Double, double); + HANDLE_TYPE(BOOL, Bool, bool); #undef HANDLE_TYPE } } } else if (!is_cleared) { result += WireFormatLite::TagSize(number, real_type(type)); switch (real_type(type)) { -#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ - case WireFormatLite::TYPE_##UPPERCASE: \ - result += WireFormatLite::CAMELCASE##Size(LOWERCASE); \ - break - - HANDLE_TYPE( INT32, Int32, int32_value); - HANDLE_TYPE( INT64, Int64, int64_value); - HANDLE_TYPE( UINT32, UInt32, uint32_value); - HANDLE_TYPE( UINT64, UInt64, uint64_value); - HANDLE_TYPE( SINT32, SInt32, int32_value); - HANDLE_TYPE( SINT64, SInt64, int64_value); - HANDLE_TYPE( STRING, String, *string_value); - HANDLE_TYPE( BYTES, Bytes, *string_value); - HANDLE_TYPE( ENUM, Enum, enum_value); - HANDLE_TYPE( GROUP, Group, *message_value); +#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + result += WireFormatLite::CAMELCASE##Size(LOWERCASE); \ + break + + HANDLE_TYPE(INT32, Int32, int32_value); + HANDLE_TYPE(INT64, Int64, 
int64_value); + HANDLE_TYPE(UINT32, UInt32, uint32_value); + HANDLE_TYPE(UINT64, UInt64, uint64_value); + HANDLE_TYPE(SINT32, SInt32, int32_value); + HANDLE_TYPE(SINT64, SInt64, int64_value); + HANDLE_TYPE(STRING, String, *string_value); + HANDLE_TYPE(BYTES, Bytes, *string_value); + HANDLE_TYPE(ENUM, Enum, enum_value); + HANDLE_TYPE(GROUP, Group, *message_value); #undef HANDLE_TYPE case WireFormatLite::TYPE_MESSAGE: { if (is_lazy) { - size_t size = lazymessage_value->ByteSize(); + size_t size = lazymessage_value->ByteSizeLong(); result += io::CodedOutputStream::VarintSize32(size) + size; } else { result += WireFormatLite::MessageSize(*message_value); @@ -1675,17 +1678,17 @@ size_t ExtensionSet::Extension::ByteSize(int number) const { } // Stuff with fixed size. -#define HANDLE_TYPE(UPPERCASE, CAMELCASE) \ - case WireFormatLite::TYPE_##UPPERCASE: \ - result += WireFormatLite::k##CAMELCASE##Size; \ - break - HANDLE_TYPE( FIXED32, Fixed32); - HANDLE_TYPE( FIXED64, Fixed64); - HANDLE_TYPE(SFIXED32, SFixed32); - HANDLE_TYPE(SFIXED64, SFixed64); - HANDLE_TYPE( FLOAT, Float); - HANDLE_TYPE( DOUBLE, Double); - HANDLE_TYPE( BOOL, Bool); +#define HANDLE_TYPE(UPPERCASE, CAMELCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + result += WireFormatLite::k##CAMELCASE##Size; \ + break + HANDLE_TYPE(FIXED32, Fixed32); + HANDLE_TYPE(FIXED64, Fixed64); + HANDLE_TYPE(SFIXED32, SFixed32); + HANDLE_TYPE(SFIXED64, SFixed64); + HANDLE_TYPE(FLOAT, Float); + HANDLE_TYPE(DOUBLE, Double); + HANDLE_TYPE(BOOL, Bool); #undef HANDLE_TYPE } } @@ -1696,19 +1699,19 @@ size_t ExtensionSet::Extension::ByteSize(int number) const { int ExtensionSet::Extension::GetSize() const { GOOGLE_DCHECK(is_repeated); switch (cpp_type(type)) { -#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ - case WireFormatLite::CPPTYPE_##UPPERCASE: \ - return repeated_##LOWERCASE##_value->size() - - HANDLE_TYPE( INT32, int32); - HANDLE_TYPE( INT64, int64); - HANDLE_TYPE( UINT32, uint32); - HANDLE_TYPE( UINT64, uint64); - 
HANDLE_TYPE( FLOAT, float); - HANDLE_TYPE( DOUBLE, double); - HANDLE_TYPE( BOOL, bool); - HANDLE_TYPE( ENUM, enum); - HANDLE_TYPE( STRING, string); +#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ + case WireFormatLite::CPPTYPE_##UPPERCASE: \ + return repeated_##LOWERCASE##_value->size() + + HANDLE_TYPE(INT32, int32); + HANDLE_TYPE(INT64, int64); + HANDLE_TYPE(UINT32, uint32); + HANDLE_TYPE(UINT64, uint64); + HANDLE_TYPE(FLOAT, float); + HANDLE_TYPE(DOUBLE, double); + HANDLE_TYPE(BOOL, bool); + HANDLE_TYPE(ENUM, enum); + HANDLE_TYPE(STRING, string); HANDLE_TYPE(MESSAGE, message); #undef HANDLE_TYPE } @@ -1722,20 +1725,20 @@ int ExtensionSet::Extension::GetSize() const { void ExtensionSet::Extension::Free() { if (is_repeated) { switch (cpp_type(type)) { -#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ - case WireFormatLite::CPPTYPE_##UPPERCASE: \ - delete repeated_##LOWERCASE##_value; \ - break - - HANDLE_TYPE( INT32, int32); - HANDLE_TYPE( INT64, int64); - HANDLE_TYPE( UINT32, uint32); - HANDLE_TYPE( UINT64, uint64); - HANDLE_TYPE( FLOAT, float); - HANDLE_TYPE( DOUBLE, double); - HANDLE_TYPE( BOOL, bool); - HANDLE_TYPE( ENUM, enum); - HANDLE_TYPE( STRING, string); +#define HANDLE_TYPE(UPPERCASE, LOWERCASE) \ + case WireFormatLite::CPPTYPE_##UPPERCASE: \ + delete repeated_##LOWERCASE##_value; \ + break + + HANDLE_TYPE(INT32, int32); + HANDLE_TYPE(INT64, int64); + HANDLE_TYPE(UINT32, uint32); + HANDLE_TYPE(UINT64, uint64); + HANDLE_TYPE(FLOAT, float); + HANDLE_TYPE(DOUBLE, double); + HANDLE_TYPE(BOOL, bool); + HANDLE_TYPE(ENUM, enum); + HANDLE_TYPE(STRING, string); HANDLE_TYPE(MESSAGE, message); #undef HANDLE_TYPE } @@ -1785,7 +1788,7 @@ bool ExtensionSet::Extension::IsInitialized() const { void ExtensionSet::LazyMessageExtension::UnusedKeyMethod() {} const ExtensionSet::Extension* ExtensionSet::FindOrNull(int key) const { - if (GOOGLE_PREDICT_FALSE(is_large())) { + if (PROTOBUF_PREDICT_FALSE(is_large())) { return FindOrNullInLargeMap(key); } const KeyValue* end = 
flat_end(); @@ -1808,7 +1811,7 @@ const ExtensionSet::Extension* ExtensionSet::FindOrNullInLargeMap( } ExtensionSet::Extension* ExtensionSet::FindOrNull(int key) { - if (GOOGLE_PREDICT_FALSE(is_large())) { + if (PROTOBUF_PREDICT_FALSE(is_large())) { return FindOrNullInLargeMap(key); } KeyValue* end = flat_end(); @@ -1830,7 +1833,7 @@ ExtensionSet::Extension* ExtensionSet::FindOrNullInLargeMap(int key) { } std::pair ExtensionSet::Insert(int key) { - if (GOOGLE_PREDICT_FALSE(is_large())) { + if (PROTOBUF_PREDICT_FALSE(is_large())) { auto maybe = map_.large->insert({key, Extension()}); return {&maybe.first->second, maybe.second}; } @@ -1852,39 +1855,47 @@ std::pair ExtensionSet::Insert(int key) { } void ExtensionSet::GrowCapacity(size_t minimum_new_capacity) { - if (GOOGLE_PREDICT_FALSE(is_large())) { + if (PROTOBUF_PREDICT_FALSE(is_large())) { return; // LargeMap does not have a "reserve" method. } if (flat_capacity_ >= minimum_new_capacity) { return; } + auto new_flat_capacity = flat_capacity_; do { - flat_capacity_ = flat_capacity_ == 0 ? 1 : flat_capacity_ * 4; - } while (flat_capacity_ < minimum_new_capacity); + new_flat_capacity = new_flat_capacity == 0 ? 
1 : new_flat_capacity * 4; + } while (new_flat_capacity < minimum_new_capacity); const KeyValue* begin = flat_begin(); const KeyValue* end = flat_end(); - if (flat_capacity_ > kMaximumFlatCapacity) { - // Switch to LargeMap - map_.large = ::google::protobuf::Arena::Create(arena_); - LargeMap::iterator hint = map_.large->begin(); + AllocatedData new_map; + if (new_flat_capacity > kMaximumFlatCapacity) { + new_map.large = Arena::Create(arena_); + LargeMap::iterator hint = new_map.large->begin(); for (const KeyValue* it = begin; it != end; ++it) { - hint = map_.large->insert(hint, {it->first, it->second}); + hint = new_map.large->insert(hint, {it->first, it->second}); } - flat_size_ = 0; } else { - map_.flat = ::google::protobuf::Arena::CreateArray(arena_, flat_capacity_); - std::copy(begin, end, map_.flat); + new_map.flat = Arena::CreateArray(arena_, new_flat_capacity); + std::copy(begin, end, new_map.flat); + } + + if (arena_ == nullptr) { + DeleteFlatMap(begin, flat_capacity_); + } + flat_capacity_ = new_flat_capacity; + map_ = new_map; + if (is_large()) { + flat_size_ = 0; } - if (arena_ == NULL) delete[] begin; } // static constexpr uint16 ExtensionSet::kMaximumFlatCapacity; void ExtensionSet::Erase(int key) { - if (GOOGLE_PREDICT_FALSE(is_large())) { + if (PROTOBUF_PREDICT_FALSE(is_large())) { map_.large->erase(key); return; } @@ -1911,6 +1922,222 @@ RepeatedStringTypeTraits::GetDefaultRepeatedField() { return instance; } +uint8* ExtensionSet::Extension::InternalSerializeFieldWithCachedSizesToArray( + int number, uint8* target, io::EpsCopyOutputStream* stream) const { + if (is_repeated) { + if (is_packed) { + if (cached_size == 0) return target; + + target = stream->EnsureSpace(target); + target = WireFormatLite::WriteTagToArray( + number, WireFormatLite::WIRETYPE_LENGTH_DELIMITED, target); + target = WireFormatLite::WriteInt32NoTagToArray(cached_size, target); + + switch (real_type(type)) { +#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ + case 
WireFormatLite::TYPE_##UPPERCASE: \ + for (int i = 0; i < repeated_##LOWERCASE##_value->size(); i++) { \ + target = stream->EnsureSpace(target); \ + target = WireFormatLite::Write##CAMELCASE##NoTagToArray( \ + repeated_##LOWERCASE##_value->Get(i), target); \ + } \ + break + + HANDLE_TYPE(INT32, Int32, int32); + HANDLE_TYPE(INT64, Int64, int64); + HANDLE_TYPE(UINT32, UInt32, uint32); + HANDLE_TYPE(UINT64, UInt64, uint64); + HANDLE_TYPE(SINT32, SInt32, int32); + HANDLE_TYPE(SINT64, SInt64, int64); + HANDLE_TYPE(FIXED32, Fixed32, uint32); + HANDLE_TYPE(FIXED64, Fixed64, uint64); + HANDLE_TYPE(SFIXED32, SFixed32, int32); + HANDLE_TYPE(SFIXED64, SFixed64, int64); + HANDLE_TYPE(FLOAT, Float, float); + HANDLE_TYPE(DOUBLE, Double, double); + HANDLE_TYPE(BOOL, Bool, bool); + HANDLE_TYPE(ENUM, Enum, enum); +#undef HANDLE_TYPE + + case WireFormatLite::TYPE_STRING: + case WireFormatLite::TYPE_BYTES: + case WireFormatLite::TYPE_GROUP: + case WireFormatLite::TYPE_MESSAGE: + GOOGLE_LOG(FATAL) << "Non-primitive types can't be packed."; + break; + } + } else { + switch (real_type(type)) { +#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + for (int i = 0; i < repeated_##LOWERCASE##_value->size(); i++) { \ + target = stream->EnsureSpace(target); \ + target = WireFormatLite::Write##CAMELCASE##ToArray( \ + number, repeated_##LOWERCASE##_value->Get(i), target); \ + } \ + break + + HANDLE_TYPE(INT32, Int32, int32); + HANDLE_TYPE(INT64, Int64, int64); + HANDLE_TYPE(UINT32, UInt32, uint32); + HANDLE_TYPE(UINT64, UInt64, uint64); + HANDLE_TYPE(SINT32, SInt32, int32); + HANDLE_TYPE(SINT64, SInt64, int64); + HANDLE_TYPE(FIXED32, Fixed32, uint32); + HANDLE_TYPE(FIXED64, Fixed64, uint64); + HANDLE_TYPE(SFIXED32, SFixed32, int32); + HANDLE_TYPE(SFIXED64, SFixed64, int64); + HANDLE_TYPE(FLOAT, Float, float); + HANDLE_TYPE(DOUBLE, Double, double); + HANDLE_TYPE(BOOL, Bool, bool); + HANDLE_TYPE(ENUM, Enum, enum); +#undef HANDLE_TYPE +#define 
HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + for (int i = 0; i < repeated_##LOWERCASE##_value->size(); i++) { \ + target = stream->EnsureSpace(target); \ + target = stream->WriteString( \ + number, repeated_##LOWERCASE##_value->Get(i), target); \ + } \ + break + HANDLE_TYPE(STRING, String, string); + HANDLE_TYPE(BYTES, Bytes, string); +#undef HANDLE_TYPE +#define HANDLE_TYPE(UPPERCASE, CAMELCASE, LOWERCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + for (int i = 0; i < repeated_##LOWERCASE##_value->size(); i++) { \ + target = stream->EnsureSpace(target); \ + target = WireFormatLite::InternalWrite##CAMELCASE( \ + number, repeated_##LOWERCASE##_value->Get(i), target, stream); \ + } \ + break + + HANDLE_TYPE(GROUP, Group, message); + HANDLE_TYPE(MESSAGE, Message, message); +#undef HANDLE_TYPE + } + } + } else if (!is_cleared) { + switch (real_type(type)) { +#define HANDLE_TYPE(UPPERCASE, CAMELCASE, VALUE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + target = stream->EnsureSpace(target); \ + target = WireFormatLite::Write##CAMELCASE##ToArray(number, VALUE, target); \ + break + + HANDLE_TYPE(INT32, Int32, int32_value); + HANDLE_TYPE(INT64, Int64, int64_value); + HANDLE_TYPE(UINT32, UInt32, uint32_value); + HANDLE_TYPE(UINT64, UInt64, uint64_value); + HANDLE_TYPE(SINT32, SInt32, int32_value); + HANDLE_TYPE(SINT64, SInt64, int64_value); + HANDLE_TYPE(FIXED32, Fixed32, uint32_value); + HANDLE_TYPE(FIXED64, Fixed64, uint64_value); + HANDLE_TYPE(SFIXED32, SFixed32, int32_value); + HANDLE_TYPE(SFIXED64, SFixed64, int64_value); + HANDLE_TYPE(FLOAT, Float, float_value); + HANDLE_TYPE(DOUBLE, Double, double_value); + HANDLE_TYPE(BOOL, Bool, bool_value); + HANDLE_TYPE(ENUM, Enum, enum_value); +#undef HANDLE_TYPE +#define HANDLE_TYPE(UPPERCASE, CAMELCASE, VALUE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + target = stream->EnsureSpace(target); \ + target = stream->WriteString(number, VALUE, target); \ + break + 
HANDLE_TYPE(STRING, String, *string_value); + HANDLE_TYPE(BYTES, Bytes, *string_value); +#undef HANDLE_TYPE + case WireFormatLite::TYPE_GROUP: + target = stream->EnsureSpace(target); + target = WireFormatLite::InternalWriteGroup(number, *message_value, + target, stream); + break; + case WireFormatLite::TYPE_MESSAGE: + if (is_lazy) { + target = + lazymessage_value->WriteMessageToArray(number, target, stream); + } else { + target = stream->EnsureSpace(target); + target = WireFormatLite::InternalWriteMessage(number, *message_value, + target, stream); + } + break; + } + } + return target; +} + +uint8* +ExtensionSet::Extension::InternalSerializeMessageSetItemWithCachedSizesToArray( + int number, uint8* target, io::EpsCopyOutputStream* stream) const { + if (type != WireFormatLite::TYPE_MESSAGE || is_repeated) { + // Not a valid MessageSet extension, but serialize it the normal way. + GOOGLE_LOG(WARNING) << "Invalid message set extension."; + return InternalSerializeFieldWithCachedSizesToArray(number, target, stream); + } + + if (is_cleared) return target; + + target = stream->EnsureSpace(target); + // Start group. + target = io::CodedOutputStream::WriteTagToArray( + WireFormatLite::kMessageSetItemStartTag, target); + // Write type ID. + target = WireFormatLite::WriteUInt32ToArray( + WireFormatLite::kMessageSetTypeIdNumber, number, target); + // Write message. + if (is_lazy) { + target = lazymessage_value->WriteMessageToArray( + WireFormatLite::kMessageSetMessageNumber, target, stream); + } else { + target = WireFormatLite::InternalWriteMessage( + WireFormatLite::kMessageSetMessageNumber, *message_value, target, + stream); + } + // End group. 
+ target = stream->EnsureSpace(target); + target = io::CodedOutputStream::WriteTagToArray( + WireFormatLite::kMessageSetItemEndTag, target); + return target; +} + +size_t ExtensionSet::Extension::MessageSetItemByteSize(int number) const { + if (type != WireFormatLite::TYPE_MESSAGE || is_repeated) { + // Not a valid MessageSet extension, but compute the byte size for it the + // normal way. + return ByteSize(number); + } + + if (is_cleared) return 0; + + size_t our_size = WireFormatLite::kMessageSetItemTagsSize; + + // type_id + our_size += io::CodedOutputStream::VarintSize32(number); + + // message + size_t message_size = 0; + if (is_lazy) { + message_size = lazymessage_value->ByteSizeLong(); + } else { + message_size = message_value->ByteSizeLong(); + } + + our_size += io::CodedOutputStream::VarintSize32(message_size); + our_size += message_size; + + return our_size; +} + +size_t ExtensionSet::MessageSetByteSize() const { + size_t total_size = 0; + ForEach([&total_size](int number, const Extension& ext) { + total_size += ext.MessageSetItemByteSize(number); + }); + return total_size; +} + } // namespace internal } // namespace protobuf } // namespace google diff --git a/third_party/protobuf-lite/google/protobuf/io/coded_stream_inl.h b/third_party/protobuf-lite/generated_enum_util.cc similarity index 50% rename from third_party/protobuf-lite/google/protobuf/io/coded_stream_inl.h rename to third_party/protobuf-lite/generated_enum_util.cc index d95b06e0..d0c25a96 100644 --- a/third_party/protobuf-lite/google/protobuf/io/coded_stream_inl.h +++ b/third_party/protobuf-lite/generated_enum_util.cc @@ -28,63 +28,68 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// Author: jasonh@google.com (Jason Hsueh) -// -// Implements methods of coded_stream.h that need to be inlined for performance -// reasons, but should not be defined in a public header. 
+#include -#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__ -#define GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__ +#include -#include -#include -#include -#include -#include -#include +#include namespace google { namespace protobuf { -namespace io { +namespace internal { +namespace { -inline bool CodedInputStream::InternalReadStringInline(string* buffer, - int size) { - if (size < 0) return false; // security: size is often user-supplied +bool EnumCompareByName(const EnumEntry& a, const EnumEntry& b) { + return StringPiece(a.name) < StringPiece(b.name); +} - if (BufferSize() >= size) { - STLStringResizeUninitialized(buffer, size); - std::pair z = as_string_data(buffer); - if (z.second) { - // Oddly enough, memcpy() requires its first two args to be non-NULL even - // if we copy 0 bytes. So, we have ensured that z.first is non-NULL here. - GOOGLE_DCHECK(z.first != NULL); - memcpy(z.first, buffer_, size); - Advance(size); - } - return true; +// Gets the numeric value of the EnumEntry at the given index, but returns a +// special value for the index -1. This gives a way to use std::lower_bound on a +// sorted array of indices while searching for value that we associate with -1. +int GetValue(const EnumEntry* enums, int i, int target) { + if (i == -1) { + return target; + } else { + return enums[i].value; } - - return ReadStringFallback(buffer, size); } -inline bool CodedInputStream::InternalReadRawInline(void* buffer, int size) { - int current_buffer_size; - while ((current_buffer_size = BufferSize()) < size) { - // Reading past end of buffer. Copy what we have, then refresh. 
- memcpy(buffer, buffer_, current_buffer_size); - buffer = reinterpret_cast(buffer) + current_buffer_size; - size -= current_buffer_size; - Advance(current_buffer_size); - if (!Refresh()) return false; +} // namespace + +bool LookUpEnumValue(const EnumEntry* enums, size_t size, + StringPiece name, int* value) { + EnumEntry target{name, 0}; + auto it = std::lower_bound(enums, enums + size, target, EnumCompareByName); + if (it != enums + size && it->name == name) { + *value = it->value; + return true; } + return false; +} - memcpy(buffer, buffer_, size); - Advance(size); +int LookUpEnumName(const EnumEntry* enums, const int* sorted_indices, + size_t size, int value) { + auto comparator = [enums, value](int a, int b) { + return GetValue(enums, a, value) < GetValue(enums, b, value); + }; + auto it = + std::lower_bound(sorted_indices, sorted_indices + size, -1, comparator); + if (it != sorted_indices + size && enums[*it].value == value) { + return it - sorted_indices; + } + return -1; +} +bool InitializeEnumStrings( + const EnumEntry* enums, const int* sorted_indices, size_t size, + internal::ExplicitlyConstructed* enum_strings) { + for (int i = 0; i < size; ++i) { + enum_strings[i].Construct(enums[sorted_indices[i]].name); + internal::OnShutdownDestroyString(enum_strings[i].get_mutable()); + } return true; } -} // namespace io +} // namespace internal } // namespace protobuf } // namespace google -#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__ diff --git a/third_party/protobuf-lite/generated_message_table_driven_lite.cc b/third_party/protobuf-lite/generated_message_table_driven_lite.cc index 961329f3..02e6dace 100644 --- a/third_party/protobuf-lite/generated_message_table_driven_lite.cc +++ b/third_party/protobuf-lite/generated_message_table_driven_lite.cc @@ -36,7 +36,6 @@ #include #include #include -#include namespace google { namespace protobuf { @@ -44,40 +43,40 @@ namespace internal { namespace { -string* MutableUnknownFields(MessageLite* msg, int64 
arena_offset) { - return Raw(msg, arena_offset) - ->mutable_unknown_fields(); +std::string* MutableUnknownFields(MessageLite* msg, int64 arena_offset) { + return Raw(msg, arena_offset) + ->mutable_unknown_fields(); } struct UnknownFieldHandlerLite { + // TODO(mvels): consider renaming UnknownFieldHandler to (TableDrivenTraits?), + // and conflating InternalMetaData into it, simplifying the template. + static constexpr bool IsLite() { return true; } + static bool Skip(MessageLite* msg, const ParseTable& table, - io::CodedInputStream* input, - int tag) { + io::CodedInputStream* input, int tag) { GOOGLE_DCHECK(!table.unknown_field_set); - ::google::protobuf::io::StringOutputStream unknown_fields_string( + io::StringOutputStream unknown_fields_string( MutableUnknownFields(msg, table.arena_offset)); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_string, false); + io::CodedOutputStream unknown_fields_stream(&unknown_fields_string, false); - return ::google::protobuf::internal::WireFormatLite::SkipField( - input, tag, &unknown_fields_stream); + return internal::WireFormatLite::SkipField(input, tag, + &unknown_fields_stream); } - static void Varint(MessageLite* msg, const ParseTable& table, - int tag, int value) { + static void Varint(MessageLite* msg, const ParseTable& table, int tag, + int value) { GOOGLE_DCHECK(!table.unknown_field_set); - ::google::protobuf::io::StringOutputStream unknown_fields_string( + io::StringOutputStream unknown_fields_string( MutableUnknownFields(msg, table.arena_offset)); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_string, false); + io::CodedOutputStream unknown_fields_stream(&unknown_fields_string, false); unknown_fields_stream.WriteVarint32(tag); unknown_fields_stream.WriteVarint32(value); } - static bool ParseExtension( - MessageLite* msg, const ParseTable& table, - io::CodedInputStream* input, int tag) { + static bool ParseExtension(MessageLite* msg, const 
ParseTable& table, + io::CodedInputStream* input, int tag) { ExtensionSet* extensions = GetExtensionSet(msg, table.extension_offset); if (extensions == NULL) { return false; @@ -86,22 +85,20 @@ struct UnknownFieldHandlerLite { const MessageLite* prototype = table.default_instance(); GOOGLE_DCHECK(!table.unknown_field_set); - ::google::protobuf::io::StringOutputStream unknown_fields_string( + io::StringOutputStream unknown_fields_string( MutableUnknownFields(msg, table.arena_offset)); - ::google::protobuf::io::CodedOutputStream unknown_fields_stream( - &unknown_fields_string, false); - return extensions->ParseField( - tag, input, prototype, &unknown_fields_stream); + io::CodedOutputStream unknown_fields_stream(&unknown_fields_string, false); + return extensions->ParseField(tag, input, prototype, + &unknown_fields_stream); } }; } // namespace -bool MergePartialFromCodedStreamLite( - MessageLite* msg, const ParseTable& table, io::CodedInputStream* input) { - return MergePartialFromCodedStreamImpl( - msg, table, input); +bool MergePartialFromCodedStreamLite(MessageLite* msg, const ParseTable& table, + io::CodedInputStream* input) { + return MergePartialFromCodedStreamImpl(msg, table, + input); } } // namespace internal diff --git a/third_party/protobuf-lite/generated_message_util.cc b/third_party/protobuf-lite/generated_message_util.cc index e0241361..f1f6f883 100644 --- a/third_party/protobuf-lite/generated_message_util.cc +++ b/third_party/protobuf-lite/generated_message_util.cc @@ -35,54 +35,63 @@ #include #include + +#ifndef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP // We're only using this as a standard way for getting the thread id. // We're not using any thread functionality. 
#include // NOLINT +#endif // #ifndef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP + #include -#include #include +#include #include #include +#include #include #include #include -#include +#include #include #include -#include -namespace google { +namespace google { namespace protobuf { namespace internal { void DestroyMessage(const void* message) { static_cast(message)->~MessageLite(); } -void DestroyString(const void* s) { static_cast(s)->~string(); } +void DestroyString(const void* s) { + static_cast(s)->~basic_string(); +} -ExplicitlyConstructed fixed_address_empty_string; +PROTOBUF_ATTRIBUTE_NO_DESTROY PROTOBUF_CONSTINIT EmptyString + fixed_address_empty_string; // NOLINT -double Infinity() { - return std::numeric_limits::infinity(); -} -double NaN() { - return std::numeric_limits::quiet_NaN(); -} +PROTOBUF_CONSTINIT std::atomic init_protobuf_defaults_state{false}; static bool InitProtobufDefaultsImpl() { - fixed_address_empty_string.DefaultConstruct(); - OnShutdownDestroyString(fixed_address_empty_string.get_mutable()); + ::new (static_cast(&fixed_address_empty_string.value)) std::string(); + OnShutdownDestroyString(&fixed_address_empty_string.value); + + // Verify that we can indeed get the address during constant evaluation. 
+ PROTOBUF_CONSTINIT static const std::string& fixed_address_empty_string_test = + GetEmptyStringAlreadyInited(); + (void)fixed_address_empty_string_test; + + init_protobuf_defaults_state.store(true, std::memory_order_release); return true; } -void InitProtobufDefaults() { +void InitProtobufDefaultsSlow() { static bool is_inited = InitProtobufDefaultsImpl(); (void)is_inited; } -size_t StringSpaceUsedExcludingSelfLong(const string& str) { +size_t StringSpaceUsedExcludingSelfLong(const std::string& str) { const void* start = &str; const void* end = &str + 1; if (start <= str.data() && str.data() < end) { @@ -108,8 +117,7 @@ struct PrimitiveTypeHelper; template <> struct PrimitiveTypeHelper { typedef bool Type; - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* output) { + static void Serialize(const void* ptr, io::CodedOutputStream* output) { WireFormatLite::WriteBoolNoTag(Get(ptr), output); } static uint8* SerializeToArray(const void* ptr, uint8* buffer) { @@ -120,8 +128,7 @@ struct PrimitiveTypeHelper { template <> struct PrimitiveTypeHelper { typedef int32 Type; - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* output) { + static void Serialize(const void* ptr, io::CodedOutputStream* output) { WireFormatLite::WriteInt32NoTag(Get(ptr), output); } static uint8* SerializeToArray(const void* ptr, uint8* buffer) { @@ -132,8 +139,7 @@ struct PrimitiveTypeHelper { template <> struct PrimitiveTypeHelper { typedef int32 Type; - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* output) { + static void Serialize(const void* ptr, io::CodedOutputStream* output) { WireFormatLite::WriteSInt32NoTag(Get(ptr), output); } static uint8* SerializeToArray(const void* ptr, uint8* buffer) { @@ -144,8 +150,7 @@ struct PrimitiveTypeHelper { template <> struct PrimitiveTypeHelper { typedef uint32 Type; - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* 
output) { + static void Serialize(const void* ptr, io::CodedOutputStream* output) { WireFormatLite::WriteUInt32NoTag(Get(ptr), output); } static uint8* SerializeToArray(const void* ptr, uint8* buffer) { @@ -155,8 +160,7 @@ struct PrimitiveTypeHelper { template <> struct PrimitiveTypeHelper { typedef int64 Type; - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* output) { + static void Serialize(const void* ptr, io::CodedOutputStream* output) { WireFormatLite::WriteInt64NoTag(Get(ptr), output); } static uint8* SerializeToArray(const void* ptr, uint8* buffer) { @@ -167,8 +171,7 @@ struct PrimitiveTypeHelper { template <> struct PrimitiveTypeHelper { typedef int64 Type; - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* output) { + static void Serialize(const void* ptr, io::CodedOutputStream* output) { WireFormatLite::WriteSInt64NoTag(Get(ptr), output); } static uint8* SerializeToArray(const void* ptr, uint8* buffer) { @@ -178,8 +181,7 @@ struct PrimitiveTypeHelper { template <> struct PrimitiveTypeHelper { typedef uint64 Type; - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* output) { + static void Serialize(const void* ptr, io::CodedOutputStream* output) { WireFormatLite::WriteUInt64NoTag(Get(ptr), output); } static uint8* SerializeToArray(const void* ptr, uint8* buffer) { @@ -190,8 +192,7 @@ struct PrimitiveTypeHelper { template <> struct PrimitiveTypeHelper { typedef uint32 Type; - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* output) { + static void Serialize(const void* ptr, io::CodedOutputStream* output) { WireFormatLite::WriteFixed32NoTag(Get(ptr), output); } static uint8* SerializeToArray(const void* ptr, uint8* buffer) { @@ -202,8 +203,7 @@ struct PrimitiveTypeHelper { template <> struct PrimitiveTypeHelper { typedef uint64 Type; - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* 
output) { + static void Serialize(const void* ptr, io::CodedOutputStream* output) { WireFormatLite::WriteFixed64NoTag(Get(ptr), output); } static uint8* SerializeToArray(const void* ptr, uint8* buffer) { @@ -238,9 +238,8 @@ struct PrimitiveTypeHelper template <> struct PrimitiveTypeHelper { - typedef string Type; - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* output) { + typedef std::string Type; + static void Serialize(const void* ptr, io::CodedOutputStream* output) { const Type& value = *static_cast(ptr); output->WriteVarint32(value.size()); output->WriteRawMaybeAliased(value.data(), value.size()); @@ -256,10 +255,6 @@ struct PrimitiveTypeHelper : PrimitiveTypeHelper {}; -template <> -struct PrimitiveTypeHelper - : PrimitiveTypeHelper {}; - // We want to serialize to both CodedOutputStream and directly into byte arrays // without duplicating the code. In fact we might want extra output channels in // the future. @@ -283,9 +278,8 @@ void WriteLengthTo(uint32 length, O* output) { // Specialization for coded output stream template -struct OutputHelper<::google::protobuf::io::CodedOutputStream, type> { - static void Serialize(const void* ptr, - ::google::protobuf::io::CodedOutputStream* output) { +struct OutputHelper { + static void Serialize(const void* ptr, io::CodedOutputStream* output) { PrimitiveTypeHelper::Serialize(ptr, output); } }; @@ -304,35 +298,29 @@ struct OutputHelper { }; void SerializeMessageNoTable(const MessageLite* msg, - ::google::protobuf::io::CodedOutputStream* output) { + io::CodedOutputStream* output) { msg->SerializeWithCachedSizes(output); } void SerializeMessageNoTable(const MessageLite* msg, ArrayOutput* output) { - output->ptr = msg->InternalSerializeWithCachedSizesToArray( - output->is_deterministic, output->ptr); + io::ArrayOutputStream array_stream(output->ptr, INT_MAX); + io::CodedOutputStream o(&array_stream); + o.SetSerializationDeterministic(output->is_deterministic); + 
msg->SerializeWithCachedSizes(&o); + output->ptr += o.ByteCount(); } // Helper to branch to fast path if possible -void SerializeMessageDispatch(const ::google::protobuf::MessageLite& msg, +void SerializeMessageDispatch(const MessageLite& msg, const FieldMetadata* field_table, int num_fields, int32 cached_size, - ::google::protobuf::io::CodedOutputStream* output) { + io::CodedOutputStream* output) { const uint8* base = reinterpret_cast(&msg); - // Try the fast path - uint8* ptr = output->GetDirectBufferForNBytesAndAdvance(cached_size); - if (ptr) { - // We use virtual dispatch to enable dedicated generated code for the - // fast path. - msg.InternalSerializeWithCachedSizesToArray( - output->IsSerializationDeterministic(), ptr); - return; - } SerializeInternal(base, field_table, num_fields, output); } // Helper to branch to fast path if possible -void SerializeMessageDispatch(const ::google::protobuf::MessageLite& msg, +void SerializeMessageDispatch(const MessageLite& msg, const FieldMetadata* field_table, int num_fields, int32 cached_size, ArrayOutput* output) { const uint8* base = reinterpret_cast(&msg); @@ -425,15 +413,6 @@ struct SingularFieldHelper { } }; -template <> -struct SingularFieldHelper { - template - static void Serialize(const void* field, const FieldMetadata& md, O* output) { - WriteTagTo(md.tag, output); - SerializeTo(&Get<::std::string>(field), output); - } -}; - template struct RepeatedFieldHelper { template @@ -499,17 +478,13 @@ struct RepeatedFieldHelper { for (int i = 0; i < AccessorHelper::Size(array); i++) { WriteTagTo(md.tag, output); SerializeMessageTo( - static_cast(AccessorHelper::Get(array, i)), md.ptr, - output); + static_cast(AccessorHelper::Get(array, i)), + md.ptr, output); } } }; -template <> -struct RepeatedFieldHelper - : RepeatedFieldHelper {}; - template struct PackedFieldHelper { template @@ -545,9 +520,6 @@ struct PackedFieldHelper template <> struct PackedFieldHelper : PackedFieldHelper {}; -template <> -struct 
PackedFieldHelper - : PackedFieldHelper {}; template struct OneOfFieldHelper { @@ -558,15 +530,6 @@ struct OneOfFieldHelper { }; -template <> -struct OneOfFieldHelper { - template - static void Serialize(const void* field, const FieldMetadata& md, O* output) { - SingularFieldHelper::Serialize( - Get(field), md, output); - } -}; - void SerializeNotImplemented(int field) { GOOGLE_LOG(FATAL) << "Not implemented field number " << field; } @@ -607,11 +570,6 @@ bool IsNull(const void* ptr) { } -template <> -bool IsNull(const void* ptr) { - return static_cast(ptr)->empty(); -} - #define SERIALIZERS_FOR_TYPE(type) \ case SERIALIZE_TABLE_OP(type, FieldMetadata::kPresence): \ if (!IsPresent(base, field_metadata.has_offset)) continue; \ @@ -635,8 +593,8 @@ bool IsNull(const void* ptr) { void SerializeInternal(const uint8* base, const FieldMetadata* field_metadata_table, - int32 num_fields, - ::google::protobuf::io::CodedOutputStream* output) { + int32 num_fields, io::CodedOutputStream* output) { + SpecialSerializer func = nullptr; for (int i = 0; i < num_fields; i++) { const FieldMetadata& field_metadata = field_metadata_table[i]; const uint8* ptr = base + field_metadata.offset; @@ -659,14 +617,13 @@ void SerializeInternal(const uint8* base, SERIALIZERS_FOR_TYPE(WireFormatLite::TYPE_SFIXED64); SERIALIZERS_FOR_TYPE(WireFormatLite::TYPE_SINT32); SERIALIZERS_FOR_TYPE(WireFormatLite::TYPE_SINT64); - SERIALIZERS_FOR_TYPE(FieldMetadata::kInlinedType); // Special cases case FieldMetadata::kSpecial: - reinterpret_cast( - const_cast(field_metadata.ptr))( - base, field_metadata.offset, field_metadata.tag, - field_metadata.has_offset, output); + func = reinterpret_cast( + const_cast(field_metadata.ptr)); + func(base, field_metadata.offset, field_metadata.tag, + field_metadata.has_offset, output); break; default: // __builtin_unreachable() @@ -681,6 +638,7 @@ uint8* SerializeInternalToArray(const uint8* base, uint8* buffer) { ArrayOutput array_output = {buffer, is_deterministic}; 
ArrayOutput* output = &array_output; + SpecialSerializer func = nullptr; for (int i = 0; i < num_fields; i++) { const FieldMetadata& field_metadata = field_metadata_table[i]; const uint8* ptr = base + field_metadata.offset; @@ -703,16 +661,15 @@ uint8* SerializeInternalToArray(const uint8* base, SERIALIZERS_FOR_TYPE(WireFormatLite::TYPE_SFIXED64); SERIALIZERS_FOR_TYPE(WireFormatLite::TYPE_SINT32); SERIALIZERS_FOR_TYPE(WireFormatLite::TYPE_SINT64); - SERIALIZERS_FOR_TYPE(FieldMetadata::kInlinedType); // Special cases case FieldMetadata::kSpecial: { io::ArrayOutputStream array_stream(array_output.ptr, INT_MAX); io::CodedOutputStream output(&array_stream); output.SetSerializationDeterministic(is_deterministic); - reinterpret_cast( - const_cast(field_metadata.ptr))( - base, field_metadata.offset, field_metadata.tag, - field_metadata.has_offset, &output); + func = reinterpret_cast( + const_cast(field_metadata.ptr)); + func(base, field_metadata.offset, field_metadata.tag, + field_metadata.has_offset, &output); array_output.ptr += output.ByteCount(); } break; default: @@ -725,18 +682,17 @@ uint8* SerializeInternalToArray(const uint8* base, #undef SERIALIZERS_FOR_TYPE void ExtensionSerializer(const uint8* ptr, uint32 offset, uint32 tag, - uint32 has_offset, - ::google::protobuf::io::CodedOutputStream* output) { + uint32 has_offset, io::CodedOutputStream* output) { reinterpret_cast(ptr + offset) ->SerializeWithCachedSizes(tag, has_offset, output); } void UnknownFieldSerializerLite(const uint8* ptr, uint32 offset, uint32 tag, uint32 has_offset, - ::google::protobuf::io::CodedOutputStream* output) { + io::CodedOutputStream* output) { output->WriteString( - reinterpret_cast(ptr + offset) - ->unknown_fields()); + reinterpret_cast(ptr + offset) + ->unknown_fields(&internal::GetEmptyString)); } MessageLite* DuplicateIfNonNullInternal(MessageLite* message) { @@ -749,6 +705,15 @@ MessageLite* DuplicateIfNonNullInternal(MessageLite* message) { } } +void GenericSwap(MessageLite* m1, 
MessageLite* m2) { + std::unique_ptr tmp(m1->New()); + tmp->CheckTypeAndMergeFrom(*m1); + m1->Clear(); + m1->CheckTypeAndMergeFrom(*m2); + m2->Clear(); + m2->CheckTypeAndMergeFrom(*tmp); +} + // Returns a message owned by this Arena. This may require Own()ing or // duplicating the message. MessageLite* GetOwnedMessageInternal(Arena* message_arena, @@ -770,12 +735,22 @@ namespace { void InitSCC_DFS(SCCInfoBase* scc) { if (scc->visit_status.load(std::memory_order_relaxed) != - SCCInfoBase::kUninitialized) return; + SCCInfoBase::kUninitialized) + return; scc->visit_status.store(SCCInfoBase::kRunning, std::memory_order_relaxed); - // Each base is followed by an array of pointers to deps - auto deps = reinterpret_cast(scc + 1); - for (int i = 0; i < scc->num_deps; i++) { - if (deps[i]) InitSCC_DFS(deps[i]); + // Each base is followed by an array of void*, containing first pointers to + // SCCInfoBase and then pointers-to-pointers to SCCInfoBase. + auto deps = reinterpret_cast(scc + 1); + auto strong_deps = reinterpret_cast(deps); + for (int i = 0; i < scc->num_deps; ++i) { + if (strong_deps[i]) InitSCC_DFS(strong_deps[i]); + } + auto implicit_weak_deps = + reinterpret_cast(deps + scc->num_deps); + for (int i = 0; i < scc->num_implicit_weak_deps; ++i) { + if (*implicit_weak_deps[i]) { + InitSCC_DFS(*implicit_weak_deps[i]); + } } scc->init_func(); // Mark done (note we use memory order release here), other threads could @@ -790,8 +765,17 @@ void InitSCCImpl(SCCInfoBase* scc) { static WrappedMutex mu{GOOGLE_PROTOBUF_LINKER_INITIALIZED}; // Either the default in case no initialization is running or the id of the // thread that is currently initializing. +#ifndef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP static std::atomic runner; auto me = std::this_thread::get_id(); +#else + // This is a lightweight replacement for std::thread::id. 
std::thread does not + // work on Windows XP SP2 with the latest VC++ libraries, because it utilizes + // the Concurrency Runtime that is only supported on Windows XP SP3 and above. + static std::atomic_llong runner(-1); + auto me = ::GetCurrentThreadId(); +#endif // #ifndef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP + // This will only happen because the constructor will call InitSCC while // constructing the default instance. if (runner.load(std::memory_order_relaxed) == me) { @@ -805,7 +789,13 @@ void InitSCCImpl(SCCInfoBase* scc) { mu.Lock(); runner.store(me, std::memory_order_relaxed); InitSCC_DFS(scc); + +#ifndef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP runner.store(std::thread::id{}, std::memory_order_relaxed); +#else + runner.store(-1, std::memory_order_relaxed); +#endif // #ifndef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP + mu.Unlock(); } diff --git a/third_party/protobuf-lite/google/protobuf/any.h b/third_party/protobuf-lite/google/protobuf/any.h new file mode 100644 index 00000000..e8f2cacf --- /dev/null +++ b/third_party/protobuf-lite/google/protobuf/any.h @@ -0,0 +1,150 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef GOOGLE_PROTOBUF_ANY_H__ +#define GOOGLE_PROTOBUF_ANY_H__ + +#include + +#include +#include +#include + +#include + +namespace google { +namespace protobuf { + +class FieldDescriptor; +class Message; + +namespace internal { + +extern const char kAnyFullTypeName[]; // "google.protobuf.Any". +extern const char kTypeGoogleApisComPrefix[]; // "type.googleapis.com/". +extern const char kTypeGoogleProdComPrefix[]; // "type.googleprod.com/". + +std::string GetTypeUrl(StringPiece message_name, + StringPiece type_url_prefix); + +// Helper class used to implement google::protobuf::Any. +class PROTOBUF_EXPORT AnyMetadata { + typedef ArenaStringPtr UrlType; + typedef ArenaStringPtr ValueType; + public: + // AnyMetadata does not take ownership of "type_url" and "value". + constexpr AnyMetadata(UrlType* type_url, ValueType* value) + : type_url_(type_url), value_(value) {} + + // Packs a message using the default type URL prefix: "type.googleapis.com". + // The resulted type URL will be "type.googleapis.com/". 
+ template + void PackFrom(const T& message) { + InternalPackFrom(message, kTypeGoogleApisComPrefix, T::FullMessageName()); + } + + void PackFrom(const Message& message); + + // Packs a message using the given type URL prefix. The type URL will be + // constructed by concatenating the message type's full name to the prefix + // with an optional "/" separator if the prefix doesn't already end with "/". + // For example, both PackFrom(message, "type.googleapis.com") and + // PackFrom(message, "type.googleapis.com/") yield the same result type + // URL: "type.googleapis.com/". + template + void PackFrom(const T& message, StringPiece type_url_prefix) { + InternalPackFrom(message, type_url_prefix, T::FullMessageName()); + } + + void PackFrom(const Message& message, StringPiece type_url_prefix); + + // Unpacks the payload into the given message. Returns false if the message's + // type doesn't match the type specified in the type URL (i.e., the full + // name after the last "/" of the type URL doesn't match the message's actual + // full name) or parsing the payload has failed. + template + bool UnpackTo(T* message) const { + return InternalUnpackTo(T::FullMessageName(), message); + } + + bool UnpackTo(Message* message) const; + + // Checks whether the type specified in the type URL matches the given type. + // A type is considered matching if its full name matches the full name after + // the last "/" in the type URL. + template + bool Is() const { + return InternalIs(T::FullMessageName()); + } + + private: + void InternalPackFrom(const MessageLite& message, + StringPiece type_url_prefix, + StringPiece type_name); + bool InternalUnpackTo(StringPiece type_name, + MessageLite* message) const; + bool InternalIs(StringPiece type_name) const; + + UrlType* type_url_; + ValueType* value_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(AnyMetadata); +}; + +// Get the proto type name from Any::type_url value. 
For example, passing +// "type.googleapis.com/rpc.QueryOrigin" will return "rpc.QueryOrigin" in +// *full_type_name. Returns false if the type_url does not have a "/" +// in the type url separating the full type name. +// +// NOTE: this function is available publicly as: +// google::protobuf::Any() // static method on the generated message type. +bool ParseAnyTypeUrl(StringPiece type_url, std::string* full_type_name); + +// Get the proto type name and prefix from Any::type_url value. For example, +// passing "type.googleapis.com/rpc.QueryOrigin" will return +// "type.googleapis.com/" in *url_prefix and "rpc.QueryOrigin" in +// *full_type_name. Returns false if the type_url does not have a "/" in the +// type url separating the full type name. +bool ParseAnyTypeUrl(StringPiece type_url, std::string* url_prefix, + std::string* full_type_name); + +// See if message is of type google.protobuf.Any, if so, return the descriptors +// for "type_url" and "value" fields. +bool GetAnyFieldDescriptors(const Message& message, + const FieldDescriptor** type_url_field, + const FieldDescriptor** value_field); + +} // namespace internal +} // namespace protobuf +} // namespace google + +#include + +#endif // GOOGLE_PROTOBUF_ANY_H__ diff --git a/third_party/protobuf-lite/google/protobuf/arena.h b/third_party/protobuf-lite/google/protobuf/arena.h index 9928c8e6..f28bebfd 100644 --- a/third_party/protobuf-lite/google/protobuf/arena.h +++ b/third_party/protobuf-lite/google/protobuf/arena.h @@ -33,7 +33,10 @@ #ifndef GOOGLE_PROTOBUF_ARENA_H__ #define GOOGLE_PROTOBUF_ARENA_H__ + #include +#include +#include #ifdef max #undef max // Visual Studio defines this macro #endif @@ -48,9 +51,15 @@ using type_info = ::type_info; #include #endif -#include -#include #include +#include +#include + +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif namespace google { namespace protobuf { @@ -58,29 +67,28 @@ namespace protobuf { struct ArenaOptions; // defined below } // 
namespace protobuf +} // namespace google -namespace quality_webanswers { - -void TempPrivateWorkAround(::google::protobuf::ArenaOptions* arena_options); - -} // namespace quality_webanswers - +namespace google { namespace protobuf { -class Arena; // defined below -class Message; // defined in message.h +class Arena; // defined below +class Message; // defined in message.h class MessageLite; +template +class Map; namespace arena_metrics { -void EnableArenaMetrics(::google::protobuf::ArenaOptions* options); +void EnableArenaMetrics(ArenaOptions* options); } // namespace arena_metrics namespace internal { -struct ArenaStringPtr; // defined in arenastring.h -class LazyField; // defined in lazy_field.h +struct ArenaStringPtr; // defined in arenastring.h +class LazyField; // defined in lazy_field.h +class EpsCopyInputStream; // defined in parse_context.h template class GenericTypeHandler; // defined in repeated_field.h @@ -94,15 +102,6 @@ template void arena_delete_object(void* object) { delete reinterpret_cast(object); } -inline void arena_free(void* object, size_t size) { -#if defined(__GXX_DELETE_WITH_SIZE__) || defined(__cpp_sized_deallocation) - ::operator delete(object, size); -#else - (void)size; - ::operator delete(object); -#endif -} - } // namespace internal // ArenaOptions provides optional additional parameters to arena construction @@ -143,49 +142,35 @@ struct ArenaOptions { max_block_size(kDefaultMaxBlockSize), initial_block(NULL), initial_block_size(0), - block_alloc(&::operator new), - block_dealloc(&internal::arena_free), - on_arena_init(NULL), - on_arena_reset(NULL), - on_arena_destruction(NULL), - on_arena_allocation(NULL) {} + block_alloc(kDefaultBlockAlloc), + block_dealloc(&internal::ArenaFree), + make_metrics_collector(nullptr) {} + + PROTOBUF_EXPORT static void* (*const kDefaultBlockAlloc)(size_t); private: - // Hooks for adding external functionality such as user-specific metrics - // collection, specific debugging abilities, etc. 
- // Init hook may return a pointer to a cookie to be stored in the arena. - // reset and destruction hooks will then be called with the same cookie - // pointer. This allows us to save an external object per arena instance and - // use it on the other hooks (Note: It is just as legal for init to return - // NULL and not use the cookie feature). - // on_arena_reset and on_arena_destruction also receive the space used in - // the arena just before the reset. - void* (*on_arena_init)(Arena* arena); - void (*on_arena_reset)(Arena* arena, void* cookie, uint64 space_used); - void (*on_arena_destruction)(Arena* arena, void* cookie, uint64 space_used); - - // type_info is promised to be static - its lifetime extends to - // match program's lifetime (It is given by typeid operator). - // Note: typeid(void) will be passed as allocated_type every time we - // intentionally want to avoid monitoring an allocation. (i.e. internal - // allocations for managing the arena) - void (*on_arena_allocation)(const std::type_info* allocated_type, - uint64 alloc_size, void* cookie); + // If make_metrics_collector is not nullptr, it will be called at Arena init + // time. It may return a pointer to a collector instance that will be notified + // of interesting events related to the arena. + internal::ArenaMetricsCollector* (*make_metrics_collector)(); // Constants define default starting block size and max block size for // arena allocator behavior -- see descriptions above. 
- static const size_t kDefaultStartBlockSize = 256; - static const size_t kDefaultMaxBlockSize = 8192; + static const size_t kDefaultStartBlockSize = + internal::ArenaImpl::kDefaultStartBlockSize; + static const size_t kDefaultMaxBlockSize = + internal::ArenaImpl::kDefaultMaxBlockSize; + + friend void arena_metrics::EnableArenaMetrics(ArenaOptions*); - friend void ::google::protobuf::arena_metrics::EnableArenaMetrics(ArenaOptions*); - friend void quality_webanswers::TempPrivateWorkAround(ArenaOptions*); friend class Arena; friend class ArenaOptionsTestFriend; + friend class internal::ArenaImpl; }; // Support for non-RTTI environments. (The metrics hooks API uses type // information.) -#ifndef GOOGLE_PROTOBUF_NO_RTTI +#if PROTOBUF_RTTI #define RTTI_TYPE_ID(type) (&typeid(type)) #else #define RTTI_TYPE_ID(type) (NULL) @@ -211,14 +196,15 @@ struct ArenaOptions { // any special requirements on the type T, and will invoke the object's // destructor when the arena is destroyed. // -// The arena message allocation protocol, required by CreateMessage, is as -// follows: +// The arena message allocation protocol, required by +// CreateMessage(Arena* arena, Args&&... args), is as follows: // -// - The type T must have (at least) two constructors: a constructor with no -// arguments, called when a T is allocated on the heap; and a constructor with -// a google::protobuf::Arena* argument, called when a T is allocated on an arena. If the -// second constructor is called with a NULL arena pointer, it must be -// equivalent to invoking the first (no-argument) constructor. +// - The type T must have (at least) two constructors: a constructor callable +// with `args` (without `arena`), called when a T is allocated on the heap; +// and a constructor callable with `Arena* arena, Args&&... args`, called when +// a T is allocated on an arena. If the second constructor is called with a +// NULL arena pointer, it must be equivalent to invoking the first +// (`args`-only) constructor. 
// // - The type T must have a particular type trait: a nested type // |InternalArenaConstructable_|. This is usually a typedef to |void|. If no @@ -231,23 +217,26 @@ struct ArenaOptions { // present on the type, then its destructor is always called when the // containing arena is destroyed. // -// - One- and two-user-argument forms of CreateMessage() also exist that -// forward these constructor arguments to T's constructor: for example, -// CreateMessage(Arena*, arg1, arg2) forwards to a constructor T(Arena*, -// arg1, arg2). -// // This protocol is implemented by all arena-enabled proto2 message classes as -// well as RepeatedPtrField. -// -// Do NOT subclass Arena. This class will be marked as final when C++11 is -// enabled. -class LIBPROTOBUF_EXPORT Arena { +// well as protobuf container types like RepeatedPtrField and Map. The protocol +// is internal to protobuf and is not guaranteed to be stable. Non-proto types +// should not rely on this protocol. +class PROTOBUF_EXPORT PROTOBUF_ALIGNAS(8) Arena final { public: - // Arena constructor taking custom options. See ArenaOptions below for + // Default constructor with sensible default options, tuned for average + // use-cases. + inline Arena() : impl_() {} + + // Construct an arena with default options, except for the supplied + // initial block. It is more efficient to use this constructor + // instead of passing ArenaOptions if the only configuration needed + // by the caller is supplying an initial block. + inline Arena(char* initial_block, size_t initial_block_size) + : impl_(initial_block, initial_block_size) {} + + // Arena constructor taking custom options. See ArenaOptions above for // descriptions of the options available. - explicit Arena(const ArenaOptions& options) : impl_(options) { - Init(options); - } + explicit Arena(const ArenaOptions& options) : impl_(options) {} // Block overhead. 
Use this as a guide for how much to over-allocate the // initial block if you want an allocation of size N to fit inside it. @@ -258,27 +247,10 @@ class LIBPROTOBUF_EXPORT Arena { static const size_t kBlockOverhead = internal::ArenaImpl::kBlockHeaderSize + internal::ArenaImpl::kSerialArenaSize; - // Default constructor with sensible default options, tuned for average - // use-cases. - Arena() : impl_(ArenaOptions()) { Init(ArenaOptions()); } + inline ~Arena() {} - ~Arena() { - if (hooks_cookie_) { - CallDestructorHooks(); - } - } - - void Init(const ArenaOptions& options) { - on_arena_allocation_ = options.on_arena_allocation; - on_arena_reset_ = options.on_arena_reset; - on_arena_destruction_ = options.on_arena_destruction; - // Call the initialization hook - if (options.on_arena_init != NULL) { - hooks_cookie_ = options.on_arena_init(this); - } else { - hooks_cookie_ = NULL; - } - } + // TODO(protobuf-team): Fix callers to use constructor and delete this method. + void Init(const ArenaOptions&) {} // API to create proto2 message objects on the arena. If the arena passed in // is NULL, then a heap allocated object is returned. Type T must be a message @@ -291,8 +263,7 @@ class LIBPROTOBUF_EXPORT Arena { // This function also accepts any type T that satisfies the arena message // allocation protocol, documented above. template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateMessage( - Arena* arena, Args&&... args) { + PROTOBUF_ALWAYS_INLINE static T* CreateMessage(Arena* arena, Args&&... args) { static_assert( InternalHelper::is_arena_constructable::value, "CreateMessage can only construct types that are ArenaConstructable"); @@ -318,8 +289,7 @@ class LIBPROTOBUF_EXPORT Arena { // if the object were allocated on the heap (except that the underlying memory // is obtained from the arena). template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* Create(Arena* arena, - Args&&... args) { + PROTOBUF_ALWAYS_INLINE static T* Create(Arena* arena, Args&&... 
args) { return CreateNoMessage(arena, is_arena_constructable(), std::forward(args)...); } @@ -331,8 +301,8 @@ class LIBPROTOBUF_EXPORT Arena { // (when compiled as C++11) that T is trivially default-constructible and // trivially destructible. template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateArray( - Arena* arena, size_t num_elements) { + PROTOBUF_ALWAYS_INLINE static T* CreateArray(Arena* arena, + size_t num_elements) { static_assert(std::is_pod::value, "CreateArray requires a trivially constructible type"); static_assert(std::is_trivially_destructible::value, @@ -355,32 +325,18 @@ class LIBPROTOBUF_EXPORT Arena { // may not include space used by other threads executing concurrently with // the call to this method. uint64 SpaceUsed() const { return impl_.SpaceUsed(); } - // DEPRECATED. Please use SpaceAllocated() and SpaceUsed(). - // - // Combines SpaceAllocated and SpaceUsed. Returns a pair of - // . - PROTOBUF_RUNTIME_DEPRECATED("Please use SpaceAllocated() and SpaceUsed()") - std::pair SpaceAllocatedAndUsed() const { - return std::make_pair(SpaceAllocated(), SpaceUsed()); - } // Frees all storage allocated by this arena after calling destructors // registered with OwnDestructor() and freeing objects registered with Own(). // Any objects allocated on this arena are unusable after this call. It also // returns the total space used by the arena which is the sums of the sizes // of the allocated blocks. This method is not thread-safe. - GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE uint64 Reset() { - // Call the reset hook - if (on_arena_reset_ != NULL) { - on_arena_reset_(this, hooks_cookie_, impl_.SpaceAllocated()); - } - return impl_.Reset(); - } + uint64 Reset() { return impl_.Reset(); } // Adds |object| to a list of heap-allocated objects to be freed with |delete| // when the arena is destroyed or reset. 
template - GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE void Own(T* object) { + PROTOBUF_NOINLINE void Own(T* object) { OwnInternal(object, std::is_convertible()); } @@ -390,7 +346,7 @@ class LIBPROTOBUF_EXPORT Arena { // normally only used for objects that are placement-newed into // arena-allocated memory. template - GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE void OwnDestructor(T* object) { + PROTOBUF_NOINLINE void OwnDestructor(T* object) { if (object != NULL) { impl_.AddCleanup(object, &internal::arena_destruct_object); } @@ -400,19 +356,18 @@ class LIBPROTOBUF_EXPORT Arena { // will be manually called when the arena is destroyed or reset. This differs // from OwnDestructor() in that any member function may be specified, not only // the class destructor. - GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE void OwnCustomDestructor( - void* object, void (*destruct)(void*)) { + PROTOBUF_NOINLINE void OwnCustomDestructor(void* object, + void (*destruct)(void*)) { impl_.AddCleanup(object, destruct); } // Retrieves the arena associated with |value| if |value| is an arena-capable - // message, or NULL otherwise. This differs from value->GetArena() in that the - // latter is a virtual call, while this method is a templated call that - // resolves at compile-time. + // message, or NULL otherwise. If possible, the call resolves at compile time. + // Note that we can often devirtualize calls to `value->GetArena()` so usually + // calling this method is unnecessary. 
template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static Arena* GetArena( - const T* value) { - return GetArenaInternal(value, is_arena_constructable()); + PROTOBUF_ALWAYS_INLINE static Arena* GetArena(const T* value) { + return GetArenaInternal(value); } template @@ -439,12 +394,25 @@ class LIBPROTOBUF_EXPORT Arena { sizeof(char)> is_arena_constructable; + template () + .GetArena())>::value, + int>::type = 0> + static char HasGetArena(decltype(&U::GetArena)); + template + static double HasGetArena(...); + + typedef std::integral_constant(nullptr)) == + sizeof(char)> + has_get_arena; + template static T* Construct(void* ptr, Args&&... args) { return new (ptr) T(std::forward(args)...); } - static Arena* GetArena(const T* p) { return p->GetArenaNoVirtual(); } + static Arena* GetArena(const T* p) { return p->GetArena(); } friend class Arena; }; @@ -468,9 +436,12 @@ class LIBPROTOBUF_EXPORT Arena { }; private: + template + struct has_get_arena : InternalHelper::has_get_arena {}; + template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateMessageInternal( - Arena* arena, Args&&... args) { + PROTOBUF_ALWAYS_INLINE static T* CreateMessageInternal(Arena* arena, + Args&&... args) { static_assert( InternalHelper::is_arena_constructable::value, "CreateMessage can only construct types that are ArenaConstructable"); @@ -485,8 +456,7 @@ class LIBPROTOBUF_EXPORT Arena { // slightly different. When the arena pointer is nullptr, it calls T() // instead of T(nullptr). template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateMessageInternal( - Arena* arena) { + PROTOBUF_ALWAYS_INLINE static T* CreateMessageInternal(Arena* arena) { static_assert( InternalHelper::is_arena_constructable::value, "CreateMessage can only construct types that are ArenaConstructable"); @@ -498,8 +468,8 @@ class LIBPROTOBUF_EXPORT Arena { } template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateInternal( - Arena* arena, Args&&... 
args) { + PROTOBUF_ALWAYS_INLINE static T* CreateInternal(Arena* arena, + Args&&... args) { if (arena == NULL) { return new T(std::forward(args)...); } else { @@ -508,28 +478,31 @@ class LIBPROTOBUF_EXPORT Arena { } } - void CallDestructorHooks(); - void OnArenaAllocation(const std::type_info* allocated_type, size_t n) const; inline void AllocHook(const std::type_info* allocated_type, size_t n) const { - if (GOOGLE_PREDICT_FALSE(hooks_cookie_ != NULL)) { - OnArenaAllocation(allocated_type, n); - } + impl_.RecordAlloc(allocated_type, n); } - // Allocate and also optionally call on_arena_allocation callback with the - // allocated type info when the hooks are in place in ArenaOptions and - // the cookie is not null. + // Allocate and also optionally call collector with the allocated type info + // when allocation recording is enabled. template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void* AllocateInternal( - bool skip_explicit_ownership) { + PROTOBUF_ALWAYS_INLINE void* AllocateInternal(bool skip_explicit_ownership) { const size_t n = internal::AlignUpTo8(sizeof(T)); - AllocHook(RTTI_TYPE_ID(T), n); // Monitor allocation if needed. + impl_.RecordAlloc(RTTI_TYPE_ID(T), n); if (skip_explicit_ownership) { - return impl_.AllocateAligned(n); + return AllocateAlignedTo(sizeof(T)); } else { - return impl_.AllocateAlignedAndAddCleanup( - n, &internal::arena_destruct_object); + if (alignof(T) <= 8) { + return impl_.AllocateAlignedAndAddCleanup( + n, &internal::arena_destruct_object); + } else { + auto ptr = + reinterpret_cast(impl_.AllocateAlignedAndAddCleanup( + sizeof(T) + alignof(T) - 8, + &internal::arena_destruct_object)); + return reinterpret_cast((ptr + alignof(T) - 8) & + (~alignof(T) + 1)); + } } } @@ -539,27 +512,29 @@ class LIBPROTOBUF_EXPORT Arena { // user code. These are used only internally from LazyField and Repeated // fields, since they are designed to work in all mode combinations. 
template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static Msg* DoCreateMaybeMessage( - Arena* arena, std::true_type, Args&&... args) { + PROTOBUF_ALWAYS_INLINE static Msg* DoCreateMaybeMessage(Arena* arena, + std::true_type, + Args&&... args) { return CreateMessageInternal(arena, std::forward(args)...); } template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* DoCreateMaybeMessage( - Arena* arena, std::false_type, Args&&... args) { + PROTOBUF_ALWAYS_INLINE static T* DoCreateMaybeMessage(Arena* arena, + std::false_type, + Args&&... args) { return CreateInternal(arena, std::forward(args)...); } template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateMaybeMessage( - Arena* arena, Args&&... args) { + PROTOBUF_ALWAYS_INLINE static T* CreateMaybeMessage(Arena* arena, + Args&&... args) { return DoCreateMaybeMessage(arena, is_arena_constructable(), std::forward(args)...); } template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateNoMessage( - Arena* arena, std::true_type, Args&&... args) { + PROTOBUF_ALWAYS_INLINE static T* CreateNoMessage(Arena* arena, std::true_type, + Args&&... args) { // User is constructing with Create() despite the fact that T supports arena // construction. In this case we have to delegate to CreateInternal(), and // we can't use any CreateMaybeMessage() specialization that may be defined. @@ -567,8 +542,9 @@ class LIBPROTOBUF_EXPORT Arena { } template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static T* CreateNoMessage( - Arena* arena, std::false_type, Args&&... args) { + PROTOBUF_ALWAYS_INLINE static T* CreateNoMessage(Arena* arena, + std::false_type, + Args&&... args) { // User is constructing with Create() and the type does not support arena // construction. In this case we can delegate to CreateMaybeMessage() and // use any specialization that may be available for that. 
@@ -578,50 +554,52 @@ class LIBPROTOBUF_EXPORT Arena { // Just allocate the required size for the given type assuming the // type has a trivial constructor. template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE T* CreateInternalRawArray( - size_t num_elements) { + PROTOBUF_ALWAYS_INLINE T* CreateInternalRawArray(size_t num_elements) { GOOGLE_CHECK_LE(num_elements, std::numeric_limits::max() / sizeof(T)) << "Requested size is too large to fit into size_t."; + // We count on compiler to realize that if sizeof(T) is a multiple of + // 8 AlignUpTo can be elided. const size_t n = internal::AlignUpTo8(sizeof(T) * num_elements); // Monitor allocation if needed. - AllocHook(RTTI_TYPE_ID(T), n); - return static_cast(impl_.AllocateAligned(n)); + impl_.RecordAlloc(RTTI_TYPE_ID(T), n); + return static_cast(AllocateAlignedTo(n)); } template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE T* DoCreate( - bool skip_explicit_ownership, Args&&... args) { + PROTOBUF_ALWAYS_INLINE T* DoCreate(bool skip_explicit_ownership, + Args&&... args) { return new (AllocateInternal(skip_explicit_ownership)) T(std::forward(args)...); } template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE T* DoCreateMessage(Args&&... args) { + PROTOBUF_ALWAYS_INLINE T* DoCreateMessage(Args&&... args) { return InternalHelper::Construct( AllocateInternal(InternalHelper::is_destructor_skippable::value), this, std::forward(args)...); } // CreateInArenaStorage is used to implement map field. Without it, - // google::protobuf::Map need to call generated message's protected arena constructor, - // which needs to declare google::protobuf::Map as friend of generated message. - template - static void CreateInArenaStorage(T* ptr, Arena* arena) { + // Map need to call generated message's protected arena constructor, + // which needs to declare Map as friend of generated message. + template + static void CreateInArenaStorage(T* ptr, Arena* arena, Args&&... 
args) { CreateInArenaStorageInternal(ptr, arena, - typename is_arena_constructable::type()); + typename is_arena_constructable::type(), + std::forward(args)...); RegisterDestructorInternal( ptr, arena, typename InternalHelper::is_destructor_skippable::type()); } - template + template static void CreateInArenaStorageInternal(T* ptr, Arena* arena, - std::true_type) { - InternalHelper::Construct(ptr, arena); + std::true_type, Args&&... args) { + InternalHelper::Construct(ptr, arena, std::forward(args)...); } - template + template static void CreateInArenaStorageInternal(T* ptr, Arena* /* arena */, - std::false_type) { - new (ptr) T(); + std::false_type, Args&&... args) { + new (ptr) T(std::forward(args)...); } template @@ -635,19 +613,17 @@ class LIBPROTOBUF_EXPORT Arena { // These implement Own(), which registers an object for deletion (destructor // call and operator delete()). The second parameter has type 'true_type' if T - // is a subtype of ::google::protobuf::Message and 'false_type' otherwise. Collapsing + // is a subtype of Message and 'false_type' otherwise. Collapsing // all template instantiations to one for generic Message reduces code size, // using the virtual destructor instead. template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void OwnInternal(T* object, - std::true_type) { + PROTOBUF_ALWAYS_INLINE void OwnInternal(T* object, std::true_type) { if (object != NULL) { impl_.AddCleanup(object, &internal::arena_delete_object); } } template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void OwnInternal(T* object, - std::false_type) { + PROTOBUF_ALWAYS_INLINE void OwnInternal(T* object, std::false_type) { if (object != NULL) { impl_.AddCleanup(object, &internal::arena_delete_object); } @@ -655,40 +631,53 @@ class LIBPROTOBUF_EXPORT Arena { // Implementation for GetArena(). Only message objects with // InternalArenaConstructable_ tags can be associated with an arena, and such - // objects must implement a GetArenaNoVirtual() method. 
- template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static Arena* GetArenaInternal( - const T* value, std::true_type) { + // objects must implement a GetArena() method. + template ::value, int>::type = 0> + PROTOBUF_ALWAYS_INLINE static Arena* GetArenaInternal(const T* value) { return InternalHelper::GetArena(value); } - - template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static Arena* GetArenaInternal( - const T* /* value */, std::false_type) { - return NULL; + template ::value && + has_get_arena::value, + int>::type = 0> + PROTOBUF_ALWAYS_INLINE static Arena* GetArenaInternal(const T* value) { + return value->GetArena(); + } + template ::value && + !has_get_arena::value, + int>::type = 0> + PROTOBUF_ALWAYS_INLINE static Arena* GetArenaInternal(const T* value) { + (void)value; + return nullptr; } // For friends of arena. void* AllocateAligned(size_t n) { - AllocHook(NULL, n); - return impl_.AllocateAligned(internal::AlignUpTo8(n)); + return AllocateAlignedNoHook(internal::AlignUpTo8(n)); + } + template + void* AllocateAlignedTo(size_t n) { + static_assert(Align > 0, "Alignment must be greater than 0"); + static_assert((Align & (Align - 1)) == 0, "Alignment must be power of two"); + if (Align <= 8) return AllocateAligned(n); + // TODO(b/151247138): if the pointer would have been aligned already, + // this is wasting space. We should pass the alignment down. 
+ uintptr_t ptr = reinterpret_cast(AllocateAligned(n + Align - 8)); + ptr = (ptr + Align - 1) & (~Align + 1); + return reinterpret_cast(ptr); } - internal::ArenaImpl impl_; - - void (*on_arena_allocation_)(const std::type_info* allocated_type, - uint64 alloc_size, void* cookie); - void (*on_arena_reset_)(Arena* arena, void* cookie, uint64 space_used); - void (*on_arena_destruction_)(Arena* arena, void* cookie, uint64 space_used); + void* AllocateAlignedNoHook(size_t n); - // The arena may save a cookie it receives from the external on_init hook - // and then use it when calling the on_reset and on_destruction hooks. - void* hooks_cookie_; + internal::ArenaImpl impl_; template friend class internal::GenericTypeHandler; friend struct internal::ArenaStringPtr; // For AllocateAligned. friend class internal::LazyField; // For CreateMaybeMessage. + friend class internal::EpsCopyInputStream; // For parser performance friend class MessageLite; template friend class Map; @@ -698,6 +687,8 @@ class LIBPROTOBUF_EXPORT Arena { #undef RTTI_TYPE_ID } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_ARENA_H__ diff --git a/third_party/protobuf-lite/google/protobuf/arena_impl.h b/third_party/protobuf-lite/google/protobuf/arena_impl.h index f648f166..13772686 100644 --- a/third_party/protobuf-lite/google/protobuf/arena_impl.h +++ b/third_party/protobuf-lite/google/protobuf/arena_impl.h @@ -39,84 +39,173 @@ #include #include -#include - #ifdef ADDRESS_SANITIZER #include #endif // ADDRESS_SANITIZER -namespace google { +#include + +namespace google { namespace protobuf { + +struct ArenaOptions; + namespace internal { inline size_t AlignUpTo8(size_t n) { // Align n to next multiple of 8 (from Hacker's Delight, Chapter 3.) - return (n + 7) & -8; + return (n + 7) & static_cast(-8); } -// This class provides the core Arena memory allocation library. Different -// implementations only need to implement the public interface below. 
-// Arena is not a template type as that would only be useful if all protos -// in turn would be templates, which will/cannot happen. However separating -// the memory allocation part from the cruft of the API users expect we can -// use #ifdef the select the best implementation based on hardware / OS. -class LIBPROTOBUF_EXPORT ArenaImpl { +using LifecycleIdAtomic = uint64_t; + +void PROTOBUF_EXPORT ArenaFree(void* object, size_t size); + +// MetricsCollector collects stats for a particular arena. +class PROTOBUF_EXPORT ArenaMetricsCollector { public: - struct Options { - size_t start_block_size; - size_t max_block_size; - char* initial_block; - size_t initial_block_size; - void* (*block_alloc)(size_t); - void (*block_dealloc)(void*, size_t); + virtual ~ArenaMetricsCollector(); + + // Invoked when the arena is about to be destroyed. This method will + // typically finalize any metric collection and delete the collector. + // space_allocated is the space used by the arena. + virtual void OnDestroy(uint64 space_allocated) = 0; + + // OnReset() is called when the associated arena is reset. + // space_allocated is the space used by the arena just before the reset. + virtual void OnReset(uint64 space_allocated) = 0; + + // Does OnAlloc() need to be called? If false, metric collection overhead + // will be reduced since we will not do extra work per allocation. + virtual bool RecordAllocs() = 0; + + // OnAlloc is called when an allocation happens. + // type_info is promised to be static - its lifetime extends to + // match program's lifetime (It is given by typeid operator). + // Note: typeid(void) will be passed as allocated_type every time we + // intentionally want to avoid monitoring an allocation. (i.e. 
internal + // allocations for managing the arena) + virtual void OnAlloc(const std::type_info* allocated_type, + uint64 alloc_size) = 0; +}; - template - explicit Options(const O& options) - : start_block_size(options.start_block_size), - max_block_size(options.max_block_size), - initial_block(options.initial_block), - initial_block_size(options.initial_block_size), - block_alloc(options.block_alloc), - block_dealloc(options.block_dealloc) {} - }; +class ArenaImpl; - template - explicit ArenaImpl(const O& options) : options_(options) { - if (options_.initial_block != NULL && options_.initial_block_size > 0) { - GOOGLE_CHECK_GE(options_.initial_block_size, sizeof(Block)) - << ": Initial block size too small for header."; - initial_block_ = reinterpret_cast(options_.initial_block); - } else { - initial_block_ = NULL; +// A thread-unsafe Arena that can only be used within its owning thread. +class PROTOBUF_EXPORT SerialArena { + public: + // Blocks are variable length malloc-ed objects. The following structure + // describes the common header for all blocks. + class PROTOBUF_EXPORT Block { + public: + Block(size_t size, Block* next, bool special, bool user_owned) + : next_and_bits_(reinterpret_cast(next) | (special ? 1 : 0) | + (user_owned ? 2 : 0)), + pos_(kBlockHeaderSize), + size_(size) { + GOOGLE_DCHECK_EQ(reinterpret_cast(next) & 3, 0u); } - Init(); - } + char* Pointer(size_t n) { + GOOGLE_DCHECK(n <= size_); + return reinterpret_cast(this) + n; + } - // Destructor deletes all owned heap allocated objects, and destructs objects - // that have non-trivial destructors, except for proto2 message objects whose - // destructors can be skipped. Also, frees all blocks except the initial block - // if it was passed in. - ~ArenaImpl(); + // One of the blocks may be special. This is either a user-supplied + // initial block, or a block we created at startup to hold Options info. + // A special block is not deleted by Reset. 
+ bool special() const { return (next_and_bits_ & 1) != 0; } - uint64 Reset(); + // Whether or not this current block is owned by the user. + // Only special blocks can be user_owned. + bool user_owned() const { return (next_and_bits_ & 2) != 0; } - uint64 SpaceAllocated() const; + Block* next() const { + const uintptr_t bottom_bits = 3; + return reinterpret_cast(next_and_bits_ & ~bottom_bits); + } + + void clear_next() { + next_and_bits_ &= 3; // Set next to nullptr, preserve bottom bits. + } + + size_t pos() const { return pos_; } + size_t size() const { return size_; } + void set_pos(size_t pos) { pos_ = pos; } + + private: + // Holds pointer to next block for this thread + special/user_owned bits. + uintptr_t next_and_bits_; + + size_t pos_; + size_t size_; + // data follows + }; + + // The allocate/free methods here are a little strange, since SerialArena is + // allocated inside a Block which it also manages. This is to avoid doing + // an extra allocation for the SerialArena itself. + + // Creates a new SerialArena inside Block* and returns it. + static SerialArena* New(Block* b, void* owner, ArenaImpl* arena); + + void CleanupList(); uint64 SpaceUsed() const; - void* AllocateAligned(size_t n); + bool HasSpace(size_t n) { return n <= static_cast(limit_ - ptr_); } - void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*)); + void* AllocateAligned(size_t n) { + GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n); // Must be already aligned. + GOOGLE_DCHECK_GE(limit_, ptr_); + if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) { + return AllocateAlignedFallback(n); + } + void* ret = ptr_; + ptr_ += n; +#ifdef ADDRESS_SANITIZER + ASAN_UNPOISON_MEMORY_REGION(ret, n); +#endif // ADDRESS_SANITIZER + return ret; + } - // Add object pointer and cleanup function pointer to the list. - void AddCleanup(void* elem, void (*cleanup)(void*)); + // Allocate space if the current region provides enough space. 
+ bool MaybeAllocateAligned(size_t n, void** out) { + GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n); // Must be already aligned. + GOOGLE_DCHECK_GE(limit_, ptr_); + if (PROTOBUF_PREDICT_FALSE(!HasSpace(n))) return false; + void* ret = ptr_; + ptr_ += n; +#ifdef ADDRESS_SANITIZER + ASAN_UNPOISON_MEMORY_REGION(ret, n); +#endif // ADDRESS_SANITIZER + *out = ret; + return true; + } - private: - void* AllocateAlignedFallback(size_t n); - void* AllocateAlignedAndAddCleanupFallback(size_t n, void (*cleanup)(void*)); - void AddCleanupFallback(void* elem, void (*cleanup)(void*)); + void AddCleanup(void* elem, void (*cleanup)(void*)) { + if (PROTOBUF_PREDICT_FALSE(cleanup_ptr_ == cleanup_limit_)) { + AddCleanupFallback(elem, cleanup); + return; + } + cleanup_ptr_->elem = elem; + cleanup_ptr_->cleanup = cleanup; + cleanup_ptr_++; + } + + void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*)) { + void* ret = AllocateAligned(n); + AddCleanup(ret, cleanup); + return ret; + } + + Block* head() const { return head_; } + void* owner() const { return owner_; } + SerialArena* next() const { return next_; } + void set_next(SerialArena* next) { next_ = next; } + static Block* NewBlock(Block* last_block, size_t min_bytes, ArenaImpl* arena); + private: // Node contains the ptr of the object to be cleaned up and the associated // cleanup function ptr. struct CleanupNode { @@ -134,138 +223,152 @@ class LIBPROTOBUF_EXPORT ArenaImpl { CleanupNode nodes[1]; // True length is |size|. }; - class Block; + ArenaImpl* arena_; // Containing arena. + void* owner_; // &ThreadCache of this thread; + Block* head_; // Head of linked list of blocks. + CleanupChunk* cleanup_; // Head of cleanup list. + SerialArena* next_; // Next SerialArena in this linked list. - // A thread-unsafe Arena that can only be used within its owning thread. 
- class LIBPROTOBUF_EXPORT SerialArena { - public: - // The allocate/free methods here are a little strange, since SerialArena is - // allocated inside a Block which it also manages. This is to avoid doing - // an extra allocation for the SerialArena itself. - - // Creates a new SerialArena inside Block* and returns it. - static SerialArena* New(Block* b, void* owner, ArenaImpl* arena); - - // Destroys this SerialArena, freeing all blocks with the given dealloc - // function, except any block equal to |initial_block|. - static uint64 Free(SerialArena* serial, Block* initial_block, - void (*block_dealloc)(void*, size_t)); - - void CleanupList(); - uint64 SpaceUsed() const; - - void* AllocateAligned(size_t n) { - GOOGLE_DCHECK_EQ(internal::AlignUpTo8(n), n); // Must be already aligned. - GOOGLE_DCHECK_GE(limit_, ptr_); - if (GOOGLE_PREDICT_FALSE(static_cast(limit_ - ptr_) < n)) { - return AllocateAlignedFallback(n); - } - void* ret = ptr_; - ptr_ += n; -#ifdef ADDRESS_SANITIZER - ASAN_UNPOISON_MEMORY_REGION(ret, n); -#endif // ADDRESS_SANITIZER - return ret; - } + // Next pointer to allocate from. Always 8-byte aligned. Points inside + // head_ (and head_->pos will always be non-canonical). We keep these + // here to reduce indirection. + char* ptr_; + char* limit_; - void AddCleanup(void* elem, void (*cleanup)(void*)) { - if (GOOGLE_PREDICT_FALSE(cleanup_ptr_ == cleanup_limit_)) { - AddCleanupFallback(elem, cleanup); - return; - } - cleanup_ptr_->elem = elem; - cleanup_ptr_->cleanup = cleanup; - cleanup_ptr_++; + // Next CleanupList members to append to. These point inside cleanup_. + CleanupNode* cleanup_ptr_; + CleanupNode* cleanup_limit_; + + void* AllocateAlignedFallback(size_t n); + void AddCleanupFallback(void* elem, void (*cleanup)(void*)); + void CleanupListFallback(); + + public: + static constexpr size_t kBlockHeaderSize = + (sizeof(Block) + 7) & static_cast(-8); +}; + +// This class provides the core Arena memory allocation library. 
Different +// implementations only need to implement the public interface below. +// Arena is not a template type as that would only be useful if all protos +// in turn would be templates, which will/cannot happen. However separating +// the memory allocation part from the cruft of the API users expect we can +// use #ifdef the select the best implementation based on hardware / OS. +class PROTOBUF_EXPORT ArenaImpl { + public: + static const size_t kDefaultStartBlockSize = 256; + static const size_t kDefaultMaxBlockSize = 8192; + + ArenaImpl() { Init(false); } + + ArenaImpl(char* mem, size_t size) { + GOOGLE_DCHECK_EQ(reinterpret_cast(mem) & 7, 0u); + Init(false); + + // Ignore initial block if it is too small. + if (mem != nullptr && size >= kBlockHeaderSize + kSerialArenaSize) { + SetInitialBlock(new (mem) SerialArena::Block(size, nullptr, true, true)); } + } + + explicit ArenaImpl(const ArenaOptions& options); - void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*)) { - void* ret = AllocateAligned(n); - AddCleanup(ret, cleanup); - return ret; + // Destructor deletes all owned heap allocated objects, and destructs objects + // that have non-trivial destructors, except for proto2 message objects whose + // destructors can be skipped. Also, frees all blocks except the initial block + // if it was passed in. + ~ArenaImpl(); + + uint64 Reset(); + + uint64 SpaceAllocated() const; + uint64 SpaceUsed() const; + + void* AllocateAligned(size_t n) { + SerialArena* arena; + if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFast(&arena))) { + return arena->AllocateAligned(n); + } else { + return AllocateAlignedFallback(n); } + } - void* owner() const { return owner_; } - SerialArena* next() const { return next_; } - void set_next(SerialArena* next) { next_ = next; } + // This function allocates n bytes if the common happy case is true and + // returns true. Otherwise does nothing and returns false. 
This strange + // semantics is necessary to allow callers to program functions that only + // have fallback function calls in tail position. This substantially improves + // code for the happy path. + PROTOBUF_ALWAYS_INLINE bool MaybeAllocateAligned(size_t n, void** out) { + SerialArena* a; + if (PROTOBUF_PREDICT_TRUE(GetSerialArenaFromThreadCache(&a))) { + return a->MaybeAllocateAligned(n, out); + } + return false; + } - private: - void* AllocateAlignedFallback(size_t n); - void AddCleanupFallback(void* elem, void (*cleanup)(void*)); - void CleanupListFallback(); - - ArenaImpl* arena_; // Containing arena. - void* owner_; // &ThreadCache of this thread; - Block* head_; // Head of linked list of blocks. - CleanupChunk* cleanup_; // Head of cleanup list. - SerialArena* next_; // Next SerialArena in this linked list. - - // Next pointer to allocate from. Always 8-byte aligned. Points inside - // head_ (and head_->pos will always be non-canonical). We keep these - // here to reduce indirection. - char* ptr_; - char* limit_; - - // Next CleanupList members to append to. These point inside cleanup_. - CleanupNode* cleanup_ptr_; - CleanupNode* cleanup_limit_; - }; + void* AllocateAlignedAndAddCleanup(size_t n, void (*cleanup)(void*)); - // Blocks are variable length malloc-ed objects. The following structure - // describes the common header for all blocks. - class LIBPROTOBUF_EXPORT Block { - public: - Block(size_t size, Block* next); + // Add object pointer and cleanup function pointer to the list. 
+ void AddCleanup(void* elem, void (*cleanup)(void*)); - char* Pointer(size_t n) { - GOOGLE_DCHECK(n <= size_); - return reinterpret_cast(this) + n; + inline void RecordAlloc(const std::type_info* allocated_type, + size_t n) const { + if (PROTOBUF_PREDICT_FALSE(record_allocs())) { + options_->metrics_collector->OnAlloc(allocated_type, n); } + } - Block* next() const { return next_; } - size_t pos() const { return pos_; } - size_t size() const { return size_; } - void set_pos(size_t pos) { pos_ = pos; } + std::pair NewBuffer(size_t last_size, size_t min_bytes); - private: - Block* next_; // Next block for this thread. - size_t pos_; - size_t size_; - // data follows - }; + private: + // Pointer to a linked list of SerialArena. + std::atomic threads_; + std::atomic hint_; // Fast thread-local block access + std::atomic space_allocated_; // Total size of all allocated blocks. - struct ThreadCache { -#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL) - // If we are using the ThreadLocalStorage class to store the ThreadCache, - // then the ThreadCache's default constructor has to be responsible for - // initializing it. - ThreadCache() : last_lifecycle_id_seen(-1), last_serial_arena(NULL) {} -#endif + // Unique for each arena. Changes on Reset(). + // Least-significant-bit is 1 iff allocations should be recorded. + uint64 lifecycle_id_; - // The ThreadCache is considered valid as long as this matches the - // lifecycle_id of the arena being used. - int64 last_lifecycle_id_seen; - SerialArena* last_serial_arena; + struct Options { + size_t start_block_size; + size_t max_block_size; + void* (*block_alloc)(size_t); + void (*block_dealloc)(void*, size_t); + ArenaMetricsCollector* metrics_collector; }; - static std::atomic lifecycle_id_generator_; -#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL) - // Android ndk does not support GOOGLE_THREAD_LOCAL keyword so we use a custom thread - // local storage class we implemented. - // iOS also does not support the GOOGLE_THREAD_LOCAL keyword. 
- static ThreadCache& thread_cache(); -#elif defined(PROTOBUF_USE_DLLS) - // Thread local variables cannot be exposed through DLL interface but we can - // wrap them in static functions. - static ThreadCache& thread_cache(); -#else - static GOOGLE_THREAD_LOCAL ThreadCache thread_cache_; - static ThreadCache& thread_cache() { return thread_cache_; } -#endif - void Init(); + Options* options_ = nullptr; + + void* AllocateAlignedFallback(size_t n); + void* AllocateAlignedAndAddCleanupFallback(size_t n, void (*cleanup)(void*)); + void AddCleanupFallback(void* elem, void (*cleanup)(void*)); + + void Init(bool record_allocs); + void SetInitialBlock( + SerialArena::Block* block); // Can be called right after Init() + + // Return true iff allocations should be recorded in a metrics collector. + inline bool record_allocs() const { return lifecycle_id_ & 1; } + + // Invoke fn(b) for every Block* b. + template + void PerBlock(Functor fn) { + // By omitting an Acquire barrier we ensure that any user code that doesn't + // properly synchronize Reset() or the destructor will throw a TSAN warning. + SerialArena* serial = threads_.load(std::memory_order_relaxed); + while (serial) { + // fn() may delete blocks and arenas, so fetch next pointers before fn(); + SerialArena* cur = serial; + serial = serial->next(); + for (auto* block = cur->head(); block != nullptr;) { + auto* b = block; + block = b->next(); + fn(b); + } + } + } - // Free all blocks and return the total space used which is the sums of sizes - // of the all the allocated blocks. - uint64 FreeBlocks(); // Delete or Destruct all objects owned by the arena. void CleanupList(); @@ -279,23 +382,85 @@ class LIBPROTOBUF_EXPORT ArenaImpl { hint_.store(serial, std::memory_order_release); } + PROTOBUF_ALWAYS_INLINE bool GetSerialArenaFast(SerialArena** arena) { + if (GetSerialArenaFromThreadCache(arena)) return true; - std::atomic - threads_; // Pointer to a linked list of SerialArena. 
- std::atomic hint_; // Fast thread-local block access - std::atomic space_allocated_; // Total size of all allocated blocks. + // Check whether we own the last accessed SerialArena on this arena. This + // fast path optimizes the case where a single thread uses multiple arenas. + ThreadCache* tc = &thread_cache(); + SerialArena* serial = hint_.load(std::memory_order_acquire); + if (PROTOBUF_PREDICT_TRUE(serial != NULL && serial->owner() == tc)) { + *arena = serial; + return true; + } + return false; + } - Block *initial_block_; // If non-NULL, points to the block that came from - // user data. + PROTOBUF_ALWAYS_INLINE bool GetSerialArenaFromThreadCache( + SerialArena** arena) { + // If this thread already owns a block in this arena then try to use that. + // This fast path optimizes the case where multiple threads allocate from + // the same arena. + ThreadCache* tc = &thread_cache(); + if (PROTOBUF_PREDICT_TRUE(tc->last_lifecycle_id_seen == lifecycle_id_)) { + *arena = tc->last_serial_arena; + return true; + } + return false; + } + SerialArena* GetSerialArenaFallback(void* me); - Block* NewBlock(Block* last_block, size_t min_bytes); +#ifdef _MSC_VER +#pragma warning(disable : 4324) +#endif + struct alignas(64) ThreadCache { +#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL) + // If we are using the ThreadLocalStorage class to store the ThreadCache, + // then the ThreadCache's default constructor has to be responsible for + // initializing it. + ThreadCache() + : next_lifecycle_id(0), + last_lifecycle_id_seen(-1), + last_serial_arena(NULL) {} +#endif - SerialArena* GetSerialArena(); - bool GetSerialArenaFast(SerialArena** arena); - SerialArena* GetSerialArenaFallback(void* me); - int64 lifecycle_id_; // Unique for each arena. Changes on Reset(). + // Number of per-thread lifecycle IDs to reserve. Must be power of two. + // To reduce contention on a global atomic, each thread reserves a batch of + // IDs. 
The following number is calculated based on a stress test with + // ~6500 threads all frequently allocating a new arena. + static constexpr size_t kPerThreadIds = 256; + // Next lifecycle ID available to this thread. We need to reserve a new + // batch, if `next_lifecycle_id & (kPerThreadIds - 1) == 0`. + uint64 next_lifecycle_id; + // The ThreadCache is considered valid as long as this matches the + // lifecycle_id of the arena being used. + uint64 last_lifecycle_id_seen; + SerialArena* last_serial_arena; + }; - Options options_; + // Lifecycle_id can be highly contended variable in a situation of lots of + // arena creation. Make sure that other global variables are not sharing the + // cacheline. +#ifdef _MSC_VER +#pragma warning(disable : 4324) +#endif + struct alignas(64) CacheAlignedLifecycleIdGenerator { + std::atomic id; + }; + static CacheAlignedLifecycleIdGenerator lifecycle_id_generator_; +#if defined(GOOGLE_PROTOBUF_NO_THREADLOCAL) + // Android ndk does not support __thread keyword so we use a custom thread + // local storage class we implemented. + // iOS also does not support the __thread keyword. + static ThreadCache& thread_cache(); +#elif defined(PROTOBUF_USE_DLLS) + // Thread local variables cannot be exposed through DLL interface but we can + // wrap them in static functions. + static ThreadCache& thread_cache(); +#else + static PROTOBUF_THREAD_LOCAL ThreadCache thread_cache_; + static ThreadCache& thread_cache() { return thread_cache_; } +#endif GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArenaImpl); // All protos have pointers back to the arena hence Arena must have @@ -306,8 +471,11 @@ class LIBPROTOBUF_EXPORT ArenaImpl { public: // kBlockHeaderSize is sizeof(Block), aligned up to the nearest multiple of 8 // to protect the invariant that pos is always at a multiple of 8. 
- static const size_t kBlockHeaderSize = (sizeof(Block) + 7) & -8; - static const size_t kSerialArenaSize = (sizeof(SerialArena) + 7) & -8; + static constexpr size_t kBlockHeaderSize = SerialArena::kBlockHeaderSize; + static constexpr size_t kSerialArenaSize = + (sizeof(SerialArena) + 7) & static_cast(-8); + static constexpr size_t kOptionsSize = + (sizeof(Options) + 7) & static_cast(-8); static_assert(kBlockHeaderSize % 8 == 0, "kBlockHeaderSize must be a multiple of 8."); static_assert(kSerialArenaSize % 8 == 0, @@ -316,6 +484,8 @@ class LIBPROTOBUF_EXPORT ArenaImpl { } // namespace internal } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_ARENA_IMPL_H__ diff --git a/third_party/protobuf-lite/google/protobuf/arenastring.h b/third_party/protobuf-lite/google/protobuf/arenastring.h index 168fc972..60307b27 100644 --- a/third_party/protobuf-lite/google/protobuf/arenastring.h +++ b/third_party/protobuf-lite/google/protobuf/arenastring.h @@ -32,372 +32,344 @@ #define GOOGLE_PROTOBUF_ARENASTRING_H__ #include +#include +#include -#include -#include -#include #include -#include +#include +#include +#include + +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif -// This is the implementation of arena string fields written for the open-source -// release. The ArenaStringPtr struct below is an internal implementation class -// and *should not be used* by user code. It is used to collect string -// operations together into one place and abstract away the underlying -// string-field pointer representation, so that (for example) an alternate -// implementation that knew more about ::std::string's internals could integrate more -// closely with the arena allocator. namespace google { namespace protobuf { namespace internal { +// Lazy string instance to support string fields with non-empty default. +// These are initialized on the first call to .get(). 
+class PROTOBUF_EXPORT LazyString { + public: + // We explicitly make LazyString an aggregate so that MSVC can do constant + // initialization on it without marking it `constexpr`. + // We do not want to use `constexpr` because it makes it harder to have extern + // storage for it and causes library bloat. + struct InitValue { + const char* ptr; + size_t size; + }; + // We keep a union of the initialization value and the std::string to save on + // space. We don't need the string array after Init() is done. + union { + mutable InitValue init_value_; + alignas(std::string) mutable char string_buf_[sizeof(std::string)]; + }; + mutable std::atomic inited_; + + const std::string& get() const { + // This check generates less code than a call-once invocation. + auto* res = inited_.load(std::memory_order_acquire); + if (PROTOBUF_PREDICT_FALSE(res == nullptr)) return Init(); + return *res; + } + + private: + // Initialize the string in `string_buf_`, update `inited_` and return it. + // We return it here to avoid having to read it again in the inlined code. + const std::string& Init() const; +}; + template class TaggedPtr { public: - void Set(T* p) { ptr_ = reinterpret_cast(p); } - T* Get() const { return reinterpret_cast(ptr_); } + TaggedPtr() = default; + explicit constexpr TaggedPtr(const std::string* ptr) + : ptr_(const_cast(ptr)) {} + + void SetTagged(T* p) { + Set(p); + ptr_ = reinterpret_cast(as_int() | 1); + } + void Set(T* p) { ptr_ = p; } + T* Get() const { return reinterpret_cast(as_int() & -2); } + bool IsTagged() const { return as_int() & 1; } - bool IsNull() { return ptr_ == 0; } + // Returned value is only safe to dereference if IsTagged() == false. + // It is safe to compare. 
+ T* UnsafeGet() const { return static_cast(ptr_); } + + bool IsNull() { return ptr_ == nullptr; } private: - uintptr_t ptr_; + uintptr_t as_int() const { return reinterpret_cast(ptr_); } + void* ptr_; }; -struct LIBPROTOBUF_EXPORT ArenaStringPtr { - inline void Set(const ::std::string* default_value, - const ::std::string& value, ::google::protobuf::Arena* arena) { - if (ptr_ == default_value) { - CreateInstance(arena, &value); - } else { - *ptr_ = value; - } - } +static_assert(std::is_trivial>::value, + "TaggedPtr must be trivial"); - inline void SetLite(const ::std::string* default_value, - const ::std::string& value, - ::google::protobuf::Arena* arena) { - Set(default_value, value, arena); - } +// This class encapsulates a pointer to a std::string with or without a donated +// buffer, tagged by bottom bit. It is a high-level wrapper that almost directly +// corresponds to the interface required by string fields in generated +// code. It replaces the old std::string* pointer in such cases. +// +// The object has different but similar code paths for when the default value is +// the empty string and when it is a non-empty string. +// The empty string is handled different throughout the library and there is a +// single global instance of it we can share. +// +// For fields with an empty string default value, there are three distinct +// states: +// +// - Pointer set to 'String' tag (LSB is 0), equal to +// &GetEmptyStringAlreadyInited(): field is set to its default value. Points +// to a true std::string*, but we do not own that std::string* (it's a +// globally shared instance). +// +// - Pointer set to 'String' tag (LSB is 0), but not equal to the global empty +// string: field points to a true std::string* instance that we own. This +// instance is either on the heap or on the arena (i.e. registered on +// free()/destructor-call list) as appropriate. 
+// +// - Pointer set to 'DonatedString' tag (LSB is 1): points to a std::string +// instance with a buffer on the arena (arena != NULL, always, in this case). +// +// For fields with a non-empty string default value, there are three distinct +// states: +// +// - Pointer set to 'String' tag (LSB is 0), equal to `nullptr`: +// Field is in "default" mode and does not point to any actual instance. +// Methods that might need to create an instance of the object will pass a +// `const LazyString&` for it. +// +// - Pointer set to 'String' tag (LSB is 0), but not equal to `nullptr`: +// field points to a true std::string* instance that we own. This instance is +// either on the heap or on the arena (i.e. registered on +// free()/destructor-call list) as appropriate. +// +// - Pointer set to 'DonatedString' tag (LSB is 1): points to a std::string +// instance with a buffer on the arena (arena != NULL, always, in this case). +// +// Generated code and reflection code both ensure that ptr_ is never null for +// fields with an empty default. +// Because ArenaStringPtr is used in oneof unions, its constructor is a NOP and +// so the field is always manually initialized via method calls. +// +// Side-note: why pass information about the default on every API call? Because +// we don't want to hold it in a member variable, or else this would go into +// every proto message instance. This would be a huge waste of space, since the +// default instance pointer is typically a global (static class field). We want +// the generated code to be as efficient as possible, and if we take +// the default value information as a parameter that's in practice taken from a +// static class field, and compare ptr_ to the default value, we end up with a +// single "cmp %reg, GLOBAL" in the resulting machine code. (Note that this also +// requires the String tag to be 0 so we can avoid the mask before comparing.) 
+struct PROTOBUF_EXPORT ArenaStringPtr { + ArenaStringPtr() = default; + explicit constexpr ArenaStringPtr(const std::string* default_value) + : tagged_ptr_(default_value) {} + + // Some methods below are overloaded on a `default_value` and on tags. + // The tagged overloads help reduce code size in the callers in generated + // code, while the `default_value` overloads are useful from reflection. + // By-value empty struct arguments are elided in the ABI. + struct EmptyDefault {}; + struct NonEmptyDefault {}; + + void Set(const std::string* default_value, ConstStringParam value, + ::google::protobuf::Arena* arena); + void Set(const std::string* default_value, std::string&& value, + ::google::protobuf::Arena* arena); + void Set(EmptyDefault, ConstStringParam value, ::google::protobuf::Arena* arena); + void Set(EmptyDefault, std::string&& value, ::google::protobuf::Arena* arena); + void Set(NonEmptyDefault, ConstStringParam value, ::google::protobuf::Arena* arena); + void Set(NonEmptyDefault, std::string&& value, ::google::protobuf::Arena* arena); // Basic accessors. - inline const ::std::string& Get() const { return *ptr_; } - - inline ::std::string* Mutable(const ::std::string* default_value, - ::google::protobuf::Arena* arena) { - if (ptr_ == default_value) { - CreateInstance(arena, default_value); - } - return ptr_; + const std::string& Get() const PROTOBUF_ALWAYS_INLINE { + // Unconditionally mask away the tag. + return *tagged_ptr_.Get(); } - - // Release returns a ::std::string* instance that is heap-allocated and is not - // Own()'d by any arena. If the field was not set, it returns NULL. The caller - // retains ownership. Clears this field back to NULL state. Used to implement - // release_() methods on generated classes. 
- inline ::std::string* Release(const ::std::string* default_value, - ::google::protobuf::Arena* arena) { - if (ptr_ == default_value) { - return NULL; - } - return ReleaseNonDefault(default_value, arena); - } - - // Similar to Release, but ptr_ cannot be the default_value. - inline ::std::string* ReleaseNonDefault( - const ::std::string* default_value, ::google::protobuf::Arena* arena) { - GOOGLE_DCHECK(!IsDefault(default_value)); - ::std::string* released = NULL; - if (arena != NULL) { - // ptr_ is owned by the arena. - released = new ::std::string; - released->swap(*ptr_); - } else { - released = ptr_; - } - ptr_ = const_cast< ::std::string* >(default_value); - return released; + const std::string* GetPointer() const PROTOBUF_ALWAYS_INLINE { + // Unconditionally mask away the tag. + return tagged_ptr_.Get(); } - // UnsafeArenaRelease returns a ::std::string*, but it may be arena-owned (i.e. - // have its destructor already registered) if arena != NULL. If the field was - // not set, this returns NULL. This method clears this field back to NULL - // state. Used to implement unsafe_arena_release_() methods on - // generated classes. - inline ::std::string* UnsafeArenaRelease(const ::std::string* default_value, - ::google::protobuf::Arena* /* arena */) { - if (ptr_ == default_value) { - return NULL; - } - ::std::string* released = ptr_; - ptr_ = const_cast< ::std::string* >(default_value); - return released; - } - - // Takes a string that is heap-allocated, and takes ownership. The string's - // destructor is registered with the arena. Used to implement + // For fields with an empty default value. + std::string* Mutable(EmptyDefault, ::google::protobuf::Arena* arena); + // For fields with a non-empty default value. + std::string* Mutable(const LazyString& default_value, ::google::protobuf::Arena* arena); + + // Release returns a std::string* instance that is heap-allocated and is not + // Own()'d by any arena. If the field is not set, this returns NULL. 
The + // caller retains ownership. Clears this field back to NULL state. Used to + // implement release_() methods on generated classes. + std::string* Release(const std::string* default_value, + ::google::protobuf::Arena* arena); + std::string* ReleaseNonDefault(const std::string* default_value, + ::google::protobuf::Arena* arena); + + // Takes a std::string that is heap-allocated, and takes ownership. The + // std::string's destructor is registered with the arena. Used to implement // set_allocated_ in generated classes. - inline void SetAllocated(const ::std::string* default_value, - ::std::string* value, ::google::protobuf::Arena* arena) { - if (arena == NULL && ptr_ != default_value) { - Destroy(default_value, arena); - } - if (value != NULL) { - ptr_ = value; - if (arena != NULL) { - arena->Own(value); - } - } else { - ptr_ = const_cast< ::std::string* >(default_value); - } - } - - // Takes a string that has lifetime equal to the arena's lifetime. The arena - // must be non-null. It is safe only to pass this method a value returned by - // UnsafeArenaRelease() on another field of a message in the same arena. Used - // to implement unsafe_arena_set_allocated_ in generated classes. - inline void UnsafeArenaSetAllocated(const ::std::string* default_value, - ::std::string* value, - ::google::protobuf::Arena* /* arena */) { - if (value != NULL) { - ptr_ = value; - } else { - ptr_ = const_cast< ::std::string* >(default_value); - } - } + void SetAllocated(const std::string* default_value, std::string* value, + ::google::protobuf::Arena* arena); // Swaps internal pointers. Arena-safety semantics: this is guarded by the // logic in Swap()/UnsafeArenaSwap() at the message level, so this method is // 'unsafe' if called directly. 
- GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void Swap(ArenaStringPtr* other) { - std::swap(ptr_, other->ptr_); - } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void Swap( - ArenaStringPtr* other, const ::std::string* default_value, Arena* arena) { -#ifndef NDEBUG - // For debug builds, we swap the contents of the string, rather than the - // string instances themselves. This invalidates previously taken const - // references that are (per our documentation) invalidated by calling Swap() - // on the message. - // - // If both strings are the default_value, swapping is uninteresting. - // Otherwise, we use ArenaStringPtr::Mutable() to access the string, to - // ensure that we do not try to mutate default_value itself. - if (IsDefault(default_value) && other->IsDefault(default_value)) { - return; - } - - ::std::string* this_ptr = Mutable(default_value, arena); - ::std::string* other_ptr = other->Mutable(default_value, arena); - - this_ptr->swap(*other_ptr); -#else - std::swap(ptr_, other->ptr_); -#endif - } + inline void Swap(ArenaStringPtr* other, const std::string* default_value, + Arena* arena) PROTOBUF_ALWAYS_INLINE; // Frees storage (if not on an arena). - inline void Destroy(const ::std::string* default_value, - ::google::protobuf::Arena* arena) { - if (arena == NULL && ptr_ != default_value) { - delete ptr_; - } - } + void Destroy(const std::string* default_value, ::google::protobuf::Arena* arena); + void Destroy(EmptyDefault, ::google::protobuf::Arena* arena); + void Destroy(NonEmptyDefault, ::google::protobuf::Arena* arena); - // Clears content, but keeps allocated string if arena != NULL, to avoid the - // overhead of heap operations. After this returns, the content (as seen by - // the user) will always be the empty string. Assumes that |default_value| - // is an empty string. 
- inline void ClearToEmpty(const ::std::string* default_value, - ::google::protobuf::Arena* /* arena */) { - if (ptr_ == default_value) { - // Already set to default (which is empty) -- do nothing. - } else { - ptr_->clear(); - } - } + // Clears content, but keeps allocated std::string, to avoid the overhead of + // heap operations. After this returns, the content (as seen by the user) will + // always be the empty std::string. Assumes that |default_value| is an empty + // std::string. + void ClearToEmpty(); - // Clears content, assuming that the current value is not the empty string - // default. - inline void ClearNonDefaultToEmpty() { - ptr_->clear(); - } - inline void ClearNonDefaultToEmptyNoArena() { - ptr_->clear(); - } + // Clears content, assuming that the current value is not the empty + // string default. + void ClearNonDefaultToEmpty(); - // Clears content, but keeps allocated string if arena != NULL, to avoid the - // overhead of heap operations. After this returns, the content (as seen by - // the user) will always be equal to |default_value|. - inline void ClearToDefault(const ::std::string* default_value, - ::google::protobuf::Arena* /* arena */) { - if (ptr_ == default_value) { - // Already set to default -- do nothing. - } else { - // Have another allocated string -- rather than throwing this away and - // resetting ptr_ to the canonical default string instance, we just reuse - // this instance. - *ptr_ = *default_value; - } - } + // Clears content, but keeps allocated std::string if arena != NULL, to avoid + // the overhead of heap operations. After this returns, the content (as seen + // by the user) will always be equal to |default_value|. + void ClearToDefault(const LazyString& default_value, ::google::protobuf::Arena* arena); // Called from generated code / reflection runtime only. Resets value to point - // to a default string pointer, with the semantics that this ArenaStringPtr - // does not own the pointed-to memory. 
Disregards initial value of ptr_ (so - // this is the *ONLY* safe method to call after construction or when - // reinitializing after becoming the active field in a oneof union). - inline void UnsafeSetDefault(const ::std::string* default_value) { - // Casting away 'const' is safe here: accessors ensure that ptr_ is only - // returned as a const if it is equal to default_value. - ptr_ = const_cast< ::std::string* >(default_value); + // to a default string pointer, with the semantics that this + // ArenaStringPtr does not own the pointed-to memory. Disregards initial value + // of ptr_ (so this is the *ONLY* safe method to call after construction or + // when reinitializing after becoming the active field in a oneof union). + inline void UnsafeSetDefault(const std::string* default_value); + + // Returns a mutable pointer, but doesn't initialize the string to the + // default value. + std::string* MutableNoArenaNoDefault(const std::string* default_value); + + // Get a mutable pointer with unspecified contents. + // Similar to `MutableNoArenaNoDefault`, but also handles the arena case. + // If the value was donated, the contents are discarded. + std::string* MutableNoCopy(const std::string* default_value, + ::google::protobuf::Arena* arena); + + // Destroy the string. Assumes `arena == nullptr`. + void DestroyNoArena(const std::string* default_value); + + // Internal setter used only at parse time to directly set a donated string + // value. + void UnsafeSetTaggedPointer(TaggedPtr value) { + tagged_ptr_ = value; } - - // The 'NoArena' variants of methods below assume arena == NULL and are - // optimized to provide very little overhead relative to a raw string pointer - // (while still being in-memory compatible with other code that assumes - // ArenaStringPtr). Note the invariant that a class instance that has only - // ever been mutated by NoArena methods must *only* be in the String state - // (i.e., tag bits are not used), *NEVER* ArenaString. 
This allows all - // tagged-pointer manipulations to be avoided. - inline void SetNoArena(const ::std::string* default_value, - const ::std::string& value) { - if (ptr_ == default_value) { - CreateInstanceNoArena(&value); - } else { - *ptr_ = value; - } - } - -#if LANG_CXX11 - void SetNoArena(const ::std::string* default_value, ::std::string&& value) { - if (IsDefault(default_value)) { - ptr_ = new ::std::string(std::move(value)); - } else { - *ptr_ = std::move(value); - } + // Generated code only! An optimization, in certain cases the generated + // code is certain we can obtain a std::string with no default checks and + // tag tests. + std::string* UnsafeMutablePointer() PROTOBUF_RETURNS_NONNULL; + + inline bool IsDefault(const std::string* default_value) const { + // Relies on the fact that kPtrTagString == 0, so if IsString(), ptr_ is the + // actual std::string pointer (and if !IsString(), ptr_ will never be equal + // to any aligned |default_value| pointer). The key is that we want to avoid + // masking in the fastpath const-pointer Get() case for non-arena code. + return tagged_ptr_.UnsafeGet() == default_value; } -#endif - void AssignWithDefault(const ::std::string* default_value, ArenaStringPtr value); + private: + TaggedPtr tagged_ptr_; - inline const ::std::string& GetNoArena() const { return *ptr_; } + bool IsDonatedString() const { return false; } - inline ::std::string* MutableNoArena(const ::std::string* default_value) { - if (ptr_ == default_value) { - CreateInstanceNoArena(default_value); - } - return ptr_; - } + // Slow paths. - inline ::std::string* ReleaseNoArena(const ::std::string* default_value) { - if (ptr_ == default_value) { - return NULL; - } else { - return ReleaseNonDefaultNoArena(default_value); - } - } + // MutableSlow requires that !IsString() || IsDefault + // Variadic to support 0 args for EmptyDefault and 1 arg for LazyString. + template + std::string* MutableSlow(::google::protobuf::Arena* arena, const Lazy&... 
lazy_default); - inline ::std::string* ReleaseNonDefaultNoArena( - const ::std::string* default_value) { - GOOGLE_DCHECK(!IsDefault(default_value)); - ::std::string* released = ptr_; - ptr_ = const_cast< ::std::string* >(default_value); - return released; - } +}; +inline void ArenaStringPtr::UnsafeSetDefault(const std::string* value) { + tagged_ptr_.Set(const_cast(value)); +} - inline void SetAllocatedNoArena(const ::std::string* default_value, - ::std::string* value) { - if (ptr_ != default_value) { - delete ptr_; - } - if (value != NULL) { - ptr_ = value; - } else { - ptr_ = const_cast< ::std::string* >(default_value); - } +inline void ArenaStringPtr::Swap(ArenaStringPtr* other, + const std::string* default_value, + Arena* arena) { +#ifndef NDEBUG + // For debug builds, we swap the contents of the string, rather than the + // std::string instances themselves. This invalidates previously taken const + // references that are (per our documentation) invalidated by calling Swap() + // on the message. + // + // If both strings are the default_value, swapping is uninteresting. + // Otherwise, we use ArenaStringPtr::Mutable() to access the std::string, to + // ensure that we do not try to mutate default_value itself. + if (IsDefault(default_value) && other->IsDefault(default_value)) { + return; } - inline void DestroyNoArena(const ::std::string* default_value) { - if (ptr_ != default_value) { - delete ptr_; - } - } + if (default_value == nullptr) { + // If we have non-empty default, then `default_value` is null and we can't + // call Mutable the same way. Just do the regular swap. + std::swap(tagged_ptr_, other->tagged_ptr_); + } else { + std::string* this_ptr = Mutable(EmptyDefault{}, arena); + std::string* other_ptr = other->Mutable(EmptyDefault{}, arena); - inline void ClearToEmptyNoArena(const ::std::string* default_value) { - if (ptr_ == default_value) { - // Nothing: already equal to default (which is the empty string). 
- } else { - ptr_->clear(); - } + this_ptr->swap(*other_ptr); } +#else + std::swap(tagged_ptr_, other->tagged_ptr_); +#endif +} - inline void ClearToDefaultNoArena(const ::std::string* default_value) { - if (ptr_ == default_value) { - // Nothing: already set to default. - } else { - // Reuse existing allocated instance. - *ptr_ = *default_value; - } - } +inline void ArenaStringPtr::ClearNonDefaultToEmpty() { + // Unconditionally mask away the tag. + tagged_ptr_.Get()->clear(); +} - // Internal accessor used only at parse time to provide direct access to the - // raw pointer from the shared parse routine (in the non-arenas case). The - // parse routine does the string allocation in order to save code size in the - // generated parsing code. - inline ::std::string** UnsafeRawStringPointer() { - return &ptr_; +inline std::string* ArenaStringPtr::MutableNoArenaNoDefault( + const std::string* default_value) { + // VERY IMPORTANT for performance and code size: this will reduce to a member + // variable load, a pointer check (against |default_value|, in practice a + // static global) and a branch to the slowpath (which calls operator new and + // the ctor). DO NOT add any tagged-pointer operations here. + if (IsDefault(default_value)) { + std::string* new_string = new std::string(); + tagged_ptr_.Set(new_string); + return new_string; + } else { + return UnsafeMutablePointer(); } +} - inline bool IsDefault(const ::std::string* default_value) const { - return ptr_ == default_value; +inline void ArenaStringPtr::DestroyNoArena(const std::string* default_value) { + if (!IsDefault(default_value)) { + delete UnsafeMutablePointer(); } +} - // Internal accessors!!!! - void UnsafeSetTaggedPointer(TaggedPtr< ::std::string> value) { - ptr_ = value.Get(); - } - // Generated code only! An optimization, in certain cases the generated - // code is certain we can obtain a string with no default checks and - // tag tests. 
- ::std::string* UnsafeMutablePointer() { return ptr_; } +inline std::string* ArenaStringPtr::UnsafeMutablePointer() { + GOOGLE_DCHECK(!tagged_ptr_.IsTagged()); + GOOGLE_DCHECK(tagged_ptr_.UnsafeGet() != nullptr); + return tagged_ptr_.UnsafeGet(); +} - private: - ::std::string* ptr_; - - GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE - void CreateInstance(::google::protobuf::Arena* arena, - const ::std::string* initial_value) { - GOOGLE_DCHECK(initial_value != NULL); - // uses "new ::std::string" when arena is nullptr - ptr_ = Arena::Create< ::std::string >(arena, *initial_value); - } - GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE - void CreateInstanceNoArena(const ::std::string* initial_value) { - GOOGLE_DCHECK(initial_value != NULL); - ptr_ = new ::std::string(*initial_value); - } -}; } // namespace internal } // namespace protobuf +} // namespace google +#include - -namespace protobuf { -namespace internal { - -inline void ArenaStringPtr::AssignWithDefault(const ::std::string* default_value, - ArenaStringPtr value) { - const ::std::string* me = *UnsafeRawStringPointer(); - const ::std::string* other = *value.UnsafeRawStringPointer(); - // If the pointers are the same then do nothing. - if (me != other) { - SetNoArena(default_value, value.GetNoArena()); - } -} - -} // namespace internal -} // namespace protobuf - -} // namespace google #endif // GOOGLE_PROTOBUF_ARENASTRING_H__ diff --git a/third_party/protobuf-lite/google/protobuf/descriptor.h b/third_party/protobuf-lite/google/protobuf/descriptor.h new file mode 100644 index 00000000..5bfecf50 --- /dev/null +++ b/third_party/protobuf-lite/google/protobuf/descriptor.h @@ -0,0 +1,2324 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. 
+// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains classes which describe a type of protocol message. +// You can use a message's descriptor to learn at runtime what fields +// it contains and what the types of those fields are. 
The Message +// interface also allows you to dynamically access and modify individual +// fields by passing the FieldDescriptor of the field you are interested +// in. +// +// Most users will not care about descriptors, because they will write +// code specific to certain protocol types and will simply use the classes +// generated by the protocol compiler directly. Advanced users who want +// to operate on arbitrary types (not known at compile time) may want to +// read descriptors in order to learn about the contents of a message. +// A very small number of users will want to construct their own +// Descriptors, either because they are implementing Message manually or +// because they are writing something like the protocol compiler. +// +// For an example of how you might use descriptors, see the code example +// at the top of message.h. + +#ifndef GOOGLE_PROTOBUF_DESCRIPTOR_H__ +#define GOOGLE_PROTOBUF_DESCRIPTOR_H__ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// TYPE_BOOL is defined in the MacOS's ConditionalMacros.h. +#ifdef TYPE_BOOL +#undef TYPE_BOOL +#endif // TYPE_BOOL + +#ifdef SWIG +#define PROTOBUF_EXPORT +#endif + + +namespace google { +namespace protobuf { + +// Defined in this file. 
+class Descriptor; +class FieldDescriptor; +class OneofDescriptor; +class EnumDescriptor; +class EnumValueDescriptor; +class ServiceDescriptor; +class MethodDescriptor; +class FileDescriptor; +class DescriptorDatabase; +class DescriptorPool; + +// Defined in descriptor.proto +class DescriptorProto; +class DescriptorProto_ExtensionRange; +class FieldDescriptorProto; +class OneofDescriptorProto; +class EnumDescriptorProto; +class EnumValueDescriptorProto; +class ServiceDescriptorProto; +class MethodDescriptorProto; +class FileDescriptorProto; +class MessageOptions; +class FieldOptions; +class OneofOptions; +class EnumOptions; +class EnumValueOptions; +class ExtensionRangeOptions; +class ServiceOptions; +class MethodOptions; +class FileOptions; +class UninterpretedOption; +class SourceCodeInfo; + +// Defined in message.h +class Message; +class Reflection; + +// Defined in descriptor.cc +class DescriptorBuilder; +class FileDescriptorTables; +struct Symbol; + +// Defined in unknown_field_set.h. +class UnknownField; + +// Defined in command_line_interface.cc +namespace compiler { +class CommandLineInterface; +namespace cpp { +// Defined in helpers.h +class Formatter; +} // namespace cpp +} // namespace compiler + +namespace descriptor_unittest { +class DescriptorTest; +} // namespace descriptor_unittest + +// Defined in printer.h +namespace io { +class Printer; +} // namespace io + +// NB, all indices are zero-based. +struct SourceLocation { + int start_line; + int end_line; + int start_column; + int end_column; + + // Doc comments found at the source location. + // See the comments in SourceCodeInfo.Location (descriptor.proto) for details. + std::string leading_comments; + std::string trailing_comments; + std::vector leading_detached_comments; +}; + +// Options when generating machine-parsable output from a descriptor with +// DebugString(). +struct DebugStringOptions { + // include original user comments as recorded in SourceLocation entries. N.B. 
+ // that this must be |false| by default: several other pieces of code (for + // example, the C++ code generation for fields in the proto compiler) rely on + // DebugString() output being unobstructed by user comments. + bool include_comments; + // If true, elide the braced body in the debug string. + bool elide_group_body; + bool elide_oneof_body; + + DebugStringOptions() + : include_comments(false), + elide_group_body(false), + elide_oneof_body(false) { + } +}; + +// A class to handle the simplest cases of a lazily linked descriptor +// for a message type that isn't built at the time of cross linking, +// which is needed when a pool has lazily_build_dependencies_ set. +// Must be instantiated as mutable in a descriptor. +namespace internal { +class PROTOBUF_EXPORT LazyDescriptor { + public: + // Init function to be called at init time of a descriptor containing + // a LazyDescriptor. + void Init() { + descriptor_ = nullptr; + name_ = nullptr; + once_ = nullptr; + file_ = nullptr; + } + + // Sets the value of the descriptor if it is known during the descriptor + // building process. Not thread safe, should only be called during the + // descriptor build process. Should not be called after SetLazy has been + // called. + void Set(const Descriptor* descriptor); + + // Sets the information needed to lazily cross link the descriptor at a later + // time, SetLazy is not thread safe, should be called only once at descriptor + // build time if the symbol wasn't found and building of the file containing + // that type is delayed because lazily_build_dependencies_ is set on the pool. + // Should not be called after Set() has been called. + void SetLazy(StringPiece name, const FileDescriptor* file); + + // Returns the current value of the descriptor, thread-safe. If SetLazy(...) + // has been called, will do a one-time cross link of the type specified, + // building the descriptor file that contains the type if necessary. 
+ inline const Descriptor* Get() { + Once(); + return descriptor_; + } + + private: + static void OnceStatic(LazyDescriptor* lazy); + void OnceInternal(); + void Once(); + + const Descriptor* descriptor_; + const std::string* name_; + internal::once_flag* once_; + const FileDescriptor* file_; +}; +} // namespace internal + +// Describes a type of protocol message, or a particular group within a +// message. To obtain the Descriptor for a given message object, call +// Message::GetDescriptor(). Generated message classes also have a +// static method called descriptor() which returns the type's descriptor. +// Use DescriptorPool to construct your own descriptors. +class PROTOBUF_EXPORT Descriptor { + public: + typedef DescriptorProto Proto; + + // The name of the message type, not including its scope. + const std::string& name() const; + + // The fully-qualified name of the message type, scope delimited by + // periods. For example, message type "Foo" which is declared in package + // "bar" has full name "bar.Foo". If a type "Baz" is nested within + // Foo, Baz's full_name is "bar.Foo.Baz". To get only the part that + // comes after the last '.', use name(). + const std::string& full_name() const; + + // Index of this descriptor within the file or containing type's message + // type array. + int index() const; + + // The .proto file in which this message type was defined. Never nullptr. + const FileDescriptor* file() const; + + // If this Descriptor describes a nested type, this returns the type + // in which it is nested. Otherwise, returns nullptr. + const Descriptor* containing_type() const; + + // Get options for this message type. These are specified in the .proto file + // by placing lines like "option foo = 1234;" in the message definition. + // Allowed options are defined by MessageOptions in descriptor.proto, and any + // available extensions of that message. 
+ const MessageOptions& options() const; + + // Write the contents of this Descriptor into the given DescriptorProto. + // The target DescriptorProto must be clear before calling this; if it + // isn't, the result may be garbage. + void CopyTo(DescriptorProto* proto) const; + + // Write the contents of this descriptor in a human-readable form. Output + // will be suitable for re-parsing. + std::string DebugString() const; + + // Similar to DebugString(), but additionally takes options (e.g., + // include original user comments in output). + std::string DebugStringWithOptions(const DebugStringOptions& options) const; + + // Returns true if this is a placeholder for an unknown type. This will + // only be the case if this descriptor comes from a DescriptorPool + // with AllowUnknownDependencies() set. + bool is_placeholder() const; + + enum WellKnownType { + WELLKNOWNTYPE_UNSPECIFIED, // Not a well-known type. + + // Wrapper types. + WELLKNOWNTYPE_DOUBLEVALUE, // google.protobuf.DoubleValue + WELLKNOWNTYPE_FLOATVALUE, // google.protobuf.FloatValue + WELLKNOWNTYPE_INT64VALUE, // google.protobuf.Int64Value + WELLKNOWNTYPE_UINT64VALUE, // google.protobuf.UInt64Value + WELLKNOWNTYPE_INT32VALUE, // google.protobuf.Int32Value + WELLKNOWNTYPE_UINT32VALUE, // google.protobuf.UInt32Value + WELLKNOWNTYPE_STRINGVALUE, // google.protobuf.StringValue + WELLKNOWNTYPE_BYTESVALUE, // google.protobuf.BytesValue + WELLKNOWNTYPE_BOOLVALUE, // google.protobuf.BoolValue + + // Other well known types. + WELLKNOWNTYPE_ANY, // google.protobuf.Any + WELLKNOWNTYPE_FIELDMASK, // google.protobuf.FieldMask + WELLKNOWNTYPE_DURATION, // google.protobuf.Duration + WELLKNOWNTYPE_TIMESTAMP, // google.protobuf.Timestamp + WELLKNOWNTYPE_VALUE, // google.protobuf.Value + WELLKNOWNTYPE_LISTVALUE, // google.protobuf.ListValue + WELLKNOWNTYPE_STRUCT, // google.protobuf.Struct + + // New well-known types may be added in the future. + // Please make sure any switch() statements have a 'default' case. 
+ __WELLKNOWNTYPE__DO_NOT_USE__ADD_DEFAULT_INSTEAD__, + }; + + WellKnownType well_known_type() const; + + // Field stuff ----------------------------------------------------- + + // The number of fields in this message type. + int field_count() const; + // Gets a field by index, where 0 <= index < field_count(). + // These are returned in the order they were defined in the .proto file. + const FieldDescriptor* field(int index) const; + + // Looks up a field by declared tag number. Returns nullptr if no such field + // exists. + const FieldDescriptor* FindFieldByNumber(int number) const; + // Looks up a field by name. Returns nullptr if no such field exists. + const FieldDescriptor* FindFieldByName(ConstStringParam name) const; + + // Looks up a field by lowercased name (as returned by lowercase_name()). + // This lookup may be ambiguous if multiple field names differ only by case, + // in which case the field returned is chosen arbitrarily from the matches. + const FieldDescriptor* FindFieldByLowercaseName( + ConstStringParam lowercase_name) const; + + // Looks up a field by camel-case name (as returned by camelcase_name()). + // This lookup may be ambiguous if multiple field names differ in a way that + // leads them to have identical camel-case names, in which case the field + // returned is chosen arbitrarily from the matches. + const FieldDescriptor* FindFieldByCamelcaseName( + ConstStringParam camelcase_name) const; + + // The number of oneofs in this message type. + int oneof_decl_count() const; + // The number of oneofs in this message type, excluding synthetic oneofs. + // Real oneofs always come first, so iterating up to real_oneof_decl_count() + // will yield all real oneofs. + int real_oneof_decl_count() const; + // Get a oneof by index, where 0 <= index < oneof_decl_count(). + // These are returned in the order they were defined in the .proto file. + const OneofDescriptor* oneof_decl(int index) const; + + // Looks up a oneof by name.
Returns nullptr if no such oneof exists. + const OneofDescriptor* FindOneofByName(ConstStringParam name) const; + + // Nested type stuff ----------------------------------------------- + + // The number of nested types in this message type. + int nested_type_count() const; + // Gets a nested type by index, where 0 <= index < nested_type_count(). + // These are returned in the order they were defined in the .proto file. + const Descriptor* nested_type(int index) const; + + // Looks up a nested type by name. Returns nullptr if no such nested type + // exists. + const Descriptor* FindNestedTypeByName(ConstStringParam name) const; + + // Enum stuff ------------------------------------------------------ + + // The number of enum types in this message type. + int enum_type_count() const; + // Gets an enum type by index, where 0 <= index < enum_type_count(). + // These are returned in the order they were defined in the .proto file. + const EnumDescriptor* enum_type(int index) const; + + // Looks up an enum type by name. Returns nullptr if no such enum type + // exists. + const EnumDescriptor* FindEnumTypeByName(ConstStringParam name) const; + + // Looks up an enum value by name, among all enum types in this message. + // Returns nullptr if no such value exists. + const EnumValueDescriptor* FindEnumValueByName(ConstStringParam name) const; + + // Extensions ------------------------------------------------------ + + // A range of field numbers which are designated for third-party + // extensions. + struct ExtensionRange { + typedef DescriptorProto_ExtensionRange Proto; + + typedef ExtensionRangeOptions OptionsType; + + // See Descriptor::CopyTo(). + void CopyTo(DescriptorProto_ExtensionRange* proto) const; + + int start; // inclusive + int end; // exclusive + + const ExtensionRangeOptions* options_; + }; + + // The number of extension ranges in this message type. 
+ int extension_range_count() const; + // Gets an extension range by index, where 0 <= index < + // extension_range_count(). These are returned in the order they were defined + // in the .proto file. + const ExtensionRange* extension_range(int index) const; + + // Returns true if the number is in one of the extension ranges. + bool IsExtensionNumber(int number) const; + + // Returns nullptr if no extension range contains the given number. + const ExtensionRange* FindExtensionRangeContainingNumber(int number) const; + + // The number of extensions defined nested within this message type's scope. + // See doc: + // https://developers.google.com/protocol-buffers/docs/proto#nested-extensions + // + // Note that the extensions may be extending *other* messages. + // + // For example: + // message M1 { + // extensions 1 to max; + // } + // + // message M2 { + // extend M1 { + // optional int32 foo = 1; + // } + // } + // + // In this case, + // DescriptorPool::generated_pool() + // ->FindMessageTypeByName("M2") + // ->extension(0) + // will return "foo", even though "foo" is an extension of M1. + // To find all known extensions of a given message, instead use + // DescriptorPool::FindAllExtensions. + int extension_count() const; + // Get an extension by index, where 0 <= index < extension_count(). + // These are returned in the order they were defined in the .proto file. + const FieldDescriptor* extension(int index) const; + + // Looks up a named extension (which extends some *other* message type) + // defined within this message type's scope. + const FieldDescriptor* FindExtensionByName(ConstStringParam name) const; + + // Similar to FindFieldByLowercaseName(), but finds extensions defined within + // this message type's scope. + const FieldDescriptor* FindExtensionByLowercaseName( + ConstStringParam name) const; + + // Similar to FindFieldByCamelcaseName(), but finds extensions defined within + // this message type's scope. 
+ const FieldDescriptor* FindExtensionByCamelcaseName( + ConstStringParam name) const; + + // Reserved fields ------------------------------------------------- + + // A range of reserved field numbers. + struct ReservedRange { + int start; // inclusive + int end; // exclusive + }; + + // The number of reserved ranges in this message type. + int reserved_range_count() const; + // Gets a reserved range by index, where 0 <= index < + // reserved_range_count(). These are returned in the order they were defined + // in the .proto file. + const ReservedRange* reserved_range(int index) const; + + // Returns true if the number is in one of the reserved ranges. + bool IsReservedNumber(int number) const; + + // Returns nullptr if no reserved range contains the given number. + const ReservedRange* FindReservedRangeContainingNumber(int number) const; + + // The number of reserved field names in this message type. + int reserved_name_count() const; + + // Gets a reserved name by index, where 0 <= index < reserved_name_count(). + const std::string& reserved_name(int index) const; + + // Returns true if the field name is reserved. + bool IsReservedName(ConstStringParam name) const; + + // Source Location --------------------------------------------------- + + // Updates |*out_location| to the source location of the complete + // extent of this message declaration. Returns false and leaves + // |*out_location| unchanged iff location information was not available. + bool GetSourceLocation(SourceLocation* out_location) const; + + // Maps -------------------------------------------------------------- + + // Returns the FieldDescriptor for the "key" field. If this isn't a map entry + // field, returns nullptr. + const FieldDescriptor* map_key() const; + + // Returns the FieldDescriptor for the "value" field. If this isn't a map + // entry field, returns nullptr.
+ const FieldDescriptor* map_value() const; + + private: + typedef MessageOptions OptionsType; + + // Allows tests to test CopyTo(proto, true). + friend class descriptor_unittest::DescriptorTest; + + // Allows access to GetLocationPath for annotations. + friend class io::Printer; + friend class compiler::cpp::Formatter; + + // Fill the json_name field of FieldDescriptorProto. + void CopyJsonNameTo(DescriptorProto* proto) const; + + // Internal version of DebugString; controls the level of indenting for + // correct depth. Takes |options| to control debug-string options, and + // |include_opening_clause| to indicate whether the "message ... " part of the + // clause has already been generated (this varies depending on context). + void DebugString(int depth, std::string* contents, + const DebugStringOptions& options, + bool include_opening_clause) const; + + // Walks up the descriptor tree to generate the source location path + // to this descriptor from the file root. + void GetLocationPath(std::vector* output) const; + + const std::string* name_; + const std::string* full_name_; + const FileDescriptor* file_; + const Descriptor* containing_type_; + const MessageOptions* options_; + + // These arrays are separated from their sizes to minimize padding on 64-bit. + FieldDescriptor* fields_; + OneofDescriptor* oneof_decls_; + Descriptor* nested_types_; + EnumDescriptor* enum_types_; + ExtensionRange* extension_ranges_; + FieldDescriptor* extensions_; + ReservedRange* reserved_ranges_; + const std::string** reserved_names_; + + int field_count_; + int oneof_decl_count_; + int real_oneof_decl_count_; + int nested_type_count_; + int enum_type_count_; + int extension_range_count_; + int extension_count_; + int reserved_range_count_; + int reserved_name_count_; + + // True if this is a placeholder for an unknown type. + bool is_placeholder_; + // True if this is a placeholder and the type name wasn't fully-qualified. + bool is_unqualified_placeholder_; + // Well known type. 
Stored as char to conserve space. + char well_known_type_; + + // IMPORTANT: If you add a new field, make sure to search for all instances + // of Allocate() and AllocateArray() in descriptor.cc + // and update them to initialize the field. + + // Must be constructed using DescriptorPool. + Descriptor() {} + friend class DescriptorBuilder; + friend class DescriptorPool; + friend class EnumDescriptor; + friend class FieldDescriptor; + friend class OneofDescriptor; + friend class MethodDescriptor; + friend class FileDescriptor; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Descriptor); +}; + + +// Describes a single field of a message. To get the descriptor for a given +// field, first get the Descriptor for the message in which it is defined, +// then call Descriptor::FindFieldByName(). To get a FieldDescriptor for +// an extension, do one of the following: +// - Get the Descriptor or FileDescriptor for its containing scope, then +// call Descriptor::FindExtensionByName() or +// FileDescriptor::FindExtensionByName(). +// - Given a DescriptorPool, call DescriptorPool::FindExtensionByNumber() or +// DescriptorPool::FindExtensionByPrintableName(). +// Use DescriptorPool to construct your own descriptors. +class PROTOBUF_EXPORT FieldDescriptor { + public: + typedef FieldDescriptorProto Proto; + + // Identifies a field type. 0 is reserved for errors. The order is weird + // for historical reasons. Types 12 and up are new in proto2. + enum Type { + TYPE_DOUBLE = 1, // double, exactly eight bytes on the wire. + TYPE_FLOAT = 2, // float, exactly four bytes on the wire. + TYPE_INT64 = 3, // int64, varint on the wire. Negative numbers + // take 10 bytes. Use TYPE_SINT64 if negative + // values are likely. + TYPE_UINT64 = 4, // uint64, varint on the wire. + TYPE_INT32 = 5, // int32, varint on the wire. Negative numbers + // take 10 bytes. Use TYPE_SINT32 if negative + // values are likely. + TYPE_FIXED64 = 6, // uint64, exactly eight bytes on the wire. 
+ TYPE_FIXED32 = 7, // uint32, exactly four bytes on the wire. + TYPE_BOOL = 8, // bool, varint on the wire. + TYPE_STRING = 9, // UTF-8 text. + TYPE_GROUP = 10, // Tag-delimited message. Deprecated. + TYPE_MESSAGE = 11, // Length-delimited message. + + TYPE_BYTES = 12, // Arbitrary byte array. + TYPE_UINT32 = 13, // uint32, varint on the wire + TYPE_ENUM = 14, // Enum, varint on the wire + TYPE_SFIXED32 = 15, // int32, exactly four bytes on the wire + TYPE_SFIXED64 = 16, // int64, exactly eight bytes on the wire + TYPE_SINT32 = 17, // int32, ZigZag-encoded varint on the wire + TYPE_SINT64 = 18, // int64, ZigZag-encoded varint on the wire + + MAX_TYPE = 18, // Constant useful for defining lookup tables + // indexed by Type. + }; + + // Specifies the C++ data type used to represent the field. There is a + // fixed mapping from Type to CppType where each Type maps to exactly one + // CppType. 0 is reserved for errors. + enum CppType { + CPPTYPE_INT32 = 1, // TYPE_INT32, TYPE_SINT32, TYPE_SFIXED32 + CPPTYPE_INT64 = 2, // TYPE_INT64, TYPE_SINT64, TYPE_SFIXED64 + CPPTYPE_UINT32 = 3, // TYPE_UINT32, TYPE_FIXED32 + CPPTYPE_UINT64 = 4, // TYPE_UINT64, TYPE_FIXED64 + CPPTYPE_DOUBLE = 5, // TYPE_DOUBLE + CPPTYPE_FLOAT = 6, // TYPE_FLOAT + CPPTYPE_BOOL = 7, // TYPE_BOOL + CPPTYPE_ENUM = 8, // TYPE_ENUM + CPPTYPE_STRING = 9, // TYPE_STRING, TYPE_BYTES + CPPTYPE_MESSAGE = 10, // TYPE_MESSAGE, TYPE_GROUP + + MAX_CPPTYPE = 10, // Constant useful for defining lookup tables + // indexed by CppType. + }; + + // Identifies whether the field is optional, required, or repeated. 0 is + // reserved for errors. + enum Label { + LABEL_OPTIONAL = 1, // optional + LABEL_REQUIRED = 2, // required + LABEL_REPEATED = 3, // repeated + + MAX_LABEL = 3, // Constant useful for defining lookup tables + // indexed by Label. + }; + + // Valid field numbers are positive integers up to kMaxNumber. 
+ static const int kMaxNumber = (1 << 29) - 1; + + // First field number reserved for the protocol buffer library implementation. + // Users may not declare fields that use reserved numbers. + static const int kFirstReservedNumber = 19000; + // Last field number reserved for the protocol buffer library implementation. + // Users may not declare fields that use reserved numbers. + static const int kLastReservedNumber = 19999; + + const std::string& name() const; // Name of this field within the message. + const std::string& full_name() const; // Fully-qualified name of the field. + const std::string& json_name() const; // JSON name of this field. + const FileDescriptor* file() const; // File in which this field was defined. + bool is_extension() const; // Is this an extension field? + int number() const; // Declared tag number. + + // Same as name() except converted to lower-case. This (and especially the + // FindFieldByLowercaseName() method) can be useful when parsing formats + // which prefer to use lowercase naming style. (Although, technically + // field names should be lowercased anyway according to the protobuf style + // guide, so this only makes a difference when dealing with old .proto files + // which do not follow the guide.) + const std::string& lowercase_name() const; + + // Same as name() except converted to camel-case. In this conversion, any + // time an underscore appears in the name, it is removed and the next + // letter is capitalized. Furthermore, the first letter of the name is + // lower-cased. Examples: + // FooBar -> fooBar + // foo_bar -> fooBar + // fooBar -> fooBar + // This (and especially the FindFieldByCamelcaseName() method) can be useful + // when parsing formats which prefer to use camel-case naming style. + const std::string& camelcase_name() const; + + Type type() const; // Declared type of this field. + const char* type_name() const; // Name of the declared type. + CppType cpp_type() const; // C++ type of this field. 
+ const char* cpp_type_name() const; // Name of the C++ type. + Label label() const; // optional/required/repeated + + bool is_required() const; // shorthand for label() == LABEL_REQUIRED + bool is_optional() const; // shorthand for label() == LABEL_OPTIONAL + bool is_repeated() const; // shorthand for label() == LABEL_REPEATED + bool is_packable() const; // shorthand for is_repeated() && + // IsTypePackable(type()) + bool is_packed() const; // shorthand for is_packable() && + // options().packed() + bool is_map() const; // shorthand for type() == TYPE_MESSAGE && + // message_type()->options().map_entry() + + // Returns true if this field was syntactically written with "optional" in the + // .proto file. Excludes singular proto3 fields that do not have a label. + bool has_optional_keyword() const; + + // Returns true if this field tracks presence, ie. does the field + // distinguish between "unset" and "present with default value." + // This includes required, optional, and oneof fields. It excludes maps, + // repeated fields, and singular proto3 fields without "optional". + // + // For fields where has_presence() == true, the return value of + // Reflection::HasField() is semantically meaningful. + bool has_presence() const; + + // Index of this field within the message's field array, or the file or + // extension scope's extensions array. + int index() const; + + // Does this field have an explicitly-declared default value? + bool has_default_value() const; + + // Whether the user has specified the json_name field option in the .proto + // file. + bool has_json_name() const; + + // Get the field default value if cpp_type() == CPPTYPE_INT32. If no + // explicit default was defined, the default is 0. + int32 default_value_int32() const; + // Get the field default value if cpp_type() == CPPTYPE_INT64. If no + // explicit default was defined, the default is 0. + int64 default_value_int64() const; + // Get the field default value if cpp_type() == CPPTYPE_UINT32. 
If no + // explicit default was defined, the default is 0. + uint32 default_value_uint32() const; + // Get the field default value if cpp_type() == CPPTYPE_UINT64. If no + // explicit default was defined, the default is 0. + uint64 default_value_uint64() const; + // Get the field default value if cpp_type() == CPPTYPE_FLOAT. If no + // explicit default was defined, the default is 0.0. + float default_value_float() const; + // Get the field default value if cpp_type() == CPPTYPE_DOUBLE. If no + // explicit default was defined, the default is 0.0. + double default_value_double() const; + // Get the field default value if cpp_type() == CPPTYPE_BOOL. If no + // explicit default was defined, the default is false. + bool default_value_bool() const; + // Get the field default value if cpp_type() == CPPTYPE_ENUM. If no + // explicit default was defined, the default is the first value defined + // in the enum type (all enum types are required to have at least one value). + // This never returns nullptr. + const EnumValueDescriptor* default_value_enum() const; + // Get the field default value if cpp_type() == CPPTYPE_STRING. If no + // explicit default was defined, the default is the empty string. + const std::string& default_value_string() const; + + // The Descriptor for the message of which this is a field. For extensions, + // this is the extended type. Never nullptr. + const Descriptor* containing_type() const; + + // If the field is a member of a oneof, this is the one, otherwise this is + // nullptr. + const OneofDescriptor* containing_oneof() const; + + // If the field is a member of a non-synthetic oneof, returns the descriptor + // for the oneof, otherwise returns nullptr. + const OneofDescriptor* real_containing_oneof() const; + + // If the field is a member of a oneof, returns the index in that oneof. + int index_in_oneof() const; + + // An extension may be declared within the scope of another message. 
If this + // field is an extension (is_extension() is true), then extension_scope() + // returns that message, or nullptr if the extension was declared at global + // scope. If this is not an extension, extension_scope() is undefined (may + // assert-fail). + const Descriptor* extension_scope() const; + + // If type is TYPE_MESSAGE or TYPE_GROUP, returns a descriptor for the + // message or the group type. Otherwise, returns null. + const Descriptor* message_type() const; + // If type is TYPE_ENUM, returns a descriptor for the enum. Otherwise, + // returns null. + const EnumDescriptor* enum_type() const; + + // Get the FieldOptions for this field. This includes things listed in + // square brackets after the field definition. E.g., the field: + // optional string text = 1 [ctype=CORD]; + // has the "ctype" option set. Allowed options are defined by FieldOptions in + // descriptor.proto, and any available extensions of that message. + const FieldOptions& options() const; + + // See Descriptor::CopyTo(). + void CopyTo(FieldDescriptorProto* proto) const; + + // See Descriptor::DebugString(). + std::string DebugString() const; + + // See Descriptor::DebugStringWithOptions(). + std::string DebugStringWithOptions(const DebugStringOptions& options) const; + + // Helper method to get the CppType for a particular Type. + static CppType TypeToCppType(Type type); + + // Helper method to get the name of a Type. + static const char* TypeName(Type type); + + // Helper method to get the name of a CppType. + static const char* CppTypeName(CppType cpp_type); + + // Return true iff [packed = true] is valid for fields of this type. + static inline bool IsTypePackable(Type field_type); + + // Returns full_name() except if the field is a MessageSet extension, + // in which case it returns the full_name() of the containing message type + // for backwards compatibility with proto1. 
+ // + // A MessageSet extension is defined as an optional message extension + // whose containing type has the message_set_wire_format option set. + // This should be true of extensions of google.protobuf.bridge.MessageSet; + // by convention, such extensions are named "message_set_extension". + // + // The opposite operation (looking up an extension's FieldDescriptor given + // its printable name) can be accomplished with + // message->file()->pool()->FindExtensionByPrintableName(message, name) + // where the extension extends "message". + const std::string& PrintableNameForExtension() const; + + // Source Location --------------------------------------------------- + + // Updates |*out_location| to the source location of the complete + // extent of this field declaration. Returns false and leaves + // |*out_location| unchanged iff location information was not available. + bool GetSourceLocation(SourceLocation* out_location) const; + + private: + typedef FieldOptions OptionsType; + + // Allows access to GetLocationPath for annotations. + friend class io::Printer; + friend class compiler::cpp::Formatter; + friend class Reflection; + + // Fill the json_name field of FieldDescriptorProto. + void CopyJsonNameTo(FieldDescriptorProto* proto) const; + + // See Descriptor::DebugString(). + void DebugString(int depth, std::string* contents, + const DebugStringOptions& options) const; + + // Formats the default value appropriately and returns it as a string. + // Must have a default value to call this. If quote_string_type is true, then + // types of CPPTYPE_STRING will be surrounded by quotes and CEscaped. + std::string DefaultValueAsString(bool quote_string_type) const; + + // Helper function that returns the field type name for DebugString. + std::string FieldTypeNameDebugString() const; + + // Walks up the descriptor tree to generate the source location path + // to this descriptor from the file root.
+ void GetLocationPath(std::vector* output) const; + + // Returns true if this is a map message type. + bool is_map_message_type() const; + + const std::string* name_; + const std::string* full_name_; + const std::string* lowercase_name_; + const std::string* camelcase_name_; + // If has_json_name_ is true, it's the value specified by the user. + // Otherwise, it has the same value as camelcase_name_. + const std::string* json_name_; + const FileDescriptor* file_; + internal::once_flag* type_once_; + static void TypeOnceInit(const FieldDescriptor* to_init); + void InternalTypeOnceInit() const; + mutable Type type_; + Label label_; + bool has_default_value_; + bool proto3_optional_; + // Whether the user has specified the json_name field option in the .proto + // file. + bool has_json_name_; + bool is_extension_; + int number_; + int index_in_oneof_; + const Descriptor* containing_type_; + const OneofDescriptor* containing_oneof_; + const Descriptor* extension_scope_; + mutable const Descriptor* message_type_; + mutable const EnumDescriptor* enum_type_; + const FieldOptions* options_; + const std::string* type_name_; + const std::string* default_value_enum_name_; + // IMPORTANT: If you add a new field, make sure to search for all instances + // of Allocate() and AllocateArray() in + // descriptor.cc and update them to initialize the field. 
+ + union { + int32 default_value_int32_; + int64 default_value_int64_; + uint32 default_value_uint32_; + uint64 default_value_uint64_; + float default_value_float_; + double default_value_double_; + bool default_value_bool_; + + mutable const EnumValueDescriptor* default_value_enum_; + const std::string* default_value_string_; + mutable std::atomic default_generated_instance_; + }; + + static const CppType kTypeToCppTypeMap[MAX_TYPE + 1]; + + static const char* const kTypeToName[MAX_TYPE + 1]; + + static const char* const kCppTypeToName[MAX_CPPTYPE + 1]; + + static const char* const kLabelToName[MAX_LABEL + 1]; + + // Must be constructed using DescriptorPool. + FieldDescriptor() {} + friend class DescriptorBuilder; + friend class FileDescriptor; + friend class Descriptor; + friend class OneofDescriptor; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FieldDescriptor); +}; + + +// Describes a oneof defined in a message type. +class PROTOBUF_EXPORT OneofDescriptor { + public: + typedef OneofDescriptorProto Proto; + + const std::string& name() const; // Name of this oneof. + const std::string& full_name() const; // Fully-qualified name of the oneof. + + // Index of this oneof within the message's oneof array. + int index() const; + + // Returns whether this oneof was inserted by the compiler to wrap a proto3 + // optional field. If this returns true, code generators should *not* emit it. + bool is_synthetic() const; + + // The .proto file in which this oneof was defined. Never nullptr. + const FileDescriptor* file() const; + // The Descriptor for the message containing this oneof. + const Descriptor* containing_type() const; + + // The number of (non-extension) fields which are members of this oneof. + int field_count() const; + // Get a member of this oneof, in the order in which they were declared in the + // .proto file. Does not include extensions. + const FieldDescriptor* field(int index) const; + + const OneofOptions& options() const; + + // See Descriptor::CopyTo(). 
+ void CopyTo(OneofDescriptorProto* proto) const; + + // See Descriptor::DebugString(). + std::string DebugString() const; + + // See Descriptor::DebugStringWithOptions(). + std::string DebugStringWithOptions(const DebugStringOptions& options) const; + + // Source Location --------------------------------------------------- + + // Updates |*out_location| to the source location of the complete + // extent of this oneof declaration. Returns false and leaves + // |*out_location| unchanged iff location information was not available. + bool GetSourceLocation(SourceLocation* out_location) const; + + private: + typedef OneofOptions OptionsType; + + // Allows access to GetLocationPath for annotations. + friend class io::Printer; + friend class compiler::cpp::Formatter; + + // See Descriptor::DebugString(). + void DebugString(int depth, std::string* contents, + const DebugStringOptions& options) const; + + // Walks up the descriptor tree to generate the source location path + // to this descriptor from the file root. + void GetLocationPath(std::vector* output) const; + + const std::string* name_; + const std::string* full_name_; + const Descriptor* containing_type_; + int field_count_; + const FieldDescriptor** fields_; + const OneofOptions* options_; + + // IMPORTANT: If you add a new field, make sure to search for all instances + // of Allocate() and AllocateArray() + // in descriptor.cc and update them to initialize the field. + + // Must be constructed using DescriptorPool. + OneofDescriptor() {} + friend class DescriptorBuilder; + friend class Descriptor; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(OneofDescriptor); +}; + +// Describes an enum type defined in a .proto file. To get the EnumDescriptor +// for a generated enum type, call TypeName_descriptor(). Use DescriptorPool +// to construct your own descriptors. +class PROTOBUF_EXPORT EnumDescriptor { + public: + typedef EnumDescriptorProto Proto; + + // The name of this enum type in the containing scope. 
+ const std::string& name() const; + + // The fully-qualified name of the enum type, scope delimited by periods. + const std::string& full_name() const; + + // Index of this enum within the file or containing message's enum array. + int index() const; + + // The .proto file in which this enum type was defined. Never nullptr. + const FileDescriptor* file() const; + + // The number of values for this EnumDescriptor. Guaranteed to be greater + // than zero. + int value_count() const; + // Gets a value by index, where 0 <= index < value_count(). + // These are returned in the order they were defined in the .proto file. + const EnumValueDescriptor* value(int index) const; + + // Looks up a value by name. Returns nullptr if no such value exists. + const EnumValueDescriptor* FindValueByName(ConstStringParam name) const; + // Looks up a value by number. Returns nullptr if no such value exists. If + // multiple values have this number, the first one defined is returned. + const EnumValueDescriptor* FindValueByNumber(int number) const; + + // If this enum type is nested in a message type, this is that message type. + // Otherwise, nullptr. + const Descriptor* containing_type() const; + + // Get options for this enum type. These are specified in the .proto file by + // placing lines like "option foo = 1234;" in the enum definition. Allowed + // options are defined by EnumOptions in descriptor.proto, and any available + // extensions of that message. + const EnumOptions& options() const; + + // See Descriptor::CopyTo(). + void CopyTo(EnumDescriptorProto* proto) const; + + // See Descriptor::DebugString(). + std::string DebugString() const; + + // See Descriptor::DebugStringWithOptions(). + std::string DebugStringWithOptions(const DebugStringOptions& options) const; + + // Returns true if this is a placeholder for an unknown enum. This will + // only be the case if this descriptor comes from a DescriptorPool + // with AllowUnknownDependencies() set. 
+ bool is_placeholder() const; + + // Reserved fields ------------------------------------------------- + + // A range of reserved numbers. + struct ReservedRange { + int start; // inclusive + int end; // inclusive + }; + + // The number of reserved ranges in this enum type. + int reserved_range_count() const; + // Gets a reserved range by index, where 0 <= index < + // reserved_range_count(). These are returned in the order they were defined + // in the .proto file. + const EnumDescriptor::ReservedRange* reserved_range(int index) const; + + // Returns true if the number is in one of the reserved ranges. + bool IsReservedNumber(int number) const; + + // Returns nullptr if no reserved range contains the given number. + const EnumDescriptor::ReservedRange* FindReservedRangeContainingNumber( + int number) const; + + // The number of reserved names in this enum type. + int reserved_name_count() const; + + // Gets a reserved name by index, where 0 <= index < reserved_name_count(). + const std::string& reserved_name(int index) const; + + // Returns true if the name is reserved. + bool IsReservedName(ConstStringParam name) const; + + // Source Location --------------------------------------------------- + + // Updates |*out_location| to the source location of the complete + // extent of this enum declaration. Returns false and leaves + // |*out_location| unchanged iff location information was not available. + bool GetSourceLocation(SourceLocation* out_location) const; + + private: + typedef EnumOptions OptionsType; + + // Allows access to GetLocationPath for annotations. + friend class io::Printer; + friend class compiler::cpp::Formatter; + + // Looks up a value by number. If the value does not exist, dynamically + // creates a new EnumValueDescriptor for that value, assuming that it was + // unknown. If a new descriptor is created, this is done in a thread-safe way, + // and future calls will return the same value descriptor pointer.
+ // + // This is private but is used by Reflection (which is friended below) to + // return a valid EnumValueDescriptor from GetEnum() when this feature is + // enabled. + const EnumValueDescriptor* FindValueByNumberCreatingIfUnknown( + int number) const; + + // See Descriptor::DebugString(). + void DebugString(int depth, std::string* contents, + const DebugStringOptions& options) const; + + // Walks up the descriptor tree to generate the source location path + // to this descriptor from the file root. + void GetLocationPath(std::vector<int>* output) const; + + const std::string* name_; + const std::string* full_name_; + const FileDescriptor* file_; + const Descriptor* containing_type_; + const EnumOptions* options_; + + // True if this is a placeholder for an unknown type. + bool is_placeholder_; + // True if this is a placeholder and the type name wasn't fully-qualified. + bool is_unqualified_placeholder_; + + int value_count_; + EnumValueDescriptor* values_; + + int reserved_range_count_; + int reserved_name_count_; + EnumDescriptor::ReservedRange* reserved_ranges_; + const std::string** reserved_names_; + + // IMPORTANT: If you add a new field, make sure to search for all instances + // of Allocate() and AllocateArray() in + // descriptor.cc and update them to initialize the field. + + // Must be constructed using DescriptorPool. + EnumDescriptor() {} + friend class DescriptorBuilder; + friend class Descriptor; + friend class FieldDescriptor; + friend class EnumValueDescriptor; + friend class FileDescriptor; + friend class DescriptorPool; + friend class Reflection; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(EnumDescriptor); +}; + +// Describes an individual enum constant of a particular type. To get the +// EnumValueDescriptor for a given enum value, first get the EnumDescriptor +// for its type, then use EnumDescriptor::FindValueByName() or +// EnumDescriptor::FindValueByNumber(). Use DescriptorPool to construct +// your own descriptors.
+class PROTOBUF_EXPORT EnumValueDescriptor { + public: + typedef EnumValueDescriptorProto Proto; + + const std::string& name() const; // Name of this enum constant. + int index() const; // Index within the enum's Descriptor. + int number() const; // Numeric value of this enum constant. + + // The full_name of an enum value is a sibling symbol of the enum type. + // e.g. the full name of FieldDescriptorProto::TYPE_INT32 is actually + // "google.protobuf.FieldDescriptorProto.TYPE_INT32", NOT + // "google.protobuf.FieldDescriptorProto.Type.TYPE_INT32". This is to conform + // with C++ scoping rules for enums. + const std::string& full_name() const; + + // The .proto file in which this value was defined. Never nullptr. + const FileDescriptor* file() const; + // The type of this value. Never nullptr. + const EnumDescriptor* type() const; + + // Get options for this enum value. These are specified in the .proto file by + // adding text like "[foo = 1234]" after an enum value definition. Allowed + // options are defined by EnumValueOptions in descriptor.proto, and any + // available extensions of that message. + const EnumValueOptions& options() const; + + // See Descriptor::CopyTo(). + void CopyTo(EnumValueDescriptorProto* proto) const; + + // See Descriptor::DebugString(). + std::string DebugString() const; + + // See Descriptor::DebugStringWithOptions(). + std::string DebugStringWithOptions(const DebugStringOptions& options) const; + + // Source Location --------------------------------------------------- + + // Updates |*out_location| to the source location of the complete + // extent of this enum value declaration. Returns false and leaves + // |*out_location| unchanged iff location information was not available. + bool GetSourceLocation(SourceLocation* out_location) const; + + private: + typedef EnumValueOptions OptionsType; + + // Allows access to GetLocationPath for annotations.
+ friend class io::Printer; + friend class compiler::cpp::Formatter; + + // See Descriptor::DebugString(). + void DebugString(int depth, std::string* contents, + const DebugStringOptions& options) const; + + // Walks up the descriptor tree to generate the source location path + // to this descriptor from the file root. + void GetLocationPath(std::vector<int>* output) const; + + const std::string* name_; + const std::string* full_name_; + int number_; + const EnumDescriptor* type_; + const EnumValueOptions* options_; + // IMPORTANT: If you add a new field, make sure to search for all instances + // of Allocate() and AllocateArray() + // in descriptor.cc and update them to initialize the field. + + // Must be constructed using DescriptorPool. + EnumValueDescriptor() {} + friend class DescriptorBuilder; + friend class EnumDescriptor; + friend class DescriptorPool; + friend class FileDescriptorTables; + friend class Reflection; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(EnumValueDescriptor); +}; + +// Describes an RPC service. Use DescriptorPool to construct your own +// descriptors. +class PROTOBUF_EXPORT ServiceDescriptor { + public: + typedef ServiceDescriptorProto Proto; + + // The name of the service, not including its containing scope. + const std::string& name() const; + // The fully-qualified name of the service, scope delimited by periods. + const std::string& full_name() const; + // Index of this service within the file's services array. + int index() const; + + // The .proto file in which this service was defined. Never nullptr. + const FileDescriptor* file() const; + + // Get options for this service type. These are specified in the .proto file + // by placing lines like "option foo = 1234;" in the service definition. + // Allowed options are defined by ServiceOptions in descriptor.proto, and any + // available extensions of that message. + const ServiceOptions& options() const; + + // The number of methods this service defines.
+ int method_count() const; + // Gets a MethodDescriptor by index, where 0 <= index < method_count(). + // These are returned in the order they were defined in the .proto file. + const MethodDescriptor* method(int index) const; + + // Look up a MethodDescriptor by name. + const MethodDescriptor* FindMethodByName(ConstStringParam name) const; + // See Descriptor::CopyTo(). + void CopyTo(ServiceDescriptorProto* proto) const; + + // See Descriptor::DebugString(). + std::string DebugString() const; + + // See Descriptor::DebugStringWithOptions(). + std::string DebugStringWithOptions(const DebugStringOptions& options) const; + + // Source Location --------------------------------------------------- + + // Updates |*out_location| to the source location of the complete + // extent of this service declaration. Returns false and leaves + // |*out_location| unchanged iff location information was not available. + bool GetSourceLocation(SourceLocation* out_location) const; + + private: + typedef ServiceOptions OptionsType; + + // Allows access to GetLocationPath for annotations. + friend class io::Printer; + friend class compiler::cpp::Formatter; + + // See Descriptor::DebugString(). + void DebugString(std::string* contents, + const DebugStringOptions& options) const; + + // Walks up the descriptor tree to generate the source location path + // to this descriptor from the file root. + void GetLocationPath(std::vector<int>* output) const; + + const std::string* name_; + const std::string* full_name_; + const FileDescriptor* file_; + const ServiceOptions* options_; + MethodDescriptor* methods_; + int method_count_; + // IMPORTANT: If you add a new field, make sure to search for all instances + // of Allocate() and AllocateArray() in + // descriptor.cc and update them to initialize the field. + + // Must be constructed using DescriptorPool.
+ ServiceDescriptor() {} + friend class DescriptorBuilder; + friend class FileDescriptor; + friend class MethodDescriptor; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ServiceDescriptor); +}; + + +// Describes an individual service method. To obtain a MethodDescriptor given +// a service, first get its ServiceDescriptor, then call +// ServiceDescriptor::FindMethodByName(). Use DescriptorPool to construct your +// own descriptors. +class PROTOBUF_EXPORT MethodDescriptor { + public: + typedef MethodDescriptorProto Proto; + + // Name of this method, not including containing scope. + const std::string& name() const; + // The fully-qualified name of the method, scope delimited by periods. + const std::string& full_name() const; + // Index within the service's Descriptor. + int index() const; + + // The .proto file in which this method was defined. Never nullptr. + const FileDescriptor* file() const; + // Gets the service to which this method belongs. Never nullptr. + const ServiceDescriptor* service() const; + + // Gets the type of protocol message which this method accepts as input. + const Descriptor* input_type() const; + // Gets the type of protocol message which this method produces as output. + const Descriptor* output_type() const; + + // Gets whether the client streams multiple requests. + bool client_streaming() const; + // Gets whether the server streams multiple responses. + bool server_streaming() const; + + // Get options for this method. These are specified in the .proto file by + // placing lines like "option foo = 1234;" in curly-braces after a method + // declaration. Allowed options are defined by MethodOptions in + // descriptor.proto, and any available extensions of that message. + const MethodOptions& options() const; + + // See Descriptor::CopyTo(). + void CopyTo(MethodDescriptorProto* proto) const; + + // See Descriptor::DebugString(). + std::string DebugString() const; + + // See Descriptor::DebugStringWithOptions().
+ std::string DebugStringWithOptions(const DebugStringOptions& options) const; + + // Source Location --------------------------------------------------- + + // Updates |*out_location| to the source location of the complete + // extent of this method declaration. Returns false and leaves + // |*out_location| unchanged iff location information was not available. + bool GetSourceLocation(SourceLocation* out_location) const; + + private: + typedef MethodOptions OptionsType; + + // Allows access to GetLocationPath for annotations. + friend class io::Printer; + friend class compiler::cpp::Formatter; + + // See Descriptor::DebugString(). + void DebugString(int depth, std::string* contents, + const DebugStringOptions& options) const; + + // Walks up the descriptor tree to generate the source location path + // to this descriptor from the file root. + void GetLocationPath(std::vector<int>* output) const; + + const std::string* name_; + const std::string* full_name_; + const ServiceDescriptor* service_; + mutable internal::LazyDescriptor input_type_; + mutable internal::LazyDescriptor output_type_; + const MethodOptions* options_; + bool client_streaming_; + bool server_streaming_; + // IMPORTANT: If you add a new field, make sure to search for all instances + // of Allocate() and AllocateArray() in + // descriptor.cc and update them to initialize the field. + + // Must be constructed using DescriptorPool. + MethodDescriptor() {} + friend class DescriptorBuilder; + friend class ServiceDescriptor; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MethodDescriptor); +}; + + +// Describes a whole .proto file. To get the FileDescriptor for a compiled-in +// file, get the descriptor for something defined in that file and call +// descriptor->file(). Use DescriptorPool to construct your own descriptors. +class PROTOBUF_EXPORT FileDescriptor { + public: + typedef FileDescriptorProto Proto; + + // The filename, relative to the source tree. + // e.g.
"foo/bar/baz.proto" + const std::string& name() const; + + // The package, e.g. "google.protobuf.compiler". + const std::string& package() const; + + // The DescriptorPool in which this FileDescriptor and all its contents were + // allocated. Never nullptr. + const DescriptorPool* pool() const; + + // The number of files imported by this one. + int dependency_count() const; + // Gets an imported file by index, where 0 <= index < dependency_count(). + // These are returned in the order they were defined in the .proto file. + const FileDescriptor* dependency(int index) const; + + // The number of files public imported by this one. + // The public dependency list is a subset of the dependency list. + int public_dependency_count() const; + // Gets a public imported file by index, where 0 <= index < + // public_dependency_count(). + // These are returned in the order they were defined in the .proto file. + const FileDescriptor* public_dependency(int index) const; + + // The number of files that are imported for weak fields. + // The weak dependency list is a subset of the dependency list. + int weak_dependency_count() const; + // Gets a weak imported file by index, where 0 <= index < + // weak_dependency_count(). + // These are returned in the order they were defined in the .proto file. + const FileDescriptor* weak_dependency(int index) const; + + // Number of top-level message types defined in this file. (This does not + // include nested types.) + int message_type_count() const; + // Gets a top-level message type, where 0 <= index < message_type_count(). + // These are returned in the order they were defined in the .proto file. + const Descriptor* message_type(int index) const; + + // Number of top-level enum types defined in this file. (This does not + // include nested types.) + int enum_type_count() const; + // Gets a top-level enum type, where 0 <= index < enum_type_count(). + // These are returned in the order they were defined in the .proto file. 
+ const EnumDescriptor* enum_type(int index) const; + + // Number of services defined in this file. + int service_count() const; + // Gets a service, where 0 <= index < service_count(). + // These are returned in the order they were defined in the .proto file. + const ServiceDescriptor* service(int index) const; + + // Number of extensions defined at file scope. (This does not include + // extensions nested within message types.) + int extension_count() const; + // Gets an extension's descriptor, where 0 <= index < extension_count(). + // These are returned in the order they were defined in the .proto file. + const FieldDescriptor* extension(int index) const; + + // Get options for this file. These are specified in the .proto file by + // placing lines like "option foo = 1234;" at the top level, outside of any + // other definitions. Allowed options are defined by FileOptions in + // descriptor.proto, and any available extensions of that message. + const FileOptions& options() const; + + // Syntax of this file. + enum Syntax { + SYNTAX_UNKNOWN = 0, + SYNTAX_PROTO2 = 2, + SYNTAX_PROTO3 = 3, + }; + Syntax syntax() const; + static const char* SyntaxName(Syntax syntax); + + // Find a top-level message type by name. Returns nullptr if not found. + const Descriptor* FindMessageTypeByName(ConstStringParam name) const; + // Find a top-level enum type by name. Returns nullptr if not found. + const EnumDescriptor* FindEnumTypeByName(ConstStringParam name) const; + // Find an enum value defined in any top-level enum by name. Returns nullptr + // if not found. + const EnumValueDescriptor* FindEnumValueByName(ConstStringParam name) const; + // Find a service definition by name. Returns nullptr if not found. + const ServiceDescriptor* FindServiceByName(ConstStringParam name) const; + // Find a top-level extension definition by name. Returns nullptr if not + // found. 
+ const FieldDescriptor* FindExtensionByName(ConstStringParam name) const; + // Similar to FindExtensionByName(), but searches by lowercased-name. See + // Descriptor::FindFieldByLowercaseName(). + const FieldDescriptor* FindExtensionByLowercaseName( + ConstStringParam name) const; + // Similar to FindExtensionByName(), but searches by camelcased-name. See + // Descriptor::FindFieldByCamelcaseName(). + const FieldDescriptor* FindExtensionByCamelcaseName( + ConstStringParam name) const; + + // See Descriptor::CopyTo(). + // Notes: + // - This method does NOT copy source code information since it is relatively + // large and rarely needed. See CopySourceCodeInfoTo() below. + void CopyTo(FileDescriptorProto* proto) const; + // Write the source code information of this FileDescriptor into the given + // FileDescriptorProto. See CopyTo() above. + void CopySourceCodeInfoTo(FileDescriptorProto* proto) const; + // Fill the json_name field of FieldDescriptorProto for all fields. Can only + // be called after CopyTo(). + void CopyJsonNameTo(FileDescriptorProto* proto) const; + + // See Descriptor::DebugString(). + std::string DebugString() const; + + // See Descriptor::DebugStringWithOptions(). + std::string DebugStringWithOptions(const DebugStringOptions& options) const; + + // Returns true if this is a placeholder for an unknown file. This will + // only be the case if this descriptor comes from a DescriptorPool + // with AllowUnknownDependencies() set. + bool is_placeholder() const; + + // Updates |*out_location| to the source location of the complete extent of + // this file declaration (namely, the empty path). + bool GetSourceLocation(SourceLocation* out_location) const; + + // Updates |*out_location| to the source location of the complete + // extent of the declaration or declaration-part denoted by |path|. + // Returns false and leaves |*out_location| unchanged iff location + // information was not available. 
(See SourceCodeInfo for + // description of path encoding.) + bool GetSourceLocation(const std::vector<int>& path, + SourceLocation* out_location) const; + + private: + typedef FileOptions OptionsType; + + const std::string* name_; + const std::string* package_; + const DescriptorPool* pool_; + internal::once_flag* dependencies_once_; + static void DependenciesOnceInit(const FileDescriptor* to_init); + void InternalDependenciesOnceInit() const; + + // These are arranged to minimize padding on 64-bit. + int dependency_count_; + int public_dependency_count_; + int weak_dependency_count_; + int message_type_count_; + int enum_type_count_; + int service_count_; + int extension_count_; + Syntax syntax_; + bool is_placeholder_; + + // Indicates the FileDescriptor is completed building. Used to verify + // that type accessor functions that can possibly build a dependent file + // aren't called during the process of building the file. + bool finished_building_; + + mutable const FileDescriptor** dependencies_; + const std::string** dependencies_names_; + int* public_dependencies_; + int* weak_dependencies_; + Descriptor* message_types_; + EnumDescriptor* enum_types_; + ServiceDescriptor* services_; + FieldDescriptor* extensions_; + const FileOptions* options_; + + const FileDescriptorTables* tables_; + const SourceCodeInfo* source_code_info_; + + // IMPORTANT: If you add a new field, make sure to search for all instances + // of Allocate() and AllocateArray() in + // descriptor.cc and update them to initialize the field.
+ + FileDescriptor() {} + friend class DescriptorBuilder; + friend class DescriptorPool; + friend class Descriptor; + friend class FieldDescriptor; + friend class internal::LazyDescriptor; + friend class OneofDescriptor; + friend class EnumDescriptor; + friend class EnumValueDescriptor; + friend class MethodDescriptor; + friend class ServiceDescriptor; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileDescriptor); +}; + + +// =================================================================== + +// Used to construct descriptors. +// +// Normally you won't want to build your own descriptors. Message classes +// constructed by the protocol compiler will provide them for you. However, +// if you are implementing Message on your own, or if you are writing a +// program which can operate on totally arbitrary types and needs to load +// them from some sort of database, you might need to. +// +// Since Descriptors are composed of a whole lot of cross-linked bits of +// data that would be a pain to put together manually, the +// DescriptorPool class is provided to make the process easier. It can +// take a FileDescriptorProto (defined in descriptor.proto), validate it, +// and convert it to a set of nicely cross-linked Descriptors. +// +// DescriptorPool also helps with memory management. Descriptors are +// composed of many objects containing static data and pointers to each +// other. In all likelihood, when it comes time to delete this data, +// you'll want to delete it all at once. In fact, it is not uncommon to +// have a whole pool of descriptors all cross-linked with each other which +// you wish to delete all at once. This class represents such a pool, and +// handles the memory management for you. +// +// You can also search for descriptors within a DescriptorPool by name, and +// extensions by number. +class PROTOBUF_EXPORT DescriptorPool { + public: + // Create a normal, empty DescriptorPool. 
+ DescriptorPool(); + + // Constructs a DescriptorPool that, when it can't find something among the + // descriptors already in the pool, looks for it in the given + // DescriptorDatabase. + // Notes: + // - If a DescriptorPool is constructed this way, its BuildFile*() methods + // must not be called (they will assert-fail). The only way to populate + // the pool with descriptors is to call the Find*By*() methods. + // - The Find*By*() methods may block the calling thread if the + // DescriptorDatabase blocks. This in turn means that parsing messages + // may block if they need to look up extensions. + // - The Find*By*() methods will use mutexes for thread-safety, thus making + // them slower even when they don't have to fall back to the database. + // In fact, even the Find*By*() methods of descriptor objects owned by + // this pool will be slower, since they will have to obtain locks too. + // - An ErrorCollector may optionally be given to collect validation errors + // in files loaded from the database. If not given, errors will be printed + // to GOOGLE_LOG(ERROR). Remember that files are built on-demand, so this + // ErrorCollector may be called from any thread that calls one of the + // Find*By*() methods. + // - The DescriptorDatabase must not be mutated during the lifetime of + // the DescriptorPool. Even if the client takes care to avoid data races, + // changes to the content of the DescriptorDatabase may not be reflected + // in subsequent lookups in the DescriptorPool. + class ErrorCollector; + explicit DescriptorPool(DescriptorDatabase* fallback_database, + ErrorCollector* error_collector = nullptr); + + ~DescriptorPool(); + + // Get a pointer to the generated pool. Generated protocol message classes + // which are compiled into the binary will allocate their descriptors in + // this pool. Do not add your own descriptors to this pool. + static const DescriptorPool* generated_pool(); + + + // Find a FileDescriptor in the pool by file name. 
Returns nullptr if not + // found. + const FileDescriptor* FindFileByName(ConstStringParam name) const; + + // Find the FileDescriptor in the pool which defines the given symbol. + // If any of the Find*ByName() methods below would succeed, then this is + // equivalent to calling that method and calling the result's file() method. + // Otherwise this returns nullptr. + const FileDescriptor* FindFileContainingSymbol( + ConstStringParam symbol_name) const; + + // Looking up descriptors ------------------------------------------ + // These find descriptors by fully-qualified name. These will find both + // top-level descriptors and nested descriptors. They return nullptr if not + // found. + + const Descriptor* FindMessageTypeByName(ConstStringParam name) const; + const FieldDescriptor* FindFieldByName(ConstStringParam name) const; + const FieldDescriptor* FindExtensionByName(ConstStringParam name) const; + const OneofDescriptor* FindOneofByName(ConstStringParam name) const; + const EnumDescriptor* FindEnumTypeByName(ConstStringParam name) const; + const EnumValueDescriptor* FindEnumValueByName(ConstStringParam name) const; + const ServiceDescriptor* FindServiceByName(ConstStringParam name) const; + const MethodDescriptor* FindMethodByName(ConstStringParam name) const; + + // Finds an extension of the given type by number. The extendee must be + // a member of this DescriptorPool or one of its underlays. + const FieldDescriptor* FindExtensionByNumber(const Descriptor* extendee, + int number) const; + + // Finds an extension of the given type by its printable name. + // See comments above PrintableNameForExtension() for the definition of + // "printable name". The extendee must be a member of this DescriptorPool + // or one of its underlays. Returns nullptr if there is no known message + // extension with the given printable name. 
+ const FieldDescriptor* FindExtensionByPrintableName( + const Descriptor* extendee, ConstStringParam printable_name) const; + + // Finds extensions of extendee. The extensions will be appended to + // out in an undefined order. Only extensions defined directly in + // this DescriptorPool or one of its underlays are guaranteed to be + // found: extensions defined in the fallback database might not be found + // depending on the database implementation. + void FindAllExtensions(const Descriptor* extendee, + std::vector* out) const; + + // Building descriptors -------------------------------------------- + + // When converting a FileDescriptorProto to a FileDescriptor, various + // errors might be detected in the input. The caller may handle these + // programmatically by implementing an ErrorCollector. + class PROTOBUF_EXPORT ErrorCollector { + public: + inline ErrorCollector() {} + virtual ~ErrorCollector(); + + // These constants specify what exact part of the construct is broken. + // This is useful e.g. for mapping the error back to an exact location + // in a .proto file. + enum ErrorLocation { + NAME, // the symbol name, or the package name for files + NUMBER, // field or extension range number + TYPE, // field type + EXTENDEE, // field extendee + DEFAULT_VALUE, // field default value + INPUT_TYPE, // method input type + OUTPUT_TYPE, // method output type + OPTION_NAME, // name in assignment + OPTION_VALUE, // value in option assignment + IMPORT, // import error + OTHER // some other problem + }; + + // Reports an error in the FileDescriptorProto. Use this function if the + // problem occurred should interrupt building the FileDescriptorProto. + virtual void AddError( + const std::string& filename, // File name in which the error occurred. + const std::string& element_name, // Full name of the erroneous element. + const Message* descriptor, // Descriptor of the erroneous element. + ErrorLocation location, // One of the location constants, above. 
+ const std::string& message // Human-readable error message. + ) = 0; + + // Reports a warning in the FileDescriptorProto. Use this function if the + // problem occurred should NOT interrupt building the FileDescriptorProto. + virtual void AddWarning( + const std::string& /*filename*/, // File name in which the error + // occurred. + const std::string& /*element_name*/, // Full name of the erroneous + // element. + const Message* /*descriptor*/, // Descriptor of the erroneous element. + ErrorLocation /*location*/, // One of the location constants, above. + const std::string& /*message*/ // Human-readable error message. + ) {} + + private: + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector); + }; + + // Convert the FileDescriptorProto to real descriptors and place them in + // this DescriptorPool. All dependencies of the file must already be in + // the pool. Returns the resulting FileDescriptor, or nullptr if there were + // problems with the input (e.g. the message was invalid, or dependencies + // were missing). Details about the errors are written to GOOGLE_LOG(ERROR). + const FileDescriptor* BuildFile(const FileDescriptorProto& proto); + + // Same as BuildFile() except errors are sent to the given ErrorCollector. + const FileDescriptor* BuildFileCollectingErrors( + const FileDescriptorProto& proto, ErrorCollector* error_collector); + + // By default, it is an error if a FileDescriptorProto contains references + // to types or other files that are not found in the DescriptorPool (or its + // backing DescriptorDatabase, if any). If you call + // AllowUnknownDependencies(), however, then unknown types and files + // will be replaced by placeholder descriptors (which can be identified by + // the is_placeholder() method). This can allow you to + // perform some useful operations with a .proto file even if you do not + // have access to other .proto files on which it depends. 
However, some + // heuristics must be used to fill in the gaps in information, and these + // can lead to descriptors which are inaccurate. For example, the + // DescriptorPool may be forced to guess whether an unknown type is a message + // or an enum, as well as what package it resides in. Furthermore, + // placeholder types will not be discoverable via FindMessageTypeByName() + // and similar methods, which could confuse some descriptor-based algorithms. + // Generally, the results of this option should be handled with extreme care. + void AllowUnknownDependencies() { allow_unknown_ = true; } + + // By default, weak imports are allowed to be missing, in which case we will + // use a placeholder for the dependency and convert the field to be an Empty + // message field. If you call EnforceWeakDependencies(true), however, the + // DescriptorPool will report an import-not-found error. + void EnforceWeakDependencies(bool enforce) { enforce_weak_ = enforce; } + + // Internal stuff -------------------------------------------------- + // These methods MUST NOT be called from outside the proto2 library. + // These methods may contain hidden pitfalls and may be removed in a + // future library version. + + // Create a DescriptorPool which is overlaid on top of some other pool. + // If you search for a descriptor in the overlay and it is not found, the + // underlay will be searched as a backup. If the underlay has its own + // underlay, that will be searched next, and so on. This also means that + // files built in the overlay will be cross-linked with the underlay's + // descriptors if necessary. The underlay remains property of the caller; + // it must remain valid for the lifetime of the newly-constructed pool. + // + // Example: Say you want to parse a .proto file at runtime in order to use + // its type with a DynamicMessage.
Say this .proto file has dependencies, + // but you know that all the dependencies will be things that are already + // compiled into the binary. For ease of use, you'd like to load the types + // right out of generated_pool() rather than have to parse redundant copies + // of all these .protos at runtime. But, you don't want to add the parsed + // types directly into generated_pool(): this is not allowed, and would be + // bad design anyway. So, instead, you could use generated_pool() as an + // underlay for a new DescriptorPool in which you add only the new file. + // + // WARNING: Use of underlays can lead to many subtle gotchas. Instead, + // try to formulate what you want to do in terms of DescriptorDatabases. + explicit DescriptorPool(const DescriptorPool* underlay); + + // Called by generated classes at init time to add their descriptors to + // generated_pool. Do NOT call this in your own code! filename must be a + // permanent string (e.g. a string literal). + static void InternalAddGeneratedFile(const void* encoded_file_descriptor, + int size); + + // Disallow [enforce_utf8 = false] in .proto files. + void DisallowEnforceUtf8() { disallow_enforce_utf8_ = true; } + + + // For internal use only: Gets a non-const pointer to the generated pool. + // This is called at static-initialization time only, so thread-safety is + // not a concern. If both an underlay and a fallback database are present, + // the underlay takes precedence. + static DescriptorPool* internal_generated_pool(); + + // For internal use only: Gets a non-const pointer to the generated + // descriptor database. + // Only used for testing. + static DescriptorDatabase* internal_generated_database(); + + // For internal use only: Changes the behavior of BuildFile() such that it + // allows the file to make reference to message types declared in other files + // which it did not officially declare as dependencies.
+ void InternalDontEnforceDependencies(); + + // For internal use only: Enables lazy building of dependencies of a file. + // Delay the building of dependencies of a file descriptor until absolutely + // necessary, like when message_type() is called on a field that is defined + // in that dependency's file. This will cause functional issues if a proto + // or one of its dependencies has errors. Should only be enabled for the + // generated_pool_ (because no descriptor build errors are guaranteed by + // the compilation generation process), testing, or if a lack of descriptor + // build errors can be guaranteed for a pool. + void InternalSetLazilyBuildDependencies() { + lazily_build_dependencies_ = true; + // This needs to be set when lazily building dependencies, as it breaks + // dependency checking. + InternalDontEnforceDependencies(); + } + + // For internal use only. + void internal_set_underlay(const DescriptorPool* underlay) { + underlay_ = underlay; + } + + // For internal (unit test) use only: Returns true if a FileDescriptor has + // been constructed for the given file, false otherwise. Useful for testing + // lazy descriptor initialization behavior. + bool InternalIsFileLoaded(ConstStringParam filename) const; + + // Add a file to unused_import_track_files_. DescriptorBuilder will log + // warnings or errors for those files if there is any unused import. + void AddUnusedImportTrackFile(ConstStringParam file_name, + bool is_error = false); + void ClearUnusedImportTrackFiles(); + + private: + friend class Descriptor; + friend class internal::LazyDescriptor; + friend class FieldDescriptor; + friend class EnumDescriptor; + friend class ServiceDescriptor; + friend class MethodDescriptor; + friend class FileDescriptor; + friend class StreamDescriptor; + friend class DescriptorBuilder; + friend class FileDescriptorTables; + + // Return true if the given name is a sub-symbol of any non-package + // descriptor that already exists in the descriptor pool. 
(The full + // definition of such types is already known.) + bool IsSubSymbolOfBuiltType(StringPiece name) const; + + // Tries to find something in the fallback database and link in the + // corresponding proto file. Returns true if successful, in which case + // the caller should search for the thing again. These are declared + // const because they are called by (semantically) const methods. + bool TryFindFileInFallbackDatabase(StringPiece name) const; + bool TryFindSymbolInFallbackDatabase(StringPiece name) const; + bool TryFindExtensionInFallbackDatabase(const Descriptor* containing_type, + int field_number) const; + + // This internal find extension method only checks its table and the underlay + // descriptor_pool's table. It does not check the fallback DB, and no + // additional proto file will be built in this method. + const FieldDescriptor* InternalFindExtensionByNumberNoLock( + const Descriptor* extendee, int number) const; + + // Like BuildFile() but called internally when the file has been loaded from + // fallback_database_. Declared const because it is called by (semantically) + // const methods. + const FileDescriptor* BuildFileFromDatabase( + const FileDescriptorProto& proto) const; + + // Helper for when lazily_build_dependencies_ is set, can look up a symbol + // after the file's descriptor is built, and can build the file where that + // symbol is defined if necessary. Will create a placeholder if the type + // doesn't exist in the fallback database, or the file doesn't build + // successfully.
+ Symbol CrossLinkOnDemandHelper(StringPiece name, + bool expecting_enum) const; + + // Create a placeholder FileDescriptor of the specified name + FileDescriptor* NewPlaceholderFile(StringPiece name) const; + FileDescriptor* NewPlaceholderFileWithMutexHeld(StringPiece name) const; + + enum PlaceholderType { + PLACEHOLDER_MESSAGE, + PLACEHOLDER_ENUM, + PLACEHOLDER_EXTENDABLE_MESSAGE + }; + // Create a placeholder Descriptor of the specified name + Symbol NewPlaceholder(StringPiece name, + PlaceholderType placeholder_type) const; + Symbol NewPlaceholderWithMutexHeld(StringPiece name, + PlaceholderType placeholder_type) const; + + // If fallback_database_ is nullptr, this is nullptr. Otherwise, this is a + // mutex which must be locked while accessing tables_. + internal::WrappedMutex* mutex_; + + // See constructor. + DescriptorDatabase* fallback_database_; + ErrorCollector* default_error_collector_; + const DescriptorPool* underlay_; + + // This class contains a lot of hash maps with complicated types that + // we'd like to keep out of the header. + class Tables; + std::unique_ptr tables_; + + bool enforce_dependencies_; + bool lazily_build_dependencies_; + bool allow_unknown_; + bool enforce_weak_; + bool disallow_enforce_utf8_; + + // Set of files to track for unused imports. The bool value when true means + // unused imports are treated as errors (and as warnings when false). + std::map unused_import_track_files_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DescriptorPool); +}; + + +// inline methods ==================================================== + +// These macros make this repetitive code more readable. +#define PROTOBUF_DEFINE_ACCESSOR(CLASS, FIELD, TYPE) \ + inline TYPE CLASS::FIELD() const { return FIELD##_; } + +// String fields are stored as pointers but returned as const references.
+#define PROTOBUF_DEFINE_STRING_ACCESSOR(CLASS, FIELD) \ + inline const std::string& CLASS::FIELD() const { return *FIELD##_; } + +// Arrays take an index parameter, obviously. +#define PROTOBUF_DEFINE_ARRAY_ACCESSOR(CLASS, FIELD, TYPE) \ + inline TYPE CLASS::FIELD(int index) const { return FIELD##s_ + index; } + +#define PROTOBUF_DEFINE_OPTIONS_ACCESSOR(CLASS, TYPE) \ + inline const TYPE& CLASS::options() const { return *options_; } + +PROTOBUF_DEFINE_STRING_ACCESSOR(Descriptor, name) +PROTOBUF_DEFINE_STRING_ACCESSOR(Descriptor, full_name) +PROTOBUF_DEFINE_ACCESSOR(Descriptor, file, const FileDescriptor*) +PROTOBUF_DEFINE_ACCESSOR(Descriptor, containing_type, const Descriptor*) + +PROTOBUF_DEFINE_ACCESSOR(Descriptor, field_count, int) +PROTOBUF_DEFINE_ACCESSOR(Descriptor, oneof_decl_count, int) +PROTOBUF_DEFINE_ACCESSOR(Descriptor, real_oneof_decl_count, int) +PROTOBUF_DEFINE_ACCESSOR(Descriptor, nested_type_count, int) +PROTOBUF_DEFINE_ACCESSOR(Descriptor, enum_type_count, int) + +PROTOBUF_DEFINE_ARRAY_ACCESSOR(Descriptor, field, const FieldDescriptor*) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(Descriptor, oneof_decl, const OneofDescriptor*) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(Descriptor, nested_type, const Descriptor*) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(Descriptor, enum_type, const EnumDescriptor*) + +PROTOBUF_DEFINE_ACCESSOR(Descriptor, extension_range_count, int) +PROTOBUF_DEFINE_ACCESSOR(Descriptor, extension_count, int) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(Descriptor, extension_range, + const Descriptor::ExtensionRange*) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(Descriptor, extension, const FieldDescriptor*) + +PROTOBUF_DEFINE_ACCESSOR(Descriptor, reserved_range_count, int) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(Descriptor, reserved_range, + const Descriptor::ReservedRange*) +PROTOBUF_DEFINE_ACCESSOR(Descriptor, reserved_name_count, int) + +PROTOBUF_DEFINE_OPTIONS_ACCESSOR(Descriptor, MessageOptions) +PROTOBUF_DEFINE_ACCESSOR(Descriptor, is_placeholder, bool) + 
+PROTOBUF_DEFINE_STRING_ACCESSOR(FieldDescriptor, name) +PROTOBUF_DEFINE_STRING_ACCESSOR(FieldDescriptor, full_name) +PROTOBUF_DEFINE_STRING_ACCESSOR(FieldDescriptor, json_name) +PROTOBUF_DEFINE_STRING_ACCESSOR(FieldDescriptor, lowercase_name) +PROTOBUF_DEFINE_STRING_ACCESSOR(FieldDescriptor, camelcase_name) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, file, const FileDescriptor*) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, number, int) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, is_extension, bool) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, label, FieldDescriptor::Label) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, containing_type, const Descriptor*) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, containing_oneof, + const OneofDescriptor*) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, index_in_oneof, int) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, extension_scope, const Descriptor*) +PROTOBUF_DEFINE_OPTIONS_ACCESSOR(FieldDescriptor, FieldOptions) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, has_default_value, bool) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, has_json_name, bool) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, default_value_int32, int32) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, default_value_int64, int64) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, default_value_uint32, uint32) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, default_value_uint64, uint64) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, default_value_float, float) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, default_value_double, double) +PROTOBUF_DEFINE_ACCESSOR(FieldDescriptor, default_value_bool, bool) +PROTOBUF_DEFINE_STRING_ACCESSOR(FieldDescriptor, default_value_string) + +PROTOBUF_DEFINE_STRING_ACCESSOR(OneofDescriptor, name) +PROTOBUF_DEFINE_STRING_ACCESSOR(OneofDescriptor, full_name) +PROTOBUF_DEFINE_ACCESSOR(OneofDescriptor, containing_type, const Descriptor*) +PROTOBUF_DEFINE_ACCESSOR(OneofDescriptor, field_count, int) +PROTOBUF_DEFINE_OPTIONS_ACCESSOR(OneofDescriptor, OneofOptions) + 
+PROTOBUF_DEFINE_STRING_ACCESSOR(EnumDescriptor, name) +PROTOBUF_DEFINE_STRING_ACCESSOR(EnumDescriptor, full_name) +PROTOBUF_DEFINE_ACCESSOR(EnumDescriptor, file, const FileDescriptor*) +PROTOBUF_DEFINE_ACCESSOR(EnumDescriptor, containing_type, const Descriptor*) +PROTOBUF_DEFINE_ACCESSOR(EnumDescriptor, value_count, int) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(EnumDescriptor, value, + const EnumValueDescriptor*) +PROTOBUF_DEFINE_OPTIONS_ACCESSOR(EnumDescriptor, EnumOptions) +PROTOBUF_DEFINE_ACCESSOR(EnumDescriptor, is_placeholder, bool) +PROTOBUF_DEFINE_ACCESSOR(EnumDescriptor, reserved_range_count, int) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(EnumDescriptor, reserved_range, + const EnumDescriptor::ReservedRange*) +PROTOBUF_DEFINE_ACCESSOR(EnumDescriptor, reserved_name_count, int) + +PROTOBUF_DEFINE_STRING_ACCESSOR(EnumValueDescriptor, name) +PROTOBUF_DEFINE_STRING_ACCESSOR(EnumValueDescriptor, full_name) +PROTOBUF_DEFINE_ACCESSOR(EnumValueDescriptor, number, int) +PROTOBUF_DEFINE_ACCESSOR(EnumValueDescriptor, type, const EnumDescriptor*) +PROTOBUF_DEFINE_OPTIONS_ACCESSOR(EnumValueDescriptor, EnumValueOptions) + +PROTOBUF_DEFINE_STRING_ACCESSOR(ServiceDescriptor, name) +PROTOBUF_DEFINE_STRING_ACCESSOR(ServiceDescriptor, full_name) +PROTOBUF_DEFINE_ACCESSOR(ServiceDescriptor, file, const FileDescriptor*) +PROTOBUF_DEFINE_ACCESSOR(ServiceDescriptor, method_count, int) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(ServiceDescriptor, method, + const MethodDescriptor*) +PROTOBUF_DEFINE_OPTIONS_ACCESSOR(ServiceDescriptor, ServiceOptions) + +PROTOBUF_DEFINE_STRING_ACCESSOR(MethodDescriptor, name) +PROTOBUF_DEFINE_STRING_ACCESSOR(MethodDescriptor, full_name) +PROTOBUF_DEFINE_ACCESSOR(MethodDescriptor, service, const ServiceDescriptor*) +PROTOBUF_DEFINE_OPTIONS_ACCESSOR(MethodDescriptor, MethodOptions) +PROTOBUF_DEFINE_ACCESSOR(MethodDescriptor, client_streaming, bool) +PROTOBUF_DEFINE_ACCESSOR(MethodDescriptor, server_streaming, bool) + +PROTOBUF_DEFINE_STRING_ACCESSOR(FileDescriptor, name) 
+PROTOBUF_DEFINE_STRING_ACCESSOR(FileDescriptor, package) +PROTOBUF_DEFINE_ACCESSOR(FileDescriptor, pool, const DescriptorPool*) +PROTOBUF_DEFINE_ACCESSOR(FileDescriptor, dependency_count, int) +PROTOBUF_DEFINE_ACCESSOR(FileDescriptor, public_dependency_count, int) +PROTOBUF_DEFINE_ACCESSOR(FileDescriptor, weak_dependency_count, int) +PROTOBUF_DEFINE_ACCESSOR(FileDescriptor, message_type_count, int) +PROTOBUF_DEFINE_ACCESSOR(FileDescriptor, enum_type_count, int) +PROTOBUF_DEFINE_ACCESSOR(FileDescriptor, service_count, int) +PROTOBUF_DEFINE_ACCESSOR(FileDescriptor, extension_count, int) +PROTOBUF_DEFINE_OPTIONS_ACCESSOR(FileDescriptor, FileOptions) +PROTOBUF_DEFINE_ACCESSOR(FileDescriptor, is_placeholder, bool) + +PROTOBUF_DEFINE_ARRAY_ACCESSOR(FileDescriptor, message_type, const Descriptor*) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(FileDescriptor, enum_type, const EnumDescriptor*) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(FileDescriptor, service, + const ServiceDescriptor*) +PROTOBUF_DEFINE_ARRAY_ACCESSOR(FileDescriptor, extension, + const FieldDescriptor*) + +#undef PROTOBUF_DEFINE_ACCESSOR +#undef PROTOBUF_DEFINE_STRING_ACCESSOR +#undef PROTOBUF_DEFINE_ARRAY_ACCESSOR + +// A few accessors differ from the macros... + +inline Descriptor::WellKnownType Descriptor::well_known_type() const { + return static_cast(well_known_type_); +} + +inline bool Descriptor::IsExtensionNumber(int number) const { + return FindExtensionRangeContainingNumber(number) != nullptr; +} + +inline bool Descriptor::IsReservedNumber(int number) const { + return FindReservedRangeContainingNumber(number) != nullptr; +} + +inline bool Descriptor::IsReservedName(ConstStringParam name) const { + for (int i = 0; i < reserved_name_count(); i++) { + if (name == static_cast(reserved_name(i))) { + return true; + } + } + return false; +} + +// Can't use PROTOBUF_DEFINE_ARRAY_ACCESSOR because reserved_names_ is actually +// an array of pointers rather than the usual array of objects. 
+inline const std::string& Descriptor::reserved_name(int index) const { + return *reserved_names_[index]; +} + +inline bool EnumDescriptor::IsReservedNumber(int number) const { + return FindReservedRangeContainingNumber(number) != nullptr; +} + +inline bool EnumDescriptor::IsReservedName(ConstStringParam name) const { + for (int i = 0; i < reserved_name_count(); i++) { + if (name == static_cast(reserved_name(i))) { + return true; + } + } + return false; +} + +// Can't use PROTOBUF_DEFINE_ARRAY_ACCESSOR because reserved_names_ is actually +// an array of pointers rather than the usual array of objects. +inline const std::string& EnumDescriptor::reserved_name(int index) const { + return *reserved_names_[index]; +} + +inline FieldDescriptor::Type FieldDescriptor::type() const { + if (type_once_) { + internal::call_once(*type_once_, &FieldDescriptor::TypeOnceInit, this); + } + return type_; +} + +inline bool FieldDescriptor::is_required() const { + return label() == LABEL_REQUIRED; +} + +inline bool FieldDescriptor::is_optional() const { + return label() == LABEL_OPTIONAL; +} + +inline bool FieldDescriptor::is_repeated() const { + return label() == LABEL_REPEATED; +} + +inline bool FieldDescriptor::is_packable() const { + return is_repeated() && IsTypePackable(type()); +} + +inline bool FieldDescriptor::is_map() const { + return type() == TYPE_MESSAGE && is_map_message_type(); +} + +inline bool FieldDescriptor::has_optional_keyword() const { + return proto3_optional_ || + (file()->syntax() == FileDescriptor::SYNTAX_PROTO2 && is_optional() && + !containing_oneof()); +} + +inline const OneofDescriptor* FieldDescriptor::real_containing_oneof() const { + return containing_oneof_ && !containing_oneof_->is_synthetic() + ? 
containing_oneof_ + : nullptr; +} + +inline bool FieldDescriptor::has_presence() const { + if (is_repeated()) return false; + return cpp_type() == CPPTYPE_MESSAGE || containing_oneof() || + file()->syntax() == FileDescriptor::SYNTAX_PROTO2; +} + +// To save space, index() is computed by looking at the descriptor's position +// in the parent's array of children. +inline int FieldDescriptor::index() const { + if (!is_extension_) { + return static_cast(this - containing_type()->fields_); + } else if (extension_scope_ != nullptr) { + return static_cast(this - extension_scope_->extensions_); + } else { + return static_cast(this - file_->extensions_); + } +} + +inline int Descriptor::index() const { + if (containing_type_ == nullptr) { + return static_cast(this - file_->message_types_); + } else { + return static_cast(this - containing_type_->nested_types_); + } +} + +inline const FileDescriptor* OneofDescriptor::file() const { + return containing_type()->file(); +} + +inline int OneofDescriptor::index() const { + return static_cast(this - containing_type_->oneof_decls_); +} + +inline bool OneofDescriptor::is_synthetic() const { + return field_count() == 1 && field(0)->proto3_optional_; +} + +inline int EnumDescriptor::index() const { + if (containing_type_ == nullptr) { + return static_cast(this - file_->enum_types_); + } else { + return static_cast(this - containing_type_->enum_types_); + } +} + +inline const FileDescriptor* EnumValueDescriptor::file() const { + return type()->file(); +} + +inline int EnumValueDescriptor::index() const { + return static_cast(this - type_->values_); +} + +inline int ServiceDescriptor::index() const { + return static_cast(this - file_->services_); +} + +inline const FileDescriptor* MethodDescriptor::file() const { + return service()->file(); +} + +inline int MethodDescriptor::index() const { + return static_cast(this - service_->methods_); +} + +inline const char* FieldDescriptor::type_name() const { + return kTypeToName[type()]; +} + 
+inline FieldDescriptor::CppType FieldDescriptor::cpp_type() const { + return kTypeToCppTypeMap[type()]; +} + +inline const char* FieldDescriptor::cpp_type_name() const { + return kCppTypeToName[kTypeToCppTypeMap[type()]]; +} + +inline FieldDescriptor::CppType FieldDescriptor::TypeToCppType(Type type) { + return kTypeToCppTypeMap[type]; +} + +inline const char* FieldDescriptor::TypeName(Type type) { + return kTypeToName[type]; +} + +inline const char* FieldDescriptor::CppTypeName(CppType cpp_type) { + return kCppTypeToName[cpp_type]; +} + +inline bool FieldDescriptor::IsTypePackable(Type field_type) { + return (field_type != FieldDescriptor::TYPE_STRING && + field_type != FieldDescriptor::TYPE_GROUP && + field_type != FieldDescriptor::TYPE_MESSAGE && + field_type != FieldDescriptor::TYPE_BYTES); +} + +inline const FileDescriptor* FileDescriptor::public_dependency( + int index) const { + return dependency(public_dependencies_[index]); +} + +inline const FileDescriptor* FileDescriptor::weak_dependency(int index) const { + return dependency(weak_dependencies_[index]); +} + +inline FileDescriptor::Syntax FileDescriptor::syntax() const { return syntax_; } + +// Can't use PROTOBUF_DEFINE_ARRAY_ACCESSOR because fields_ is actually an array +// of pointers rather than the usual array of objects. 
+inline const FieldDescriptor* OneofDescriptor::field(int index) const { + return fields_[index]; +} + +} // namespace protobuf +} // namespace google + +#include + +#endif // GOOGLE_PROTOBUF_DESCRIPTOR_H__ diff --git a/third_party/protobuf-lite/google/protobuf/extension_set.h b/third_party/protobuf-lite/google/protobuf/extension_set.h index a1535baa..b3a6e3a0 100644 --- a/third_party/protobuf-lite/google/protobuf/extension_set.h +++ b/third_party/protobuf-lite/google/protobuf/extension_set.h @@ -47,32 +47,40 @@ #include #include -#include +#include +#include +#include #include +#include -namespace google { +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif +namespace google { namespace protobuf { - class Arena; - class Descriptor; // descriptor.h - class FieldDescriptor; // descriptor.h - class DescriptorPool; // descriptor.h - class MessageLite; // message_lite.h - class Message; // message.h - class MessageFactory; // message.h - class UnknownFieldSet; // unknown_field_set.h - namespace io { - class CodedInputStream; // coded_stream.h - class CodedOutputStream; // coded_stream.h - } - namespace internal { - class FieldSkipper; // wire_format_lite.h - } -} +class Arena; +class Descriptor; // descriptor.h +class FieldDescriptor; // descriptor.h +class DescriptorPool; // descriptor.h +class MessageLite; // message_lite.h +class Message; // message.h +class MessageFactory; // message.h +class UnknownFieldSet; // unknown_field_set.h +namespace internal { +class FieldSkipper; // wire_format_lite.h +} // namespace internal +} // namespace protobuf +} // namespace google +namespace google { namespace protobuf { namespace internal { +class InternalMetadata; + // Used to store values of type WireFormatLite::FieldType without having to // #include wire_format_lite.h. 
Also, ensures that we use only one byte to // store these values, which is important to keep the layout of @@ -92,7 +100,9 @@ typedef bool EnumValidityFuncWithArg(const void* arg, int number); struct ExtensionInfo { inline ExtensionInfo() {} inline ExtensionInfo(FieldType type_param, bool isrepeated, bool ispacked) - : type(type_param), is_repeated(isrepeated), is_packed(ispacked), + : type(type_param), + is_repeated(isrepeated), + is_packed(ispacked), descriptor(NULL) {} FieldType type; @@ -104,9 +114,13 @@ struct ExtensionInfo { const void* arg; }; + struct MessageInfo { + const MessageLite* prototype; + }; + union { EnumValidityCheck enum_validity_check; - const MessageLite* message_prototype; + MessageInfo message_info; }; // The descriptor for this extension, if one exists and is known. May be @@ -117,7 +131,7 @@ struct ExtensionInfo { // Abstract interface for an object which looks up extension definitions. Used // when parsing. -class LIBPROTOBUF_EXPORT ExtensionFinder { +class PROTOBUF_EXPORT ExtensionFinder { public: virtual ~ExtensionFinder(); @@ -127,14 +141,14 @@ class LIBPROTOBUF_EXPORT ExtensionFinder { // Implementation of ExtensionFinder which finds extensions defined in .proto // files which have been compiled into the binary. -class LIBPROTOBUF_EXPORT GeneratedExtensionFinder : public ExtensionFinder { +class PROTOBUF_EXPORT GeneratedExtensionFinder : public ExtensionFinder { public: GeneratedExtensionFinder(const MessageLite* containing_type) : containing_type_(containing_type) {} - virtual ~GeneratedExtensionFinder() {} + ~GeneratedExtensionFinder() override {} // Returns true and fills in *output if found, otherwise returns false. - virtual bool Find(int number, ExtensionInfo* output); + bool Find(int number, ExtensionInfo* output) override; private: const MessageLite* containing_type_; @@ -157,10 +171,10 @@ class MessageSetFieldSkipper; // ExtensionSet. 
When parsing, if a tag number is encountered which is // inside one of the message type's extension ranges, the tag is passed // off to the ExtensionSet for parsing. Etc. -class LIBPROTOBUF_EXPORT ExtensionSet { +class PROTOBUF_EXPORT ExtensionSet { public: - ExtensionSet(); - explicit ExtensionSet(::google::protobuf::Arena* arena); + constexpr ExtensionSet(); + explicit ExtensionSet(Arena* arena); ~ExtensionSet(); // These are called at startup by protocol-compiler-generated code to @@ -168,9 +182,9 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // to look up extensions for parsed field numbers. Note that dynamic parsing // does not use ParseField(); only protocol-compiler-generated parsing // methods do. - static void RegisterExtension(const MessageLite* containing_type, - int number, FieldType type, - bool is_repeated, bool is_packed); + static void RegisterExtension(const MessageLite* containing_type, int number, + FieldType type, bool is_repeated, + bool is_packed); static void RegisterEnumExtension(const MessageLite* containing_type, int number, FieldType type, bool is_repeated, bool is_packed, @@ -219,22 +233,23 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // - Strings provide Mutable() in addition to Set() accessors. bool Has(int number) const; - int ExtensionSize(int number) const; // Size of a repeated extension. - int NumExtensions() const; // The number of extensions + int ExtensionSize(int number) const; // Size of a repeated extension. 
+ int NumExtensions() const; // The number of extensions FieldType ExtensionType(int number) const; void ClearExtension(int number); // singular fields ------------------------------------------------- - int32 GetInt32 (int number, int32 default_value) const; - int64 GetInt64 (int number, int64 default_value) const; + int32 GetInt32(int number, int32 default_value) const; + int64 GetInt64(int number, int64 default_value) const; uint32 GetUInt32(int number, uint32 default_value) const; uint64 GetUInt64(int number, uint64 default_value) const; - float GetFloat (int number, float default_value) const; + float GetFloat(int number, float default_value) const; double GetDouble(int number, double default_value) const; - bool GetBool (int number, bool default_value) const; - int GetEnum (int number, int default_value) const; - const string & GetString (int number, const string& default_value) const; + bool GetBool(int number, bool default_value) const; + int GetEnum(int number, int default_value) const; + const std::string& GetString(int number, + const std::string& default_value) const; const MessageLite& GetMessage(int number, const MessageLite& default_value) const; const MessageLite& GetMessage(int number, const Descriptor* message_type, @@ -244,19 +259,19 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // the extension lives in the same pool as the descriptor for the containing // type. 
#define desc const FieldDescriptor* descriptor // avoid line wrapping - void SetInt32 (int number, FieldType type, int32 value, desc); - void SetInt64 (int number, FieldType type, int64 value, desc); + void SetInt32(int number, FieldType type, int32 value, desc); + void SetInt64(int number, FieldType type, int64 value, desc); void SetUInt32(int number, FieldType type, uint32 value, desc); void SetUInt64(int number, FieldType type, uint64 value, desc); - void SetFloat (int number, FieldType type, float value, desc); + void SetFloat(int number, FieldType type, float value, desc); void SetDouble(int number, FieldType type, double value, desc); - void SetBool (int number, FieldType type, bool value, desc); - void SetEnum (int number, FieldType type, int value, desc); - void SetString(int number, FieldType type, const string& value, desc); - string * MutableString (int number, FieldType type, desc); + void SetBool(int number, FieldType type, bool value, desc); + void SetEnum(int number, FieldType type, int value, desc); + void SetString(int number, FieldType type, std::string value, desc); + std::string* MutableString(int number, FieldType type, desc); MessageLite* MutableMessage(int number, FieldType type, const MessageLite& prototype, desc); - MessageLite* MutableMessage(const FieldDescriptor* decsriptor, + MessageLite* MutableMessage(const FieldDescriptor* descriptor, MessageFactory* factory); // Adds the given message to the ExtensionSet, taking ownership of the // message object. Existing message with the same number will be deleted. 
@@ -268,15 +283,15 @@ class LIBPROTOBUF_EXPORT ExtensionSet { const FieldDescriptor* descriptor, MessageLite* message); MessageLite* ReleaseMessage(int number, const MessageLite& prototype); - MessageLite* UnsafeArenaReleaseMessage( - int number, const MessageLite& prototype); + MessageLite* UnsafeArenaReleaseMessage(int number, + const MessageLite& prototype); MessageLite* ReleaseMessage(const FieldDescriptor* descriptor, MessageFactory* factory); MessageLite* UnsafeArenaReleaseMessage(const FieldDescriptor* descriptor, MessageFactory* factory); #undef desc - ::google::protobuf::Arena* GetArenaNoVirtual() const { return arena_; } + Arena* GetArena() const { return arena_; } // repeated fields ------------------------------------------------- @@ -287,8 +302,8 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // Fetches a mutable version of a RepeatedField extension by number, // instantiating one if none exists. Similar to above, user should not use // this directly; it underlies MutableRepeatedExtension(). - void* MutableRawRepeatedField(int number, FieldType field_type, - bool packed, const FieldDescriptor* desc); + void* MutableRawRepeatedField(int number, FieldType field_type, bool packed, + const FieldDescriptor* desc); // This is an overload of MutableRawRepeatedField to maintain compatibility // with old code using a previous API. This version of @@ -296,40 +311,40 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // (E.g.: borg/clients/internal/proto1/proto2_reflection.cc.) 
void* MutableRawRepeatedField(int number); - int32 GetRepeatedInt32 (int number, int index) const; - int64 GetRepeatedInt64 (int number, int index) const; + int32 GetRepeatedInt32(int number, int index) const; + int64 GetRepeatedInt64(int number, int index) const; uint32 GetRepeatedUInt32(int number, int index) const; uint64 GetRepeatedUInt64(int number, int index) const; - float GetRepeatedFloat (int number, int index) const; + float GetRepeatedFloat(int number, int index) const; double GetRepeatedDouble(int number, int index) const; - bool GetRepeatedBool (int number, int index) const; - int GetRepeatedEnum (int number, int index) const; - const string & GetRepeatedString (int number, int index) const; + bool GetRepeatedBool(int number, int index) const; + int GetRepeatedEnum(int number, int index) const; + const std::string& GetRepeatedString(int number, int index) const; const MessageLite& GetRepeatedMessage(int number, int index) const; - void SetRepeatedInt32 (int number, int index, int32 value); - void SetRepeatedInt64 (int number, int index, int64 value); + void SetRepeatedInt32(int number, int index, int32 value); + void SetRepeatedInt64(int number, int index, int64 value); void SetRepeatedUInt32(int number, int index, uint32 value); void SetRepeatedUInt64(int number, int index, uint64 value); - void SetRepeatedFloat (int number, int index, float value); + void SetRepeatedFloat(int number, int index, float value); void SetRepeatedDouble(int number, int index, double value); - void SetRepeatedBool (int number, int index, bool value); - void SetRepeatedEnum (int number, int index, int value); - void SetRepeatedString(int number, int index, const string& value); - string * MutableRepeatedString (int number, int index); + void SetRepeatedBool(int number, int index, bool value); + void SetRepeatedEnum(int number, int index, int value); + void SetRepeatedString(int number, int index, std::string value); + std::string* MutableRepeatedString(int number, int 
index); MessageLite* MutableRepeatedMessage(int number, int index); #define desc const FieldDescriptor* descriptor // avoid line wrapping - void AddInt32 (int number, FieldType type, bool packed, int32 value, desc); - void AddInt64 (int number, FieldType type, bool packed, int64 value, desc); + void AddInt32(int number, FieldType type, bool packed, int32 value, desc); + void AddInt64(int number, FieldType type, bool packed, int64 value, desc); void AddUInt32(int number, FieldType type, bool packed, uint32 value, desc); void AddUInt64(int number, FieldType type, bool packed, uint64 value, desc); - void AddFloat (int number, FieldType type, bool packed, float value, desc); + void AddFloat(int number, FieldType type, bool packed, float value, desc); void AddDouble(int number, FieldType type, bool packed, double value, desc); - void AddBool (int number, FieldType type, bool packed, bool value, desc); - void AddEnum (int number, FieldType type, bool packed, int value, desc); - void AddString(int number, FieldType type, const string& value, desc); - string * AddString (int number, FieldType type, desc); + void AddBool(int number, FieldType type, bool packed, bool value, desc); + void AddEnum(int number, FieldType type, bool packed, int value, desc); + void AddString(int number, FieldType type, std::string value, desc); + std::string* AddString(int number, FieldType type, desc); MessageLite* AddMessage(int number, FieldType type, const MessageLite& prototype, desc); MessageLite* AddMessage(const FieldDescriptor* descriptor, @@ -378,8 +393,52 @@ class LIBPROTOBUF_EXPORT ExtensionSet { const MessageLite* containing_type, io::CodedOutputStream* unknown_fields); + // Lite parser + const char* ParseField(uint64 tag, const char* ptr, + const MessageLite* containing_type, + internal::InternalMetadata* metadata, + internal::ParseContext* ctx); + // Full parser + const char* ParseField(uint64 tag, const char* ptr, + const Message* containing_type, + internal::InternalMetadata* 
metadata, + internal::ParseContext* ctx); + template + const char* ParseMessageSet(const char* ptr, const Msg* containing_type, + InternalMetadata* metadata, + internal::ParseContext* ctx) { + struct MessageSetItem { + const char* _InternalParse(const char* ptr, ParseContext* ctx) { + return me->ParseMessageSetItem(ptr, containing_type, metadata, ctx); + } + ExtensionSet* me; + const Msg* containing_type; + InternalMetadata* metadata; + } item{this, containing_type, metadata}; + while (!ctx->Done(&ptr)) { + uint32 tag; + ptr = ReadTag(ptr, &tag); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + if (tag == WireFormatLite::kMessageSetItemStartTag) { + ptr = ctx->ParseGroup(&item, ptr, tag); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + } else { + if (tag == 0 || (tag & 7) == 4) { + ctx->SetLastTag(tag); + return ptr; + } + ptr = ParseField(tag, ptr, containing_type, metadata, ctx); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + } + } + return ptr; + } + // Parse an entire message in MessageSet format. Such messages have no // fields, only extensions. + bool ParseMessageSetLite(io::CodedInputStream* input, + ExtensionFinder* extension_finder, + FieldSkipper* field_skipper); bool ParseMessageSet(io::CodedInputStream* input, ExtensionFinder* extension_finder, MessageSetFieldSkipper* field_skipper); @@ -387,7 +446,8 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // Specific versions for lite or full messages (constructs the appropriate // FieldSkipper automatically). bool ParseMessageSet(io::CodedInputStream* input, - const MessageLite* containing_type); + const MessageLite* containing_type, + std::string* unknown_fields); bool ParseMessageSet(io::CodedInputStream* input, const Message* containing_type, UnknownFieldSet* unknown_fields); @@ -396,24 +456,28 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // [start_field_number, end_field_number) // to the output stream, using the cached sizes computed when ByteSize() was // last called. Note that the range bounds are inclusive-exclusive. 
- void SerializeWithCachedSizes(int start_field_number, - int end_field_number, - io::CodedOutputStream* output) const; + void SerializeWithCachedSizes(int start_field_number, int end_field_number, + io::CodedOutputStream* output) const { + output->SetCur(_InternalSerialize(start_field_number, end_field_number, + output->Cur(), output->EpsCopy())); + } // Same as SerializeWithCachedSizes, but without any bounds checking. // The caller must ensure that target has sufficient capacity for the // serialized extensions. // // Returns a pointer past the last written byte. - uint8* InternalSerializeWithCachedSizesToArray(int start_field_number, - int end_field_number, - bool deterministic, - uint8* target) const; + uint8* _InternalSerialize(int start_field_number, int end_field_number, + uint8* target, + io::EpsCopyOutputStream* stream) const; // Like above but serializes in MessageSet format. - void SerializeMessageSetWithCachedSizes(io::CodedOutputStream* output) const; - uint8* InternalSerializeMessageSetWithCachedSizesToArray(bool deterministic, - uint8* target) const; + void SerializeMessageSetWithCachedSizes(io::CodedOutputStream* output) const { + output->SetCur(InternalSerializeMessageSetWithCachedSizesToArray( + output->Cur(), output->EpsCopy())); + } + uint8* InternalSerializeMessageSetWithCachedSizesToArray( + uint8* target, io::EpsCopyOutputStream* stream) const; // For backward-compatibility, versions of two of the above methods that // serialize deterministically iff SetDefaultSerializationDeterministic() @@ -446,29 +510,26 @@ class LIBPROTOBUF_EXPORT ExtensionSet { int SpaceUsedExcludingSelf() const; private: - // Interface of a lazily parsed singular message extension. 
- class LIBPROTOBUF_EXPORT LazyMessageExtension { + class PROTOBUF_EXPORT LazyMessageExtension { public: LazyMessageExtension() {} virtual ~LazyMessageExtension() {} - virtual LazyMessageExtension* New(::google::protobuf::Arena* arena) const = 0; + virtual LazyMessageExtension* New(Arena* arena) const = 0; virtual const MessageLite& GetMessage( const MessageLite& prototype) const = 0; virtual MessageLite* MutableMessage(const MessageLite& prototype) = 0; - virtual void SetAllocatedMessage(MessageLite *message) = 0; - virtual void UnsafeArenaSetAllocatedMessage(MessageLite *message) = 0; + virtual void SetAllocatedMessage(MessageLite* message) = 0; + virtual void UnsafeArenaSetAllocatedMessage(MessageLite* message) = 0; virtual MessageLite* ReleaseMessage(const MessageLite& prototype) = 0; virtual MessageLite* UnsafeArenaReleaseMessage( const MessageLite& prototype) = 0; virtual bool IsInitialized() const = 0; - PROTOBUF_RUNTIME_DEPRECATED("Please use ByteSizeLong() instead") - virtual int ByteSize() const { - return internal::ToIntSize(ByteSizeLong()); - } + PROTOBUF_DEPRECATED_MSG("Please use ByteSizeLong() instead") + virtual int ByteSize() const { return internal::ToIntSize(ByteSizeLong()); } virtual size_t ByteSizeLong() const = 0; virtual size_t SpaceUsedLong() const = 0; @@ -477,15 +538,9 @@ class LIBPROTOBUF_EXPORT ExtensionSet { virtual bool ReadMessage(const MessageLite& prototype, io::CodedInputStream* input) = 0; - virtual void WriteMessage(int number, - io::CodedOutputStream* output) const = 0; - virtual uint8* WriteMessageToArray(int number, uint8* target) const = 0; - virtual uint8* InternalWriteMessageToArray(int number, bool, - uint8* target) const { - // TODO(gpike): make this pure virtual. This is a placeholder because we - // need to update third_party/upb, for example. 
- return WriteMessageToArray(number, target); - } + virtual const char* _InternalParse(const char* ptr, ParseContext* ctx) = 0; + virtual uint8* WriteMessageToArray( + int number, uint8* target, io::EpsCopyOutputStream* stream) const = 0; private: virtual void UnusedKeyMethod(); // Dummy key method to avoid weak vtable. @@ -496,27 +551,27 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // The order of these fields packs Extension into 24 bytes when using 8 // byte alignment. Consider this when adding or removing fields here. union { - int32 int32_value; - int64 int64_value; - uint32 uint32_value; - uint64 uint64_value; - float float_value; - double double_value; - bool bool_value; - int enum_value; - string* string_value; - MessageLite* message_value; + int32 int32_value; + int64 int64_value; + uint32 uint32_value; + uint64 uint64_value; + float float_value; + double double_value; + bool bool_value; + int enum_value; + std::string* string_value; + MessageLite* message_value; LazyMessageExtension* lazymessage_value; - RepeatedField * repeated_int32_value; - RepeatedField * repeated_int64_value; - RepeatedField * repeated_uint32_value; - RepeatedField * repeated_uint64_value; - RepeatedField * repeated_float_value; - RepeatedField * repeated_double_value; - RepeatedField * repeated_bool_value; - RepeatedField * repeated_enum_value; - RepeatedPtrField* repeated_string_value; + RepeatedField* repeated_int32_value; + RepeatedField* repeated_int64_value; + RepeatedField* repeated_uint32_value; + RepeatedField* repeated_uint64_value; + RepeatedField* repeated_float_value; + RepeatedField* repeated_double_value; + RepeatedField* repeated_bool_value; + RepeatedField* repeated_enum_value; + RepeatedPtrField* repeated_string_value; RepeatedPtrField* repeated_message_value; }; @@ -552,20 +607,10 @@ class LIBPROTOBUF_EXPORT ExtensionSet { const FieldDescriptor* descriptor; // Some helper methods for operations on a single Extension. 
- void SerializeFieldWithCachedSizes( - int number, - io::CodedOutputStream* output) const; uint8* InternalSerializeFieldWithCachedSizesToArray( - int number, - bool deterministic, - uint8* target) const; - void SerializeMessageSetItemWithCachedSizes( - int number, - io::CodedOutputStream* output) const; + int number, uint8* target, io::EpsCopyOutputStream* stream) const; uint8* InternalSerializeMessageSetItemWithCachedSizesToArray( - int number, - bool deterministic, - uint8* target) const; + int number, uint8* target, io::EpsCopyOutputStream* stream) const; size_t ByteSize(int number) const; size_t MessageSetItemByteSize(int number) const; void Clear(); @@ -625,7 +670,7 @@ class LIBPROTOBUF_EXPORT ExtensionSet { void Erase(int key); size_t Size() const { - return GOOGLE_PREDICT_FALSE(is_large()) ? map_.large->size() : flat_size_; + return PROTOBUF_PREDICT_FALSE(is_large()) ? map_.large->size() : flat_size_; } // Similar to std::for_each. @@ -641,7 +686,7 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // Applies a functor to the pairs in sorted order. template KeyValueFunctor ForEach(KeyValueFunctor func) { - if (GOOGLE_PREDICT_FALSE(is_large())) { + if (PROTOBUF_PREDICT_FALSE(is_large())) { return ForEach(map_.large->begin(), map_.large->end(), std::move(func)); } return ForEach(flat_begin(), flat_end(), std::move(func)); @@ -650,7 +695,7 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // Applies a functor to the pairs in sorted order. template KeyValueFunctor ForEach(KeyValueFunctor func) const { - if (GOOGLE_PREDICT_FALSE(is_large())) { + if (PROTOBUF_PREDICT_FALSE(is_large())) { return ForEach(map_.large->begin(), map_.large->end(), std::move(func)); } return ForEach(flat_begin(), flat_end(), std::move(func)); @@ -680,8 +725,7 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // positioned immediately after the wire tag. 
This method is called in // ParseField() after field number and was_packed_on_wire is extracted from // the wire tag and ExtensionInfo is found by the field number. - bool ParseFieldWithExtensionInfo(int field_number, - bool was_packed_on_wire, + bool ParseFieldWithExtensionInfo(int field_number, bool was_packed_on_wire, const ExtensionInfo& extension, io::CodedInputStream* input, FieldSkipper* field_skipper); @@ -702,12 +746,63 @@ class LIBPROTOBUF_EXPORT ExtensionSet { // it does not exist. Extension* MaybeNewRepeatedExtension(const FieldDescriptor* descriptor); + // Parse a single MessageSet item -- called just after the item group start + // tag has been read. + bool ParseMessageSetItemLite(io::CodedInputStream* input, + ExtensionFinder* extension_finder, + FieldSkipper* field_skipper); // Parse a single MessageSet item -- called just after the item group start // tag has been read. bool ParseMessageSetItem(io::CodedInputStream* input, ExtensionFinder* extension_finder, MessageSetFieldSkipper* field_skipper); + bool FindExtension(int wire_type, uint32 field, + const MessageLite* containing_type, + const internal::ParseContext* /*ctx*/, + ExtensionInfo* extension, bool* was_packed_on_wire) { + GeneratedExtensionFinder finder(containing_type); + return FindExtensionInfoFromFieldNumber(wire_type, field, &finder, + extension, was_packed_on_wire); + } + inline bool FindExtension(int wire_type, uint32 field, + const Message* containing_type, + const internal::ParseContext* ctx, + ExtensionInfo* extension, bool* was_packed_on_wire); + // Used for MessageSet only + const char* ParseFieldMaybeLazily(uint64 tag, const char* ptr, + const MessageLite* containing_type, + internal::InternalMetadata* metadata, + internal::ParseContext* ctx) { + // Lite MessageSet doesn't implement lazy. 
+ return ParseField(tag, ptr, containing_type, metadata, ctx); + } + const char* ParseFieldMaybeLazily(uint64 tag, const char* ptr, + const Message* containing_type, + internal::InternalMetadata* metadata, + internal::ParseContext* ctx); + const char* ParseMessageSetItem(const char* ptr, + const MessageLite* containing_type, + internal::InternalMetadata* metadata, + internal::ParseContext* ctx); + const char* ParseMessageSetItem(const char* ptr, + const Message* containing_type, + internal::InternalMetadata* metadata, + internal::ParseContext* ctx); + + // Implemented in extension_set_inl.h to keep code out of the header file. + template + const char* ParseFieldWithExtensionInfo(int number, bool was_packed_on_wire, + const ExtensionInfo& info, + internal::InternalMetadata* metadata, + const char* ptr, + internal::ParseContext* ctx); + template + const char* ParseMessageSetItemTmpl(const char* ptr, + const Msg* containing_type, + internal::InternalMetadata* metadata, + internal::ParseContext* ctx); + // Hack: RepeatedPtrFieldBase declares ExtensionSet as a friend. This // friendship should automatically extend to ExtensionSet::Extension, but // unfortunately some older compilers (e.g. GCC 3.4.4) do not implement this @@ -735,7 +830,7 @@ class LIBPROTOBUF_EXPORT ExtensionSet { return map_.flat + flat_size_; } - ::google::protobuf::Arena* arena_; + Arena* arena_; // Manual memory-management: // map_.flat is an allocated array of flat_capacity_ elements. @@ -750,25 +845,29 @@ class LIBPROTOBUF_EXPORT ExtensionSet { LargeMap* large; } map_; + static void DeleteFlatMap(const KeyValue* flat, uint16 flat_capacity); + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ExtensionSet); }; +constexpr ExtensionSet::ExtensionSet() + : arena_(nullptr), flat_capacity_(0), flat_size_(0), map_{nullptr} {} + // These are just for convenience... 
inline void ExtensionSet::SetString(int number, FieldType type, - const string& value, + std::string value, const FieldDescriptor* descriptor) { - MutableString(number, type, descriptor)->assign(value); + MutableString(number, type, descriptor)->assign(std::move(value)); } inline void ExtensionSet::SetRepeatedString(int number, int index, - const string& value) { - MutableRepeatedString(number, index)->assign(value); + std::string value) { + MutableRepeatedString(number, index)->assign(std::move(value)); } inline void ExtensionSet::AddString(int number, FieldType type, - const string& value, + std::string value, const FieldDescriptor* descriptor) { - AddString(number, type, descriptor)->assign(value); + AddString(number, type, descriptor)->assign(std::move(value)); } - // =================================================================== // Glue for generated extension accessors @@ -841,8 +940,8 @@ class PrimitiveTypeTraits { static inline ConstType Get(int number, const ExtensionSet& set, ConstType default_value); - static inline void Set(int number, FieldType field_type, - ConstType value, ExtensionSet* set); + static inline void Set(int number, FieldType field_type, ConstType value, + ExtensionSet* set); template static void Register(int number, FieldType type, bool is_packed) { ExtensionSet::RegisterExtension(&ExtendeeT::default_instance(), number, @@ -861,14 +960,15 @@ class RepeatedPrimitiveTypeTraits { static inline Type Get(int number, const ExtensionSet& set, int index); static inline void Set(int number, int index, Type value, ExtensionSet* set); - static inline void Add(int number, FieldType field_type, - bool is_packed, Type value, ExtensionSet* set); + static inline void Add(int number, FieldType field_type, bool is_packed, + Type value, ExtensionSet* set); - static inline const RepeatedField& - GetRepeated(int number, const ExtensionSet& set); - static inline RepeatedField* - MutableRepeated(int number, FieldType field_type, - bool is_packed, 
ExtensionSet* set); + static inline const RepeatedField& GetRepeated( + int number, const ExtensionSet& set); + static inline RepeatedField* MutableRepeated(int number, + FieldType field_type, + bool is_packed, + ExtensionSet* set); static const RepeatedFieldType* GetDefaultRepeatedField(); template @@ -878,11 +978,10 @@ class RepeatedPrimitiveTypeTraits { } }; -LIBPROTOBUF_EXPORT extern ProtobufOnceType repeated_primitive_generic_type_traits_once_init_; - -class LIBPROTOBUF_EXPORT RepeatedPrimitiveDefaults { +class PROTOBUF_EXPORT RepeatedPrimitiveDefaults { private: - template friend class RepeatedPrimitiveTypeTraits; + template + friend class RepeatedPrimitiveTypeTraits; static const RepeatedPrimitiveDefaults* default_instance(); RepeatedField default_repeated_field_int32_; RepeatedField default_repeated_field_int64_; @@ -893,57 +992,62 @@ class LIBPROTOBUF_EXPORT RepeatedPrimitiveDefaults { RepeatedField default_repeated_field_bool_; }; -#define PROTOBUF_DEFINE_PRIMITIVE_TYPE(TYPE, METHOD) \ -template<> inline TYPE PrimitiveTypeTraits::Get( \ - int number, const ExtensionSet& set, TYPE default_value) { \ - return set.Get##METHOD(number, default_value); \ -} \ -template<> inline void PrimitiveTypeTraits::Set( \ - int number, FieldType field_type, TYPE value, ExtensionSet* set) { \ - set->Set##METHOD(number, field_type, value, NULL); \ -} \ - \ -template<> inline TYPE RepeatedPrimitiveTypeTraits::Get( \ - int number, const ExtensionSet& set, int index) { \ - return set.GetRepeated##METHOD(number, index); \ -} \ -template<> inline void RepeatedPrimitiveTypeTraits::Set( \ - int number, int index, TYPE value, ExtensionSet* set) { \ - set->SetRepeated##METHOD(number, index, value); \ -} \ -template<> inline void RepeatedPrimitiveTypeTraits::Add( \ - int number, FieldType field_type, bool is_packed, \ - TYPE value, ExtensionSet* set) { \ - set->Add##METHOD(number, field_type, is_packed, value, NULL); \ -} \ -template<> inline const RepeatedField* \ - 
RepeatedPrimitiveTypeTraits::GetDefaultRepeatedField() { \ - return &RepeatedPrimitiveDefaults::default_instance() \ - ->default_repeated_field_##TYPE##_; \ -} \ -template<> inline const RepeatedField& \ - RepeatedPrimitiveTypeTraits::GetRepeated(int number, \ - const ExtensionSet& set) { \ - return *reinterpret_cast*>( \ - set.GetRawRepeatedField( \ - number, GetDefaultRepeatedField())); \ -} \ -template<> inline RepeatedField* \ - RepeatedPrimitiveTypeTraits::MutableRepeated(int number, \ - FieldType field_type, \ - bool is_packed, \ - ExtensionSet* set) { \ - return reinterpret_cast*>( \ - set->MutableRawRepeatedField(number, field_type, is_packed, NULL)); \ -} +#define PROTOBUF_DEFINE_PRIMITIVE_TYPE(TYPE, METHOD) \ + template <> \ + inline TYPE PrimitiveTypeTraits::Get( \ + int number, const ExtensionSet& set, TYPE default_value) { \ + return set.Get##METHOD(number, default_value); \ + } \ + template <> \ + inline void PrimitiveTypeTraits::Set(int number, FieldType field_type, \ + TYPE value, ExtensionSet* set) { \ + set->Set##METHOD(number, field_type, value, NULL); \ + } \ + \ + template <> \ + inline TYPE RepeatedPrimitiveTypeTraits::Get( \ + int number, const ExtensionSet& set, int index) { \ + return set.GetRepeated##METHOD(number, index); \ + } \ + template <> \ + inline void RepeatedPrimitiveTypeTraits::Set( \ + int number, int index, TYPE value, ExtensionSet* set) { \ + set->SetRepeated##METHOD(number, index, value); \ + } \ + template <> \ + inline void RepeatedPrimitiveTypeTraits::Add( \ + int number, FieldType field_type, bool is_packed, TYPE value, \ + ExtensionSet* set) { \ + set->Add##METHOD(number, field_type, is_packed, value, NULL); \ + } \ + template <> \ + inline const RepeatedField* \ + RepeatedPrimitiveTypeTraits::GetDefaultRepeatedField() { \ + return &RepeatedPrimitiveDefaults::default_instance() \ + ->default_repeated_field_##TYPE##_; \ + } \ + template <> \ + inline const RepeatedField& \ + RepeatedPrimitiveTypeTraits::GetRepeated(int 
number, \ + const ExtensionSet& set) { \ + return *reinterpret_cast*>( \ + set.GetRawRepeatedField(number, GetDefaultRepeatedField())); \ + } \ + template <> \ + inline RepeatedField* \ + RepeatedPrimitiveTypeTraits::MutableRepeated( \ + int number, FieldType field_type, bool is_packed, ExtensionSet* set) { \ + return reinterpret_cast*>( \ + set->MutableRawRepeatedField(number, field_type, is_packed, NULL)); \ + } -PROTOBUF_DEFINE_PRIMITIVE_TYPE( int32, Int32) -PROTOBUF_DEFINE_PRIMITIVE_TYPE( int64, Int64) +PROTOBUF_DEFINE_PRIMITIVE_TYPE(int32, Int32) +PROTOBUF_DEFINE_PRIMITIVE_TYPE(int64, Int64) PROTOBUF_DEFINE_PRIMITIVE_TYPE(uint32, UInt32) PROTOBUF_DEFINE_PRIMITIVE_TYPE(uint64, UInt64) -PROTOBUF_DEFINE_PRIMITIVE_TYPE( float, Float) +PROTOBUF_DEFINE_PRIMITIVE_TYPE(float, Float) PROTOBUF_DEFINE_PRIMITIVE_TYPE(double, Double) -PROTOBUF_DEFINE_PRIMITIVE_TYPE( bool, Bool) +PROTOBUF_DEFINE_PRIMITIVE_TYPE(bool, Bool) #undef PROTOBUF_DEFINE_PRIMITIVE_TYPE @@ -951,22 +1055,22 @@ PROTOBUF_DEFINE_PRIMITIVE_TYPE( bool, Bool) // StringTypeTraits // Strings support both Set() and Mutable(). 
-class LIBPROTOBUF_EXPORT StringTypeTraits { +class PROTOBUF_EXPORT StringTypeTraits { public: - typedef const string& ConstType; - typedef string* MutableType; + typedef const std::string& ConstType; + typedef std::string* MutableType; typedef StringTypeTraits Singular; - static inline const string& Get(int number, const ExtensionSet& set, - ConstType default_value) { + static inline const std::string& Get(int number, const ExtensionSet& set, + ConstType default_value) { return set.GetString(number, default_value); } static inline void Set(int number, FieldType field_type, - const string& value, ExtensionSet* set) { + const std::string& value, ExtensionSet* set) { set->SetString(number, field_type, value, NULL); } - static inline string* Mutable(int number, FieldType field_type, - ExtensionSet* set) { + static inline std::string* Mutable(int number, FieldType field_type, + ExtensionSet* set) { return set->MutableString(number, field_type, NULL); } template @@ -976,46 +1080,43 @@ class LIBPROTOBUF_EXPORT StringTypeTraits { } }; -class LIBPROTOBUF_EXPORT RepeatedStringTypeTraits { +class PROTOBUF_EXPORT RepeatedStringTypeTraits { public: - typedef const string& ConstType; - typedef string* MutableType; + typedef const std::string& ConstType; + typedef std::string* MutableType; typedef RepeatedStringTypeTraits Repeated; - typedef RepeatedPtrField RepeatedFieldType; + typedef RepeatedPtrField RepeatedFieldType; - static inline const string& Get(int number, const ExtensionSet& set, - int index) { + static inline const std::string& Get(int number, const ExtensionSet& set, + int index) { return set.GetRepeatedString(number, index); } - static inline void Set(int number, int index, - const string& value, ExtensionSet* set) { + static inline void Set(int number, int index, const std::string& value, + ExtensionSet* set) { set->SetRepeatedString(number, index, value); } - static inline string* Mutable(int number, int index, ExtensionSet* set) { + static inline std::string* 
Mutable(int number, int index, ExtensionSet* set) { return set->MutableRepeatedString(number, index); } - static inline void Add(int number, FieldType field_type, - bool /*is_packed*/, const string& value, - ExtensionSet* set) { + static inline void Add(int number, FieldType field_type, bool /*is_packed*/, + const std::string& value, ExtensionSet* set) { set->AddString(number, field_type, value, NULL); } - static inline string* Add(int number, FieldType field_type, - ExtensionSet* set) { + static inline std::string* Add(int number, FieldType field_type, + ExtensionSet* set) { return set->AddString(number, field_type, NULL); } - static inline const RepeatedPtrField& - GetRepeated(int number, const ExtensionSet& set) { - return *reinterpret_cast*>( + static inline const RepeatedPtrField& GetRepeated( + int number, const ExtensionSet& set) { + return *reinterpret_cast*>( set.GetRawRepeatedField(number, GetDefaultRepeatedField())); } - static inline RepeatedPtrField* - MutableRepeated(int number, FieldType field_type, - bool is_packed, ExtensionSet* set) { - return reinterpret_cast*>( - set->MutableRawRepeatedField(number, field_type, - is_packed, NULL)); + static inline RepeatedPtrField* MutableRepeated( + int number, FieldType field_type, bool is_packed, ExtensionSet* set) { + return reinterpret_cast*>( + set->MutableRawRepeatedField(number, field_type, is_packed, NULL)); } static const RepeatedFieldType* GetDefaultRepeatedField(); @@ -1047,8 +1148,8 @@ class EnumTypeTraits { ConstType default_value) { return static_cast(set.GetEnum(number, default_value)); } - static inline void Set(int number, FieldType field_type, - ConstType value, ExtensionSet* set) { + static inline void Set(int number, FieldType field_type, ConstType value, + ExtensionSet* set) { GOOGLE_DCHECK(IsValid(value)); set->SetEnum(number, field_type, value, NULL); } @@ -1071,19 +1172,18 @@ class RepeatedEnumTypeTraits { static inline ConstType Get(int number, const ExtensionSet& set, int index) { 
return static_cast(set.GetRepeatedEnum(number, index)); } - static inline void Set(int number, int index, - ConstType value, ExtensionSet* set) { + static inline void Set(int number, int index, ConstType value, + ExtensionSet* set) { GOOGLE_DCHECK(IsValid(value)); set->SetRepeatedEnum(number, index, value); } - static inline void Add(int number, FieldType field_type, - bool is_packed, ConstType value, ExtensionSet* set) { + static inline void Add(int number, FieldType field_type, bool is_packed, + ConstType value, ExtensionSet* set) { GOOGLE_DCHECK(IsValid(value)); set->AddEnum(number, field_type, is_packed, value, NULL); } - static inline const RepeatedField& GetRepeated(int number, - const ExtensionSet& - set) { + static inline const RepeatedField& GetRepeated( + int number, const ExtensionSet& set) { // Hack: the `Extension` struct stores a RepeatedField for enums. // RepeatedField cannot implicitly convert to RepeatedField // so we need to do some casting magic. See message.h for similar @@ -1132,13 +1232,12 @@ class MessageTypeTraits { static inline ConstType Get(int number, const ExtensionSet& set, ConstType default_value) { - return static_cast( - set.GetMessage(number, default_value)); + return static_cast(set.GetMessage(number, default_value)); } static inline MutableType Mutable(int number, FieldType field_type, ExtensionSet* set) { - return static_cast( - set->MutableMessage(number, field_type, Type::default_instance(), NULL)); + return static_cast(set->MutableMessage( + number, field_type, Type::default_instance(), NULL)); } static inline void SetAllocated(int number, FieldType field_type, MutableType message, ExtensionSet* set) { @@ -1151,14 +1250,14 @@ class MessageTypeTraits { } static inline MutableType Release(int number, FieldType /* field_type */, ExtensionSet* set) { - return static_cast(set->ReleaseMessage( - number, Type::default_instance())); + return static_cast( + set->ReleaseMessage(number, Type::default_instance())); } static inline 
MutableType UnsafeArenaRelease(int number, FieldType /* field_type */, ExtensionSet* set) { - return static_cast(set->UnsafeArenaReleaseMessage( - number, Type::default_instance())); + return static_cast( + set->UnsafeArenaReleaseMessage(number, Type::default_instance())); } template static void Register(int number, FieldType type, bool is_packed) { @@ -1191,9 +1290,8 @@ class RepeatedMessageTypeTraits { return static_cast( set->AddMessage(number, field_type, Type::default_instance(), NULL)); } - static inline const RepeatedPtrField& GetRepeated(int number, - const ExtensionSet& - set) { + static inline const RepeatedPtrField& GetRepeated( + int number, const ExtensionSet& set) { // See notes above in RepeatedEnumTypeTraits::GetRepeated(): same // casting hack applies here, because a RepeatedPtrField // cannot naturally become a RepeatedPtrType even though Type is @@ -1219,9 +1317,9 @@ class RepeatedMessageTypeTraits { } }; -template inline - const typename RepeatedMessageTypeTraits::RepeatedFieldType* - RepeatedMessageTypeTraits::GetDefaultRepeatedField() { +template +inline const typename RepeatedMessageTypeTraits::RepeatedFieldType* +RepeatedMessageTypeTraits::GetDefaultRepeatedField() { static auto instance = OnShutdownDelete(new RepeatedFieldType); return instance; } @@ -1232,19 +1330,19 @@ template inline // This is the type of actual extension objects. E.g. if you have: // extends Foo with optional int32 bar = 1234; // then "bar" will be defined in C++ as: -// ExtensionIdentifier, 1, false> bar(1234); +// ExtensionIdentifier, 5, false> bar(1234); // // Note that we could, in theory, supply the field number as a template // parameter, and thus make an instance of ExtensionIdentifier have no -// actual contents. However, if we did that, then using at extension +// actual contents. 
However, if we did that, then using an extension // identifier would not necessarily cause the compiler to output any sort // of reference to any symbol defined in the extension's .pb.o file. Some // linkers will actually drop object files that are not explicitly referenced, // but that would be bad because it would cause this extension to not be // registered at static initialization, and therefore using it would crash. -template +template class ExtensionIdentifier { public: typedef TypeTraitsType TypeTraits; @@ -1281,182 +1379,213 @@ class ExtensionIdentifier { // // For similar reason, we use "_field_type" and "_is_packed" as parameter names // below, so that "field_type" and "is_packed" can be used as field names. -#define GOOGLE_PROTOBUF_EXTENSION_ACCESSORS(CLASSNAME) \ +#define GOOGLE_PROTOBUF_EXTENSION_ACCESSORS(CLASSNAME) \ /* Has, Size, Clear */ \ template \ inline bool HasExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) const { \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) const { \ return _extensions_.Has(id.number()); \ } \ \ template \ inline void ClearExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ _extensions_.ClearExtension(id.number()); \ } \ \ template \ inline int ExtensionSize( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) const { \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) const { \ return _extensions_.ExtensionSize(id.number()); \ } \ \ /* Singular accessors */ \ template \ inline typename 
_proto_TypeTraits::Singular::ConstType GetExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) const { \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) const { \ return _proto_TypeTraits::Get(id.number(), _extensions_, \ id.default_value()); \ } \ \ template \ inline typename _proto_TypeTraits::Singular::MutableType MutableExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ return _proto_TypeTraits::Mutable(id.number(), _field_type, \ &_extensions_); \ } \ \ template \ inline void SetExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ typename _proto_TypeTraits::Singular::ConstType value) { \ _proto_TypeTraits::Set(id.number(), _field_type, value, &_extensions_); \ } \ \ template \ inline void SetAllocatedExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ typename _proto_TypeTraits::Singular::MutableType value) { \ - _proto_TypeTraits::SetAllocated(id.number(), _field_type, \ - value, &_extensions_); \ + _proto_TypeTraits::SetAllocated(id.number(), _field_type, value, \ + &_extensions_); \ } \ template \ inline void UnsafeArenaSetAllocatedExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, 
_is_packed>& id, \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ typename _proto_TypeTraits::Singular::MutableType value) { \ _proto_TypeTraits::UnsafeArenaSetAllocated(id.number(), _field_type, \ value, &_extensions_); \ } \ template \ inline typename _proto_TypeTraits::Singular::MutableType ReleaseExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ return _proto_TypeTraits::Release(id.number(), _field_type, \ &_extensions_); \ } \ template \ inline typename _proto_TypeTraits::Singular::MutableType \ - UnsafeArenaReleaseExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ + UnsafeArenaReleaseExtension( \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ return _proto_TypeTraits::UnsafeArenaRelease(id.number(), _field_type, \ &_extensions_); \ } \ \ /* Repeated accessors */ \ template \ inline typename _proto_TypeTraits::Repeated::ConstType GetExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ int index) const { \ return _proto_TypeTraits::Get(id.number(), _extensions_, index); \ } \ \ template \ inline typename _proto_TypeTraits::Repeated::MutableType MutableExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, 
_proto_TypeTraits, _field_type, _is_packed>& id, \ int index) { \ return _proto_TypeTraits::Mutable(id.number(), index, &_extensions_); \ } \ \ template \ inline void SetExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ int index, typename _proto_TypeTraits::Repeated::ConstType value) { \ _proto_TypeTraits::Set(id.number(), index, value, &_extensions_); \ } \ \ template \ inline typename _proto_TypeTraits::Repeated::MutableType AddExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ return _proto_TypeTraits::Add(id.number(), _field_type, &_extensions_); \ } \ \ template \ inline void AddExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id, \ typename _proto_TypeTraits::Repeated::ConstType value) { \ - _proto_TypeTraits::Add(id.number(), _field_type, _is_packed, \ - value, &_extensions_); \ + _proto_TypeTraits::Add(id.number(), _field_type, _is_packed, value, \ + &_extensions_); \ } \ \ template \ inline const typename _proto_TypeTraits::Repeated::RepeatedFieldType& \ - GetRepeatedExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, \ - _is_packed>& id) const { \ + GetRepeatedExtension( \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) const { \ return 
_proto_TypeTraits::GetRepeated(id.number(), _extensions_); \ } \ \ template \ inline typename _proto_TypeTraits::Repeated::RepeatedFieldType* \ - MutableRepeatedExtension( \ - const ::google::protobuf::internal::ExtensionIdentifier< \ - CLASSNAME, _proto_TypeTraits, _field_type, \ - _is_packed>& id) { \ + MutableRepeatedExtension( \ + const ::PROTOBUF_NAMESPACE_ID::internal::ExtensionIdentifier< \ + CLASSNAME, _proto_TypeTraits, _field_type, _is_packed>& id) { \ return _proto_TypeTraits::MutableRepeated(id.number(), _field_type, \ _is_packed, &_extensions_); \ } } // namespace internal -} // namespace protobuf +// Call this function to ensure that this extensions's reflection is linked into +// the binary: +// +// google::protobuf::LinkExtensionReflection(Foo::my_extension); +// +// This will ensure that the following lookup will succeed: +// +// DescriptorPool::generated_pool()->FindExtensionByName("Foo.my_extension"); +// +// This is often relevant for parsing extensions in text mode. +// +// As a side-effect, it will also guarantee that anything else from the same +// .proto file will also be available for lookup in the generated pool. +// +// This function does not actually register the extension, so it does not need +// to be called before the lookup. However it does need to occur in a function +// that cannot be stripped from the binary (ie. it must be reachable from main). +// +// Best practice is to call this function as close as possible to where the +// reflection is actually needed. This function is very cheap to call, so you +// should not need to worry about its runtime overhead except in tight loops (on +// x86-64 it compiles into two "mov" instructions). 
+template +void LinkExtensionReflection( + const google::protobuf::internal::ExtensionIdentifier< + ExtendeeType, TypeTraitsType, field_type, is_packed>& extension) { + internal::StrongReference(extension); +} + +} // namespace protobuf } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_EXTENSION_SET_H__ diff --git a/third_party/protobuf-lite/google/protobuf/extension_set_inl.h b/third_party/protobuf-lite/google/protobuf/extension_set_inl.h new file mode 100644 index 00000000..074784b9 --- /dev/null +++ b/third_party/protobuf-lite/google/protobuf/extension_set_inl.h @@ -0,0 +1,276 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef GOOGLE_PROTOBUF_EXTENSION_SET_INL_H__ +#define GOOGLE_PROTOBUF_EXTENSION_SET_INL_H__ + +#include +#include +#include + +namespace google { +namespace protobuf { +namespace internal { + +template +const char* ExtensionSet::ParseFieldWithExtensionInfo( + int number, bool was_packed_on_wire, const ExtensionInfo& extension, + InternalMetadata* metadata, const char* ptr, internal::ParseContext* ctx) { + if (was_packed_on_wire) { + switch (extension.type) { +#define HANDLE_TYPE(UPPERCASE, CPP_CAMELCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: \ + return internal::Packed##CPP_CAMELCASE##Parser( \ + MutableRawRepeatedField(number, extension.type, extension.is_packed, \ + extension.descriptor), \ + ptr, ctx); + HANDLE_TYPE(INT32, Int32); + HANDLE_TYPE(INT64, Int64); + HANDLE_TYPE(UINT32, UInt32); + HANDLE_TYPE(UINT64, UInt64); + HANDLE_TYPE(SINT32, SInt32); + HANDLE_TYPE(SINT64, SInt64); + HANDLE_TYPE(FIXED32, Fixed32); + HANDLE_TYPE(FIXED64, Fixed64); + HANDLE_TYPE(SFIXED32, SFixed32); + HANDLE_TYPE(SFIXED64, SFixed64); + HANDLE_TYPE(FLOAT, Float); + HANDLE_TYPE(DOUBLE, Double); + HANDLE_TYPE(BOOL, Bool); +#undef HANDLE_TYPE + + case WireFormatLite::TYPE_ENUM: + return internal::PackedEnumParserArg( + MutableRawRepeatedField(number, extension.type, extension.is_packed, + extension.descriptor), + ptr, ctx, extension.enum_validity_check.func, + extension.enum_validity_check.arg, metadata, number); + case 
WireFormatLite::TYPE_STRING: + case WireFormatLite::TYPE_BYTES: + case WireFormatLite::TYPE_GROUP: + case WireFormatLite::TYPE_MESSAGE: + GOOGLE_LOG(FATAL) << "Non-primitive types can't be packed."; + break; + } + } else { + switch (extension.type) { +#define HANDLE_VARINT_TYPE(UPPERCASE, CPP_CAMELCASE) \ + case WireFormatLite::TYPE_##UPPERCASE: { \ + uint64 value; \ + ptr = VarintParse(ptr, &value); \ + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); \ + if (extension.is_repeated) { \ + Add##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, \ + extension.is_packed, value, extension.descriptor); \ + } else { \ + Set##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, value, \ + extension.descriptor); \ + } \ + } break + + HANDLE_VARINT_TYPE(INT32, Int32); + HANDLE_VARINT_TYPE(INT64, Int64); + HANDLE_VARINT_TYPE(UINT32, UInt32); + HANDLE_VARINT_TYPE(UINT64, UInt64); + HANDLE_VARINT_TYPE(BOOL, Bool); +#undef HANDLE_VARINT_TYPE +#define HANDLE_SVARINT_TYPE(UPPERCASE, CPP_CAMELCASE, SIZE) \ + case WireFormatLite::TYPE_##UPPERCASE: { \ + uint64 val; \ + ptr = VarintParse(ptr, &val); \ + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); \ + auto value = WireFormatLite::ZigZagDecode##SIZE(val); \ + if (extension.is_repeated) { \ + Add##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, \ + extension.is_packed, value, extension.descriptor); \ + } else { \ + Set##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, value, \ + extension.descriptor); \ + } \ + } break + + HANDLE_SVARINT_TYPE(SINT32, Int32, 32); + HANDLE_SVARINT_TYPE(SINT64, Int64, 64); +#undef HANDLE_SVARINT_TYPE +#define HANDLE_FIXED_TYPE(UPPERCASE, CPP_CAMELCASE, CPPTYPE) \ + case WireFormatLite::TYPE_##UPPERCASE: { \ + auto value = UnalignedLoad(ptr); \ + ptr += sizeof(CPPTYPE); \ + if (extension.is_repeated) { \ + Add##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, \ + extension.is_packed, value, extension.descriptor); \ + } else { \ + Set##CPP_CAMELCASE(number, WireFormatLite::TYPE_##UPPERCASE, value, 
\ + extension.descriptor); \ + } \ + } break + + HANDLE_FIXED_TYPE(FIXED32, UInt32, uint32); + HANDLE_FIXED_TYPE(FIXED64, UInt64, uint64); + HANDLE_FIXED_TYPE(SFIXED32, Int32, int32); + HANDLE_FIXED_TYPE(SFIXED64, Int64, int64); + HANDLE_FIXED_TYPE(FLOAT, Float, float); + HANDLE_FIXED_TYPE(DOUBLE, Double, double); +#undef HANDLE_FIXED_TYPE + + case WireFormatLite::TYPE_ENUM: { + uint64 val; + ptr = VarintParse(ptr, &val); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + int value = val; + + if (!extension.enum_validity_check.func( + extension.enum_validity_check.arg, value)) { + WriteVarint(number, val, metadata->mutable_unknown_fields()); + } else if (extension.is_repeated) { + AddEnum(number, WireFormatLite::TYPE_ENUM, extension.is_packed, value, + extension.descriptor); + } else { + SetEnum(number, WireFormatLite::TYPE_ENUM, value, + extension.descriptor); + } + break; + } + + case WireFormatLite::TYPE_BYTES: + case WireFormatLite::TYPE_STRING: { + std::string* value = + extension.is_repeated + ? AddString(number, WireFormatLite::TYPE_STRING, + extension.descriptor) + : MutableString(number, WireFormatLite::TYPE_STRING, + extension.descriptor); + int size = ReadSize(&ptr); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + return ctx->ReadString(ptr, size, value); + } + + case WireFormatLite::TYPE_GROUP: { + MessageLite* value = + extension.is_repeated + ? AddMessage(number, WireFormatLite::TYPE_GROUP, + *extension.message_info.prototype, + extension.descriptor) + : MutableMessage(number, WireFormatLite::TYPE_GROUP, + *extension.message_info.prototype, + extension.descriptor); + uint32 tag = (number << 3) + WireFormatLite::WIRETYPE_START_GROUP; + return ctx->ParseGroup(value, ptr, tag); + } + + case WireFormatLite::TYPE_MESSAGE: { + MessageLite* value = + extension.is_repeated + ? 
AddMessage(number, WireFormatLite::TYPE_MESSAGE, + *extension.message_info.prototype, + extension.descriptor) + : MutableMessage(number, WireFormatLite::TYPE_MESSAGE, + *extension.message_info.prototype, + extension.descriptor); + return ctx->ParseMessage(value, ptr); + } + } + } + return ptr; +} + +template +const char* ExtensionSet::ParseMessageSetItemTmpl( + const char* ptr, const Msg* containing_type, + internal::InternalMetadata* metadata, internal::ParseContext* ctx) { + std::string payload; + uint32 type_id = 0; + bool payload_read = false; + while (!ctx->Done(&ptr)) { + uint32 tag = static_cast(*ptr++); + if (tag == WireFormatLite::kMessageSetTypeIdTag) { + uint64 tmp; + ptr = ParseBigVarint(ptr, &tmp); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + type_id = tmp; + if (payload_read) { + ExtensionInfo extension; + bool was_packed_on_wire; + if (!FindExtension(2, type_id, containing_type, ctx, &extension, + &was_packed_on_wire)) { + WriteLengthDelimited(type_id, payload, + metadata->mutable_unknown_fields()); + } else { + MessageLite* value = + extension.is_repeated + ? AddMessage(type_id, WireFormatLite::TYPE_MESSAGE, + *extension.message_info.prototype, + extension.descriptor) + : MutableMessage(type_id, WireFormatLite::TYPE_MESSAGE, + *extension.message_info.prototype, + extension.descriptor); + + const char* p; + // We can't use regular parse from string as we have to track + // proper recursion depth and descriptor pools. 
+ ParseContext tmp_ctx(ctx->depth(), false, &p, payload); + tmp_ctx.data().pool = ctx->data().pool; + tmp_ctx.data().factory = ctx->data().factory; + GOOGLE_PROTOBUF_PARSER_ASSERT(value->_InternalParse(p, &tmp_ctx) && + tmp_ctx.EndedAtLimit()); + } + type_id = 0; + } + } else if (tag == WireFormatLite::kMessageSetMessageTag) { + if (type_id != 0) { + ptr = ParseFieldMaybeLazily(static_cast(type_id) * 8 + 2, ptr, + containing_type, metadata, ctx); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr); + type_id = 0; + } else { + int32 size = ReadSize(&ptr); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + ptr = ctx->ReadString(ptr, size, &payload); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + payload_read = true; + } + } else { + ptr = ReadTag(ptr - 1, &tag); + if (tag == 0 || (tag & 7) == 4) { + ctx->SetLastTag(tag); + return ptr; + } + ptr = ParseField(tag, ptr, containing_type, metadata, ctx); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + } + } + return ptr; +} + +} // namespace internal +} // namespace protobuf +} // namespace google + +#endif // GOOGLE_PROTOBUF_EXTENSION_SET_INL_H__ diff --git a/third_party/protobuf-lite/google/protobuf/generated_enum_reflection.h b/third_party/protobuf-lite/google/protobuf/generated_enum_reflection.h new file mode 100644 index 00000000..5debc0a2 --- /dev/null +++ b/third_party/protobuf-lite/google/protobuf/generated_enum_reflection.h @@ -0,0 +1,98 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: jasonh@google.com (Jason Hsueh) +// +// This header is logically internal, but is made public because it is used +// from protocol-compiler-generated code, which may reside in other components. +// It provides reflection support for generated enums, and is included in +// generated .pb.h files and should have minimal dependencies. The methods are +// implemented in generated_message_reflection.cc. 
+ +#ifndef GOOGLE_PROTOBUF_GENERATED_ENUM_REFLECTION_H__ +#define GOOGLE_PROTOBUF_GENERATED_ENUM_REFLECTION_H__ + +#include + +#include +#include +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif + +#include + +namespace google { +namespace protobuf { +class EnumDescriptor; +} // namespace protobuf +} // namespace google + +namespace google { +namespace protobuf { + +// Returns the EnumDescriptor for enum type E, which must be a +// proto-declared enum type. Code generated by the protocol compiler +// will include specializations of this template for each enum type declared. +template +const EnumDescriptor* GetEnumDescriptor(); + +namespace internal { + +// Helper for EnumType_Parse functions: try to parse the string 'name' as +// an enum name of the given type, returning true and filling in value on +// success, or returning false and leaving value unchanged on failure. +PROTOBUF_EXPORT bool ParseNamedEnum(const EnumDescriptor* descriptor, + ConstStringParam name, int* value); + +template +bool ParseNamedEnum(const EnumDescriptor* descriptor, ConstStringParam name, + EnumType* value) { + int tmp; + if (!ParseNamedEnum(descriptor, name, &tmp)) return false; + *value = static_cast(tmp); + return true; +} + +// Just a wrapper around printing the name of a value. The main point of this +// function is not to be inlined, so that you can do this without including +// descriptor.h. 
+PROTOBUF_EXPORT const std::string& NameOfEnum(const EnumDescriptor* descriptor, + int value); + +} // namespace internal +} // namespace protobuf +} // namespace google + +#include + +#endif // GOOGLE_PROTOBUF_GENERATED_ENUM_REFLECTION_H__ diff --git a/third_party/protobuf-lite/google/protobuf/generated_enum_util.h b/third_party/protobuf-lite/google/protobuf/generated_enum_util.h index 96b03cc9..f1002e2d 100644 --- a/third_party/protobuf-lite/google/protobuf/generated_enum_util.h +++ b/third_party/protobuf-lite/google/protobuf/generated_enum_util.h @@ -33,14 +33,51 @@ #include +#include +#include + +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif + namespace google { namespace protobuf { // This type trait can be used to cause templates to only match proto2 enum // types. -template struct is_proto_enum : ::std::false_type {}; +template +struct is_proto_enum : ::std::false_type {}; -} // namespace protobuf +namespace internal { + +// The table entry format for storing enum name-to-value mapping used with lite +// protos. This struct and the following related functions should only be used +// by protobuf generated code. +struct EnumEntry { + StringPiece name; + int value; +}; + +// Looks up a numeric enum value given the string name. +PROTOBUF_EXPORT bool LookUpEnumValue(const EnumEntry* enums, size_t size, + StringPiece name, int* value); +// Looks up an enum name given the numeric value. +PROTOBUF_EXPORT int LookUpEnumName(const EnumEntry* enums, + const int* sorted_indices, size_t size, + int value); + +// Initializes the list of enum names in std::string form. 
+PROTOBUF_EXPORT bool InitializeEnumStrings( + const EnumEntry* enums, const int* sorted_indices, size_t size, + internal::ExplicitlyConstructed* enum_strings); + +} // namespace internal +} // namespace protobuf } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_GENERATED_ENUM_UTIL_H__ diff --git a/third_party/protobuf-lite/google/protobuf/generated_message_table_driven.h b/third_party/protobuf-lite/google/protobuf/generated_message_table_driven.h index 10ca3aaa..731d6c52 100644 --- a/third_party/protobuf-lite/google/protobuf/generated_message_table_driven.h +++ b/third_party/protobuf-lite/google/protobuf/generated_message_table_driven.h @@ -36,7 +36,6 @@ #include #include #include -#include // We require C++11 and Clang to use constexpr for variables, as GCC 4.8 // requires constexpr to be consistent between declarations of variables @@ -48,6 +47,12 @@ #define PROTOBUF_CONSTEXPR_VAR #endif // !_clang +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif + +#include + namespace google { namespace protobuf { namespace internal { @@ -64,20 +69,55 @@ static constexpr const unsigned char kNotPackedMask = 0x10; static constexpr const unsigned char kInvalidMask = 0x20; enum ProcessingTypes { - TYPE_STRING_INLINED = 23, - TYPE_BYTES_INLINED = 24, - TYPE_MAP = 25, + TYPE_STRING_CORD = 19, + TYPE_STRING_STRING_PIECE = 20, + TYPE_BYTES_CORD = 21, + TYPE_BYTES_STRING_PIECE = 22, + TYPE_MAP = 23, }; static_assert(TYPE_MAP < kRepeatedMask, "Invalid enum"); +struct PROTOBUF_EXPORT FieldMetadata { + uint32 offset; // offset of this field in the struct + uint32 tag; // field * 8 + wire_type + // byte offset * 8 + bit_offset; + // if the high bit is set then this is the byte offset of the oneof_case + // for this field. + uint32 has_offset; + uint32 type; // the type of this field. + const void* ptr; // auxiliary data + + // From the serializer point of view each fundamental type can occur in + // 4 different ways. 
For simplicity we treat all combinations as a cartesion + // product although not all combinations are allowed. + enum FieldTypeClass { + kPresence, + kNoPresence, + kRepeated, + kPacked, + kOneOf, + kNumTypeClasses // must be last enum + }; + // C++ protobuf has 20 fundamental types, were we added Cord and StringPiece + // and also distinguish the same types if they have different wire format. + enum { + kCordType = 19, + kStringPieceType = 20, + kNumTypes = 20, + kSpecial = kNumTypes * kNumTypeClasses, + }; + + static int CalculateType(int fundamental_type, FieldTypeClass type_class); +}; + // TODO(ckennelly): Add a static assertion to ensure that these masks do not // conflict with wiretypes. // ParseTableField is kept small to help simplify instructions for computing // offsets, as we will always need this information to parse a field. // Additional data, needed for some types, is stored in -// AuxillaryParseTableField. +// AuxiliaryParseTableField. struct ParseTableField { uint32 offset; // The presence_index ordinarily represents a has_bit index, but for fields @@ -95,7 +135,7 @@ struct ParseTableField { struct ParseTable; -union AuxillaryParseTableField { +union AuxiliaryParseTableField { typedef bool (*EnumValidator)(int); // Enums @@ -126,20 +166,20 @@ union AuxillaryParseTableField { }; map_aux maps; - AuxillaryParseTableField() = default; - constexpr AuxillaryParseTableField(AuxillaryParseTableField::enum_aux e) + AuxiliaryParseTableField() = default; + constexpr AuxiliaryParseTableField(AuxiliaryParseTableField::enum_aux e) : enums(e) {} - constexpr AuxillaryParseTableField(AuxillaryParseTableField::message_aux m) + constexpr AuxiliaryParseTableField(AuxiliaryParseTableField::message_aux m) : messages(m) {} - constexpr AuxillaryParseTableField(AuxillaryParseTableField::string_aux s) + constexpr AuxiliaryParseTableField(AuxiliaryParseTableField::string_aux s) : strings(s) {} - constexpr AuxillaryParseTableField(AuxillaryParseTableField::map_aux m) + 
constexpr AuxiliaryParseTableField(AuxiliaryParseTableField::map_aux m) : maps(m) {} }; struct ParseTable { const ParseTableField* fields; - const AuxillaryParseTableField* aux; + const AuxiliaryParseTableField* aux; int max_field_number; // TODO(ckennelly): Do something with this padding. @@ -164,37 +204,133 @@ static_assert(sizeof(ParseTableField) <= 16, "ParseTableField is too large"); // The tables must be composed of POD components to ensure link-time // initialization. static_assert(std::is_pod::value, ""); -static_assert(std::is_pod::value, ""); -static_assert(std::is_pod::value, ""); -static_assert(std::is_pod::value, ""); +static_assert(std::is_pod::value, ""); +static_assert(std::is_pod::value, ""); +static_assert(std::is_pod::value, ""); +static_assert(std::is_pod::value, ""); static_assert(std::is_pod::value, ""); -#ifndef __NVCC__ // This assertion currently fails under NVCC. -static_assert(std::is_pod::value, ""); -#endif - // TODO(ckennelly): Consolidate these implementations into a single one, using // dynamic dispatch to the appropriate unknown field handler. 
bool MergePartialFromCodedStream(MessageLite* msg, const ParseTable& table, io::CodedInputStream* input); bool MergePartialFromCodedStreamLite(MessageLite* msg, const ParseTable& table, - io::CodedInputStream* input); + io::CodedInputStream* input); template bool ParseMap(io::CodedInputStream* input, void* map_field) { typedef typename MapEntryToMapField::MapFieldType MapFieldType; - typedef google::protobuf::Map + typedef Map MapType; typedef typename Entry::template Parser ParserType; ParserType parser(static_cast(map_field)); - return ::google::protobuf::internal::WireFormatLite::ReadMessageNoVirtual(input, - &parser); + return WireFormatLite::ReadMessageNoVirtual(input, &parser); +} + +struct SerializationTable { + int num_fields; + const FieldMetadata* field_table; +}; + +PROTOBUF_EXPORT void SerializeInternal(const uint8* base, + const FieldMetadata* table, + int32 num_fields, + io::CodedOutputStream* output); + +inline void TableSerialize(const MessageLite& msg, + const SerializationTable* table, + io::CodedOutputStream* output) { + const FieldMetadata* field_table = table->field_table; + int num_fields = table->num_fields - 1; + const uint8* base = reinterpret_cast(&msg); + // TODO(gerbens) This skips the first test if we could use the fast + // array serialization path, we should make this + // int cached_size = + // *reinterpret_cast(base + field_table->offset); + // SerializeWithCachedSize(msg, field_table + 1, num_fields, cached_size, ...) + // But we keep conformance with the old way for now. 
+ SerializeInternal(base, field_table + 1, num_fields, output); +} + +uint8* SerializeInternalToArray(const uint8* base, const FieldMetadata* table, + int32 num_fields, bool is_deterministic, + uint8* buffer); + +inline uint8* TableSerializeToArray(const MessageLite& msg, + const SerializationTable* table, + bool is_deterministic, uint8* buffer) { + const uint8* base = reinterpret_cast(&msg); + const FieldMetadata* field_table = table->field_table + 1; + int num_fields = table->num_fields - 1; + return SerializeInternalToArray(base, field_table, num_fields, + is_deterministic, buffer); +} + +template +struct CompareHelper { + bool operator()(const T& a, const T& b) const { return a < b; } +}; + +template <> +struct CompareHelper { + bool operator()(const ArenaStringPtr& a, const ArenaStringPtr& b) const { + return a.Get() < b.Get(); + } +}; + +struct CompareMapKey { + template + bool operator()(const MapEntryHelper& a, + const MapEntryHelper& b) const { + return Compare(a.key_, b.key_); + } + template + bool Compare(const T& a, const T& b) const { + return CompareHelper()(a, b); + } +}; + +template +void MapFieldSerializer(const uint8* base, uint32 offset, uint32 tag, + uint32 has_offset, io::CodedOutputStream* output) { + typedef MapEntryHelper Entry; + typedef typename MapFieldType::MapType::const_iterator Iter; + + const MapFieldType& map_field = + *reinterpret_cast(base + offset); + const SerializationTable* t = + table + + has_offset; // has_offset is overloaded for maps to mean table offset + if (!output->IsSerializationDeterministic()) { + for (Iter it = map_field.GetMap().begin(); it != map_field.GetMap().end(); + ++it) { + Entry map_entry(*it); + output->WriteVarint32(tag); + output->WriteVarint32(map_entry._cached_size_); + SerializeInternal(reinterpret_cast(&map_entry), + t->field_table, t->num_fields, output); + } + } else { + std::vector v; + for (Iter it = map_field.GetMap().begin(); it != map_field.GetMap().end(); + ++it) { + v.push_back(Entry(*it)); 
+ } + std::sort(v.begin(), v.end(), CompareMapKey()); + for (int i = 0; i < v.size(); i++) { + output->WriteVarint32(tag); + output->WriteVarint32(v[i]._cached_size_); + SerializeInternal(reinterpret_cast(&v[i]), t->field_table, + t->num_fields, output); + } + } } } // namespace internal } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DRIVEN_H__ diff --git a/third_party/protobuf-lite/google/protobuf/generated_message_table_driven_lite.h b/third_party/protobuf-lite/google/protobuf/generated_message_table_driven_lite.h index 0d90fe33..3c65acdf 100644 --- a/third_party/protobuf-lite/google/protobuf/generated_message_table_driven_lite.h +++ b/third_party/protobuf-lite/google/protobuf/generated_message_table_driven_lite.h @@ -31,19 +31,17 @@ #ifndef GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DRIVEN_LITE_H__ #define GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DRIVEN_LITE_H__ -#include - #include #include +#include #include -#include -#include #include #include -#include #include +#include + namespace google { namespace protobuf { namespace internal { @@ -51,7 +49,6 @@ namespace internal { enum StringType { StringType_STRING = 0, - StringType_INLINED = 3 }; // Logically a superset of StringType, consisting of all field types that @@ -60,8 +57,7 @@ enum ProcessingType { ProcessingType_STRING = 0, ProcessingType_CORD = 1, ProcessingType_STRING_PIECE = 2, - ProcessingType_INLINED = 3, - ProcessingType_MESSAGE = 4, + ProcessingType_MESSAGE = 3, }; enum Cardinality { @@ -81,15 +77,6 @@ inline const Type* Raw(const MessageLite* msg, int64 offset) { offset); } -template -inline Arena* GetArena(MessageLite* msg, int64 arena_offset) { - if (GOOGLE_PREDICT_FALSE(arena_offset == -1)) { - return NULL; - } - - return Raw(msg, arena_offset)->arena(); -} - inline ExtensionSet* GetExtensionSet(MessageLite* msg, int64 extension_offset) { if (extension_offset == -1) { return NULL; @@ -100,19 +87,17 @@ inline ExtensionSet* 
GetExtensionSet(MessageLite* msg, int64 extension_offset) { template inline Type* AddField(MessageLite* msg, int64 offset) { - static_assert(std::is_pod::value || - std::is_same::value, + static_assert(std::is_pod::value, "Do not assign"); - google::protobuf::RepeatedField* repeated = - Raw >(msg, offset); + RepeatedField* repeated = Raw>(msg, offset); return repeated->Add(); } template <> -inline string* AddField(MessageLite* msg, int64 offset) { - google::protobuf::RepeatedPtrField* repeated = - Raw >(msg, offset); +inline std::string* AddField(MessageLite* msg, int64 offset) { + RepeatedPtrField* repeated = + Raw>(msg, offset); return repeated->Add(); } @@ -157,7 +142,7 @@ inline void SetOneofField(MessageLite* msg, uint32* oneof_case, // Clears a oneof field. The field argument should correspond to the particular // field that is currently set in the oneof. inline void ClearOneofField(const ParseTableField& field, Arena* arena, - MessageLite* msg) { + MessageLite* msg) { switch (field.processing_type & kTypeMask) { case WireFormatLite::TYPE_MESSAGE: if (arena == NULL) { @@ -168,12 +153,7 @@ inline void ClearOneofField(const ParseTableField& field, Arena* arena, case WireFormatLite::TYPE_STRING: case WireFormatLite::TYPE_BYTES: Raw(msg, field.offset) - ->Destroy(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), arena); - break; - - case TYPE_STRING_INLINED: - case TYPE_BYTES_INLINED: - Raw(msg, field.offset)->DestroyNoArena(NULL); + ->Destroy(ArenaStringPtr::EmptyDefault{}, arena); break; default: @@ -206,11 +186,7 @@ inline void ResetOneofField(const ParseTable& table, int field_number, switch (field_type) { case ProcessingType_STRING: Raw(msg, offset) - ->UnsafeSetDefault(static_cast(default_ptr)); - break; - case ProcessingType_INLINED: - new (Raw(msg, offset)) - InlinedStringField(*static_cast(default_ptr)); + ->UnsafeSetDefault(static_cast(default_ptr)); break; case ProcessingType_MESSAGE: MessageLite** submessage = Raw(msg, offset); @@ -221,103 
+197,85 @@ inline void ResetOneofField(const ParseTable& table, int field_number, } } -template +template static inline bool HandleString(io::CodedInputStream* input, MessageLite* msg, Arena* arena, uint32* has_bits, uint32 has_bit_index, int64 offset, const void* default_ptr, const char* field_name) { + StringPiece utf8_string_data; #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED - const char* sdata; - size_t size; -#endif + constexpr bool kValidateUtf8 = is_string_type; +#else + constexpr bool kValidateUtf8 = false; +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED switch (ctype) { - case StringType_INLINED: { - InlinedStringField* s; - switch (cardinality) { - case Cardinality_SINGULAR: - // TODO(ckennelly): Is this optimal? - s = MutableField( - msg, has_bits, has_bit_index, offset); - break; - case Cardinality_REPEATED: - s = AddField(msg, offset); - break; - case Cardinality_ONEOF: - s = Raw(msg, offset); - break; - } - GOOGLE_DCHECK(s != nullptr); - ::std::string* value = s->MutableNoArena(NULL); - - if (GOOGLE_PREDICT_FALSE(!WireFormatLite::ReadString(input, value))) { - return false; - } - -#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED - sdata = value->data(); - size = value->size(); -#endif - break; - } case StringType_STRING: { - string* value; switch (cardinality) { - case Cardinality_SINGULAR: - // TODO(ckennelly): Is this optimal? 
- value = - MutableField(msg, has_bits, has_bit_index, offset) - ->Mutable(static_cast(default_ptr), arena); - break; - case Cardinality_REPEATED: - value = AddField(msg, offset); - break; - case Cardinality_ONEOF: - value = Raw(msg, offset) - ->Mutable(static_cast(default_ptr), arena); - break; - } - GOOGLE_DCHECK(value != nullptr); - - if (GOOGLE_PREDICT_FALSE(!WireFormatLite::ReadString(input, value))) { - return false; + case Cardinality_SINGULAR: { + ArenaStringPtr* field = MutableField( + msg, has_bits, has_bit_index, offset); + std::string* value = field->MutableNoCopy( + static_cast(default_ptr), arena); + if (PROTOBUF_PREDICT_FALSE( + !WireFormatLite::ReadString(input, value))) { + return false; + } + utf8_string_data = field->Get(); + } break; + case Cardinality_REPEATED: { + std::string* value = AddField(msg, offset); + if (PROTOBUF_PREDICT_FALSE( + !WireFormatLite::ReadString(input, value))) { + return false; + } + utf8_string_data = *value; + } break; + case Cardinality_ONEOF: { + ArenaStringPtr* field = Raw(msg, offset); + std::string* value = field->MutableNoCopy( + static_cast(default_ptr), arena); + if (PROTOBUF_PREDICT_FALSE( + !WireFormatLite::ReadString(input, value))) { + return false; + } + utf8_string_data = field->Get(); + } break; + default: + PROTOBUF_ASSUME(false); } - -#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED - sdata = value->data(); - size = value->size(); -#endif break; } + default: + PROTOBUF_ASSUME(false); } -#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED - if (validate) { - WireFormatLite::VerifyUtf8String(sdata, size, WireFormatLite::PARSE, - field_name); + if (kValidateUtf8) { + // TODO(b/118759213): fail if proto3 + WireFormatLite::VerifyUtf8String(utf8_string_data.data(), + utf8_string_data.length(), + WireFormatLite::PARSE, field_name); } -#endif - return true; } -template +template inline bool HandleEnum(const ParseTable& table, io::CodedInputStream* input, MessageLite* msg, uint32* presence, uint32 presence_index, int64 
offset, uint32 tag, int field_number) { int value; - if (GOOGLE_PREDICT_FALSE( + if (PROTOBUF_PREDICT_FALSE( (!WireFormatLite::ReadPrimitive( input, &value)))) { return false; } - AuxillaryParseTableField::EnumValidator validator = + AuxiliaryParseTableField::EnumValidator validator = table.aux[field_number].enums.validator; - if (validator(value)) { + if (validator == nullptr || validator(value)) { switch (cardinality) { case Cardinality_SINGULAR: SetField(msg, presence, presence_index, offset, value); @@ -326,12 +284,13 @@ inline bool HandleEnum(const ParseTable& table, io::CodedInputStream* input, AddField(msg, offset, value); break; case Cardinality_ONEOF: - ClearOneofField(table.fields[presence[presence_index]], - GetArena(msg, table.arena_offset), + ClearOneofField(table.fields[presence[presence_index]], msg->GetArena(), msg); SetOneofField(msg, presence, presence_index, offset, field_number, value); break; + default: + PROTOBUF_ASSUME(false); } } else { UnknownFieldHandler::Varint(msg, table, tag, value); @@ -370,9 +329,10 @@ class MergePartialFromCodedStreamHelper { } }; -template -bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, - io::CodedInputStream* input) { +template +bool MergePartialFromCodedStreamInlined(MessageLite* msg, + const ParseTable& table, + io::CodedInputStream* input) { // We require that has_bits are present, as to avoid having to check for them // for every field. 
// @@ -382,20 +342,19 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, GOOGLE_DCHECK(has_bits != NULL); while (true) { - uint32 tag = input->ReadTag(); - + uint32 tag = input->ReadTagWithCutoffNoLastTag(kMaxTag).first; const WireFormatLite::WireType wire_type = WireFormatLite::GetTagWireType(tag); const int field_number = WireFormatLite::GetTagFieldNumber(tag); - if (field_number > table.max_field_number) { + if (PROTOBUF_PREDICT_FALSE(field_number > table.max_field_number)) { // check for possible extensions if (UnknownFieldHandler::ParseExtension(msg, table, input, tag)) { // successfully parsed continue; } - if (GOOGLE_PREDICT_FALSE( + if (PROTOBUF_PREDICT_FALSE( !UnknownFieldHandler::Skip(msg, table, input, tag))) { return false; } @@ -416,14 +375,11 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, const unsigned char processing_type = data->processing_type; if (data->normal_wiretype == static_cast(wire_type)) { - // TODO(ckennelly): Use a computed goto on GCC/LLVM or otherwise eliminate - // the bounds check on processing_type. 
- switch (processing_type) { #define HANDLE_TYPE(TYPE, CPPTYPE) \ case (WireFormatLite::TYPE_##TYPE): { \ CPPTYPE value; \ - if (GOOGLE_PREDICT_FALSE( \ + if (PROTOBUF_PREDICT_FALSE( \ (!WireFormatLite::ReadPrimitive< \ CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value)))) { \ return false; \ @@ -432,10 +388,9 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, break; \ } \ case (WireFormatLite::TYPE_##TYPE) | kRepeatedMask: { \ - google::protobuf::RepeatedField* values = \ - Raw >(msg, offset); \ - if (GOOGLE_PREDICT_FALSE((!WireFormatLite::ReadRepeatedPrimitive< \ - CPPTYPE, WireFormatLite::TYPE_##TYPE>( \ + RepeatedField* values = Raw>(msg, offset); \ + if (PROTOBUF_PREDICT_FALSE((!WireFormatLite::ReadRepeatedPrimitive< \ + CPPTYPE, WireFormatLite::TYPE_##TYPE>( \ data->tag_size, tag, input, values)))) { \ return false; \ } \ @@ -444,13 +399,13 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, case (WireFormatLite::TYPE_##TYPE) | kOneofMask: { \ uint32* oneof_case = Raw(msg, table.oneof_case_offset); \ CPPTYPE value; \ - if (GOOGLE_PREDICT_FALSE( \ + if (PROTOBUF_PREDICT_FALSE( \ (!WireFormatLite::ReadPrimitive< \ CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, &value)))) { \ return false; \ } \ - ClearOneofField(table.fields[oneof_case[presence_index]], \ - GetArena(msg, table.arena_offset), msg); \ + ClearOneofField(table.fields[oneof_case[presence_index]], msg->GetArena(), \ + msg); \ SetOneofField(msg, oneof_case, presence_index, offset, field_number, \ value); \ break; \ @@ -476,33 +431,16 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, case WireFormatLite::TYPE_BYTES: #ifndef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED case WireFormatLite::TYPE_STRING: -#endif +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED { - Arena* const arena = - GetArena(msg, table.arena_offset); + Arena* const arena = msg->GetArena(); const void* default_ptr = 
table.aux[field_number].strings.default_ptr; - if (GOOGLE_PREDICT_FALSE(( - !HandleString( + if (PROTOBUF_PREDICT_FALSE( + (!HandleString( input, msg, arena, has_bits, presence_index, offset, - default_ptr, NULL)))) { - return false; - } - break; - } - case TYPE_BYTES_INLINED: -#ifndef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED - case TYPE_STRING_INLINED: -#endif - { - Arena* const arena = - GetArena(msg, table.arena_offset); - const void* default_ptr = table.aux[field_number].strings.default_ptr; - - if (GOOGLE_PREDICT_FALSE((!HandleString( - input, msg, arena, has_bits, presence_index, offset, - default_ptr, NULL)))) { + default_ptr, nullptr)))) { return false; } break; @@ -510,10 +448,9 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, case WireFormatLite::TYPE_BYTES | kOneofMask: #ifndef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED case WireFormatLite::TYPE_STRING | kOneofMask: -#endif +#endif // !GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED { - Arena* const arena = - GetArena(msg, table.arena_offset); + Arena* const arena = msg->GetArena(); uint32* oneof_case = Raw(msg, table.oneof_case_offset); const void* default_ptr = table.aux[field_number].strings.default_ptr; @@ -521,58 +458,55 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, table, field_number, arena, msg, oneof_case + presence_index, offset, default_ptr); - if (GOOGLE_PREDICT_FALSE( - (!HandleString( - input, msg, arena, has_bits, presence_index, offset, - default_ptr, NULL)))) { + if (PROTOBUF_PREDICT_FALSE( + (!HandleString(input, msg, arena, has_bits, + presence_index, offset, + default_ptr, nullptr)))) { return false; } break; } case (WireFormatLite::TYPE_BYTES) | kRepeatedMask: - case TYPE_BYTES_INLINED | kRepeatedMask: #ifndef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED case (WireFormatLite::TYPE_STRING) | kRepeatedMask: - case TYPE_STRING_INLINED | kRepeatedMask: -#endif +#endif // !GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED { - Arena* const arena 
= - GetArena(msg, table.arena_offset); - const void* default_ptr = - table.aux[field_number].strings.default_ptr; + Arena* const arena = msg->GetArena(); + const void* default_ptr = table.aux[field_number].strings.default_ptr; - if (GOOGLE_PREDICT_FALSE(( - !HandleString( + if (PROTOBUF_PREDICT_FALSE( + (!HandleString( input, msg, arena, has_bits, presence_index, offset, - default_ptr, NULL)))) { + default_ptr, nullptr)))) { return false; } break; } #ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED case (WireFormatLite::TYPE_STRING): { - Arena* const arena = - GetArena(msg, table.arena_offset); + Arena* const arena = msg->GetArena(); const void* default_ptr = table.aux[field_number].strings.default_ptr; const char* field_name = table.aux[field_number].strings.field_name; - if (GOOGLE_PREDICT_FALSE( - (!HandleString( + if (PROTOBUF_PREDICT_FALSE( + (!HandleString( input, msg, arena, has_bits, presence_index, offset, default_ptr, field_name)))) { return false; } break; } - case TYPE_STRING_INLINED | kRepeatedMask: case (WireFormatLite::TYPE_STRING) | kRepeatedMask: { - Arena* const arena = - GetArena(msg, table.arena_offset); + Arena* const arena = msg->GetArena(); const void* default_ptr = table.aux[field_number].strings.default_ptr; const char* field_name = table.aux[field_number].strings.field_name; - if (GOOGLE_PREDICT_FALSE( - (!HandleString( + if (PROTOBUF_PREDICT_FALSE( + (!HandleString( input, msg, arena, has_bits, presence_index, offset, default_ptr, field_name)))) { return false; @@ -580,8 +514,7 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, break; } case (WireFormatLite::TYPE_STRING) | kOneofMask: { - Arena* const arena = - GetArena(msg, table.arena_offset); + Arena* const arena = msg->GetArena(); uint32* oneof_case = Raw(msg, table.oneof_case_offset); const void* default_ptr = table.aux[field_number].strings.default_ptr; const char* field_name = table.aux[field_number].strings.field_name; @@ -590,19 +523,19 @@ bool 
MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, table, field_number, arena, msg, oneof_case + presence_index, offset, default_ptr); - if (GOOGLE_PREDICT_FALSE( - (!HandleString( + if (PROTOBUF_PREDICT_FALSE( + (!HandleString( input, msg, arena, has_bits, presence_index, offset, default_ptr, field_name)))) { return false; } break; } -#endif +#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED case WireFormatLite::TYPE_ENUM: { - if (GOOGLE_PREDICT_FALSE( - (!HandleEnum( + if (PROTOBUF_PREDICT_FALSE( + (!HandleEnum( table, input, msg, has_bits, presence_index, offset, tag, field_number)))) { return false; @@ -610,9 +543,8 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, break; } case WireFormatLite::TYPE_ENUM | kRepeatedMask: { - if (GOOGLE_PREDICT_FALSE( - (!HandleEnum( + if (PROTOBUF_PREDICT_FALSE( + (!HandleEnum( table, input, msg, has_bits, presence_index, offset, tag, field_number)))) { return false; @@ -621,11 +553,10 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, } case WireFormatLite::TYPE_ENUM | kOneofMask: { uint32* oneof_case = Raw(msg, table.oneof_case_offset); - if (GOOGLE_PREDICT_FALSE( - (!HandleEnum(table, input, msg, oneof_case, - presence_index, offset, tag, - field_number)))) { + if (PROTOBUF_PREDICT_FALSE( + (!HandleEnum( + table, input, msg, oneof_case, presence_index, offset, + tag, field_number)))) { return false; } break; @@ -636,15 +567,14 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, MessageLite* submsg = *submsg_holder; if (submsg == NULL) { - Arena* const arena = - GetArena(msg, table.arena_offset); + Arena* const arena = msg->GetArena(); const MessageLite* prototype = table.aux[field_number].messages.default_message(); submsg = prototype->New(arena); *submsg_holder = submsg; } - if (GOOGLE_PREDICT_FALSE( + if (PROTOBUF_PREDICT_FALSE( !WireFormatLite::ReadGroup(field_number, input, submsg))) { return false; } 
@@ -660,7 +590,7 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, MessageLite* submsg = MergePartialFromCodedStreamHelper::Add(field, prototype); - if (GOOGLE_PREDICT_FALSE( + if (PROTOBUF_PREDICT_FALSE( !WireFormatLite::ReadGroup(field_number, input, submsg))) { return false; } @@ -673,19 +603,18 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, MessageLite* submsg = *submsg_holder; if (submsg == NULL) { - Arena* const arena = - GetArena(msg, table.arena_offset); + Arena* const arena = msg->GetArena(); const MessageLite* prototype = table.aux[field_number].messages.default_message(); if (prototype == NULL) { - prototype = - ::google::protobuf::internal::ImplicitWeakMessage::default_instance(); + prototype = ImplicitWeakMessage::default_instance(); } submsg = prototype->New(arena); *submsg_holder = submsg; } - if (GOOGLE_PREDICT_FALSE(!WireFormatLite::ReadMessage(input, submsg))) { + if (PROTOBUF_PREDICT_FALSE( + !WireFormatLite::ReadMessage(input, submsg))) { return false; } @@ -698,22 +627,21 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, const MessageLite* prototype = table.aux[field_number].messages.default_message(); if (prototype == NULL) { - prototype = - ::google::protobuf::internal::ImplicitWeakMessage::default_instance(); + prototype = ImplicitWeakMessage::default_instance(); } MessageLite* submsg = MergePartialFromCodedStreamHelper::Add(field, prototype); - if (GOOGLE_PREDICT_FALSE(!WireFormatLite::ReadMessage(input, submsg))) { + if (PROTOBUF_PREDICT_FALSE( + !WireFormatLite::ReadMessage(input, submsg))) { return false; } break; } case WireFormatLite::TYPE_MESSAGE | kOneofMask: { - Arena* const arena = - GetArena(msg, table.arena_offset); + Arena* const arena = msg->GetArena(); uint32* oneof_case = Raw(msg, table.oneof_case_offset); MessageLite** submsg_holder = Raw(msg, offset); ResetOneofField( @@ -721,30 +649,15 @@ bool 
MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, offset, NULL); MessageLite* submsg = *submsg_holder; - if (GOOGLE_PREDICT_FALSE(!WireFormatLite::ReadMessage(input, submsg))) { + if (PROTOBUF_PREDICT_FALSE( + !WireFormatLite::ReadMessage(input, submsg))) { return false; } break; } -#ifdef GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED - case TYPE_STRING_INLINED: { - Arena* const arena = - GetArena(msg, table.arena_offset); - const void* default_ptr = table.aux[field_number].strings.default_ptr; - const char* field_name = table.aux[field_number].strings.field_name; - - if (GOOGLE_PREDICT_FALSE(( - !HandleString( - input, msg, arena, has_bits, presence_index, offset, - default_ptr, field_name)))) { - return false; - } - break; - } -#endif // GOOGLE_PROTOBUF_UTF8_VALIDATION_ENABLED case TYPE_MAP: { - if (GOOGLE_PREDICT_FALSE(!(*table.aux[field_number].maps.parse_map)( + if (PROTOBUF_PREDICT_FALSE(!(*table.aux[field_number].maps.parse_map)( input, Raw(msg, offset)))) { return false; } @@ -752,10 +665,11 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, } case 0: { // Done. + input->SetLastTag(tag); return true; } default: - break; + PROTOBUF_ASSUME(false); } } else if (data->packed_wiretype == static_cast(wire_type)) { // Non-packable fields have their packed_wiretype masked with @@ -764,24 +678,19 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, GOOGLE_DCHECK_NE(processing_type, kRepeatedMask); GOOGLE_DCHECK_EQ(0, processing_type & kOneofMask); - GOOGLE_DCHECK_NE(TYPE_BYTES_INLINED | kRepeatedMask, processing_type); - GOOGLE_DCHECK_NE(TYPE_STRING_INLINED | kRepeatedMask, processing_type); - // TODO(ckennelly): Use a computed goto on GCC/LLVM. - // // Mask out kRepeatedMask bit, allowing the jump table to be smaller. 
- switch (static_cast( - processing_type ^ kRepeatedMask)) { -#define HANDLE_PACKED_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD) \ - case WireFormatLite::TYPE_##TYPE: { \ - google::protobuf::RepeatedField* values = \ - Raw >(msg, offset); \ - if (GOOGLE_PREDICT_FALSE( \ - (!WireFormatLite::ReadPackedPrimitive< \ - CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, values)))) { \ - return false; \ - } \ - break; \ + switch (static_cast(processing_type ^ + kRepeatedMask)) { +#define HANDLE_PACKED_TYPE(TYPE, CPPTYPE, CPPTYPE_METHOD) \ + case WireFormatLite::TYPE_##TYPE: { \ + RepeatedField* values = Raw>(msg, offset); \ + if (PROTOBUF_PREDICT_FALSE( \ + (!WireFormatLite::ReadPackedPrimitive< \ + CPPTYPE, WireFormatLite::TYPE_##TYPE>(input, values)))) { \ + return false; \ + } \ + break; \ } HANDLE_PACKED_TYPE(INT32, int32, Int32) @@ -803,29 +712,28 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, #undef HANDLE_PACKED_TYPE case WireFormatLite::TYPE_ENUM: { // To avoid unnecessarily calling MutableUnknownFields (which mutates - // InternalMetadataWithArena) when all inputs in the repeated series + // InternalMetadata) when all inputs in the repeated series // are valid, we implement our own parser rather than call // WireFormat::ReadPackedEnumPreserveUnknowns. 
uint32 length; - if (GOOGLE_PREDICT_FALSE(!input->ReadVarint32(&length))) { + if (PROTOBUF_PREDICT_FALSE(!input->ReadVarint32(&length))) { return false; } - AuxillaryParseTableField::EnumValidator validator = + AuxiliaryParseTableField::EnumValidator validator = table.aux[field_number].enums.validator; - google::protobuf::RepeatedField* values = - Raw >(msg, offset); + RepeatedField* values = Raw>(msg, offset); io::CodedInputStream::Limit limit = input->PushLimit(length); while (input->BytesUntilLimit() > 0) { int value; - if (GOOGLE_PREDICT_FALSE( - (!google::protobuf::internal::WireFormatLite::ReadPrimitive< + if (PROTOBUF_PREDICT_FALSE( + (!WireFormatLite::ReadPrimitive< int, WireFormatLite::TYPE_ENUM>(input, &value)))) { return false; } - if (validator(value)) { + if (validator == nullptr || validator(value)) { values->Add(value); } else { // TODO(ckennelly): Consider caching here. @@ -843,11 +751,12 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, GOOGLE_DCHECK(false); return false; default: - break; + PROTOBUF_ASSUME(false); } } else { if (wire_type == WireFormatLite::WIRETYPE_END_GROUP) { // Must be the end of the message. + input->SetLastTag(tag); return true; } @@ -858,16 +767,36 @@ bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, } // process unknown field. - if (GOOGLE_PREDICT_FALSE( + if (PROTOBUF_PREDICT_FALSE( !UnknownFieldHandler::Skip(msg, table, input, tag))) { return false; } } } +} // NOLINT(readability/fn_size) + +template +bool MergePartialFromCodedStreamImpl(MessageLite* msg, const ParseTable& table, + io::CodedInputStream* input) { + // The main beneficial cutoff values are 1 and 2 byte tags. 
+ // Instantiate calls with the appropriate upper tag range + if (table.max_field_number <= (0x7F >> 3)) { + return MergePartialFromCodedStreamInlined( + msg, table, input); + } else if (table.max_field_number <= (0x3FFF >> 3)) { + return MergePartialFromCodedStreamInlined( + msg, table, input); + } else { + return MergePartialFromCodedStreamInlined< + UnknownFieldHandler, std::numeric_limits::max()>(msg, table, + input); + } } } // namespace internal } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_TABLE_DRIVEN_LITE_H__ diff --git a/third_party/protobuf-lite/google/protobuf/generated_message_util.h b/third_party/protobuf-lite/google/protobuf/generated_message_util.h index 04f68a6e..bae0c1f7 100644 --- a/third_party/protobuf-lite/google/protobuf/generated_message_util.h +++ b/third_party/protobuf-lite/google/protobuf/generated_message_util.h @@ -39,91 +39,79 @@ #define GOOGLE_PROTOBUF_GENERATED_MESSAGE_UTIL_H__ #include + #include #include #include #include -#include #include -#include // Add direct dep on port for pb.cc +#include #include #include -#include #include +#include // Add direct dep on port for pb.cc +#include +#include #include +#include +#include -namespace google { +#include +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif + +namespace google { namespace protobuf { class Arena; +class Message; -namespace io { class CodedInputStream; } +namespace io { +class CodedInputStream; +} namespace internal { +template +inline To DownCast(From* f) { + return PROTOBUF_NAMESPACE_ID::internal::down_cast(f); +} +template +inline To DownCast(From& f) { + return PROTOBUF_NAMESPACE_ID::internal::down_cast(f); +} -// Annotation for the compiler to emit a deprecation message if a field marked -// with option 'deprecated=true' is used in the code, or for other things in -// generated code which are deprecated. -// -// For internal use in the pb.cc files, deprecation warnings are suppressed -// there. 
-#undef DEPRECATED_PROTOBUF_FIELD -#define PROTOBUF_DEPRECATED - -#define GOOGLE_PROTOBUF_DEPRECATED_ATTR - - -// Returns the offset of the given field within the given aggregate type. -// This is equivalent to the ANSI C offsetof() macro. However, according -// to the C++ standard, offsetof() only works on POD types, and GCC -// enforces this requirement with a warning. In practice, this rule is -// unnecessarily strict; there is probably no compiler or platform on -// which the offsets of the direct fields of a class are non-constant. -// Fields inherited from superclasses *can* have non-constant offsets, -// but that's not what this macro will be used for. -#if defined(__clang__) -// For Clang we use __builtin_offsetof() and suppress the warning, -// to avoid Control Flow Integrity and UBSan vptr sanitizers from -// crashing while trying to validate the invalid reinterpet_casts. -#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(TYPE, FIELD) \ - _Pragma("clang diagnostic push") \ - _Pragma("clang diagnostic ignored \"-Winvalid-offsetof\"") \ - __builtin_offsetof(TYPE, FIELD) \ - _Pragma("clang diagnostic pop") -#else -// Note that we calculate relative to the pointer value 16 here since if we -// just use zero, GCC complains about dereferencing a NULL pointer. We -// choose 16 rather than some other number just in case the compiler would -// be confused by an unaligned pointer. -#define GOOGLE_PROTOBUF_GENERATED_MESSAGE_FIELD_OFFSET(TYPE, FIELD) \ - static_cast< ::google::protobuf::uint32>( \ - reinterpret_cast( \ - &reinterpret_cast(16)->FIELD) - \ - reinterpret_cast(16)) -#endif - -// Constants for special floating point values. -LIBPROTOBUF_EXPORT double Infinity(); -LIBPROTOBUF_EXPORT double NaN(); -LIBPROTOBUF_EXPORT void InitProtobufDefaults(); +// This fastpath inlines a single branch instead of having to make the +// InitProtobufDefaults function call. +// It also generates less inlined code than a function-scope static initializer. 
+PROTOBUF_EXPORT extern std::atomic init_protobuf_defaults_state; +PROTOBUF_EXPORT void InitProtobufDefaultsSlow(); +PROTOBUF_EXPORT inline void InitProtobufDefaults() { + if (PROTOBUF_PREDICT_FALSE( + !init_protobuf_defaults_state.load(std::memory_order_acquire))) { + InitProtobufDefaultsSlow(); + } +} // This used by proto1 -inline const std::string& GetEmptyString() { +PROTOBUF_EXPORT inline const std::string& GetEmptyString() { InitProtobufDefaults(); return GetEmptyStringAlreadyInited(); } + // True if IsInitialized() is true for all elements of t. Type is expected // to be a RepeatedPtrField. It's useful to have this // helper here to keep the protobuf compiler from ever having to emit loops in // IsInitialized() methods. We want the C++ compiler to inline this or not // as it sees fit. -template bool AllAreInitialized(const Type& t) { - for (int i = t.size(); --i >= 0; ) { +template +bool AllAreInitialized(const RepeatedPtrField& t) { + for (int i = t.size(); --i >= 0;) { if (!t.Get(i).IsInitialized()) return false; } return true; @@ -133,10 +121,10 @@ template bool AllAreInitialized(const Type& t) { // This version operates on MessageLite to avoid introducing a dependency on the // concrete message type. template -bool AllAreInitializedWeak(const ::google::protobuf::RepeatedPtrField& t) { +bool AllAreInitializedWeak(const RepeatedPtrField& t) { for (int i = t.size(); --i >= 0;) { - if (!reinterpret_cast(t) - .Get<::google::protobuf::internal::ImplicitWeakTypeHandler >(i) + if (!reinterpret_cast(t) + .Get >(i) .IsInitialized()) { return false; } @@ -144,40 +132,6 @@ bool AllAreInitializedWeak(const ::google::protobuf::RepeatedPtrField& t) { return true; } -struct LIBPROTOBUF_EXPORT FieldMetadata { - uint32 offset; // offset of this field in the struct - uint32 tag; // field * 8 + wire_type - // byte offset * 8 + bit_offset; - // if the high bit is set then this is the byte offset of the oneof_case - // for this field. 
- uint32 has_offset; - uint32 type; // the type of this field. - const void* ptr; // auxiliary data - - // From the serializer point of view each fundamental type can occur in - // 4 different ways. For simplicity we treat all combinations as a cartesion - // product although not all combinations are allowed. - enum FieldTypeClass { - kPresence, - kNoPresence, - kRepeated, - kPacked, - kOneOf, - kNumTypeClasses // must be last enum - }; - // C++ protobuf has 20 fundamental types, were we added Cord and StringPiece - // and also distinquish the same types if they have different wire format. - enum { - kCordType = 19, - kStringPieceType = 20, - kInlinedType = 21, - kNumTypes = 21, - kSpecial = kNumTypes * kNumTypeClasses, - }; - - static int CalculateType(int fundamental_type, FieldTypeClass type_class); -}; - inline bool IsPresent(const void* base, uint32 hasbit) { const uint32* has_bits_array = static_cast(base); return (has_bits_array[hasbit / 32] & (1u << (hasbit & 31))) != 0; @@ -191,116 +145,23 @@ inline bool IsOneofPresent(const void* base, uint32 offset, uint32 tag) { typedef void (*SpecialSerializer)(const uint8* base, uint32 offset, uint32 tag, uint32 has_offset, - ::google::protobuf::io::CodedOutputStream* output); - -LIBPROTOBUF_EXPORT void ExtensionSerializer(const uint8* base, uint32 offset, uint32 tag, - uint32 has_offset, - ::google::protobuf::io::CodedOutputStream* output); -LIBPROTOBUF_EXPORT void UnknownFieldSerializerLite(const uint8* base, uint32 offset, uint32 tag, - uint32 has_offset, - ::google::protobuf::io::CodedOutputStream* output); - -struct SerializationTable { - int num_fields; - const FieldMetadata* field_table; -}; - -LIBPROTOBUF_EXPORT void SerializeInternal(const uint8* base, const FieldMetadata* table, - int num_fields, ::google::protobuf::io::CodedOutputStream* output); - -inline void TableSerialize(const ::google::protobuf::MessageLite& msg, - const SerializationTable* table, - ::google::protobuf::io::CodedOutputStream* output) { 
- const FieldMetadata* field_table = table->field_table; - int num_fields = table->num_fields - 1; - const uint8* base = reinterpret_cast(&msg); - // TODO(gerbens) This skips the first test if we could use the fast - // array serialization path, we should make this - // int cached_size = - // *reinterpret_cast(base + field_table->offset); - // SerializeWithCachedSize(msg, field_table + 1, num_fields, cached_size, ...) - // But we keep conformance with the old way for now. - SerializeInternal(base, field_table + 1, num_fields, output); -} - -uint8* SerializeInternalToArray(const uint8* base, const FieldMetadata* table, - int num_fields, bool is_deterministic, - uint8* buffer); - -inline uint8* TableSerializeToArray(const ::google::protobuf::MessageLite& msg, - const SerializationTable* table, - bool is_deterministic, uint8* buffer) { - const uint8* base = reinterpret_cast(&msg); - const FieldMetadata* field_table = table->field_table + 1; - int num_fields = table->num_fields - 1; - return SerializeInternalToArray(base, field_table, num_fields, - is_deterministic, buffer); -} - -template -struct CompareHelper { - bool operator()(const T& a, const T& b) { return a < b; } -}; - -template <> -struct CompareHelper { - bool operator()(const ArenaStringPtr& a, const ArenaStringPtr& b) { - return a.Get() < b.Get(); - } -}; - -struct CompareMapKey { - template - bool operator()(const MapEntryHelper& a, const MapEntryHelper& b) { - return Compare(a.key_, b.key_); - } - template - bool Compare(const T& a, const T& b) { - return CompareHelper()(a, b); - } -}; - -template -void MapFieldSerializer(const uint8* base, uint32 offset, uint32 tag, - uint32 has_offset, - ::google::protobuf::io::CodedOutputStream* output) { - typedef MapEntryHelper Entry; - typedef typename MapFieldType::MapType::const_iterator Iter; - - const MapFieldType& map_field = - *reinterpret_cast(base + offset); - const SerializationTable* t = - table + - has_offset; // has_offset is overloaded for maps to mean 
table offset - if (!output->IsSerializationDeterministic()) { - for (Iter it = map_field.GetMap().begin(); it != map_field.GetMap().end(); - ++it) { - Entry map_entry(*it); - output->WriteVarint32(tag); - output->WriteVarint32(map_entry._cached_size_); - SerializeInternal(reinterpret_cast(&map_entry), - t->field_table, t->num_fields, output); - } - } else { - std::vector v; - for (Iter it = map_field.GetMap().begin(); it != map_field.GetMap().end(); - ++it) { - v.push_back(Entry(*it)); - } - std::sort(v.begin(), v.end(), CompareMapKey()); - for (int i = 0; i < v.size(); i++) { - output->WriteVarint32(tag); - output->WriteVarint32(v[i]._cached_size_); - SerializeInternal(reinterpret_cast(&v[i]), t->field_table, - t->num_fields, output); - } - } -} - -LIBPROTOBUF_EXPORT MessageLite* DuplicateIfNonNullInternal(MessageLite* message); -LIBPROTOBUF_EXPORT MessageLite* GetOwnedMessageInternal(Arena* message_arena, - MessageLite* submessage, - Arena* submessage_arena); + io::CodedOutputStream* output); + +PROTOBUF_EXPORT void ExtensionSerializer(const uint8* base, uint32 offset, + uint32 tag, uint32 has_offset, + io::CodedOutputStream* output); +PROTOBUF_EXPORT void UnknownFieldSerializerLite(const uint8* base, + uint32 offset, uint32 tag, + uint32 has_offset, + io::CodedOutputStream* output); + +PROTOBUF_EXPORT MessageLite* DuplicateIfNonNullInternal(MessageLite* message); +PROTOBUF_EXPORT MessageLite* GetOwnedMessageInternal(Arena* message_arena, + MessageLite* submessage, + Arena* submessage_arena); +PROTOBUF_EXPORT void GenericSwap(MessageLite* m1, MessageLite* m2); +// We specialize GenericSwap for non-lite messages to benefit from reflection. +PROTOBUF_EXPORT void GenericSwap(Message* m1, Message* m2); template T* DuplicateIfNonNull(T* message) { @@ -322,17 +183,18 @@ T* GetOwnedMessage(Arena* message_arena, T* submessage, // Hide atomic from the public header and allow easy change to regular int // on platforms where the atomic might have a perf impact. 
-class LIBPROTOBUF_EXPORT CachedSize { +class PROTOBUF_EXPORT CachedSize { public: int Get() const { return size_.load(std::memory_order_relaxed); } void Set(int size) { size_.store(size, std::memory_order_relaxed); } + private: std::atomic size_{0}; }; // SCCInfo represents information of a strongly connected component of // mutual dependent messages. -struct LIBPROTOBUF_EXPORT SCCInfoBase { +struct PROTOBUF_EXPORT SCCInfoBase { // We use 0 for the Initialized state, because test eax,eax, jnz is smaller // and is subject to macro fusion. enum { @@ -340,52 +202,69 @@ struct LIBPROTOBUF_EXPORT SCCInfoBase { kRunning = 1, kUninitialized = -1, // initial state }; -#ifndef _MSC_VER - std::atomic visit_status; -#else - // MSVC doesnt make std::atomic constant initialized. This union trick +#if defined(_MSC_VER) && !defined(__clang__) + // MSVC doesn't make std::atomic constant initialized. This union trick // makes it so. union { int visit_status_to_make_linker_init; std::atomic visit_status; }; +#else + std::atomic visit_status; #endif int num_deps; + int num_implicit_weak_deps; void (*init_func)(); // This is followed by an array of num_deps // const SCCInfoBase* deps[]; }; +// Zero-length arrays are a language extension available in GCC and Clang but +// not MSVC. +#ifdef __GNUC__ +#define PROTOBUF_ARRAY_SIZE(n) (n) +#else +#define PROTOBUF_ARRAY_SIZE(n) ((n) ? (n) : 1) +#endif + template struct SCCInfo { SCCInfoBase base; // Semantically this is const SCCInfo* which is is a templated type. // The obvious inheriting from SCCInfoBase mucks with struct initialization. // Attempts showed the compiler was generating dynamic initialization code. - // Zero length arrays produce warnings with MSVC. - SCCInfoBase* deps[N ? N : 1]; + // This deps array consists of base.num_deps pointers to SCCInfoBase followed + // by base.num_implicit_weak_deps pointers to SCCInfoBase*. We need the extra + // pointer indirection for implicit weak fields. 
We cannot use a union type + // here, since that would prevent the array from being linker-initialized. + void* deps[PROTOBUF_ARRAY_SIZE(N)]; }; -LIBPROTOBUF_EXPORT void InitSCCImpl(SCCInfoBase* scc); +#undef PROTOBUF_ARRAY_SIZE + +PROTOBUF_EXPORT void InitSCCImpl(SCCInfoBase* scc); inline void InitSCC(SCCInfoBase* scc) { auto status = scc->visit_status.load(std::memory_order_acquire); - if (GOOGLE_PREDICT_FALSE(status != SCCInfoBase::kInitialized)) InitSCCImpl(scc); + if (PROTOBUF_PREDICT_FALSE(status != SCCInfoBase::kInitialized)) + InitSCCImpl(scc); } -LIBPROTOBUF_EXPORT void DestroyMessage(const void* message); -LIBPROTOBUF_EXPORT void DestroyString(const void* s); +PROTOBUF_EXPORT void DestroyMessage(const void* message); +PROTOBUF_EXPORT void DestroyString(const void* s); // Destroy (not delete) the message inline void OnShutdownDestroyMessage(const void* ptr) { OnShutdownRun(DestroyMessage, ptr); } -// Destroy the string (call string destructor) +// Destroy the string (call std::string destructor) inline void OnShutdownDestroyString(const std::string* ptr) { OnShutdownRun(DestroyString, ptr); } } // namespace internal } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_GENERATED_MESSAGE_UTIL_H__ diff --git a/third_party/protobuf-lite/google/protobuf/has_bits.h b/third_party/protobuf-lite/google/protobuf/has_bits.h index e3a0149a..1144b9f4 100644 --- a/third_party/protobuf-lite/google/protobuf/has_bits.h +++ b/third_party/protobuf-lite/google/protobuf/has_bits.h @@ -32,27 +32,32 @@ #define GOOGLE_PROTOBUF_HAS_BITS_H__ #include -#include +#include + +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif namespace google { namespace protobuf { namespace internal { -template +template class HasBits { public: - HasBits() GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { Clear(); } + constexpr HasBits() PROTOBUF_ALWAYS_INLINE : has_bits_{} {} - void Clear() GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { + void Clear() 
PROTOBUF_ALWAYS_INLINE { memset(has_bits_, 0, sizeof(has_bits_)); } - ::google::protobuf::uint32& operator[](int index) GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { + uint32& operator[](int index) PROTOBUF_ALWAYS_INLINE { return has_bits_[index]; } - const ::google::protobuf::uint32& operator[](int index) const - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { + const uint32& operator[](int index) const PROTOBUF_ALWAYS_INLINE { return has_bits_[index]; } @@ -64,10 +69,14 @@ class HasBits { return !(*this == rhs); } + void Or(const HasBits& rhs) { + for (size_t i = 0; i < doublewords; i++) has_bits_[i] |= rhs[i]; + } + bool empty() const; private: - ::google::protobuf::uint32 has_bits_[doublewords]; + uint32 has_bits_[doublewords]; }; template <> @@ -100,6 +109,8 @@ inline bool HasBits::empty() const { } // namespace internal } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_HAS_BITS_H__ diff --git a/third_party/protobuf-lite/google/protobuf/implicit_weak_message.h b/third_party/protobuf-lite/google/protobuf/implicit_weak_message.h index 3279bd17..bfa6a813 100644 --- a/third_party/protobuf-lite/google/protobuf/implicit_weak_message.h +++ b/third_party/protobuf-lite/google/protobuf/implicit_weak_message.h @@ -31,9 +31,18 @@ #ifndef GOOGLE_PROTOBUF_IMPLICIT_WEAK_MESSAGE_H__ #define GOOGLE_PROTOBUF_IMPLICIT_WEAK_MESSAGE_H__ +#include + #include #include #include +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif + +#include // This file is logically internal-only and should only be used by protobuf // generated code. @@ -45,45 +54,44 @@ namespace internal { // An implementation of MessageLite that treats all data as unknown. This type // acts as a placeholder for an implicit weak field in the case where the true // message type does not get linked into the binary. 
-class LIBPROTOBUF_EXPORT ImplicitWeakMessage : public MessageLite { +class PROTOBUF_EXPORT ImplicitWeakMessage : public MessageLite { public: - ImplicitWeakMessage() : arena_(NULL) {} - explicit ImplicitWeakMessage(Arena* arena) : arena_(arena) {} + ImplicitWeakMessage() {} + explicit ImplicitWeakMessage(Arena* arena) : MessageLite(arena) {} static const ImplicitWeakMessage* default_instance(); - string GetTypeName() const { return ""; } + std::string GetTypeName() const override { return ""; } - MessageLite* New() const { return new ImplicitWeakMessage; } - MessageLite* New(Arena* arena) const { + MessageLite* New() const override { return new ImplicitWeakMessage; } + MessageLite* New(Arena* arena) const override { return Arena::CreateMessage(arena); } - Arena* GetArena() const { return arena_; } + void Clear() override { data_.clear(); } - void Clear() { data_.clear(); } + bool IsInitialized() const override { return true; } - bool IsInitialized() const { return true; } - - void CheckTypeAndMergeFrom(const MessageLite& other) { + void CheckTypeAndMergeFrom(const MessageLite& other) override { data_.append(static_cast(other).data_); } - bool MergePartialFromCodedStream(io::CodedInputStream* input); + const char* _InternalParse(const char* ptr, ParseContext* ctx) final; - size_t ByteSizeLong() const { return data_.size(); } + size_t ByteSizeLong() const override { return data_.size(); } - void SerializeWithCachedSizes(io::CodedOutputStream* output) const { - output->WriteString(data_); + uint8* _InternalSerialize(uint8* target, + io::EpsCopyOutputStream* stream) const final { + return stream->WriteRaw(data_.data(), static_cast(data_.size()), + target); } - int GetCachedSize() const { return static_cast(data_.size()); } + int GetCachedSize() const override { return static_cast(data_.size()); } typedef void InternalArenaConstructable_; private: - Arena* const arena_; - string data_; + std::string data_; GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ImplicitWeakMessage); }; @@ 
-91,45 +99,92 @@ class LIBPROTOBUF_EXPORT ImplicitWeakMessage : public MessageLite { template class ImplicitWeakTypeHandler { public: - typedef ImplicitWeakType Type; - typedef ::google::protobuf::MessageLite WeakType; - static const bool Moveable = false; - - // With implicit weak fields, we need separate NewFromPrototype and - // NewFromPrototypeWeak functions. The former is used when we want to create a - // strong dependency on the message type, and it just delegates to the - // GenericTypeHandler. The latter avoids creating a strong dependency, by - // simply calling MessageLite::New. - static inline ::google::protobuf::MessageLite* NewFromPrototype( - const ::google::protobuf::MessageLite* prototype, ::google::protobuf::Arena* arena = NULL) { + typedef MessageLite Type; + static constexpr bool Moveable = false; + + static inline MessageLite* NewFromPrototype(const MessageLite* prototype, + Arena* arena = NULL) { return prototype->New(arena); } - static inline void Delete(::google::protobuf::MessageLite* value, Arena* arena) { + static inline void Delete(MessageLite* value, Arena* arena) { if (arena == NULL) { delete value; } } - static inline ::google::protobuf::Arena* GetArena(::google::protobuf::MessageLite* value) { + static inline Arena* GetArena(MessageLite* value) { return value->GetArena(); } - static inline void* GetMaybeArenaPointer(::google::protobuf::MessageLite* value) { + static inline void* GetMaybeArenaPointer(MessageLite* value) { return value->GetArena(); } - static inline void Clear(::google::protobuf::MessageLite* value) { - value->Clear(); - } - static void Merge(const ::google::protobuf::MessageLite& from, - ::google::protobuf::MessageLite* to) { + static inline void Clear(MessageLite* value) { value->Clear(); } + static void Merge(const MessageLite& from, MessageLite* to) { to->CheckTypeAndMergeFrom(from); } - static inline size_t SpaceUsedLong(const Type& value) { - return value.SpaceUsedLong(); - } }; } // namespace internal -} // 
namespace protobuf +template +struct WeakRepeatedPtrField { + using TypeHandler = internal::ImplicitWeakTypeHandler; + constexpr WeakRepeatedPtrField() : weak() {} + explicit WeakRepeatedPtrField(Arena* arena) : weak(arena) {} + ~WeakRepeatedPtrField() { weak.template Destroy(); } + + typedef internal::RepeatedPtrIterator iterator; + typedef internal::RepeatedPtrIterator const_iterator; + typedef internal::RepeatedPtrOverPtrsIterator + pointer_iterator; + typedef internal::RepeatedPtrOverPtrsIterator + const_pointer_iterator; + + iterator begin() { return iterator(base().raw_data()); } + const_iterator begin() const { return iterator(base().raw_data()); } + const_iterator cbegin() const { return begin(); } + iterator end() { return begin() + base().size(); } + const_iterator end() const { return begin() + base().size(); } + const_iterator cend() const { return end(); } + pointer_iterator pointer_begin() { + return pointer_iterator(base().raw_mutable_data()); + } + const_pointer_iterator pointer_begin() const { + return const_pointer_iterator(base().raw_mutable_data()); + } + pointer_iterator pointer_end() { + return pointer_iterator(base().raw_mutable_data() + base().size()); + } + const_pointer_iterator pointer_end() const { + return const_pointer_iterator(base().raw_mutable_data() + base().size()); + } + + MessageLite* AddWeak(const MessageLite* prototype) { + return base().AddWeak(prototype); + } + T* Add() { return weak.Add(); } + void Clear() { base().template Clear(); } + void MergeFrom(const WeakRepeatedPtrField& other) { + base().template MergeFrom(other.base()); + } + void InternalSwap(WeakRepeatedPtrField* other) { + base().InternalSwap(&other->base()); + } + + const internal::RepeatedPtrFieldBase& base() const { return weak; } + internal::RepeatedPtrFieldBase& base() { return weak; } + // Union disables running the destructor. Which would create a strong link. + // Instead we explicitly destroy the underlying base through the virtual + // destructor. 
+ union { + RepeatedPtrField weak; + }; +}; + +} // namespace protobuf } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_IMPLICIT_WEAK_MESSAGE_H__ diff --git a/third_party/protobuf-lite/google/protobuf/inlined_string_field.h b/third_party/protobuf-lite/google/protobuf/inlined_string_field.h deleted file mode 100644 index 95d4687b..00000000 --- a/third_party/protobuf-lite/google/protobuf/inlined_string_field.h +++ /dev/null @@ -1,271 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#ifndef GOOGLE_PROTOBUF_INLINED_STRING_FIELD_H__ -#define GOOGLE_PROTOBUF_INLINED_STRING_FIELD_H__ - -#include - -#include -#include - -namespace google { -namespace protobuf { - -class Arena; - -namespace internal { - -// InlinedStringField wraps a ::std::string instance and exposes an API similar to -// ArenaStringPtr's wrapping of a ::std::string* instance. As ::std::string is never -// allocated on the Arena, we expose only the *NoArena methods of -// ArenaStringPtr. -// -// default_value parameters are taken for consistency with ArenaStringPtr, but -// are not used for most methods. With inlining, these should be removed from -// the generated binary. 
-class LIBPROTOBUF_EXPORT InlinedStringField { - public: - InlinedStringField() - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; - explicit InlinedStringField(const ::std::string& default_value); - - void AssignWithDefault(const ::std::string* default_value, - const InlinedStringField& from) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; - - void ClearToEmpty(const ::std::string* default_value, Arena* arena) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - ClearToEmptyNoArena(default_value); - } - void ClearNonDefaultToEmpty() GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - ClearNonDefaultToEmptyNoArena(); - } - void ClearToEmptyNoArena(const ::std::string* default_value) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - ClearNonDefaultToEmptyNoArena(); - } - void ClearNonDefaultToEmptyNoArena() - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; - - void ClearToDefault(const ::std::string* default_value, Arena* arena) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - ClearToDefaultNoArena(default_value); - } - void ClearToDefaultNoArena(const ::std::string* default_value) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; - - void Destroy(const ::std::string* default_value, Arena* arena) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - DestroyNoArena(default_value); - } - void DestroyNoArena(const ::std::string* default_value) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; - - const ::std::string& Get() const GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - return GetNoArena(); - } - const ::std::string& GetNoArena() const GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; - - ::std::string* Mutable(const ::std::string* default_value, Arena* arena) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - return MutableNoArena(default_value); - } - ::std::string* MutableNoArena(const ::std::string* default_value) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; - - ::std::string* Release(const ::std::string* default_value, Arena* arena) { - return ReleaseNoArena(default_value); - } - ::std::string* ReleaseNonDefault(const ::std::string* 
default_value, Arena* arena) { - return ReleaseNonDefaultNoArena(default_value); - } - ::std::string* ReleaseNoArena(const ::std::string* default_value) { - return ReleaseNonDefaultNoArena(default_value); - } - ::std::string* ReleaseNonDefaultNoArena(const ::std::string* default_value); - - void Set(const ::std::string* default_value, - StringPiece value, - Arena* arena) GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - SetNoArena(default_value, value); - } - void SetLite(const ::std::string* default_value, - StringPiece value, - Arena* arena) GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - SetNoArena(default_value, value); - } - void SetNoArena(const ::std::string* default_value, - StringPiece value) GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; - - void Set(const ::std::string* default_value, - const ::std::string& value, - Arena* arena) GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - SetNoArena(default_value, value); - } - void SetLite(const ::std::string* default_value, - const ::std::string& value, - Arena* arena) GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE { - SetNoArena(default_value, value); - } - void SetNoArena(const ::std::string* default_value, - const ::std::string& value) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; - -#if LANG_CXX11 - void SetNoArena(const ::std::string* default_value, - ::std::string&& value) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; -#endif - void SetAllocated(const ::std::string* default_value, - ::std::string* value, - Arena* arena) { - SetAllocatedNoArena(default_value, value); - } - void SetAllocatedNoArena(const ::std::string* default_value, - ::std::string* value); - void Swap(InlinedStringField* from) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE; - ::std::string* UnsafeMutablePointer(); - void UnsafeSetDefault(const ::std::string* default_value); - ::std::string* UnsafeArenaRelease(const ::std::string* default_value, Arena* arena); - void UnsafeArenaSetAllocated( - const ::std::string* default_value, ::std::string* value, Arena* arena); - - bool 
IsDefault(const ::std::string* default_value) { - return false; - } - private: - ::std::string value_; -}; - -inline InlinedStringField::InlinedStringField() {} - -inline InlinedStringField::InlinedStringField(const ::std::string& default_value) : - value_(default_value) {} - -inline void InlinedStringField::AssignWithDefault( - const ::std::string* default_value, const InlinedStringField& from) { - value_ = from.value_; -} - -inline const ::std::string& InlinedStringField::GetNoArena() const { - return value_; -} - -inline ::std::string* InlinedStringField::MutableNoArena(const ::std::string*) { - return &value_; -} - -inline void InlinedStringField::SetAllocatedNoArena( - const ::std::string* default_value, ::std::string* value) { - if (value == NULL) { - value_.assign(*default_value); - } else { -#if LANG_CXX11 - value_.assign(std::move(*value)); -#else - value_.swap(*value); -#endif - delete value; - } -} - -inline void InlinedStringField::DestroyNoArena(const ::std::string*) { - // This is invoked from the generated message's ArenaDtor, which is used to - // clean up objects not allocated on the Arena. 
- this->~InlinedStringField(); -} - -inline void InlinedStringField::ClearNonDefaultToEmptyNoArena() { - value_.clear(); -} - -inline void InlinedStringField::ClearToDefaultNoArena( - const ::std::string* default_value) { - value_.assign(*default_value); -} - -inline ::std::string* InlinedStringField::ReleaseNonDefaultNoArena( - const ::std::string* default_value) { - ::std::string* released = new ::std::string(*default_value); - value_.swap(*released); - return released; -} - -inline void InlinedStringField::SetNoArena( - const ::std::string* default_value, StringPiece value) { - value_.assign(value.data(), value.length()); -} - -inline void InlinedStringField::SetNoArena( - const ::std::string* default_value, const ::std::string& value) { - value_.assign(value); -} - -#if LANG_CXX11 -inline void InlinedStringField::SetNoArena( - const ::std::string* default_value, ::std::string&& value) { - value_.assign(std::move(value)); -} -#endif - -inline void InlinedStringField::Swap(InlinedStringField* from) { - value_.swap(from->value_); -} - -inline ::std::string* InlinedStringField::UnsafeMutablePointer() { - return &value_; -} - -inline void InlinedStringField::UnsafeSetDefault( - const ::std::string* default_value) { - value_.assign(*default_value); -} - -inline ::std::string* InlinedStringField::UnsafeArenaRelease( - const ::std::string* default_value, Arena* arena) { - return ReleaseNoArena(default_value); -} - -inline void InlinedStringField::UnsafeArenaSetAllocated( - const ::std::string* default_value, ::std::string* value, Arena* arena) { - if (value == NULL) { - value_.assign(*default_value); - } else { - value_.assign(*value); - } -} - -} // namespace internal -} // namespace protobuf - -} // namespace google -#endif // GOOGLE_PROTOBUF_INLINED_STRING_FIELD_H__ diff --git a/third_party/protobuf-lite/google/protobuf/io/coded_stream.h b/third_party/protobuf-lite/google/protobuf/io/coded_stream.h index 0f70ecde..0fff1782 100644 --- 
a/third_party/protobuf-lite/google/protobuf/io/coded_stream.h +++ b/third_party/protobuf-lite/google/protobuf/io/coded_stream.h @@ -67,7 +67,7 @@ // // Read a file created by the above code. // int fd = open("myfile", O_RDONLY); // ZeroCopyInputStream* raw_input = new FileInputStream(fd); -// CodedInputStream coded_input = new CodedInputStream(raw_input); +// CodedInputStream* coded_input = new CodedInputStream(raw_input); // // coded_input->ReadLittleEndian32(&magic_number); // if (magic_number != 1234) { @@ -109,41 +109,55 @@ #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ + #include + #include #include +#include +#include #include +#include #include + #ifdef _MSC_VER - // Assuming windows is always little-endian. - #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) - #define PROTOBUF_LITTLE_ENDIAN 1 - #endif - #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER) - // If MSVC has "/RTCc" set, it will complain about truncating casts at - // runtime. This file contains some intentional truncating casts. - #pragma runtime_checks("c", off) - #endif +// Assuming windows is always little-endian. +#if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) +#define PROTOBUF_LITTLE_ENDIAN 1 +#endif +#if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER) +// If MSVC has "/RTCc" set, it will complain about truncating casts at +// runtime. This file contains some intentional truncating casts. 
+#pragma runtime_checks("c", off) +#endif #else - #include // __BYTE_ORDER - #if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \ - (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \ - !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) - #define PROTOBUF_LITTLE_ENDIAN 1 - #endif +#include // __BYTE_ORDER +#if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \ + (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \ + !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) +#define PROTOBUF_LITTLE_ENDIAN 1 +#endif #endif #include -#include +#include +#include +#include #include -namespace google { +#include + +namespace google { namespace protobuf { class DescriptorPool; class MessageFactory; +class ZeroCopyCodedInputStream; -namespace internal { void MapTestForceDeterministic(); } +namespace internal { +void MapTestForceDeterministic(); +class EpsCopyByteStream; +} // namespace internal namespace io { @@ -152,8 +166,8 @@ class CodedInputStream; class CodedOutputStream; // Defined in other files. -class ZeroCopyInputStream; // zero_copy_stream.h -class ZeroCopyOutputStream; // zero_copy_stream.h +class ZeroCopyInputStream; // zero_copy_stream.h +class ZeroCopyOutputStream; // zero_copy_stream.h // Class which reads and decodes binary data which is composed of varint- // encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream. @@ -162,7 +176,9 @@ class ZeroCopyOutputStream; // zero_copy_stream.h // Most methods of CodedInputStream that return a bool return false if an // underlying I/O error occurs or if the data is malformed. Once such a // failure occurs, the CodedInputStream is broken and is no longer useful. -class LIBPROTOBUF_EXPORT CodedInputStream { +// After a failure, callers also should assume writes to "out" args may have +// occurred, though nothing useful can be determined from those writes. 
+class PROTOBUF_EXPORT CodedInputStream { public: // Create a CodedInputStream that reads from the given ZeroCopyInputStream. explicit CodedInputStream(ZeroCopyInputStream* input); @@ -198,23 +214,14 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Like GetDirectBufferPointer, but this method is inlined, and does not // attempt to Refresh() if the buffer is currently empty. - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE + PROTOBUF_ALWAYS_INLINE void GetDirectBufferPointerInline(const void** data, int* size); // Read raw bytes, copying them into the given buffer. bool ReadRaw(void* buffer, int size); - // Like the above, with inlined optimizations. This should only be used - // by the protobuf implementation. - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE - bool InternalReadRawInline(void* buffer, int size); - // Like ReadRaw, but reads into a string. - bool ReadString(string* buffer, int size); - // Like the above, with inlined optimizations. This should only be used - // by the protobuf implementation. - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE - bool InternalReadStringInline(string* buffer, int size); + bool ReadString(std::string* buffer, int size); // Read a 32-bit little-endian integer. @@ -226,10 +233,10 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // responsible for ensuring that the buffer has sufficient space. // Read a 32-bit little-endian integer. static const uint8* ReadLittleEndian32FromArray(const uint8* buffer, - uint32* value); + uint32* value); // Read a 64-bit little-endian integer. static const uint8* ReadLittleEndian64FromArray(const uint8* buffer, - uint64* value); + uint64* value); // Read an unsigned integer with Varint encoding, truncating to 32 bits. // Reading a 32-bit value is equivalent to reading a 64-bit one and casting @@ -257,12 +264,11 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Always inline because this is only called in one place per parse loop // but it is called for every iteration of said loop, so it should be fast. 
// GCC doesn't want to inline this by default. - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTag() { + PROTOBUF_ALWAYS_INLINE uint32 ReadTag() { return last_tag_ = ReadTagNoLastTag(); } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTagNoLastTag(); - + PROTOBUF_ALWAYS_INLINE uint32 ReadTagNoLastTag(); // This usually a faster alternative to ReadTag() when cutoff is a manifest // constant. It does particularly well for cutoff >= 127. The first part @@ -272,14 +278,14 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // above cutoff or is 0. (There's intentional wiggle room when tag is 0, // because that can arise in several ways, and for best performance we want // to avoid an extra "is tag == 0?" check here.) - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE + PROTOBUF_ALWAYS_INLINE std::pair ReadTagWithCutoff(uint32 cutoff) { std::pair result = ReadTagWithCutoffNoLastTag(cutoff); last_tag_ = result.first; return result; } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE + PROTOBUF_ALWAYS_INLINE std::pair ReadTagWithCutoffNoLastTag(uint32 cutoff); // Usually returns true if calling ReadVarint32() now would produce the given @@ -289,7 +295,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // parameter. // Always inline because this collapses to a small number of instructions // when given a constant parameter, but GCC doesn't want to inline by default. - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE bool ExpectTag(uint32 expected); + PROTOBUF_ALWAYS_INLINE bool ExpectTag(uint32 expected); // Like above, except this reads from the specified buffer. The caller is // responsible for ensuring that the buffer is large enough to read a varint @@ -298,7 +304,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // // Returns a pointer beyond the expected tag if it was found, or NULL if it // was not. 
- GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE + PROTOBUF_ALWAYS_INLINE static const uint8* ExpectTagFromArray(const uint8* buffer, uint32 expected); // Usually returns true if no more bytes can be read. Always returns false @@ -328,6 +334,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // It also checks for some cases where, due to optimizations, // MergeFromCodedStream() can incorrectly return true. bool ConsumedEntireMessage(); + void SetConsumed() { legitimate_message_end_ = true; } // Limits ---------------------------------------------------------- // Limits are used when parsing length-delimited embedded messages. @@ -384,7 +391,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // This is unrelated to PushLimit()/PopLimit(). void SetTotalBytesLimit(int total_bytes_limit); - PROTOBUF_RUNTIME_DEPRECATED( + PROTOBUF_DEPRECATED_MSG( "Please use the single parameter version of SetTotalBytesLimit(). The " "second parameter is ignored.") void SetTotalBytesLimit(int total_bytes_limit, int) { @@ -403,7 +410,9 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // Sets the maximum recursion depth. The default is 100. void SetRecursionLimit(int limit); + int RecursionBudget() { return recursion_budget_; } + static int GetDefaultRecursionLimit() { return default_recursion_limit_; } // Increments the current recursion depth. Returns true if the depth is // under the limit, false if it has gone over. @@ -527,7 +536,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream); const uint8* buffer_; - const uint8* buffer_end_; // pointer to the end of the buffer. + const uint8* buffer_end_; // pointer to the end of the buffer. ZeroCopyInputStream* input_; int total_bytes_read_; // total bytes read from input_, including // the current buffer @@ -537,7 +546,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { int overflow_bytes_; // LastTagWas() stuff. - uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff(). 
+ uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff(). // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly // at EOF, or by ExpectAtEnd() when it returns true. This happens when we @@ -548,7 +557,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { bool aliasing_enabled_; // Limits - Limit current_limit_; // if position = -1, no limit is applied + Limit current_limit_; // if position = -1, no limit is applied // For simplicity, if the current buffer crosses a limit (either a normal // limit created by PushLimit() or the total bytes limit), buffer_size_ @@ -621,7 +630,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // stream. uint32 ReadTagFallback(uint32 first_byte_or_zero); uint32 ReadTagSlow(); - bool ReadStringFallback(string* buffer, int size); + bool ReadStringFallback(std::string* buffer, int size); // Return the size of the buffer. int BufferSize() const; @@ -629,8 +638,365 @@ class LIBPROTOBUF_EXPORT CodedInputStream { static const int kDefaultTotalBytesLimit = INT_MAX; static int default_recursion_limit_; // 100 by default. + + friend class google::protobuf::ZeroCopyCodedInputStream; + friend class google::protobuf::internal::EpsCopyByteStream; +}; + +// EpsCopyOutputStream wraps a ZeroCopyOutputStream and exposes a new stream, +// which has the property you can write kSlopBytes (16 bytes) from the current +// position without bounds checks. The cursor into the stream is managed by +// the user of the class and is an explicit parameter in the methods. Careful +// use of this class, ie. keep ptr a local variable, eliminates the need to +// for the compiler to sync the ptr value between register and memory. +class PROTOBUF_EXPORT EpsCopyOutputStream { + public: + enum { kSlopBytes = 16 }; + + // Initialize from a stream. 
+ EpsCopyOutputStream(ZeroCopyOutputStream* stream, bool deterministic, + uint8** pp) + : end_(buffer_), + stream_(stream), + is_serialization_deterministic_(deterministic) { + *pp = buffer_; + } + + // Only for array serialization. No overflow protection, end_ will be the + // pointed to the end of the array. When using this the total size is already + // known, so no need to maintain the slop region. + EpsCopyOutputStream(void* data, int size, bool deterministic) + : end_(static_cast(data) + size), + buffer_end_(nullptr), + stream_(nullptr), + is_serialization_deterministic_(deterministic) {} + + // Initialize from stream but with the first buffer already given (eager). + EpsCopyOutputStream(void* data, int size, ZeroCopyOutputStream* stream, + bool deterministic, uint8** pp) + : stream_(stream), is_serialization_deterministic_(deterministic) { + *pp = SetInitialBuffer(data, size); + } + + // Flush everything that's written into the underlying ZeroCopyOutputStream + // and trims the underlying stream to the location of ptr. + uint8* Trim(uint8* ptr); + + // After this it's guaranteed you can safely write kSlopBytes to ptr. This + // will never fail! The underlying stream can produce an error. Use HadError + // to check for errors. + PROTOBUF_MUST_USE_RESULT uint8* EnsureSpace(uint8* ptr) { + if (PROTOBUF_PREDICT_FALSE(ptr >= end_)) { + return EnsureSpaceFallback(ptr); + } + return ptr; + } + + uint8* WriteRaw(const void* data, int size, uint8* ptr) { + if (PROTOBUF_PREDICT_FALSE(end_ - ptr < size)) { + return WriteRawFallback(data, size, ptr); + } + std::memcpy(ptr, data, size); + return ptr + size; + } + // Writes the buffer specified by data, size to the stream. Possibly by + // aliasing the buffer (ie. not copying the data). The caller is responsible + // to make sure the buffer is alive for the duration of the + // ZeroCopyOutputStream. 
+ uint8* WriteRawMaybeAliased(const void* data, int size, uint8* ptr) { + if (aliasing_enabled_) { + return WriteAliasedRaw(data, size, ptr); + } else { + return WriteRaw(data, size, ptr); + } + } + + + uint8* WriteStringMaybeAliased(uint32 num, const std::string& s, uint8* ptr) { + std::ptrdiff_t size = s.size(); + if (PROTOBUF_PREDICT_FALSE( + size >= 128 || end_ - ptr + 16 - TagSize(num << 3) - 1 < size)) { + return WriteStringMaybeAliasedOutline(num, s, ptr); + } + ptr = UnsafeVarint((num << 3) | 2, ptr); + *ptr++ = static_cast(size); + std::memcpy(ptr, s.data(), size); + return ptr + size; + } + uint8* WriteBytesMaybeAliased(uint32 num, const std::string& s, uint8* ptr) { + return WriteStringMaybeAliased(num, s, ptr); + } + + template + PROTOBUF_ALWAYS_INLINE uint8* WriteString(uint32 num, const T& s, + uint8* ptr) { + std::ptrdiff_t size = s.size(); + if (PROTOBUF_PREDICT_FALSE( + size >= 128 || end_ - ptr + 16 - TagSize(num << 3) - 1 < size)) { + return WriteStringOutline(num, s, ptr); + } + ptr = UnsafeVarint((num << 3) | 2, ptr); + *ptr++ = static_cast(size); + std::memcpy(ptr, s.data(), size); + return ptr + size; + } + template + uint8* WriteBytes(uint32 num, const T& s, uint8* ptr) { + return WriteString(num, s, ptr); + } + + template + PROTOBUF_ALWAYS_INLINE uint8* WriteInt32Packed(int num, const T& r, int size, + uint8* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + template + PROTOBUF_ALWAYS_INLINE uint8* WriteUInt32Packed(int num, const T& r, int size, + uint8* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode32); + } + template + PROTOBUF_ALWAYS_INLINE uint8* WriteSInt32Packed(int num, const T& r, int size, + uint8* ptr) { + return WriteVarintPacked(num, r, size, ptr, ZigZagEncode32); + } + template + PROTOBUF_ALWAYS_INLINE uint8* WriteInt64Packed(int num, const T& r, int size, + uint8* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + template + PROTOBUF_ALWAYS_INLINE uint8* 
WriteUInt64Packed(int num, const T& r, int size, + uint8* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + template + PROTOBUF_ALWAYS_INLINE uint8* WriteSInt64Packed(int num, const T& r, int size, + uint8* ptr) { + return WriteVarintPacked(num, r, size, ptr, ZigZagEncode64); + } + template + PROTOBUF_ALWAYS_INLINE uint8* WriteEnumPacked(int num, const T& r, int size, + uint8* ptr) { + return WriteVarintPacked(num, r, size, ptr, Encode64); + } + + template + PROTOBUF_ALWAYS_INLINE uint8* WriteFixedPacked(int num, const T& r, + uint8* ptr) { + ptr = EnsureSpace(ptr); + constexpr auto element_size = sizeof(typename T::value_type); + auto size = r.size() * element_size; + ptr = WriteLengthDelim(num, size, ptr); + return WriteRawLittleEndian(r.data(), static_cast(size), + ptr); + } + + // Returns true if there was an underlying I/O error since this object was + // created. + bool HadError() const { return had_error_; } + + // Instructs the EpsCopyOutputStream to allow the underlying + // ZeroCopyOutputStream to hold pointers to the original structure instead of + // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the + // underlying stream does not support aliasing, then enabling it has no + // affect. For now, this only affects the behavior of + // WriteRawMaybeAliased(). + // + // NOTE: It is caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. + void EnableAliasing(bool enabled); + + // See documentation on CodedOutputStream::SetSerializationDeterministic. + void SetSerializationDeterministic(bool value) { + is_serialization_deterministic_ = value; + } + + // See documentation on CodedOutputStream::IsSerializationDeterministic. + bool IsSerializationDeterministic() const { + return is_serialization_deterministic_; + } + + // The number of bytes written to the stream at position ptr, relative to the + // stream's overall position. 
+ int64 ByteCount(uint8* ptr) const; + + + private: + uint8* end_; + uint8* buffer_end_ = buffer_; + uint8 buffer_[2 * kSlopBytes]; + ZeroCopyOutputStream* stream_; + bool had_error_ = false; + bool aliasing_enabled_ = false; // See EnableAliasing(). + bool is_serialization_deterministic_; + + uint8* EnsureSpaceFallback(uint8* ptr); + inline uint8* Next(); + int Flush(uint8* ptr); + std::ptrdiff_t GetSize(uint8* ptr) const { + GOOGLE_DCHECK(ptr <= end_ + kSlopBytes); // NOLINT + return end_ + kSlopBytes - ptr; + } + + uint8* Error() { + had_error_ = true; + // We use the patch buffer to always guarantee space to write to. + end_ = buffer_ + kSlopBytes; + return buffer_; + } + + static constexpr int TagSize(uint32 tag) { + return (tag < (1 << 7)) + ? 1 + : (tag < (1 << 14)) + ? 2 + : (tag < (1 << 21)) ? 3 : (tag < (1 << 28)) ? 4 : 5; + } + + PROTOBUF_ALWAYS_INLINE uint8* WriteTag(uint32 num, uint32 wt, uint8* ptr) { + GOOGLE_DCHECK(ptr < end_); // NOLINT + return UnsafeVarint((num << 3) | wt, ptr); + } + + PROTOBUF_ALWAYS_INLINE uint8* WriteLengthDelim(int num, uint32 size, + uint8* ptr) { + ptr = WriteTag(num, 2, ptr); + return UnsafeWriteSize(size, ptr); + } + + uint8* WriteRawFallback(const void* data, int size, uint8* ptr); + + uint8* WriteAliasedRaw(const void* data, int size, uint8* ptr); + + uint8* WriteStringMaybeAliasedOutline(uint32 num, const std::string& s, + uint8* ptr); + uint8* WriteStringOutline(uint32 num, const std::string& s, uint8* ptr); + + template + PROTOBUF_ALWAYS_INLINE uint8* WriteVarintPacked(int num, const T& r, int size, + uint8* ptr, const E& encode) { + ptr = EnsureSpace(ptr); + ptr = WriteLengthDelim(num, size, ptr); + auto it = r.data(); + auto end = it + r.size(); + do { + ptr = EnsureSpace(ptr); + ptr = UnsafeVarint(encode(*it++), ptr); + } while (it < end); + return ptr; + } + + static uint32 Encode32(uint32 v) { return v; } + static uint64 Encode64(uint64 v) { return v; } + static uint32 ZigZagEncode32(int32 v) { + return 
(static_cast(v) << 1) ^ static_cast(v >> 31); + } + static uint64 ZigZagEncode64(int64 v) { + return (static_cast(v) << 1) ^ static_cast(v >> 63); + } + + template + PROTOBUF_ALWAYS_INLINE static uint8* UnsafeVarint(T value, uint8* ptr) { + static_assert(std::is_unsigned::value, + "Varint serialization must be unsigned"); + if (value < 0x80) { + ptr[0] = static_cast(value); + return ptr + 1; + } + ptr[0] = static_cast(value | 0x80); + value >>= 7; + if (value < 0x80) { + ptr[1] = static_cast(value); + return ptr + 2; + } + ptr++; + do { + *ptr = static_cast(value | 0x80); + value >>= 7; + ++ptr; + } while (PROTOBUF_PREDICT_FALSE(value >= 0x80)); + *ptr++ = static_cast(value); + return ptr; + } + + PROTOBUF_ALWAYS_INLINE static uint8* UnsafeWriteSize(uint32 value, + uint8* ptr) { + while (PROTOBUF_PREDICT_FALSE(value >= 0x80)) { + *ptr = static_cast(value | 0x80); + value >>= 7; + ++ptr; + } + *ptr++ = static_cast(value); + return ptr; + } + + template + uint8* WriteRawLittleEndian(const void* data, int size, uint8* ptr); +#ifndef PROTOBUF_LITTLE_ENDIAN + uint8* WriteRawLittleEndian32(const void* data, int size, uint8* ptr); + uint8* WriteRawLittleEndian64(const void* data, int size, uint8* ptr); +#endif + + // These methods are for CodedOutputStream. Ideally they should be private + // but to match current behavior of CodedOutputStream as close as possible + // we allow it some functionality. + public: + uint8* SetInitialBuffer(void* data, int size) { + auto ptr = static_cast(data); + if (size > kSlopBytes) { + end_ = ptr + size - kSlopBytes; + buffer_end_ = nullptr; + return ptr; + } else { + end_ = buffer_ + size; + buffer_end_ = ptr; + return buffer_; + } + } + + private: + // Needed by CodedOutputStream HadError. HadError needs to flush the patch + // buffers to ensure there is no error as of yet. + uint8* FlushAndResetBuffer(uint8*); + + // The following functions mimic the old CodedOutputStream behavior as close + // as possible. 
They flush the current state to the stream, behave as + // the old CodedOutputStream and then return to normal operation. + bool Skip(int count, uint8** pp); + bool GetDirectBufferPointer(void** data, int* size, uint8** pp); + uint8* GetDirectBufferForNBytesAndAdvance(int size, uint8** pp); + + friend class CodedOutputStream; }; +template <> +inline uint8* EpsCopyOutputStream::WriteRawLittleEndian<1>(const void* data, + int size, + uint8* ptr) { + return WriteRaw(data, size, ptr); +} +template <> +inline uint8* EpsCopyOutputStream::WriteRawLittleEndian<4>(const void* data, + int size, + uint8* ptr) { +#ifdef PROTOBUF_LITTLE_ENDIAN + return WriteRaw(data, size, ptr); +#else + return WriteRawLittleEndian32(data, size, ptr); +#endif +} +template <> +inline uint8* EpsCopyOutputStream::WriteRawLittleEndian<8>(const void* data, + int size, + uint8* ptr) { +#ifdef PROTOBUF_LITTLE_ENDIAN + return WriteRaw(data, size, ptr); +#else + return WriteRawLittleEndian64(data, size, ptr); +#endif +} + // Class which encodes and writes binary data which is composed of varint- // encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream. // Most users will not need to deal with CodedOutputStream. @@ -651,7 +1017,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // individual value. // i.e., in the example above: // -// CodedOutputStream coded_output = new CodedOutputStream(raw_output); +// CodedOutputStream* coded_output = new CodedOutputStream(raw_output); // int magic_number = 1234; // char text[] = "Hello world!"; // @@ -661,7 +1027,7 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // // uint8* buffer = // coded_output->GetDirectBufferForNBytesAndAdvance(coded_size); -// if (buffer != NULL) { +// if (buffer != nullptr) { // // The output stream has enough space in the buffer: write directly to // // the array. 
// buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number, @@ -677,27 +1043,40 @@ class LIBPROTOBUF_EXPORT CodedInputStream { // } // // delete coded_output; -class LIBPROTOBUF_EXPORT CodedOutputStream { +class PROTOBUF_EXPORT CodedOutputStream { public: // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream. - explicit CodedOutputStream(ZeroCopyOutputStream* output); - CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh); + explicit CodedOutputStream(ZeroCopyOutputStream* stream) + : CodedOutputStream(stream, true) {} + CodedOutputStream(ZeroCopyOutputStream* stream, bool do_eager_refresh); // Destroy the CodedOutputStream and position the underlying // ZeroCopyOutputStream immediately after the last byte written. ~CodedOutputStream(); + // Returns true if there was an underlying I/O error since this object was + // created. On should call Trim before this function in order to catch all + // errors. + bool HadError() { + cur_ = impl_.FlushAndResetBuffer(cur_); + GOOGLE_DCHECK(cur_); + return impl_.HadError(); + } + // Trims any unused space in the underlying buffer so that its size matches // the number of bytes written by this stream. The underlying buffer will // automatically be trimmed when this stream is destroyed; this call is only // necessary if the underlying buffer is accessed *before* the stream is // destroyed. - void Trim(); + void Trim() { cur_ = impl_.Trim(cur_); } // Skips a number of bytes, leaving the bytes unmodified in the underlying // buffer. Returns false if an underlying write error occurs. This is // mainly useful with GetDirectBufferPointer(). - bool Skip(int count); + // Note of caution, the skipped bytes may contain uninitialized data. The + // caller must make sure that the skipped bytes are properly initialized, + // otherwise you might leak bytes from your heap. 
+ bool Skip(int count) { return impl_.Skip(count, &cur_); } // Sets *data to point directly at the unwritten part of the // CodedOutputStream's underlying buffer, and *size to the size of that @@ -707,7 +1086,9 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // the consumed bytes. This may be useful for implementing external fast // serialization routines for types of data not covered by the // CodedOutputStream interface. - bool GetDirectBufferPointer(void** data, int* size); + bool GetDirectBufferPointer(void** data, int* size) { + return impl_.GetDirectBufferPointer(data, size, &cur_); + } // If there are at least "size" bytes available in the current buffer, // returns a pointer directly into the buffer and advances over these bytes. @@ -716,10 +1097,14 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // there are not enough bytes available, returns NULL. The return pointer is // invalidated as soon as any other non-const method of CodedOutputStream // is called. - inline uint8* GetDirectBufferForNBytesAndAdvance(int size); + inline uint8* GetDirectBufferForNBytesAndAdvance(int size) { + return impl_.GetDirectBufferForNBytesAndAdvance(size, &cur_); + } // Write raw bytes, copying them from the given buffer. - void WriteRaw(const void* buffer, int size); + void WriteRaw(const void* buffer, int size) { + cur_ = impl_.WriteRaw(buffer, size, cur_); + } // Like WriteRaw() but will try to write aliased data if aliasing is // turned on. void WriteRawMaybeAliased(const void* data, int size); @@ -731,30 +1116,26 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { static uint8* WriteRawToArray(const void* buffer, int size, uint8* target); // Equivalent to WriteRaw(str.data(), str.size()). - void WriteString(const string& str); + void WriteString(const std::string& str); // Like WriteString() but writing directly to the target array. 
- static uint8* WriteStringToArray(const string& str, uint8* target); + static uint8* WriteStringToArray(const std::string& str, uint8* target); // Write the varint-encoded size of str followed by str. - static uint8* WriteStringWithSizeToArray(const string& str, uint8* target); + static uint8* WriteStringWithSizeToArray(const std::string& str, + uint8* target); - // Instructs the CodedOutputStream to allow the underlying - // ZeroCopyOutputStream to hold pointers to the original structure instead of - // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the - // underlying stream does not support aliasing, then enabling it has no - // affect. For now, this only affects the behavior of - // WriteRawMaybeAliased(). - // - // NOTE: It is caller's responsibility to ensure that the chunk of memory - // remains live until all of the data has been consumed from the stream. - void EnableAliasing(bool enabled); - // Write a 32-bit little-endian integer. - void WriteLittleEndian32(uint32 value); + void WriteLittleEndian32(uint32 value) { + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteLittleEndian32ToArray(value, Cur())); + } // Like WriteLittleEndian32() but writing directly to the target array. static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target); // Write a 64-bit little-endian integer. - void WriteLittleEndian64(uint64 value); + void WriteLittleEndian64(uint64 value) { + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteLittleEndian64ToArray(value, Cur())); + } // Like WriteLittleEndian64() but writing directly to the target array. static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target); @@ -778,11 +1159,11 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // This is identical to WriteVarint32(), but optimized for writing tags. // In particular, if the input is a compile-time constant, this method // compiles down to a couple instructions. 
- // Always inline because otherwise the aformentioned optimization can't work, + // Always inline because otherwise the aforementioned optimization can't work, // but GCC by default doesn't want to inline this. void WriteTag(uint32 value); // Like WriteTag() but writing directly to the target array. - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE + PROTOBUF_ALWAYS_INLINE static uint8* WriteTagToArray(uint32 value, uint8* target); // Returns the number of bytes needed to encode the given value as a varint. @@ -790,7 +1171,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // Returns the number of bytes needed to encode the given value as a varint. static size_t VarintSize64(uint64 value); - // If negative, 10 bytes. Otheriwse, same as VarintSize32(). + // If negative, 10 bytes. Otherwise, same as VarintSize32(). static size_t VarintSize32SignExtended(int32 value); // Compile-time equivalent of VarintSize32(). @@ -800,87 +1181,73 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { (Value < (1 << 7)) ? 1 : (Value < (1 << 14)) - ? 2 - : (Value < (1 << 21)) - ? 3 - : (Value < (1 << 28)) - ? 4 - : 5; + ? 2 + : (Value < (1 << 21)) ? 3 : (Value < (1 << 28)) ? 4 : 5; }; // Returns the total number of bytes written since this object was created. - inline int ByteCount() const; + int ByteCount() const { + return static_cast(impl_.ByteCount(cur_) - start_count_); + } - // Returns true if there was an underlying I/O error since this object was - // created. - bool HadError() const { return had_error_; } + // Instructs the CodedOutputStream to allow the underlying + // ZeroCopyOutputStream to hold pointers to the original structure instead of + // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the + // underlying stream does not support aliasing, then enabling it has no + // affect. For now, this only affects the behavior of + // WriteRawMaybeAliased(). 
+ // + // NOTE: It is caller's responsibility to ensure that the chunk of memory + // remains live until all of the data has been consumed from the stream. + void EnableAliasing(bool enabled) { impl_.EnableAliasing(enabled); } - // Deterministic serialization, if requested, guarantees that for a given - // binary, equal messages will always be serialized to the same bytes. This - // implies: - // . repeated serialization of a message will return the same bytes - // . different processes of the same binary (which may be executing on - // different machines) will serialize equal messages to the same bytes. + // Indicate to the serializer whether the user wants derministic + // serialization. The default when this is not called comes from the global + // default, controlled by SetDefaultSerializationDeterministic. // - // Note the deterministic serialization is NOT canonical across languages; it - // is also unstable across different builds with schema changes due to unknown - // fields. Users who need canonical serialization, e.g., persistent storage in - // a canonical form, fingerprinting, etc., should define their own - // canonicalization specification and implement the serializer using - // reflection APIs rather than relying on this API. + // What deterministic serialization means is entirely up to the driver of the + // serialization process (i.e. the caller of methods like WriteVarint32). In + // the case of serializing a proto buffer message using one of the methods of + // MessageLite, this means that for a given binary equal messages will always + // be serialized to the same bytes. This implies: + // + // * Repeated serialization of a message will return the same bytes. // - // If deterministic serialization is requested, the serializer will - // sort map entries by keys in lexicographical order or numerical order. - // (This is an implementation detail and may subject to change.) 
+ // * Different processes running the same binary (including on different + // machines) will serialize equal messages to the same bytes. // - // There are two ways to determine whether serialization should be - // deterministic for this CodedOutputStream. If SetSerializationDeterministic - // has not yet been called, then the default comes from the global default, - // which is false, until SetDefaultSerializationDeterministic has been called. - // Otherwise, SetSerializationDeterministic has been called, and the last - // value passed to it is all that matters. + // Note that this is *not* canonical across languages. It is also unstable + // across different builds with intervening message definition changes, due to + // unknown fields. Users who need canonical serialization (e.g. persistent + // storage in a canonical form, fingerprinting) should define their own + // canonicalization specification and implement the serializer using + // reflection APIs rather than relying on this API. void SetSerializationDeterministic(bool value) { - is_serialization_deterministic_ = value; + impl_.SetSerializationDeterministic(value); } - // See above. Also, note that users of this CodedOutputStream may need to - // call IsSerializationDeterministic() to serialize in the intended way. This - // CodedOutputStream cannot enforce a desire for deterministic serialization - // by itself. + + // Return whether the user wants deterministic serialization. See above. 
bool IsSerializationDeterministic() const { - return is_serialization_deterministic_; + return impl_.IsSerializationDeterministic(); } static bool IsDefaultSerializationDeterministic() { - return default_serialization_deterministic_.load(std::memory_order_relaxed) != 0; + return default_serialization_deterministic_.load( + std::memory_order_relaxed) != 0; } - private: - GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream); - - ZeroCopyOutputStream* output_; - uint8* buffer_; - int buffer_size_; - int total_bytes_; // Sum of sizes of all buffers seen so far. - bool had_error_; // Whether an error occurred during output. - bool aliasing_enabled_; // See EnableAliasing(). - bool is_serialization_deterministic_; - static std::atomic default_serialization_deterministic_; - - // Advance the buffer by a given number of bytes. - void Advance(int amount); - - // Called when the buffer runs out to request more data. Implies an - // Advance(buffer_size_). - bool Refresh(); + template + void Serialize(const Func& func); - // Like WriteRaw() but may avoid copying if the underlying - // ZeroCopyOutputStream supports it. - void WriteAliasedRaw(const void* buffer, int size); + uint8* Cur() const { return cur_; } + void SetCur(uint8* ptr) { cur_ = ptr; } + EpsCopyOutputStream* EpsCopy() { return &impl_; } - // If this write might cross the end of the buffer, we compose the bytes first - // then use WriteRaw(). - void WriteVarint32SlowPath(uint32 value); - void WriteVarint64SlowPath(uint64 value); + private: + EpsCopyOutputStream impl_; + uint8* cur_; + int64 start_count_; + static std::atomic default_serialization_deterministic_; // See above. Other projects may use "friend" to allow them to call this. 
// After SetDefaultSerializationDeterministic() completes, all protocol @@ -889,10 +1256,11 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { // that wants deterministic serialization by default needs to call // SetDefaultSerializationDeterministic() or ensure on its own that another // thread has done so. - friend void ::google::protobuf::internal::MapTestForceDeterministic(); + friend void internal::MapTestForceDeterministic(); static void SetDefaultSerializationDeterministic() { default_serialization_deterministic_.store(true, std::memory_order_relaxed); } + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream); }; // inline methods ==================================================== @@ -901,7 +1269,7 @@ class LIBPROTOBUF_EXPORT CodedOutputStream { inline bool CodedInputStream::ReadVarint32(uint32* value) { uint32 v = 0; - if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { v = *buffer_; if (v < 0x80) { *value = v; @@ -915,7 +1283,7 @@ inline bool CodedInputStream::ReadVarint32(uint32* value) { } inline bool CodedInputStream::ReadVarint64(uint64* value) { - if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) { *value = *buffer_; Advance(1); return true; @@ -926,7 +1294,7 @@ inline bool CodedInputStream::ReadVarint64(uint64* value) { } inline bool CodedInputStream::ReadVarintSizeAsInt(int* value) { - if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { int v = *buffer_; if (v < 0x80) { *value = v; @@ -940,14 +1308,13 @@ inline bool CodedInputStream::ReadVarintSizeAsInt(int* value) { // static inline const uint8* CodedInputStream::ReadLittleEndian32FromArray( - const uint8* buffer, - uint32* value) { + const uint8* buffer, uint32* value) { #if defined(PROTOBUF_LITTLE_ENDIAN) memcpy(value, buffer, sizeof(*value)); return buffer + sizeof(*value); #else - *value = 
(static_cast(buffer[0]) ) | - (static_cast(buffer[1]) << 8) | + *value = (static_cast(buffer[0])) | + (static_cast(buffer[1]) << 8) | (static_cast(buffer[2]) << 16) | (static_cast(buffer[3]) << 24); return buffer + sizeof(*value); @@ -955,29 +1322,27 @@ inline const uint8* CodedInputStream::ReadLittleEndian32FromArray( } // static inline const uint8* CodedInputStream::ReadLittleEndian64FromArray( - const uint8* buffer, - uint64* value) { + const uint8* buffer, uint64* value) { #if defined(PROTOBUF_LITTLE_ENDIAN) memcpy(value, buffer, sizeof(*value)); return buffer + sizeof(*value); #else - uint32 part0 = (static_cast(buffer[0]) ) | - (static_cast(buffer[1]) << 8) | + uint32 part0 = (static_cast(buffer[0])) | + (static_cast(buffer[1]) << 8) | (static_cast(buffer[2]) << 16) | (static_cast(buffer[3]) << 24); - uint32 part1 = (static_cast(buffer[4]) ) | - (static_cast(buffer[5]) << 8) | + uint32 part1 = (static_cast(buffer[4])) | + (static_cast(buffer[5]) << 8) | (static_cast(buffer[6]) << 16) | (static_cast(buffer[7]) << 24); - *value = static_cast(part0) | - (static_cast(part1) << 32); + *value = static_cast(part0) | (static_cast(part1) << 32); return buffer + sizeof(*value); #endif } inline bool CodedInputStream::ReadLittleEndian32(uint32* value) { #if defined(PROTOBUF_LITTLE_ENDIAN) - if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast(sizeof(*value)))) { + if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast(sizeof(*value)))) { buffer_ = ReadLittleEndian32FromArray(buffer_, value); return true; } else { @@ -990,7 +1355,7 @@ inline bool CodedInputStream::ReadLittleEndian32(uint32* value) { inline bool CodedInputStream::ReadLittleEndian64(uint64* value) { #if defined(PROTOBUF_LITTLE_ENDIAN) - if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast(sizeof(*value)))) { + if (PROTOBUF_PREDICT_TRUE(BufferSize() >= static_cast(sizeof(*value)))) { buffer_ = ReadLittleEndian64FromArray(buffer_, value); return true; } else { @@ -1003,7 +1368,7 @@ inline bool 
CodedInputStream::ReadLittleEndian64(uint64* value) { inline uint32 CodedInputStream::ReadTagNoLastTag() { uint32 v = 0; - if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { v = *buffer_; if (v < 0x80) { Advance(1); @@ -1020,7 +1385,7 @@ inline std::pair CodedInputStream::ReadTagWithCutoffNoLastTag( // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at // compile time. uint32 first_byte_or_zero = 0; - if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_)) { // Hot case: buffer_ non_empty, buffer_[0] in [1, 128). // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields // is large enough then is it better to check for the two-byte case first? @@ -1034,8 +1399,8 @@ inline std::pair CodedInputStream::ReadTagWithCutoffNoLastTag( // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available, // and tag is two bytes. The latter is tested by bitwise-and-not of the // first byte and the second byte. 
- if (cutoff >= 0x80 && GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) && - GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) { + if (cutoff >= 0x80 && PROTOBUF_PREDICT_TRUE(buffer_ + 1 < buffer_end_) && + PROTOBUF_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) { const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f; uint32 tag = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80); Advance(2); @@ -1064,14 +1429,15 @@ inline bool CodedInputStream::ConsumedEntireMessage() { inline bool CodedInputStream::ExpectTag(uint32 expected) { if (expected < (1 << 7)) { - if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) { + if (PROTOBUF_PREDICT_TRUE(buffer_ < buffer_end_) && + buffer_[0] == expected) { Advance(1); return true; } else { return false; } } else if (expected < (1 << 14)) { - if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) && + if (PROTOBUF_PREDICT_TRUE(BufferSize() >= 2) && buffer_[0] == static_cast(expected | 0x80) && buffer_[1] == static_cast(expected >> 7)) { Advance(2); @@ -1085,8 +1451,8 @@ inline bool CodedInputStream::ExpectTag(uint32 expected) { } } -inline const uint8* CodedInputStream::ExpectTagFromArray( - const uint8* buffer, uint32 expected) { +inline const uint8* CodedInputStream::ExpectTagFromArray(const uint8* buffer, + uint32 expected) { if (expected < (1 << 7)) { if (buffer[0] == expected) { return buffer + 1; @@ -1097,7 +1463,7 @@ inline const uint8* CodedInputStream::ExpectTagFromArray( return buffer + 2; } } - return NULL; + return nullptr; } inline void CodedInputStream::GetDirectBufferPointerInline(const void** data, @@ -1110,9 +1476,8 @@ inline bool CodedInputStream::ExpectAtEnd() { // If we are at a limit we know no more bytes can be read. Otherwise, it's // hard to say without calling Refresh(), and we'd rather not do that. 
- if (buffer_ == buffer_end_ && - ((buffer_size_after_limit_ != 0) || - (total_bytes_read_ == current_limit_))) { + if (buffer_ == buffer_end_ && ((buffer_size_after_limit_ != 0) || + (total_bytes_read_ == current_limit_))) { last_tag_ = 0; // Pretend we called ReadTag()... legitimate_message_end_ = true; // ... and it hit EOF. return true; @@ -1125,36 +1490,109 @@ inline int CodedInputStream::CurrentPosition() const { return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_); } -inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) { - if (buffer_size_ < size) { - return NULL; - } else { - uint8* result = buffer_; - Advance(size); - return result; +inline void CodedInputStream::Advance(int amount) { buffer_ += amount; } + +inline void CodedInputStream::SetRecursionLimit(int limit) { + recursion_budget_ += limit - recursion_limit_; + recursion_limit_ = limit; +} + +inline bool CodedInputStream::IncrementRecursionDepth() { + --recursion_budget_; + return recursion_budget_ >= 0; +} + +inline void CodedInputStream::DecrementRecursionDepth() { + if (recursion_budget_ < recursion_limit_) ++recursion_budget_; +} + +inline void CodedInputStream::UnsafeDecrementRecursionDepth() { + assert(recursion_budget_ < recursion_limit_); + ++recursion_budget_; +} + +inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool, + MessageFactory* factory) { + extension_pool_ = pool; + extension_factory_ = factory; +} + +inline const DescriptorPool* CodedInputStream::GetExtensionPool() { + return extension_pool_; +} + +inline MessageFactory* CodedInputStream::GetExtensionFactory() { + return extension_factory_; +} + +inline int CodedInputStream::BufferSize() const { + return static_cast(buffer_end_ - buffer_); +} + +inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) + : buffer_(nullptr), + buffer_end_(nullptr), + input_(input), + total_bytes_read_(0), + overflow_bytes_(0), + last_tag_(0), + 
legitimate_message_end_(false), + aliasing_enabled_(false), + current_limit_(kint32max), + buffer_size_after_limit_(0), + total_bytes_limit_(kDefaultTotalBytesLimit), + recursion_budget_(default_recursion_limit_), + recursion_limit_(default_recursion_limit_), + extension_pool_(nullptr), + extension_factory_(nullptr) { + // Eagerly Refresh() so buffer space is immediately available. + Refresh(); +} + +inline CodedInputStream::CodedInputStream(const uint8* buffer, int size) + : buffer_(buffer), + buffer_end_(buffer + size), + input_(nullptr), + total_bytes_read_(size), + overflow_bytes_(0), + last_tag_(0), + legitimate_message_end_(false), + aliasing_enabled_(false), + current_limit_(size), + buffer_size_after_limit_(0), + total_bytes_limit_(kDefaultTotalBytesLimit), + recursion_budget_(default_recursion_limit_), + recursion_limit_(default_recursion_limit_), + extension_pool_(nullptr), + extension_factory_(nullptr) { + // Note that setting current_limit_ == size is important to prevent some + // code paths from trying to access input_ and segfaulting. +} + +inline bool CodedInputStream::IsFlat() const { return input_ == nullptr; } + +inline bool CodedInputStream::Skip(int count) { + if (count < 0) return false; // security: count is often user-supplied + + const int original_buffer_size = BufferSize(); + + if (count <= original_buffer_size) { + // Just skipping within the current buffer. Easy. 
+ Advance(count); + return true; } + + return SkipFallback(count, original_buffer_size); } inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value, uint8* target) { - while (value >= 0x80) { - *target = static_cast(value | 0x80); - value >>= 7; - ++target; - } - *target = static_cast(value); - return target + 1; + return EpsCopyOutputStream::UnsafeVarint(value, target); } inline uint8* CodedOutputStream::WriteVarint64ToArray(uint64 value, uint8* target) { - while (value >= 0x80) { - *target = static_cast(value | 0x80); - value >>= 7; - ++target; - } - *target = static_cast(value); - return target + 1; + return EpsCopyOutputStream::UnsafeVarint(value, target); } inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) { @@ -1172,7 +1610,7 @@ inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value, memcpy(target, &value, sizeof(value)); #else target[0] = static_cast(value); - target[1] = static_cast(value >> 8); + target[1] = static_cast(value >> 8); target[2] = static_cast(value >> 16); target[3] = static_cast(value >> 24); #endif @@ -1188,11 +1626,11 @@ inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value, uint32 part1 = static_cast(value >> 32); target[0] = static_cast(part0); - target[1] = static_cast(part0 >> 8); + target[1] = static_cast(part0 >> 8); target[2] = static_cast(part0 >> 16); target[3] = static_cast(part0 >> 24); target[4] = static_cast(part1); - target[5] = static_cast(part1 >> 8); + target[5] = static_cast(part1 >> 8); target[6] = static_cast(part1 >> 16); target[7] = static_cast(part1 >> 24); #endif @@ -1200,37 +1638,18 @@ inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value, } inline void CodedOutputStream::WriteVarint32(uint32 value) { - if (buffer_size_ >= 5) { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this write won't cross the end, so we can skip the checks. 
- uint8* target = buffer_; - uint8* end = WriteVarint32ToArray(value, target); - int size = static_cast(end - target); - Advance(size); - } else { - WriteVarint32SlowPath(value); - } + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteVarint32ToArray(value, Cur())); } inline void CodedOutputStream::WriteVarint64(uint64 value) { - if (buffer_size_ >= 10) { - // Fast path: We have enough bytes left in the buffer to guarantee that - // this write won't cross the end, so we can skip the checks. - uint8* target = buffer_; - uint8* end = WriteVarint64ToArray(value, target); - int size = static_cast(end - target); - Advance(size); - } else { - WriteVarint64SlowPath(value); - } + cur_ = impl_.EnsureSpace(cur_); + SetCur(WriteVarint64ToArray(value, Cur())); } -inline void CodedOutputStream::WriteTag(uint32 value) { - WriteVarint32(value); -} +inline void CodedOutputStream::WriteTag(uint32 value) { WriteVarint32(value); } -inline uint8* CodedOutputStream::WriteTagToArray( - uint32 value, uint8* target) { +inline uint8* CodedOutputStream::WriteTagToArray(uint32 value, uint8* target) { return WriteVarint32ToArray(value, target); } @@ -1256,145 +1675,40 @@ inline size_t CodedOutputStream::VarintSize64(uint64 value) { inline size_t CodedOutputStream::VarintSize32SignExtended(int32 value) { if (value < 0) { - return 10; // TODO(kenton): Make this a symbolic constant. + return 10; // TODO(kenton): Make this a symbolic constant. 
} else { return VarintSize32(static_cast(value)); } } -inline void CodedOutputStream::WriteString(const string& str) { +inline void CodedOutputStream::WriteString(const std::string& str) { WriteRaw(str.data(), static_cast(str.size())); } -inline void CodedOutputStream::WriteRawMaybeAliased( - const void* data, int size) { - if (aliasing_enabled_) { - WriteAliasedRaw(data, size); - } else { - WriteRaw(data, size); - } -} - -inline uint8* CodedOutputStream::WriteStringToArray( - const string& str, uint8* target) { - return WriteRawToArray(str.data(), static_cast(str.size()), target); -} - -inline int CodedOutputStream::ByteCount() const { - return total_bytes_ - buffer_size_; -} - -inline void CodedInputStream::Advance(int amount) { - buffer_ += amount; -} - -inline void CodedOutputStream::Advance(int amount) { - buffer_ += amount; - buffer_size_ -= amount; -} - -inline void CodedInputStream::SetRecursionLimit(int limit) { - recursion_budget_ += limit - recursion_limit_; - recursion_limit_ = limit; -} - -inline bool CodedInputStream::IncrementRecursionDepth() { - --recursion_budget_; - return recursion_budget_ >= 0; -} - -inline void CodedInputStream::DecrementRecursionDepth() { - if (recursion_budget_ < recursion_limit_) ++recursion_budget_; -} - -inline void CodedInputStream::UnsafeDecrementRecursionDepth() { - assert(recursion_budget_ < recursion_limit_); - ++recursion_budget_; +inline void CodedOutputStream::WriteRawMaybeAliased(const void* data, + int size) { + cur_ = impl_.WriteRawMaybeAliased(data, size, cur_); } -inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool, - MessageFactory* factory) { - extension_pool_ = pool; - extension_factory_ = factory; +inline uint8* CodedOutputStream::WriteRawToArray(const void* data, int size, + uint8* target) { + memcpy(target, data, size); + return target + size; } -inline const DescriptorPool* CodedInputStream::GetExtensionPool() { - return extension_pool_; -} - -inline MessageFactory* 
CodedInputStream::GetExtensionFactory() { - return extension_factory_; -} - -inline int CodedInputStream::BufferSize() const { - return static_cast(buffer_end_ - buffer_); -} - -inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) - : buffer_(NULL), - buffer_end_(NULL), - input_(input), - total_bytes_read_(0), - overflow_bytes_(0), - last_tag_(0), - legitimate_message_end_(false), - aliasing_enabled_(false), - current_limit_(kint32max), - buffer_size_after_limit_(0), - total_bytes_limit_(kDefaultTotalBytesLimit), - recursion_budget_(default_recursion_limit_), - recursion_limit_(default_recursion_limit_), - extension_pool_(NULL), - extension_factory_(NULL) { - // Eagerly Refresh() so buffer space is immediately available. - Refresh(); -} - -inline CodedInputStream::CodedInputStream(const uint8* buffer, int size) - : buffer_(buffer), - buffer_end_(buffer + size), - input_(NULL), - total_bytes_read_(size), - overflow_bytes_(0), - last_tag_(0), - legitimate_message_end_(false), - aliasing_enabled_(false), - current_limit_(size), - buffer_size_after_limit_(0), - total_bytes_limit_(kDefaultTotalBytesLimit), - recursion_budget_(default_recursion_limit_), - recursion_limit_(default_recursion_limit_), - extension_pool_(NULL), - extension_factory_(NULL) { - // Note that setting current_limit_ == size is important to prevent some - // code paths from trying to access input_ and segfaulting. -} - -inline bool CodedInputStream::IsFlat() const { - return input_ == NULL; -} - -inline bool CodedInputStream::Skip(int count) { - if (count < 0) return false; // security: count is often user-supplied - - const int original_buffer_size = BufferSize(); - - if (count <= original_buffer_size) { - // Just skipping within the current buffer. Easy. 
- Advance(count); - return true; - } - - return SkipFallback(count, original_buffer_size); +inline uint8* CodedOutputStream::WriteStringToArray(const std::string& str, + uint8* target) { + return WriteRawToArray(str.data(), static_cast(str.size()), target); } } // namespace io } // namespace protobuf - +} // namespace google #if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER) - #pragma runtime_checks("c", restore) +#pragma runtime_checks("c", restore) #endif // _MSC_VER && !defined(__INTEL_COMPILER) -} // namespace google +#include + #endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/io_win32.h b/third_party/protobuf-lite/google/protobuf/io/io_win32.h similarity index 58% rename from third_party/protobuf-lite/google/protobuf/stubs/io_win32.h rename to third_party/protobuf-lite/google/protobuf/io/io_win32.h index 9e17d253..7d11dc2b 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/io_win32.h +++ b/third_party/protobuf-lite/google/protobuf/io/io_win32.h @@ -29,68 +29,90 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Author: laszlocsomor@google.com (Laszlo Csomor) -// +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + // This file contains the declarations for Windows implementations of // commonly used POSIX functions such as open(2) and access(2), as well // as macro definitions for flags of these functions. // // By including this file you'll redefine open/access/etc. to -// ::google::protobuf::internal::win32::{open/access/etc.}. +// ::google::protobuf::io::win32::{open/access/etc.}. // Make sure you don't include a header that attempts to redeclare or // redefine these functions, that'll lead to confusing compilation // errors. It's best to #include this file as the last one to ensure that. // // This file is only used on Windows, it's empty on other platforms. 
-#ifndef GOOGLE_PROTOBUF_STUBS_IO_WIN32_H__ -#define GOOGLE_PROTOBUF_STUBS_IO_WIN32_H__ +#ifndef GOOGLE_PROTOBUF_IO_IO_WIN32_H__ +#define GOOGLE_PROTOBUF_IO_IO_WIN32_H__ #if defined(_WIN32) +#include #include -#include + +#include +#include // Compilers on Windows other than MSVC (e.g. Cygwin, MinGW32) define the // following functions already, except for mkdir. namespace google { namespace protobuf { -namespace internal { +namespace io { namespace win32 { -LIBPROTOBUF_EXPORT FILE* fopen(const char* path, const char* mode); -LIBPROTOBUF_EXPORT int access(const char* path, int mode); -LIBPROTOBUF_EXPORT int chdir(const char* path); -LIBPROTOBUF_EXPORT int close(int fd); -LIBPROTOBUF_EXPORT int dup(int fd); -LIBPROTOBUF_EXPORT int dup2(int fd1, int fd2); -LIBPROTOBUF_EXPORT int mkdir(const char* path, int _mode); -LIBPROTOBUF_EXPORT int open(const char* path, int flags, int mode = 0); -LIBPROTOBUF_EXPORT int read(int fd, void* buffer, size_t size); -LIBPROTOBUF_EXPORT int setmode(int fd, int mode); -LIBPROTOBUF_EXPORT int stat(const char* path, struct _stat* buffer); -LIBPROTOBUF_EXPORT int write(int fd, const void* buffer, size_t size); -LIBPROTOBUF_EXPORT std::wstring testonly_utf8_to_winpath(const char* path); +PROTOBUF_EXPORT FILE* fopen(const char* path, const char* mode); +PROTOBUF_EXPORT int access(const char* path, int mode); +PROTOBUF_EXPORT int chdir(const char* path); +PROTOBUF_EXPORT int close(int fd); +PROTOBUF_EXPORT int dup(int fd); +PROTOBUF_EXPORT int dup2(int fd1, int fd2); +PROTOBUF_EXPORT int mkdir(const char* path, int _mode); +PROTOBUF_EXPORT int open(const char* path, int flags, int mode = 0); +PROTOBUF_EXPORT int read(int fd, void* buffer, size_t size); +PROTOBUF_EXPORT int setmode(int fd, int mode); +PROTOBUF_EXPORT int stat(const char* path, struct _stat* buffer); +PROTOBUF_EXPORT int write(int fd, const void* buffer, size_t size); +PROTOBUF_EXPORT std::wstring testonly_utf8_to_winpath(const char* path); + +enum class ExpandWildcardsResult { 
+ kSuccess = 0, + kErrorNoMatchingFile = 1, + kErrorInputPathConversion = 2, + kErrorOutputPathConversion = 3, +}; + +// Expand wildcards in a path pattern, feed the result to a consumer function. +// +// `path` must be a valid, Windows-style path. It may be absolute, or relative +// to the current working directory, and it may contain wildcards ("*" and "?") +// in the last path segment. This function passes all matching file names to +// `consume`. The resulting paths may not be absolute nor normalized. +// +// The function returns a value from `ExpandWildcardsResult`. +PROTOBUF_EXPORT ExpandWildcardsResult ExpandWildcards( + const std::string& path, std::function consume); namespace strings { // Convert from UTF-16 to Active-Code-Page-encoded or to UTF-8-encoded text. -LIBPROTOBUF_EXPORT bool wcs_to_mbs( - const wchar_t* s, std::string* out, bool outUtf8); +PROTOBUF_EXPORT bool wcs_to_mbs(const wchar_t* s, std::string* out, + bool outUtf8); // Convert from Active-Code-Page-encoded or UTF-8-encoded text to UTF-16. -LIBPROTOBUF_EXPORT bool mbs_to_wcs( - const char* s, std::wstring* out, bool inUtf8); +PROTOBUF_EXPORT bool mbs_to_wcs(const char* s, std::wstring* out, bool inUtf8); // Convert from UTF-8-encoded text to UTF-16. -LIBPROTOBUF_EXPORT bool utf8_to_wcs(const char* input, std::wstring* out); +PROTOBUF_EXPORT bool utf8_to_wcs(const char* input, std::wstring* out); // Convert from UTF-16-encoded text to UTF-8. 
-LIBPROTOBUF_EXPORT bool wcs_to_utf8(const wchar_t* input, std::string* out); +PROTOBUF_EXPORT bool wcs_to_utf8(const wchar_t* input, std::string* out); } // namespace strings } // namespace win32 -} // namespace internal +} // namespace io } // namespace protobuf } // namespace google @@ -110,6 +132,8 @@ LIBPROTOBUF_EXPORT bool wcs_to_utf8(const wchar_t* input, std::string* out); #define STDOUT_FILENO 1 #endif +#include + #endif // defined(_WIN32) -#endif // GOOGLE_PROTOBUF_STUBS_IO_WIN32_H__ +#endif // GOOGLE_PROTOBUF_IO_IO_WIN32_H__ diff --git a/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream.h b/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream.h index 62ace7ae..d3bd6dae 100644 --- a/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream.h +++ b/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream.h @@ -107,11 +107,14 @@ #ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ #define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ + #include + #include +#include -namespace google { +namespace google { namespace protobuf { namespace io { @@ -121,7 +124,7 @@ class ZeroCopyOutputStream; // Abstract interface similar to an input stream but designed to minimize // copying. -class LIBPROTOBUF_EXPORT ZeroCopyInputStream { +class PROTOBUF_EXPORT ZeroCopyInputStream { public: ZeroCopyInputStream() {} virtual ~ZeroCopyInputStream() {} @@ -169,7 +172,7 @@ class LIBPROTOBUF_EXPORT ZeroCopyInputStream { virtual bool Skip(int count) = 0; // Returns the total number of bytes read since this object was created. - virtual int64 ByteCount() const = 0; + virtual int64_t ByteCount() const = 0; private: @@ -178,7 +181,7 @@ class LIBPROTOBUF_EXPORT ZeroCopyInputStream { // Abstract interface similar to an output stream but designed to minimize // copying. 
-class LIBPROTOBUF_EXPORT ZeroCopyOutputStream { +class PROTOBUF_EXPORT ZeroCopyOutputStream { public: ZeroCopyOutputStream() {} virtual ~ZeroCopyOutputStream() {} @@ -224,7 +227,7 @@ class LIBPROTOBUF_EXPORT ZeroCopyOutputStream { virtual void BackUp(int count) = 0; // Returns the total number of bytes written since this object was created. - virtual int64 ByteCount() const = 0; + virtual int64_t ByteCount() const = 0; // Write a given chunk of data to the output. Some output streams may // implement this in a way that avoids copying. Check AllowsAliasing() before @@ -243,6 +246,8 @@ class LIBPROTOBUF_EXPORT ZeroCopyOutputStream { } // namespace io } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__ diff --git a/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream_impl.h b/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream_impl.h new file mode 100644 index 00000000..0206e388 --- /dev/null +++ b/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream_impl.h @@ -0,0 +1,327 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// This file contains common implementations of the interfaces defined in +// zero_copy_stream.h which are only included in the full (non-lite) +// protobuf library. These implementations include Unix file descriptors +// and C++ iostreams. See also: zero_copy_stream_impl_lite.h + +#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ +#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ + + +#include +#include + +#include +#include +#include + +#include + +namespace google { +namespace protobuf { +namespace io { + +// =================================================================== + +// A ZeroCopyInputStream which reads from a file descriptor. +// +// FileInputStream is preferred over using an ifstream with IstreamInputStream. +// The latter will introduce an extra layer of buffering, harming performance. +// Also, it's conceivable that FileInputStream could someday be enhanced +// to use zero-copy file descriptors on OSs which support them. 
+class PROTOBUF_EXPORT FileInputStream : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given Unix file descriptor. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. + explicit FileInputStream(int file_descriptor, int block_size = -1); + + // Flushes any buffers and closes the underlying file. Returns false if + // an error occurs during the process; use GetErrno() to examine the error. + // Even if an error occurs, the file descriptor is closed when this returns. + bool Close(); + + // By default, the file descriptor is not closed when the stream is + // destroyed. Call SetCloseOnDelete(true) to change that. WARNING: + // This leaves no way for the caller to detect if close() fails. If + // detecting close() errors is important to you, you should arrange + // to close the descriptor yourself. + void SetCloseOnDelete(bool value) { copying_input_.SetCloseOnDelete(value); } + + // If an I/O error has occurred on this file descriptor, this is the + // errno from that error. Otherwise, this is zero. Once an error + // occurs, the stream is broken and all subsequent operations will + // fail. 
+ int GetErrno() const { return copying_input_.GetErrno(); } + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + private: + class PROTOBUF_EXPORT CopyingFileInputStream : public CopyingInputStream { + public: + CopyingFileInputStream(int file_descriptor); + ~CopyingFileInputStream() override; + + bool Close(); + void SetCloseOnDelete(bool value) { close_on_delete_ = value; } + int GetErrno() const { return errno_; } + + // implements CopyingInputStream --------------------------------- + int Read(void* buffer, int size) override; + int Skip(int count) override; + + private: + // The file descriptor. + const int file_; + bool close_on_delete_; + bool is_closed_; + + // The errno of the I/O error, if one has occurred. Otherwise, zero. + int errno_; + + // Did we try to seek once and fail? If so, we assume this file descriptor + // doesn't support seeking and won't try again. + bool previous_seek_failed_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileInputStream); + }; + + CopyingFileInputStream copying_input_; + CopyingInputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which writes to a file descriptor. +// +// FileOutputStream is preferred over using an ofstream with +// OstreamOutputStream. The latter will introduce an extra layer of buffering, +// harming performance. Also, it's conceivable that FileOutputStream could +// someday be enhanced to use zero-copy file descriptors on OSs which +// support them. +class PROTOBUF_EXPORT FileOutputStream : public CopyingOutputStreamAdaptor { + public: + // Creates a stream that writes to the given Unix file descriptor. 
+ // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit FileOutputStream(int file_descriptor, int block_size = -1); + + ~FileOutputStream() override; + + // Flushes any buffers and closes the underlying file. Returns false if + // an error occurs during the process; use GetErrno() to examine the error. + // Even if an error occurs, the file descriptor is closed when this returns. + bool Close(); + + // By default, the file descriptor is not closed when the stream is + // destroyed. Call SetCloseOnDelete(true) to change that. WARNING: + // This leaves no way for the caller to detect if close() fails. If + // detecting close() errors is important to you, you should arrange + // to close the descriptor yourself. + void SetCloseOnDelete(bool value) { copying_output_.SetCloseOnDelete(value); } + + // If an I/O error has occurred on this file descriptor, this is the + // errno from that error. Otherwise, this is zero. Once an error + // occurs, the stream is broken and all subsequent operations will + // fail. + int GetErrno() const { return copying_output_.GetErrno(); } + + private: + class PROTOBUF_EXPORT CopyingFileOutputStream : public CopyingOutputStream { + public: + CopyingFileOutputStream(int file_descriptor); + ~CopyingFileOutputStream() override; + + bool Close(); + void SetCloseOnDelete(bool value) { close_on_delete_ = value; } + int GetErrno() const { return errno_; } + + // implements CopyingOutputStream -------------------------------- + bool Write(const void* buffer, int size) override; + + private: + // The file descriptor. + const int file_; + bool close_on_delete_; + bool is_closed_; + + // The errno of the I/O error, if one has occurred. Otherwise, zero. 
+ int errno_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileOutputStream); + }; + + CopyingFileOutputStream copying_output_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileOutputStream); +}; + +// =================================================================== + +// A ZeroCopyInputStream which reads from a C++ istream. +// +// Note that for reading files (or anything represented by a file descriptor), +// FileInputStream is more efficient. +class PROTOBUF_EXPORT IstreamInputStream : public ZeroCopyInputStream { + public: + // Creates a stream that reads from the given C++ istream. + // If a block_size is given, it specifies the number of bytes that + // should be read and returned with each call to Next(). Otherwise, + // a reasonable default is used. + explicit IstreamInputStream(std::istream* stream, int block_size = -1); + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + private: + class PROTOBUF_EXPORT CopyingIstreamInputStream : public CopyingInputStream { + public: + CopyingIstreamInputStream(std::istream* input); + ~CopyingIstreamInputStream() override; + + // implements CopyingInputStream --------------------------------- + int Read(void* buffer, int size) override; + // (We use the default implementation of Skip().) + + private: + // The stream. + std::istream* input_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingIstreamInputStream); + }; + + CopyingIstreamInputStream copying_input_; + CopyingInputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(IstreamInputStream); +}; + +// =================================================================== + +// A ZeroCopyOutputStream which writes to a C++ ostream. +// +// Note that for writing files (or anything represented by a file descriptor), +// FileOutputStream is more efficient. 
+class PROTOBUF_EXPORT OstreamOutputStream : public ZeroCopyOutputStream { + public: + // Creates a stream that writes to the given C++ ostream. + // If a block_size is given, it specifies the size of the buffers + // that should be returned by Next(). Otherwise, a reasonable default + // is used. + explicit OstreamOutputStream(std::ostream* stream, int block_size = -1); + ~OstreamOutputStream() override; + + // implements ZeroCopyOutputStream --------------------------------- + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + + private: + class PROTOBUF_EXPORT CopyingOstreamOutputStream + : public CopyingOutputStream { + public: + CopyingOstreamOutputStream(std::ostream* output); + ~CopyingOstreamOutputStream() override; + + // implements CopyingOutputStream -------------------------------- + bool Write(const void* buffer, int size) override; + + private: + // The stream. + std::ostream* output_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOstreamOutputStream); + }; + + CopyingOstreamOutputStream copying_output_; + CopyingOutputStreamAdaptor impl_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(OstreamOutputStream); +}; + +// =================================================================== + +// A ZeroCopyInputStream which reads from several other streams in sequence. +// ConcatenatingInputStream is unable to distinguish between end-of-stream +// and read errors in the underlying streams, so it assumes any errors mean +// end-of-stream. So, if the underlying streams fail for any other reason, +// ConcatenatingInputStream may do odd things. It is suggested that you do +// not use ConcatenatingInputStream on streams that might produce read errors +// other than end-of-stream. +class PROTOBUF_EXPORT ConcatenatingInputStream : public ZeroCopyInputStream { + public: + // All streams passed in as well as the array itself must remain valid + // until the ConcatenatingInputStream is destroyed. 
+ ConcatenatingInputStream(ZeroCopyInputStream* const streams[], int count); + ~ConcatenatingInputStream() override = default; + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + + private: + // As streams are retired, streams_ is incremented and count_ is + // decremented. + ZeroCopyInputStream* const* streams_; + int stream_count_; + int64 bytes_retired_; // Bytes read from previous streams. + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ConcatenatingInputStream); +}; + +// =================================================================== + +} // namespace io +} // namespace protobuf +} // namespace google + +#include + +#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__ diff --git a/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream_impl_lite.h b/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream_impl_lite.h index 29f63bf0..cfe81d2c 100644 --- a/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream_impl_lite.h +++ b/third_party/protobuf-lite/google/protobuf/io/zero_copy_stream_impl_lite.h @@ -44,15 +44,19 @@ #ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__ #define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__ + +#include #include #include -#include -#include + #include #include +#include #include +#include + namespace google { namespace protobuf { namespace io { @@ -60,7 +64,7 @@ namespace io { // =================================================================== // A ZeroCopyInputStream backed by an in-memory array of bytes. -class LIBPROTOBUF_EXPORT ArrayInputStream : public ZeroCopyInputStream { +class PROTOBUF_EXPORT ArrayInputStream : public ZeroCopyInputStream { public: // Create an InputStream that returns the bytes pointed to by "data". 
// "data" remains the property of the caller but must remain valid until @@ -70,12 +74,13 @@ class LIBPROTOBUF_EXPORT ArrayInputStream : public ZeroCopyInputStream { // useful for testing; in production you would probably never want to set // it. ArrayInputStream(const void* data, int size, int block_size = -1); + ~ArrayInputStream() override = default; // implements ZeroCopyInputStream ---------------------------------- - bool Next(const void** data, int* size); - void BackUp(int count); - bool Skip(int count); - int64 ByteCount() const; + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; private: @@ -84,8 +89,8 @@ class LIBPROTOBUF_EXPORT ArrayInputStream : public ZeroCopyInputStream { const int block_size_; // How many bytes to return at a time. int position_; - int last_returned_size_; // How many bytes we returned last time Next() - // was called (used for error checking only). + int last_returned_size_; // How many bytes we returned last time Next() + // was called (used for error checking only). GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayInputStream); }; @@ -93,7 +98,7 @@ class LIBPROTOBUF_EXPORT ArrayInputStream : public ZeroCopyInputStream { // =================================================================== // A ZeroCopyOutputStream backed by an in-memory array of bytes. -class LIBPROTOBUF_EXPORT ArrayOutputStream : public ZeroCopyOutputStream { +class PROTOBUF_EXPORT ArrayOutputStream : public ZeroCopyOutputStream { public: // Create an OutputStream that writes to the bytes pointed to by "data". // "data" remains the property of the caller but must remain valid until @@ -103,20 +108,21 @@ class LIBPROTOBUF_EXPORT ArrayOutputStream : public ZeroCopyOutputStream { // useful for testing; in production you would probably never want to set // it. 
ArrayOutputStream(void* data, int size, int block_size = -1); + ~ArrayOutputStream() override = default; // implements ZeroCopyOutputStream --------------------------------- - bool Next(void** data, int* size); - void BackUp(int count); - int64 ByteCount() const; + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; private: - uint8* const data_; // The byte array. - const int size_; // Total size of the array. - const int block_size_; // How many bytes to return at a time. + uint8* const data_; // The byte array. + const int size_; // Total size of the array. + const int block_size_; // How many bytes to return at a time. int position_; - int last_returned_size_; // How many bytes we returned last time Next() - // was called (used for error checking only). + int last_returned_size_; // How many bytes we returned last time Next() + // was called (used for error checking only). GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayOutputStream); }; @@ -124,7 +130,7 @@ class LIBPROTOBUF_EXPORT ArrayOutputStream : public ZeroCopyOutputStream { // =================================================================== // A ZeroCopyOutputStream which appends bytes to a string. -class LIBPROTOBUF_EXPORT StringOutputStream : public ZeroCopyOutputStream { +class PROTOBUF_EXPORT StringOutputStream : public ZeroCopyOutputStream { public: // Create a StringOutputStream which appends bytes to the given string. // The string remains property of the caller, but it is mutated in arbitrary @@ -135,20 +141,18 @@ class LIBPROTOBUF_EXPORT StringOutputStream : public ZeroCopyOutputStream { // Hint: If you call target->reserve(n) before creating the stream, // the first call to Next() will return at least n bytes of buffer // space. 
- explicit StringOutputStream(string* target); + explicit StringOutputStream(std::string* target); + ~StringOutputStream() override = default; // implements ZeroCopyOutputStream --------------------------------- - bool Next(void** data, int* size); - void BackUp(int count); - int64 ByteCount() const; - - protected: - void SetString(string* target); + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; private: - static const int kMinimumSize = 16; + static constexpr size_t kMinimumSize = 16; - string* target_; + std::string* target_; GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StringOutputStream); }; @@ -170,7 +174,7 @@ class LIBPROTOBUF_EXPORT StringOutputStream : public ZeroCopyOutputStream { // CopyingInputStream implementations should avoid buffering if possible. // CopyingInputStreamAdaptor does its own buffering and will read data // in large blocks. -class LIBPROTOBUF_EXPORT CopyingInputStream { +class PROTOBUF_EXPORT CopyingInputStream { public: virtual ~CopyingInputStream() {} @@ -196,7 +200,7 @@ class LIBPROTOBUF_EXPORT CopyingInputStream { // If you want to read from file descriptors or C++ istreams, this is // already implemented for you: use FileInputStream or IstreamInputStream // respectively. -class LIBPROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream { +class PROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream { public: // Creates a stream that reads from the given CopyingInputStream. // If a block_size is given, it specifies the number of bytes that @@ -205,17 +209,17 @@ class LIBPROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream // copying_stream unless SetOwnsCopyingStream(true) is called. 
explicit CopyingInputStreamAdaptor(CopyingInputStream* copying_stream, int block_size = -1); - ~CopyingInputStreamAdaptor(); + ~CopyingInputStreamAdaptor() override; // Call SetOwnsCopyingStream(true) to tell the CopyingInputStreamAdaptor to // delete the underlying CopyingInputStream when it is destroyed. void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; } // implements ZeroCopyInputStream ---------------------------------- - bool Next(const void** data, int* size); - void BackUp(int count); - bool Skip(int count); - int64 ByteCount() const; + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; private: // Insures that buffer_ is not NULL. @@ -227,7 +231,7 @@ class LIBPROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream CopyingInputStream* copying_stream_; bool owns_copying_stream_; - // True if we have seen a permenant error from the underlying stream. + // True if we have seen a permanent error from the underlying stream. bool failed_; // The current position of copying_stream_, relative to the point where @@ -264,7 +268,7 @@ class LIBPROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream // CopyingOutputStream implementations should avoid buffering if possible. // CopyingOutputStreamAdaptor does its own buffering and will write data // in large blocks. -class LIBPROTOBUF_EXPORT CopyingOutputStream { +class PROTOBUF_EXPORT CopyingOutputStream { public: virtual ~CopyingOutputStream() {} @@ -280,7 +284,7 @@ class LIBPROTOBUF_EXPORT CopyingOutputStream { // If you want to write to file descriptors or C++ ostreams, this is // already implemented for you: use FileOutputStream or OstreamOutputStream // respectively. 
-class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStream { +class PROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStream { public: // Creates a stream that writes to the given Unix file descriptor. // If a block_size is given, it specifies the size of the buffers @@ -288,7 +292,7 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea // is used. explicit CopyingOutputStreamAdaptor(CopyingOutputStream* copying_stream, int block_size = -1); - ~CopyingOutputStreamAdaptor(); + ~CopyingOutputStreamAdaptor() override; // Writes all pending data to the underlying stream. Returns false if a // write error occurred on the underlying stream. (The underlying @@ -300,9 +304,11 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; } // implements ZeroCopyOutputStream --------------------------------- - bool Next(void** data, int* size); - void BackUp(int count); - int64 ByteCount() const; + bool Next(void** data, int* size) override; + void BackUp(int count) override; + int64_t ByteCount() const override; + bool WriteAliasedRaw(const void* data, int size) override; + bool AllowsAliasing() const override { return true; } private: // Write the current buffer, if it is present. @@ -316,7 +322,7 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea CopyingOutputStream* copying_stream_; bool owns_copying_stream_; - // True if we have seen a permenant error from the underlying stream. + // True if we have seen a permanent error from the underlying stream. 
bool failed_; // The current position of copying_stream_, relative to the point where @@ -336,6 +342,31 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOutputStreamAdaptor); }; +// =================================================================== + +// A ZeroCopyInputStream which wraps some other stream and limits it to +// a particular byte count. +class PROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream { + public: + LimitingInputStream(ZeroCopyInputStream* input, int64 limit); + ~LimitingInputStream() override; + + // implements ZeroCopyInputStream ---------------------------------- + bool Next(const void** data, int* size) override; + void BackUp(int count) override; + bool Skip(int count) override; + int64_t ByteCount() const override; + + + private: + ZeroCopyInputStream* input_; + int64 limit_; // Decreases as we go, becomes negative if we overshoot. + int64 prior_bytes_read_; // Bytes read on underlying stream at construction + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream); +}; + + // =================================================================== // mutable_string_data() and as_string_data() are workarounds to improve @@ -353,31 +384,25 @@ class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStrea // Return a pointer to mutable characters underlying the given string. The // return value is valid until the next time the string is resized. We // trust the caller to treat the return value as an array of length s->size(). -inline char* mutable_string_data(string* s) { -#ifdef LANG_CXX11 +inline char* mutable_string_data(std::string* s) { // This should be simpler & faster than string_as_array() because the latter // is guaranteed to return NULL when *s is empty, so it has to check for that. 
return &(*s)[0]; -#else - return string_as_array(s); -#endif } // as_string_data(s) is equivalent to // ({ char* p = mutable_string_data(s); make_pair(p, p != NULL); }) // Sometimes it's faster: in some scenarios p cannot be NULL, and then the // code can avoid that check. -inline std::pair as_string_data(string* s) { - char *p = mutable_string_data(s); -#ifdef LANG_CXX11 +inline std::pair as_string_data(std::string* s) { + char* p = mutable_string_data(s); return std::make_pair(p, true); -#else - return std::make_pair(p, p != NULL); -#endif } } // namespace io } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__ diff --git a/third_party/protobuf-lite/google/protobuf/map.h b/third_party/protobuf-lite/google/protobuf/map.h index 6463ac2e..2453246d 100644 --- a/third_party/protobuf-lite/google/protobuf/map.h +++ b/third_party/protobuf-lite/google/protobuf/map.h @@ -37,18 +37,31 @@ #ifndef GOOGLE_PROTOBUF_MAP_H__ #define GOOGLE_PROTOBUF_MAP_H__ +#include #include #include #include // To support Visual Studio 2008 -#include +#include +#include +#include #include +#if defined(__cpp_lib_string_view) +#include +#endif // defined(__cpp_lib_string_view) + #include #include #include #include #include +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif + +#include + namespace google { namespace protobuf { @@ -57,17 +70,18 @@ class Map; class MapIterator; -template struct is_proto_enum; +template +struct is_proto_enum; namespace internal { template + WireFormatLite::FieldType value_wire_type> class MapFieldLite; template + WireFormatLite::FieldType value_wire_type> class MapField; template @@ -76,28 +90,262 @@ class TypeDefinedMapFieldBase; class DynamicMapField; class GeneratedMessageReflection; + +// re-implement std::allocator to use arena allocator for memory allocation. +// Used for Map implementation. Users should not use this class +// directly. 
+template +class MapAllocator { + public: + using value_type = U; + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; + using size_type = size_t; + using difference_type = ptrdiff_t; + + constexpr MapAllocator() : arena_(nullptr) {} + explicit constexpr MapAllocator(Arena* arena) : arena_(arena) {} + template + MapAllocator(const MapAllocator& allocator) // NOLINT(runtime/explicit) + : arena_(allocator.arena()) {} + + pointer allocate(size_type n, const void* /* hint */ = nullptr) { + // If arena is not given, malloc needs to be called which doesn't + // construct element object. + if (arena_ == nullptr) { + return static_cast(::operator new(n * sizeof(value_type))); + } else { + return reinterpret_cast( + Arena::CreateArray(arena_, n * sizeof(value_type))); + } + } + + void deallocate(pointer p, size_type n) { + if (arena_ == nullptr) { +#if defined(__GXX_DELETE_WITH_SIZE__) || defined(__cpp_sized_deallocation) + ::operator delete(p, n * sizeof(value_type)); +#else + (void)n; + ::operator delete(p); +#endif + } + } + +#if !defined(GOOGLE_PROTOBUF_OS_APPLE) && !defined(GOOGLE_PROTOBUF_OS_NACL) && \ + !defined(GOOGLE_PROTOBUF_OS_EMSCRIPTEN) + template + void construct(NodeType* p, Args&&... args) { + // Clang 3.6 doesn't compile static casting to void* directly. (Issue + // #1266) According C++ standard 5.2.9/1: "The static_cast operator shall + // not cast away constness". So first the maybe const pointer is casted to + // const void* and after the const void* is const casted. 
+ new (const_cast(static_cast(p))) + NodeType(std::forward(args)...); + } + + template + void destroy(NodeType* p) { + p->~NodeType(); + } +#else + void construct(pointer p, const_reference t) { new (p) value_type(t); } + + void destroy(pointer p) { p->~value_type(); } +#endif + + template + struct rebind { + using other = MapAllocator; + }; + + template + bool operator==(const MapAllocator& other) const { + return arena_ == other.arena_; + } + + template + bool operator!=(const MapAllocator& other) const { + return arena_ != other.arena_; + } + + // To support Visual Studio 2008 + size_type max_size() const { + // parentheses around (std::...:max) prevents macro warning of max() + return (std::numeric_limits::max)(); + } + + // To support gcc-4.4, which does not properly + // support templated friend classes + Arena* arena() const { return arena_; } + + private: + using DestructorSkippable_ = void; + Arena* arena_; +}; + +template +using KeyForTree = + typename std::conditional::value, T, + std::reference_wrapper>::type; + +// Default case: Not transparent. +// We use std::hash/std::less and all the lookup functions +// only accept `key_type`. +template +struct TransparentSupport { + using hash = std::hash; + using less = std::less; + + static bool Equals(const key_type& a, const key_type& b) { return a == b; } + + template + using key_arg = key_type; +}; + +#if defined(__cpp_lib_string_view) +// If std::string_view is available, we add transparent support for std::string +// keys. We use std::hash as it supports the input types we +// care about. The lookup functions accept arbitrary `K`. This will include any +// key type that is convertible to std::string_view. +template <> +struct TransparentSupport { + static std::string_view ImplicitConvert(std::string_view str) { return str; } + // If the element is not convertible to std::string_view, try to convert to + // std::string first. 
+ // The template makes this overload lose resolution when both have the same + // rank otherwise. + template + static std::string_view ImplicitConvert(const std::string& str) { + return str; + } + + struct hash : private std::hash { + using is_transparent = void; + + template + size_t operator()(const T& str) const { + return base()(ImplicitConvert(str)); + } + + private: + const std::hash& base() const { return *this; } + }; + struct less { + using is_transparent = void; + + template + bool operator()(const T& t, const U& u) const { + return ImplicitConvert(t) < ImplicitConvert(u); + } + }; + + template + static bool Equals(const T& t, const U& u) { + return ImplicitConvert(t) == ImplicitConvert(u); + } + + template + using key_arg = K; +}; +#endif // defined(__cpp_lib_string_view) + +template +using TreeForMap = + std::map, void*, typename TransparentSupport::less, + MapAllocator, void*>>>; + +inline bool TableEntryIsEmpty(void* const* table, size_t b) { + return table[b] == nullptr; +} +inline bool TableEntryIsNonEmptyList(void* const* table, size_t b) { + return table[b] != nullptr && table[b] != table[b ^ 1]; +} +inline bool TableEntryIsTree(void* const* table, size_t b) { + return !TableEntryIsEmpty(table, b) && !TableEntryIsNonEmptyList(table, b); +} +inline bool TableEntryIsList(void* const* table, size_t b) { + return !TableEntryIsTree(table, b); +} + +// This captures all numeric types. +inline size_t MapValueSpaceUsedExcludingSelfLong(bool) { return 0; } +inline size_t MapValueSpaceUsedExcludingSelfLong(const std::string& str) { + return StringSpaceUsedExcludingSelfLong(str); +} +template ().SpaceUsedLong())> +size_t MapValueSpaceUsedExcludingSelfLong(const T& message) { + return message.SpaceUsedLong() - sizeof(T); +} + +constexpr size_t kGlobalEmptyTableSize = 1; +PROTOBUF_EXPORT extern void* const kGlobalEmptyTable[kGlobalEmptyTableSize]; + +// Space used for the table, trees, and nodes. +// Does not include the indirect space used. 
Eg the data of a std::string. +template +PROTOBUF_NOINLINE size_t SpaceUsedInTable(void** table, size_t num_buckets, + size_t num_elements, + size_t sizeof_node) { + size_t size = 0; + // The size of the table. + size += sizeof(void*) * num_buckets; + // All the nodes. + size += sizeof_node * num_elements; + // For each tree, count the overhead of the those nodes. + // Two buckets at a time because we only care about trees. + for (size_t b = 0; b < num_buckets; b += 2) { + if (internal::TableEntryIsTree(table, b)) { + using Tree = TreeForMap; + Tree* tree = static_cast(table[b]); + // Estimated cost of the red-black tree nodes, 3 pointers plus a + // bool (plus alignment, so 4 pointers). + size += tree->size() * + (sizeof(typename Tree::value_type) + sizeof(void*) * 4); + } + } + return size; +} + +template ::value || + !std::is_scalar::value>::type> +size_t SpaceUsedInValues(const Map* map) { + size_t size = 0; + for (const auto& v : *map) { + size += internal::MapValueSpaceUsedExcludingSelfLong(v.first) + + internal::MapValueSpaceUsedExcludingSelfLong(v.second); + } + return size; +} + +inline size_t SpaceUsedInValues(const void*) { return 0; } + } // namespace internal -// This is the class for google::protobuf::Map's internal value_type. Instead of using +// This is the class for Map's internal value_type. Instead of using // std::pair as value_type, we use this class which provides us more control of // its process of construction and destruction. 
template -class MapPair { - public: - typedef const Key first_type; - typedef T second_type; +struct MapPair { + using first_type = const Key; + using second_type = T; MapPair(const Key& other_first, const T& other_second) : first(other_first), second(other_second) {} explicit MapPair(const Key& other_first) : first(other_first), second() {} - MapPair(const MapPair& other) - : first(other.first), second(other.second) {} + explicit MapPair(Key&& other_first) + : first(std::move(other_first)), second() {} + MapPair(const MapPair& other) : first(other.first), second(other.second) {} ~MapPair() {} // Implicitly convertible to std::pair of compatible types. template - operator std::pair() const { + operator std::pair() const { // NOLINT(runtime/explicit) return std::pair(first, second); } @@ -105,11 +353,11 @@ class MapPair { T second; private: - friend class ::google::protobuf::Arena; + friend class Arena; friend class Map; }; -// google::protobuf::Map is an associative container type used to store protobuf map +// Map is an associative container type used to store protobuf map // fields. Each Map instance may or may not use a different hash function, a // different iteration order, and so on. 
E.g., please don't examine // implementation details to decide if the following would work: @@ -122,37 +370,34 @@ class MapPair { template class Map { public: - typedef Key key_type; - typedef T mapped_type; - typedef MapPair value_type; + using key_type = Key; + using mapped_type = T; + using value_type = MapPair; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef value_type& reference; - typedef const value_type& const_reference; + using pointer = value_type*; + using const_pointer = const value_type*; + using reference = value_type&; + using const_reference = const value_type&; - typedef size_t size_type; - typedef hash hasher; + using size_type = size_t; + using hasher = typename internal::TransparentSupport::hash; - Map() : arena_(NULL), default_enum_value_(0) { Init(); } - explicit Map(Arena* arena) : arena_(arena), default_enum_value_(0) { Init(); } + constexpr Map() : elements_(nullptr) {} + explicit Map(Arena* arena) : elements_(arena) {} - Map(const Map& other) - : arena_(NULL), default_enum_value_(other.default_enum_value_) { - Init(); - insert(other.begin(), other.end()); - } + Map(const Map& other) : Map() { insert(other.begin(), other.end()); } Map(Map&& other) noexcept : Map() { - if (other.arena_) { + if (other.arena() != nullptr) { *this = other; } else { swap(other); } } + Map& operator=(Map&& other) noexcept { if (this != &other) { - if (arena_ != other.arena_) { + if (arena() != other.arena()) { *this = other; } else { swap(other); @@ -162,140 +407,14 @@ class Map { } template - Map(const InputIt& first, const InputIt& last) - : arena_(NULL), default_enum_value_(0) { - Init(); + Map(const InputIt& first, const InputIt& last) : Map() { insert(first, last); } - ~Map() { - clear(); - if (arena_ == NULL) { - delete elements_; - } - } + ~Map() {} private: - void Init() { - elements_ = Arena::Create(arena_, 0u, hasher(), Allocator(arena_)); - } - - // re-implement std::allocator to use arena allocator for memory 
allocation. - // Used for google::protobuf::Map implementation. Users should not use this class - // directly. - template - class MapAllocator { - public: - typedef U value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef size_t size_type; - typedef ptrdiff_t difference_type; - - MapAllocator() : arena_(NULL) {} - explicit MapAllocator(Arena* arena) : arena_(arena) {} - template - MapAllocator(const MapAllocator& allocator) - : arena_(allocator.arena()) {} - - pointer allocate(size_type n, const void* /* hint */ = 0) { - // If arena is not given, malloc needs to be called which doesn't - // construct element object. - if (arena_ == NULL) { - return static_cast(::operator new(n * sizeof(value_type))); - } else { - return reinterpret_cast( - Arena::CreateArray(arena_, n * sizeof(value_type))); - } - } - - void deallocate(pointer p, size_type n) { - if (arena_ == NULL) { -#if defined(__GXX_DELETE_WITH_SIZE__) || defined(__cpp_sized_deallocation) - ::operator delete(p, n * sizeof(value_type)); -#else - (void)n; - ::operator delete(p); -#endif - } - } - -#if __cplusplus >= 201103L && !defined(GOOGLE_PROTOBUF_OS_APPLE) && \ - !defined(GOOGLE_PROTOBUF_OS_NACL) && \ - !defined(GOOGLE_PROTOBUF_OS_EMSCRIPTEN) - template - void construct(NodeType* p, Args&&... args) { - // Clang 3.6 doesn't compile static casting to void* directly. (Issue - // #1266) According C++ standard 5.2.9/1: "The static_cast operator shall - // not cast away constness". So first the maybe const pointer is casted to - // const void* and after the const void* is const casted. 
- new (const_cast(static_cast(p))) - NodeType(std::forward(args)...); - } - - template - void destroy(NodeType* p) { - p->~NodeType(); - } -#else - void construct(pointer p, const_reference t) { new (p) value_type(t); } - - void destroy(pointer p) { p->~value_type(); } -#endif - - template - struct rebind { - typedef MapAllocator other; - }; - - template - bool operator==(const MapAllocator& other) const { - return arena_ == other.arena_; - } - - template - bool operator!=(const MapAllocator& other) const { - return arena_ != other.arena_; - } - - // To support Visual Studio 2008 - size_type max_size() const { - // parentheses around (std::...:max) prevents macro warning of max() - return (std::numeric_limits::max)(); - } - - // To support gcc-4.4, which does not properly - // support templated friend classes - Arena* arena() const { - return arena_; - } - - private: - typedef void DestructorSkippable_; - Arena* const arena_; - }; - - // InnerMap's key type is Key and its value type is value_type*. We use a - // custom class here and for Node, below, to ensure that k_ is at offset 0, - // allowing safe conversion from pointer to Node to pointer to Key, and vice - // versa when appropriate. - class KeyValuePair { - public: - KeyValuePair(const Key& k, value_type* v) : k_(k), v_(v) {} - - const Key& key() const { return k_; } - Key& key() { return k_; } - value_type* value() const { return v_; } - value_type*& value() { return v_; } - - private: - Key k_; - value_type* v_; - }; - - typedef MapAllocator Allocator; + using Allocator = internal::MapAllocator; // InnerMap is a generic hash-based map. It doesn't contain any // protocol-buffer-specific logic. It is a chaining hash map with the @@ -312,7 +431,7 @@ class Map { // 2. The number of buckets is a power of two. // 3. Buckets are converted to trees in pairs: if we convert bucket b then // buckets b and b^1 will share a tree. 
Invariant: buckets b and b^1 have - // the same non-NULL value iff they are sharing a tree. (An alternative + // the same non-null value iff they are sharing a tree. (An alternative // implementation strategy would be to have a tag bit per bucket.) // 4. As is typical for hash_map and such, the Keys and Values are always // stored in linked list nodes. Pointers to elements are never invalidated @@ -322,27 +441,27 @@ class Map { // 6. Once we've tree-converted a bucket, it is never converted back. However, // the items a tree contains may wind up assigned to trees or lists upon a // rehash. - // 7. The code requires no C++ features from C++11 or later. + // 7. The code requires no C++ features from C++14 or later. // 8. Mutations to a map do not invalidate the map's iterators, pointers to // elements, or references to elements. // 9. Except for erase(iterator), any non-const method can reorder iterators. + // 10. InnerMap uses KeyForTree when using the Tree representation, which + // is either `Key`, if Key is a scalar, or `reference_wrapper` + // otherwise. This avoids unnecessary copies of string keys, for example. class InnerMap : private hasher { public: - typedef value_type* Value; - - InnerMap(size_type n, hasher h, Allocator alloc) - : hasher(h), + explicit constexpr InnerMap(Arena* arena) + : hasher(), num_elements_(0), - seed_(Seed()), - table_(NULL), - alloc_(alloc) { - n = TableSize(n); - table_ = CreateEmptyTable(n); - num_buckets_ = index_of_first_non_null_ = n; - } + num_buckets_(internal::kGlobalEmptyTableSize), + seed_(0), + index_of_first_non_null_(internal::kGlobalEmptyTableSize), + table_(const_cast(internal::kGlobalEmptyTable)), + alloc_(arena) {} ~InnerMap() { - if (table_ != NULL) { + if (alloc_.arena() == nullptr && + num_buckets_ != internal::kGlobalEmptyTableSize) { clear(); Dealloc(table_, num_buckets_); } @@ -353,42 +472,37 @@ class Map { // Linked-list nodes, as one would expect for a chaining hash table. 
struct Node { - KeyValuePair kv; + value_type kv; Node* next; }; - // This is safe only if the given pointer is known to point to a Key that is - // part of a Node. - static Node* NodePtrFromKeyPtr(Key* k) { - return reinterpret_cast(k); - } + // Trees. The payload type is a copy of Key, so that we can query the tree + // with Keys that are not in any particular data structure. + // The value is a void* pointing to Node. We use void* instead of Node* to + // avoid code bloat. That way there is only one instantiation of the tree + // class per key type. + using Tree = internal::TreeForMap; + using TreeIterator = typename Tree::iterator; - static Key* KeyPtrFromNodePtr(Node* node) { return &node->kv.key(); } - - // Trees. The payload type is pointer to Key, so that we can query the tree - // with Keys that are not in any particular data structure. When we insert, - // though, the pointer is always pointing to a Key that is inside a Node. - struct KeyCompare { - bool operator()(const Key* n0, const Key* n1) const { return *n0 < *n1; } - }; - typedef typename Allocator::template rebind::other KeyPtrAllocator; - typedef std::set Tree; - typedef typename Tree::iterator TreeIterator; + static Node* NodeFromTreeIterator(TreeIterator it) { + return static_cast(it->second); + } // iterator and const_iterator are instantiations of iterator_base. template - struct iterator_base { - typedef KeyValueType& reference; - typedef KeyValueType* pointer; + class iterator_base { + public: + using reference = KeyValueType&; + using pointer = KeyValueType*; // Invariants: // node_ is always correct. This is handy because the most common // operations are operator* and operator-> and they only use node_. - // When node_ is set to a non-NULL value, all the other non-const fields + // When node_ is set to a non-null value, all the other non-const fields // are updated to be correct also, but those fields can become stale // if the underlying map is modified. 
When those fields are needed they // are rechecked, and updated if necessary. - iterator_base() : node_(NULL), m_(NULL), bucket_index_(0) {} + iterator_base() : node_(nullptr), m_(nullptr), bucket_index_(0) {} explicit iterator_base(const InnerMap* m) : m_(m) { SearchFrom(m->index_of_first_non_null_); @@ -405,18 +519,18 @@ class Map { : node_(n), m_(m), bucket_index_(index) {} iterator_base(TreeIterator tree_it, const InnerMap* m, size_type index) - : node_(NodePtrFromKeyPtr(*tree_it)), m_(m), bucket_index_(index) { + : node_(NodeFromTreeIterator(tree_it)), m_(m), bucket_index_(index) { // Invariant: iterators that use buckets with trees have an even // bucket_index_. - GOOGLE_DCHECK_EQ(bucket_index_ % 2, 0); + GOOGLE_DCHECK_EQ(bucket_index_ % 2, 0u); } // Advance through buckets, looking for the first that isn't empty. - // If nothing non-empty is found then leave node_ == NULL. + // If nothing non-empty is found then leave node_ == nullptr. void SearchFrom(size_type start_bucket) { GOOGLE_DCHECK(m_->index_of_first_non_null_ == m_->num_buckets_ || - m_->table_[m_->index_of_first_non_null_] != NULL); - node_ = NULL; + m_->table_[m_->index_of_first_non_null_] != nullptr); + node_ = nullptr; for (bucket_index_ = start_bucket; bucket_index_ < m_->num_buckets_; bucket_index_++) { if (m_->TableEntryIsNonEmptyList(bucket_index_)) { @@ -425,7 +539,7 @@ class Map { } else if (m_->TableEntryIsTree(bucket_index_)) { Tree* tree = static_cast(m_->table_[bucket_index_]); GOOGLE_DCHECK(!tree->empty()); - node_ = NodePtrFromKeyPtr(*tree->begin()); + node_ = NodeFromTreeIterator(tree->begin()); break; } } @@ -442,18 +556,18 @@ class Map { } iterator_base& operator++() { - if (node_->next == NULL) { + if (node_->next == nullptr) { TreeIterator tree_it; const bool is_list = revalidate_if_necessary(&tree_it); if (is_list) { SearchFrom(bucket_index_ + 1); } else { - GOOGLE_DCHECK_EQ(bucket_index_ & 1, 0); + GOOGLE_DCHECK_EQ(bucket_index_ & 1, 0u); Tree* tree = 
static_cast(m_->table_[bucket_index_]); if (++tree_it == tree->end()) { SearchFrom(bucket_index_ + 2); } else { - node_ = NodePtrFromKeyPtr(*tree_it); + node_ = NodeFromTreeIterator(tree_it); } } } else { @@ -468,22 +582,21 @@ class Map { return tmp; } - // Assumes node_ and m_ are correct and non-NULL, but other fields may be + // Assumes node_ and m_ are correct and non-null, but other fields may be // stale. Fix them as needed. Then return true iff node_ points to a // Node in a list. If false is returned then *it is modified to be // a valid iterator for node_. bool revalidate_if_necessary(TreeIterator* it) { - GOOGLE_DCHECK(node_ != NULL && m_ != NULL); + GOOGLE_DCHECK(node_ != nullptr && m_ != nullptr); // Force bucket_index_ to be in range. bucket_index_ &= (m_->num_buckets_ - 1); // Common case: the bucket we think is relevant points to node_. - if (m_->table_[bucket_index_] == static_cast(node_)) - return true; + if (m_->table_[bucket_index_] == static_cast(node_)) return true; // Less common: the bucket is a linked list with node_ somewhere in it, // but not at the head. if (m_->TableEntryIsNonEmptyList(bucket_index_)) { Node* l = static_cast(m_->table_[bucket_index_]); - while ((l = l->next) != NULL) { + while ((l = l->next) != nullptr) { if (l == node_) { return true; } @@ -492,8 +605,8 @@ class Map { // Well, bucket_index_ still might be correct, but probably // not. Revalidate just to be sure. This case is rare enough that we // don't worry about potential optimizations, such as having a custom - // find-like method that compares Node* instead of const Key&. - iterator_base i(m_->find(*KeyPtrFromNodePtr(node_), it)); + // find-like method that compares Node* instead of the key. 
+ iterator_base i(m_->find(node_->kv.first, it)); bucket_index_ = i.bucket_index_; return m_->TableEntryIsList(bucket_index_); } @@ -504,8 +617,19 @@ class Map { }; public: - typedef iterator_base iterator; - typedef iterator_base const_iterator; + using iterator = iterator_base; + using const_iterator = iterator_base; + + Arena* arena() const { return alloc_.arena(); } + + void Swap(InnerMap* other) { + std::swap(num_elements_, other->num_elements_); + std::swap(num_buckets_, other->num_buckets_); + std::swap(seed_, other->seed_); + std::swap(index_of_first_non_null_, other->index_of_first_non_null_); + std::swap(table_, other->table_); + std::swap(alloc_, other->alloc_); + } iterator begin() { return iterator(this); } iterator end() { return iterator(); } @@ -516,19 +640,19 @@ class Map { for (size_type b = 0; b < num_buckets_; b++) { if (TableEntryIsNonEmptyList(b)) { Node* node = static_cast(table_[b]); - table_[b] = NULL; + table_[b] = nullptr; do { Node* next = node->next; DestroyNode(node); node = next; - } while (node != NULL); + } while (node != nullptr); } else if (TableEntryIsTree(b)) { Tree* tree = static_cast(table_[b]); GOOGLE_DCHECK(table_[b] == table_[b + 1] && (b & 1) == 0); - table_[b] = table_[b + 1] = NULL; + table_[b] = table_[b + 1] = nullptr; typename Tree::iterator tree_it = tree->begin(); do { - Node* node = NodePtrFromKeyPtr(*tree_it); + Node* node = NodeFromTreeIterator(tree_it); typename Tree::iterator next = tree_it; ++next; tree->erase(tree_it); @@ -551,50 +675,53 @@ class Map { size_type size() const { return num_elements_; } bool empty() const { return size() == 0; } - iterator find(const Key& k) { return iterator(FindHelper(k).first); } - const_iterator find(const Key& k) const { return find(k, NULL); } + template + iterator find(const K& k) { + return iterator(FindHelper(k).first); + } - // In traditional C++ style, this performs "insert if not present." 
- std::pair insert(const KeyValuePair& kv) { - std::pair p = FindHelper(kv.key()); - // Case 1: key was already present. - if (p.first.node_ != NULL) - return std::make_pair(iterator(p.first), false); - // Case 2: insert. - if (ResizeIfLoadIsOutOfRange(num_elements_ + 1)) { - p = FindHelper(kv.key()); - } - const size_type b = p.second; // bucket number - Node* node = Alloc(1); - alloc_.construct(&node->kv, kv); - iterator result = InsertUnique(b, node); - ++num_elements_; - return std::make_pair(result, true); + template + const_iterator find(const K& k) const { + return FindHelper(k).first; } - // The same, but if an insertion is necessary then the value portion of the - // inserted key-value pair is left uninitialized. - std::pair insert(const Key& k) { + // Insert the key into the map, if not present. In that case, the value will + // be value initialized. + template + std::pair insert(K&& k) { std::pair p = FindHelper(k); // Case 1: key was already present. - if (p.first.node_ != NULL) + if (p.first.node_ != nullptr) return std::make_pair(iterator(p.first), false); // Case 2: insert. if (ResizeIfLoadIsOutOfRange(num_elements_ + 1)) { p = FindHelper(k); } const size_type b = p.second; // bucket number - Node* node = Alloc(1); - typedef typename Allocator::template rebind::other KeyAllocator; - KeyAllocator(alloc_).construct(&node->kv.key(), k); + Node* node; + // If K is not key_type, make the conversion to key_type explicit. 
+ using TypeToInit = typename std::conditional< + std::is_same::type, key_type>::value, K&&, + key_type>::type; + if (alloc_.arena() == nullptr) { + node = new Node{value_type(static_cast(std::forward(k))), + nullptr}; + } else { + node = Alloc(1); + Arena::CreateInArenaStorage( + const_cast(&node->kv.first), alloc_.arena(), + static_cast(std::forward(k))); + Arena::CreateInArenaStorage(&node->kv.second, alloc_.arena()); + } + iterator result = InsertUnique(b, node); ++num_elements_; return std::make_pair(result, true); } - Value& operator[](const Key& k) { - KeyValuePair kv(k, Value()); - return insert(kv).first->value(); + template + value_type& operator[](K&& k) { + return *insert(std::forward(k)).first; } void erase(iterator it) { @@ -611,52 +738,58 @@ class Map { } else { GOOGLE_DCHECK(TableEntryIsTree(b)); Tree* tree = static_cast(table_[b]); - tree->erase(*tree_it); + tree->erase(tree_it); if (tree->empty()) { // Force b to be the minimum of b and b ^ 1. This is important // only because we want index_of_first_non_null_ to be correct. 
b &= ~static_cast(1); DestroyTree(tree); - table_[b] = table_[b + 1] = NULL; + table_[b] = table_[b + 1] = nullptr; } } DestroyNode(item); --num_elements_; - if (GOOGLE_PREDICT_FALSE(b == index_of_first_non_null_)) { + if (PROTOBUF_PREDICT_FALSE(b == index_of_first_non_null_)) { while (index_of_first_non_null_ < num_buckets_ && - table_[index_of_first_non_null_] == NULL) { + table_[index_of_first_non_null_] == nullptr) { ++index_of_first_non_null_; } } } + size_t SpaceUsedInternal() const { + return internal::SpaceUsedInTable(table_, num_buckets_, + num_elements_, sizeof(Node)); + } + private: const_iterator find(const Key& k, TreeIterator* it) const { return FindHelper(k, it).first; } - std::pair FindHelper(const Key& k) const { - return FindHelper(k, NULL); + template + std::pair FindHelper(const K& k) const { + return FindHelper(k, nullptr); } - std::pair FindHelper(const Key& k, + template + std::pair FindHelper(const K& k, TreeIterator* it) const { size_type b = BucketNumber(k); if (TableEntryIsNonEmptyList(b)) { Node* node = static_cast(table_[b]); do { - if (IsMatch(*KeyPtrFromNodePtr(node), k)) { + if (internal::TransparentSupport::Equals(node->kv.first, k)) { return std::make_pair(const_iterator(node, this, b), b); } else { node = node->next; } - } while (node != NULL); + } while (node != nullptr); } else if (TableEntryIsTree(b)) { GOOGLE_DCHECK_EQ(table_[b], table_[b ^ 1]); b &= ~static_cast(1); Tree* tree = static_cast(table_[b]); - Key* key = const_cast(&k); - typename Tree::iterator tree_it = tree->find(key); + auto tree_it = tree->find(k); if (tree_it != tree->end()) { - if (it != NULL) *it = tree_it; + if (it != nullptr) *it = tree_it; return std::make_pair(const_iterator(tree_it, this, b), b); } } @@ -669,17 +802,17 @@ class Map { // bucket. num_elements_ is not modified. 
iterator InsertUnique(size_type b, Node* node) { GOOGLE_DCHECK(index_of_first_non_null_ == num_buckets_ || - table_[index_of_first_non_null_] != NULL); + table_[index_of_first_non_null_] != nullptr); // In practice, the code that led to this point may have already // determined whether we are inserting into an empty list, a short list, // or whatever. But it's probably cheap enough to recompute that here; // it's likely that we're inserting into an empty or short list. iterator result; - GOOGLE_DCHECK(find(*KeyPtrFromNodePtr(node)) == end()); + GOOGLE_DCHECK(find(node->kv.first) == end()); if (TableEntryIsEmpty(b)) { result = InsertUniqueInList(b, node); } else if (TableEntryIsNonEmptyList(b)) { - if (GOOGLE_PREDICT_FALSE(TableEntryIsTooLong(b))) { + if (PROTOBUF_PREDICT_FALSE(TableEntryIsTooLong(b))) { TreeConvert(b); result = InsertUniqueInTree(b, node); GOOGLE_DCHECK_EQ(result.bucket_index_, b & ~static_cast(1)); @@ -699,9 +832,30 @@ class Map { return result; } + // Returns whether we should insert after the head of the list. For + // non-optimized builds, we randomly decide whether to insert right at the + // head of the list or just after the head. This helps add a little bit of + // non-determinism to the map ordering. + bool ShouldInsertAfterHead(void* node) { +#ifdef NDEBUG + (void) node; + return false; +#else + // Doing modulo with a prime mixes the bits more. + return (reinterpret_cast(node) ^ seed_) % 13 > 6; +#endif + } + // Helper for InsertUnique. Handles the case where bucket b is a // not-too-long linked list. iterator InsertUniqueInList(size_type b, Node* node) { + if (table_[b] != nullptr && ShouldInsertAfterHead(node)) { + Node* first = static_cast(table_[b]); + node->next = first->next; + first->next = node; + return iterator(node, this, b); + } + node->next = static_cast(table_[b]); table_[b] = static_cast(node); return iterator(node, this, b); @@ -711,12 +865,11 @@ class Map { // Tree. 
iterator InsertUniqueInTree(size_type b, Node* node) { GOOGLE_DCHECK_EQ(table_[b], table_[b ^ 1]); - // Maintain the invariant that node->next is NULL for all Nodes in Trees. - node->next = NULL; - return iterator(static_cast(table_[b]) - ->insert(KeyPtrFromNodePtr(node)) - .first, - this, b & ~static_cast(1)); + // Maintain the invariant that node->next is null for all Nodes in Trees. + node->next = nullptr; + return iterator( + static_cast(table_[b])->insert({node->kv.first, node}).first, + this, b & ~static_cast(1)); } // Returns whether it did resize. Currently this is only used when @@ -734,13 +887,13 @@ class Map { // We don't care how many elements are in trees. If a lot are, // we may resize even though there are many empty buckets. In // practice, this seems fine. - if (GOOGLE_PREDICT_FALSE(new_size >= hi_cutoff)) { + if (PROTOBUF_PREDICT_FALSE(new_size >= hi_cutoff)) { if (num_buckets_ <= max_size() / 2) { Resize(num_buckets_ * 2); return true; } - } else if (GOOGLE_PREDICT_FALSE(new_size <= lo_cutoff && - num_buckets_ > kMinTableSize)) { + } else if (PROTOBUF_PREDICT_FALSE(new_size <= lo_cutoff && + num_buckets_ > kMinTableSize)) { size_type lg2_of_size_reduction_factor = 1; // It's possible we want to shrink a lot here... size() could even be 0. // So, estimate how much to shrink by making sure we don't shrink so @@ -762,6 +915,15 @@ class Map { // Resize to the given number of buckets. void Resize(size_t new_num_buckets) { + if (num_buckets_ == internal::kGlobalEmptyTableSize) { + // This is the global empty array. + // Just overwrite with a new one. No need to transfer or free anything. 
+ num_buckets_ = index_of_first_non_null_ = kMinTableSize; + table_ = CreateEmptyTable(num_buckets_); + seed_ = Seed(); + return; + } + GOOGLE_DCHECK_GE(new_num_buckets, kMinTableSize); void** const old_table = table_; const size_type old_table_size = num_buckets_; @@ -770,9 +932,9 @@ class Map { const size_type start = index_of_first_non_null_; index_of_first_non_null_ = num_buckets_; for (size_type i = start; i < old_table_size; i++) { - if (TableEntryIsNonEmptyList(old_table, i)) { + if (internal::TableEntryIsNonEmptyList(old_table, i)) { TransferList(old_table, i); - } else if (TableEntryIsTree(old_table, i)) { + } else if (internal::TableEntryIsTree(old_table, i)) { TransferTree(old_table, i++); } } @@ -783,17 +945,17 @@ class Map { Node* node = static_cast(table[index]); do { Node* next = node->next; - InsertUnique(BucketNumber(*KeyPtrFromNodePtr(node)), node); + InsertUnique(BucketNumber(node->kv.first), node); node = next; - } while (node != NULL); + } while (node != nullptr); } void TransferTree(void* const* table, size_type index) { Tree* tree = static_cast(table[index]); typename Tree::iterator tree_it = tree->begin(); do { - Node* node = NodePtrFromKeyPtr(*tree_it); - InsertUnique(BucketNumber(**tree_it), node); + InsertUnique(BucketNumber(std::cref(tree_it->first).get()), + NodeFromTreeIterator(tree_it)); } while (++tree_it != tree->end()); DestroyTree(tree); } @@ -808,42 +970,23 @@ class Map { } bool TableEntryIsEmpty(size_type b) const { - return TableEntryIsEmpty(table_, b); + return internal::TableEntryIsEmpty(table_, b); } bool TableEntryIsNonEmptyList(size_type b) const { - return TableEntryIsNonEmptyList(table_, b); + return internal::TableEntryIsNonEmptyList(table_, b); } bool TableEntryIsTree(size_type b) const { - return TableEntryIsTree(table_, b); + return internal::TableEntryIsTree(table_, b); } bool TableEntryIsList(size_type b) const { - return TableEntryIsList(table_, b); - } - static bool TableEntryIsEmpty(void* const* table, size_type 
b) { - return table[b] == NULL; - } - static bool TableEntryIsNonEmptyList(void* const* table, size_type b) { - return table[b] != NULL && table[b] != table[b ^ 1]; - } - static bool TableEntryIsTree(void* const* table, size_type b) { - return !TableEntryIsEmpty(table, b) && - !TableEntryIsNonEmptyList(table, b); - } - static bool TableEntryIsList(void* const* table, size_type b) { - return !TableEntryIsTree(table, b); + return internal::TableEntryIsList(table_, b); } void TreeConvert(size_type b) { GOOGLE_DCHECK(!TableEntryIsTree(b) && !TableEntryIsTree(b ^ 1)); - typename Allocator::template rebind::other tree_allocator(alloc_); - Tree* tree = tree_allocator.allocate(1); - // We want to use the three-arg form of construct, if it exists, but we - // create a temporary and use the two-arg construct that's known to exist. - // It's clunky, but the compiler should be able to generate more-or-less - // the same code. - tree_allocator.construct(tree, - Tree(KeyCompare(), KeyPtrAllocator(alloc_))); - // Now the tree is ready to use. + Tree* tree = + Arena::Create(alloc_.arena(), typename Tree::key_compare(), + typename Tree::allocator_type(alloc_)); size_type count = CopyListToTree(b, tree) + CopyListToTree(b ^ 1, tree); GOOGLE_DCHECK_EQ(count, tree->size()); table_[b] = table_[b ^ 1] = static_cast(tree); @@ -854,11 +997,11 @@ class Map { size_type CopyListToTree(size_type b, Tree* tree) { size_type count = 0; Node* node = static_cast(table_[b]); - while (node != NULL) { - tree->insert(KeyPtrFromNodePtr(node)); + while (node != nullptr) { + tree->insert({node->kv.first, node}); ++count; Node* next = node->next; - node->next = NULL; + node->next = nullptr; node = next; } return count; @@ -873,20 +1016,23 @@ class Map { do { ++count; node = node->next; - } while (node != NULL); + } while (node != nullptr); // Invariant: no linked list ever is more than kMaxLength in length. 
GOOGLE_DCHECK_LE(count, kMaxLength); return count >= kMaxLength; } - size_type BucketNumber(const Key& k) const { - // We inherit from hasher, so one-arg operator() provides a hash function. - size_type h = (*const_cast(this))(k); - return (h + seed_) & (num_buckets_ - 1); - } + template + size_type BucketNumber(const K& k) const { + // We xor the hash value against the random seed so that we effectively + // have a random hash function. + uint64 h = hash_function()(k) ^ seed_; - bool IsMatch(const Key& k0, const Key& k1) const { - return std::equal_to()(k0, k1); + // We use the multiplication method to determine the bucket number from + // the hash value. The constant kPhi (suggested by Knuth) is roughly + // (sqrt(5) - 1) / 2 * 2^64. + constexpr uint64 kPhi = uint64{0x9e3779b97f4a7c15}; + return ((kPhi * h) >> 32) & (num_buckets_ - 1); } // Return a power of two no less than max(kMinTableSize, n). @@ -900,26 +1046,27 @@ class Map { // Use alloc_ to allocate an array of n objects of type U. template U* Alloc(size_type n) { - typedef typename Allocator::template rebind::other alloc_type; + using alloc_type = typename Allocator::template rebind::other; return alloc_type(alloc_).allocate(n); } // Use alloc_ to deallocate an array of n objects of type U. template void Dealloc(U* t, size_type n) { - typedef typename Allocator::template rebind::other alloc_type; + using alloc_type = typename Allocator::template rebind::other; alloc_type(alloc_).deallocate(t, n); } void DestroyNode(Node* node) { - alloc_.destroy(&node->kv); - Dealloc(node, 1); + if (alloc_.arena() == nullptr) { + delete node; + } } void DestroyTree(Tree* tree) { - typename Allocator::template rebind::other tree_allocator(alloc_); - tree_allocator.destroy(tree); - tree_allocator.deallocate(tree, 1); + if (alloc_.arena() == nullptr) { + delete tree; + } } void** CreateEmptyTable(size_type n) { @@ -932,15 +1079,23 @@ class Map { // Return a randomish value. 
size_type Seed() const { - size_type s = static_cast(reinterpret_cast(this)); -#if defined(__x86_64__) && defined(__GNUC__) + // We get a little bit of randomness from the address of the map. The + // lower bits are not very random, due to alignment, so we discard them + // and shift the higher bits into their place. + size_type s = reinterpret_cast(this) >> 12; +#if defined(__x86_64__) && defined(__GNUC__) && \ + !defined(GOOGLE_PROTOBUF_NO_RDTSC) uint32 hi, lo; - asm("rdtsc" : "=a" (lo), "=d" (hi)); + asm volatile("rdtsc" : "=a"(lo), "=d"(hi)); s += ((static_cast(hi) << 32) | lo); #endif return s; } + friend class Arena; + using InternalArenaConstructable_ = void; + using DestructorSkippable_ = void; + size_type num_elements_; size_type num_buckets_; size_type seed_; @@ -950,24 +1105,26 @@ class Map { GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(InnerMap); }; // end of class InnerMap + template + using key_arg = typename internal::TransparentSupport< + key_type>::template key_arg; + public: // Iterators class const_iterator { - typedef typename InnerMap::const_iterator InnerIt; + using InnerIt = typename InnerMap::const_iterator; public: - typedef std::forward_iterator_tag iterator_category; - typedef typename Map::value_type value_type; - typedef ptrdiff_t difference_type; - typedef const value_type* pointer; - typedef const value_type& reference; + using iterator_category = std::forward_iterator_tag; + using value_type = typename Map::value_type; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = const value_type&; const_iterator() {} explicit const_iterator(const InnerIt& it) : it_(it) {} - const_reference operator*() const { - return *it_->value(); - } + const_reference operator*() const { return *it_; } const_pointer operator->() const { return &(operator*()); } const_iterator& operator++() { @@ -988,19 +1145,19 @@ class Map { }; class iterator { - typedef typename InnerMap::iterator InnerIt; + using InnerIt = typename 
InnerMap::iterator; public: - typedef std::forward_iterator_tag iterator_category; - typedef typename Map::value_type value_type; - typedef ptrdiff_t difference_type; - typedef value_type* pointer; - typedef value_type& reference; + using iterator_category = std::forward_iterator_tag; + using value_type = typename Map::value_type; + using difference_type = ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; iterator() {} explicit iterator(const InnerIt& it) : it_(it) {} - reference operator*() const { return *it_->value(); } + reference operator*() const { return *it_; } pointer operator->() const { return &(operator*()); } iterator& operator++() { @@ -1010,7 +1167,7 @@ class Map { iterator operator++(int) { return iterator(it_++); } // Allow implicit conversion to const_iterator. - operator const_iterator() const { + operator const_iterator() const { // NOLINT(runtime/explicit) return const_iterator(typename InnerMap::const_iterator(it_)); } @@ -1027,55 +1184,67 @@ class Map { InnerIt it_; }; - iterator begin() { return iterator(elements_->begin()); } - iterator end() { return iterator(elements_->end()); } - const_iterator begin() const { - return const_iterator(iterator(elements_->begin())); - } - const_iterator end() const { - return const_iterator(iterator(elements_->end())); - } + iterator begin() { return iterator(elements_.begin()); } + iterator end() { return iterator(elements_.end()); } + const_iterator begin() const { return const_iterator(elements_.begin()); } + const_iterator end() const { return const_iterator(elements_.end()); } const_iterator cbegin() const { return begin(); } const_iterator cend() const { return end(); } // Capacity - size_type size() const { return elements_->size(); } + size_type size() const { return elements_.size(); } bool empty() const { return size() == 0; } // Element access - T& operator[](const key_type& key) { - value_type** value = &(*elements_)[key]; - if (*value == NULL) { - *value = 
CreateValueTypeInternal(key); - internal::MapValueInitializer::value, - T>::Initialize((*value)->second, - default_enum_value_); - } - return (*value)->second; + template + T& operator[](const key_arg& key) { + return elements_[key].second; } - const T& at(const key_type& key) const { + template < + typename K = key_type, + // Disable for integral types to reduce code bloat. + typename = typename std::enable_if::value>::type> + T& operator[](key_arg&& key) { + return elements_[std::forward(key)].second; + } + + template + const T& at(const key_arg& key) const { const_iterator it = find(key); - GOOGLE_CHECK(it != end()) << "key not found: " << key; + GOOGLE_CHECK(it != end()) << "key not found: " << static_cast(key); return it->second; } - T& at(const key_type& key) { + + template + T& at(const key_arg& key) { iterator it = find(key); - GOOGLE_CHECK(it != end()) << "key not found: " << key; + GOOGLE_CHECK(it != end()) << "key not found: " << static_cast(key); return it->second; } // Lookup - size_type count(const key_type& key) const { - const_iterator it = find(key); - GOOGLE_DCHECK(it == end() || key == it->first); - return it == end() ? 0 : 1; + template + size_type count(const key_arg& key) const { + return find(key) == end() ? 
0 : 1; + } + + template + const_iterator find(const key_arg& key) const { + return const_iterator(elements_.find(key)); + } + template + iterator find(const key_arg& key) { + return iterator(elements_.find(key)); } - const_iterator find(const key_type& key) const { - return const_iterator(iterator(elements_->find(key))); + + template + bool contains(const key_arg& key) const { + return find(key) != end(); } - iterator find(const key_type& key) { return iterator(elements_->find(key)); } + + template std::pair equal_range( - const key_type& key) const { + const key_arg& key) const { const_iterator it = find(key); if (it == end()) { return std::pair(it, it); @@ -1084,7 +1253,9 @@ class Map { return std::pair(begin, it); } } - std::pair equal_range(const key_type& key) { + + template + std::pair equal_range(const key_arg& key) { iterator it = find(key); if (it == end()) { return std::pair(it, it); @@ -1097,9 +1268,9 @@ class Map { // insert std::pair insert(const value_type& value) { std::pair p = - elements_->insert(value.first); + elements_.insert(value.first); if (p.second) { - p.first->value() = CreateValueTypeInternal(value); + p.first->second = value.second; } return std::pair(iterator(p.first), p.second); } @@ -1117,7 +1288,8 @@ class Map { } // Erase and clear - size_type erase(const key_type& key) { + template + size_type erase(const key_arg& key) { iterator it = find(key); if (it == end()) { return 0; @@ -1127,9 +1299,8 @@ class Map { } } iterator erase(iterator pos) { - if (arena_ == NULL) delete pos.operator->(); iterator i = pos++; - elements_->erase(i.it_); + elements_.erase(i.it_); return pos; } void erase(iterator first, iterator last) { @@ -1137,7 +1308,7 @@ class Map { first = erase(first); } } - void clear() { erase(begin(), end()); } + void clear() { elements_.clear(); } // Assign Map& operator=(const Map& other) { @@ -1149,9 +1320,8 @@ class Map { } void swap(Map& other) { - if (arena_ == other.arena_) { - std::swap(default_enum_value_, 
other.default_enum_value_); - std::swap(elements_, other.elements_); + if (arena() == other.arena()) { + elements_.Swap(&other.elements_); } else { // TODO(zuguang): optimize this. The temporary copy can be allocated // in the same arena as the other message, and the "other = copy" can @@ -1164,56 +1334,29 @@ class Map { // Access to hasher. Currently this returns a copy, but it may // be modified to return a const reference in the future. - hasher hash_function() const { return elements_->hash_function(); } - - private: - // Set default enum value only for proto2 map field whose value is enum type. - void SetDefaultEnumValue(int default_enum_value) { - default_enum_value_ = default_enum_value; - } + hasher hash_function() const { return elements_.hash_function(); } - value_type* CreateValueTypeInternal(const Key& key) { - if (arena_ == NULL) { - return new value_type(key); - } else { - value_type* value = reinterpret_cast( - Arena::CreateArray(arena_, sizeof(value_type))); - Arena::CreateInArenaStorage(const_cast(&value->first), arena_); - Arena::CreateInArenaStorage(&value->second, arena_); - const_cast(value->first) = key; - return value; - } - } - - value_type* CreateValueTypeInternal(const value_type& value) { - if (arena_ == NULL) { - return new value_type(value); - } else { - value_type* p = reinterpret_cast( - Arena::CreateArray(arena_, sizeof(value_type))); - Arena::CreateInArenaStorage(const_cast(&p->first), arena_); - Arena::CreateInArenaStorage(&p->second, arena_); - const_cast(p->first) = value.first; - p->second = value.second; - return p; - } + size_t SpaceUsedExcludingSelfLong() const { + if (empty()) return 0; + return elements_.SpaceUsedInternal() + internal::SpaceUsedInValues(this); } - Arena* arena_; - int default_enum_value_; - InnerMap* elements_; + private: + Arena* arena() const { return elements_.arena(); } + InnerMap elements_; - friend class ::google::protobuf::Arena; - typedef void InternalArenaConstructable_; - typedef void 
DestructorSkippable_; + friend class Arena; + using InternalArenaConstructable_ = void; + using DestructorSkippable_ = void; template + internal::WireFormatLite::FieldType value_wire_type> friend class internal::MapFieldLite; }; } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_MAP_H__ diff --git a/third_party/protobuf-lite/google/protobuf/map_entry_lite.h b/third_party/protobuf-lite/google/protobuf/map_entry_lite.h index 85a0bed7..1caf59df 100644 --- a/third_party/protobuf-lite/google/protobuf/map_entry_lite.h +++ b/third_party/protobuf-lite/google/protobuf/map_entry_lite.h @@ -32,29 +32,40 @@ #define GOOGLE_PROTOBUF_MAP_ENTRY_LITE_H__ #include +#include #include +#include +#include #include #include +#include #include #include -#include -#include +#include +#include + +#include +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif namespace google { namespace protobuf { namespace internal { template + WireFormatLite::FieldType kValueFieldType> class MapEntry; template + WireFormatLite::FieldType kValueFieldType> class MapFieldLite; } // namespace internal } // namespace protobuf +} // namespace google +namespace google { namespace protobuf { namespace internal { @@ -82,11 +93,45 @@ struct MoveHelper { // messages template struct MoveHelper { // strings and similar static void Move(T* src, T* dest) { -#if __cplusplus >= 201103L *dest = std::move(*src); -#else - dest->swap(*src); -#endif + } +}; + +// Functions for operating on a map entry. Does not contain any representation +// (this class is not intended to be instantiated). 
+template +struct MapEntryFuncs { + typedef MapTypeHandler KeyTypeHandler; + typedef MapTypeHandler ValueTypeHandler; + static const int kKeyFieldNumber = 1; + static const int kValueFieldNumber = 2; + + static uint8* InternalSerialize(int field_number, const Key& key, + const Value& value, uint8* ptr, + io::EpsCopyOutputStream* stream) { + ptr = stream->EnsureSpace(ptr); + ptr = WireFormatLite::WriteTagToArray( + field_number, WireFormatLite::WIRETYPE_LENGTH_DELIMITED, ptr); + ptr = io::CodedOutputStream::WriteVarint32ToArray(GetCachedSize(key, value), + ptr); + + ptr = KeyTypeHandler::Write(kKeyFieldNumber, key, ptr, stream); + return ValueTypeHandler::Write(kValueFieldNumber, value, ptr, stream); + } + + static size_t ByteSizeLong(const Key& key, const Value& value) { + // Tags for key and value will both be one byte (field numbers 1 and 2). + size_t inner_length = + 2 + KeyTypeHandler::ByteSize(key) + ValueTypeHandler::ByteSize(value); + return inner_length + io::CodedOutputStream::VarintSize32( + static_cast(inner_length)); + } + + static int GetCachedSize(const Key& key, const Value& value) { + // Tags for key and value will both be one byte (field numbers 1 and 2). + return 2 + KeyTypeHandler::GetCachedSize(key) + + ValueTypeHandler::GetCachedSize(value); } }; @@ -95,8 +140,11 @@ struct MoveHelper { // strings and similar // the eventual code to the template code. template + WireFormatLite::FieldType kValueFieldType> class MapEntryImpl : public Base { + public: + typedef MapEntryFuncs Funcs; + protected: // Provide utilities to parse/serialize key/value. Provide utilities to // manipulate internal stored type. @@ -111,16 +159,16 @@ class MapEntryImpl : public Base { // Enum type cannot be used for MapTypeHandler::Read. Define a type // which will replace Enum with int. 
typedef typename KeyTypeHandler::MapEntryAccessorType KeyMapEntryAccessorType; - typedef typename ValueTypeHandler::MapEntryAccessorType - ValueMapEntryAccessorType; + typedef + typename ValueTypeHandler::MapEntryAccessorType ValueMapEntryAccessorType; // Constants for field number. static const int kKeyFieldNumber = 1; static const int kValueFieldNumber = 2; // Constants for field tag. - static const uint8 kKeyTag = GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG( - kKeyFieldNumber, KeyTypeHandler::kWireType); + static const uint8 kKeyTag = + GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(kKeyFieldNumber, KeyTypeHandler::kWireType); static const uint8 kValueTag = GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG( kValueFieldNumber, ValueTypeHandler::kWireType); static const size_t kTagSize = 1; @@ -133,24 +181,20 @@ class MapEntryImpl : public Base { typedef Value EntryValueType; static const WireFormatLite::FieldType kEntryKeyFieldType = kKeyFieldType; static const WireFormatLite::FieldType kEntryValueFieldType = kValueFieldType; - static const int kEntryDefaultEnumValue = default_enum_value; - MapEntryImpl() : arena_(NULL) { - KeyTypeHandler::Initialize(&key_, NULL); - ValueTypeHandler::InitializeMaybeByDefaultEnum(&value_, default_enum_value, - NULL); - _has_bits_[0] = 0; - } + constexpr MapEntryImpl() + : key_(KeyTypeHandler::Constinit()), + value_(ValueTypeHandler::Constinit()), + _has_bits_{} {} - explicit MapEntryImpl(Arena* arena) : arena_(arena) { - KeyTypeHandler::Initialize(&key_, arena); - ValueTypeHandler::InitializeMaybeByDefaultEnum(&value_, default_enum_value, - arena); - _has_bits_[0] = 0; - } + explicit MapEntryImpl(Arena* arena) + : Base(arena), + key_(KeyTypeHandler::Constinit()), + value_(ValueTypeHandler::Constinit()), + _has_bits_{} {} ~MapEntryImpl() { - if (GetArenaNoVirtual() != NULL) return; + if (Base::GetArena() != NULL) return; KeyTypeHandler::DeleteNoArena(key_); ValueTypeHandler::DeleteNoArena(value_); } @@ -161,186 +205,136 @@ class MapEntryImpl : public Base { 
return KeyTypeHandler::GetExternalReference(key_); } virtual inline const ValueMapEntryAccessorType& value() const { - return ValueTypeHandler::DefaultIfNotInitialized( - value_, Derived::internal_default_instance()->value_); + return ValueTypeHandler::DefaultIfNotInitialized(value_); } inline KeyMapEntryAccessorType* mutable_key() { set_has_key(); - return KeyTypeHandler::EnsureMutable(&key_, GetArenaNoVirtual()); + return KeyTypeHandler::EnsureMutable(&key_, Base::GetArena()); } inline ValueMapEntryAccessorType* mutable_value() { set_has_value(); - return ValueTypeHandler::EnsureMutable(&value_, GetArenaNoVirtual()); + return ValueTypeHandler::EnsureMutable(&value_, Base::GetArena()); } // implements MessageLite ========================================= // MapEntryImpl is for implementation only and this function isn't called // anywhere. Just provide a fake implementation here for MessageLite. - string GetTypeName() const { return ""; } + std::string GetTypeName() const override { return ""; } - void CheckTypeAndMergeFrom(const MessageLite& other) { - MergeFromInternal(*::google::protobuf::down_cast(&other)); + void CheckTypeAndMergeFrom(const MessageLite& other) override { + MergeFromInternal(*::google::protobuf::internal::DownCast(&other)); } - bool MergePartialFromCodedStream(::google::protobuf::io::CodedInputStream* input) { - uint32 tag; - - for (;;) { - // 1) corrupted data: return false; - // 2) unknown field: skip without putting into unknown field set; - // 3) unknown enum value: keep it in parsing. In proto2, caller should - // check the value and put this entry into containing message's unknown - // field set if the value is an unknown enum. In proto3, caller doesn't - // need to care whether the value is unknown enum; - // 4) missing key/value: missed key/value will have default value. caller - // should take this entry as if key/value is set to default value. 
- tag = input->ReadTagNoLastTag(); - switch (tag) { - case kKeyTag: - if (!KeyTypeHandler::Read(input, mutable_key())) { - return false; - } - set_has_key(); - break; - - case kValueTag: - if (!ValueTypeHandler::Read(input, mutable_value())) { - return false; - } - set_has_value(); - if (input->ExpectAtEnd()) return true; - break; - - default: - if (tag == 0 || - WireFormatLite::GetTagWireType(tag) == - WireFormatLite::WIRETYPE_END_GROUP) { - return true; - } - if (!WireFormatLite::SkipField(input, tag)) return false; - break; + const char* _InternalParse(const char* ptr, ParseContext* ctx) final { + while (!ctx->Done(&ptr)) { + uint32 tag; + ptr = ReadTag(ptr, &tag); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + if (tag == kKeyTag) { + set_has_key(); + KeyMapEntryAccessorType* key = mutable_key(); + ptr = KeyTypeHandler::Read(ptr, ctx, key); + if (!Derived::ValidateKey(key)) return nullptr; + } else if (tag == kValueTag) { + set_has_value(); + ValueMapEntryAccessorType* value = mutable_value(); + ptr = ValueTypeHandler::Read(ptr, ctx, value); + if (!Derived::ValidateValue(value)) return nullptr; + } else { + if (tag == 0 || WireFormatLite::GetTagWireType(tag) == + WireFormatLite::WIRETYPE_END_GROUP) { + ctx->SetLastTag(tag); + return ptr; + } + ptr = UnknownFieldParse(tag, static_cast(nullptr), ptr, + ctx); } + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); } + return ptr; } - size_t ByteSizeLong() const { + size_t ByteSizeLong() const override { size_t size = 0; - size += has_key() ? - kTagSize + static_cast(KeyTypeHandler::ByteSize(key())) : 0; - size += has_value() ? 
- kTagSize + static_cast(ValueTypeHandler::ByteSize(value())) : 0; + size += kTagSize + static_cast(KeyTypeHandler::ByteSize(key())); + size += kTagSize + static_cast(ValueTypeHandler::ByteSize(value())); return size; } - void SerializeWithCachedSizes(::google::protobuf::io::CodedOutputStream* output) const { - KeyTypeHandler::Write(kKeyFieldNumber, key(), output); - ValueTypeHandler::Write(kValueFieldNumber, value(), output); - } - - ::google::protobuf::uint8* InternalSerializeWithCachedSizesToArray(bool deterministic, - ::google::protobuf::uint8* output) const { - output = KeyTypeHandler::InternalWriteToArray(kKeyFieldNumber, key(), - deterministic, output); - output = ValueTypeHandler::InternalWriteToArray(kValueFieldNumber, value(), - deterministic, output); - return output; + ::google::protobuf::uint8* _InternalSerialize(::google::protobuf::uint8* ptr, + io::EpsCopyOutputStream* stream) const override { + ptr = KeyTypeHandler::Write(kKeyFieldNumber, key(), ptr, stream); + return ValueTypeHandler::Write(kValueFieldNumber, value(), ptr, stream); } // Don't override SerializeWithCachedSizesToArray. Use MessageLite's. - int GetCachedSize() const { + int GetCachedSize() const override { int size = 0; - size += has_key() - ? static_cast(kTagSize) + KeyTypeHandler::GetCachedSize(key()) - : 0; - size += has_value() - ? static_cast(kTagSize) + ValueTypeHandler::GetCachedSize(value()) - : 0; + size += has_key() ? static_cast(kTagSize) + + KeyTypeHandler::GetCachedSize(key()) + : 0; + size += has_value() ? 
static_cast(kTagSize) + + ValueTypeHandler::GetCachedSize(value()) + : 0; return size; } - bool IsInitialized() const { return ValueTypeHandler::IsInitialized(value_); } + bool IsInitialized() const override { + return ValueTypeHandler::IsInitialized(value_); + } - Base* New() const { + Base* New() const override { Derived* entry = new Derived; return entry; } - Base* New(Arena* arena) const { + Base* New(Arena* arena) const override { Derived* entry = Arena::CreateMessage(arena); return entry; } - size_t SpaceUsedLong() const { - size_t size = sizeof(Derived); - size += KeyTypeHandler::SpaceUsedInMapEntryLong(key_); - size += ValueTypeHandler::SpaceUsedInMapEntryLong(value_); - return size; - } - protected: // We can't declare this function directly here as it would hide the other // overload (const Message&). void MergeFromInternal(const MapEntryImpl& from) { if (from._has_bits_[0]) { if (from.has_key()) { - KeyTypeHandler::EnsureMutable(&key_, GetArenaNoVirtual()); - KeyTypeHandler::Merge(from.key(), &key_, GetArenaNoVirtual()); + KeyTypeHandler::EnsureMutable(&key_, Base::GetArena()); + KeyTypeHandler::Merge(from.key(), &key_, Base::GetArena()); set_has_key(); } if (from.has_value()) { - ValueTypeHandler::EnsureMutable(&value_, GetArenaNoVirtual()); - ValueTypeHandler::Merge(from.value(), &value_, GetArenaNoVirtual()); + ValueTypeHandler::EnsureMutable(&value_, Base::GetArena()); + ValueTypeHandler::Merge(from.value(), &value_, Base::GetArena()); set_has_value(); } } } public: - void Clear() { - KeyTypeHandler::Clear(&key_, GetArenaNoVirtual()); - ValueTypeHandler::ClearMaybeByDefaultEnum( - &value_, GetArenaNoVirtual(), default_enum_value); + void Clear() override { + KeyTypeHandler::Clear(&key_, Base::GetArena()); + ValueTypeHandler::Clear(&value_, Base::GetArena()); clear_has_key(); clear_has_value(); } - static void InitAsDefaultInstance() { - Derived* d = const_cast(Derived::internal_default_instance()); - KeyTypeHandler::AssignDefaultValue(&d->key_); - 
ValueTypeHandler::AssignDefaultValue(&d->value_); - } - - Arena* GetArena() const { - return GetArenaNoVirtual(); - } - - // Create a MapEntryImpl for given key and value from google::protobuf::Map in - // serialization. This function is only called when value is enum. Enum is - // treated differently because its type in MapEntry is int and its type in - // google::protobuf::Map is enum. We cannot create a reference to int from an enum. - static Derived* EnumWrap(const Key& key, const Value value, Arena* arena) { - return Arena::CreateMessage(arena, key, value); - } - - // Like above, but for all the other types. This avoids value copy to create - // MapEntryImpl from google::protobuf::Map in serialization. - static Derived* Wrap(const Key& key, const Value& value, Arena* arena) { - return Arena::CreateMessage(arena, key, value); - } - // Parsing using MergePartialFromCodedStream, above, is not as // efficient as it could be. This helper class provides a speedier way. template class Parser { public: explicit Parser(MapField* mf) : mf_(mf), map_(mf->MutableMap()) {} + ~Parser() { + if (entry_ != nullptr && entry_->GetArena() == nullptr) delete entry_; + } // This does what the typical MergePartialFromCodedStream() is expected to // do, with the additional side-effect that if successful (i.e., if true is // going to be its return value) it inserts the key-value pair into map_. - bool MergePartialFromCodedStream(::google::protobuf::io::CodedInputStream* input) { + bool MergePartialFromCodedStream(io::CodedInputStream* input) { // Look for the expected thing: a key and then a value. If it fails, // invoke the enclosing class's MergePartialFromCodedStream, or return // false if that would be pointless. @@ -353,11 +347,11 @@ class MapEntryImpl : public Base { int size; input->GetDirectBufferPointerInline(&data, &size); // We could use memcmp here, but we don't bother. The tag is one byte. 
- GOOGLE_COMPILE_ASSERT(kTagSize == 1, tag_size_error); + static_assert(kTagSize == 1, "tag size must be 1"); if (size > 0 && *reinterpret_cast(data) == kValueTag) { typename Map::size_type map_size = map_->size(); value_ptr_ = &(*map_)[key_]; - if (GOOGLE_PREDICT_TRUE(map_size != map_->size())) { + if (PROTOBUF_PREDICT_TRUE(map_size != map_->size())) { // We created a new key-value pair. Fill in the value. typedef typename MapIf::type T; @@ -375,64 +369,118 @@ class MapEntryImpl : public Base { key_ = Key(); } - entry_.reset(mf_->NewEntry()); + NewEntry(); *entry_->mutable_key() = key_; const bool result = entry_->MergePartialFromCodedStream(input); if (result) UseKeyAndValueFromEntry(); - if (entry_->GetArena() != NULL) entry_.release(); return result; } + const char* _InternalParse(const char* ptr, ParseContext* ctx) { + if (PROTOBUF_PREDICT_TRUE(!ctx->Done(&ptr) && *ptr == kKeyTag)) { + ptr = KeyTypeHandler::Read(ptr + 1, ctx, &key_); + if (PROTOBUF_PREDICT_FALSE(!ptr || !Derived::ValidateKey(&key_))) { + return nullptr; + } + if (PROTOBUF_PREDICT_TRUE(!ctx->Done(&ptr) && *ptr == kValueTag)) { + typename Map::size_type map_size = map_->size(); + value_ptr_ = &(*map_)[key_]; + if (PROTOBUF_PREDICT_TRUE(map_size != map_->size())) { + using T = + typename MapIf::type; + ptr = ValueTypeHandler::Read(ptr + 1, ctx, + reinterpret_cast(value_ptr_)); + if (PROTOBUF_PREDICT_FALSE(!ptr || + !Derived::ValidateValue(value_ptr_))) { + map_->erase(key_); // Failure! Undo insertion. 
+ return nullptr; + } + if (PROTOBUF_PREDICT_TRUE(ctx->Done(&ptr))) return ptr; + if (!ptr) return nullptr; + NewEntry(); + ValueMover::Move(value_ptr_, entry_->mutable_value()); + map_->erase(key_); + goto move_key; + } + } else { + if (!ptr) return nullptr; + } + NewEntry(); + move_key: + KeyMover::Move(&key_, entry_->mutable_key()); + } else { + if (!ptr) return nullptr; + NewEntry(); + } + ptr = entry_->_InternalParse(ptr, ctx); + if (ptr) UseKeyAndValueFromEntry(); + return ptr; + } + + template + const char* ParseWithEnumValidation(const char* ptr, ParseContext* ctx, + bool (*is_valid)(int), uint32 field_num, + InternalMetadata* metadata) { + auto entry = NewEntry(); + ptr = entry->_InternalParse(ptr, ctx); + if (!ptr) return nullptr; + if (is_valid(entry->value())) { + UseKeyAndValueFromEntry(); + } else { + WriteLengthDelimited(field_num, entry->SerializeAsString(), + metadata->mutable_unknown_fields()); + } + return ptr; + } + + MapEntryImpl* NewEntry() { return entry_ = mf_->NewEntry(); } + const Key& key() const { return key_; } const Value& value() const { return *value_ptr_; } + const Key& entry_key() const { return entry_->key(); } + const Value& entry_value() const { return entry_->value(); } + private: - void UseKeyAndValueFromEntry() GOOGLE_PROTOBUF_ATTRIBUTE_COLD { + void UseKeyAndValueFromEntry() { // Update key_ in case we need it later (because key() is called). // This is potentially inefficient, especially if the key is // expensive to copy (e.g., a long string), but this is a cold // path, so it's not a big deal. key_ = entry_->key(); value_ptr_ = &(*map_)[key_]; - MoveHelper::Move(entry_->mutable_value(), value_ptr_); + ValueMover::Move(entry_->mutable_value(), value_ptr_); } // After reading a key and value successfully, and inserting that data // into map_, we are not at the end of the input. This is unusual, but // allowed by the spec. 
- bool ReadBeyondKeyValuePair(::google::protobuf::io::CodedInputStream* input) - GOOGLE_PROTOBUF_ATTRIBUTE_COLD { - typedef MoveHelper KeyMover; - typedef MoveHelper ValueMover; - entry_.reset(mf_->NewEntry()); + bool ReadBeyondKeyValuePair(io::CodedInputStream* input) PROTOBUF_COLD { + NewEntry(); ValueMover::Move(value_ptr_, entry_->mutable_value()); map_->erase(key_); KeyMover::Move(&key_, entry_->mutable_key()); const bool result = entry_->MergePartialFromCodedStream(input); if (result) UseKeyAndValueFromEntry(); - if (entry_->GetArena() != NULL) entry_.release(); return result; } + typedef MoveHelper + KeyMover; + typedef MoveHelper + ValueMover; + MapField* const mf_; Map* const map_; Key key_; Value* value_ptr_; - // On the fast path entry_ is not used. And, when entry_ is used, it's set - // to mf_->NewEntry(), so in the arena case we must call entry_.release. - std::unique_ptr entry_; + MapEntryImpl* entry_ = nullptr; }; protected: @@ -443,89 +491,23 @@ class MapEntryImpl : public Base { bool has_value() const { return (_has_bits_[0] & 0x00000002u) != 0; } void clear_has_value() { _has_bits_[0] &= ~0x00000002u; } - private: - // Serializing a generated message containing map field involves serializing - // key-value pairs from google::protobuf::Map. The wire format of each key-value pair - // after serialization should be the same as that of a MapEntry message - // containing the same key and value inside it. However, google::protobuf::Map doesn't - // store key and value as MapEntry message, which disables us to use existing - // code to serialize message. In order to use existing code to serialize - // message, we need to construct a MapEntry from key-value pair. But it - // involves copy of key and value to construct a MapEntry. In order to avoid - // this copy in constructing a MapEntry, we need the following class which - // only takes references of given key and value. 
- class MapEntryWrapper : public Derived { - typedef Derived BaseClass; - typedef typename BaseClass::KeyMapEntryAccessorType KeyMapEntryAccessorType; - typedef - typename BaseClass::ValueMapEntryAccessorType ValueMapEntryAccessorType; - - public: - MapEntryWrapper(Arena* arena, const Key& key, const Value& value) - : Derived(arena), key_(key), value_(value) { - BaseClass::set_has_key(); - BaseClass::set_has_value(); - } - inline const KeyMapEntryAccessorType& key() const { return key_; } - inline const ValueMapEntryAccessorType& value() const { return value_; } - - private: - const Key& key_; - const Value& value_; - - friend class ::google::protobuf::Arena; - typedef void InternalArenaConstructable_; - typedef void DestructorSkippable_; - }; - - // Like above, but for enum value only, which stores value instead of - // reference of value field inside. This is needed because the type of value - // field in constructor is an enum, while we need to store it as an int. If we - // initialize a reference to int with a reference to enum, compiler will - // generate a temporary int from enum and initialize the reference to int with - // the temporary. 
- class MapEnumEntryWrapper : public Derived { - typedef Derived BaseClass; - typedef typename BaseClass::KeyMapEntryAccessorType KeyMapEntryAccessorType; - typedef - typename BaseClass::ValueMapEntryAccessorType ValueMapEntryAccessorType; - - public: - MapEnumEntryWrapper(Arena* arena, const Key& key, const Value& value) - : Derived(arena), key_(key), value_(value) { - BaseClass::set_has_key(); - BaseClass::set_has_value(); - } - inline const KeyMapEntryAccessorType& key() const { return key_; } - inline const ValueMapEntryAccessorType& value() const { return value_; } - - private: - const KeyMapEntryAccessorType& key_; - const ValueMapEntryAccessorType value_; - - friend class google::protobuf::Arena; - typedef void DestructorSkippable_; - }; - - inline Arena* GetArenaNoVirtual() const { - return arena_; - } + public: + inline Arena* GetArena() const { return Base::GetArena(); } public: // Needed for constructing tables KeyOnMemory key_; ValueOnMemory value_; - Arena* arena_; uint32 _has_bits_[1]; private: - friend class ::google::protobuf::Arena; + friend class ::PROTOBUF_NAMESPACE_ID::Arena; typedef void InternalArenaConstructable_; typedef void DestructorSkippable_; template + WireFormatLite::FieldType> friend class internal::MapEntry; template + WireFormatLite::FieldType> friend class internal::MapFieldLite; GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MapEntryImpl); @@ -533,16 +515,16 @@ class MapEntryImpl : public Base { template -class MapEntryLite - : public MapEntryImpl { + WireFormatLite::FieldType kValueFieldType> +class MapEntryLite : public MapEntryImpl { public: typedef MapEntryImpl + kValueFieldType> SuperType; - MapEntryLite() {} + constexpr MapEntryLite() {} explicit MapEntryLite(Arena* arena) : SuperType(arena) {} + ~MapEntryLite() { MessageLite::_internal_metadata_.Delete(); } void MergeFrom(const MapEntryLite& other) { MergeFromInternal(other); } private: @@ -555,13 +537,12 @@ template struct DeconstructMapEntry; template -struct DeconstructMapEntry > { 
+ WireFormatLite::FieldType value> +struct DeconstructMapEntry > { typedef K Key; typedef V Value; static const WireFormatLite::FieldType kKeyFieldType = key; static const WireFormatLite::FieldType kValueFieldType = value; - static const int default_enum_value = default_enum; }; // Helpers for deterministic serialization ============================= @@ -570,7 +551,8 @@ struct DeconstructMapEntry > { // type is relatively small and easy to copy then copying Keys into an // array of SortItems can be beneficial. Then all the data the sorting // algorithm needs to touch is in that one array. -template struct SortItem { +template +struct SortItem { SortItem() {} explicit SortItem(PtrToKeyValuePair p) : first(p->first), second(p) {} @@ -578,16 +560,14 @@ template struct SortItem { PtrToKeyValuePair second; }; -template struct CompareByFirstField { - bool operator()(const T& a, const T& b) const { - return a.first < b.first; - } +template +struct CompareByFirstField { + bool operator()(const T& a, const T& b) const { return a.first < b.first; } }; -template struct CompareByDerefFirst { - bool operator()(const T& a, const T& b) const { - return a->first < b->first; - } +template +struct CompareByDerefFirst { + bool operator()(const T& a, const T& b) const { return a->first < b->first; } }; // Helper for table driven serialization @@ -602,20 +582,20 @@ struct FromHelper { template <> struct FromHelper { - static ArenaStringPtr From(const string& x) { + static ArenaStringPtr From(const std::string& x) { ArenaStringPtr res; - TaggedPtr<::std::string> ptr; - ptr.Set(const_cast(&x)); + TaggedPtr ptr; + ptr.Set(const_cast(&x)); res.UnsafeSetTaggedPointer(ptr); return res; } }; template <> struct FromHelper { - static ArenaStringPtr From(const string& x) { + static ArenaStringPtr From(const std::string& x) { ArenaStringPtr res; - TaggedPtr<::std::string> ptr; - ptr.Set(const_cast(&x)); + TaggedPtr ptr; + ptr.Set(const_cast(&x)); res.UnsafeSetTaggedPointer(ptr); return res; } @@ 
-633,9 +613,9 @@ struct MapEntryHelper; template -struct MapEntryHelper > { + WireFormatLite::FieldType kValueFieldType> +struct MapEntryHelper< + MapEntryLite > { // Provide utilities to parse/serialize key/value. Provide utilities to // manipulate internal stored type. typedef MapTypeHandler KeyTypeHandler; @@ -653,7 +633,7 @@ struct MapEntryHelper::From(map_pair.first)), value_(FromHelper::From(map_pair.second)) {} - // Purposely not folowing the style guide naming. These are the names + // Purposely not following the style guide naming. These are the names // the proto compiler would generate given the map entry descriptor. // The proto compiler generates the offsets in this struct as if this was // a regular message. This way the table driven code barely notices it's @@ -666,6 +646,8 @@ struct MapEntryHelper + #endif // GOOGLE_PROTOBUF_MAP_ENTRY_LITE_H__ diff --git a/third_party/protobuf-lite/google/protobuf/map_field_lite.h b/third_party/protobuf-lite/google/protobuf/map_field_lite.h index f648b430..665cb0ee 100644 --- a/third_party/protobuf-lite/google/protobuf/map_field_lite.h +++ b/third_party/protobuf-lite/google/protobuf/map_field_lite.h @@ -31,20 +31,30 @@ #ifndef GOOGLE_PROTOBUF_MAP_FIELD_LITE_H__ #define GOOGLE_PROTOBUF_MAP_FIELD_LITE_H__ +#include +#include +#include #include #include +#include #include +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif + namespace google { namespace protobuf { namespace internal { // This class provides access to map field using generated api. It is used for -// internal generated message implentation only. Users should never use this +// internal generated message implementation only. Users should never use this // directly. template + WireFormatLite::FieldType value_wire_type> class MapFieldLite { // Define message type for internal repeated field. 
typedef Derived EntryType; @@ -53,11 +63,9 @@ class MapFieldLite { typedef Map MapType; typedef EntryType EntryTypeTrait; - MapFieldLite() : arena_(NULL) { SetDefaultEnumValue(); } + constexpr MapFieldLite() {} - explicit MapFieldLite(Arena* arena) : arena_(arena), map_(arena) { - SetDefaultEnumValue(); - } + explicit MapFieldLite(Arena* arena) : map_(arena) {} // Accessors const Map& GetMap() const { return map_; } @@ -74,46 +82,77 @@ class MapFieldLite { } void Swap(MapFieldLite* other) { map_.swap(other->map_); } - // Set default enum value only for proto2 map field whose value is enum type. - void SetDefaultEnumValue() { - MutableMap()->SetDefaultEnumValue(default_enum_value); - } - // Used in the implementation of parsing. Caller should take the ownership iff // arena_ is NULL. EntryType* NewEntry() const { - if (arena_ == NULL) { - return new EntryType(); - } else { - return Arena::CreateMessage(arena_); - } + return Arena::CreateMessage(map_.arena()); } // Used in the implementation of serializing enum value type. Caller should // take the ownership iff arena_ is NULL. EntryType* NewEnumEntryWrapper(const Key& key, const T t) const { - return EntryType::EnumWrap(key, t, arena_); + return EntryType::EnumWrap(key, t, map_.arena_); } // Used in the implementation of serializing other value types. Caller should // take the ownership iff arena_ is NULL. 
EntryType* NewEntryWrapper(const Key& key, const T& t) const { - return EntryType::Wrap(key, t, arena_); + return EntryType::Wrap(key, t, map_.arena_); + } + + const char* _InternalParse(const char* ptr, ParseContext* ctx) { + typename Derived::template Parser> parser(this); + return parser._InternalParse(ptr, ctx); + } + + template + const char* ParseWithEnumValidation(const char* ptr, ParseContext* ctx, + bool (*is_valid)(int), uint32 field_num, + InternalMetadata* metadata) { + typename Derived::template Parser> parser(this); + return parser.template ParseWithEnumValidation( + ptr, ctx, is_valid, field_num, metadata); } private: typedef void DestructorSkippable_; - Arena* arena_; Map map_; - friend class ::google::protobuf::Arena; + friend class ::PROTOBUF_NAMESPACE_ID::Arena; }; +template +struct EnumParseWrapper { + const char* _InternalParse(const char* ptr, ParseContext* ctx) { + return map_field->template ParseWithEnumValidation( + ptr, ctx, is_valid, field_num, metadata); + } + T* map_field; + bool (*is_valid)(int); + uint32 field_num; + InternalMetadata* metadata; +}; + +// Helper function because the typenames of maps are horrendous to print. This +// leverages compiler type deduction, to keep all type data out of the +// generated code +template +EnumParseWrapper InitEnumParseWrapper( + T* map_field, bool (*is_valid)(int), uint32 field_num, + InternalMetadata* metadata) { + return EnumParseWrapper{map_field, is_valid, field_num, + metadata}; +} + // True if IsInitialized() is true for value field in all elements of t. T is // expected to be message. It's useful to have this helper here to keep the // protobuf compiler from ever having to emit loops in IsInitialized() methods. // We want the C++ compiler to inline this or not as it sees fit. 
-template -bool AllAreInitialized(const Map& t) { +template +bool AllAreInitialized(const MapFieldLite& field) { + const auto& t = field.GetMap(); for (typename Map::const_iterator it = t.begin(); it != t.end(); ++it) { if (!it->second.IsInitialized()) return false; @@ -126,18 +165,19 @@ struct MapEntryToMapField : MapEntryToMapField {}; template -struct MapEntryToMapField > { - typedef MapFieldLite, - Key, Value, kKeyFieldType, kValueFieldType, - default_enum_value> + WireFormatLite::FieldType kValueFieldType> +struct MapEntryToMapField< + MapEntryLite> { + typedef MapFieldLite< + MapEntryLite, Key, Value, + kKeyFieldType, kValueFieldType> MapFieldType; }; } // namespace internal } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_MAP_FIELD_LITE_H__ diff --git a/third_party/protobuf-lite/google/protobuf/map_type_handler.h b/third_party/protobuf-lite/google/protobuf/map_type_handler.h index 7f7b1e0e..8b8fd14c 100644 --- a/third_party/protobuf-lite/google/protobuf/map_type_handler.h +++ b/third_party/protobuf-lite/google/protobuf/map_type_handler.h @@ -31,8 +31,14 @@ #ifndef GOOGLE_PROTOBUF_TYPE_HANDLER_H__ #define GOOGLE_PROTOBUF_TYPE_HANDLER_H__ +#include +#include #include -#include +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif namespace google { namespace protobuf { @@ -40,41 +46,19 @@ namespace internal { // Used for compile time type selection. MapIf::type will be TrueType if Flag is // true and FalseType otherwise. -template +template struct MapIf; -template +template struct MapIf { typedef TrueType type; }; -template +template struct MapIf { typedef FalseType type; }; -// In proto2 Map, enum needs to be initialized to given default value, while -// other types' default value can be inferred from the type. 
-template -class MapValueInitializer { - public: - static inline void Initialize(Type& type, int default_enum_value); -}; - -template -class MapValueInitializer { - public: - static inline void Initialize(Type& value, int default_enum_value) { - value = static_cast(default_enum_value); - } -}; - -template -class MapValueInitializer { - public: - static inline void Initialize(Type& /* value */, int /* default_enum_value */) {} -}; - template class MapArenaMessageCreator { public: @@ -113,23 +97,23 @@ class MapWireFieldTypeTraits {}; WireFormatLite::WIRETYPE_##WireFormatType; \ }; -TYPE_TRAITS(MESSAGE , Type, LENGTH_DELIMITED, true, false) -TYPE_TRAITS(STRING , ArenaStringPtr, LENGTH_DELIMITED, false, false) -TYPE_TRAITS(BYTES , ArenaStringPtr , LENGTH_DELIMITED, false, false) -TYPE_TRAITS(INT64 , int64 , VARINT , false, false) -TYPE_TRAITS(UINT64 , uint64 , VARINT , false, false) -TYPE_TRAITS(INT32 , int32 , VARINT , false, false) -TYPE_TRAITS(UINT32 , uint32 , VARINT , false, false) -TYPE_TRAITS(SINT64 , int64 , VARINT , false, false) -TYPE_TRAITS(SINT32 , int32 , VARINT , false, false) -TYPE_TRAITS(ENUM , int , VARINT , false, true ) -TYPE_TRAITS(DOUBLE , double , FIXED64, false, false) -TYPE_TRAITS(FLOAT , float , FIXED32, false, false) -TYPE_TRAITS(FIXED64 , uint64 , FIXED64, false, false) -TYPE_TRAITS(FIXED32 , uint32 , FIXED32, false, false) -TYPE_TRAITS(SFIXED64, int64 , FIXED64, false, false) -TYPE_TRAITS(SFIXED32, int32 , FIXED32, false, false) -TYPE_TRAITS(BOOL , bool , VARINT , false, false) +TYPE_TRAITS(MESSAGE, Type, LENGTH_DELIMITED, true, false) +TYPE_TRAITS(STRING, ArenaStringPtr, LENGTH_DELIMITED, false, false) +TYPE_TRAITS(BYTES, ArenaStringPtr, LENGTH_DELIMITED, false, false) +TYPE_TRAITS(INT64, int64, VARINT, false, false) +TYPE_TRAITS(UINT64, uint64, VARINT, false, false) +TYPE_TRAITS(INT32, int32, VARINT, false, false) +TYPE_TRAITS(UINT32, uint32, VARINT, false, false) +TYPE_TRAITS(SINT64, int64, VARINT, false, false) +TYPE_TRAITS(SINT32, 
int32, VARINT, false, false) +TYPE_TRAITS(ENUM, int, VARINT, false, true) +TYPE_TRAITS(DOUBLE, double, FIXED64, false, false) +TYPE_TRAITS(FLOAT, float, FIXED32, false, false) +TYPE_TRAITS(FIXED64, uint64, FIXED64, false, false) +TYPE_TRAITS(FIXED32, uint32, FIXED32, false, false) +TYPE_TRAITS(SFIXED64, int64, FIXED64, false, false) +TYPE_TRAITS(SFIXED32, int32, FIXED32, false, false) +TYPE_TRAITS(BOOL, bool, VARINT, false, false) #undef TYPE_TRAITS @@ -142,18 +126,19 @@ class MapTypeHandler { // Enum type cannot be used for MapTypeHandler::Read. Define a type which will // replace Enum with int. typedef typename MapWireFieldTypeTraits::MapEntryAccessorType MapEntryAccessorType; + Type>::MapEntryAccessorType + MapEntryAccessorType; // Internal stored type in MapEntryLite for given wire field type. typedef typename MapWireFieldTypeTraits::TypeOnMemory TypeOnMemory; // Corresponding wire type for field type. - static const WireFormatLite::WireType kWireType = + static constexpr WireFormatLite::WireType kWireType = MapWireFieldTypeTraits::kWireType; // Whether wire type is for message. - static const bool kIsMessage = + static constexpr bool kIsMessage = MapWireFieldTypeTraits::kIsMessage; // Whether wire type is for enum. - static const bool kIsEnum = + static constexpr bool kIsEnum = MapWireFieldTypeTraits::kIsEnum; // Functions used in parsing and serialization. 
=================== @@ -161,39 +146,26 @@ class MapTypeHandler { static inline int GetCachedSize(const MapEntryAccessorType& value); static inline bool Read(io::CodedInputStream* input, MapEntryAccessorType* value); - static inline void Write(int field, const MapEntryAccessorType& value, - io::CodedOutputStream* output); - static inline uint8* InternalWriteToArray(int field, - const MapEntryAccessorType& value, - bool deterministic, uint8* target); - static inline uint8* WriteToArray(int field, - const MapEntryAccessorType& value, - uint8* target); + static inline const char* Read(const char* ptr, ParseContext* ctx, + MapEntryAccessorType* value); + + static inline uint8* Write(int field, const MapEntryAccessorType& value, + uint8* ptr, io::EpsCopyOutputStream* stream); // Functions to manipulate data on memory. ======================== static inline const Type& GetExternalReference(const Type* value); static inline void DeleteNoArena(const Type* x); static inline void Merge(const Type& from, Type** to, Arena* arena); static inline void Clear(Type** value, Arena* arena); - static inline void ClearMaybeByDefaultEnum(Type** value, Arena* arena, - int default_enum_value); - static inline void Initialize(Type** x, Arena* arena); + static constexpr TypeOnMemory Constinit(); - static inline void InitializeMaybeByDefaultEnum(Type** x, - int default_enum_value, - Arena* arena); static inline Type* EnsureMutable(Type** value, Arena* arena); // SpaceUsedInMapEntry: Return bytes used by value in MapEntry, excluding // those already calculate in sizeof(MapField). static inline size_t SpaceUsedInMapEntryLong(const Type* value); - // Return bytes used by value in Map. - static inline size_t SpaceUsedInMapLong(const Type& value); - // Assign default value to given instance. - static inline void AssignDefaultValue(Type** value); // Return default instance if value is not initialized when calling const // reference accessor. 
- static inline const Type& DefaultIfNotInitialized( - const Type* value, const Type* default_value); + static inline const Type& DefaultIfNotInitialized(const Type* value); // Check if all required fields have values set. static inline bool IsInitialized(Type* value); }; @@ -220,37 +192,22 @@ class MapTypeHandler { static inline int GetCachedSize(const MapEntryAccessorType& value); \ static inline bool Read(io::CodedInputStream* input, \ MapEntryAccessorType* value); \ - static inline void Write(int field, const MapEntryAccessorType& value, \ - io::CodedOutputStream* output); \ - static inline uint8* InternalWriteToArray( \ - int field, const MapEntryAccessorType& value, bool deterministic, \ - uint8* target); \ - static inline uint8* WriteToArray(int field, \ - const MapEntryAccessorType& value, \ - uint8* target) { \ - return InternalWriteToArray(field, value, false, target); \ - } \ + static inline const char* Read(const char* begin, ParseContext* ctx, \ + MapEntryAccessorType* value); \ + static inline uint8* Write(int field, const MapEntryAccessorType& value, \ + uint8* ptr, io::EpsCopyOutputStream* stream); \ static inline const MapEntryAccessorType& GetExternalReference( \ const TypeOnMemory& value); \ static inline void DeleteNoArena(const TypeOnMemory& x); \ static inline void Merge(const MapEntryAccessorType& from, \ TypeOnMemory* to, Arena* arena); \ static inline void Clear(TypeOnMemory* value, Arena* arena); \ - static inline void ClearMaybeByDefaultEnum(TypeOnMemory* value, \ - Arena* arena, \ - int default_enum); \ static inline size_t SpaceUsedInMapEntryLong(const TypeOnMemory& value); \ - static inline size_t SpaceUsedInMapLong(const TypeOnMemory& value); \ - static inline size_t SpaceUsedInMapLong(const string& value); \ - static inline void AssignDefaultValue(TypeOnMemory* value); \ static inline const MapEntryAccessorType& DefaultIfNotInitialized( \ - const TypeOnMemory& value, const TypeOnMemory& default_value); \ + const TypeOnMemory& value); 
\ static inline bool IsInitialized(const TypeOnMemory& value); \ static void DeleteNoArena(TypeOnMemory& value); \ - static inline void Initialize(TypeOnMemory* value, Arena* arena); \ - static inline void InitializeMaybeByDefaultEnum(TypeOnMemory* value, \ - int default_enum_value, \ - Arena* arena); \ + static constexpr TypeOnMemory Constinit(); \ static inline MapEntryAccessorType* EnsureMutable(TypeOnMemory* value, \ Arena* arena); \ }; @@ -273,8 +230,7 @@ MAP_HANDLER(BOOL) #undef MAP_HANDLER template -inline size_t -MapTypeHandler::ByteSize( +inline size_t MapTypeHandler::ByteSize( const MapEntryAccessorType& value) { return WireFormatLite::MessageSizeNoVirtual(value); } @@ -287,14 +243,14 @@ MapTypeHandler::ByteSize( } GOOGLE_PROTOBUF_BYTE_SIZE(STRING, String) -GOOGLE_PROTOBUF_BYTE_SIZE(BYTES , Bytes) -GOOGLE_PROTOBUF_BYTE_SIZE(INT64 , Int64) +GOOGLE_PROTOBUF_BYTE_SIZE(BYTES, Bytes) +GOOGLE_PROTOBUF_BYTE_SIZE(INT64, Int64) GOOGLE_PROTOBUF_BYTE_SIZE(UINT64, UInt64) -GOOGLE_PROTOBUF_BYTE_SIZE(INT32 , Int32) +GOOGLE_PROTOBUF_BYTE_SIZE(INT32, Int32) GOOGLE_PROTOBUF_BYTE_SIZE(UINT32, UInt32) GOOGLE_PROTOBUF_BYTE_SIZE(SINT64, SInt64) GOOGLE_PROTOBUF_BYTE_SIZE(SINT32, SInt32) -GOOGLE_PROTOBUF_BYTE_SIZE(ENUM , Enum) +GOOGLE_PROTOBUF_BYTE_SIZE(ENUM, Enum) #undef GOOGLE_PROTOBUF_BYTE_SIZE @@ -305,23 +261,21 @@ GOOGLE_PROTOBUF_BYTE_SIZE(ENUM , Enum) return WireFormatLite::k##DeclaredType##Size; \ } -FIXED_BYTE_SIZE(DOUBLE , Double) -FIXED_BYTE_SIZE(FLOAT , Float) -FIXED_BYTE_SIZE(FIXED64 , Fixed64) -FIXED_BYTE_SIZE(FIXED32 , Fixed32) +FIXED_BYTE_SIZE(DOUBLE, Double) +FIXED_BYTE_SIZE(FLOAT, Float) +FIXED_BYTE_SIZE(FIXED64, Fixed64) +FIXED_BYTE_SIZE(FIXED32, Fixed32) FIXED_BYTE_SIZE(SFIXED64, SFixed64) FIXED_BYTE_SIZE(SFIXED32, SFixed32) -FIXED_BYTE_SIZE(BOOL , Bool) +FIXED_BYTE_SIZE(BOOL, Bool) #undef FIXED_BYTE_SIZE template -inline int -MapTypeHandler::GetCachedSize( +inline int MapTypeHandler::GetCachedSize( const MapEntryAccessorType& value) { - return static_cast( - 
WireFormatLite::LengthDelimitedSize( - static_cast(value.GetCachedSize()))); + return static_cast(WireFormatLite::LengthDelimitedSize( + static_cast(value.GetCachedSize()))); } #define GET_CACHED_SIZE(FieldType, DeclaredType) \ @@ -333,14 +287,14 @@ MapTypeHandler::GetCachedSize( } GET_CACHED_SIZE(STRING, String) -GET_CACHED_SIZE(BYTES , Bytes) -GET_CACHED_SIZE(INT64 , Int64) +GET_CACHED_SIZE(BYTES, Bytes) +GET_CACHED_SIZE(INT64, Int64) GET_CACHED_SIZE(UINT64, UInt64) -GET_CACHED_SIZE(INT32 , Int32) +GET_CACHED_SIZE(INT32, Int32) GET_CACHED_SIZE(UINT32, UInt32) GET_CACHED_SIZE(SINT64, SInt64) GET_CACHED_SIZE(SINT32, SInt32) -GET_CACHED_SIZE(ENUM , Enum) +GET_CACHED_SIZE(ENUM, Enum) #undef GET_CACHED_SIZE @@ -352,63 +306,60 @@ GET_CACHED_SIZE(ENUM , Enum) return WireFormatLite::k##DeclaredType##Size; \ } -GET_FIXED_CACHED_SIZE(DOUBLE , Double) -GET_FIXED_CACHED_SIZE(FLOAT , Float) -GET_FIXED_CACHED_SIZE(FIXED64 , Fixed64) -GET_FIXED_CACHED_SIZE(FIXED32 , Fixed32) +GET_FIXED_CACHED_SIZE(DOUBLE, Double) +GET_FIXED_CACHED_SIZE(FLOAT, Float) +GET_FIXED_CACHED_SIZE(FIXED64, Fixed64) +GET_FIXED_CACHED_SIZE(FIXED32, Fixed32) GET_FIXED_CACHED_SIZE(SFIXED64, SFixed64) GET_FIXED_CACHED_SIZE(SFIXED32, SFixed32) -GET_FIXED_CACHED_SIZE(BOOL , Bool) +GET_FIXED_CACHED_SIZE(BOOL, Bool) #undef GET_FIXED_CACHED_SIZE template -inline void MapTypeHandler::Write( - int field, const MapEntryAccessorType& value, - io::CodedOutputStream* output) { - WireFormatLite::WriteMessageMaybeToArray(field, value, output); -} - -template -inline uint8* -MapTypeHandler::InternalWriteToArray( - int field, const MapEntryAccessorType& value, bool deterministic, - uint8* target) { - return WireFormatLite::InternalWriteMessageToArray(field, value, - deterministic, target); +inline uint8* MapTypeHandler::Write( + int field, const MapEntryAccessorType& value, uint8* ptr, + io::EpsCopyOutputStream* stream) { + ptr = stream->EnsureSpace(ptr); + return WireFormatLite::InternalWriteMessage(field, value, ptr, 
stream); } #define WRITE_METHOD(FieldType, DeclaredType) \ template \ - inline void MapTypeHandler::Write( \ - int field, const MapEntryAccessorType& value, \ - io::CodedOutputStream* output) { \ - return WireFormatLite::Write##DeclaredType(field, value, output); \ - } \ + inline uint8* MapTypeHandler::Write( \ + int field, const MapEntryAccessorType& value, uint8* ptr, \ + io::EpsCopyOutputStream* stream) { \ + ptr = stream->EnsureSpace(ptr); \ + return stream->Write##DeclaredType(field, value, ptr); \ + } + +WRITE_METHOD(STRING, String) +WRITE_METHOD(BYTES, Bytes) + +#undef WRITE_METHOD +#define WRITE_METHOD(FieldType, DeclaredType) \ template \ - inline uint8* \ - MapTypeHandler::InternalWriteToArray( \ - int field, const MapEntryAccessorType& value, bool, uint8* target) { \ - return WireFormatLite::Write##DeclaredType##ToArray(field, value, target); \ + inline uint8* MapTypeHandler::Write( \ + int field, const MapEntryAccessorType& value, uint8* ptr, \ + io::EpsCopyOutputStream* stream) { \ + ptr = stream->EnsureSpace(ptr); \ + return WireFormatLite::Write##DeclaredType##ToArray(field, value, ptr); \ } -WRITE_METHOD(STRING , String) -WRITE_METHOD(BYTES , Bytes) -WRITE_METHOD(INT64 , Int64) -WRITE_METHOD(UINT64 , UInt64) -WRITE_METHOD(INT32 , Int32) -WRITE_METHOD(UINT32 , UInt32) -WRITE_METHOD(SINT64 , SInt64) -WRITE_METHOD(SINT32 , SInt32) -WRITE_METHOD(ENUM , Enum) -WRITE_METHOD(DOUBLE , Double) -WRITE_METHOD(FLOAT , Float) -WRITE_METHOD(FIXED64 , Fixed64) -WRITE_METHOD(FIXED32 , Fixed32) +WRITE_METHOD(INT64, Int64) +WRITE_METHOD(UINT64, UInt64) +WRITE_METHOD(INT32, Int32) +WRITE_METHOD(UINT32, UInt32) +WRITE_METHOD(SINT64, SInt64) +WRITE_METHOD(SINT32, SInt32) +WRITE_METHOD(ENUM, Enum) +WRITE_METHOD(DOUBLE, Double) +WRITE_METHOD(FLOAT, Float) +WRITE_METHOD(FIXED64, Fixed64) +WRITE_METHOD(FIXED32, Fixed32) WRITE_METHOD(SFIXED64, SFixed64) WRITE_METHOD(SFIXED32, SFixed32) -WRITE_METHOD(BOOL , Bool) +WRITE_METHOD(BOOL, Bool) #undef WRITE_METHOD @@ -430,6 +381,82 
@@ inline bool MapTypeHandler::Read( return WireFormatLite::ReadBytes(input, value); } +template +const char* MapTypeHandler::Read( + const char* ptr, ParseContext* ctx, MapEntryAccessorType* value) { + return ctx->ParseMessage(value, ptr); +} + +template +const char* MapTypeHandler::Read( + const char* ptr, ParseContext* ctx, MapEntryAccessorType* value) { + int size = ReadSize(&ptr); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + return ctx->ReadString(ptr, size, value); +} + +template +const char* MapTypeHandler::Read( + const char* ptr, ParseContext* ctx, MapEntryAccessorType* value) { + int size = ReadSize(&ptr); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + return ctx->ReadString(ptr, size, value); +} + +inline const char* ReadINT64(const char* ptr, int64* value) { + return VarintParse(ptr, reinterpret_cast(value)); +} +inline const char* ReadUINT64(const char* ptr, uint64* value) { + return VarintParse(ptr, value); +} +inline const char* ReadINT32(const char* ptr, int32* value) { + return VarintParse(ptr, reinterpret_cast(value)); +} +inline const char* ReadUINT32(const char* ptr, uint32* value) { + return VarintParse(ptr, value); +} +inline const char* ReadSINT64(const char* ptr, int64* value) { + *value = ReadVarintZigZag64(&ptr); + return ptr; +} +inline const char* ReadSINT32(const char* ptr, int32* value) { + *value = ReadVarintZigZag32(&ptr); + return ptr; +} +template +inline const char* ReadENUM(const char* ptr, E* value) { + *value = static_cast(ReadVarint32(&ptr)); + return ptr; +} +inline const char* ReadBOOL(const char* ptr, bool* value) { + *value = static_cast(ReadVarint32(&ptr)); + return ptr; +} + +template +inline const char* ReadUnaligned(const char* ptr, F* value) { + *value = UnalignedLoad(ptr); + return ptr + sizeof(F); +} +inline const char* ReadFLOAT(const char* ptr, float* value) { + return ReadUnaligned(ptr, value); +} +inline const char* ReadDOUBLE(const char* ptr, double* value) { + return ReadUnaligned(ptr, value); +} +inline const char* 
ReadFIXED64(const char* ptr, uint64* value) { + return ReadUnaligned(ptr, value); +} +inline const char* ReadFIXED32(const char* ptr, uint32* value) { + return ReadUnaligned(ptr, value); +} +inline const char* ReadSFIXED64(const char* ptr, int64* value) { + return ReadUnaligned(ptr, value); +} +inline const char* ReadSFIXED32(const char* ptr, int32* value) { + return ReadUnaligned(ptr, value); +} + #define READ_METHOD(FieldType) \ template \ inline bool MapTypeHandler::Read( \ @@ -437,6 +464,12 @@ inline bool MapTypeHandler::Read( return WireFormatLite::ReadPrimitive( \ input, value); \ + } \ + template \ + const char* MapTypeHandler::Read( \ + const char* begin, ParseContext* ctx, MapEntryAccessorType* value) { \ + (void)ctx; \ + return Read##FieldType(begin, value); \ } READ_METHOD(INT64) @@ -460,8 +493,8 @@ READ_METHOD(BOOL) template inline const Type& -MapTypeHandler::GetExternalReference(const Type* value) { +MapTypeHandler::GetExternalReference( + const Type* value) { return *value; } @@ -471,26 +504,12 @@ inline size_t MapTypeHandlerSpaceUsedLong(); } -template -size_t MapTypeHandler::SpaceUsedInMapLong( - const Type& value) { - return value.SpaceUsedLong(); -} - template inline void MapTypeHandler::Clear( Type** value, Arena* /* arena */) { if (*value != NULL) (*value)->Clear(); } template -inline void -MapTypeHandler::ClearMaybeByDefaultEnum(Type** value, - Arena* /* arena */, - int /* default_enum_value */) { - if (*value != NULL) (*value)->Clear(); -} -template inline void MapTypeHandler::Merge( const Type& from, Type** to, Arena* /* arena */) { (*to)->MergeFrom(from); @@ -503,218 +522,148 @@ void MapTypeHandler::DeleteNoArena( } template -inline void MapTypeHandler::AssignDefaultValue(Type** value) { - *value = const_cast(Type::internal_default_instance()); +constexpr auto MapTypeHandler::Constinit() + -> TypeOnMemory { + return nullptr; } template -inline void MapTypeHandler::Initialize(Type** x, - Arena* /* arena */) { - *x = NULL; -} - -template 
-inline void MapTypeHandler:: - InitializeMaybeByDefaultEnum(Type** x, int /* default_enum_value */, - Arena* /* arena */) { - *x = NULL; -} - -template -inline Type* MapTypeHandler::EnsureMutable(Type** value, - Arena* arena) { +inline Type* MapTypeHandler::EnsureMutable( + Type** value, Arena* arena) { if (*value == NULL) { - *value = - MapArenaMessageCreator:: - type::value>::CreateMessage(arena); + *value = MapArenaMessageCreator< + Type, + Arena::is_arena_constructable::type::value>::CreateMessage(arena); } return *value; } template -inline const Type& MapTypeHandler:: - DefaultIfNotInitialized(const Type* value, const Type* default_value) { - return value != NULL ? *value : *default_value; +inline const Type& +MapTypeHandler::DefaultIfNotInitialized( + const Type* value) { + return value != NULL ? *value : *Type::internal_default_instance(); } template -inline bool MapTypeHandler::IsInitialized(Type* value) { - return value->IsInitialized(); +inline bool MapTypeHandler::IsInitialized( + Type* value) { + return value ? 
value->IsInitialized() : false; } // Definition for string/bytes handler -#define STRING_OR_BYTES_HANDLER_FUNCTIONS(FieldType) \ - template \ - inline const typename MapTypeHandler::MapEntryAccessorType& \ - MapTypeHandler::GetExternalReference(const TypeOnMemory& value) { \ - return value.Get(); \ - } \ - template \ - inline size_t \ - MapTypeHandler::SpaceUsedInMapEntryLong(const TypeOnMemory& value) { \ - return sizeof(value); \ - } \ - template \ - inline size_t \ - MapTypeHandler::SpaceUsedInMapLong( \ - const TypeOnMemory& value) { \ - return sizeof(value); \ - } \ - template \ - inline size_t \ - MapTypeHandler::SpaceUsedInMapLong( \ - const string& value) { \ - return sizeof(value); \ - } \ - template \ - inline void MapTypeHandler::Clear( \ - TypeOnMemory* value, Arena* arena) { \ - value->ClearToEmpty(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), \ - arena); \ - } \ - template \ - inline void MapTypeHandler:: \ - ClearMaybeByDefaultEnum(TypeOnMemory* value, Arena* arena, \ - int /* default_enum */) { \ - Clear(value, arena); \ - } \ - template \ - inline void MapTypeHandler::Merge( \ - const MapEntryAccessorType& from, TypeOnMemory* to, Arena* arena) { \ - to->Set(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), from, arena); \ - } \ - template \ - void MapTypeHandler::DeleteNoArena( \ - TypeOnMemory& value) { \ - value.DestroyNoArena(&::google::protobuf::internal::GetEmptyStringAlreadyInited()); \ - } \ - template \ - inline void MapTypeHandler::AssignDefaultValue(TypeOnMemory* /* value */) {} \ - template \ - inline void \ - MapTypeHandler::Initialize( \ - TypeOnMemory* value, Arena* /* arena */) { \ - value->UnsafeSetDefault( \ - &::google::protobuf::internal::GetEmptyStringAlreadyInited()); \ - } \ - template \ - inline void MapTypeHandler:: \ - InitializeMaybeByDefaultEnum(TypeOnMemory* value, \ - int /* default_enum_value */, \ - Arena* arena) { \ - Initialize(value, arena); \ - } \ - template \ - inline typename 
MapTypeHandler::MapEntryAccessorType* \ - MapTypeHandler::EnsureMutable( \ - TypeOnMemory* value, Arena* arena) { \ - return value->Mutable(&::google::protobuf::internal::GetEmptyStringAlreadyInited(), \ - arena); \ - } \ - template \ - inline const typename MapTypeHandler::MapEntryAccessorType& \ - MapTypeHandler::DefaultIfNotInitialized(const TypeOnMemory& value, \ - const TypeOnMemory& \ - /* default_value */) { \ - return value.Get(); \ - } \ - template \ - inline bool MapTypeHandler::IsInitialized(const TypeOnMemory& /* value */) { \ - return true; \ +#define STRING_OR_BYTES_HANDLER_FUNCTIONS(FieldType) \ + template \ + inline const typename MapTypeHandler::MapEntryAccessorType& \ + MapTypeHandler::GetExternalReference(const TypeOnMemory& value) { \ + return value.Get(); \ + } \ + template \ + inline size_t \ + MapTypeHandler::SpaceUsedInMapEntryLong(const TypeOnMemory& value) { \ + return sizeof(value); \ + } \ + template \ + inline void MapTypeHandler::Clear( \ + TypeOnMemory* value, Arena* arena) { \ + value->ClearToEmpty(); \ + } \ + template \ + inline void MapTypeHandler::Merge( \ + const MapEntryAccessorType& from, TypeOnMemory* to, Arena* arena) { \ + to->Set(&internal::GetEmptyStringAlreadyInited(), from, arena); \ + } \ + template \ + void MapTypeHandler::DeleteNoArena( \ + TypeOnMemory& value) { \ + value.DestroyNoArena(&internal::GetEmptyStringAlreadyInited()); \ + } \ + template \ + constexpr auto \ + MapTypeHandler::Constinit() \ + ->TypeOnMemory { \ + return TypeOnMemory(&internal::GetEmptyStringAlreadyInited()); \ + } \ + template \ + inline typename MapTypeHandler::MapEntryAccessorType* \ + MapTypeHandler::EnsureMutable( \ + TypeOnMemory* value, Arena* arena) { \ + return value->Mutable(ArenaStringPtr::EmptyDefault{}, arena); \ + } \ + template \ + inline const typename MapTypeHandler::MapEntryAccessorType& \ + MapTypeHandler::DefaultIfNotInitialized(const TypeOnMemory& value) { \ + return value.Get(); \ + } \ + template \ + inline bool \ + 
MapTypeHandler::IsInitialized( \ + const TypeOnMemory& /* value */) { \ + return true; \ } STRING_OR_BYTES_HANDLER_FUNCTIONS(STRING) STRING_OR_BYTES_HANDLER_FUNCTIONS(BYTES) #undef STRING_OR_BYTES_HANDLER_FUNCTIONS -#define PRIMITIVE_HANDLER_FUNCTIONS(FieldType) \ - template \ - inline const typename MapTypeHandler::MapEntryAccessorType& \ - MapTypeHandler::GetExternalReference(const TypeOnMemory& value) { \ - return value; \ - } \ - template \ - inline size_t \ - MapTypeHandler::SpaceUsedInMapEntryLong(const TypeOnMemory& /* value */) { \ - return 0; \ - } \ - template \ - inline size_t \ - MapTypeHandler::SpaceUsedInMapLong( \ - const TypeOnMemory& /* value */) { \ - return sizeof(Type); \ - } \ - template \ - inline void MapTypeHandler::Clear( \ - TypeOnMemory* value, Arena* /* arena */) { \ - *value = 0; \ - } \ - template \ - inline void MapTypeHandler:: \ - ClearMaybeByDefaultEnum(TypeOnMemory* value, Arena* /* arena */, \ - int default_enum_value) { \ - *value = static_cast(default_enum_value); \ - } \ - template \ - inline void MapTypeHandler::Merge( \ - const MapEntryAccessorType& from, TypeOnMemory* to, \ - Arena* /* arena */) { \ - *to = from; \ - } \ - template \ - inline void MapTypeHandler::DeleteNoArena(TypeOnMemory& /* x */) {} \ - template \ - inline void MapTypeHandler::AssignDefaultValue(TypeOnMemory* /* value */) {} \ - template \ - inline void \ - MapTypeHandler::Initialize( \ - TypeOnMemory* value, Arena* /* arena */) { \ - *value = 0; \ - } \ - template \ - inline void MapTypeHandler:: \ - InitializeMaybeByDefaultEnum(TypeOnMemory* value, \ - int default_enum_value, \ - Arena* /* arena */) { \ - *value = static_cast(default_enum_value); \ - } \ - template \ - inline typename MapTypeHandler::MapEntryAccessorType* \ - MapTypeHandler::EnsureMutable( \ - TypeOnMemory* value, Arena* /* arena */) { \ - return value; \ - } \ - template \ - inline const typename MapTypeHandler::MapEntryAccessorType& \ - MapTypeHandler::DefaultIfNotInitialized(const 
TypeOnMemory& value, \ - const TypeOnMemory& \ - /* default_value */) { \ - return value; \ - } \ - template \ - inline bool MapTypeHandler::IsInitialized(const TypeOnMemory& /* value */) { \ - return true; \ +#define PRIMITIVE_HANDLER_FUNCTIONS(FieldType) \ + template \ + inline const typename MapTypeHandler::MapEntryAccessorType& \ + MapTypeHandler::GetExternalReference(const TypeOnMemory& value) { \ + return value; \ + } \ + template \ + inline size_t MapTypeHandler:: \ + SpaceUsedInMapEntryLong(const TypeOnMemory& /* value */) { \ + return 0; \ + } \ + template \ + inline void MapTypeHandler::Clear( \ + TypeOnMemory* value, Arena* /* arena */) { \ + *value = 0; \ + } \ + template \ + inline void MapTypeHandler::Merge( \ + const MapEntryAccessorType& from, TypeOnMemory* to, \ + Arena* /* arena */) { \ + *to = from; \ + } \ + template \ + inline void MapTypeHandler::DeleteNoArena(TypeOnMemory& /* x */) {} \ + template \ + constexpr auto \ + MapTypeHandler::Constinit() \ + ->TypeOnMemory { \ + return 0; \ + } \ + template \ + inline typename MapTypeHandler::MapEntryAccessorType* \ + MapTypeHandler::EnsureMutable( \ + TypeOnMemory* value, Arena* /* arena */) { \ + return value; \ + } \ + template \ + inline const typename MapTypeHandler::MapEntryAccessorType& \ + MapTypeHandler::DefaultIfNotInitialized(const TypeOnMemory& value) { \ + return value; \ + } \ + template \ + inline bool \ + MapTypeHandler::IsInitialized( \ + const TypeOnMemory& /* value */) { \ + return true; \ } PRIMITIVE_HANDLER_FUNCTIONS(INT64) PRIMITIVE_HANDLER_FUNCTIONS(UINT64) @@ -734,6 +683,6 @@ PRIMITIVE_HANDLER_FUNCTIONS(BOOL) } // namespace internal } // namespace protobuf - } // namespace google + #endif // GOOGLE_PROTOBUF_TYPE_HANDLER_H__ diff --git a/third_party/protobuf-lite/google/protobuf/message_lite.h b/third_party/protobuf-lite/google/protobuf/message_lite.h index b8644142..a76c16e5 100644 --- a/third_party/protobuf-lite/google/protobuf/message_lite.h +++ 
b/third_party/protobuf-lite/google/protobuf/message_lite.h @@ -40,29 +40,53 @@ #define GOOGLE_PROTOBUF_MESSAGE_LITE_H__ #include +#include + #include #include -#include +#include #include -#include +#include +#include +#include +#include + + +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif namespace google { namespace protobuf { + template class RepeatedPtrField; + namespace io { + class CodedInputStream; class CodedOutputStream; class ZeroCopyInputStream; class ZeroCopyOutputStream; -} + +} // namespace io namespace internal { +// Tag type used to invoke the constinit constructor overload of some classes. +// Such constructors are internal implementation details of the library. +struct ConstantInitialized { + explicit ConstantInitialized() = default; +}; + +// See parse_context.h for explanation +class ParseContext; + class RepeatedPtrFieldBase; class WireFormatLite; class WeakFieldMap; -#ifndef SWIG // We compute sizes as size_t but cache them as int. This function converts a // computed size to a cached size. Since we don't proceed with serialization // if the total size was > INT_MAX, it is not important what this function @@ -98,22 +122,23 @@ inline int ToIntSize(size_t size) { // // Pay special attention to the initialization state of the object. // 1. The object is "uninitialized" to begin with. -// 2. Call DefaultConstruct() only if the object is uninitialized. -// After the call, the object becomes "initialized". +// 2. Call Construct() or DefaultConstruct() only if the object is +// uninitialized. After the call, the object becomes "initialized". // 3. Call get() and get_mutable() only if the object is initialized. // 4. Call Destruct() only if the object is initialized. // After the call, the object becomes uninitialized. 
template class ExplicitlyConstructed { public: - void DefaultConstruct() { - new (&union_) T(); - } + void DefaultConstruct() { new (&union_) T(); } - void Destruct() { - get_mutable()->~T(); + template + void Construct(Args&&... args) { + new (&union_) T(std::forward(args)...); } + void Destruct() { get_mutable()->~T(); } + constexpr const T& get() const { return reinterpret_cast(union_); } T* get_mutable() { return reinterpret_cast(&union_); } @@ -126,16 +151,31 @@ class ExplicitlyConstructed { } union_; }; +PROTOBUF_DISABLE_MSVC_UNION_WARNING +// We need a publicly accessible `value` object to allow constexpr +// support in C++11. +// A constexpr accessor does not work portably. +union EmptyString { + constexpr EmptyString() : dummy{} {} + ~EmptyString() {} + + // We need a dummy object for constant initialization. + std::false_type dummy; + std::string value; +}; +PROTOBUF_ENABLE_MSVC_UNION_WARNING + // Default empty string object. Don't use this directly. Instead, call // GetEmptyString() to get the reference. -LIBPROTOBUF_EXPORT extern ExplicitlyConstructed<::std::string> fixed_address_empty_string; +PROTOBUF_EXPORT extern EmptyString fixed_address_empty_string; + -LIBPROTOBUF_EXPORT inline const ::std::string& GetEmptyStringAlreadyInited() { - return fixed_address_empty_string.get(); +PROTOBUF_EXPORT constexpr const std::string& GetEmptyStringAlreadyInited() { + return fixed_address_empty_string.value; } -LIBPROTOBUF_EXPORT size_t StringSpaceUsedExcludingSelfLong(const string& str); -#endif // SWIG +PROTOBUF_EXPORT size_t StringSpaceUsedExcludingSelfLong(const std::string& str); + } // namespace internal // Interface to light weight protocol messages. @@ -161,15 +201,18 @@ LIBPROTOBUF_EXPORT size_t StringSpaceUsedExcludingSelfLong(const string& str); // is best when you only have a small number of message types linked // into your binary, in which case the size of the protocol buffers // runtime itself is the biggest problem. 
-class LIBPROTOBUF_EXPORT MessageLite { +// +// Users must not derive from this class. Only the protocol compiler and +// the internal library are allowed to create subclasses. +class PROTOBUF_EXPORT MessageLite { public: - inline MessageLite() {} - virtual ~MessageLite() {} + constexpr MessageLite() = default; + virtual ~MessageLite() = default; // Basic Operations ------------------------------------------------ // Get the name of this message type, e.g. "foo.bar.BazProto". - virtual string GetTypeName() const = 0; + virtual std::string GetTypeName() const = 0; // Construct a new instance of the same type. Ownership is passed to the // caller. @@ -177,14 +220,14 @@ class LIBPROTOBUF_EXPORT MessageLite { // Construct a new instance on the arena. Ownership is passed to the caller // if arena is a NULL. Default implementation for backwards compatibility. - virtual MessageLite* New(::google::protobuf::Arena* arena) const; + virtual MessageLite* New(Arena* arena) const; // Get the arena, if any, associated with this message. Virtual method // required for generic operations but most arena-related operations should - // use the GetArenaNoVirtual() generated-code method. Default implementation + // use the GetArena() generated-code method. Default implementation // to reduce code size by avoiding the need for per-type implementations // when types do not implement arena support. - virtual ::google::protobuf::Arena* GetArena() const { return NULL; } + Arena* GetArena() const { return _internal_metadata_.arena(); } // Get a pointer that may be equal to this message's arena, or may not be. // If the value returned by this method is equal to some arena pointer, then @@ -195,7 +238,9 @@ class LIBPROTOBUF_EXPORT MessageLite { // store the arena pointer directly, and sometimes in a more indirect way, // and allow a fastpath comparison against the arena pointer when it's easy // to obtain. 
- virtual void* GetMaybeArenaPointer() const { return GetArena(); } + void* GetMaybeArenaPointer() const { + return _internal_metadata_.raw_arena_ptr(); + } // Clear all fields of the message and set them to their default values. // Clear() avoids freeing memory, assuming that any memory allocated @@ -210,12 +255,27 @@ class LIBPROTOBUF_EXPORT MessageLite { // This is not implemented for Lite messages -- it just returns "(cannot // determine missing fields for lite message)". However, it is implemented // for full messages. See message.h. - virtual string InitializationErrorString() const; + virtual std::string InitializationErrorString() const; // If |other| is the exact same class as this, calls MergeFrom(). Otherwise, // results are undefined (probably crash). virtual void CheckTypeAndMergeFrom(const MessageLite& other) = 0; + // These methods return a human-readable summary of the message. Note that + // since the MessageLite interface does not support reflection, there is very + // little information that these methods can provide. They are shadowed by + // methods of the same name on the Message interface which provide much more + // information. The methods here are intended primarily to facilitate code + // reuse for logic that needs to interoperate with both full and lite protos. + // + // The format of the returned string is subject to change, so please do not + // assume it will remain stable over time. + std::string DebugString() const; + std::string ShortDebugString() const { return DebugString(); } + // MessageLite::DebugString is already Utf8 Safe. This is to add compatibility + // with Message. + std::string Utf8DebugString() const { return DebugString(); } + // Parsing --------------------------------------------------------- // Methods for parsing in protocol buffer format. Most of these are // just simple wrappers around MergeFromCodedStream(). Clear() will be @@ -226,38 +286,66 @@ class LIBPROTOBUF_EXPORT MessageLite { // format. 
A successful return does not indicate the entire input is // consumed, ensure you call ConsumedEntireMessage() to check that if // applicable. - bool ParseFromCodedStream(io::CodedInputStream* input); + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParseFromCodedStream( + io::CodedInputStream* input); // Like ParseFromCodedStream(), but accepts messages that are missing // required fields. - bool ParsePartialFromCodedStream(io::CodedInputStream* input); + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParsePartialFromCodedStream( + io::CodedInputStream* input); // Read a protocol buffer from the given zero-copy input stream. If // successful, the entire input will be consumed. - bool ParseFromZeroCopyStream(io::ZeroCopyInputStream* input); + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParseFromZeroCopyStream( + io::ZeroCopyInputStream* input); // Like ParseFromZeroCopyStream(), but accepts messages that are missing // required fields. - bool ParsePartialFromZeroCopyStream(io::ZeroCopyInputStream* input); + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParsePartialFromZeroCopyStream( + io::ZeroCopyInputStream* input); + // Parse a protocol buffer from a file descriptor. If successful, the entire + // input will be consumed. + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParseFromFileDescriptor( + int file_descriptor); + // Like ParseFromFileDescriptor(), but accepts messages that are missing + // required fields. + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParsePartialFromFileDescriptor( + int file_descriptor); + // Parse a protocol buffer from a C++ istream. If successful, the entire + // input will be consumed. + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParseFromIstream(std::istream* input); + // Like ParseFromIstream(), but accepts messages that are missing + // required fields. + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParsePartialFromIstream( + std::istream* input); // Read a protocol buffer from the given zero-copy input stream, expecting // the message to be exactly "size" bytes long. 
If successful, exactly // this many bytes will have been consumed from the input. - bool ParseFromBoundedZeroCopyStream(io::ZeroCopyInputStream* input, int size); + bool MergePartialFromBoundedZeroCopyStream(io::ZeroCopyInputStream* input, + int size); // Like ParseFromBoundedZeroCopyStream(), but accepts messages that are // missing required fields. - bool ParsePartialFromBoundedZeroCopyStream(io::ZeroCopyInputStream* input, - int size); + bool MergeFromBoundedZeroCopyStream(io::ZeroCopyInputStream* input, int size); + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParseFromBoundedZeroCopyStream( + io::ZeroCopyInputStream* input, int size); + // Like ParseFromBoundedZeroCopyStream(), but accepts messages that are + // missing required fields. + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParsePartialFromBoundedZeroCopyStream( + io::ZeroCopyInputStream* input, int size); // Parses a protocol buffer contained in a string. Returns true on success. // This function takes a string in the (non-human-readable) binary wire // format, matching the encoding output by MessageLite::SerializeToString(). // If you'd like to convert a human-readable string into a protocol buffer // object, see google::protobuf::TextFormat::ParseFromString(). - bool ParseFromString(const string& data); + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParseFromString(ConstStringParam data); // Like ParseFromString(), but accepts messages that are missing // required fields. - bool ParsePartialFromString(const string& data); + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParsePartialFromString( + ConstStringParam data); // Parse a protocol buffer contained in an array of bytes. - bool ParseFromArray(const void* data, int size); + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParseFromArray(const void* data, + int size); // Like ParseFromArray(), but accepts messages that are missing // required fields. 
- bool ParsePartialFromArray(const void* data, int size); + PROTOBUF_ATTRIBUTE_REINITIALIZES bool ParsePartialFromArray(const void* data, + int size); // Reads a protocol buffer from the stream and merges it into this @@ -269,7 +357,7 @@ class LIBPROTOBUF_EXPORT MessageLite { // (for groups) or input->ConsumedEntireMessage() (for non-groups) after // this returns to verify that the message's end was delimited correctly. // - // ParsefromCodedStream() is implemented as Clear() followed by + // ParseFromCodedStream() is implemented as Clear() followed by // MergeFromCodedStream(). bool MergeFromCodedStream(io::CodedInputStream* input); @@ -278,7 +366,10 @@ class LIBPROTOBUF_EXPORT MessageLite { // // MergeFromCodedStream() is just implemented as MergePartialFromCodedStream() // followed by IsInitialized(). - virtual bool MergePartialFromCodedStream(io::CodedInputStream* input) = 0; + bool MergePartialFromCodedStream(io::CodedInputStream* input); + + // Merge a protocol buffer contained in a string. + bool MergeFromString(ConstStringParam data); // Serialization --------------------------------------------------- @@ -298,9 +389,9 @@ class LIBPROTOBUF_EXPORT MessageLite { bool SerializePartialToZeroCopyStream(io::ZeroCopyOutputStream* output) const; // Serialize the message and store it in the given string. All required // fields must be set. - bool SerializeToString(string* output) const; + bool SerializeToString(std::string* output) const; // Like SerializeToString(), but allows missing required fields. - bool SerializePartialToString(string* output) const; + bool SerializePartialToString(std::string* output) const; // Serialize the message and store it in the given byte array. All required // fields must be set. 
bool SerializeToArray(void* data, int size) const; @@ -313,15 +404,27 @@ class LIBPROTOBUF_EXPORT MessageLite { // Note: If you intend to generate many such strings, you may // reduce heap fragmentation by instead re-using the same string // object with calls to SerializeToString(). - string SerializeAsString() const; + std::string SerializeAsString() const; // Like SerializeAsString(), but allows missing required fields. - string SerializePartialAsString() const; - - // Like SerializeToString(), but appends to the data to the string's existing - // contents. All required fields must be set. - bool AppendToString(string* output) const; + std::string SerializePartialAsString() const; + + // Serialize the message and write it to the given file descriptor. All + // required fields must be set. + bool SerializeToFileDescriptor(int file_descriptor) const; + // Like SerializeToFileDescriptor(), but allows missing required fields. + bool SerializePartialToFileDescriptor(int file_descriptor) const; + // Serialize the message and write it to the given C++ ostream. All + // required fields must be set. + bool SerializeToOstream(std::ostream* output) const; + // Like SerializeToOstream(), but allows missing required fields. + bool SerializePartialToOstream(std::ostream* output) const; + + // Like SerializeToString(), but appends to the data to the string's + // existing contents. All required fields must be set. + bool AppendToString(std::string* output) const; // Like AppendToString(), but allows missing required fields. - bool AppendPartialToString(string* output) const; + bool AppendPartialToString(std::string* output) const; + // Computes the serialized size of the message. This recursively calls // ByteSizeLong() on all embedded messages. @@ -331,16 +434,15 @@ class LIBPROTOBUF_EXPORT MessageLite { virtual size_t ByteSizeLong() const = 0; // Legacy ByteSize() API. 
- PROTOBUF_RUNTIME_DEPRECATED("Please use ByteSizeLong() instead") - int ByteSize() const { - return internal::ToIntSize(ByteSizeLong()); - } + PROTOBUF_DEPRECATED_MSG("Please use ByteSizeLong() instead") + int ByteSize() const { return internal::ToIntSize(ByteSizeLong()); } // Serializes the message without recomputing the size. The message must not // have changed since the last call to ByteSize(), and the value returned by // ByteSize must be non-negative. Otherwise the results are undefined. - virtual void SerializeWithCachedSizes( - io::CodedOutputStream* output) const; + void SerializeWithCachedSizes(io::CodedOutputStream* output) const { + output->SetCur(_InternalSerialize(output->Cur(), output->EpsCopy())); + } // Functions below here are not part of the public interface. It isn't // enforced, but they should be treated as private, and will be private @@ -352,7 +454,7 @@ class LIBPROTOBUF_EXPORT MessageLite { // must point at a byte array of at least ByteSize() bytes. Whether to use // deterministic serialization, e.g., maps in sorted order, is determined by // CodedOutputStream::IsDefaultSerializationDeterministic(). - virtual uint8* SerializeWithCachedSizesToArray(uint8* target) const; + uint8* SerializeWithCachedSizesToArray(uint8* target) const; // Returns the result of the last call to ByteSize(). An embedded message's // size is needed both to serialize it (because embedded messages are @@ -367,30 +469,48 @@ class LIBPROTOBUF_EXPORT MessageLite { // method.) virtual int GetCachedSize() const = 0; - virtual uint8* InternalSerializeWithCachedSizesToArray(bool deterministic, - uint8* target) const; - - protected: - // CastToBase allows generated code to cast a RepeatedPtrField to - // RepeatedPtrFieldBase. We try to restrict access to RepeatedPtrFieldBase - // because it is an implementation detail that user code should not access - // directly. 
- template - static ::google::protobuf::internal::RepeatedPtrFieldBase* CastToBase( - ::google::protobuf::RepeatedPtrField* repeated) { - return repeated; - } - template - static const ::google::protobuf::internal::RepeatedPtrFieldBase& CastToBase( - const ::google::protobuf::RepeatedPtrField& repeated) { - return repeated; + virtual const char* _InternalParse(const char* /*ptr*/, + internal::ParseContext* /*ctx*/) { + return nullptr; } + protected: template static T* CreateMaybeMessage(Arena* arena) { return Arena::CreateMaybeMessage(arena); } + inline explicit MessageLite(Arena* arena) : _internal_metadata_(arena) {} + + internal::InternalMetadata _internal_metadata_; + + public: + enum ParseFlags { + kMerge = 0, + kParse = 1, + kMergePartial = 2, + kParsePartial = 3, + kMergeWithAliasing = 4, + kParseWithAliasing = 5, + kMergePartialWithAliasing = 6, + kParsePartialWithAliasing = 7 + }; + + template + bool ParseFrom(const T& input); + + // Fast path when conditions match (ie. non-deterministic) + // uint8* _InternalSerialize(uint8* ptr) const; + virtual uint8* _InternalSerialize(uint8* ptr, + io::EpsCopyOutputStream* stream) const = 0; + + // Identical to IsInitialized() except that it logs an error message. 
+ bool IsInitializedWithErrors() const { + if (IsInitialized()) return true; + LogInitializationErrorMessage(); + return false; + } + private: // TODO(gerbens) make this a pure abstract function virtual const void* InternalGetTable() const { return NULL; } @@ -399,26 +519,103 @@ class LIBPROTOBUF_EXPORT MessageLite { friend class Message; friend class internal::WeakFieldMap; + void LogInitializationErrorMessage() const; + + bool MergeFromImpl(io::CodedInputStream* input, ParseFlags parse_flags); + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MessageLite); }; namespace internal { -extern bool LIBPROTOBUF_EXPORT proto3_preserve_unknown_; +template +bool MergeFromImpl(StringPiece input, MessageLite* msg, + MessageLite::ParseFlags parse_flags); +extern template bool MergeFromImpl(StringPiece input, + MessageLite* msg, + MessageLite::ParseFlags parse_flags); +extern template bool MergeFromImpl(StringPiece input, + MessageLite* msg, + MessageLite::ParseFlags parse_flags); + +template +bool MergeFromImpl(io::ZeroCopyInputStream* input, MessageLite* msg, + MessageLite::ParseFlags parse_flags); +extern template bool MergeFromImpl(io::ZeroCopyInputStream* input, + MessageLite* msg, + MessageLite::ParseFlags parse_flags); +extern template bool MergeFromImpl(io::ZeroCopyInputStream* input, + MessageLite* msg, + MessageLite::ParseFlags parse_flags); + +struct BoundedZCIS { + io::ZeroCopyInputStream* zcis; + int limit; +}; + +template +bool MergeFromImpl(BoundedZCIS input, MessageLite* msg, + MessageLite::ParseFlags parse_flags); +extern template bool MergeFromImpl(BoundedZCIS input, MessageLite* msg, + MessageLite::ParseFlags parse_flags); +extern template bool MergeFromImpl(BoundedZCIS input, MessageLite* msg, + MessageLite::ParseFlags parse_flags); + +template +struct SourceWrapper; -// DO NOT USE: For migration only. Will be removed when Proto3 defaults to -// preserve unknowns. 
-inline bool GetProto3PreserveUnknownsDefault() { - return proto3_preserve_unknown_; +template +bool MergeFromImpl(const SourceWrapper& input, MessageLite* msg, + MessageLite::ParseFlags parse_flags) { + return input.template MergeInto(msg, parse_flags); } -// DO NOT USE: For migration only. Will be removed when Proto3 defaults to -// preserve unknowns. -void LIBPROTOBUF_EXPORT SetProto3PreserveUnknownsDefault(bool preserve); } // namespace internal +template +bool MessageLite::ParseFrom(const T& input) { + if (flags & kParse) Clear(); + constexpr bool alias = (flags & kMergeWithAliasing) != 0; + return internal::MergeFromImpl(input, this, flags); +} -} // namespace protobuf +// =================================================================== +// Shutdown support. + + +// Shut down the entire protocol buffers library, deleting all static-duration +// objects allocated by the library or by generated .pb.cc files. +// +// There are two reasons you might want to call this: +// * You use a draconian definition of "memory leak" in which you expect +// every single malloc() to have a corresponding free(), even for objects +// which live until program exit. +// * You are writing a dynamically-loaded library which needs to clean up +// after itself when the library is unloaded. +// +// It is safe to call this multiple times. However, it is not safe to use +// any other part of the protocol buffers library after +// ShutdownProtobufLibrary() has been called. Furthermore this call is not +// thread safe, user needs to synchronize multiple calls. +PROTOBUF_EXPORT void ShutdownProtobufLibrary(); + +namespace internal { +// Register a function to be called when ShutdownProtocolBuffers() is called. 
+PROTOBUF_EXPORT void OnShutdown(void (*func)()); +// Run an arbitrary function on an arg +PROTOBUF_EXPORT void OnShutdownRun(void (*f)(const void*), const void* arg); + +template +T* OnShutdownDelete(T* p) { + OnShutdownRun([](const void* pp) { delete static_cast(pp); }, p); + return p; +} + +} // namespace internal +} // namespace protobuf } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_MESSAGE_LITE_H__ diff --git a/third_party/protobuf-lite/google/protobuf/metadata_lite.h b/third_party/protobuf-lite/google/protobuf/metadata_lite.h index 454d088c..ec5f9238 100644 --- a/third_party/protobuf-lite/google/protobuf/metadata_lite.h +++ b/third_party/protobuf-lite/google/protobuf/metadata_lite.h @@ -31,10 +31,16 @@ #ifndef GOOGLE_PROTOBUF_METADATA_LITE_H__ #define GOOGLE_PROTOBUF_METADATA_LITE_H__ +#include #include #include -#include -#include +#include + +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif namespace google { namespace protobuf { @@ -50,48 +56,54 @@ namespace internal { // The tagged pointer uses the LSB to disambiguate cases, and uses bit 0 == 0 to // indicate an arena pointer and bit 0 == 1 to indicate a UFS+Arena-container // pointer. -template -class InternalMetadataWithArenaBase { +class InternalMetadata { public: - InternalMetadataWithArenaBase() : ptr_(NULL) {} - explicit InternalMetadataWithArenaBase(Arena* arena) : ptr_(arena) {} + constexpr InternalMetadata() : ptr_(nullptr) {} + explicit InternalMetadata(Arena* arena) : ptr_(arena) {} - ~InternalMetadataWithArenaBase() { + template + void Delete() { + // Note that Delete<> should be called not more than once. 
if (have_unknown_fields() && arena() == NULL) { - delete PtrValue(); + delete PtrValue>(); } - ptr_ = NULL; } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE const T& unknown_fields() const { - if (GOOGLE_PREDICT_FALSE(have_unknown_fields())) { - return PtrValue()->unknown_fields; + PROTOBUF_ALWAYS_INLINE Arena* arena() const { + if (PROTOBUF_PREDICT_FALSE(have_unknown_fields())) { + return PtrValue()->arena; } else { - return Derived::default_instance(); + return PtrValue(); } } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE T* mutable_unknown_fields() { - if (GOOGLE_PREDICT_TRUE(have_unknown_fields())) { - return &PtrValue()->unknown_fields; - } else { - return mutable_unknown_fields_slow(); - } + PROTOBUF_ALWAYS_INLINE bool have_unknown_fields() const { + return PtrTag() == kTagContainer; } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE Arena* arena() const { - if (GOOGLE_PREDICT_FALSE(have_unknown_fields())) { - return PtrValue()->arena; + PROTOBUF_ALWAYS_INLINE void* raw_arena_ptr() const { return ptr_; } + + template + PROTOBUF_ALWAYS_INLINE const T& unknown_fields( + const T& (*default_instance)()) const { + if (PROTOBUF_PREDICT_FALSE(have_unknown_fields())) { + return PtrValue>()->unknown_fields; } else { - return PtrValue(); + return default_instance(); } } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE bool have_unknown_fields() const { - return PtrTag() == kTagContainer; + template + PROTOBUF_ALWAYS_INLINE T* mutable_unknown_fields() { + if (PROTOBUF_PREDICT_TRUE(have_unknown_fields())) { + return &PtrValue>()->unknown_fields; + } else { + return mutable_unknown_fields_slow(); + } } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void Swap(Derived* other) { + template + PROTOBUF_ALWAYS_INLINE void Swap(InternalMetadata* other) { // Semantics here are that we swap only the unknown fields, not the arena // pointer. We cannot simply swap ptr_ with other->ptr_ because we need to // maintain our own arena ptr. 
Also, our ptr_ and other's ptr_ may be in @@ -99,26 +111,24 @@ class InternalMetadataWithArenaBase { // cannot simply swap ptr_ and then restore the arena pointers. We reuse // UFS's swap implementation instead. if (have_unknown_fields() || other->have_unknown_fields()) { - static_cast(this)->DoSwap(other->mutable_unknown_fields()); + DoSwap(other->mutable_unknown_fields()); } } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void MergeFrom(const Derived& other) { + template + PROTOBUF_ALWAYS_INLINE void MergeFrom(const InternalMetadata& other) { if (other.have_unknown_fields()) { - static_cast(this)->DoMergeFrom(other.unknown_fields()); + DoMergeFrom(other.unknown_fields(nullptr)); } } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void Clear() { + template + PROTOBUF_ALWAYS_INLINE void Clear() { if (have_unknown_fields()) { - static_cast(this)->DoClear(); + DoClear(); } } - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE void* raw_arena_ptr() const { - return ptr_; - } - private: void* ptr_; @@ -129,96 +139,110 @@ class InternalMetadataWithArenaBase { // ptr_ is a Container*. kTagContainer = 1, }; - static const intptr_t kPtrTagMask = 1; - static const intptr_t kPtrValueMask = ~kPtrTagMask; + static constexpr intptr_t kPtrTagMask = 1; + static constexpr intptr_t kPtrValueMask = ~kPtrTagMask; // Accessors for pointer tag and pointer value. - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE int PtrTag() const { + PROTOBUF_ALWAYS_INLINE int PtrTag() const { return reinterpret_cast(ptr_) & kPtrTagMask; } - template U* PtrValue() const { - return reinterpret_cast( - reinterpret_cast(ptr_) & kPtrValueMask); + template + U* PtrValue() const { + return reinterpret_cast(reinterpret_cast(ptr_) & + kPtrValueMask); } // If ptr_'s tag is kTagContainer, it points to an instance of this struct. 
- struct Container { - T unknown_fields; + struct ContainerBase { Arena* arena; }; - GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE T* mutable_unknown_fields_slow() { + template + struct Container : public ContainerBase { + T unknown_fields; + }; + + template + PROTOBUF_NOINLINE T* mutable_unknown_fields_slow() { Arena* my_arena = arena(); - Container* container = Arena::Create(my_arena); + Container* container = Arena::Create>(my_arena); // Two-step assignment works around a bug in clang's static analyzer: // https://bugs.llvm.org/show_bug.cgi?id=34198. ptr_ = container; - ptr_ = reinterpret_cast( - reinterpret_cast(ptr_) | kTagContainer); + ptr_ = reinterpret_cast(reinterpret_cast(ptr_) | + kTagContainer); container->arena = my_arena; return &(container->unknown_fields); } -}; - -// We store unknown fields as a string right now, because there is currently no -// good interface for reading unknown fields into an ArenaString. We may want -// to revisit this to allow unknown fields to be parsed onto the Arena. -class InternalMetadataWithArenaLite - : public InternalMetadataWithArenaBase { - public: - InternalMetadataWithArenaLite() {} - explicit InternalMetadataWithArenaLite(Arena* arena) - : InternalMetadataWithArenaBase(arena) {} + // Templated functions. - void DoSwap(string* other) { - mutable_unknown_fields()->swap(*other); - } - - void DoMergeFrom(const string& other) { - mutable_unknown_fields()->append(other); + template + void DoClear() { + mutable_unknown_fields()->Clear(); } - void DoClear() { - mutable_unknown_fields()->clear(); + template + void DoMergeFrom(const T& other) { + mutable_unknown_fields()->MergeFrom(other); } - static const string& default_instance() { - return GetEmptyStringAlreadyInited(); + template + void DoSwap(T* other) { + mutable_unknown_fields()->Swap(other); } }; +// String Template specializations. 
+ +template <> +inline void InternalMetadata::DoClear() { + mutable_unknown_fields()->clear(); +} + +template <> +inline void InternalMetadata::DoMergeFrom( + const std::string& other) { + mutable_unknown_fields()->append(other); +} + +template <> +inline void InternalMetadata::DoSwap(std::string* other) { + mutable_unknown_fields()->swap(*other); +} + // This helper RAII class is needed to efficiently parse unknown fields. We // should only call mutable_unknown_fields if there are actual unknown fields. -// The obvious thing to just use a stack string and swap it at the end of the -// parse won't work, because the destructor of StringOutputStream needs to be -// called before we can modify the string (it check-fails). Using +// The obvious thing to just use a stack string and swap it at the end of +// the parse won't work, because the destructor of StringOutputStream needs to +// be called before we can modify the string (it check-fails). Using // LiteUnknownFieldSetter setter(&_internal_metadata_); // StringOutputStream stream(setter.buffer()); // guarantees that the string is only swapped after stream is destroyed. 
-class LIBPROTOBUF_EXPORT LiteUnknownFieldSetter { +class PROTOBUF_EXPORT LiteUnknownFieldSetter { public: - explicit LiteUnknownFieldSetter(InternalMetadataWithArenaLite* metadata) + explicit LiteUnknownFieldSetter(InternalMetadata* metadata) : metadata_(metadata) { if (metadata->have_unknown_fields()) { - buffer_.swap(*metadata->mutable_unknown_fields()); + buffer_.swap(*metadata->mutable_unknown_fields()); } } ~LiteUnknownFieldSetter() { - if (!buffer_.empty()) metadata_->mutable_unknown_fields()->swap(buffer_); + if (!buffer_.empty()) + metadata_->mutable_unknown_fields()->swap(buffer_); } - string* buffer() { return &buffer_; } + std::string* buffer() { return &buffer_; } private: - InternalMetadataWithArenaLite* metadata_; - string buffer_; + InternalMetadata* metadata_; + std::string buffer_; }; } // namespace internal } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_METADATA_LITE_H__ diff --git a/third_party/protobuf-lite/google/protobuf/parse_context.h b/third_party/protobuf-lite/google/protobuf/parse_context.h new file mode 100644 index 00000000..66100858 --- /dev/null +++ b/third_party/protobuf-lite/google/protobuf/parse_context.h @@ -0,0 +1,869 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__ +#define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +namespace google { +namespace protobuf { + +class UnknownFieldSet; +class DescriptorPool; +class MessageFactory; + +namespace internal { + +// Template code below needs to know about the existence of these functions. +PROTOBUF_EXPORT void WriteVarint(uint32 num, uint64 val, std::string* s); +PROTOBUF_EXPORT void WriteLengthDelimited(uint32 num, StringPiece val, + std::string* s); +// Inline because it is just forwarding to s->WriteVarint +inline void WriteVarint(uint32 num, uint64 val, UnknownFieldSet* s); +inline void WriteLengthDelimited(uint32 num, StringPiece val, + UnknownFieldSet* s); + + +// The basic abstraction the parser is designed for is a slight modification +// of the ZeroCopyInputStream (ZCIS) abstraction. 
A ZCIS presents a serialized +// stream as a series of buffers that concatenate to the full stream. +// Pictorially a ZCIS presents a stream in chunks like so +// [---------------------------------------------------------------] +// [---------------------] chunk 1 +// [----------------------------] chunk 2 +// chunk 3 [--------------] +// +// Where the '-' represent the bytes which are vertically lined up with the +// bytes of the stream. The proto parser requires its input to be presented +// similarly with the extra +// property that each chunk has kSlopBytes past its end that overlaps with the +// first kSlopBytes of the next chunk, or if there is no next chunk at least its +// still valid to read those bytes. Again, pictorially, we now have +// +// [---------------------------------------------------------------] +// [-------------------....] chunk 1 +// [------------------------....] chunk 2 +// chunk 3 [------------------..**] +// chunk 4 [--****] +// Here '-' mean the bytes of the stream or chunk and '.' means bytes past the +// chunk that match up with the start of the next chunk. Above each chunk has +// 4 '.' after the chunk. In the case these 'overflow' bytes represents bytes +// past the stream, indicated by '*' above, their values are unspecified. It is +// still legal to read them (ie. should not segfault). Reading past the +// end should be detected by the user and indicated as an error. +// +// The reason for this, admittedly, unconventional invariant is to ruthlessly +// optimize the protobuf parser. Having an overlap helps in two important ways. +// Firstly it alleviates having to performing bounds checks if a piece of code +// is guaranteed to not read more than kSlopBytes. Secondly, and more +// importantly, the protobuf wireformat is such that reading a key/value pair is +// always less than 16 bytes. This removes the need to change to next buffer in +// the middle of reading primitive values. 
Hence there is no need to store and +// load the current position. + +class PROTOBUF_EXPORT EpsCopyInputStream { + public: + enum { kSlopBytes = 16, kMaxCordBytesToCopy = 512 }; + + explicit EpsCopyInputStream(bool enable_aliasing) + : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {} + + void BackUp(const char* ptr) { + GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes); + int count; + if (next_chunk_ == buffer_) { + count = static_cast(buffer_end_ + kSlopBytes - ptr); + } else { + count = size_ + static_cast(buffer_end_ - ptr); + } + if (count > 0) StreamBackUp(count); + } + + // If return value is negative it's an error + PROTOBUF_MUST_USE_RESULT int PushLimit(const char* ptr, int limit) { + GOOGLE_DCHECK(limit >= 0 && limit <= INT_MAX - kSlopBytes); + // This add is safe due to the invariant above, because + // ptr - buffer_end_ <= kSlopBytes. + limit += static_cast(ptr - buffer_end_); + limit_end_ = buffer_end_ + (std::min)(0, limit); + auto old_limit = limit_; + limit_ = limit; + return old_limit - limit; + } + + PROTOBUF_MUST_USE_RESULT bool PopLimit(int delta) { + if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false; + limit_ = limit_ + delta; + // TODO(gerbens) We could remove this line and hoist the code to + // DoneFallback. Study the perf/bin-size effects. 
+ limit_end_ = buffer_end_ + (std::min)(0, limit_); + return true; + } + + PROTOBUF_MUST_USE_RESULT const char* Skip(const char* ptr, int size) { + if (size <= buffer_end_ + kSlopBytes - ptr) { + return ptr + size; + } + return SkipFallback(ptr, size); + } + PROTOBUF_MUST_USE_RESULT const char* ReadString(const char* ptr, int size, + std::string* s) { + if (size <= buffer_end_ + kSlopBytes - ptr) { + s->assign(ptr, size); + return ptr + size; + } + return ReadStringFallback(ptr, size, s); + } + PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr, int size, + std::string* s) { + if (size <= buffer_end_ + kSlopBytes - ptr) { + s->append(ptr, size); + return ptr + size; + } + return AppendStringFallback(ptr, size, s); + } + + template + PROTOBUF_MUST_USE_RESULT const char* ReadRepeatedFixed(const char* ptr, + Tag expected_tag, + RepeatedField* out); + + template + PROTOBUF_MUST_USE_RESULT const char* ReadPackedFixed(const char* ptr, + int size, + RepeatedField* out); + template + PROTOBUF_MUST_USE_RESULT const char* ReadPackedVarint(const char* ptr, + Add add); + + uint32 LastTag() const { return last_tag_minus_1_ + 1; } + bool ConsumeEndGroup(uint32 start_tag) { + bool res = last_tag_minus_1_ == start_tag; + last_tag_minus_1_ = 0; + return res; + } + bool EndedAtLimit() const { return last_tag_minus_1_ == 0; } + bool EndedAtEndOfStream() const { return last_tag_minus_1_ == 1; } + void SetLastTag(uint32 tag) { last_tag_minus_1_ = tag - 1; } + void SetEndOfStream() { last_tag_minus_1_ = 1; } + bool IsExceedingLimit(const char* ptr) { + return ptr > limit_end_ && + (next_chunk_ == nullptr || ptr - buffer_end_ > limit_); + } + int BytesUntilLimit(const char* ptr) const { + return limit_ + static_cast(buffer_end_ - ptr); + } + // Returns true if more data is available, if false is returned one has to + // call Done for further checks. 
+ bool DataAvailable(const char* ptr) { return ptr < limit_end_; } + + protected: + // Returns true is limit (either an explicit limit or end of stream) is + // reached. It aligns *ptr across buffer seams. + // If limit is exceeded it returns true and ptr is set to null. + bool DoneWithCheck(const char** ptr, int d) { + GOOGLE_DCHECK(*ptr); + if (PROTOBUF_PREDICT_TRUE(*ptr < limit_end_)) return false; + int overrun = *ptr - buffer_end_; + GOOGLE_DCHECK_LE(overrun, kSlopBytes); // Guaranteed by parse loop. + if (overrun == + limit_) { // No need to flip buffers if we ended on a limit. + // If we actually overrun the buffer and next_chunk_ is null. It means + // the stream ended and we passed the stream end. + if (overrun > 0 && next_chunk_ == nullptr) *ptr = nullptr; + return true; + } + auto res = DoneFallback(overrun, d); + *ptr = res.first; + return res.second; + } + + const char* InitFrom(StringPiece flat) { + overall_limit_ = 0; + if (flat.size() > kSlopBytes) { + limit_ = kSlopBytes; + limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes; + next_chunk_ = buffer_; + if (aliasing_ == kOnPatch) aliasing_ = kNoDelta; + return flat.data(); + } else { + std::memcpy(buffer_, flat.data(), flat.size()); + limit_ = 0; + limit_end_ = buffer_end_ = buffer_ + flat.size(); + next_chunk_ = nullptr; + if (aliasing_ == kOnPatch) { + aliasing_ = reinterpret_cast(flat.data()) - + reinterpret_cast(buffer_); + } + return buffer_; + } + } + + const char* InitFrom(io::ZeroCopyInputStream* zcis); + + const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) { + if (limit == -1) return InitFrom(zcis); + overall_limit_ = limit; + auto res = InitFrom(zcis); + limit_ = limit - static_cast(buffer_end_ - res); + limit_end_ = buffer_end_ + (std::min)(0, limit_); + return res; + } + + private: + const char* limit_end_; // buffer_end_ + min(limit_, 0) + const char* buffer_end_; + const char* next_chunk_; + int size_; + int limit_; // relative to buffer_end_; + 
io::ZeroCopyInputStream* zcis_ = nullptr; + char buffer_[2 * kSlopBytes] = {}; + enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 }; + std::uintptr_t aliasing_ = kNoAliasing; + // This variable is used to communicate how the parse ended, in order to + // completely verify the parsed data. A wire-format parse can end because of + // one of the following conditions: + // 1) A parse can end on a pushed limit. + // 2) A parse can end on End Of Stream (EOS). + // 3) A parse can end on 0 tag (only valid for toplevel message). + // 4) A parse can end on an end-group tag. + // This variable should always be set to 0, which indicates case 1. If the + // parse terminated due to EOS (case 2), it's set to 1. In case the parse + // ended due to a terminating tag (case 3 and 4) it's set to (tag - 1). + // This var doesn't really belong in EpsCopyInputStream and should be part of + // the ParseContext, but case 2 is most easily and optimally implemented in + // DoneFallback. + uint32 last_tag_minus_1_ = 0; + int overall_limit_ = INT_MAX; // Overall limit independent of pushed limits. + // Pretty random large number that seems like a safe allocation on most + // systems. TODO(gerbens) do we need to set this as build flag? + enum { kSafeStringSize = 50000000 }; + + // Advances to next buffer chunk returns a pointer to the same logical place + // in the stream as set by overrun. Overrun indicates the position in the slop + // region the parse was left (0 <= overrun <= kSlopBytes). Returns true if at + // limit, at which point the returned pointer maybe null if there was an + // error. The invariant of this function is that it's guaranteed that + // kSlopBytes bytes can be accessed from the returned ptr. This function might + // advance more buffers than one in the underlying ZeroCopyInputStream. + std::pair DoneFallback(int overrun, int depth); + // Advances to the next buffer, at most one call to Next() on the underlying + // ZeroCopyInputStream is made. 
This function DOES NOT match the returned + // pointer to where in the slop region the parse ends, hence no overrun + // parameter. This is useful for string operations where you always copy + // to the end of the buffer (including the slop region). + const char* Next(); + // overrun is the location in the slop region the stream currently is + // (0 <= overrun <= kSlopBytes). To prevent flipping to the next buffer of + // the ZeroCopyInputStream in the case the parse will end in the last + // kSlopBytes of the current buffer. depth is the current depth of nested + // groups (or negative if the use case does not need careful tracking). + inline const char* NextBuffer(int overrun, int depth); + const char* SkipFallback(const char* ptr, int size); + const char* AppendStringFallback(const char* ptr, int size, std::string* str); + const char* ReadStringFallback(const char* ptr, int size, std::string* str); + bool StreamNext(const void** data) { + bool res = zcis_->Next(data, &size_); + if (res) overall_limit_ -= size_; + return res; + } + void StreamBackUp(int count) { + zcis_->BackUp(count); + overall_limit_ += count; + } + + template + const char* AppendSize(const char* ptr, int size, const A& append) { + int chunk_size = buffer_end_ + kSlopBytes - ptr; + do { + GOOGLE_DCHECK(size > chunk_size); + if (next_chunk_ == nullptr) return nullptr; + append(ptr, chunk_size); + ptr += chunk_size; + size -= chunk_size; + // TODO(gerbens) Next calls NextBuffer which generates buffers with + // overlap and thus incurs cost of copying the slop regions. This is not + // necessary for reading strings. We should just call Next buffers. + if (limit_ <= kSlopBytes) return nullptr; + ptr = Next(); + if (ptr == nullptr) return nullptr; // passed the limit + ptr += kSlopBytes; + chunk_size = buffer_end_ + kSlopBytes - ptr; + } while (size > chunk_size); + append(ptr, size); + return ptr + size; + } + + // AppendUntilEnd appends data until a limit (either a PushLimit or end of + // stream. 
Normal payloads are from length delimited fields which have an + // explicit size. Reading until limit only comes when the string takes + // the place of a protobuf, ie RawMessage/StringRawMessage, lazy fields and + // implicit weak messages. We keep these methods private and friend them. + template + const char* AppendUntilEnd(const char* ptr, const A& append) { + if (ptr - buffer_end_ > limit_) return nullptr; + while (limit_ > kSlopBytes) { + int chunk_size = buffer_end_ + kSlopBytes - ptr; + GOOGLE_DCHECK_GE(chunk_size, 0); + append(ptr, chunk_size); + ptr = Next(); + if (ptr == nullptr) return limit_end_; + ptr += kSlopBytes; + } + auto end = buffer_end_ + limit_; + GOOGLE_DCHECK(end >= ptr); + append(ptr, end - ptr); + return end; + } + + PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr, + std::string* str) { + return AppendUntilEnd( + ptr, [str](const char* p, ptrdiff_t s) { str->append(p, s); }); + } + friend class ImplicitWeakMessage; +}; + +// ParseContext holds all data that is global to the entire parse. Most +// importantly it contains the input stream, but also recursion depth and also +// stores the end group tag, in case a parser ended on a endgroup, to verify +// matching start/end group tags. +class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream { + public: + struct Data { + const DescriptorPool* pool = nullptr; + MessageFactory* factory = nullptr; + }; + + template + ParseContext(int depth, bool aliasing, const char** start, T&&... 
args) + : EpsCopyInputStream(aliasing), depth_(depth) { + *start = InitFrom(std::forward(args)...); + } + + void TrackCorrectEnding() { group_depth_ = 0; } + + bool Done(const char** ptr) { return DoneWithCheck(ptr, group_depth_); } + + int depth() const { return depth_; } + + Data& data() { return data_; } + const Data& data() const { return data_; } + + template + PROTOBUF_MUST_USE_RESULT const char* ParseMessage(T* msg, const char* ptr); + // We outline when the type is generic and we go through a virtual + const char* ParseMessage(MessageLite* msg, const char* ptr); + const char* ParseMessage(Message* msg, const char* ptr); + + template + PROTOBUF_MUST_USE_RESULT PROTOBUF_ALWAYS_INLINE const char* ParseGroup( + T* msg, const char* ptr, uint32 tag) { + if (--depth_ < 0) return nullptr; + group_depth_++; + ptr = msg->_InternalParse(ptr, this); + group_depth_--; + depth_++; + if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr; + return ptr; + } + + private: + // The context keeps an internal stack to keep track of the recursive + // part of the parse state. + // Current depth of the active parser, depth counts down. + // This is used to limit recursion depth (to prevent overflow on malicious + // data), but is also used to index in stack_ to store the current state. + int depth_; + // Unfortunately necessary for the fringe case of ending on 0 or end-group tag + // in the last kSlopBytes of a ZeroCopyInputStream chunk. 
+ int group_depth_ = INT_MIN; + Data data_; +}; + +template +bool ExpectTag(const char* ptr) { + if (tag < 128) { + return *ptr == tag; + } else { + static_assert(tag < 128 * 128, "We only expect tags for 1 or 2 bytes"); + char buf[2] = {static_cast(tag | 0x80), static_cast(tag >> 7)}; + return std::memcmp(ptr, buf, 2) == 0; + } +} + +template +struct EndianHelper; + +template <> +struct EndianHelper<1> { + static uint8 Load(const void* p) { return *static_cast(p); } +}; + +template <> +struct EndianHelper<2> { + static uint16 Load(const void* p) { + uint16 tmp; + std::memcpy(&tmp, p, 2); +#ifndef PROTOBUF_LITTLE_ENDIAN + tmp = bswap_16(tmp); +#endif + return tmp; + } +}; + +template <> +struct EndianHelper<4> { + static uint32 Load(const void* p) { + uint32 tmp; + std::memcpy(&tmp, p, 4); +#ifndef PROTOBUF_LITTLE_ENDIAN + tmp = bswap_32(tmp); +#endif + return tmp; + } +}; + +template <> +struct EndianHelper<8> { + static uint64 Load(const void* p) { + uint64 tmp; + std::memcpy(&tmp, p, 8); +#ifndef PROTOBUF_LITTLE_ENDIAN + tmp = bswap_64(tmp); +#endif + return tmp; + } +}; + +template +T UnalignedLoad(const char* p) { + auto tmp = EndianHelper::Load(p); + T res; + memcpy(&res, &tmp, sizeof(T)); + return res; +} + +PROTOBUF_EXPORT +std::pair VarintParseSlow32(const char* p, uint32 res); +PROTOBUF_EXPORT +std::pair VarintParseSlow64(const char* p, uint32 res); + +inline const char* VarintParseSlow(const char* p, uint32 res, uint32* out) { + auto tmp = VarintParseSlow32(p, res); + *out = tmp.second; + return tmp.first; +} + +inline const char* VarintParseSlow(const char* p, uint32 res, uint64* out) { + auto tmp = VarintParseSlow64(p, res); + *out = tmp.second; + return tmp.first; +} + +template +PROTOBUF_MUST_USE_RESULT const char* VarintParse(const char* p, T* out) { + auto ptr = reinterpret_cast(p); + uint32 res = ptr[0]; + if (!(res & 0x80)) { + *out = res; + return p + 1; + } + uint32 byte = ptr[1]; + res += (byte - 1) << 7; + if (!(byte & 0x80)) { + *out = res; 
+ return p + 2; + } + return VarintParseSlow(p, res, out); +} + +// Used for tags, could read up to 5 bytes which must be available. +// Caller must ensure its safe to call. + +PROTOBUF_EXPORT +std::pair ReadTagFallback(const char* p, uint32 res); + +// Same as ParseVarint but only accept 5 bytes at most. +inline const char* ReadTag(const char* p, uint32* out, uint32 /*max_tag*/ = 0) { + uint32 res = static_cast(p[0]); + if (res < 128) { + *out = res; + return p + 1; + } + uint32 second = static_cast(p[1]); + res += (second - 1) << 7; + if (second < 128) { + *out = res; + return p + 2; + } + auto tmp = ReadTagFallback(p, res); + *out = tmp.second; + return tmp.first; +} + +// Decode 2 consecutive bytes of a varint and returns the value, shifted left +// by 1. It simultaneous updates *ptr to *ptr + 1 or *ptr + 2 depending if the +// first byte's continuation bit is set. +// If bit 15 of return value is set (equivalent to the continuation bits of both +// bytes being set) the varint continues, otherwise the parse is done. On x86 +// movsx eax, dil +// add edi, eax +// adc [rsi], 1 +// add eax, eax +// and eax, edi +inline uint32 DecodeTwoBytes(const char** ptr) { + uint32 value = UnalignedLoad(*ptr); + // Sign extend the low byte continuation bit + uint32_t x = static_cast(value); + // This add is an amazing operation, it cancels the low byte continuation bit + // from y transferring it to the carry. Simultaneously it also shifts the 7 + // LSB left by one tightly against high byte varint bits. Hence value now + // contains the unpacked value shifted left by 1. + value += x; + // Use the carry to update the ptr appropriately. + *ptr += value < x ? 
2 : 1; + return value & (x + x); // Mask out the high byte iff no continuation +} + +// More efficient varint parsing for big varints +inline const char* ParseBigVarint(const char* p, uint64* out) { + auto pnew = p; + auto tmp = DecodeTwoBytes(&pnew); + uint64 res = tmp >> 1; + if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) { + *out = res; + return pnew; + } + for (std::uint32_t i = 1; i < 5; i++) { + pnew = p + 2 * i; + tmp = DecodeTwoBytes(&pnew); + res += (static_cast(tmp) - 2) << (14 * i - 1); + if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) { + *out = res; + return pnew; + } + } + return nullptr; +} + +PROTOBUF_EXPORT +std::pair ReadSizeFallback(const char* p, uint32 first); +// Used for tags, could read up to 5 bytes which must be available. Additionally +// it makes sure the unsigned value fits a int32, otherwise returns nullptr. +// Caller must ensure its safe to call. +inline uint32 ReadSize(const char** pp) { + auto p = *pp; + uint32 res = static_cast(p[0]); + if (res < 128) { + *pp = p + 1; + return res; + } + auto x = ReadSizeFallback(p, res); + *pp = x.first; + return x.second; +} + +// Some convenience functions to simplify the generated parse loop code. +// Returning the value and updating the buffer pointer allows for nicer +// function composition. We rely on the compiler to inline this. +// Also in debug compiles having local scoped variables tend to generated +// stack frames that scale as O(num fields). 
+inline uint64 ReadVarint64(const char** p) { + uint64 tmp; + *p = VarintParse(*p, &tmp); + return tmp; +} + +inline uint32 ReadVarint32(const char** p) { + uint32 tmp; + *p = VarintParse(*p, &tmp); + return tmp; +} + +inline int64 ReadVarintZigZag64(const char** p) { + uint64 tmp; + *p = VarintParse(*p, &tmp); + return WireFormatLite::ZigZagDecode64(tmp); +} + +inline int32 ReadVarintZigZag32(const char** p) { + uint64 tmp; + *p = VarintParse(*p, &tmp); + return WireFormatLite::ZigZagDecode32(static_cast(tmp)); +} + +template +PROTOBUF_MUST_USE_RESULT const char* ParseContext::ParseMessage( + T* msg, const char* ptr) { + int size = ReadSize(&ptr); + if (!ptr) return nullptr; + auto old = PushLimit(ptr, size); + if (--depth_ < 0) return nullptr; + ptr = msg->_InternalParse(ptr, this); + if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr; + depth_++; + if (!PopLimit(old)) return nullptr; + return ptr; +} + +template +const char* ReadPackedVarintArray(const char* ptr, const char* end, Add add) { + while (ptr < end) { + uint64 varint; + ptr = VarintParse(ptr, &varint); + if (ptr == nullptr) return nullptr; + add(varint); + } + return ptr; +} + +template +const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) { + int size = ReadSize(&ptr); + if (ptr == nullptr) return nullptr; + int chunk_size = buffer_end_ - ptr; + while (size > chunk_size) { + ptr = ReadPackedVarintArray(ptr, buffer_end_, add); + if (ptr == nullptr) return nullptr; + int overrun = ptr - buffer_end_; + GOOGLE_DCHECK(overrun >= 0 && overrun <= kSlopBytes); + if (size - chunk_size <= kSlopBytes) { + // The current buffer contains all the information needed, we don't need + // to flip buffers. However we must parse from a buffer with enough space + // so we are not prone to a buffer overflow. 
+ char buf[kSlopBytes + 10] = {}; + std::memcpy(buf, buffer_end_, kSlopBytes); + GOOGLE_CHECK_LE(size - chunk_size, kSlopBytes); + auto end = buf + (size - chunk_size); + auto res = ReadPackedVarintArray(buf + overrun, end, add); + if (res == nullptr || res != end) return nullptr; + return buffer_end_ + (res - buf); + } + size -= overrun + chunk_size; + GOOGLE_DCHECK_GT(size, 0); + // We must flip buffers + if (limit_ <= kSlopBytes) return nullptr; + ptr = Next(); + if (ptr == nullptr) return nullptr; + ptr += overrun; + chunk_size = buffer_end_ - ptr; + } + auto end = ptr + size; + ptr = ReadPackedVarintArray(ptr, end, add); + return end == ptr ? ptr : nullptr; +} + +// Helper for verification of utf8 +PROTOBUF_EXPORT +bool VerifyUTF8(StringPiece s, const char* field_name); + +inline bool VerifyUTF8(const std::string* s, const char* field_name) { + return VerifyUTF8(*s, field_name); +} + +// All the string parsers with or without UTF checking and for all CTypes. +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* InlineGreedyStringParser( + std::string* s, const char* ptr, ParseContext* ctx); + + +// Add any of the following lines to debug which parse function is failing. 
+ +#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \ + if (!(predicate)) { \ + /* ::raise(SIGINT); */ \ + /* GOOGLE_LOG(ERROR) << "Parse failure"; */ \ + return ret; \ + } + +#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \ + GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr) + +template +PROTOBUF_MUST_USE_RESULT const char* FieldParser(uint64 tag, T& field_parser, + const char* ptr, + ParseContext* ctx) { + uint32 number = tag >> 3; + GOOGLE_PROTOBUF_PARSER_ASSERT(number != 0); + using WireType = internal::WireFormatLite::WireType; + switch (tag & 7) { + case WireType::WIRETYPE_VARINT: { + uint64 value; + ptr = VarintParse(ptr, &value); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + field_parser.AddVarint(number, value); + break; + } + case WireType::WIRETYPE_FIXED64: { + uint64 value = UnalignedLoad(ptr); + ptr += 8; + field_parser.AddFixed64(number, value); + break; + } + case WireType::WIRETYPE_LENGTH_DELIMITED: { + ptr = field_parser.ParseLengthDelimited(number, ptr, ctx); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + break; + } + case WireType::WIRETYPE_START_GROUP: { + ptr = field_parser.ParseGroup(number, ptr, ctx); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + break; + } + case WireType::WIRETYPE_END_GROUP: { + GOOGLE_LOG(FATAL) << "Can't happen"; + break; + } + case WireType::WIRETYPE_FIXED32: { + uint32 value = UnalignedLoad(ptr); + ptr += 4; + field_parser.AddFixed32(number, value); + break; + } + default: + return nullptr; + } + return ptr; +} + +template +PROTOBUF_MUST_USE_RESULT const char* WireFormatParser(T& field_parser, + const char* ptr, + ParseContext* ctx) { + while (!ctx->Done(&ptr)) { + uint32 tag; + ptr = ReadTag(ptr, &tag); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr); + if (tag == 0 || (tag & 7) == 4) { + ctx->SetLastTag(tag); + return ptr; + } + ptr = FieldParser(tag, field_parser, ptr, ctx); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr); + } + return ptr; +} + +// The packed parsers parse repeated numeric primitives directly into the +// 
corresponding field + +// These are packed varints +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt32Parser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt32Parser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt64Parser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt64Parser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt32Parser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt64Parser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser( + void* object, const char* ptr, ParseContext* ctx); + +template +PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser( + void* object, const char* ptr, ParseContext* ctx, bool (*is_valid)(int), + InternalMetadata* metadata, int field_num) { + return ctx->ReadPackedVarint( + ptr, [object, is_valid, metadata, field_num](uint64 val) { + if (is_valid(val)) { + static_cast*>(object)->Add(val); + } else { + WriteVarint(field_num, val, metadata->mutable_unknown_fields()); + } + }); +} + +template +PROTOBUF_MUST_USE_RESULT const char* PackedEnumParserArg( + void* object, const char* ptr, ParseContext* ctx, + bool (*is_valid)(const void*, int), const void* data, + InternalMetadata* metadata, int field_num) { + return ctx->ReadPackedVarint( + ptr, [object, is_valid, data, metadata, field_num](uint64 val) { + if (is_valid(data, val)) { + static_cast*>(object)->Add(val); + } else { + WriteVarint(field_num, val, metadata->mutable_unknown_fields()); + } + }); +} + +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedBoolParser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT 
PROTOBUF_MUST_USE_RESULT const char* PackedFixed32Parser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed32Parser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed64Parser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed64Parser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFloatParser( + void* object, const char* ptr, ParseContext* ctx); +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedDoubleParser( + void* object, const char* ptr, ParseContext* ctx); + +// This is the only recursive parser. +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownGroupLiteParse( + std::string* unknown, const char* ptr, ParseContext* ctx); +// This is a helper for UnknownGroupLiteParse but is actually also +// useful in the generated code. It overloads on std::string* vs +// UnknownFieldSet* to make the generated code isomorphic between full and lite. +PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownFieldParse( + uint32 tag, std::string* unknown, const char* ptr, ParseContext* ctx); + +} // namespace internal +} // namespace protobuf +} // namespace google + +#include + +#endif // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__ diff --git a/third_party/protobuf-lite/google/protobuf/port.h b/third_party/protobuf-lite/google/protobuf/port.h new file mode 100644 index 00000000..555fd4eb --- /dev/null +++ b/third_party/protobuf-lite/google/protobuf/port.h @@ -0,0 +1,43 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// A common header that is included across all protobuf headers. We do our best +// to avoid #defining any macros here; instead we generally put macros in +// port_def.inc and port_undef.inc so they are not visible from outside of +// protobuf. 
+ +#ifndef GOOGLE_PROTOBUF_PORT_H__ +#define GOOGLE_PROTOBUF_PORT_H__ + + +#include + + +#endif // GOOGLE_PROTOBUF_PORT_H__ diff --git a/third_party/protobuf-lite/google/protobuf/port_def.inc b/third_party/protobuf-lite/google/protobuf/port_def.inc new file mode 100644 index 00000000..320e888e --- /dev/null +++ b/third_party/protobuf-lite/google/protobuf/port_def.inc @@ -0,0 +1,606 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// This file defines common macros that are used in protobuf. +// +// To hide these definitions from the outside world (and to prevent collisions +// if more than one version of protobuf is #included in the same project) you +// must follow this pattern when #including port_def.inc in a header file: +// +// #include "other_header.h" +// #include "message.h" +// // etc. +// +// #include "port_def.inc" // MUST be last header included +// +// // Definitions for this header. +// +// #include "port_undef.inc" +// +// This is a textual header with no include guard, because we want to +// detect/prohibit anytime it is #included twice without a corresponding +// #undef. + +// These macros are private and should always be #undef'd from headers. If +// any of these errors fire, you should either properly #include +// port_undef.inc at the end of your header that #includes port_def.inc, or +// don't #include port_def.inc twice in a .cc file.
+#ifdef PROTOBUF_NAMESPACE +#error PROTOBUF_NAMESPACE was previously defined +#endif +#ifdef PROTOBUF_NAMESPACE_ID +#error PROTOBUF_NAMESPACE_ID was previously defined +#endif +#ifdef PROTOBUF_ALWAYS_INLINE +#error PROTOBUF_ALWAYS_INLINE was previously defined +#endif +#ifdef PROTOBUF_COLD +#error PROTOBUF_COLD was previously defined +#endif +#ifdef PROTOBUF_NOINLINE +#error PROTOBUF_NOINLINE was previously defined +#endif +#ifdef PROTOBUF_SECTION_VARIABLE +#error PROTOBUF_SECTION_VARIABLE was previously defined +#endif +#ifdef PROTOBUF_DEPRECATED +#error PROTOBUF_DEPRECATED was previously defined +#endif +#ifdef PROTOBUF_DEPRECATED_MSG +#error PROTOBUF_DEPRECATED_MSG was previously defined +#endif +#ifdef PROTOBUF_FUNC_ALIGN +#error PROTOBUF_FUNC_ALIGN was previously defined +#endif +#ifdef PROTOBUF_RETURNS_NONNULL +#error PROTOBUF_RETURNS_NONNULL was previously defined +#endif +#ifdef PROTOBUF_ATTRIBUTE_REINITIALIZES +#error PROTOBUF_ATTRIBUTE_REINITIALIZES was previously defined +#endif +#ifdef PROTOBUF_RTTI +#error PROTOBUF_RTTI was previously defined +#endif +#ifdef PROTOBUF_VERSION +#error PROTOBUF_VERSION was previously defined +#endif +#ifdef PROTOBUF_VERSION_SUFFIX +#error PROTOBUF_VERSION_SUFFIX was previously defined +#endif +#ifdef PROTOBUF_MIN_HEADER_VERSION_FOR_PROTOC +#error PROTOBUF_MIN_HEADER_VERSION_FOR_PROTOC was previously defined +#endif +#ifdef PROTOBUF_MIN_PROTOC_VERSION +#error PROTOBUF_MIN_PROTOC_VERSION was previously defined +#endif +#ifdef PROTOBUF_PREDICT_TRUE +#error PROTOBUF_PREDICT_TRUE was previously defined +#endif +#ifdef PROTOBUF_PREDICT_FALSE +#error PROTOBUF_PREDICT_FALSE was previously defined +#endif +#ifdef PROTOBUF_FIELD_OFFSET +#error PROTOBUF_FIELD_OFFSET was previously defined +#endif +#ifdef PROTOBUF_LL_FORMAT +#error PROTOBUF_LL_FORMAT was previously defined +#endif +#ifdef PROTOBUF_GUARDED_BY +#error PROTOBUF_GUARDED_BY was previously defined +#endif +#ifdef PROTOBUF_LONGLONG +#error PROTOBUF_LONGLONG was previously 
defined +#endif +#ifdef PROTOBUF_ULONGLONG +#error PROTOBUF_ULONGLONG was previously defined +#endif +#ifdef PROTOBUF_FALLTHROUGH_INTENDED +#error PROTOBUF_FALLTHROUGH_INTENDED was previously defined +#endif +#ifdef PROTOBUF_EXPORT +#error PROTOBUF_EXPORT was previously defined +#endif +#ifdef PROTOC_EXPORT +#error PROTOC_EXPORT was previously defined +#endif +#ifdef PROTOBUF_MUST_USE_RESULT +#error PROTOBUF_MUST_USE_RESULT was previously defined +#endif +#ifdef PROTOBUF_UNUSED +#error PROTOBUF_UNUSED was previously defined +#endif +#ifdef PROTOBUF_FINAL +#error PROTOBUF_FINAL was previously defined +#endif +#ifdef PROTOBUF_DISABLE_MSVC_UNION_WARNING +#error PROTOBUF_DISABLE_MSVC_UNION_WARNING was previously defined +#endif +#ifdef PROTOBUF_ENABLE_MSVC_UNION_WARNING +#error PROTOBUF_ENABLE_MSVC_UNION_WARNING was previously defined +#endif +#ifdef PROTOBUF_CONSTINIT +#error PROTOBUF_CONSTINIT was previously defined +#endif +#ifdef PROTOBUF_MAYBE_CONSTEXPR +#error PROTOBUF_MAYBE_CONSTEXPR was previously defined +#endif +#ifdef PROTOBUF_ATTRIBUTE_NO_DESTROY +#error PROTOBUF_ATTRIBUTE_NO_DESTROY was previously defined +#endif + + +#define PROTOBUF_NAMESPACE "google::protobuf" +#define PROTOBUF_NAMESPACE_ID google::protobuf +#define PROTOBUF_NAMESPACE_OPEN \ + namespace google { \ + namespace protobuf { +#define PROTOBUF_NAMESPACE_CLOSE \ + } /* namespace protobuf */ \ + } /* namespace google */ + +#if defined(__GNUC__) || defined(__clang__) +#define PROTOBUF_DEPRECATED __attribute__((deprecated)) +#define PROTOBUF_DEPRECATED_ENUM __attribute__((deprecated)) +#define PROTOBUF_DEPRECATED_MSG(msg) __attribute__((deprecated(msg))) +#elif defined(_MSC_VER) +#define PROTOBUF_DEPRECATED __declspec(deprecated) +#define PROTOBUF_DEPRECATED_ENUM +#define PROTOBUF_DEPRECATED_MSG(msg) __declspec(deprecated(msg)) +#endif + +#define PROTOBUF_SECTION_VARIABLE(x) +#define PROTOBUF_MUST_USE_RESULT + +// ---------------------------------------------------------------------------- +// 
Annotations: Some parts of the code have been annotated in ways that might +// be useful to some compilers or tools, but are not supported universally. +// You can #define these annotations yourself if the default implementation +// is not right for you. + +#ifdef GOOGLE_ATTRIBUTE_ALWAYS_INLINE +#define PROTOBUF_ALWAYS_INLINE GOOGLE_ATTRIBUTE_ALWAYS_INLINE +#else +#if defined(__GNUC__) && \ + (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +// For functions we want to force inline. +// Introduced in gcc 3.1. +#define PROTOBUF_ALWAYS_INLINE __attribute__((always_inline)) +#else +// Other compilers will have to figure it out for themselves. +#define PROTOBUF_ALWAYS_INLINE +#endif +#endif + +#ifdef GOOGLE_ATTRIBUTE_NOINLINE +#define PROTOBUF_NOINLINE GOOGLE_ATTRIBUTE_NOINLINE +#else +#if defined(__GNUC__) && \ + (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +// For functions we want to force not inline. +// Introduced in gcc 3.1. +#define PROTOBUF_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) && (_MSC_VER >= 1400) +// Seems to have been around since at least Visual Studio 2005 +#define PROTOBUF_NOINLINE __declspec(noinline) +#else +// Other compilers will have to figure it out for themselves. +#define PROTOBUF_NOINLINE +#endif +#endif + +#ifdef GOOGLE_ATTRIBUTE_FUNC_ALIGN +#define PROTOBUF_FUNC_ALIGN GOOGLE_ATTRIBUTE_FUNC_ALIGN +#else +#if defined(__clang__) || \ + defined(__GNUC__) && \ + (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +// Function alignment attribute introduced in gcc 4.3 +#define PROTOBUF_FUNC_ALIGN(bytes) __attribute__((aligned(bytes))) +#else +#define PROTOBUF_FUNC_ALIGN(bytes) +#endif +#endif + +#ifdef GOOGLE_PREDICT_TRUE +#define PROTOBUF_PREDICT_TRUE GOOGLE_PREDICT_TRUE +#else +#ifdef __GNUC__ +// Provided at least since GCC 3.0. 
+#define PROTOBUF_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) +#else +#define PROTOBUF_PREDICT_TRUE(x) (x) +#endif +#endif + +#ifdef GOOGLE_PREDICT_FALSE +#define PROTOBUF_PREDICT_FALSE GOOGLE_PREDICT_FALSE +#else +#ifdef __GNUC__ +// Provided at least since GCC 3.0. +#define PROTOBUF_PREDICT_FALSE(x) (__builtin_expect(x, 0)) +#else +#define PROTOBUF_PREDICT_FALSE(x) (x) +#endif +#endif + +#ifdef GOOGLE_PROTOBUF_ATTRIBUTE_RETURNS_NONNULL +#define PROTOBUF_RETURNS_NONNULL GOOGLE_PROTOBUF_ATTRIBUTE_RETURNS_NONNULL +#else +#if defined(__has_attribute) +#if __has_attribute(returns_nonnull) +#define PROTOBUF_RETURNS_NONNULL __attribute__((returns_nonnull)) +#endif +#endif +#endif +#ifndef PROTOBUF_RETURNS_NONNULL +#define PROTOBUF_RETURNS_NONNULL +#endif + +#if defined(__has_cpp_attribute) +#if __has_cpp_attribute(clang::reinitializes) +#define PROTOBUF_ATTRIBUTE_REINITIALIZES [[clang::reinitializes]] +#endif +#endif +#ifndef PROTOBUF_ATTRIBUTE_REINITIALIZES +#define PROTOBUF_ATTRIBUTE_REINITIALIZES +#endif + +#define PROTOBUF_GUARDED_BY(x) +#define PROTOBUF_COLD + +// Copied from ABSL. +#if defined(__clang__) && defined(__has_warning) +#if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough") +#define PROTOBUF_FALLTHROUGH_INTENDED [[clang::fallthrough]] +#endif +#elif defined(__GNUC__) && __GNUC__ >= 7 +#define PROTOBUF_FALLTHROUGH_INTENDED [[gnu::fallthrough]] +#endif + +#ifndef PROTOBUF_FALLTHROUGH_INTENDED +#define PROTOBUF_FALLTHROUGH_INTENDED +#endif + +#if defined(__has_cpp_attribute) +#define HAS_ATTRIBUTE(attr) __has_cpp_attribute(attr) +#else +#define HAS_ATTRIBUTE(attr) 0 +#endif + +#if HAS_ATTRIBUTE(unused) || (defined(__GNUC__) && !defined(__clang__)) +#define PROTOBUF_UNUSED __attribute__((__unused__)) +#else +#define PROTOBUF_UNUSED +#endif + +#undef HAS_ATTRIBUTE + +#ifdef _MSC_VER +#define PROTOBUF_LONGLONG(x) x##I64 +#define PROTOBUF_ULONGLONG(x) x##UI64 +#define PROTOBUF_LL_FORMAT "I64" // As in printf("%I64d", ...) 
+#else +// By long long, we actually mean int64. +#define PROTOBUF_LONGLONG(x) x##LL +#define PROTOBUF_ULONGLONG(x) x##ULL +// Used to format real long long integers. +#define PROTOBUF_LL_FORMAT \ + "ll" // As in "%lld". Note that "q" is poor form also. +#endif + + +// Shared google3/opensource definitions. ////////////////////////////////////// + +#define PROTOBUF_VERSION 3014000 +#define PROTOBUF_MIN_HEADER_VERSION_FOR_PROTOC 3014000 +#define PROTOBUF_MIN_PROTOC_VERSION 3014000 +#define PROTOBUF_VERSION_SUFFIX "" + +// The minimum library version which works with the current version of the +// headers. +#define GOOGLE_PROTOBUF_MIN_LIBRARY_VERSION 3014000 + +#if defined(GOOGLE_PROTOBUF_NO_RTTI) && GOOGLE_PROTOBUF_NO_RTTI +#define PROTOBUF_RTTI 0 +#elif defined(__has_feature) +// https://clang.llvm.org/docs/LanguageExtensions.html#has-feature-and-has-extension +#define PROTOBUF_RTTI __has_feature(cxx_rtti) +#elif !defined(__cxx_rtti) +// https://en.cppreference.com/w/User:D41D8CD98F/feature_testing_macros#C.2B.2B98 +#define PROTOBUF_RTTI 0 +#elif defined(__GNUC__) && !defined(__GXX_RTTI) +// https://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html +#define PROTOBUF_RTTI 0 +#else +#define PROTOBUF_RTTI 1 +#endif + +// Returns the offset of the given field within the given aggregate type. +// This is equivalent to the ANSI C offsetof() macro. However, according +// to the C++ standard, offsetof() only works on POD types, and GCC +// enforces this requirement with a warning. In practice, this rule is +// unnecessarily strict; there is probably no compiler or platform on +// which the offsets of the direct fields of a class are non-constant. +// Fields inherited from superclasses *can* have non-constant offsets, +// but that's not what this macro will be used for.
+#if defined(__clang__) +// For Clang we use __builtin_offsetof() and suppress the warning, +// to avoid Control Flow Integrity and UBSan vptr sanitizers from +// crashing while trying to validate the invalid reinterpet_casts. +#define PROTOBUF_FIELD_OFFSET(TYPE, FIELD) \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Winvalid-offsetof\"") \ + __builtin_offsetof(TYPE, FIELD) \ + _Pragma("clang diagnostic pop") +#elif defined(__GNUC__) && \ + (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) +#define PROTOBUF_FIELD_OFFSET(TYPE, FIELD) __builtin_offsetof(TYPE, FIELD) +#else // defined(__clang__) +// Note that we calculate relative to the pointer value 16 here since if we +// just use zero, GCC complains about dereferencing a NULL pointer. We +// choose 16 rather than some other number just in case the compiler would +// be confused by an unaligned pointer. +#define PROTOBUF_FIELD_OFFSET(TYPE, FIELD) \ + static_cast< ::google::protobuf::uint32>(reinterpret_cast( \ + &reinterpret_cast(16)->FIELD) - \ + reinterpret_cast(16)) +#endif + +#if defined(PROTOBUF_USE_DLLS) +#if defined(_MSC_VER) +#ifdef LIBPROTOBUF_EXPORTS +#define PROTOBUF_EXPORT __declspec(dllexport) +#define PROTOBUF_EXPORT_TEMPLATE_DECLARE +#define PROTOBUF_EXPORT_TEMPLATE_DEFINE __declspec(dllexport) +#else +#define PROTOBUF_EXPORT __declspec(dllimport) +#define PROTOBUF_EXPORT_TEMPLATE_DECLARE +#define PROTOBUF_EXPORT_TEMPLATE_DEFINE __declspec(dllimport) +#endif +#ifdef LIBPROTOC_EXPORTS +#define PROTOC_EXPORT __declspec(dllexport) +#else +#define PROTOC_EXPORT __declspec(dllimport) +#endif +#else // defined(_MSC_VER) +#ifdef LIBPROTOBUF_EXPORTS +#define PROTOBUF_EXPORT __attribute__((visibility("default"))) +#define PROTOBUF_EXPORT_TEMPLATE_DECLARE __attribute__((visibility("default"))) +#define PROTOBUF_EXPORT_TEMPLATE_DEFINE +#else +#define PROTOBUF_EXPORT +#define PROTOBUF_EXPORT_TEMPLATE_DECLARE +#define PROTOBUF_EXPORT_TEMPLATE_DEFINE +#endif +#ifdef 
LIBPROTOC_EXPORTS +#define PROTOC_EXPORT __attribute__((visibility("default"))) +#else +#define PROTOC_EXPORT +#endif +#endif +#else // defined(PROTOBUF_USE_DLLS) +#define PROTOBUF_EXPORT +#define PROTOC_EXPORT +#define PROTOBUF_EXPORT_TEMPLATE_DECLARE +#define PROTOBUF_EXPORT_TEMPLATE_DEFINE +#endif + +// Windows declares several inconvenient macro names. We #undef them and then +// restore them in port_undef.inc. +#ifdef _MSC_VER +#pragma push_macro("CREATE_NEW") +#undef CREATE_NEW +#pragma push_macro("DOUBLE_CLICK") +#undef DOUBLE_CLICK +#pragma push_macro("ERROR") +#undef ERROR +#pragma push_macro("ERROR_BUSY") +#undef ERROR_BUSY +#pragma push_macro("ERROR_NOT_FOUND") +#undef ERROR_NOT_FOUND +#pragma push_macro("GetMessage") +#undef GetMessage +#pragma push_macro("IGNORE") +#undef IGNORE +#pragma push_macro("IN") +#undef IN +#pragma push_macro("INPUT_KEYBOARD") +#undef INPUT_KEYBOARD +#pragma push_macro("NO_ERROR") +#undef NO_ERROR +#pragma push_macro("OUT") +#undef OUT +#pragma push_macro("OPTIONAL") +#undef OPTIONAL +#pragma push_macro("min") +#undef min +#pragma push_macro("max") +#undef max +#pragma push_macro("NEAR") +#undef NEAR +#pragma push_macro("NO_DATA") +#undef NO_DATA +#pragma push_macro("REASON_UNKNOWN") +#undef REASON_UNKNOWN +#pragma push_macro("SERVICE_DISABLED") +#undef SERVICE_DISABLED +#pragma push_macro("SEVERITY_ERROR") +#undef SEVERITY_ERROR +#pragma push_macro("STRICT") +#undef STRICT +#pragma push_macro("timezone") +#undef timezone +#endif // _MSC_VER + +#if defined(__clang__) || defined(__GNUC__) || defined(_MSC_VER) +// Don't let Objective-C Macros interfere with proto identifiers with the same +// name. +#pragma push_macro("DEBUG") +#undef DEBUG +#pragma push_macro("TRUE") +#undef TRUE +#pragma push_macro("FALSE") +#undef FALSE +#endif // defined(__clang__) || defined(__GNUC__) || defined(_MSC_VER) + +#if defined(__clang__) +#pragma clang diagnostic push +// TODO(gerbens) ideally we cleanup the code. 
But a cursory try shows many +// violations. So let's ignore for now. +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#elif defined(__GNUC__) +// GCC does not allow disabling diagnostics within an expression: +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60875, so we disable this one +// globally even though it's only used for PROTOBUF_FIELD_OFFSET. +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Winvalid-offsetof" +#endif + +// PROTOBUF_ASSUME(pred) tells the compiler that it can assume pred is true. To +// be safe, we also validate the assumption with a GOOGLE_DCHECK in unoptimized +// builds. The macro does not do anything useful if the compiler does not +// support __builtin_assume. +#ifdef __has_builtin +#if __has_builtin(__builtin_assume) +#define PROTOBUF_ASSUME(pred) \ + GOOGLE_DCHECK(pred); \ + __builtin_assume(pred) +#else +#define PROTOBUF_ASSUME(pred) GOOGLE_DCHECK(pred) +#endif +#else +#define PROTOBUF_ASSUME(pred) GOOGLE_DCHECK(pred) +#endif + +// Specify memory alignment for structs, classes, etc. +// Use like: +// class PROTOBUF_ALIGNAS(16) MyClass { ... } +// PROTOBUF_ALIGNAS(16) int array[4]; +// +// In most places you can use the C++11 keyword "alignas", which is preferred. +// +// But compilers have trouble mixing __attribute__((...)) syntax with +// alignas(...) syntax. +// +// Doesn't work in clang or gcc: +// struct alignas(16) __attribute__((packed)) S { char c; }; +// Works in clang but not gcc: +// struct __attribute__((packed)) alignas(16) S2 { char c; }; +// Works in clang and gcc: +// struct alignas(16) S3 { char c; } __attribute__((packed)); +// +// There are also some attributes that must be specified *before* a class +// definition: visibility (used for exporting functions/classes) is one of +// these attributes. This means that it is not possible to use alignas() with a +// class that is marked as exported. 
+#if defined(_MSC_VER) +#define PROTOBUF_ALIGNAS(byte_alignment) __declspec(align(byte_alignment)) +#elif defined(__GNUC__) +#define PROTOBUF_ALIGNAS(byte_alignment) \ + __attribute__((aligned(byte_alignment))) +#else +#define PROTOBUF_ALIGNAS(byte_alignment) alignas(byte_alignment) +#endif + +#define PROTOBUF_FINAL final + +#if defined(_MSC_VER) +#define PROTOBUF_THREAD_LOCAL __declspec(thread) +#else +#define PROTOBUF_THREAD_LOCAL __thread +#endif + +// For enabling message owned arena, one major blocker is semantic change from +// moving to copying when there is ownership transfer (e.g., move ctor, swap, +// set allocated, release). This change not only causes performance regression +// but also breaks users code (e.g., dangling reference). For top-level +// messages, since it owns the arena, we can mitigate the issue by transferring +// ownership of arena. However, we cannot do that for nested messages. In order +// to tell how many usages of nested messages affected by message owned arena, +// we need to simulate the arena ownership. +// This experiment is purely for the purpose of gathering data. All code guarded +// by this flag is supposed to be removed after this experiment. +// #define PROTOBUF_MESSAGE_OWNED_ARENA_EXPERIMENT + +#if defined(__cpp_constinit) +#define PROTOBUF_CONSTINIT constinit +#elif defined(__has_cpp_attribute) +#if __has_cpp_attribute(clang::require_constant_initialization) +#define PROTOBUF_CONSTINIT [[clang::require_constant_initialization]] +#endif +#endif +#ifndef PROTOBUF_CONSTINIT +#define PROTOBUF_CONSTINIT +#endif + +// Some constructors can't be constexpr under MSVC, but given that MSVC will not +// do constant initialization of globals anyway we can omit `constexpr` from +// them. 
These constructors are marked with PROTOBUF_MAYBE_CONSTEXPR +#if defined(_MSC_VER) +#define PROTOBUF_MAYBE_CONSTEXPR +#else +#define PROTOBUF_MAYBE_CONSTEXPR constexpr +#endif + +#if _MSC_VER +#define PROTOBUF_DISABLE_MSVC_UNION_WARNING \ + __pragma(warning(push)) \ + __pragma(warning(disable : 4582)) \ + __pragma(warning(disable : 4583)) + +#define PROTOBUF_ENABLE_MSVC_UNION_WARNING \ + __pragma(warning(pop)) +#else +#define PROTOBUF_DISABLE_MSVC_UNION_WARNING +#define PROTOBUF_ENABLE_MSVC_UNION_WARNING +#endif + +#if defined(__cpp_constinit) +#define PROTOBUF_CONSTINIT constinit +#elif defined(__has_cpp_attribute) +#if __has_cpp_attribute(clang::require_constant_initialization) +#define PROTOBUF_CONSTINIT [[clang::require_constant_initialization]] +#endif +#endif +#ifndef PROTOBUF_CONSTINIT +#define PROTOBUF_CONSTINIT +#endif + +// Some globals with an empty non-trivial destructor are annotated with +// no_destroy for performance reasons. It reduces the cost of these globals in +// non-opt mode and under sanitizers. +#if defined(__has_cpp_attribute) +#if __has_cpp_attribute(clang::no_destroy) +#define PROTOBUF_ATTRIBUTE_NO_DESTROY [[clang::no_destroy]] +#endif +#endif +#if !defined(PROTOBUF_ATTRIBUTE_NO_DESTROY) +#define PROTOBUF_ATTRIBUTE_NO_DESTROY +#endif diff --git a/third_party/protobuf-lite/google/protobuf/port_undef.inc b/third_party/protobuf-lite/google/protobuf/port_undef.inc new file mode 100644 index 00000000..d1414285 --- /dev/null +++ b/third_party/protobuf-lite/google/protobuf/port_undef.inc @@ -0,0 +1,116 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. 
+// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// #undefs all macros defined in port_def.inc. See comments in port_def.inc +// for more info. 
+ +#ifndef PROTOBUF_NAMESPACE +#error "port_undef.inc must be included after port_def.inc" +#endif +#undef PROTOBUF_NAMESPACE +#undef PROTOBUF_NAMESPACE_ID +#undef PROTOBUF_ALWAYS_INLINE +#undef PROTOBUF_COLD +#undef PROTOBUF_NOINLINE +#undef PROTOBUF_SECTION_VARIABLE +#undef PROTOBUF_DEPRECATED +#undef PROTOBUF_DEPRECATED_ENUM +#undef PROTOBUF_DEPRECATED_MSG +#undef PROTOBUF_FUNC_ALIGN +#undef PROTOBUF_RETURNS_NONNULL +#undef PROTOBUF_ATTRIBUTE_REINITIALIZES +#undef PROTOBUF_RTTI +#undef PROTOBUF_VERSION +#undef PROTOBUF_VERSION_SUFFIX +#undef PROTOBUF_FIELD_OFFSET +#undef PROTOBUF_MIN_HEADER_VERSION_FOR_PROTOC +#undef PROTOBUF_MIN_PROTOC_VERSION +#undef PROTOBUF_PREDICT_TRUE +#undef PROTOBUF_PREDICT_FALSE +#undef PROTOBUF_LONGLONG +#undef PROTOBUF_ULONGLONG +#undef PROTOBUF_LL_FORMAT +#undef PROTOBUF_GUARDED_BY +#undef PROTOBUF_FALLTHROUGH_INTENDED +#undef PROTOBUF_EXPORT +#undef PROTOC_EXPORT +#undef PROTOBUF_MUST_USE_RESULT +#undef PROTOBUF_NAMESPACE_OPEN +#undef PROTOBUF_NAMESPACE_CLOSE +#undef PROTOBUF_UNUSED +#undef PROTOBUF_ASSUME +#undef PROTOBUF_EXPORT_TEMPLATE_DECLARE +#undef PROTOBUF_EXPORT_TEMPLATE_DEFINE +#undef PROTOBUF_ALIGNAS +#undef PROTOBUF_FINAL +#undef PROTOBUF_THREAD_LOCAL +#undef PROTOBUF_MESSAGE_OWNED_ARENA_EXPERIMENT +#undef PROTOBUF_DISABLE_MSVC_UNION_WARNING +#undef PROTOBUF_ENABLE_MSVC_UNION_WARNING +#undef PROTOBUF_CONSTINIT +#undef PROTOBUF_MAYBE_CONSTEXPR +#undef PROTOBUF_ATTRIBUTE_NO_DESTROY + +// Restore macro that may have been #undef'd in port_def.inc. 
+#ifdef _MSC_VER +#pragma pop_macro("CREATE_NEW") +#pragma pop_macro("DOUBLE_CLICK") +#pragma pop_macro("ERROR") +#pragma pop_macro("ERROR_BUSY") +#pragma pop_macro("ERROR_NOT_FOUND") +#pragma pop_macro("GetMessage") +#pragma pop_macro("IGNORE") +#pragma pop_macro("IN") +#pragma pop_macro("INPUT_KEYBOARD") +#pragma pop_macro("OUT") +#pragma pop_macro("OPTIONAL") +#pragma pop_macro("min") +#pragma pop_macro("max") +#pragma pop_macro("NEAR") +#pragma pop_macro("NO_DATA") +#pragma pop_macro("NO_ERROR") +#pragma pop_macro("REASON_UNKNOWN") +#pragma pop_macro("SERVICE_DISABLED") +#pragma pop_macro("SEVERITY_ERROR") +#pragma pop_macro("STRICT") +#pragma pop_macro("timezone") +#endif + +#if defined(__clang__) || defined(__GNUC__) || defined(_MSC_VER) +#pragma pop_macro("DEBUG") +#pragma pop_macro("TRUE") +#pragma pop_macro("FALSE") +#endif // defined(__clang__) || defined(__GNUC__) || defined(_MSC_VER) + +#if defined(__clang__) +#pragma clang diagnostic pop +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif diff --git a/third_party/protobuf-lite/google/protobuf/repeated_field.h b/third_party/protobuf-lite/google/protobuf/repeated_field.h index b47ea994..26d58473 100644 --- a/third_party/protobuf-lite/google/protobuf/repeated_field.h +++ b/third_party/protobuf-lite/google/protobuf/repeated_field.h @@ -46,6 +46,7 @@ #ifndef GOOGLE_PROTOBUF_REPEATED_FIELD_H__ #define GOOGLE_PROTOBUF_REPEATED_FIELD_H__ +#include #ifdef _MSC_VER // This is required for min/max on VS2013 only. #include @@ -54,33 +55,47 @@ #include #include #include -#include +#include + #include #include #include -#include #include -#include +#include +#include #include -// Forward-declare these so that we can make them friends. -namespace google { -namespace upb { -namespace google_opensource { -class GMR_Handlers; -} // namespace google_opensource -} // namespace upb +// Must be included last. 
+#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif +namespace google { namespace protobuf { class Message; +class Reflection; + +template +struct WeakRepeatedPtrField; namespace internal { class MergePartialFromCodedStreamHelper; -static const int kMinRepeatedFieldAllocationSize = 4; +// kRepeatedFieldLowerClampLimit is the smallest size that will be allocated +// when growing a repeated field. +constexpr int kRepeatedFieldLowerClampLimit = 4; + +// kRepeatedFieldUpperClampLimit is the lowest signed integer value that +// overflows when multiplied by 2 (which is undefined behavior). Sizes above +// this will clamp to the maximum int value instead of following exponential +// growth when growing a repeated field. +constexpr int kRepeatedFieldUpperClampLimit = + (std::numeric_limits::max() / 2) + 1; // A utility function for logging that doesn't need any template types. void LogIndexOutOfBounds(int index, int size); @@ -101,8 +116,47 @@ inline int CalculateReserve(Iter begin, Iter end) { typedef typename std::iterator_traits::iterator_category Category; return CalculateReserve(begin, end, Category()); } -} // namespace internal +// Swaps two blocks of memory of size sizeof(T). 
+template +inline void SwapBlock(char* p, char* q) { + T tmp; + memcpy(&tmp, p, sizeof(T)); + memcpy(p, q, sizeof(T)); + memcpy(q, &tmp, sizeof(T)); +} + +// Swaps two blocks of memory of size kSize: +// template void memswap(char* p, char* q); + +template +inline typename std::enable_if<(kSize == 0), void>::type memswap(char*, char*) { +} + +#define PROTO_MEMSWAP_DEF_SIZE(reg_type, max_size) \ + template \ + typename std::enable_if<(kSize >= sizeof(reg_type) && kSize < (max_size)), \ + void>::type \ + memswap(char* p, char* q) { \ + SwapBlock(p, q); \ + memswap(p + sizeof(reg_type), \ + q + sizeof(reg_type)); \ + } + +PROTO_MEMSWAP_DEF_SIZE(uint8, 2) +PROTO_MEMSWAP_DEF_SIZE(uint16, 4) +PROTO_MEMSWAP_DEF_SIZE(uint32, 8) + +#ifdef __SIZEOF_INT128__ +PROTO_MEMSWAP_DEF_SIZE(uint64, 16) +PROTO_MEMSWAP_DEF_SIZE(__uint128_t, (1u << 31)) +#else +PROTO_MEMSWAP_DEF_SIZE(uint64, (1u << 31)) +#endif + +#undef PROTO_MEMSWAP_DEF_SIZE + +} // namespace internal // RepeatedField is used to represent repeated fields of a primitive type (in // other words, everything except strings and nested Messages). Most users will @@ -110,8 +164,12 @@ inline int CalculateReserve(Iter begin, Iter end) { // set-by-index, and add accessors that are generated for all repeated fields. template class RepeatedField final { + static_assert( + alignof(Arena) >= alignof(Element), + "We only support types that have an alignment smaller than Arena"); + public: - RepeatedField(); + constexpr RepeatedField(); explicit RepeatedField(Arena* arena); RepeatedField(const RepeatedField& other); template @@ -132,11 +190,19 @@ class RepeatedField final { const Element& operator[](int index) const { return Get(index); } Element& operator[](int index) { return *Mutable(index); } + const Element& at(int index) const; + Element& at(int index); + void Set(int index, const Element& value); void Add(const Element& value); // Appends a new element and return a pointer to it. 
// The new element is uninitialized if |Element| is a POD type. Element* Add(); + // Append elements in the range [begin, end) after reserving + // the appropriate number of elements. + template + void Add(Iter begin, Iter end); + // Remove the last element in the array. void RemoveLast(); @@ -209,15 +275,11 @@ class RepeatedField final { // Reverse iterator support typedef std::reverse_iterator const_reverse_iterator; typedef std::reverse_iterator reverse_iterator; - reverse_iterator rbegin() { - return reverse_iterator(end()); - } + reverse_iterator rbegin() { return reverse_iterator(end()); } const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - reverse_iterator rend() { - return reverse_iterator(begin()); - } + reverse_iterator rend() { return reverse_iterator(begin()); } const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } @@ -246,8 +308,9 @@ class RepeatedField final { iterator erase(const_iterator first, const_iterator last); // Get the Arena on which this RepeatedField stores its elements. - ::google::protobuf::Arena* GetArena() const { - return GetArenaNoVirtual(); + inline Arena* GetArena() const { + return (total_size_ == 0) ? static_cast(arena_or_elements_) + : rep()->arena; } // For internal use only. @@ -256,7 +319,7 @@ class RepeatedField final { inline void InternalSwap(RepeatedField* other); private: - static const int kInitialSize = 0; + static constexpr int kInitialSize = 0; // A note on the representation here (see also comment below for // RepeatedPtrFieldBase's struct Rep): // @@ -271,31 +334,43 @@ class RepeatedField final { int total_size_; struct Rep { Arena* arena; - Element elements[1]; + // Here we declare a huge array as a way of approximating C's "flexible + // array member" feature without relying on undefined behavior. 
+ Element elements[(std::numeric_limits::max() - 2 * sizeof(Arena*)) / + sizeof(Element)]; }; - // We can not use sizeof(Rep) - sizeof(Element) due to the trailing padding on - // the struct. We can not use sizeof(Arena*) as well because there might be - // a "gap" after the field arena and before the field elements (e.g., when - // Element is double and pointer is 32bit). - static const size_t kRepHeaderSize; - - // We reuse the Rep* for an Arena* when total_size == 0, to avoid having to do - // an allocation in the constructor when we have an Arena. - union Pointer { - Pointer(Arena* a) : arena(a) {} - Arena* arena; // When total_size_ == 0. - Rep* rep; // When total_size_ != 0. - } ptr_; + static constexpr size_t kRepHeaderSize = offsetof(Rep, elements); - Rep* rep() const { + // If total_size_ == 0 this points to an Arena otherwise it points to the + // elements member of a Rep struct. Using this invariant allows the storage of + // the arena pointer without an extra allocation in the constructor. + void* arena_or_elements_; + + // Return pointer to elements array. + // pre-condition: the array must have been allocated. + Element* elements() const { GOOGLE_DCHECK_GT(total_size_, 0); - return ptr_.rep; + // Because of above pre-condition this cast is safe. + return unsafe_elements(); + } + + // Return pointer to elements array if it exists otherwise either null or + // a invalid pointer is returned. This only happens for empty repeated fields, + // where you can't dereference this pointer anyway (it's empty). + Element* unsafe_elements() const { + return static_cast(arena_or_elements_); + } + + // Return pointer to the Rep struct. + // pre-condition: the Rep must have been allocated, ie elements() is safe. 
+ Rep* rep() const { + char* addr = reinterpret_cast(elements()) - offsetof(Rep, elements); + return reinterpret_cast(addr); } friend class Arena; typedef void InternalArenaConstructable_; - // Move the contents of |from| into |to|, possibly clobbering |from| in the // process. For primitive types this is just a memcpy(), but it could be // specialized for non-primitive types to, say, swap each element instead. @@ -304,20 +379,15 @@ class RepeatedField final { // Copy the elements of |from| into |to|. void CopyArray(Element* to, const Element* from, int size); - // Internal helper expected by Arena methods. - inline Arena* GetArenaNoVirtual() const { - return (total_size_ == 0) ? ptr_.arena : ptr_.rep->arena; - } - // Internal helper to delete all elements and deallocate the storage. - // If Element has a trivial destructor (for example, if it's a fundamental - // type, like int32), the loop will be removed by the optimizer. void InternalDeallocate(Rep* rep, int size) { if (rep != NULL) { Element* e = &rep->elements[0]; - Element* limit = &rep->elements[size]; - for (; e < limit; e++) { - e->~Element(); + if (!std::is_trivial::value) { + Element* limit = &rep->elements[size]; + for (; e < limit; e++) { + e->~Element(); + } } if (rep->arena == NULL) { #if defined(__GXX_DELETE_WITH_SIZE__) || defined(__cpp_sized_deallocation) @@ -330,17 +400,97 @@ class RepeatedField final { } } - friend class internal::WireFormatLite; - const Element* unsafe_data() const; -}; + // This class is a performance wrapper around RepeatedField::Add(const T&) + // function. In general unless a RepeatedField is a local stack variable LLVM + // has a hard time optimizing Add. 
The machine code tends to be + // loop: + // mov %size, dword ptr [%repeated_field] // load + // cmp %size, dword ptr [%repeated_field + 4] + // jae fallback + // mov %buffer, qword ptr [%repeated_field + 8] + // mov dword [%buffer + %size * 4], %value + // inc %size // increment + // mov dword ptr [%repeated_field], %size // store + // jmp loop + // + // This puts a load/store in each iteration of the important loop variable + // size. It's a pretty bad compile that happens even in simple cases, but + // largely the presence of the fallback path disturbs the compiler's mem-to-reg + // analysis. + // + // This class takes ownership of a repeated field for the duration of its + // lifetime. The repeated field should not be accessed during this time, i.e. + // only access through this class is allowed. This class should always be a + // function local stack variable. Intended use: + // + // void AddSequence(const int* begin, const int* end, RepeatedField* out) + // { + // RepeatedFieldAdder adder(out); // Take ownership of out + // for (auto it = begin; it != end; ++it) { + // adder.Add(*it); + // } + // } + // + // Typically due to the fact adder is a local stack variable. The compiler + // will be successful in mem-to-reg transformation and the machine code will + // be loop: cmp %size, %capacity jae fallback mov dword ptr [%buffer + %size * + // 4], %val inc %size jmp loop + // + // The first version executes at 7 cycles per iteration while the second + // version near 1 or 2 cycles.
+ template ::value> + class FastAdderImpl { + public: + explicit FastAdderImpl(RepeatedField* rf) : repeated_field_(rf) { + index_ = repeated_field_->current_size_; + capacity_ = repeated_field_->total_size_; + buffer_ = repeated_field_->unsafe_elements(); + } + ~FastAdderImpl() { repeated_field_->current_size_ = index_; } + + void Add(Element val) { + if (index_ == capacity_) { + repeated_field_->current_size_ = index_; + repeated_field_->Reserve(index_ + 1); + capacity_ = repeated_field_->total_size_; + buffer_ = repeated_field_->unsafe_elements(); + } + buffer_[index_++] = val; + } -template -const size_t RepeatedField::kRepHeaderSize = - reinterpret_cast(&reinterpret_cast(16)->elements[0]) - 16; + private: + RepeatedField* repeated_field_; + int index_; + int capacity_; + Element* buffer_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FastAdderImpl); + }; + + // FastAdder is a wrapper for adding fields. The specialization above handles + // POD types more efficiently than RepeatedField. + template + class FastAdderImpl { + public: + explicit FastAdderImpl(RepeatedField* rf) : repeated_field_(rf) {} + void Add(const Element& val) { repeated_field_->Add(val); } + + private: + RepeatedField* repeated_field_; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FastAdderImpl); + }; + + using FastAdder = FastAdderImpl<>; + + friend class TestRepeatedFieldHelper; + friend class ::google::protobuf::internal::ParseContext; +}; namespace internal { -template class RepeatedPtrIterator; -template class RepeatedPtrOverPtrsIterator; +template +class RepeatedPtrIterator; +template +class RepeatedPtrOverPtrsIterator; } // namespace internal namespace internal { @@ -364,7 +514,7 @@ namespace internal { // arena-related "copy if on different arena" behavior if the necessary methods // exist on the contained type. In particular, we rely on MergeFrom() existing // as a general proxy for the fact that a copy will work, and we also provide a -// specific override for string*. 
+// specific override for std::string*. template struct TypeImplementsMergeBehaviorProbeForMergeFrom { typedef char HasMerge; @@ -377,29 +527,36 @@ struct TypeImplementsMergeBehaviorProbeForMergeFrom { // We mangle these names a bit to avoid compatibility issues in 'unclean' // include environments that may have, e.g., "#define test ..." (yes, this // exists). - template - struct CheckType; - template static HasMerge Check( - CheckType*); - template static HasMerge Check( - CheckType*); - template static HasNoMerge Check(...); + template + struct CheckType; + template + static HasMerge Check(CheckType*); + template + static HasMerge Check(CheckType*); + template + static HasNoMerge Check(...); // Resolves to either std::true_type or std::false_type. typedef std::integral_constant(0)) == sizeof(HasMerge))> type; + (sizeof(Check(0)) == sizeof(HasMerge))> + type; }; template -struct TypeImplementsMergeBehavior : - TypeImplementsMergeBehaviorProbeForMergeFrom {}; +struct TypeImplementsMergeBehavior + : TypeImplementsMergeBehaviorProbeForMergeFrom {}; template <> -struct TypeImplementsMergeBehavior< ::std::string> { +struct TypeImplementsMergeBehavior { typedef std::true_type type; }; +template +struct IsMovable + : std::integral_constant::value && + std::is_move_assignable::value> {}; + // This is the common base class for RepeatedPtrFields. It deals only in void* // pointers. Users should not use this interface directly. // @@ -408,12 +565,9 @@ struct TypeImplementsMergeBehavior< ::std::string> { // class TypeHandler { // public: // typedef MyType Type; -// // WeakType is almost always the same as MyType, but we use it in -// // ImplicitWeakTypeHandler. 
-// typedef MyType WeakType; // static Type* New(); -// static WeakType* NewFromPrototype(const WeakType* prototype, -// ::google::protobuf::Arena* arena); +// static Type* NewFromPrototype(const Type* prototype, +// Arena* arena); // static void Delete(Type*); // static void Clear(Type*); // static void Merge(const Type& from, Type* to); @@ -421,19 +575,32 @@ struct TypeImplementsMergeBehavior< ::std::string> { // // Only needs to be implemented if SpaceUsedExcludingSelf() is called. // static int SpaceUsedLong(const Type&); // }; -class LIBPROTOBUF_EXPORT RepeatedPtrFieldBase { +class PROTOBUF_EXPORT RepeatedPtrFieldBase { protected: - RepeatedPtrFieldBase(); - explicit RepeatedPtrFieldBase(::google::protobuf::Arena* arena); - ~RepeatedPtrFieldBase() {} + constexpr RepeatedPtrFieldBase(); + explicit RepeatedPtrFieldBase(Arena* arena); + ~RepeatedPtrFieldBase() { +#ifndef NDEBUG + // Try to trigger segfault / asan failure in non-opt builds. If arena_ + // lifetime has ended before the destructor. + if (arena_) (void)arena_->SpaceAllocated(); +#endif + } + public: // Must be called from destructor. template void Destroy(); + protected: bool empty() const; int size() const; + template + const typename TypeHandler::Type& at(int index) const; + template + typename TypeHandler::Type& at(int index); + template typename TypeHandler::Type* Mutable(int index); template @@ -447,13 +614,13 @@ class LIBPROTOBUF_EXPORT RepeatedPtrFieldBase { // application code. template - const typename TypeHandler::WeakType& Get(int index) const; + const typename TypeHandler::Type& Get(int index) const; // Creates and adds an element using the given prototype, without introducing // a link-time dependency on the concrete message type. This method is used to // implement implicit weak fields. The prototype may be NULL, in which case an // ImplicitWeakMessage will be used as a placeholder. 
- google::protobuf::MessageLite* AddWeak(const google::protobuf::MessageLite* prototype); + MessageLite* AddWeak(const MessageLite* prototype); template void Clear(); @@ -464,9 +631,10 @@ class LIBPROTOBUF_EXPORT RepeatedPtrFieldBase { inline void InternalSwap(RepeatedPtrFieldBase* other); protected: - template - void Add(typename TypeHandler::Type&& value, - std::enable_if* dummy = NULL); + template < + typename TypeHandler, + typename std::enable_if::type* = nullptr> + void Add(typename TypeHandler::Type&& value); template void RemoveLast(); @@ -488,8 +656,8 @@ class LIBPROTOBUF_EXPORT RepeatedPtrFieldBase { template const typename TypeHandler::Type* const* data() const; - template GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE - void Swap(RepeatedPtrFieldBase* other); + template + PROTOBUF_ALWAYS_INLINE void Swap(RepeatedPtrFieldBase* other); void SwapElements(int index1, int index2); @@ -502,7 +670,7 @@ class LIBPROTOBUF_EXPORT RepeatedPtrFieldBase { template typename TypeHandler::Type* AddFromCleared(); - template + template void AddAllocated(typename TypeHandler::Type* value) { typename TypeImplementsMergeBehavior::type t; AddAllocatedInternal(value, t); @@ -533,27 +701,25 @@ class LIBPROTOBUF_EXPORT RepeatedPtrFieldBase { template void AddAllocatedInternal(typename TypeHandler::Type* value, std::false_type); - template GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE - void AddAllocatedSlowWithCopy(typename TypeHandler::Type* value, - Arena* value_arena, - Arena* my_arena); - template GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE - void AddAllocatedSlowWithoutCopy(typename TypeHandler::Type* value); + template + PROTOBUF_NOINLINE void AddAllocatedSlowWithCopy( + typename TypeHandler::Type* value, Arena* value_arena, Arena* my_arena); + template + PROTOBUF_NOINLINE void AddAllocatedSlowWithoutCopy( + typename TypeHandler::Type* value); template typename TypeHandler::Type* ReleaseLastInternal(std::true_type); template typename TypeHandler::Type* ReleaseLastInternal(std::false_type); - 
template GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE - void SwapFallback(RepeatedPtrFieldBase* other); + template + PROTOBUF_NOINLINE void SwapFallback(RepeatedPtrFieldBase* other); - inline Arena* GetArenaNoVirtual() const { - return arena_; - } + inline Arena* GetArena() const { return arena_; } private: - static const int kInitialSize = 0; + static constexpr int kInitialSize = 0; // A few notes on internal representation: // // We use an indirected approach, with struct Rep, to keep @@ -566,15 +732,16 @@ class LIBPROTOBUF_EXPORT RepeatedPtrFieldBase { // Placing all fields directly in the RepeatedPtrFieldBase instance costs // significant performance for memory-sensitive workloads. Arena* arena_; - int current_size_; - int total_size_; + int current_size_; + int total_size_; struct Rep { - int allocated_size; - void* elements[1]; + int allocated_size; + // Here we declare a huge array as a way of approximating C's "flexible + // array member" feature without relying on undefined behavior. + void* elements[(std::numeric_limits::max() - 2 * sizeof(int)) / + sizeof(void*)]; }; - static const size_t kRepHeaderSize = sizeof(Rep) - sizeof(void*); - // Contains arena ptr and the elements array. We also keep the invariant that - // if rep_ is NULL, then arena is NULL. + static constexpr size_t kRepHeaderSize = offsetof(Rep, elements); Rep* rep_; template @@ -588,13 +755,14 @@ class LIBPROTOBUF_EXPORT RepeatedPtrFieldBase { // Non-templated inner function to avoid code duplication. Takes a function // pointer to the type-specific (templated) inner allocate/merge loop. 
- void MergeFromInternal( - const RepeatedPtrFieldBase& other, - void (RepeatedPtrFieldBase::*inner_loop)(void**, void**, int, int)); + void MergeFromInternal(const RepeatedPtrFieldBase& other, + void (RepeatedPtrFieldBase::*inner_loop)(void**, + void**, int, + int)); - template - void MergeFromInnerLoop( - void** our_elems, void** other_elems, int length, int already_allocated); + template + void MergeFromInnerLoop(void** our_elems, void** other_elems, int length, + int already_allocated); // Internal helper: extend array space if necessary to contain |extend_amount| // more elements, and return a pointer to the element immediately following @@ -605,7 +773,7 @@ class LIBPROTOBUF_EXPORT RepeatedPtrFieldBase { // The reflection implementation needs to call protected methods directly, // reinterpreting pointers as being to Message instead of a specific Message // subclass. - friend class GeneratedMessageReflection; + friend class ::PROTOBUF_NAMESPACE_ID::Reflection; // ExtensionSet stores repeated message extensions as // RepeatedPtrField, but non-lite ExtensionSets need to implement @@ -622,12 +790,9 @@ class LIBPROTOBUF_EXPORT RepeatedPtrFieldBase { // The table-driven MergePartialFromCodedStream implementation needs to // operate on RepeatedPtrField. friend class MergePartialFromCodedStreamHelper; - - // To parse directly into a proto2 generated class, the upb class GMR_Handlers - // needs to be able to modify a RepeatedPtrFieldBase directly. 
- friend class upb::google_opensource::GMR_Handlers; - friend class AccessorHelper; + template + friend struct google::protobuf::WeakRepeatedPtrField; GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(RepeatedPtrFieldBase); }; @@ -636,28 +801,30 @@ template class GenericTypeHandler { public: typedef GenericType Type; - typedef GenericType WeakType; - static const bool Moveable = false; + using Movable = IsMovable; static inline GenericType* New(Arena* arena) { - return ::google::protobuf::Arena::CreateMaybeMessage(arena); + return Arena::CreateMaybeMessage(arena); } - static inline GenericType* NewFromPrototype( - const GenericType* prototype, ::google::protobuf::Arena* arena = NULL); + static inline GenericType* New(Arena* arena, GenericType&& value) { + return Arena::Create(arena, std::move(value)); + } + static inline GenericType* NewFromPrototype(const GenericType* prototype, + Arena* arena = NULL); static inline void Delete(GenericType* value, Arena* arena) { if (arena == NULL) { delete value; } } - static inline ::google::protobuf::Arena* GetArena(GenericType* value) { - return ::google::protobuf::Arena::GetArena(value); + static inline Arena* GetArena(GenericType* value) { + return Arena::GetArena(value); } static inline void* GetMaybeArenaPointer(GenericType* value) { - return ::google::protobuf::Arena::GetArena(value); + return Arena::GetArena(value); } static inline void Clear(GenericType* value) { value->Clear(); } - GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE + PROTOBUF_NOINLINE static void Merge(const GenericType& from, GenericType* to); static inline size_t SpaceUsedLong(const GenericType& value) { return value.SpaceUsedLong(); @@ -666,7 +833,7 @@ class GenericTypeHandler { template GenericType* GenericTypeHandler::NewFromPrototype( - const GenericType* /* prototype */, ::google::protobuf::Arena* arena) { + const GenericType* /* prototype */, Arena* arena) { return New(arena); } template @@ -678,15 +845,14 @@ void GenericTypeHandler::Merge(const GenericType& from, // 
NewFromPrototype() and Merge() are not defined inline here, as we will need // to do a virtual function dispatch anyways to go from Message* to call // New/Merge. -template<> +template <> MessageLite* GenericTypeHandler::NewFromPrototype( - const MessageLite* prototype, google::protobuf::Arena* arena); -template<> -inline google::protobuf::Arena* GenericTypeHandler::GetArena( - MessageLite* value) { + const MessageLite* prototype, Arena* arena); +template <> +inline Arena* GenericTypeHandler::GetArena(MessageLite* value) { return value->GetArena(); } -template<> +template <> inline void* GenericTypeHandler::GetMaybeArenaPointer( MessageLite* value) { return value->GetMaybeArenaPointer(); @@ -694,66 +860,55 @@ inline void* GenericTypeHandler::GetMaybeArenaPointer( template <> void GenericTypeHandler::Merge(const MessageLite& from, MessageLite* to); -template<> -inline void GenericTypeHandler::Clear(string* value) { +template <> +inline void GenericTypeHandler::Clear(std::string* value) { value->clear(); } -template<> -void GenericTypeHandler::Merge(const string& from, - string* to); - -// Declarations of the specialization as we cannot define them here, as the -// header that defines ProtocolMessage depends on types defined in this header. -#define DECLARE_SPECIALIZATIONS_FOR_BASE_PROTO_TYPES(TypeName) \ - template<> \ - TypeName* GenericTypeHandler::NewFromPrototype( \ - const TypeName* prototype, google::protobuf::Arena* arena); \ - template<> \ - google::protobuf::Arena* GenericTypeHandler::GetArena( \ - TypeName* value); \ - template<> \ - void* GenericTypeHandler::GetMaybeArenaPointer( \ - TypeName* value); +template <> +void GenericTypeHandler::Merge(const std::string& from, + std::string* to); // Message specialization bodies defined in message.cc. This split is necessary // to allow proto2-lite (which includes this header) to be independent of // Message. 
-DECLARE_SPECIALIZATIONS_FOR_BASE_PROTO_TYPES(Message) - - -#undef DECLARE_SPECIALIZATIONS_FOR_BASE_PROTO_TYPES +template <> +PROTOBUF_EXPORT Message* GenericTypeHandler::NewFromPrototype( + const Message* prototype, Arena* arena); +template <> +PROTOBUF_EXPORT Arena* GenericTypeHandler::GetArena(Message* value); +template <> +PROTOBUF_EXPORT void* GenericTypeHandler::GetMaybeArenaPointer( + Message* value); class StringTypeHandler { public: - typedef string Type; - typedef string WeakType; - static const bool Moveable = std::is_move_constructible::value && - std::is_move_assignable::value; + typedef std::string Type; + using Movable = IsMovable; - static inline string* New(Arena* arena) { - return Arena::Create(arena); + static inline std::string* New(Arena* arena) { + return Arena::Create(arena); } - static inline string* New(Arena* arena, string&& value) { - return Arena::Create(arena, std::move(value)); + static inline std::string* New(Arena* arena, std::string&& value) { + return Arena::Create(arena, std::move(value)); } - static inline string* NewFromPrototype(const string*, - ::google::protobuf::Arena* arena) { + static inline std::string* NewFromPrototype(const std::string*, + Arena* arena) { return New(arena); } - static inline ::google::protobuf::Arena* GetArena(string*) { - return NULL; - } - static inline void* GetMaybeArenaPointer(string* /* value */) { + static inline Arena* GetArena(std::string*) { return NULL; } + static inline void* GetMaybeArenaPointer(std::string* /* value */) { return NULL; } - static inline void Delete(string* value, Arena* arena) { + static inline void Delete(std::string* value, Arena* arena) { if (arena == NULL) { delete value; } } - static inline void Clear(string* value) { value->clear(); } - static inline void Merge(const string& from, string* to) { *to = from; } - static size_t SpaceUsedLong(const string& value) { + static inline void Clear(std::string* value) { value->clear(); } + static inline void Merge(const 
std::string& from, std::string* to) { + *to = from; + } + static size_t SpaceUsedLong(const std::string& value) { return sizeof(value) + StringSpaceUsedExcludingSelfLong(value); } }; @@ -765,8 +920,8 @@ class StringTypeHandler { template class RepeatedPtrField final : private internal::RepeatedPtrFieldBase { public: - RepeatedPtrField(); - explicit RepeatedPtrField(::google::protobuf::Arena* arena); + constexpr RepeatedPtrField(); + explicit RepeatedPtrField(Arena* arena); RepeatedPtrField(const RepeatedPtrField& other); template @@ -789,6 +944,9 @@ class RepeatedPtrField final : private internal::RepeatedPtrFieldBase { const Element& operator[](int index) const { return Get(index); } Element& operator[](int index) { return *Mutable(index); } + const Element& at(int index) const; + Element& at(int index); + // Remove the last element in the array. // Ownership of the element is retained by the array. void RemoveLast(); @@ -848,15 +1006,11 @@ class RepeatedPtrField final : private internal::RepeatedPtrFieldBase { // Reverse iterator support typedef std::reverse_iterator const_reverse_iterator; typedef std::reverse_iterator reverse_iterator; - reverse_iterator rbegin() { - return reverse_iterator(end()); - } + reverse_iterator rbegin() { return reverse_iterator(end()); } const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } - reverse_iterator rend() { - return reverse_iterator(begin()); - } + reverse_iterator rend() { return reverse_iterator(begin()); } const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } @@ -992,22 +1146,19 @@ class RepeatedPtrField final : private internal::RepeatedPtrFieldBase { iterator erase(const_iterator first, const_iterator last); // Gets the arena on which this RepeatedPtrField stores its elements. - ::google::protobuf::Arena* GetArena() const { - return GetArenaNoVirtual(); - } + inline Arena* GetArena() const; // For internal use only. 
// // This is public due to it being called by generated code. - using RepeatedPtrFieldBase::InternalSwap; + void InternalSwap(RepeatedPtrField* other) { + internal::RepeatedPtrFieldBase::InternalSwap(other); + } private: // Note: RepeatedPtrField SHOULD NOT be subclassed by users. class TypeHandler; - // Internal arena accessor expected by helpers in Arena. - inline Arena* GetArenaNoVirtual() const; - // Implementations for ExtractSubrange(). The copying behavior must be // included only if the type supports the necessary operations (e.g., // MergeFrom()), so we must resolve this at compile time. ExtractSubrange() @@ -1018,7 +1169,9 @@ class RepeatedPtrField final : private internal::RepeatedPtrFieldBase { std::false_type); friend class Arena; - friend class MessageLite; + + template + friend struct WeakRepeatedPtrField; typedef void InternalArenaConstructable_; @@ -1027,24 +1180,16 @@ class RepeatedPtrField final : private internal::RepeatedPtrFieldBase { // implementation ==================================================== template -inline RepeatedField::RepeatedField() - : current_size_(0), - total_size_(0), - ptr_(NULL) { -} +constexpr RepeatedField::RepeatedField() + : current_size_(0), total_size_(0), arena_or_elements_(nullptr) {} template inline RepeatedField::RepeatedField(Arena* arena) - : current_size_(0), - total_size_(0), - ptr_(arena) { -} + : current_size_(0), total_size_(0), arena_or_elements_(arena) {} template inline RepeatedField::RepeatedField(const RepeatedField& other) - : current_size_(0), - total_size_(0), - ptr_(NULL) { + : current_size_(0), total_size_(0), arena_or_elements_(nullptr) { if (other.current_size_ != 0) { Reserve(other.size()); AddNAlreadyReserved(other.size()); @@ -1055,20 +1200,8 @@ inline RepeatedField::RepeatedField(const RepeatedField& other) template template RepeatedField::RepeatedField(Iter begin, const Iter& end) - : current_size_(0), - total_size_(0), - ptr_(NULL) { - int reserve = internal::CalculateReserve(begin, 
end); - if (reserve != -1) { - Reserve(reserve); - for (; begin != end; ++begin) { - AddAlreadyReserved(*begin); - } - } else { - for (; begin != end; ++begin) { - Add(*begin); - } - } + : current_size_(0), total_size_(0), arena_or_elements_(nullptr) { + Add(begin, end); } template @@ -1079,10 +1212,9 @@ RepeatedField::~RepeatedField() { } template -inline RepeatedField& -RepeatedField::operator=(const RepeatedField& other) { - if (this != &other) - CopyFrom(other); +inline RepeatedField& RepeatedField::operator=( + const RepeatedField& other) { + if (this != &other) CopyFrom(other); return *this; } @@ -1090,8 +1222,9 @@ template inline RepeatedField::RepeatedField(RepeatedField&& other) noexcept : RepeatedField() { // We don't just call Swap(&other) here because it would perform 3 copies if - // the two fields are on different arenas. - if (other.GetArenaNoVirtual()) { + // other is on an arena. This field can't be on an arena because arena + // construction always uses the Arena* accepting constructor. + if (other.GetArena()) { CopyFrom(other); } else { InternalSwap(&other); @@ -1104,7 +1237,7 @@ inline RepeatedField& RepeatedField::operator=( // We don't just call Swap(&other) here because it would perform 3 copies if // the two fields are on different arenas. 
if (this != &other) { - if (this->GetArenaNoVirtual() != other.GetArenaNoVirtual()) { + if (this->GetArena() != other.GetArena()) { CopyFrom(other); } else { InternalSwap(&other); @@ -1128,35 +1261,37 @@ inline int RepeatedField::Capacity() const { return total_size_; } -template +template inline void RepeatedField::AddAlreadyReserved(const Element& value) { GOOGLE_DCHECK_LT(current_size_, total_size_); - rep()->elements[current_size_++] = value; + elements()[current_size_++] = value; } -template +template inline Element* RepeatedField::AddAlreadyReserved() { GOOGLE_DCHECK_LT(current_size_, total_size_); - return &rep()->elements[current_size_++]; + return &elements()[current_size_++]; } -template -inline Element* RepeatedField::AddNAlreadyReserved(int elements) { - GOOGLE_DCHECK_LE(current_size_ + elements, total_size_); - // Warning: total_size_ can be NULL if elements == 0 && current_size_ == 0. - // Existing callers depend on this behavior. :( - Element* ret = &ptr_.rep->elements[current_size_]; - current_size_ += elements; +template +inline Element* RepeatedField::AddNAlreadyReserved(int n) { + GOOGLE_DCHECK_GE(total_size_ - current_size_, n) + << total_size_ << ", " << current_size_; + // Warning: sometimes people call this when n == 0 and total_size_ == 0. In + // this case the return pointer points to a zero size array (n == 0). Hence + // we can just use unsafe_elements(), because the user cannot dereference the + // pointer anyway. 
+ Element* ret = unsafe_elements() + current_size_; + current_size_ += n; return ret; } -template +template inline void RepeatedField::Resize(int new_size, const Element& value) { GOOGLE_DCHECK_GE(new_size, 0); if (new_size > current_size_) { Reserve(new_size); - std::fill(&rep()->elements[current_size_], - &rep()->elements[new_size], value); + std::fill(&elements()[current_size_], &elements()[new_size], value); } current_size_ = new_size; } @@ -1165,33 +1300,83 @@ template inline const Element& RepeatedField::Get(int index) const { GOOGLE_DCHECK_GE(index, 0); GOOGLE_DCHECK_LT(index, current_size_); - return rep()->elements[index]; + return elements()[index]; +} + +template +inline const Element& RepeatedField::at(int index) const { + GOOGLE_CHECK_GE(index, 0); + GOOGLE_CHECK_LT(index, current_size_); + return elements()[index]; +} + +template +inline Element& RepeatedField::at(int index) { + GOOGLE_CHECK_GE(index, 0); + GOOGLE_CHECK_LT(index, current_size_); + return elements()[index]; } template inline Element* RepeatedField::Mutable(int index) { GOOGLE_DCHECK_GE(index, 0); GOOGLE_DCHECK_LT(index, current_size_); - return &rep()->elements[index]; + return &elements()[index]; } template inline void RepeatedField::Set(int index, const Element& value) { GOOGLE_DCHECK_GE(index, 0); GOOGLE_DCHECK_LT(index, current_size_); - rep()->elements[index] = value; + elements()[index] = value; } template inline void RepeatedField::Add(const Element& value) { - if (current_size_ == total_size_) Reserve(total_size_ + 1); - rep()->elements[current_size_++] = value; + uint32 size = current_size_; + if (static_cast(size) == total_size_) { + // value could reference an element of the array. Reserving new space will + // invalidate the reference. So we must make a copy first. 
+ auto tmp = value; + Reserve(total_size_ + 1); + elements()[size] = std::move(tmp); + } else { + elements()[size] = value; + } + current_size_ = size + 1; } template inline Element* RepeatedField::Add() { - if (current_size_ == total_size_) Reserve(total_size_ + 1); - return &rep()->elements[current_size_++]; + uint32 size = current_size_; + if (static_cast(size) == total_size_) Reserve(total_size_ + 1); + auto ptr = &elements()[size]; + current_size_ = size + 1; + return ptr; +} + +template +template +inline void RepeatedField::Add(Iter begin, Iter end) { + int reserve = internal::CalculateReserve(begin, end); + if (reserve != -1) { + if (reserve == 0) { + return; + } + + Reserve(reserve + size()); + // TODO(ckennelly): The compiler loses track of the buffer freshly + // allocated by Reserve() by the time we call elements, so it cannot + // guarantee that elements does not alias [begin(), end()). + // + // If restrict is available, annotating the pointer obtained from elements() + // causes this to lower to memcpy instead of memmove. + std::copy(begin, end, elements() + size()); + current_size_ = reserve + size(); + } else { + FastAdder fast_adder(this); + for (; begin != end; ++begin) fast_adder.Add(*begin); + } } template @@ -1201,16 +1386,15 @@ inline void RepeatedField::RemoveLast() { } template -void RepeatedField::ExtractSubrange( - int start, int num, Element* elements) { +void RepeatedField::ExtractSubrange(int start, int num, + Element* elements) { GOOGLE_DCHECK_GE(start, 0); GOOGLE_DCHECK_GE(num, 0); GOOGLE_DCHECK_LE(start + num, this->current_size_); // Save the values of the removed elements if requested. if (elements != NULL) { - for (int i = 0; i < num; ++i) - elements[i] = this->Get(i + start); + for (int i = 0; i < num; ++i) elements[i] = this->Get(i + start); } // Slide remaining elements down to fill the gap. 
@@ -1262,36 +1446,36 @@ inline typename RepeatedField::iterator RepeatedField::erase( template inline Element* RepeatedField::mutable_data() { - return total_size_ > 0 ? rep()->elements : NULL; + return unsafe_elements(); } template inline const Element* RepeatedField::data() const { - return total_size_ > 0 ? rep()->elements : NULL; -} - -template -inline const Element* RepeatedField::unsafe_data() const { - return rep()->elements; + return unsafe_elements(); } template inline void RepeatedField::InternalSwap(RepeatedField* other) { GOOGLE_DCHECK(this != other); - GOOGLE_DCHECK(GetArenaNoVirtual() == other->GetArenaNoVirtual()); + GOOGLE_DCHECK(GetArena() == other->GetArena()); - std::swap(ptr_, other->ptr_); - std::swap(current_size_, other->current_size_); - std::swap(total_size_, other->total_size_); + // Swap all fields at once. + static_assert(std::is_standard_layout>::value, + "offsetof() requires standard layout before c++17"); + internal::memswaparena_or_elements_) - + offsetof(RepeatedField, current_size_)>( + reinterpret_cast(this) + offsetof(RepeatedField, current_size_), + reinterpret_cast(other) + offsetof(RepeatedField, current_size_)); } template void RepeatedField::Swap(RepeatedField* other) { if (this == other) return; - if (GetArenaNoVirtual() == other->GetArenaNoVirtual()) { + if (GetArena() == other->GetArena()) { InternalSwap(other); } else { - RepeatedField temp(other->GetArenaNoVirtual()); + RepeatedField temp(other->GetArena()); temp.MergeFrom(*this); CopyFrom(*other); other->UnsafeArenaSwap(&temp); @@ -1307,38 +1491,37 @@ void RepeatedField::UnsafeArenaSwap(RepeatedField* other) { template void RepeatedField::SwapElements(int index1, int index2) { using std::swap; // enable ADL with fallback - swap(rep()->elements[index1], rep()->elements[index2]); + swap(elements()[index1], elements()[index2]); } template inline typename RepeatedField::iterator RepeatedField::begin() { - return total_size_ > 0 ? 
rep()->elements : NULL; + return unsafe_elements(); } template inline typename RepeatedField::const_iterator RepeatedField::begin() const { - return total_size_ > 0 ? rep()->elements : NULL; + return unsafe_elements(); } template inline typename RepeatedField::const_iterator RepeatedField::cbegin() const { - return total_size_ > 0 ? rep()->elements : NULL; + return unsafe_elements(); } template -inline typename RepeatedField::iterator -RepeatedField::end() { - return total_size_ > 0 ? rep()->elements + current_size_ : NULL; +inline typename RepeatedField::iterator RepeatedField::end() { + return unsafe_elements() + current_size_; } template inline typename RepeatedField::const_iterator RepeatedField::end() const { - return total_size_ > 0 ? rep()->elements + current_size_ : NULL; + return unsafe_elements() + current_size_; } template inline typename RepeatedField::const_iterator RepeatedField::cend() const { - return total_size_ > 0 ? rep()->elements + current_size_ : NULL; + return unsafe_elements() + current_size_; } template @@ -1346,29 +1529,58 @@ inline size_t RepeatedField::SpaceUsedExcludingSelfLong() const { return total_size_ > 0 ? (total_size_ * sizeof(Element) + kRepHeaderSize) : 0; } +namespace internal { +// Returns the new size for a reserved field based on its 'total_size' and the +// requested 'new_size'. The result is clamped to the closed interval: +// [internal::kMinRepeatedFieldAllocationSize, +// std::numeric_limits::max()] +// Requires: +// new_size > total_size && +// (total_size == 0 || +// total_size >= kRepeatedFieldLowerClampLimit) +inline int CalculateReserveSize(int total_size, int new_size) { + if (new_size < kRepeatedFieldLowerClampLimit) { + // Clamp to smallest allowed size. + return kRepeatedFieldLowerClampLimit; + } + if (total_size < kRepeatedFieldUpperClampLimit) { + return std::max(total_size * 2, new_size); + } else { + // Clamp to largest allowed size. 
+ GOOGLE_DCHECK_GT(new_size, kRepeatedFieldUpperClampLimit); + return std::numeric_limits::max(); + } +} +} // namespace internal + // Avoid inlining of Reserve(): new, copy, and delete[] lead to a significant // amount of code bloat. template void RepeatedField::Reserve(int new_size) { if (total_size_ >= new_size) return; Rep* old_rep = total_size_ > 0 ? rep() : NULL; - Arena* arena = GetArenaNoVirtual(); - new_size = std::max(google::protobuf::internal::kMinRepeatedFieldAllocationSize, - std::max(total_size_ * 2, new_size)); + Rep* new_rep; + Arena* arena = GetArena(); + new_size = internal::CalculateReserveSize(total_size_, new_size); GOOGLE_DCHECK_LE( static_cast(new_size), (std::numeric_limits::max() - kRepHeaderSize) / sizeof(Element)) << "Requested size is too large to fit into size_t."; - size_t bytes = kRepHeaderSize + sizeof(Element) * static_cast(new_size); + size_t bytes = + kRepHeaderSize + sizeof(Element) * static_cast(new_size); if (arena == NULL) { - ptr_.rep = static_cast(::operator new(bytes)); + new_rep = static_cast(::operator new(bytes)); } else { - ptr_.rep = reinterpret_cast( - ::google::protobuf::Arena::CreateArray(arena, bytes)); + new_rep = reinterpret_cast(Arena::CreateArray(arena, bytes)); } - ptr_.rep->arena = arena; + new_rep->arena = arena; int old_total_size = total_size_; + // Already known: new_size >= internal::kMinRepeatedFieldAllocationSize + // Maintain invariant: + // total_size_ == 0 || + // total_size_ >= internal::kMinRepeatedFieldAllocationSize total_size_ = new_size; + arena_or_elements_ = new_rep->elements; // Invoke placement-new on newly allocated elements. We shouldn't have to do // this, since Element is supposed to be POD, but a previous version of this // code allocated storage with "new Element[size]" and some code uses @@ -1378,13 +1590,13 @@ void RepeatedField::Reserve(int new_size) { // effect unless its side-effects are required for correctness. 
// Note that we do this before MoveArray() below because Element's copy // assignment implementation will want an initialized instance first. - Element* e = &rep()->elements[0]; + Element* e = &elements()[0]; Element* limit = e + total_size_; for (; e < limit; e++) { new (e) Element; } if (current_size_ > 0) { - MoveArray(&rep()->elements[0], old_rep->elements, current_size_); + MoveArray(&elements()[0], old_rep->elements, current_size_); } // Likewise, we need to invoke destructors on the old array. @@ -1401,22 +1613,23 @@ inline void RepeatedField::Truncate(int new_size) { } template -inline void RepeatedField::MoveArray( - Element* to, Element* from, int array_size) { +inline void RepeatedField::MoveArray(Element* to, Element* from, + int array_size) { CopyArray(to, from, array_size); } template -inline void RepeatedField::CopyArray( - Element* to, const Element* from, int array_size) { +inline void RepeatedField::CopyArray(Element* to, const Element* from, + int array_size) { internal::ElementCopier()(to, from, array_size); } namespace internal { template -void ElementCopier::operator()( - Element* to, const Element* from, int array_size) { +void ElementCopier::operator()(Element* to, + const Element* from, + int array_size) { std::copy(from, from + array_size, to); } @@ -1434,19 +1647,11 @@ struct ElementCopier { namespace internal { -inline RepeatedPtrFieldBase::RepeatedPtrFieldBase() - : arena_(NULL), - current_size_(0), - total_size_(0), - rep_(NULL) { -} +constexpr RepeatedPtrFieldBase::RepeatedPtrFieldBase() + : arena_(NULL), current_size_(0), total_size_(0), rep_(NULL) {} -inline RepeatedPtrFieldBase::RepeatedPtrFieldBase(::google::protobuf::Arena* arena) - : arena_(arena), - current_size_(0), - total_size_(0), - rep_(NULL) { -} +inline RepeatedPtrFieldBase::RepeatedPtrFieldBase(Arena* arena) + : arena_(arena), current_size_(0), total_size_(0), rep_(NULL) {} template void RepeatedPtrFieldBase::Destroy() { @@ -1468,7 +1673,7 @@ void 
RepeatedPtrFieldBase::Destroy() { template inline void RepeatedPtrFieldBase::Swap(RepeatedPtrFieldBase* other) { - if (other->GetArenaNoVirtual() == GetArenaNoVirtual()) { + if (other->GetArena() == GetArena()) { InternalSwap(other); } else { SwapFallback(other); @@ -1477,39 +1682,48 @@ inline void RepeatedPtrFieldBase::Swap(RepeatedPtrFieldBase* other) { template void RepeatedPtrFieldBase::SwapFallback(RepeatedPtrFieldBase* other) { - GOOGLE_DCHECK(other->GetArenaNoVirtual() != GetArenaNoVirtual()); + GOOGLE_DCHECK(other->GetArena() != GetArena()); // Copy semantics in this case. We try to improve efficiency by placing the - // temporary on |other|'s arena so that messages are copied cross-arena only - // once, not twice. - RepeatedPtrFieldBase temp(other->GetArenaNoVirtual()); + // temporary on |other|'s arena so that messages are copied twice rather than + // three times. + RepeatedPtrFieldBase temp(other->GetArena()); temp.MergeFrom(*this); this->Clear(); this->MergeFrom(*other); - other->Clear(); other->InternalSwap(&temp); temp.Destroy(); // Frees rep_ if `other` had no arena. 
} -inline bool RepeatedPtrFieldBase::empty() const { - return current_size_ == 0; -} +inline bool RepeatedPtrFieldBase::empty() const { return current_size_ == 0; } -inline int RepeatedPtrFieldBase::size() const { - return current_size_; -} +inline int RepeatedPtrFieldBase::size() const { return current_size_; } template -inline const typename TypeHandler::WeakType& -RepeatedPtrFieldBase::Get(int index) const { +inline const typename TypeHandler::Type& RepeatedPtrFieldBase::Get( + int index) const { GOOGLE_DCHECK_GE(index, 0); GOOGLE_DCHECK_LT(index, current_size_); return *cast(rep_->elements[index]); } template -inline typename TypeHandler::Type* -RepeatedPtrFieldBase::Mutable(int index) { +inline const typename TypeHandler::Type& RepeatedPtrFieldBase::at( + int index) const { + GOOGLE_CHECK_GE(index, 0); + GOOGLE_CHECK_LT(index, current_size_); + return *cast(rep_->elements[index]); +} + +template +inline typename TypeHandler::Type& RepeatedPtrFieldBase::at(int index) { + GOOGLE_CHECK_GE(index, 0); + GOOGLE_CHECK_LT(index, current_size_); + return *cast(rep_->elements[index]); +} + +template +inline typename TypeHandler::Type* RepeatedPtrFieldBase::Mutable(int index) { GOOGLE_DCHECK_GE(index, 0); GOOGLE_DCHECK_LT(index, current_size_); return cast(rep_->elements[index]); @@ -1538,10 +1752,9 @@ inline typename TypeHandler::Type* RepeatedPtrFieldBase::Add( return result; } -template -inline void RepeatedPtrFieldBase::Add( - typename TypeHandler::Type&& value, - std::enable_if*) { +template ::type*> +inline void RepeatedPtrFieldBase::Add(typename TypeHandler::Type&& value) { if (rep_ != NULL && current_size_ < rep_->allocated_size) { *cast(rep_->elements[current_size_++]) = std::move(value); return; @@ -1585,8 +1798,8 @@ template inline void RepeatedPtrFieldBase::MergeFrom(const RepeatedPtrFieldBase& other) { GOOGLE_DCHECK_NE(&other, this); if (other.current_size_ == 0) return; - MergeFromInternal( - other, &RepeatedPtrFieldBase::MergeFromInnerLoop); + 
MergeFromInternal(other, + &RepeatedPtrFieldBase::MergeFromInnerLoop); } inline void RepeatedPtrFieldBase::MergeFromInternal( @@ -1597,8 +1810,8 @@ inline void RepeatedPtrFieldBase::MergeFromInternal( void** other_elements = other.rep_->elements; void** new_elements = InternalExtend(other_size); int allocated_elems = rep_->allocated_size - current_size_; - (this->*inner_loop)(new_elements, other_elements, - other_size, allocated_elems); + (this->*inner_loop)(new_elements, other_elements, other_size, + allocated_elems); current_size_ += other_size; if (rep_->allocated_size < current_size_) { rep_->allocated_size = current_size_; @@ -1606,25 +1819,26 @@ inline void RepeatedPtrFieldBase::MergeFromInternal( } // Merges other_elems to our_elems. -template -void RepeatedPtrFieldBase::MergeFromInnerLoop( - void** our_elems, void** other_elems, int length, int already_allocated) { +template +void RepeatedPtrFieldBase::MergeFromInnerLoop(void** our_elems, + void** other_elems, int length, + int already_allocated) { // Split into two loops, over ranges [0, allocated) and [allocated, length), // to avoid a branch within the loop. for (int i = 0; i < already_allocated && i < length; i++) { // Already allocated: use existing element. - typename TypeHandler::WeakType* other_elem = - reinterpret_cast(other_elems[i]); - typename TypeHandler::WeakType* new_elem = - reinterpret_cast(our_elems[i]); + typename TypeHandler::Type* other_elem = + reinterpret_cast(other_elems[i]); + typename TypeHandler::Type* new_elem = + reinterpret_cast(our_elems[i]); TypeHandler::Merge(*other_elem, new_elem); } - Arena* arena = GetArenaNoVirtual(); + Arena* arena = GetArena(); for (int i = already_allocated; i < length; i++) { // Not allocated: alloc a new element first, then merge it. 
- typename TypeHandler::WeakType* other_elem = - reinterpret_cast(other_elems[i]); - typename TypeHandler::WeakType* new_elem = + typename TypeHandler::Type* other_elem = + reinterpret_cast(other_elems[i]); + typename TypeHandler::Type* new_elem = TypeHandler::NewFromPrototype(other_elem, arena); TypeHandler::Merge(*other_elem, new_elem); our_elems[i] = new_elem; @@ -1638,9 +1852,7 @@ inline void RepeatedPtrFieldBase::CopyFrom(const RepeatedPtrFieldBase& other) { RepeatedPtrFieldBase::MergeFrom(other); } -inline int RepeatedPtrFieldBase::Capacity() const { - return total_size_; -} +inline int RepeatedPtrFieldBase::Capacity() const { return total_size_; } inline void* const* RepeatedPtrFieldBase::raw_data() const { return rep_ ? rep_->elements : NULL; @@ -1658,8 +1870,8 @@ inline typename TypeHandler::Type** RepeatedPtrFieldBase::mutable_data() { } template -inline const typename TypeHandler::Type* const* -RepeatedPtrFieldBase::data() const { +inline const typename TypeHandler::Type* const* RepeatedPtrFieldBase::data() + const { // TODO(kenton): Breaks C++ aliasing rules. We should probably remove this // method entirely. return reinterpret_cast(raw_data()); @@ -1675,8 +1887,8 @@ inline size_t RepeatedPtrFieldBase::SpaceUsedExcludingSelfLong() const { size_t allocated_bytes = static_cast(total_size_) * sizeof(void*); if (rep_ != NULL) { for (int i = 0; i < rep_->allocated_size; ++i) { - allocated_bytes += TypeHandler::SpaceUsedLong( - *cast(rep_->elements[i])); + allocated_bytes += + TypeHandler::SpaceUsedLong(*cast(rep_->elements[i])); } allocated_bytes += kRepHeaderSize; } @@ -1695,13 +1907,11 @@ inline typename TypeHandler::Type* RepeatedPtrFieldBase::AddFromCleared() { // AddAllocated version that implements arena-safe copying behavior. 
template void RepeatedPtrFieldBase::AddAllocatedInternal( - typename TypeHandler::Type* value, - std::true_type) { - Arena* element_arena = reinterpret_cast( - TypeHandler::GetMaybeArenaPointer(value)); - Arena* arena = GetArenaNoVirtual(); - if (arena == element_arena && rep_ && - rep_->allocated_size < total_size_) { + typename TypeHandler::Type* value, std::true_type) { + Arena* element_arena = + reinterpret_cast(TypeHandler::GetMaybeArenaPointer(value)); + Arena* arena = GetArena(); + if (arena == element_arena && rep_ && rep_->allocated_size < total_size_) { // Fast path: underlying arena representation (tagged pointer) is equal to // our arena pointer, and we can add to array without resizing it (at least // one slot that is not allocated). @@ -1715,13 +1925,13 @@ void RepeatedPtrFieldBase::AddAllocatedInternal( current_size_ = current_size_ + 1; rep_->allocated_size = rep_->allocated_size + 1; } else { - AddAllocatedSlowWithCopy( - value, TypeHandler::GetArena(value), arena); + AddAllocatedSlowWithCopy(value, TypeHandler::GetArena(value), + arena); } } // Slowpath handles all cases, copying if necessary. -template +template void RepeatedPtrFieldBase::AddAllocatedSlowWithCopy( // Pass value_arena and my_arena to avoid duplicate virtual call (value) or // load (mine). @@ -1745,9 +1955,8 @@ void RepeatedPtrFieldBase::AddAllocatedSlowWithCopy( // AddAllocated version that does not implement arena-safe copying behavior. template void RepeatedPtrFieldBase::AddAllocatedInternal( - typename TypeHandler::Type* value, - std::false_type) { - if (rep_ && rep_->allocated_size < total_size_) { + typename TypeHandler::Type* value, std::false_type) { + if (rep_ && rep_->allocated_size < total_size_) { // Fast path: underlying arena representation (tagged pointer) is equal to // our arena pointer, and we can add to array without resizing it (at least // one slot that is not allocated). 
@@ -1778,8 +1987,8 @@ void RepeatedPtrFieldBase::UnsafeArenaAddAllocated( // cleared objects awaiting reuse. We don't want to grow the array in this // case because otherwise a loop calling AddAllocated() followed by Clear() // would leak memory. - TypeHandler::Delete( - cast(rep_->elements[current_size_]), arena_); + TypeHandler::Delete(cast(rep_->elements[current_size_]), + arena_); } else if (current_size_ < rep_->allocated_size) { // We have some cleared objects. We don't care about their order, so we // can just move the first one to the end to make space. @@ -1795,12 +2004,12 @@ void RepeatedPtrFieldBase::UnsafeArenaAddAllocated( // ReleaseLast() for types that implement merge/copy behavior. template -inline typename TypeHandler::Type* -RepeatedPtrFieldBase::ReleaseLastInternal(std::true_type) { +inline typename TypeHandler::Type* RepeatedPtrFieldBase::ReleaseLastInternal( + std::true_type) { // First, release an element. typename TypeHandler::Type* result = UnsafeArenaReleaseLast(); // Now perform a copy if we're on an arena. - Arena* arena = GetArenaNoVirtual(); + Arena* arena = GetArena(); if (arena == NULL) { return result; } else { @@ -1816,9 +2025,9 @@ RepeatedPtrFieldBase::ReleaseLastInternal(std::true_type) { // an arena, since the user really should implement the copy operation in this // case. template -inline typename TypeHandler::Type* -RepeatedPtrFieldBase::ReleaseLastInternal(std::false_type) { - GOOGLE_DCHECK(GetArenaNoVirtual() == NULL) +inline typename TypeHandler::Type* RepeatedPtrFieldBase::ReleaseLastInternal( + std::false_type) { + GOOGLE_DCHECK(GetArena() == NULL) << "ReleaseLast() called on a RepeatedPtrField that is on an arena, " << "with a type that does not implement MergeFrom. 
This is unsafe; " << "please implement MergeFrom for your type."; @@ -1827,7 +2036,7 @@ RepeatedPtrFieldBase::ReleaseLastInternal(std::false_type) { template inline typename TypeHandler::Type* - RepeatedPtrFieldBase::UnsafeArenaReleaseLast() { +RepeatedPtrFieldBase::UnsafeArenaReleaseLast() { GOOGLE_DCHECK_GT(current_size_, 0); typename TypeHandler::Type* result = cast(rep_->elements[--current_size_]); @@ -1847,7 +2056,7 @@ inline int RepeatedPtrFieldBase::ClearedCount() const { template inline void RepeatedPtrFieldBase::AddCleared( typename TypeHandler::Type* value) { - GOOGLE_DCHECK(GetArenaNoVirtual() == NULL) + GOOGLE_DCHECK(GetArena() == NULL) << "AddCleared() can only be used on a RepeatedPtrField not on an arena."; GOOGLE_DCHECK(TypeHandler::GetArena(value) == NULL) << "AddCleared() can only accept values not on an arena."; @@ -1859,10 +2068,10 @@ inline void RepeatedPtrFieldBase::AddCleared( template inline typename TypeHandler::Type* RepeatedPtrFieldBase::ReleaseCleared() { - GOOGLE_DCHECK(GetArenaNoVirtual() == NULL) + GOOGLE_DCHECK(GetArena() == NULL) << "ReleaseCleared() can only be used on a RepeatedPtrField not on " << "an arena."; - GOOGLE_DCHECK(GetArenaNoVirtual() == NULL); + GOOGLE_DCHECK(GetArena() == NULL); GOOGLE_DCHECK(rep_ != NULL); GOOGLE_DCHECK_GT(rep_->allocated_size, current_size_); return cast(rep_->elements[--rep_->allocated_size]); @@ -1874,33 +2083,31 @@ inline typename TypeHandler::Type* RepeatedPtrFieldBase::ReleaseCleared() { template class RepeatedPtrField::TypeHandler - : public internal::GenericTypeHandler { -}; + : public internal::GenericTypeHandler {}; template <> -class RepeatedPtrField::TypeHandler - : public internal::StringTypeHandler { -}; +class RepeatedPtrField::TypeHandler + : public internal::StringTypeHandler {}; template -inline RepeatedPtrField::RepeatedPtrField() - : RepeatedPtrFieldBase() {} +constexpr RepeatedPtrField::RepeatedPtrField() + : RepeatedPtrFieldBase() {} template -inline 
RepeatedPtrField::RepeatedPtrField(::google::protobuf::Arena* arena) : - RepeatedPtrFieldBase(arena) {} +inline RepeatedPtrField::RepeatedPtrField(Arena* arena) + : RepeatedPtrFieldBase(arena) {} template inline RepeatedPtrField::RepeatedPtrField( const RepeatedPtrField& other) - : RepeatedPtrFieldBase() { + : RepeatedPtrFieldBase() { MergeFrom(other); } template template -inline RepeatedPtrField::RepeatedPtrField( - Iter begin, const Iter& end) { +inline RepeatedPtrField::RepeatedPtrField(Iter begin, + const Iter& end) { int reserve = internal::CalculateReserve(begin, end); if (reserve != -1) { Reserve(reserve); @@ -1918,8 +2125,7 @@ RepeatedPtrField::~RepeatedPtrField() { template inline RepeatedPtrField& RepeatedPtrField::operator=( const RepeatedPtrField& other) { - if (this != &other) - CopyFrom(other); + if (this != &other) CopyFrom(other); return *this; } @@ -1928,8 +2134,9 @@ inline RepeatedPtrField::RepeatedPtrField( RepeatedPtrField&& other) noexcept : RepeatedPtrField() { // We don't just call Swap(&other) here because it would perform 3 copies if - // the two fields are on different arenas. - if (other.GetArenaNoVirtual()) { + // other is on an arena. This field can't be on an arena because arena + // construction always uses the Arena* accepting constructor. + if (other.GetArena()) { CopyFrom(other); } else { InternalSwap(&other); @@ -1942,7 +2149,7 @@ inline RepeatedPtrField& RepeatedPtrField::operator=( // We don't just call Swap(&other) here because it would perform 3 copies if // the two fields are on different arenas. 
if (this != &other) { - if (this->GetArenaNoVirtual() != other.GetArenaNoVirtual()) { + if (this->GetArena() != other.GetArena()) { CopyFrom(other); } else { InternalSwap(&other); @@ -1966,6 +2173,16 @@ inline const Element& RepeatedPtrField::Get(int index) const { return RepeatedPtrFieldBase::Get(index); } +template +inline const Element& RepeatedPtrField::at(int index) const { + return RepeatedPtrFieldBase::at(index); +} + +template +inline Element& RepeatedPtrField::at(int index) { + return RepeatedPtrFieldBase::at(index); +} + template inline Element* RepeatedPtrField::Mutable(int index) { @@ -1999,8 +2216,8 @@ inline void RepeatedPtrField::DeleteSubrange(int start, int num) { } template -inline void RepeatedPtrField::ExtractSubrange( - int start, int num, Element** elements) { +inline void RepeatedPtrField::ExtractSubrange(int start, int num, + Element** elements) { typename internal::TypeImplementsMergeBehavior< typename TypeHandler::Type>::type t; ExtractSubrangeInternal(start, num, elements, t); @@ -2018,12 +2235,12 @@ inline void RepeatedPtrField::ExtractSubrangeInternal( if (num > 0) { // Save the values of the removed elements if requested. if (elements != NULL) { - if (GetArenaNoVirtual() != NULL) { + if (GetArena() != NULL) { // If we're on an arena, we perform a copy for each element so that the // returned elements are heap-allocated. for (int i = 0; i < num; ++i) { - Element* element = RepeatedPtrFieldBase:: - Mutable(i + start); + Element* element = + RepeatedPtrFieldBase::Mutable(i + start); typename TypeHandler::Type* new_value = TypeHandler::NewFromPrototype(element, NULL); TypeHandler::Merge(*element, new_value); @@ -2041,14 +2258,14 @@ inline void RepeatedPtrField::ExtractSubrangeInternal( // ExtractSubrange() implementation for types that do not implement merge/copy // behavior. 
-template +template inline void RepeatedPtrField::ExtractSubrangeInternal( int start, int num, Element** elements, std::false_type) { // This case is identical to UnsafeArenaExtractSubrange(). However, since // ExtractSubrange() must return heap-allocated objects by contract, and we // cannot fulfill this contract if we are an on arena, we must GOOGLE_DCHECK() that // we are not on an arena. - GOOGLE_DCHECK(GetArenaNoVirtual() == NULL) + GOOGLE_DCHECK(GetArena() == NULL) << "ExtractSubrange() when arena is non-NULL is only supported when " << "the Element type supplies a MergeFrom() operation to make copies."; UnsafeArenaExtractSubrange(start, num, elements); @@ -2084,8 +2301,7 @@ inline void RepeatedPtrField::MergeFrom( } template -inline void RepeatedPtrField::CopyFrom( - const RepeatedPtrField& other) { +inline void RepeatedPtrField::CopyFrom(const RepeatedPtrField& other) { RepeatedPtrFieldBase::CopyFrom(other); } @@ -2116,16 +2332,14 @@ inline const Element* const* RepeatedPtrField::data() const { template inline void RepeatedPtrField::Swap(RepeatedPtrField* other) { - if (this == other) - return; + if (this == other) return; RepeatedPtrFieldBase::Swap(other); } template inline void RepeatedPtrField::UnsafeArenaSwap( RepeatedPtrField* other) { - if (this == other) - return; + if (this == other) return; RepeatedPtrFieldBase::InternalSwap(other); } @@ -2135,8 +2349,8 @@ inline void RepeatedPtrField::SwapElements(int index1, int index2) { } template -inline Arena* RepeatedPtrField::GetArenaNoVirtual() const { - return RepeatedPtrFieldBase::GetArenaNoVirtual(); +inline Arena* RepeatedPtrField::GetArena() const { + return RepeatedPtrFieldBase::GetArena(); } template @@ -2204,31 +2418,22 @@ namespace internal { // // This code based on net/proto/proto-array-internal.h by Jeffrey Yasskin // (jyasskin@google.com). 
-template -class RepeatedPtrIterator - : public std::iterator< - std::random_access_iterator_tag, Element> { +template +class RepeatedPtrIterator { public: - typedef RepeatedPtrIterator iterator; - typedef std::iterator< - std::random_access_iterator_tag, Element> superclass; - - // Shadow the value_type in std::iterator<> because const_iterator::value_type - // needs to be T, not const T. - typedef typename std::remove_const::type value_type; - - // Let the compiler know that these are type names, so we don't have to - // write "typename" in front of them everywhere. - typedef typename superclass::reference reference; - typedef typename superclass::pointer pointer; - typedef typename superclass::difference_type difference_type; + using iterator = RepeatedPtrIterator; + using iterator_category = std::random_access_iterator_tag; + using value_type = typename std::remove_const::type; + using difference_type = std::ptrdiff_t; + using pointer = Element*; + using reference = Element&; RepeatedPtrIterator() : it_(NULL) {} explicit RepeatedPtrIterator(void* const* it) : it_(it) {} // Allow "upcasting" from RepeatedPtrIterator to // RepeatedPtrIterator. - template + template RepeatedPtrIterator(const RepeatedPtrIterator& other) : it_(other.it_) { // Force a compiler error if the other type is not convertible to ours. 
@@ -2239,13 +2444,19 @@ class RepeatedPtrIterator // dereferenceable reference operator*() const { return *reinterpret_cast(*it_); } - pointer operator->() const { return &(operator*()); } + pointer operator->() const { return &(operator*()); } // {inc,dec}rementable - iterator& operator++() { ++it_; return *this; } - iterator operator++(int) { return iterator(it_++); } - iterator& operator--() { --it_; return *this; } - iterator operator--(int) { return iterator(it_--); } + iterator& operator++() { + ++it_; + return *this; + } + iterator operator++(int) { return iterator(it_++); } + iterator& operator--() { + --it_; + return *this; + } + iterator operator--(int) { return iterator(it_--); } // equality_comparable bool operator==(const iterator& x) const { return it_ == x.it_; } @@ -2286,7 +2497,7 @@ class RepeatedPtrIterator difference_type operator-(const iterator& x) const { return it_ - x.it_; } private: - template + template friend class RepeatedPtrIterator; // The internal iterator. @@ -2301,34 +2512,33 @@ class RepeatedPtrIterator // referenced by the iterator. It should either be "void *" for a mutable // iterator, or "const void* const" for a constant iterator. template -class RepeatedPtrOverPtrsIterator - : public std::iterator { +class RepeatedPtrOverPtrsIterator { public: - typedef RepeatedPtrOverPtrsIterator iterator; - typedef std::iterator superclass; - - // Shadow the value_type in std::iterator<> because const_iterator::value_type - // needs to be T, not const T. - typedef typename std::remove_const::type value_type; - - // Let the compiler know that these are type names, so we don't have to - // write "typename" in front of them everywhere. 
- typedef typename superclass::reference reference; - typedef typename superclass::pointer pointer; - typedef typename superclass::difference_type difference_type; + using iterator = RepeatedPtrOverPtrsIterator; + using iterator_category = std::random_access_iterator_tag; + using value_type = typename std::remove_const::type; + using difference_type = std::ptrdiff_t; + using pointer = Element*; + using reference = Element&; RepeatedPtrOverPtrsIterator() : it_(NULL) {} explicit RepeatedPtrOverPtrsIterator(VoidPtr* it) : it_(it) {} // dereferenceable reference operator*() const { return *reinterpret_cast(it_); } - pointer operator->() const { return &(operator*()); } + pointer operator->() const { return &(operator*()); } // {inc,dec}rementable - iterator& operator++() { ++it_; return *this; } - iterator operator++(int) { return iterator(it_++); } - iterator& operator--() { --it_; return *this; } - iterator operator--(int) { return iterator(it_--); } + iterator& operator++() { + ++it_; + return *this; + } + iterator operator++(int) { return iterator(it_++); } + iterator& operator--() { + --it_; + return *this; + } + iterator operator--(int) { return iterator(it_--); } // equality_comparable bool operator==(const iterator& x) const { return it_ == x.it_; } @@ -2369,7 +2579,7 @@ class RepeatedPtrOverPtrsIterator difference_type operator-(const iterator& x) const { return it_ - x.it_; } private: - template + template friend class RepeatedPtrIterator; // The internal iterator. @@ -2378,11 +2588,17 @@ class RepeatedPtrOverPtrsIterator void RepeatedPtrFieldBase::InternalSwap(RepeatedPtrFieldBase* other) { GOOGLE_DCHECK(this != other); - GOOGLE_DCHECK(GetArenaNoVirtual() == other->GetArenaNoVirtual()); + GOOGLE_DCHECK(GetArena() == other->GetArena()); - std::swap(rep_, other->rep_); - std::swap(current_size_, other->current_size_); - std::swap(total_size_, other->total_size_); + // Swap all fields at once. 
+ static_assert(std::is_standard_layout::value, + "offsetof() requires standard layout before c++17"); + internal::memswaprep_) - + offsetof(RepeatedPtrFieldBase, current_size_)>( + reinterpret_cast(this) + + offsetof(RepeatedPtrFieldBase, current_size_), + reinterpret_cast(other) + + offsetof(RepeatedPtrFieldBase, current_size_)); } } // namespace internal @@ -2440,35 +2656,30 @@ RepeatedPtrField::pointer_end() const { const_cast(raw_data() + size())); } - // Iterators and helper functions that follow the spirit of the STL // std::back_insert_iterator and std::back_inserter but are tailor-made // for RepeatedField and RepeatedPtrField. Typical usage would be: // // std::copy(some_sequence.begin(), some_sequence.end(), -// google::protobuf::RepeatedFieldBackInserter(proto.mutable_sequence())); +// RepeatedFieldBackInserter(proto.mutable_sequence())); // // Ported by johannes from util/gtl/proto-array-iterators.h namespace internal { // A back inserter for RepeatedField objects. -template class RepeatedFieldBackInsertIterator +template +class RepeatedFieldBackInsertIterator : public std::iterator { public: explicit RepeatedFieldBackInsertIterator( RepeatedField* const mutable_field) - : field_(mutable_field) { - } + : field_(mutable_field) {} RepeatedFieldBackInsertIterator& operator=(const T& value) { field_->Add(value); return *this; } - RepeatedFieldBackInsertIterator& operator*() { - return *this; - } - RepeatedFieldBackInsertIterator& operator++() { - return *this; - } + RepeatedFieldBackInsertIterator& operator*() { return *this; } + RepeatedFieldBackInsertIterator& operator++() { return *this; } RepeatedFieldBackInsertIterator& operator++(int /* unused */) { return *this; } @@ -2478,13 +2689,12 @@ template class RepeatedFieldBackInsertIterator }; // A back inserter for RepeatedPtrField objects. 
-template class RepeatedPtrFieldBackInsertIterator +template +class RepeatedPtrFieldBackInsertIterator : public std::iterator { public: - RepeatedPtrFieldBackInsertIterator( - RepeatedPtrField* const mutable_field) - : field_(mutable_field) { - } + RepeatedPtrFieldBackInsertIterator(RepeatedPtrField* const mutable_field) + : field_(mutable_field) {} RepeatedPtrFieldBackInsertIterator& operator=(const T& value) { *field_->Add() = value; return *this; @@ -2498,12 +2708,8 @@ template class RepeatedPtrFieldBackInsertIterator *field_->Add() = std::move(value); return *this; } - RepeatedPtrFieldBackInsertIterator& operator*() { - return *this; - } - RepeatedPtrFieldBackInsertIterator& operator++() { - return *this; - } + RepeatedPtrFieldBackInsertIterator& operator*() { return *this; } + RepeatedPtrFieldBackInsertIterator& operator++() { return *this; } RepeatedPtrFieldBackInsertIterator& operator++(int /* unused */) { return *this; } @@ -2514,26 +2720,21 @@ template class RepeatedPtrFieldBackInsertIterator // A back inserter for RepeatedPtrFields that inserts by transferring ownership // of a pointer. 
-template class AllocatedRepeatedPtrFieldBackInsertIterator +template +class AllocatedRepeatedPtrFieldBackInsertIterator : public std::iterator { public: explicit AllocatedRepeatedPtrFieldBackInsertIterator( RepeatedPtrField* const mutable_field) - : field_(mutable_field) { - } + : field_(mutable_field) {} AllocatedRepeatedPtrFieldBackInsertIterator& operator=( T* const ptr_to_value) { field_->AddAllocated(ptr_to_value); return *this; } - AllocatedRepeatedPtrFieldBackInsertIterator& operator*() { - return *this; - } - AllocatedRepeatedPtrFieldBackInsertIterator& operator++() { - return *this; - } - AllocatedRepeatedPtrFieldBackInsertIterator& operator++( - int /* unused */) { + AllocatedRepeatedPtrFieldBackInsertIterator& operator*() { return *this; } + AllocatedRepeatedPtrFieldBackInsertIterator& operator++() { return *this; } + AllocatedRepeatedPtrFieldBackInsertIterator& operator++(int /* unused */) { return *this; } @@ -2543,16 +2744,15 @@ template class AllocatedRepeatedPtrFieldBackInsertIterator // Almost identical to AllocatedRepeatedPtrFieldBackInsertIterator. This one // uses the UnsafeArenaAddAllocated instead. -template +template class UnsafeArenaAllocatedRepeatedPtrFieldBackInsertIterator : public std::iterator { public: explicit UnsafeArenaAllocatedRepeatedPtrFieldBackInsertIterator( - ::google::protobuf::RepeatedPtrField* const mutable_field) - : field_(mutable_field) { - } + RepeatedPtrField* const mutable_field) + : field_(mutable_field) {} UnsafeArenaAllocatedRepeatedPtrFieldBackInsertIterator& operator=( - T const* const ptr_to_value) { + T const* const ptr_to_value) { field_->UnsafeArenaAddAllocated(const_cast(ptr_to_value)); return *this; } @@ -2568,37 +2768,41 @@ class UnsafeArenaAllocatedRepeatedPtrFieldBackInsertIterator } private: - ::google::protobuf::RepeatedPtrField* field_; + RepeatedPtrField* field_; }; } // namespace internal // Provides a back insert iterator for RepeatedField instances, // similar to std::back_inserter(). 
-template internal::RepeatedFieldBackInsertIterator -RepeatedFieldBackInserter(RepeatedField* const mutable_field) { +template +internal::RepeatedFieldBackInsertIterator RepeatedFieldBackInserter( + RepeatedField* const mutable_field) { return internal::RepeatedFieldBackInsertIterator(mutable_field); } // Provides a back insert iterator for RepeatedPtrField instances, // similar to std::back_inserter(). -template internal::RepeatedPtrFieldBackInsertIterator -RepeatedPtrFieldBackInserter(RepeatedPtrField* const mutable_field) { +template +internal::RepeatedPtrFieldBackInsertIterator RepeatedPtrFieldBackInserter( + RepeatedPtrField* const mutable_field) { return internal::RepeatedPtrFieldBackInsertIterator(mutable_field); } // Special back insert iterator for RepeatedPtrField instances, just in // case someone wants to write generic template code that can access both // RepeatedFields and RepeatedPtrFields using a common name. -template internal::RepeatedPtrFieldBackInsertIterator -RepeatedFieldBackInserter(RepeatedPtrField* const mutable_field) { +template +internal::RepeatedPtrFieldBackInsertIterator RepeatedFieldBackInserter( + RepeatedPtrField* const mutable_field) { return internal::RepeatedPtrFieldBackInsertIterator(mutable_field); } // Provides a back insert iterator for RepeatedPtrField instances // similar to std::back_inserter() which transfers the ownership while // copying elements. -template internal::AllocatedRepeatedPtrFieldBackInsertIterator +template +internal::AllocatedRepeatedPtrFieldBackInsertIterator AllocatedRepeatedPtrFieldBackInserter( RepeatedPtrField* const mutable_field) { return internal::AllocatedRepeatedPtrFieldBackInsertIterator( @@ -2616,15 +2820,28 @@ AllocatedRepeatedPtrFieldBackInserter( // If you put temp_field on the arena this fails, because the ownership // transfers to the arena at the "AddAllocated" call and is not released anymore // causing a double delete. Using UnsafeArenaAddAllocated prevents this. 
-template +template internal::UnsafeArenaAllocatedRepeatedPtrFieldBackInsertIterator UnsafeArenaAllocatedRepeatedPtrFieldBackInserter( - ::google::protobuf::RepeatedPtrField* const mutable_field) { + RepeatedPtrField* const mutable_field) { return internal::UnsafeArenaAllocatedRepeatedPtrFieldBackInsertIterator( mutable_field); } -} // namespace protobuf +// Extern declarations of common instantiations to reduce library bloat. +extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField; +extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField; +extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField; +extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField; +extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField; +extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField; +extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE RepeatedField; +extern template class PROTOBUF_EXPORT_TEMPLATE_DECLARE + RepeatedPtrField; +} // namespace protobuf } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_REPEATED_FIELD_H__ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/bytestream.h b/third_party/protobuf-lite/google/protobuf/stubs/bytestream.h index 86510d14..c7a48dea 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/bytestream.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/bytestream.h @@ -56,8 +56,9 @@ #include #include +#include + class CordByteSink; -class MemBlock; namespace google { namespace protobuf { @@ -74,7 +75,7 @@ namespace strings { // sink->Append(my_data.data(), my_data.size()); // sink->Flush(); // -class LIBPROTOBUF_EXPORT ByteSink { +class PROTOBUF_EXPORT ByteSink { public: ByteSink() {} virtual ~ByteSink() {} @@ -82,7 +83,7 @@ class LIBPROTOBUF_EXPORT ByteSink { // Appends the "n" bytes starting at "bytes". virtual void Append(const char* bytes, size_t n) = 0; - // Flushes internal buffers. The default implemenation does nothing. 
ByteSink + // Flushes internal buffers. The default implementation does nothing. ByteSink // subclasses may use internal buffers that require calling Flush() at the end // of the stream. virtual void Flush(); @@ -103,7 +104,7 @@ class LIBPROTOBUF_EXPORT ByteSink { // source->Skip(data.length()); // } // -class LIBPROTOBUF_EXPORT ByteSource { +class PROTOBUF_EXPORT ByteSource { public: ByteSource() {} virtual ~ByteSource() {} @@ -159,10 +160,10 @@ class LIBPROTOBUF_EXPORT ByteSource { // sink.Append("hi", 2); // OK // sink.Append(data, 100); // WOOPS! Overflows buf[10]. // -class LIBPROTOBUF_EXPORT UncheckedArrayByteSink : public ByteSink { +class PROTOBUF_EXPORT UncheckedArrayByteSink : public ByteSink { public: explicit UncheckedArrayByteSink(char* dest) : dest_(dest) {} - virtual void Append(const char* data, size_t n); + virtual void Append(const char* data, size_t n) override; // Returns the current output pointer so that a caller can see how many bytes // were produced. @@ -187,10 +188,10 @@ class LIBPROTOBUF_EXPORT UncheckedArrayByteSink : public ByteSink { // sink.Append("hi", 2); // OK // sink.Append(data, 100); // Will only write 8 more bytes // -class LIBPROTOBUF_EXPORT CheckedArrayByteSink : public ByteSink { +class PROTOBUF_EXPORT CheckedArrayByteSink : public ByteSink { public: CheckedArrayByteSink(char* outbuf, size_t capacity); - virtual void Append(const char* bytes, size_t n); + virtual void Append(const char* bytes, size_t n) override; // Returns the number of bytes actually written to the sink. 
size_t NumberOfBytesWritten() const { return size_; } @@ -223,11 +224,11 @@ class LIBPROTOBUF_EXPORT CheckedArrayByteSink : public ByteSink { // const char* buf = sink.GetBuffer(); // Ownership transferred // delete[] buf; // -class LIBPROTOBUF_EXPORT GrowingArrayByteSink : public strings::ByteSink { +class PROTOBUF_EXPORT GrowingArrayByteSink : public strings::ByteSink { public: explicit GrowingArrayByteSink(size_t estimated_size); virtual ~GrowingArrayByteSink(); - virtual void Append(const char* bytes, size_t n); + virtual void Append(const char* bytes, size_t n) override; // Returns the allocated buffer, and sets nbytes to its size. The caller takes // ownership of the buffer and must delete it with delete[]. @@ -253,13 +254,13 @@ class LIBPROTOBUF_EXPORT GrowingArrayByteSink : public strings::ByteSink { // sink.Append("World", 5); // assert(dest == "Hello World"); // -class LIBPROTOBUF_EXPORT StringByteSink : public ByteSink { +class PROTOBUF_EXPORT StringByteSink : public ByteSink { public: - explicit StringByteSink(string* dest) : dest_(dest) {} - virtual void Append(const char* data, size_t n); + explicit StringByteSink(std::string* dest) : dest_(dest) {} + virtual void Append(const char* data, size_t n) override; private: - string* dest_; + std::string* dest_; GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StringByteSink); }; @@ -270,10 +271,10 @@ class LIBPROTOBUF_EXPORT StringByteSink : public ByteSink { // NullByteSink sink; // sink.Append(data, data.size()); // All data ignored. 
// -class LIBPROTOBUF_EXPORT NullByteSink : public ByteSink { +class PROTOBUF_EXPORT NullByteSink : public ByteSink { public: NullByteSink() {} - virtual void Append(const char *data, size_t n) {} + void Append(const char* /*data*/, size_t /*n*/) override {} private: GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(NullByteSink); @@ -292,13 +293,13 @@ class LIBPROTOBUF_EXPORT NullByteSink : public ByteSink { // assert(source.Available() == 5); // assert(source.Peek() == "Hello"); // -class LIBPROTOBUF_EXPORT ArrayByteSource : public ByteSource { +class PROTOBUF_EXPORT ArrayByteSource : public ByteSource { public: explicit ArrayByteSource(StringPiece s) : input_(s) {} - virtual size_t Available() const; - virtual StringPiece Peek(); - virtual void Skip(size_t n); + virtual size_t Available() const override; + virtual StringPiece Peek() override; + virtual void Skip(size_t n) override; private: StringPiece input_; @@ -323,18 +324,18 @@ class LIBPROTOBUF_EXPORT ArrayByteSource : public ByteSource { // assert(limit.Available() == 5); // assert(limit.Peek() == "Hello"); // -class LIBPROTOBUF_EXPORT LimitByteSource : public ByteSource { +class PROTOBUF_EXPORT LimitByteSource : public ByteSource { public: // Returns at most "limit" bytes from "source". LimitByteSource(ByteSource* source, size_t limit); - virtual size_t Available() const; - virtual StringPiece Peek(); - virtual void Skip(size_t n); + virtual size_t Available() const override; + virtual StringPiece Peek() override; + virtual void Skip(size_t n) override; // We override CopyTo so that we can forward to the underlying source, in // case it has an efficient implementation of CopyTo. 
- virtual void CopyTo(ByteSink* sink, size_t n); + virtual void CopyTo(ByteSink* sink, size_t n) override; private: ByteSource* source_; @@ -345,4 +346,6 @@ class LIBPROTOBUF_EXPORT LimitByteSource : public ByteSource { } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_STUBS_BYTESTREAM_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/callback.h b/third_party/protobuf-lite/google/protobuf/stubs/callback.h index 6888f136..43d546d1 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/callback.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/callback.h @@ -5,6 +5,8 @@ #include +#include + // =================================================================== // emulates google3/base/callback.h @@ -60,15 +62,15 @@ namespace protobuf { // Note that NewCallback() is a bit touchy regarding argument types. Generally, // the values you provide for the parameter bindings must exactly match the // types accepted by the callback function. For example: -// void Foo(string s); +// void Foo(std::string s); // NewCallback(&Foo, "foo"); // WON'T WORK: const char* != string -// NewCallback(&Foo, string("foo")); // WORKS +// NewCallback(&Foo, std::string("foo")); // WORKS // Also note that the arguments cannot be references: -// void Foo(const string& s); -// string my_str; -// NewCallback(&Foo, my_str); // WON'T WORK: Can't use referecnes. +// void Foo(const std::string& s); +// std::string my_str; +// NewCallback(&Foo, my_str); // WON'T WORK: Can't use references. // However, correctly-typed pointers will work just fine. 
-class LIBPROTOBUF_EXPORT Closure { +class PROTOBUF_EXPORT Closure { public: Closure() {} virtual ~Closure(); @@ -91,8 +93,8 @@ class ResultCallback { GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ResultCallback); }; -template -class LIBPROTOBUF_EXPORT ResultCallback1 { +template +class PROTOBUF_EXPORT ResultCallback1 { public: ResultCallback1() {} virtual ~ResultCallback1() {} @@ -103,8 +105,8 @@ class LIBPROTOBUF_EXPORT ResultCallback1 { GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ResultCallback1); }; -template -class LIBPROTOBUF_EXPORT ResultCallback2 { +template +class PROTOBUF_EXPORT ResultCallback2 { public: ResultCallback2() {} virtual ~ResultCallback2() {} @@ -117,7 +119,7 @@ class LIBPROTOBUF_EXPORT ResultCallback2 { namespace internal { -class LIBPROTOBUF_EXPORT FunctionClosure0 : public Closure { +class PROTOBUF_EXPORT FunctionClosure0 : public Closure { public: typedef void (*FunctionType)(); @@ -125,7 +127,7 @@ class LIBPROTOBUF_EXPORT FunctionClosure0 : public Closure { : function_(function), self_deleting_(self_deleting) {} ~FunctionClosure0(); - void Run() { + void Run() override { bool needs_delete = self_deleting_; // read in case callback deletes function_(); if (needs_delete) delete this; @@ -145,7 +147,7 @@ class MethodClosure0 : public Closure { : object_(object), method_(method), self_deleting_(self_deleting) {} ~MethodClosure0() {} - void Run() { + void Run() override { bool needs_delete = self_deleting_; // read in case callback deletes (object_->*method_)(); if (needs_delete) delete this; @@ -168,7 +170,7 @@ class FunctionClosure1 : public Closure { arg1_(arg1) {} ~FunctionClosure1() {} - void Run() { + void Run() override { bool needs_delete = self_deleting_; // read in case callback deletes function_(arg1_); if (needs_delete) delete this; @@ -191,7 +193,7 @@ class MethodClosure1 : public Closure { arg1_(arg1) {} ~MethodClosure1() {} - void Run() { + void Run() override { bool needs_delete = self_deleting_; // read in case callback deletes 
(object_->*method_)(arg1_); if (needs_delete) delete this; @@ -215,7 +217,7 @@ class FunctionClosure2 : public Closure { arg1_(arg1), arg2_(arg2) {} ~FunctionClosure2() {} - void Run() { + void Run() override { bool needs_delete = self_deleting_; // read in case callback deletes function_(arg1_, arg2_); if (needs_delete) delete this; @@ -239,7 +241,7 @@ class MethodClosure2 : public Closure { arg1_(arg1), arg2_(arg2) {} ~MethodClosure2() {} - void Run() { + void Run() override { bool needs_delete = self_deleting_; // read in case callback deletes (object_->*method_)(arg1_, arg2_); if (needs_delete) delete this; @@ -262,7 +264,7 @@ class FunctionResultCallback_0_0 : public ResultCallback { : function_(function), self_deleting_(self_deleting) {} ~FunctionResultCallback_0_0() {} - R Run() { + R Run() override { bool needs_delete = self_deleting_; // read in case callback deletes R result = function_(); if (needs_delete) delete this; @@ -284,7 +286,7 @@ class FunctionResultCallback_1_0 : public ResultCallback { : function_(function), self_deleting_(self_deleting), p1_(p1) {} ~FunctionResultCallback_1_0() {} - R Run() { + R Run() override { bool needs_delete = self_deleting_; // read in case callback deletes R result = function_(p1_); if (needs_delete) delete this; @@ -306,7 +308,7 @@ class FunctionResultCallback_0_1 : public ResultCallback1 { : function_(function), self_deleting_(self_deleting) {} ~FunctionResultCallback_0_1() {} - R Run(Arg1 a1) { + R Run(Arg1 a1) override { bool needs_delete = self_deleting_; // read in case callback deletes R result = function_(a1); if (needs_delete) delete this; @@ -328,7 +330,7 @@ class FunctionResultCallback_1_1 : public ResultCallback1 { : function_(function), self_deleting_(self_deleting), p1_(p1) {} ~FunctionResultCallback_1_1() {} - R Run(A1 a1) { + R Run(A1 a1) override { bool needs_delete = self_deleting_; // read in case callback deletes R result = function_(p1_, a1); if (needs_delete) delete this; @@ -371,12 +373,12 @@ 
class MethodResultCallback_0_0 : public ResultCallback { }; template -class MethodResultCallback_5_2 : public ResultCallback2 { + typename P4, typename P5, typename P6, typename A1, typename A2> +class MethodResultCallback_6_2 : public ResultCallback2 { public: - typedef R (T::*MethodType)(P1, P2, P3, P4, P5, A1, A2); - MethodResultCallback_5_2(T* object, MethodType method, bool self_deleting, - P1 p1, P2 p2, P3 p3, P4 p4, P5 p5) + typedef R (T::*MethodType)(P1, P2, P3, P4, P5, P6, A1, A2); + MethodResultCallback_6_2(T* object, MethodType method, bool self_deleting, + P1 p1, P2 p2, P3 p3, P4 p4, P5 p5, P6 p6) : object_(object), method_(method), self_deleting_(self_deleting), @@ -384,12 +386,13 @@ class MethodResultCallback_5_2 : public ResultCallback2 { p2_(p2), p3_(p3), p4_(p4), - p5_(p5) {} - ~MethodResultCallback_5_2() {} + p5_(p5), + p6_(p6) {} + ~MethodResultCallback_6_2() {} - R Run(A1 a1, A2 a2) { + R Run(A1 a1, A2 a2) override { bool needs_delete = self_deleting_; - R result = (object_->*method_)(p1_, p2_, p3_, p4_, p5_, a1, a2); + R result = (object_->*method_)(p1_, p2_, p3_, p4_, p5_, p6_, a1, a2); if (needs_delete) delete this; return result; } @@ -403,6 +406,7 @@ class MethodResultCallback_5_2 : public ResultCallback2 { typename std::remove_reference::type p3_; typename std::remove_reference::type p4_; typename std::remove_reference::type p5_; + typename std::remove_reference::type p6_; }; } // namespace internal @@ -551,27 +555,29 @@ inline ResultCallback* NewPermanentCallback( return new internal::MethodResultCallback_0_0(object, function, false); } -// See MethodResultCallback_5_2 +// See MethodResultCallback_6_2 template + typename P4, typename P5, typename P6, typename A1, typename A2> inline ResultCallback2* NewPermanentCallback( - T* object, R (T::*function)(P1, P2, P3, P4, P5, A1, A2), + T* object, R (T::*function)(P1, P2, P3, P4, P5, P6, A1, A2), typename internal::InternalConstRef::type p1, typename internal::InternalConstRef::type p2, 
typename internal::InternalConstRef::type p3, typename internal::InternalConstRef::type p4, - typename internal::InternalConstRef::type p5) { - return new internal::MethodResultCallback_5_2(object, function, false, p1, - p2, p3, p4, p5); + typename internal::InternalConstRef::type p5, + typename internal::InternalConstRef::type p6) { + return new internal::MethodResultCallback_6_2(object, function, false, + p1, p2, p3, p4, p5, p6); } // A function which does nothing. Useful for creating no-op callbacks, e.g.: // Closure* nothing = NewCallback(&DoNothing); -void LIBPROTOBUF_EXPORT DoNothing(); - +void PROTOBUF_EXPORT DoNothing(); } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_STUBS_CALLBACK_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/casts.h b/third_party/protobuf-lite/google/protobuf/stubs/casts.h index 35e2dba0..d8a49cec 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/casts.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/casts.h @@ -31,13 +31,15 @@ #ifndef GOOGLE_PROTOBUF_CASTS_H__ #define GOOGLE_PROTOBUF_CASTS_H__ -#include - #include +#include +#include + namespace google { namespace protobuf { namespace internal { + // Use implicit_cast as a safe version of static_cast or const_cast // for upcasting in the type hierarchy (i.e. casting a pointer to Foo // to a pointer to SuperclassOfFoo or casting a pointer to Foo to @@ -45,7 +47,7 @@ namespace internal { // When you use implicit_cast, the compiler checks that the cast is safe. // Such explicit implicit_casts are necessary in surprisingly many // situations where C++ demands an exact type match instead of an -// argument type convertable to a target type. +// argument type convertible to a target type. 
// // The From type can be inferred, so the preferred syntax for using // implicit_cast is the same as for static_cast etc.: @@ -88,8 +90,8 @@ inline To down_cast(From* f) { // so we only accept pointers implicit_cast(0); } -#if !defined(NDEBUG) && !defined(GOOGLE_PROTOBUF_NO_RTTI) - assert(f == NULL || dynamic_cast(f) != NULL); // RTTI: debug mode only! +#if !defined(NDEBUG) && PROTOBUF_RTTI + assert(f == nullptr || dynamic_cast(f) != nullptr); // RTTI: debug mode only! #endif return static_cast(f); } @@ -105,9 +107,9 @@ inline To down_cast(From& f) { implicit_cast(0); } -#if !defined(NDEBUG) && !defined(GOOGLE_PROTOBUF_NO_RTTI) +#if !defined(NDEBUG) && PROTOBUF_RTTI // RTTI: debug mode only! - assert(dynamic_cast(&f) != NULL); + assert(dynamic_cast(&f) != nullptr); #endif return *static_cast(&f); } @@ -131,4 +133,7 @@ using internal::bit_cast; } // namespace protobuf } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_CASTS_H__ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/common.h b/third_party/protobuf-lite/google/protobuf/stubs/common.h index d35377ae..2d210d72 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/common.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/common.h @@ -43,14 +43,10 @@ #include #include -#include #include #include - -// TODO(liujisi): Remove the following includes after the include clean-up. -#include -#include -#include +#include +#include #ifndef PROTOBUF_USE_EXCEPTIONS #if defined(_MSC_VER) && defined(_CPPUNWIND) @@ -73,22 +69,7 @@ #include #endif -#if defined(_WIN32) && defined(GetMessage) -// Allow GetMessage to be used as a valid method name in protobuf classes. -// windows.h defines GetMessage() as a macro. Let's re-define it as an inline -// function. The inline function should be equivalent for C++ users. 
-inline BOOL GetMessage_Win32( - LPMSG lpMsg, HWND hWnd, - UINT wMsgFilterMin, UINT wMsgFilterMax) { - return GetMessage(lpMsg, hWnd, wMsgFilterMin, wMsgFilterMax); -} -#undef GetMessage -inline BOOL GetMessage( - LPMSG lpMsg, HWND hWnd, - UINT wMsgFilterMin, UINT wMsgFilterMax) { - return GetMessage_Win32(lpMsg, hWnd, wMsgFilterMin, wMsgFilterMax); -} -#endif +#include namespace std {} @@ -101,35 +82,31 @@ namespace internal { // The current version, represented as a single integer to make comparison // easier: major * 10^6 + minor * 10^3 + micro -#define GOOGLE_PROTOBUF_VERSION 3006001 +#define GOOGLE_PROTOBUF_VERSION 3014000 // A suffix string for alpha, beta or rc releases. Empty for stable releases. #define GOOGLE_PROTOBUF_VERSION_SUFFIX "" -// The minimum library version which works with the current version of the -// headers. -#define GOOGLE_PROTOBUF_MIN_LIBRARY_VERSION 3006001 - // The minimum header version which works with the current version of // the library. This constant should only be used by protoc's C++ code // generator. -static const int kMinHeaderVersionForLibrary = 3006001; +static const int kMinHeaderVersionForLibrary = 3014000; // The minimum protoc version which works with the current version of the // headers. -#define GOOGLE_PROTOBUF_MIN_PROTOC_VERSION 3006001 +#define GOOGLE_PROTOBUF_MIN_PROTOC_VERSION 3014000 // The minimum header version which works with the current version of // protoc. This constant should only be used in VerifyVersion(). -static const int kMinHeaderVersionForProtoc = 3006001; +static const int kMinHeaderVersionForProtoc = 3014000; // Verifies that the headers and libraries are compatible. Use the macro // below to call this. -void LIBPROTOBUF_EXPORT VerifyVersion(int headerVersion, int minLibraryVersion, - const char* filename); +void PROTOBUF_EXPORT VerifyVersion(int headerVersion, int minLibraryVersion, + const char* filename); // Converts a numeric version number to a string. 
-std::string LIBPROTOBUF_EXPORT VersionString(int version); +std::string PROTOBUF_EXPORT VersionString(int version); } // namespace internal @@ -151,14 +128,14 @@ namespace internal { // Checks if the buffer contains structurally-valid UTF-8. Implemented in // structurally_valid.cc. -LIBPROTOBUF_EXPORT bool IsStructurallyValidUTF8(const char* buf, int len); +PROTOBUF_EXPORT bool IsStructurallyValidUTF8(const char* buf, int len); -inline bool IsStructurallyValidUTF8(const std::string& str) { +inline bool IsStructurallyValidUTF8(StringPiece str) { return IsStructurallyValidUTF8(str.data(), static_cast(str.length())); } -// Returns initial number of bytes of structually valid UTF-8. -LIBPROTOBUF_EXPORT int UTF8SpnStructurallyValid(const StringPiece& str); +// Returns initial number of bytes of structurally valid UTF-8. +PROTOBUF_EXPORT int UTF8SpnStructurallyValid(StringPiece str); // Coerce UTF-8 byte string in src_str to be // a structurally-valid equal-length string by selectively @@ -172,42 +149,23 @@ LIBPROTOBUF_EXPORT int UTF8SpnStructurallyValid(const StringPiece& str); // // Optimized for: all structurally valid and no byte copying is done. // -LIBPROTOBUF_EXPORT char* UTF8CoerceToStructurallyValid( - const StringPiece& str, char* dst, char replace_char); +PROTOBUF_EXPORT char* UTF8CoerceToStructurallyValid(StringPiece str, char* dst, + char replace_char); } // namespace internal - -// =================================================================== -// Shutdown support. - -// Shut down the entire protocol buffers library, deleting all static-duration -// objects allocated by the library or by generated .pb.cc files. -// -// There are two reasons you might want to call this: -// * You use a draconian definition of "memory leak" in which you expect -// every single malloc() to have a corresponding free(), even for objects -// which live until program exit. 
-// * You are writing a dynamically-loaded library which needs to clean up -// after itself when the library is unloaded. -// -// It is safe to call this multiple times. However, it is not safe to use -// any other part of the protocol buffers library after -// ShutdownProtobufLibrary() has been called. Furthermore this call is not -// thread safe, user needs to synchronize multiple calls. -LIBPROTOBUF_EXPORT void ShutdownProtobufLibrary(); +// This lives in message_lite.h now, but we leave this here for any users that +// #include common.h and not message_lite.h. +PROTOBUF_EXPORT void ShutdownProtobufLibrary(); namespace internal { -// Register a function to be called when ShutdownProtocolBuffers() is called. -LIBPROTOBUF_EXPORT void OnShutdown(void (*func)()); -// Run an arbitrary function on an arg -LIBPROTOBUF_EXPORT void OnShutdownRun(void (*f)(const void*), const void* arg); - +// Strongly references the given variable such that the linker will be forced +// to pull in this variable's translation unit. template -T* OnShutdownDelete(T* p) { - OnShutdownRun([](const void* p) { delete static_cast(p); }, p); - return p; +void StrongReference(const T& var) { + auto volatile unused = &var; + (void)&unused; // Use address to avoid an extra load of "unused". } } // namespace internal @@ -239,4 +197,6 @@ using std::string; } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_COMMON_H__ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/fastmem.h b/third_party/protobuf-lite/google/protobuf/stubs/fastmem.h deleted file mode 100644 index 1f1f6ed3..00000000 --- a/third_party/protobuf-lite/google/protobuf/stubs/fastmem.h +++ /dev/null @@ -1,153 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2014 Google Inc. All rights reserved. 
-// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Fast memory copying and comparison routines. -// strings::fastmemcmp_inlined() replaces memcmp() -// strings::memcpy_inlined() replaces memcpy() -// strings::memeq(a, b, n) replaces memcmp(a, b, n) == 0 -// -// strings::*_inlined() routines are inline versions of the -// routines exported by this module. Sometimes using the inlined -// versions is faster. Measure before using the inlined versions. 
-// -// Performance measurement: -// strings::fastmemcmp_inlined -// Analysis: memcmp, fastmemcmp_inlined, fastmemcmp -// 2012-01-30 - -#ifndef GOOGLE_PROTOBUF_STUBS_FASTMEM_H_ -#define GOOGLE_PROTOBUF_STUBS_FASTMEM_H_ - -#include -#include -#include - -#include - -namespace google { -namespace protobuf { -namespace internal { - -// Return true if the n bytes at a equal the n bytes at b. -// The regions are allowed to overlap. -// -// The performance is similar to the performance memcmp(), but faster for -// moderately-sized inputs, or inputs that share a common prefix and differ -// somewhere in their last 8 bytes. Further optimizations can be added later -// if it makes sense to do so.:w -inline bool memeq(const char* a, const char* b, size_t n) { - size_t n_rounded_down = n & ~static_cast(7); - if (GOOGLE_PREDICT_FALSE(n_rounded_down == 0)) { // n <= 7 - return memcmp(a, b, n) == 0; - } - // n >= 8 - uint64 u = GOOGLE_UNALIGNED_LOAD64(a) ^ GOOGLE_UNALIGNED_LOAD64(b); - uint64 v = GOOGLE_UNALIGNED_LOAD64(a + n - 8) ^ GOOGLE_UNALIGNED_LOAD64(b + n - 8); - if ((u | v) != 0) { // The first or last 8 bytes differ. - return false; - } - a += 8; - b += 8; - n = n_rounded_down - 8; - if (n > 128) { - // As of 2012, memcmp on x86-64 uses a big unrolled loop with SSE2 - // instructions, and while we could try to do something faster, it - // doesn't seem worth pursuing. - return memcmp(a, b, n) == 0; - } - for (; n >= 16; n -= 16) { - uint64 x = GOOGLE_UNALIGNED_LOAD64(a) ^ GOOGLE_UNALIGNED_LOAD64(b); - uint64 y = GOOGLE_UNALIGNED_LOAD64(a + 8) ^ GOOGLE_UNALIGNED_LOAD64(b + 8); - if ((x | y) != 0) { - return false; - } - a += 16; - b += 16; - } - // n must be 0 or 8 now because it was a multiple of 8 at the top of the loop. 
- return n == 0 || GOOGLE_UNALIGNED_LOAD64(a) == GOOGLE_UNALIGNED_LOAD64(b); -} - -inline int fastmemcmp_inlined(const char *a, const char *b, size_t n) { - if (n >= 64) { - return memcmp(a, b, n); - } - const char* a_limit = a + n; - while (a + sizeof(uint64) <= a_limit && - GOOGLE_UNALIGNED_LOAD64(a) == GOOGLE_UNALIGNED_LOAD64(b)) { - a += sizeof(uint64); - b += sizeof(uint64); - } - if (a + sizeof(uint32) <= a_limit && - GOOGLE_UNALIGNED_LOAD32(a) == GOOGLE_UNALIGNED_LOAD32(b)) { - a += sizeof(uint32); - b += sizeof(uint32); - } - while (a < a_limit) { - int d = - static_cast(static_cast(*a++) - static_cast(*b++)); - if (d) return d; - } - return 0; -} - -// The standard memcpy operation is slow for variable small sizes. -// This implementation inlines the optimal realization for sizes 1 to 16. -// To avoid code bloat don't use it in case of not performance-critical spots, -// nor when you don't expect very frequent values of size <= 16. -inline void memcpy_inlined(char *dst, const char *src, size_t size) { - // Compiler inlines code with minimal amount of data movement when third - // parameter of memcpy is a constant. 
- switch (size) { - case 1: memcpy(dst, src, 1); break; - case 2: memcpy(dst, src, 2); break; - case 3: memcpy(dst, src, 3); break; - case 4: memcpy(dst, src, 4); break; - case 5: memcpy(dst, src, 5); break; - case 6: memcpy(dst, src, 6); break; - case 7: memcpy(dst, src, 7); break; - case 8: memcpy(dst, src, 8); break; - case 9: memcpy(dst, src, 9); break; - case 10: memcpy(dst, src, 10); break; - case 11: memcpy(dst, src, 11); break; - case 12: memcpy(dst, src, 12); break; - case 13: memcpy(dst, src, 13); break; - case 14: memcpy(dst, src, 14); break; - case 15: memcpy(dst, src, 15); break; - case 16: memcpy(dst, src, 16); break; - default: memcpy(dst, src, size); break; - } -} - -} // namespace internal -} // namespace protobuf -} // namespace google - -#endif // GOOGLE_PROTOBUF_STUBS_FASTMEM_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/hash.h b/third_party/protobuf-lite/google/protobuf/stubs/hash.h index 8edd3db4..a7ec0680 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/hash.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/hash.h @@ -29,318 +29,25 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Author: kenton@google.com (Kenton Varda) -// -// Deals with the fact that hash_map is not defined everywhere. #ifndef GOOGLE_PROTOBUF_STUBS_HASH_H__ #define GOOGLE_PROTOBUF_STUBS_HASH_H__ -#include -#include - -#define GOOGLE_PROTOBUF_HAVE_HASH_MAP 1 -#define GOOGLE_PROTOBUF_HAVE_HASH_SET 1 - -// Use C++11 unordered_{map|set} if available. -#if ((defined(_LIBCPP_STD_VER) && _LIBCPP_STD_VER >= 11) || \ - (((__cplusplus >= 201103L) || defined(__GXX_EXPERIMENTAL_CXX0X)) && \ - (__GLIBCXX__ > 20090421))) -# define GOOGLE_PROTOBUF_HAS_CXX11_HASH - -// For XCode >= 4.6: the compiler is clang with libc++. -// For earlier XCode version: the compiler is gcc-4.2.1 with libstdc++. -// libc++ provides and friends even in non C++11 mode, -// and it does not provide the tr1 library. 
Therefore the following macro -// checks against this special case. -// Note that we should not test the __APPLE_CC__ version number or the -// __clang__ macro, since the new compiler can still use -stdlib=libstdc++, in -// which case is not compilable without -std=c++11 -#elif defined(__APPLE_CC__) -# if __GNUC__ >= 4 -# define GOOGLE_PROTOBUF_HAS_TR1 -# else -// Not tested for gcc < 4... These setting can compile under 4.2.1 though. -# define GOOGLE_PROTOBUF_HASH_NAMESPACE __gnu_cxx -# include -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# endif - -// Version checks for gcc. -#elif defined(__GNUC__) -// For GCC 4.x+, use tr1::unordered_map/set; otherwise, follow the -// instructions from: -// https://gcc.gnu.org/onlinedocs/libstdc++/manual/backwards.html -# if __GNUC__ >= 4 -# define GOOGLE_PROTOBUF_HAS_TR1 -# elif __GNUC__ >= 3 -# include -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# if __GNUC__ == 3 && __GNUC_MINOR__ == 0 -# define GOOGLE_PROTOBUF_HASH_NAMESPACE std // GCC 3.0 -# else -# define GOOGLE_PROTOBUF_HASH_NAMESPACE __gnu_cxx // GCC 3.1 and later -# endif -# else -# define GOOGLE_PROTOBUF_HASH_NAMESPACE -# include -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# endif - -// GCC <= 4.1 does not define std::tr1::hash for `long long int` or `long long unsigned int` -# if __GNUC__ == 4 && defined(__GNUC_MINOR__) && __GNUC_MINOR__ <= 1 -# undef GOOGLE_PROTOBUF_HAS_TR1 -# undef GOOGLE_PROTOBUF_HAVE_HASH_MAP -# undef GOOGLE_PROTOBUF_HAVE_HASH_SET -# endif - -// Version checks for MSC. -// Apparently Microsoft decided to move hash_map *back* to the std namespace in -// MSVC 2010: -// http://blogs.msdn.com/vcblog/archive/2009/05/25/stl-breaking-changes-in-visual-studio-2010-beta-1.aspx -// And.. they are moved back to stdext in MSVC 2013 (haven't checked 2012). 
That -// said, use unordered_map for MSVC 2010 and beyond is our safest bet. -#elif defined(_MSC_VER) -# if _MSC_VER >= 1900 -# ifndef _LIBCPP_STD_VER // Visual Studio 2022 doesn't export this for some reason. We have AT LEAST C++11 support -# define _LIBCPP_STD_VER 11 -# endif -# define GOOGLE_PROTOBUF_HAS_CXX11_HASH -# elif _MSC_VER >= 1600 // Since Visual Studio 2010 -# define GOOGLE_PROTOBUF_HAS_CXX11_HASH -# define GOOGLE_PROTOBUF_HASH_COMPARE std::hash_compare -# elif _MSC_VER >= 1500 // Since Visual Studio 2008 -# define GOOGLE_PROTOBUF_HASH_NAMESPACE stdext -# include -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# define GOOGLE_PROTOBUF_HASH_COMPARE stdext::hash_compare -# define GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE -# elif _MSC_VER >= 1310 -# define GOOGLE_PROTOBUF_HASH_NAMESPACE stdext -# include -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# define GOOGLE_PROTOBUF_HASH_COMPARE stdext::hash_compare -# else -# define GOOGLE_PROTOBUF_HASH_NAMESPACE std -# include -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS hash_map -# include -# define GOOGLE_PROTOBUF_HASH_SET_CLASS hash_set -# define GOOGLE_PROTOBUF_HASH_COMPARE stdext::hash_compare -# endif - -// **ADD NEW COMPILERS SUPPORT HERE.** -// For other compilers, undefine the macro and fallback to use std::map, in -// google/protobuf/stubs/hash.h -#else -# undef GOOGLE_PROTOBUF_HAVE_HASH_MAP -# undef GOOGLE_PROTOBUF_HAVE_HASH_SET -#endif - -#if defined(GOOGLE_PROTOBUF_HAS_CXX11_HASH) -# define GOOGLE_PROTOBUF_HASH_NAMESPACE std -# include -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS unordered_map -# include -# define GOOGLE_PROTOBUF_HASH_SET_CLASS unordered_set -#elif defined(GOOGLE_PROTOBUF_HAS_TR1) -# define GOOGLE_PROTOBUF_HASH_NAMESPACE std::tr1 -# include -# define GOOGLE_PROTOBUF_HASH_MAP_CLASS unordered_map -# include -# define GOOGLE_PROTOBUF_HASH_SET_CLASS unordered_set 
-#endif +#include +#include +#include +#include # define GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_START \ namespace google { \ namespace protobuf { # define GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_END }} -#undef GOOGLE_PROTOBUF_HAS_CXX11_HASH -#undef GOOGLE_PROTOBUF_HAS_TR1 - -#if defined(GOOGLE_PROTOBUF_HAVE_HASH_MAP) && \ - defined(GOOGLE_PROTOBUF_HAVE_HASH_SET) -#else -#define GOOGLE_PROTOBUF_MISSING_HASH -#include -#include -#endif - namespace google { namespace protobuf { -#ifdef GOOGLE_PROTOBUF_MISSING_HASH -#undef GOOGLE_PROTOBUF_MISSING_HASH - -// This system doesn't have hash_map or hash_set. Emulate them using map and -// set. - -// Make hash be the same as less. Note that everywhere where custom -// hash functions are defined in the protobuf code, they are also defined such -// that they can be used as "less" functions, which is required by MSVC anyway. template -struct hash { - // Dummy, just to make derivative hash functions compile. - int operator()(const Key& key) { - GOOGLE_LOG(FATAL) << "Should never be called."; - return 0; - } - - inline bool operator()(const Key& a, const Key& b) const { - return a < b; - } -}; - -// Make sure char* is compared by value. -template <> -struct hash { - // Dummy, just to make derivative hash functions compile. 
- int operator()(const char* key) { - GOOGLE_LOG(FATAL) << "Should never be called."; - return 0; - } - - inline bool operator()(const char* a, const char* b) const { - return strcmp(a, b) < 0; - } -}; - -template , - typename EqualKey = std::equal_to, - typename Alloc = std::allocator< std::pair > > -class hash_map : public std::map { - typedef std::map BaseClass; - - public: - hash_map(int a = 0, const HashFcn& b = HashFcn(), - const EqualKey& c = EqualKey(), - const Alloc& d = Alloc()) : BaseClass(b, d) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -template , - typename EqualKey = std::equal_to > -class hash_set : public std::set { - public: - hash_set(int = 0) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -#elif defined(_MSC_VER) && !defined(_STLPORT_VERSION) && \ - !(defined(_LIBCPP_STD_VER) && _LIBCPP_STD_VER >= 11) - -template -struct hash : public GOOGLE_PROTOBUF_HASH_COMPARE { -}; - -// MSVC's hash_compare hashes based on the string contents but -// compares based on the string pointer. WTF? 
-class CstringLess { - public: - inline bool operator()(const char* a, const char* b) const { - return strcmp(a, b) < 0; - } -}; - -template <> -struct hash - : public GOOGLE_PROTOBUF_HASH_COMPARE {}; - -#ifdef GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE - -template -struct InternalHashCompare : public GOOGLE_PROTOBUF_HASH_COMPARE { - InternalHashCompare() {} - InternalHashCompare(HashFcn hashfcn, EqualKey equalkey) - : hashfcn_(hashfcn), equalkey_(equalkey) {} - size_t operator()(const Key& key) const { return hashfcn_(key); } - bool operator()(const Key& key1, const Key& key2) const { - return !equalkey_(key1, key2); - } - HashFcn hashfcn_; - EqualKey equalkey_; -}; - -template , - typename EqualKey = std::equal_to, - typename Alloc = std::allocator< std::pair > > -class hash_map - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, InternalHashCompare, Alloc> { - typedef GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, InternalHashCompare, Alloc> BaseClass; - - public: - hash_map(int a = 0, const HashFcn& b = HashFcn(), - const EqualKey& c = EqualKey(), const Alloc& d = Alloc()) - : BaseClass(InternalHashCompare(b, c), d) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -template , - typename EqualKey = std::equal_to > -class hash_set - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_SET_CLASS< - Key, InternalHashCompare > { - public: - hash_set(int = 0) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -#else // GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE - -template , - typename EqualKey = std::equal_to, - typename Alloc = std::allocator< std::pair > > -class hash_map - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, HashFcn, EqualKey, Alloc> { - typedef GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, HashFcn, EqualKey, Alloc> BaseClass; - - public: - hash_map(int a = 0, const HashFcn& b 
= HashFcn(), - const EqualKey& c = EqualKey(), - const Alloc& d = Alloc()) : BaseClass(a, b, c, d) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -template , - typename EqualKey = std::equal_to > -class hash_set - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_SET_CLASS< - Key, HashFcn, EqualKey> { - public: - hash_set(int = 0) {} - - HashFcn hash_function() const { return HashFcn(); } -}; -#endif // GOOGLE_PROTOBUF_CONTAINERS_NEED_HASH_COMPARE - -#else // defined(_MSC_VER) && !defined(_STLPORT_VERSION) - -template -struct hash : public GOOGLE_PROTOBUF_HASH_NAMESPACE::hash { -}; +struct hash : public std::hash {}; template struct hash { @@ -369,46 +76,15 @@ struct hash { } }; -template , - typename EqualKey = std::equal_to, - typename Alloc = std::allocator< std::pair > > -class hash_map - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, HashFcn, EqualKey, Alloc> { - typedef GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_MAP_CLASS< - Key, Data, HashFcn, EqualKey, Alloc> BaseClass; - - public: - hash_map(int a = 0, const HashFcn& b = HashFcn(), - const EqualKey& c = EqualKey(), - const Alloc& d = Alloc()) : BaseClass(a, b, c, d) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -template , - typename EqualKey = std::equal_to > -class hash_set - : public GOOGLE_PROTOBUF_HASH_NAMESPACE::GOOGLE_PROTOBUF_HASH_SET_CLASS< - Key, HashFcn, EqualKey> { - public: - hash_set(int = 0) {} - - HashFcn hash_function() const { return HashFcn(); } -}; - -#endif // !GOOGLE_PROTOBUF_MISSING_HASH - template <> -struct hash { - inline size_t operator()(const string& key) const { +struct hash { + inline size_t operator()(const std::string& key) const { return hash()(key.c_str()); } static const size_t bucket_size = 4; static const size_t min_buckets = 8; - inline bool operator()(const string& a, const string& b) const { + inline bool operator()(const std::string& a, const std::string& b) const { 
return a < b; } }; @@ -432,14 +108,6 @@ struct hash > { } }; -// Used by GCC/SGI STL only. (Why isn't this provided by the standard -// library? :( ) -struct streq { - inline bool operator()(const char* a, const char* b) const { - return strcmp(a, b) == 0; - } -}; - } // namespace protobuf } // namespace google diff --git a/third_party/protobuf-lite/google/protobuf/stubs/int128.h b/third_party/protobuf-lite/google/protobuf/stubs/int128.h index 1499bb76..dc70d96e 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/int128.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/int128.h @@ -34,6 +34,8 @@ #include +#include + namespace google { namespace protobuf { @@ -48,7 +50,7 @@ struct uint128_pod; #endif // An unsigned 128-bit integer type. Thread-compatible. -class LIBPROTOBUF_EXPORT uint128 { +class PROTOBUF_EXPORT uint128 { public: UINT128_CONSTEXPR uint128(); // Sets to 0, but don't trust on this behavior. UINT128_CONSTEXPR uint128(uint64 top, uint64 bottom); @@ -84,8 +86,8 @@ class LIBPROTOBUF_EXPORT uint128 { friend uint64 Uint128High64(const uint128& v); // We add "std::" to avoid including all of port.h. - LIBPROTOBUF_EXPORT friend std::ostream& operator<<(std::ostream& o, - const uint128& b); + PROTOBUF_EXPORT friend std::ostream& operator<<(std::ostream& o, + const uint128& b); private: static void DivModImpl(uint128 dividend, uint128 divisor, @@ -116,11 +118,11 @@ struct uint128_pod { uint64 lo; }; -LIBPROTOBUF_EXPORT extern const uint128_pod kuint128max; +PROTOBUF_EXPORT extern const uint128_pod kuint128max; // allow uint128 to be logged -LIBPROTOBUF_EXPORT extern std::ostream& operator<<(std::ostream& o, - const uint128& b); +PROTOBUF_EXPORT extern std::ostream& operator<<(std::ostream& o, + const uint128& b); // Methods to access low and high pieces of 128-bit value. 
// Defined externally from uint128 to facilitate conversion @@ -380,4 +382,6 @@ inline uint128& uint128::operator--() { } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_STUBS_INT128_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/logging.h b/third_party/protobuf-lite/google/protobuf/stubs/logging.h index f69605d9..f37048d6 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/logging.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/logging.h @@ -34,6 +34,8 @@ #include #include +#include + // =================================================================== // emulates google3/base/logging.h @@ -70,7 +72,7 @@ namespace internal { class LogFinisher; -class LIBPROTOBUF_EXPORT LogMessage { +class PROTOBUF_EXPORT LogMessage { public: LogMessage(LogLevel level, const char* filename, int line); ~LogMessage(); @@ -87,7 +89,7 @@ class LIBPROTOBUF_EXPORT LogMessage { LogMessage& operator<<(double value); LogMessage& operator<<(void* value); LogMessage& operator<<(const StringPiece& value); - LogMessage& operator<<(const ::google::protobuf::util::Status& status); + LogMessage& operator<<(const util::Status& status); LogMessage& operator<<(const uint128& value); private: @@ -102,7 +104,7 @@ class LIBPROTOBUF_EXPORT LogMessage { // Used to make the entire "LOG(BLAH) << etc." expression have a void return // type and print a newline after each message. 
-class LIBPROTOBUF_EXPORT LogFinisher { +class PROTOBUF_EXPORT LogFinisher { public: void operator=(LogMessage& other); }; @@ -141,10 +143,10 @@ inline bool IsOk(bool status) { return status; } #undef GOOGLE_DCHECK_GT #undef GOOGLE_DCHECK_GE -#define GOOGLE_LOG(LEVEL) \ - ::google::protobuf::internal::LogFinisher() = \ - ::google::protobuf::internal::LogMessage( \ - ::google::protobuf::LOGLEVEL_##LEVEL, __FILE__, __LINE__) +#define GOOGLE_LOG(LEVEL) \ + ::google::protobuf::internal::LogFinisher() = \ + ::google::protobuf::internal::LogMessage( \ + ::google::protobuf::LOGLEVEL_##LEVEL, __FILE__, __LINE__) #define GOOGLE_LOG_IF(LEVEL, CONDITION) \ !(CONDITION) ? (void)0 : GOOGLE_LOG(LEVEL) @@ -162,15 +164,15 @@ namespace internal { template T* CheckNotNull(const char* /* file */, int /* line */, const char* name, T* val) { - if (val == NULL) { + if (val == nullptr) { GOOGLE_LOG(FATAL) << name; } return val; } } // namespace internal -#define GOOGLE_CHECK_NOTNULL(A) \ - ::google::protobuf::internal::CheckNotNull(\ - __FILE__, __LINE__, "'" #A "' must not be NULL", (A)) +#define GOOGLE_CHECK_NOTNULL(A) \ + ::google::protobuf::internal::CheckNotNull( \ + __FILE__, __LINE__, "'" #A "' must not be nullptr", (A)) #ifdef NDEBUG @@ -208,7 +210,7 @@ typedef void LogHandler(LogLevel level, const char* filename, int line, // also help end users figure out a problem. If you would prefer that // these messages be sent somewhere other than stderr, call SetLogHandler() // to set your own handler. This returns the old handler. Set the handler -// to NULL to ignore log messages (but see also LogSilencer, below). +// to nullptr to ignore log messages (but see also LogSilencer, below). // // Obviously, SetLogHandler is not thread-safe. You should only call it // at initialization time, and probably not from library code. 
If you @@ -216,7 +218,7 @@ typedef void LogHandler(LogLevel level, const char* filename, int line, // have some code that tends to trigger them frequently and you know // the warnings are not important to you), use the LogSilencer class // below. -LIBPROTOBUF_EXPORT LogHandler* SetLogHandler(LogHandler* new_func); +PROTOBUF_EXPORT LogHandler* SetLogHandler(LogHandler* new_func); // Create a LogSilencer if you want to temporarily suppress all log // messages. As long as any LogSilencer objects exist, non-fatal @@ -225,7 +227,7 @@ LIBPROTOBUF_EXPORT LogHandler* SetLogHandler(LogHandler* new_func); // accidentally suppress log messages occurring in another thread, but // since messages are generally for debugging purposes only, this isn't // a big deal. If you want to intercept log messages, use SetLogHandler(). -class LIBPROTOBUF_EXPORT LogSilencer { +class PROTOBUF_EXPORT LogSilencer { public: LogSilencer(); ~LogSilencer(); @@ -234,4 +236,6 @@ class LIBPROTOBUF_EXPORT LogSilencer { } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_STUBS_LOGGING_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/macros.h b/third_party/protobuf-lite/google/protobuf/stubs/macros.h index 0e9a9ec1..c556d022 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/macros.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/macros.h @@ -112,55 +112,7 @@ struct CompileAssert { } // namespace internal -#undef GOOGLE_COMPILE_ASSERT -#if __cplusplus >= 201103L #define GOOGLE_COMPILE_ASSERT(expr, msg) static_assert(expr, #msg) -#else -#define GOOGLE_COMPILE_ASSERT(expr, msg) \ - ::google::protobuf::internal::CompileAssert<(bool(expr))> \ - msg[bool(expr) ? 1 : -1]; \ - (void)msg -// Implementation details of COMPILE_ASSERT: -// -// - COMPILE_ASSERT works by defining an array type that has -1 -// elements (and thus is invalid) when the expression is false. 
-// -// - The simpler definition -// -// #define COMPILE_ASSERT(expr, msg) typedef char msg[(expr) ? 1 : -1] -// -// does not work, as gcc supports variable-length arrays whose sizes -// are determined at run-time (this is gcc's extension and not part -// of the C++ standard). As a result, gcc fails to reject the -// following code with the simple definition: -// -// int foo; -// COMPILE_ASSERT(foo, msg); // not supposed to compile as foo is -// // not a compile-time constant. -// -// - By using the type CompileAssert<(bool(expr))>, we ensures that -// expr is a compile-time constant. (Template arguments must be -// determined at compile-time.) -// -// - The outter parentheses in CompileAssert<(bool(expr))> are necessary -// to work around a bug in gcc 3.4.4 and 4.0.1. If we had written -// -// CompileAssert -// -// instead, these compilers will refuse to compile -// -// COMPILE_ASSERT(5 > 0, some_message); -// -// (They seem to think the ">" in "5 > 0" marks the end of the -// template argument list.) -// -// - The array size is (bool(expr) ? 1 : -1), instead of simply -// -// ((expr) ? 1 : -1). -// -// This is to avoid running into a bug in MS VC 7.1, which -// causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. -#endif // __cplusplus >= 201103L } // namespace protobuf } // namespace google diff --git a/third_party/protobuf-lite/google/protobuf/stubs/map_util.h b/third_party/protobuf-lite/google/protobuf/stubs/map_util.h index 3e6d381f..24e098ad 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/map_util.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/map_util.h @@ -131,7 +131,7 @@ FindWithDefault(const Collection& collection, } // Returns a pointer to the const value associated with the given key if it -// exists, or NULL otherwise. +// exists, or nullptr otherwise. 
template const typename Collection::value_type::second_type* FindOrNull(const Collection& collection, @@ -156,11 +156,11 @@ FindOrNull(Collection& collection, // NOLINT } // Returns the pointer value associated with the given key. If none is found, -// NULL is returned. The function is designed to be used with a map of keys to +// nullptr is returned. The function is designed to be used with a map of keys to // pointers. // // This function does not distinguish between a missing key and a key mapped -// to a NULL value. +// to nullptr. template typename Collection::value_type::second_type FindPtrOrNull(const Collection& collection, @@ -188,7 +188,7 @@ FindPtrOrNull(Collection& collection, // NOLINT } // Finds the pointer value associated with the given key in a map whose values -// are linked_ptrs. Returns NULL if key is not found. +// are linked_ptrs. Returns nullptr if key is not found. template typename Collection::value_type::second_type::element_type* FindLinkedPtrOrNull(const Collection& collection, @@ -215,7 +215,7 @@ FindLinkedPtrOrDie(const Collection& collection, } // Finds the value associated with the given key and copies it to *value (if not -// NULL). Returns false if the key was not found, true otherwise. +// nullptr). Returns false if the key was not found, true otherwise. 
template bool FindCopy(const Collection& collection, const Key& key, @@ -447,7 +447,7 @@ LookupOrInsertNew(Collection* const collection, std::pair ret = collection->insert(typename Collection::value_type( key, - static_cast(NULL))); + static_cast(nullptr))); if (ret.second) { ret.first->second = new Element(); } @@ -466,7 +466,7 @@ LookupOrInsertNew(Collection* const collection, std::pair ret = collection->insert(typename Collection::value_type( key, - static_cast(NULL))); + static_cast(nullptr))); if (ret.second) { ret.first->second = new Element(arg); } @@ -612,7 +612,7 @@ bool UpdateReturnCopy(Collection* const collection, return false; } -// Tries to insert the given key-value pair into the collection. Returns NULL if +// Tries to insert the given key-value pair into the collection. Returns nullptr if // the insert succeeds. Otherwise, returns a pointer to the existing value. // // This complements UpdateReturnCopy in that it allows to update only after @@ -620,12 +620,11 @@ bool UpdateReturnCopy(Collection* const collection, // twice. Unlike UpdateReturnCopy this also does not come with the issue of an // undefined previous* in case new data was inserted. template -typename Collection::value_type::second_type* const -InsertOrReturnExisting(Collection* const collection, - const typename Collection::value_type& vt) { +typename Collection::value_type::second_type* InsertOrReturnExisting( + Collection* const collection, const typename Collection::value_type& vt) { std::pair ret = collection->insert(vt); if (ret.second) { - return NULL; // Inserted, no existing previous value. + return nullptr; // Inserted, no existing previous value. } else { return &ret.first->second; // Return address of already existing value. } @@ -633,8 +632,7 @@ InsertOrReturnExisting(Collection* const collection, // Same as above, except for explicit key and data. 
template -typename Collection::value_type::second_type* const -InsertOrReturnExisting( +typename Collection::value_type::second_type* InsertOrReturnExisting( Collection* const collection, const typename Collection::value_type::first_type& key, const typename Collection::value_type::second_type& data) { @@ -644,7 +642,7 @@ InsertOrReturnExisting( // Erases the collection item identified by the given key, and returns the value // associated with that key. It is assumed that the value (i.e., the -// mapped_type) is a pointer. Returns NULL if the key was not found in the +// mapped_type) is a pointer. Returns nullptr if the key was not found in the // collection. // // Examples: @@ -665,7 +663,7 @@ typename Collection::value_type::second_type EraseKeyReturnValuePtr( const typename Collection::value_type::first_type& key) { typename Collection::iterator it = collection->find(key); if (it == collection->end()) { - return NULL; + return nullptr; } typename Collection::value_type::second_type v = it->second; collection->erase(it); @@ -679,7 +677,7 @@ typename Collection::value_type::second_type EraseKeyReturnValuePtr( template void InsertKeysFromMap(const MapContainer& map_container, KeyContainer* key_container) { - GOOGLE_CHECK(key_container != NULL); + GOOGLE_CHECK(key_container != nullptr); for (typename MapContainer::const_iterator it = map_container.begin(); it != map_container.end(); ++it) { key_container->insert(it->first); @@ -693,7 +691,7 @@ void InsertKeysFromMap(const MapContainer& map_container, template void AppendKeysFromMap(const MapContainer& map_container, KeyContainer* key_container) { - GOOGLE_CHECK(key_container != NULL); + GOOGLE_CHECK(key_container != nullptr); for (typename MapContainer::const_iterator it = map_container.begin(); it != map_container.end(); ++it) { key_container->push_back(it->first); @@ -710,7 +708,7 @@ void AppendKeysFromMap(const MapContainer& map_container, template void AppendKeysFromMap(const MapContainer& map_container, 
std::vector* key_container) { - GOOGLE_CHECK(key_container != NULL); + GOOGLE_CHECK(key_container != nullptr); // We now have the opportunity to call reserve(). Calling reserve() every // time is a bad idea for some use cases: libstdc++'s implementation of // vector<>::reserve() resizes the vector's backing store to exactly the @@ -737,7 +735,7 @@ void AppendKeysFromMap(const MapContainer& map_container, template void AppendValuesFromMap(const MapContainer& map_container, ValueContainer* value_container) { - GOOGLE_CHECK(value_container != NULL); + GOOGLE_CHECK(value_container != nullptr); for (typename MapContainer::const_iterator it = map_container.begin(); it != map_container.end(); ++it) { value_container->push_back(it->second); @@ -754,7 +752,7 @@ void AppendValuesFromMap(const MapContainer& map_container, template void AppendValuesFromMap(const MapContainer& map_container, std::vector* value_container) { - GOOGLE_CHECK(value_container != NULL); + GOOGLE_CHECK(value_container != nullptr); // See AppendKeysFromMap for why this is done. if (value_container->empty()) { value_container->reserve(map_container.size()); diff --git a/third_party/protobuf-lite/google/protobuf/stubs/mathlimits.h b/third_party/protobuf-lite/google/protobuf/stubs/mathlimits.h deleted file mode 100644 index 9c9d0e9a..00000000 --- a/third_party/protobuf-lite/google/protobuf/stubs/mathlimits.h +++ /dev/null @@ -1,303 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// All Rights Reserved. -// -// Author: Maxim Lifantsev -// -// Useful integer and floating point limits and type traits. -// -// This partially replaces/duplictes numeric_limits<> from . -// We get a Google-style class that we have a greater control over -// and thus can add new features to it or fix whatever happens to be broken in -// numeric_limits for the compilers we use. -// - -#ifndef UTIL_MATH_MATHLIMITS_H__ -#define UTIL_MATH_MATHLIMITS_H__ - -// Note that for Windows we do something different because it does not support -// the plain isinf and isnan. -#if __cplusplus >= 201103L -// GCC 4.9 has a bug that makes isinf and isnan ambigious when both -// and get pulled into the same translation unit. 
We use the ones in -// std:: namespace explicitly for C++11 -#include -#define GOOGLE_PROTOBUF_USE_STD_CMATH -#elif _GLIBCXX_USE_C99_MATH && !_GLIBCXX_USE_C99_FP_MACROS_DYNAMIC -// libstdc++ header undefines the global macros and put functions in -// std:: namespace even before C++11. Use the ones in std:: instead too. -#include -#define GOOGLE_PROTOBUF_USE_STD_CMATH -#else -#include -#endif - -#include - -#include - -#include - -// ========================================================================= // - -// Useful integer and floating point limits and type traits. -// This is just for the documentation; -// real members are defined in our specializations below. -namespace google { -namespace protobuf { -template struct MathLimits { - // Type name. - typedef T Type; - // Unsigned version of the Type with the same byte size. - // Same as Type for floating point and unsigned types. - typedef T UnsignedType; - // If the type supports negative values. - static const bool kIsSigned; - // If the type supports only integer values. - static const bool kIsInteger; - // Magnitude-wise smallest representable positive value. - static const Type kPosMin; - // Magnitude-wise largest representable positive value. - static const Type kPosMax; - // Smallest representable value. - static const Type kMin; - // Largest representable value. - static const Type kMax; - // Magnitude-wise smallest representable negative value. - // Present only if kIsSigned. - static const Type kNegMin; - // Magnitude-wise largest representable negative value. - // Present only if kIsSigned. - static const Type kNegMax; - // Smallest integer x such that 10^x is representable. - static const int kMin10Exp; - // Largest integer x such that 10^x is representable. - static const int kMax10Exp; - // Smallest positive value such that Type(1) + kEpsilon != Type(1) - static const Type kEpsilon; - // Typical rounding error that is enough to cover - // a few simple floating-point operations. 
- // Slightly larger than kEpsilon to account for a few rounding errors. - // Is zero if kIsInteger. - static const Type kStdError; - // Number of decimal digits of mantissa precision. - // Present only if !kIsInteger. - static const int kPrecisionDigits; - // Not a number, i.e. result of 0/0. - // Present only if !kIsInteger. - static const Type kNaN; - // Positive infinity, i.e. result of 1/0. - // Present only if !kIsInteger. - static const Type kPosInf; - // Negative infinity, i.e. result of -1/0. - // Present only if !kIsInteger. - static const Type kNegInf; - - // NOTE: Special floating point values behave - // in a special (but mathematically-logical) way - // in terms of (in)equalty comparison and mathematical operations - // -- see out unittest for examples. - - // Special floating point value testers. - // Present in integer types for convenience. - static bool IsFinite(const Type x); - static bool IsNaN(const Type x); - static bool IsInf(const Type x); - static bool IsPosInf(const Type x); - static bool IsNegInf(const Type x); -}; - -// ========================================================================= // - -// All #define-s below are simply to refactor the declarations of -// MathLimits template specializations. -// They are all #undef-ined below. - -// The hoop-jumping in *_INT_(MAX|MIN) below is so that the compiler does not -// get an overflow while computing the constants. - -#define SIGNED_INT_MAX(Type) \ - (((Type(1) << (sizeof(Type)*8 - 2)) - 1) + (Type(1) << (sizeof(Type)*8 - 2))) - -#define SIGNED_INT_MIN(Type) \ - (-(Type(1) << (sizeof(Type)*8 - 2)) - (Type(1) << (sizeof(Type)*8 - 2))) - -#define UNSIGNED_INT_MAX(Type) \ - (((Type(1) << (sizeof(Type)*8 - 1)) - 1) + (Type(1) << (sizeof(Type)*8 - 1))) - -// Compile-time selected log10-related constants for integer types. -#define SIGNED_MAX_10_EXP(Type) \ - (sizeof(Type) == 1 ? 2 : ( \ - sizeof(Type) == 2 ? 4 : ( \ - sizeof(Type) == 4 ? 9 : ( \ - sizeof(Type) == 8 ? 
18 : -1)))) - -#define UNSIGNED_MAX_10_EXP(Type) \ - (sizeof(Type) == 1 ? 2 : ( \ - sizeof(Type) == 2 ? 4 : ( \ - sizeof(Type) == 4 ? 9 : ( \ - sizeof(Type) == 8 ? 19 : -1)))) - -#define DECL_INT_LIMIT_FUNCS \ - static bool IsFinite(const Type /*x*/) { return true; } \ - static bool IsNaN(const Type /*x*/) { return false; } \ - static bool IsInf(const Type /*x*/) { return false; } \ - static bool IsPosInf(const Type /*x*/) { return false; } \ - static bool IsNegInf(const Type /*x*/) { return false; } - -#define DECL_SIGNED_INT_LIMITS(IntType, UnsignedIntType) \ -template<> \ -struct LIBPROTOBUF_EXPORT MathLimits { \ - typedef IntType Type; \ - typedef UnsignedIntType UnsignedType; \ - static const bool kIsSigned = true; \ - static const bool kIsInteger = true; \ - static const Type kPosMin = 1; \ - static const Type kPosMax = SIGNED_INT_MAX(Type); \ - static const Type kMin = SIGNED_INT_MIN(Type); \ - static const Type kMax = kPosMax; \ - static const Type kNegMin = -1; \ - static const Type kNegMax = kMin; \ - static const int kMin10Exp = 0; \ - static const int kMax10Exp = SIGNED_MAX_10_EXP(Type); \ - static const Type kEpsilon = 1; \ - static const Type kStdError = 0; \ - DECL_INT_LIMIT_FUNCS \ -}; - -#define DECL_UNSIGNED_INT_LIMITS(IntType) \ -template<> \ -struct LIBPROTOBUF_EXPORT MathLimits { \ - typedef IntType Type; \ - typedef IntType UnsignedType; \ - static const bool kIsSigned = false; \ - static const bool kIsInteger = true; \ - static const Type kPosMin = 1; \ - static const Type kPosMax = UNSIGNED_INT_MAX(Type); \ - static const Type kMin = 0; \ - static const Type kMax = kPosMax; \ - static const int kMin10Exp = 0; \ - static const int kMax10Exp = UNSIGNED_MAX_10_EXP(Type); \ - static const Type kEpsilon = 1; \ - static const Type kStdError = 0; \ - DECL_INT_LIMIT_FUNCS \ -}; - -DECL_SIGNED_INT_LIMITS(signed char, unsigned char) -DECL_SIGNED_INT_LIMITS(signed short int, unsigned short int) -DECL_SIGNED_INT_LIMITS(signed int, unsigned int) 
-DECL_SIGNED_INT_LIMITS(signed long int, unsigned long int) -DECL_SIGNED_INT_LIMITS(signed long long int, unsigned long long int) -DECL_UNSIGNED_INT_LIMITS(unsigned char) -DECL_UNSIGNED_INT_LIMITS(unsigned short int) -DECL_UNSIGNED_INT_LIMITS(unsigned int) -DECL_UNSIGNED_INT_LIMITS(unsigned long int) -DECL_UNSIGNED_INT_LIMITS(unsigned long long int) - -#undef DECL_SIGNED_INT_LIMITS -#undef DECL_UNSIGNED_INT_LIMITS -#undef SIGNED_INT_MAX -#undef SIGNED_INT_MIN -#undef UNSIGNED_INT_MAX -#undef SIGNED_MAX_10_EXP -#undef UNSIGNED_MAX_10_EXP -#undef DECL_INT_LIMIT_FUNCS - -// For non-Windows builds we use the std:: versions of isinf and isnan if they -// are available; see the comment about at the top of this file for the -// details on why we need to do this. -#ifdef GOOGLE_PROTOBUF_USE_STD_CMATH -#define ISINF std::isinf -#define ISNAN std::isnan -#else -#define ISINF isinf -#define ISNAN isnan -#endif - -// ========================================================================= // -#if WIN32 && !__MINGW32__ // Lacks built-in isnan() and isinf() -#define DECL_FP_LIMIT_FUNCS \ - static bool IsFinite(const Type x) { return _finite(x); } \ - static bool IsNaN(const Type x) { return _isnan(x); } \ - static bool IsInf(const Type x) { return (_fpclass(x) & (_FPCLASS_NINF | _FPCLASS_PINF)) != 0; } \ - static bool IsPosInf(const Type x) { return _fpclass(x) == _FPCLASS_PINF; } \ - static bool IsNegInf(const Type x) { return _fpclass(x) == _FPCLASS_NINF; } -#else -#define DECL_FP_LIMIT_FUNCS \ - static bool IsFinite(const Type x) { return !ISINF(x) && !ISNAN(x); } \ - static bool IsNaN(const Type x) { return ISNAN(x); } \ - static bool IsInf(const Type x) { return ISINF(x); } \ - static bool IsPosInf(const Type x) { return ISINF(x) && x > 0; } \ - static bool IsNegInf(const Type x) { return ISINF(x) && x < 0; } -#endif - -// We can't put floating-point constant values in the header here because -// such constants are not considered to be primitive-type constants by gcc. 
-// CAVEAT: Hence, they are going to be initialized only during -// the global objects construction time. -#define DECL_FP_LIMITS(FP_Type, PREFIX) \ -template<> \ -struct LIBPROTOBUF_EXPORT MathLimits { \ - typedef FP_Type Type; \ - typedef FP_Type UnsignedType; \ - static const bool kIsSigned = true; \ - static const bool kIsInteger = false; \ - static const Type kPosMin; \ - static const Type kPosMax; \ - static const Type kMin; \ - static const Type kMax; \ - static const Type kNegMin; \ - static const Type kNegMax; \ - static const int kMin10Exp = PREFIX##_MIN_10_EXP; \ - static const int kMax10Exp = PREFIX##_MAX_10_EXP; \ - static const Type kEpsilon; \ - static const Type kStdError; \ - static const int kPrecisionDigits = PREFIX##_DIG; \ - static const Type kNaN; \ - static const Type kPosInf; \ - static const Type kNegInf; \ - DECL_FP_LIMIT_FUNCS \ -}; - -DECL_FP_LIMITS(float, FLT) -DECL_FP_LIMITS(double, DBL) -DECL_FP_LIMITS(long double, LDBL) - -#undef ISINF -#undef ISNAN -#undef DECL_FP_LIMITS -#undef DECL_FP_LIMIT_FUNCS - -// ========================================================================= // -} // namespace protobuf -} // namespace google - -#endif // UTIL_MATH_MATHLIMITS_H__ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/mutex.h b/third_party/protobuf-lite/google/protobuf/stubs/mutex.h index b9b7d2e1..b222ff74 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/mutex.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/mutex.h @@ -32,8 +32,35 @@ #include +#ifdef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP + +#include + +// GetMessage conflicts with GeneratedMessageReflection::GetMessage(). +#ifdef GetMessage +#undef GetMessage +#endif + +#endif + #include +// Define thread-safety annotations for use below, if we are building with +// Clang. +#if defined(__clang__) && !defined(SWIG) +#define GOOGLE_PROTOBUF_ACQUIRE(...) \ + __attribute__((acquire_capability(__VA_ARGS__))) +#define GOOGLE_PROTOBUF_RELEASE(...) 
\ + __attribute__((release_capability(__VA_ARGS__))) +#define GOOGLE_PROTOBUF_CAPABILITY(x) __attribute__((capability(x))) +#else +#define GOOGLE_PROTOBUF_ACQUIRE(...) +#define GOOGLE_PROTOBUF_RELEASE(...) +#define GOOGLE_PROTOBUF_CAPABILITY(x) +#endif + +#include + // =================================================================== // emulates google3/base/mutex.h namespace google { @@ -42,26 +69,51 @@ namespace internal { #define GOOGLE_PROTOBUF_LINKER_INITIALIZED +#ifdef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP + +// This class is a lightweight replacement for std::mutex on Windows platforms. +// std::mutex does not work on Windows XP SP2 with the latest VC++ libraries, +// because it utilizes the Concurrency Runtime that is only supported on Windows +// XP SP3 and above. +class PROTOBUF_EXPORT CriticalSectionLock { + public: + CriticalSectionLock() { InitializeCriticalSection(&critical_section_); } + ~CriticalSectionLock() { DeleteCriticalSection(&critical_section_); } + void lock() { EnterCriticalSection(&critical_section_); } + void unlock() { LeaveCriticalSection(&critical_section_); } + + private: + CRITICAL_SECTION critical_section_; + + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CriticalSectionLock); +}; + +#endif + // Mutex is a natural type to wrap. As both google and other organization have // specialized mutexes. gRPC also provides an injection mechanism for custom // mutexes. -class LIBPROTOBUF_EXPORT WrappedMutex { +class GOOGLE_PROTOBUF_CAPABILITY("mutex") PROTOBUF_EXPORT WrappedMutex { public: WrappedMutex() = default; - void Lock() { mu_.lock(); } - void Unlock() { mu_.unlock(); } + void Lock() GOOGLE_PROTOBUF_ACQUIRE() { mu_.lock(); } + void Unlock() GOOGLE_PROTOBUF_RELEASE() { mu_.unlock(); } // Crash if this Mutex is not held exclusively by this thread. // May fail to crash when it should; will never crash when it should not. 
void AssertHeld() const {} private: +#ifndef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP std::mutex mu_; +#else // ifndef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP + CriticalSectionLock mu_; +#endif // #ifndef GOOGLE_PROTOBUF_SUPPORT_WINDOWS_XP }; using Mutex = WrappedMutex; // MutexLock(mu) acquires mu when constructed and releases it when destroyed. -class LIBPROTOBUF_EXPORT MutexLock { +class PROTOBUF_EXPORT MutexLock { public: explicit MutexLock(Mutex *mu) : mu_(mu) { this->mu_->Lock(); } ~MutexLock() { this->mu_->Unlock(); } @@ -74,12 +126,12 @@ class LIBPROTOBUF_EXPORT MutexLock { typedef MutexLock ReaderMutexLock; typedef MutexLock WriterMutexLock; -// MutexLockMaybe is like MutexLock, but is a no-op when mu is NULL. -class LIBPROTOBUF_EXPORT MutexLockMaybe { +// MutexLockMaybe is like MutexLock, but is a no-op when mu is nullptr. +class PROTOBUF_EXPORT MutexLockMaybe { public: explicit MutexLockMaybe(Mutex *mu) : - mu_(mu) { if (this->mu_ != NULL) { this->mu_->Lock(); } } - ~MutexLockMaybe() { if (this->mu_ != NULL) { this->mu_->Unlock(); } } + mu_(mu) { if (this->mu_ != nullptr) { this->mu_->Lock(); } } + ~MutexLockMaybe() { if (this->mu_ != nullptr) { this->mu_->Unlock(); } } private: Mutex *const mu_; GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MutexLockMaybe); @@ -97,7 +149,7 @@ class ThreadLocalStorage { } T* Get() { T* result = static_cast(pthread_getspecific(key_)); - if (result == NULL) { + if (result == nullptr) { result = new T(); pthread_setspecific(key_, result); } @@ -123,8 +175,12 @@ using internal::ReaderMutexLock; using internal::WriterMutexLock; using internal::MutexLockMaybe; - } // namespace protobuf } // namespace google +#undef GOOGLE_PROTOBUF_ACQUIRE +#undef GOOGLE_PROTOBUF_RELEASE + +#include + #endif // GOOGLE_PROTOBUF_STUBS_MUTEX_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/once.h b/third_party/protobuf-lite/google/protobuf/stubs/once.h index 4a184971..070d36d1 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/once.h +++ 
b/third_party/protobuf-lite/google/protobuf/stubs/once.h @@ -28,128 +28,28 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// Author: kenton@google.com (Kenton Varda) -// -// emulates google3/base/once.h -// -// This header is intended to be included only by internal .cc files and -// generated .pb.cc files. Users should not use this directly. -// -// This is basically a portable version of pthread_once(). -// -// This header declares: -// * A type called ProtobufOnceType. -// * A macro GOOGLE_PROTOBUF_DECLARE_ONCE() which declares a variable of type -// ProtobufOnceType. This is the only legal way to declare such a variable. -// The macro may only be used at the global scope (you cannot create local or -// class member variables of this type). -// * A function GoogleOnceInit(ProtobufOnceType* once, void (*init_func)()). -// This function, when invoked multiple times given the same ProtobufOnceType -// object, will invoke init_func on the first call only, and will make sure -// none of the calls return before that first call to init_func has finished. -// * The user can provide a parameter which GoogleOnceInit() forwards to the -// user-provided function when it is called. Usage example: -// int a = 10; -// GoogleOnceInit(&my_once, &MyFunctionExpectingIntArgument, &a); -// * This implementation guarantees that ProtobufOnceType is a POD (i.e. no -// static initializer generated). -// -// This implements a way to perform lazy initialization. It's more efficient -// than using mutexes as no lock is needed if initialization has already -// happened. -// -// Example usage: -// void Init(); -// GOOGLE_PROTOBUF_DECLARE_ONCE(once_init); -// -// // Calls Init() exactly once. 
-// void InitOnce() { -// GoogleOnceInit(&once_init, &Init); -// } -// -// Note that if GoogleOnceInit() is called before main() has begun, it must -// only be called by the thread that will eventually call main() -- that is, -// the thread that performs dynamic initialization. In general this is a safe -// assumption since people don't usually construct threads before main() starts, -// but it is technically not guaranteed. Unfortunately, Win32 provides no way -// whatsoever to statically-initialize its synchronization primitives, so our -// only choice is to assume that dynamic initialization is single-threaded. - #ifndef GOOGLE_PROTOBUF_STUBS_ONCE_H__ #define GOOGLE_PROTOBUF_STUBS_ONCE_H__ -#include #include #include +#include + namespace google { namespace protobuf { namespace internal { -using once_flag = std::atomic; - -template -void my_call_once(once_flag& once, Callable&& fn, Args&&... args) { - enum CallOnceState { - ONCE_INIT = 0, - ONCE_RUNNING = 1, - ONCE_DONE = 2, - }; - - int expected_state = ONCE_INIT; - if (once.compare_exchange_strong(expected_state, ONCE_RUNNING)) { - fn(std::forward(args)...); - once.store(ONCE_DONE); - return; - } - - if (expected_state == ONCE_DONE) { - return; - } - - while (once.load() == ONCE_RUNNING) { - } -} - +using once_flag = std::once_flag; template -void call_once(Args&&... args) { - my_call_once(std::forward(args)...); +void call_once(Args&&... 
args ) { + std::call_once(std::forward(args)...); } -} // namespace internal - -// TODO(gerbens) remove this once third_party is fully extracted -using ProtobufOnceType = internal::once_flag; - -inline void GoogleOnceInit(ProtobufOnceType* once, void (*init_func)()) { - internal::my_call_once(*once, init_func); -} - -template -inline void GoogleOnceInitArg(ProtobufOnceType* once, void (*init_func)(Arg*), - Arg* arg) { - internal::my_call_once(*once, init_func, arg); -} - -class GoogleOnceDynamic { - public: - // If this->Init() has not been called before by any thread, - // execute (*func_with_arg)(arg) then return. - // Otherwise, wait until that prior invocation has finished - // executing its function, then return. - template - void Init(void (*func_with_arg)(T*), T* arg) { - GoogleOnceInitArg(&this->state_, func_with_arg, arg); - } - - private: - ProtobufOnceType state_; -}; - -#define GOOGLE_PROTOBUF_ONCE_TYPE ::google::protobuf::ProtobufOnceType -#define GOOGLE_PROTOBUF_DECLARE_ONCE(NAME) \ - ::google::protobuf::ProtobufOnceType NAME +} // namespace internal } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_STUBS_ONCE_H__ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/once.h.org b/third_party/protobuf-lite/google/protobuf/stubs/once.h.org deleted file mode 100644 index f3835ccd..00000000 --- a/third_party/protobuf-lite/google/protobuf/stubs/once.h.org +++ /dev/null @@ -1,130 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Author: kenton@google.com (Kenton Varda) -// -// emulates google3/base/once.h -// -// This header is intended to be included only by internal .cc files and -// generated .pb.cc files. Users should not use this directly. -// -// This is basically a portable version of pthread_once(). -// -// This header declares: -// * A type called ProtobufOnceType. -// * A macro GOOGLE_PROTOBUF_DECLARE_ONCE() which declares a variable of type -// ProtobufOnceType. This is the only legal way to declare such a variable. -// The macro may only be used at the global scope (you cannot create local or -// class member variables of this type). -// * A function GoogleOnceInit(ProtobufOnceType* once, void (*init_func)()). 
-// This function, when invoked multiple times given the same ProtobufOnceType -// object, will invoke init_func on the first call only, and will make sure -// none of the calls return before that first call to init_func has finished. -// * The user can provide a parameter which GoogleOnceInit() forwards to the -// user-provided function when it is called. Usage example: -// int a = 10; -// GoogleOnceInit(&my_once, &MyFunctionExpectingIntArgument, &a); -// * This implementation guarantees that ProtobufOnceType is a POD (i.e. no -// static initializer generated). -// -// This implements a way to perform lazy initialization. It's more efficient -// than using mutexes as no lock is needed if initialization has already -// happened. -// -// Example usage: -// void Init(); -// GOOGLE_PROTOBUF_DECLARE_ONCE(once_init); -// -// // Calls Init() exactly once. -// void InitOnce() { -// GoogleOnceInit(&once_init, &Init); -// } -// -// Note that if GoogleOnceInit() is called before main() has begun, it must -// only be called by the thread that will eventually call main() -- that is, -// the thread that performs dynamic initialization. In general this is a safe -// assumption since people don't usually construct threads before main() starts, -// but it is technically not guaranteed. Unfortunately, Win32 provides no way -// whatsoever to statically-initialize its synchronization primitives, so our -// only choice is to assume that dynamic initialization is single-threaded. - -#ifndef GOOGLE_PROTOBUF_STUBS_ONCE_H__ -#define GOOGLE_PROTOBUF_STUBS_ONCE_H__ - -#include -#include - -namespace google { -namespace protobuf { -namespace internal { - -using once_flag = std::once_flag; -template -void call_once(Args&&... 
args ) { - std::call_once(std::forward(args)...); -} - -} // namespace internal - -// TODO(gerbens) remove this once third_party is fully extracted -using ProtobufOnceType = internal::once_flag; - -inline void GoogleOnceInit(ProtobufOnceType* once, void (*init_func)()) { - std::call_once(*once, init_func); -} - -template -inline void GoogleOnceInitArg(ProtobufOnceType* once, void (*init_func)(Arg*), - Arg* arg) { - std::call_once(*once, init_func, arg); -} - -class GoogleOnceDynamic { - public: - // If this->Init() has not been called before by any thread, - // execute (*func_with_arg)(arg) then return. - // Otherwise, wait until that prior invocation has finished - // executing its function, then return. - template - void Init(void (*func_with_arg)(T*), T* arg) { - GoogleOnceInitArg(&this->state_, func_with_arg, arg); - } - private: - ProtobufOnceType state_; -}; - -#define GOOGLE_PROTOBUF_ONCE_TYPE ::google::protobuf::ProtobufOnceType -#define GOOGLE_PROTOBUF_DECLARE_ONCE(NAME) \ - ::google::protobuf::ProtobufOnceType NAME - -} // namespace protobuf -} // namespace google - -#endif // GOOGLE_PROTOBUF_STUBS_ONCE_H__ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/platform_macros.h b/third_party/protobuf-lite/google/protobuf/stubs/platform_macros.h index c3a64dd2..ce1b1e36 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/platform_macros.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/platform_macros.h @@ -56,7 +56,7 @@ #elif defined(__aarch64__) #define GOOGLE_PROTOBUF_ARCH_AARCH64 1 #define GOOGLE_PROTOBUF_ARCH_64_BIT 1 -#elif defined(__MIPSEL__) +#elif defined(__mips__) #if defined(__LP64__) #define GOOGLE_PROTOBUF_ARCH_MIPS64 1 #define GOOGLE_PROTOBUF_ARCH_64_BIT 1 @@ -99,6 +99,7 @@ GOOGLE_PROTOBUF_PLATFORM_ERROR #if defined(__APPLE__) #define GOOGLE_PROTOBUF_OS_APPLE +#include #include #if TARGET_OS_IPHONE #define GOOGLE_PROTOBUF_OS_IPHONE @@ -125,4 +126,9 @@ GOOGLE_PROTOBUF_PLATFORM_ERROR #define GOOGLE_PROTOBUF_NO_THREADLOCAL 
#endif +#if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 1070 +// __thread keyword requires at least 10.7 +#define GOOGLE_PROTOBUF_NO_THREADLOCAL +#endif + #endif // GOOGLE_PROTOBUF_PLATFORM_MACROS_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/port.h b/third_party/protobuf-lite/google/protobuf/stubs/port.h index 6b52305f..0fcee689 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/port.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/port.h @@ -32,20 +32,16 @@ #define GOOGLE_PROTOBUF_STUBS_PORT_H_ #include +#include #include #include #include #include -#if defined(__osf__) -// Tru64 lacks stdint.h, but has inttypes.h which defines a superset of -// what stdint.h would define. -#include -#elif !defined(_MSC_VER) -#include -#endif #include +#include + #undef PROTOBUF_LITTLE_ENDIAN #ifdef _WIN32 // Assuming windows is always little-endian. @@ -55,7 +51,7 @@ #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) #define PROTOBUF_LITTLE_ENDIAN 1 #endif - #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER) + #if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER) // If MSVC has "/RTCc" set, it will complain about truncating casts at // runtime. This file contains some intentional truncating casts. #pragma runtime_checks("c", off) @@ -72,6 +68,19 @@ #define PROTOBUF_LITTLE_ENDIAN 1 #endif #endif + +// These #includes are for the byte swap functions declared later on. +#ifdef _MSC_VER +#include // NOLINT(build/include) +#include +#elif defined(__APPLE__) +#include +#elif defined(__GLIBC__) || defined(__BIONIC__) || defined(__CYGWIN__) +#include // IWYU pragma: export +#endif + +// Legacy: some users reference these (internal-only) macros even though we +// don't need them any more. 
#if defined(_MSC_VER) && defined(PROTOBUF_USE_DLLS) #ifdef LIBPROTOBUF_EXPORTS #define LIBPROTOBUF_EXPORT __declspec(dllexport) @@ -88,17 +97,9 @@ #define LIBPROTOC_EXPORT #endif -// These #includes are for the byte swap functions declared later on. -#ifdef _MSC_VER -#include // NOLINT(build/include) -#include -#elif defined(__APPLE__) -#include -#elif defined(__GLIBC__) || defined(__BIONIC__) || defined(__CYGWIN__) -#include // IWYU pragma: export -#endif - -#define PROTOBUF_RUNTIME_DEPRECATED(message) +#define PROTOBUF_RUNTIME_DEPRECATED(message) PROTOBUF_DEPRECATED_MSG(message) +#define GOOGLE_PROTOBUF_RUNTIME_DEPRECATED(message) \ + PROTOBUF_DEPRECATED_MSG(message) // =================================================================== // from google3/base/port.h @@ -109,36 +110,17 @@ // undefined otherwise. Do NOT define it to 0 -- that causes // '#ifdef LANG_CXX11' to behave differently from '#if LANG_CXX11'. #define LANG_CXX11 1 -#endif - -#if LANG_CXX11 && !defined(__NVCC__) -#define PROTOBUF_CXX11 1 -#else -#define PROTOBUF_CXX11 0 -#endif - -#if PROTOBUF_CXX11 -#define PROTOBUF_FINAL final #else -#define PROTOBUF_FINAL +#error "Protobuf requires at least C++11." 
#endif namespace google { namespace protobuf { +using ConstStringParam = const std::string &; + typedef unsigned int uint; -#ifdef _MSC_VER -typedef signed __int8 int8; -typedef __int16 int16; -typedef __int32 int32; -typedef __int64 int64; - -typedef unsigned __int8 uint8; -typedef unsigned __int16 uint16; -typedef unsigned __int32 uint32; -typedef unsigned __int64 uint64; -#else typedef int8_t int8; typedef int16_t int16; typedef int32_t int32; @@ -148,130 +130,13 @@ typedef uint8_t uint8; typedef uint16_t uint16; typedef uint32_t uint32; typedef uint64_t uint64; -#endif - -// long long macros to be used because gcc and vc++ use different suffixes, -// and different size specifiers in format strings -#undef GOOGLE_LONGLONG -#undef GOOGLE_ULONGLONG -#undef GOOGLE_LL_FORMAT - -#ifdef _MSC_VER -#define GOOGLE_LONGLONG(x) x##I64 -#define GOOGLE_ULONGLONG(x) x##UI64 -#define GOOGLE_LL_FORMAT "I64" // As in printf("%I64d", ...) -#else -// By long long, we actually mean int64. -#define GOOGLE_LONGLONG(x) x##LL -#define GOOGLE_ULONGLONG(x) x##ULL -// Used to format real long long integers. -#define GOOGLE_LL_FORMAT "ll" // As in "%lld". Note that "q" is poor form also. -#endif static const int32 kint32max = 0x7FFFFFFF; static const int32 kint32min = -kint32max - 1; -static const int64 kint64max = GOOGLE_LONGLONG(0x7FFFFFFFFFFFFFFF); +static const int64 kint64max = PROTOBUF_LONGLONG(0x7FFFFFFFFFFFFFFF); static const int64 kint64min = -kint64max - 1; static const uint32 kuint32max = 0xFFFFFFFFu; -static const uint64 kuint64max = GOOGLE_ULONGLONG(0xFFFFFFFFFFFFFFFF); - -// ------------------------------------------------------------------- -// Annotations: Some parts of the code have been annotated in ways that might -// be useful to some compilers or tools, but are not supported universally. -// You can #define these annotations yourself if the default implementation -// is not right for you. 
- -#ifndef GOOGLE_ATTRIBUTE_ALWAYS_INLINE -#if defined(__GNUC__) && (__GNUC__ > 3 ||(__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) -// For functions we want to force inline. -// Introduced in gcc 3.1. -#define GOOGLE_ATTRIBUTE_ALWAYS_INLINE __attribute__ ((always_inline)) -#else -// Other compilers will have to figure it out for themselves. -#define GOOGLE_ATTRIBUTE_ALWAYS_INLINE -#endif -#endif - -#define GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE GOOGLE_ATTRIBUTE_ALWAYS_INLINE - -#ifndef GOOGLE_ATTRIBUTE_NOINLINE -#if defined(__GNUC__) && (__GNUC__ > 3 ||(__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) -// For functions we want to force not inline. -// Introduced in gcc 3.1. -#define GOOGLE_ATTRIBUTE_NOINLINE __attribute__ ((noinline)) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) -// Seems to have been around since at least Visual Studio 2005 -#define GOOGLE_ATTRIBUTE_NOINLINE __declspec(noinline) -#else -// Other compilers will have to figure it out for themselves. -#define GOOGLE_ATTRIBUTE_NOINLINE -#endif -#endif - -#define GOOGLE_PROTOBUF_ATTRIBUTE_NOINLINE GOOGLE_ATTRIBUTE_NOINLINE - -#ifndef GOOGLE_ATTRIBUTE_FUNC_ALIGN -#if defined(__clang__) || \ - defined(__GNUC__) && (__GNUC__ > 4 ||(__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) -// Function alignment attribute introduced in gcc 4.3 -#define GOOGLE_ATTRIBUTE_FUNC_ALIGN(bytes) __attribute__ ((aligned(bytes))) -#else -#define GOOGLE_ATTRIBUTE_FUNC_ALIGN(bytes) -#endif -#endif - -#define GOOGLE_PROTOBUF_ATTRIBUTE_FUNC_ALIGN(bytes) \ - GOOGLE_ATTRIBUTE_FUNC_ALIGN(bytes) - -#ifndef GOOGLE_PREDICT_TRUE -#ifdef __GNUC__ -// Provided at least since GCC 3.0. -#define GOOGLE_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) -#else -#define GOOGLE_PREDICT_TRUE(x) (x) -#endif -#endif - -#ifndef GOOGLE_PREDICT_FALSE -#ifdef __GNUC__ -// Provided at least since GCC 3.0. 
-#define GOOGLE_PREDICT_FALSE(x) (__builtin_expect(x, 0)) -#else -#define GOOGLE_PREDICT_FALSE(x) (x) -#endif -#endif - -#ifndef GOOGLE_PROTOBUF_ATTRIBUTE_RETURNS_NONNULL -#ifdef __GNUC__ -#define GOOGLE_PROTOBUF_ATTRIBUTE_RETURNS_NONNULL \ - __attribute__((returns_nonnull)) -#endif -#endif - -// Delimits a block of code which may write to memory which is simultaneously -// written by other threads, but which has been determined to be thread-safe -// (e.g. because it is an idempotent write). -#ifndef GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN -#define GOOGLE_SAFE_CONCURRENT_WRITES_BEGIN() -#endif -#ifndef GOOGLE_SAFE_CONCURRENT_WRITES_END -#define GOOGLE_SAFE_CONCURRENT_WRITES_END() -#endif - -#define GOOGLE_GUARDED_BY(x) -#define GOOGLE_ATTRIBUTE_COLD - -#ifdef GOOGLE_PROTOBUF_DONT_USE_UNALIGNED -# define GOOGLE_PROTOBUF_USE_UNALIGNED 0 -#else -# if defined(_M_X64) || defined(__x86_64__) || defined(_M_IX86) || defined(__i386__) -# define GOOGLE_PROTOBUF_USE_UNALIGNED 1 -# else -# define GOOGLE_PROTOBUF_USE_UNALIGNED 0 -# endif -#endif - -#define GOOGLE_PROTOBUF_ATTRIBUTE_COLD GOOGLE_ATTRIBUTE_COLD +static const uint64 kuint64max = PROTOBUF_ULONGLONG(0xFFFFFFFFFFFFFFFF); #if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) ||\ defined(MEMORY_SANITIZER) @@ -313,7 +178,7 @@ inline void GOOGLE_UNALIGNED_STORE64(void *p, uint64 v) { __sanitizer_unaligned_store64(p, v); } -#elif GOOGLE_PROTOBUF_USE_UNALIGNED +#elif defined(GOOGLE_PROTOBUF_USE_UNALIGNED) && GOOGLE_PROTOBUF_USE_UNALIGNED #define GOOGLE_UNALIGNED_LOAD16(_p) (*reinterpret_cast(_p)) #define GOOGLE_UNALIGNED_LOAD32(_p) (*reinterpret_cast(_p)) @@ -362,12 +227,6 @@ inline void GOOGLE_UNALIGNED_STORE64(void *p, uint64 v) { # define GOOGLE_PROTOBUF_USE_PORTABLE_LOG2 #endif -#if defined(_MSC_VER) -#define GOOGLE_THREAD_LOCAL __declspec(thread) -#else -#define GOOGLE_THREAD_LOCAL __thread -#endif - // The following guarantees declaration of the byte swap functions. 
#ifdef _MSC_VER #define bswap_16(x) _byteswap_ushort(x) @@ -382,10 +241,14 @@ inline void GOOGLE_UNALIGNED_STORE64(void *p, uint64 v) { #elif !defined(__GLIBC__) && !defined(__BIONIC__) && !defined(__CYGWIN__) +#ifndef bswap_16 static inline uint16 bswap_16(uint16 x) { return static_cast(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8)); } #define bswap_16(x) bswap_16(x) +#endif + +#ifndef bswap_32 static inline uint32 bswap_32(uint32 x) { return (((x & 0xFF) << 24) | ((x & 0xFF00) << 8) | @@ -393,17 +256,21 @@ static inline uint32 bswap_32(uint32 x) { ((x & 0xFF000000) >> 24)); } #define bswap_32(x) bswap_32(x) +#endif + +#ifndef bswap_64 static inline uint64 bswap_64(uint64 x) { - return (((x & GOOGLE_ULONGLONG(0xFF)) << 56) | - ((x & GOOGLE_ULONGLONG(0xFF00)) << 40) | - ((x & GOOGLE_ULONGLONG(0xFF0000)) << 24) | - ((x & GOOGLE_ULONGLONG(0xFF000000)) << 8) | - ((x & GOOGLE_ULONGLONG(0xFF00000000)) >> 8) | - ((x & GOOGLE_ULONGLONG(0xFF0000000000)) >> 24) | - ((x & GOOGLE_ULONGLONG(0xFF000000000000)) >> 40) | - ((x & GOOGLE_ULONGLONG(0xFF00000000000000)) >> 56)); + return (((x & PROTOBUF_ULONGLONG(0xFF)) << 56) | + ((x & PROTOBUF_ULONGLONG(0xFF00)) << 40) | + ((x & PROTOBUF_ULONGLONG(0xFF0000)) << 24) | + ((x & PROTOBUF_ULONGLONG(0xFF000000)) << 8) | + ((x & PROTOBUF_ULONGLONG(0xFF00000000)) >> 8) | + ((x & PROTOBUF_ULONGLONG(0xFF0000000000)) >> 24) | + ((x & PROTOBUF_ULONGLONG(0xFF000000000000)) >> 40) | + ((x & PROTOBUF_ULONGLONG(0xFF00000000000000)) >> 56)); } #define bswap_64(x) bswap_64(x) +#endif #endif @@ -472,7 +339,7 @@ class Bits { // =================================================================== // from google3/util/endian/endian.h -LIBPROTOBUF_EXPORT uint32 ghtonl(uint32 x); +PROTOBUF_EXPORT uint32 ghtonl(uint32 x); class BigEndian { public: @@ -530,13 +397,9 @@ class BigEndian { } }; -#ifndef GOOGLE_ATTRIBUTE_SECTION_VARIABLE -#define GOOGLE_ATTRIBUTE_SECTION_VARIABLE(name) -#endif - -#define GOOGLE_PROTOBUF_ATTRIBUTE_SECTION_VARIABLE(name) - } // 
namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_STUBS_PORT_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/status.h b/third_party/protobuf-lite/google/protobuf/stubs/status.h index c5d38f0b..bededad5 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/status.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/status.h @@ -36,6 +36,8 @@ #include #include +#include + namespace google { namespace protobuf { namespace util { @@ -62,7 +64,7 @@ enum Code { }; } // namespace error -class LIBPROTOBUF_EXPORT Status { +class PROTOBUF_EXPORT Status { public: // Creates a "successful" status. Status(); @@ -88,9 +90,15 @@ class LIBPROTOBUF_EXPORT Status { int error_code() const { return error_code_; } + error::Code code() const { + return error_code_; + } StringPiece error_message() const { return error_message_; } + StringPiece message() const { + return error_message_; + } bool operator==(const Status& x) const; bool operator!=(const Status& x) const { @@ -98,19 +106,20 @@ class LIBPROTOBUF_EXPORT Status { } // Return a combination of the error code name and message. - string ToString() const; + std::string ToString() const; private: error::Code error_code_; - string error_message_; + std::string error_message_; }; // Prints a human-readable representation of 'x' to 'os'. 
-LIBPROTOBUF_EXPORT std::ostream& operator<<(std::ostream& os, const Status& x); - -#define EXPECT_OK(value) EXPECT_TRUE((value).ok()) +PROTOBUF_EXPORT std::ostream& operator<<(std::ostream& os, const Status& x); } // namespace util } // namespace protobuf } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_STUBS_STATUS_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/statusor.h b/third_party/protobuf-lite/google/protobuf/stubs/statusor.h index 29f869ad..c02e89a9 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/statusor.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/statusor.h @@ -33,7 +33,7 @@ // usable value, or an error Status explaining why such a value is // not present. To this end, StatusOr does not allow its Status // value to be Status::OK. Further, StatusOr does not allow the -// contained pointer to be NULL. +// contained pointer to be nullptr. // // The primary use-case for StatusOr is as the return value of a // function which may fail. @@ -89,6 +89,8 @@ #include +#include + namespace google { namespace protobuf { namespace util { @@ -114,15 +116,15 @@ class StatusOr { StatusOr(const Status& status); // NOLINT // Construct a new StatusOr with the given value. If T is a plain pointer, - // value must not be NULL. After calling this constructor, calls to + // value must not be nullptr. After calling this constructor, calls to // ValueOrDie() will succeed, and calls to status() will return OK. // // NOTE: Not explicit - we want to use StatusOr as a return type // so it is convenient and sensible to be able to do 'return T()' // when when the return type is StatusOr. // - // REQUIRES: if T is a plain pointer, value != NULL. This requirement is - // DCHECKed. In optimized builds, passing a NULL pointer here will have + // REQUIRES: if T is a plain pointer, value != nullptr. This requirement is + // DCHECKed. 
In optimized builds, passing a null pointer here will have // the effect of passing PosixErrorSpace::EINVAL as a fallback. StatusOr(const T& value); // NOLINT @@ -151,6 +153,7 @@ class StatusOr { // If you need to initialize a T object from the stored value, // ConsumeValueOrDie() may be more efficient. const T& ValueOrDie() const; + const T& value () const; private: Status status_; @@ -162,7 +165,7 @@ class StatusOr { namespace internal { -class LIBPROTOBUF_EXPORT StatusOrHelper { +class PROTOBUF_EXPORT StatusOrHelper { public: // Move type-agnostic error handling to the .cc. static void Crash(const util::Status& status); @@ -174,13 +177,13 @@ class LIBPROTOBUF_EXPORT StatusOrHelper { template struct StatusOrHelper::Specialize { - // For non-pointer T, a reference can never be NULL. + // For non-pointer T, a reference can never be nullptr. static inline bool IsValueNull(const T& t) { return false; } }; template struct StatusOrHelper::Specialize { - static inline bool IsValueNull(const T* t) { return t == NULL; } + static inline bool IsValueNull(const T* t) { return t == nullptr; } }; } // namespace internal @@ -202,7 +205,7 @@ inline StatusOr::StatusOr(const Status& status) { template inline StatusOr::StatusOr(const T& value) { if (internal::StatusOrHelper::Specialize::IsValueNull(value)) { - status_ = Status(error::INTERNAL, "NULL is not a vaild argument."); + status_ = Status(error::INTERNAL, "nullptr is not a valid argument."); } else { status_ = Status::OK; value_ = value; @@ -252,8 +255,18 @@ inline const T& StatusOr::ValueOrDie() const { } return value_; } + +template +inline const T& StatusOr::value() const { + if (!status_.ok()) { + internal::StatusOrHelper::Crash(status_); + } + return value_; +} } // namespace util } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_STUBS_STATUSOR_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/stl_util.h b/third_party/protobuf-lite/google/protobuf/stubs/stl_util.h index 
9e4c82a4..d01f9ec9 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/stl_util.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/stl_util.h @@ -38,33 +38,13 @@ namespace google { namespace protobuf { -// STLDeleteContainerPointers() -// For a range within a container of pointers, calls delete -// (non-array version) on these pointers. -// NOTE: for these three functions, we could just implement a DeleteObject -// functor and then call for_each() on the range and functor, but this -// requires us to pull in all of algorithm.h, which seems expensive. -// For hash_[multi]set, it is important that this deletes behind the iterator -// because the hash_set may call the hash function on the iterator when it is -// advanced, which could result in the hash function trying to deference a -// stale pointer. -template -void STLDeleteContainerPointers(ForwardIterator begin, - ForwardIterator end) { - while (begin != end) { - ForwardIterator temp = begin; - ++begin; - delete *temp; - } -} - // Inside Google, this function implements a horrible, disgusting hack in which // we reach into the string's private implementation and resize it without // initializing the new bytes. In some cases doing this can significantly // improve performance. However, since it's totally non-portable it has no // place in open source code. Feel free to fill this function in with your // own disgusting hack if you want the perf boost. -inline void STLStringResizeUninitialized(string* s, size_t new_size) { +inline void STLStringResizeUninitialized(std::string* s, size_t new_size) { s->resize(new_size); } @@ -80,39 +60,9 @@ inline void STLStringResizeUninitialized(string* s, size_t new_size) { // (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-active.html#530) // proposes this as the method. According to Matt Austern, this should // already work on all current implementations. 
-inline char* string_as_array(string* str) { +inline char* string_as_array(std::string* str) { // DO NOT USE const_cast(str->data())! See the unittest for why. - return str->empty() ? NULL : &*str->begin(); -} - -// STLDeleteElements() deletes all the elements in an STL container and clears -// the container. This function is suitable for use with a vector, set, -// hash_set, or any other STL container which defines sensible begin(), end(), -// and clear() methods. -// -// If container is NULL, this function is a no-op. -// -// As an alternative to calling STLDeleteElements() directly, consider -// ElementDeleter (defined below), which ensures that your container's elements -// are deleted when the ElementDeleter goes out of scope. -template -void STLDeleteElements(T *container) { - if (!container) return; - STLDeleteContainerPointers(container->begin(), container->end()); - container->clear(); -} - -// Given an STL container consisting of (key, value) pairs, STLDeleteValues -// deletes all the "value" components and clears the container. Does nothing -// in the case it's given a NULL pointer. - -template -void STLDeleteValues(T *v) { - if (!v) return; - for (typename T::iterator i = v->begin(); i != v->end(); ++i) { - delete i->second; - } - v->clear(); + return str->empty() ? 
nullptr : &*str->begin(); } } // namespace protobuf diff --git a/third_party/protobuf-lite/google/protobuf/stubs/stringpiece.h b/third_party/protobuf-lite/google/protobuf/stubs/stringpiece.h index 563ff75d..fbcb20af 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/stringpiece.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/stringpiece.h @@ -76,30 +76,30 @@ // // There are several ways to create a null StringPiece: // StringPiece() -// StringPiece(NULL) -// StringPiece(NULL, 0) -// For all of the above, sp.data() == NULL, sp.length() == 0, +// StringPiece(nullptr) +// StringPiece(nullptr, 0) +// For all of the above, sp.data() == nullptr, sp.length() == 0, // and sp.empty() == true. Also, if you create a StringPiece with -// a non-NULL pointer then sp.data() != NULL. Once created, -// sp.data() will stay either NULL or not-NULL, except if you call +// a non-null pointer then sp.data() != nullptr. Once created, +// sp.data() will stay either nullptr or not-nullptr, except if you call // sp.clear() or sp.set(). // -// Thus, you can use StringPiece(NULL) to signal an out-of-band value +// Thus, you can use StringPiece(nullptr) to signal an out-of-band value // that is different from other StringPiece values. This is similar -// to the way that const char* p1 = NULL; is different from +// to the way that const char* p1 = nullptr; is different from // const char* p2 = "";. // // There are many ways to create an empty StringPiece: // StringPiece() -// StringPiece(NULL) -// StringPiece(NULL, 0) +// StringPiece(nullptr) +// StringPiece(nullptr, 0) // StringPiece("") // StringPiece("", 0) // StringPiece("abcdef", 0) // StringPiece("abcdef"+6, 0) // For all of the above, sp.length() will be 0 and sp.empty() will be true. -// For some empty StringPiece values, sp.data() will be NULL. -// For some empty StringPiece values, sp.data() will not be NULL. +// For some empty StringPiece values, sp.data() will be nullptr. 
+// For some empty StringPiece values, sp.data() will not be nullptr. // // Be careful not to confuse: null StringPiece and empty StringPiece. // The set of empty StringPieces properly includes the set of null StringPieces. @@ -109,20 +109,20 @@ // All empty StringPiece values compare equal to each other. // Even a null StringPieces compares equal to a non-null empty StringPiece: // StringPiece() == StringPiece("", 0) -// StringPiece(NULL) == StringPiece("abc", 0) -// StringPiece(NULL, 0) == StringPiece("abcdef"+6, 0) +// StringPiece(nullptr) == StringPiece("abc", 0) +// StringPiece(nullptr, 0) == StringPiece("abcdef"+6, 0) // // Look carefully at this example: -// StringPiece("") == NULL +// StringPiece("") == nullptr // True or false? TRUE, because StringPiece::operator== converts -// the right-hand side from NULL to StringPiece(NULL), +// the right-hand side from nullptr to StringPiece(nullptr), // and then compares two zero-length spans of characters. // However, we are working to make this example produce a compile error. // // Suppose you want to write: -// bool TestWhat?(StringPiece sp) { return sp == NULL; } // BAD +// bool TestWhat?(StringPiece sp) { return sp == nullptr; } // BAD // Do not do that. Write one of these instead: -// bool TestNull(StringPiece sp) { return sp.data() == NULL; } +// bool TestNull(StringPiece sp) { return sp.data() == nullptr; } // bool TestEmpty(StringPiece sp) { return sp.empty(); } // The intent of TestWhat? is unclear. Did you mean TestNull or TestEmpty? // Right now, TestWhat? behaves likes TestEmpty. @@ -148,9 +148,10 @@ #include #include -#include #include +#include + namespace google { namespace protobuf { // StringPiece has *two* size types. 
@@ -163,7 +164,7 @@ namespace protobuf { // is 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64 // future changes intended: http://go/64BitStringPiece // -typedef string::difference_type stringpiece_ssize_type; +typedef std::string::difference_type stringpiece_ssize_type; // STRINGPIECE_CHECK_SIZE protects us from 32-bit overflows. // TODO(mec): delete this after stringpiece_ssize_type goes 64 bit. @@ -175,7 +176,7 @@ typedef string::difference_type stringpiece_ssize_type; #define STRINGPIECE_CHECK_SIZE 0 #endif -class LIBPROTOBUF_EXPORT StringPiece { +class PROTOBUF_EXPORT StringPiece { private: const char* ptr_; stringpiece_ssize_type length_; @@ -207,11 +208,11 @@ class LIBPROTOBUF_EXPORT StringPiece { // // Style guide exception granted: // http://goto/style-guide-exception-20978288 - StringPiece() : ptr_(NULL), length_(0) {} + StringPiece() : ptr_(nullptr), length_(0) {} StringPiece(const char* str) // NOLINT(runtime/explicit) : ptr_(str), length_(0) { - if (str != NULL) { + if (str != nullptr) { length_ = CheckedSsizeTFromSizeT(strlen(str)); } } @@ -248,7 +249,7 @@ class LIBPROTOBUF_EXPORT StringPiece { bool empty() const { return length_ == 0; } void clear() { - ptr_ = NULL; + ptr_ = nullptr; length_ = 0; } @@ -260,7 +261,7 @@ class LIBPROTOBUF_EXPORT StringPiece { void set(const char* str) { ptr_ = str; - if (str != NULL) + if (str != nullptr) length_ = CheckedSsizeTFromSizeT(strlen(str)); else length_ = 0; @@ -300,25 +301,21 @@ class LIBPROTOBUF_EXPORT StringPiece { return 0; } - string as_string() const { - return ToString(); - } + std::string as_string() const { return ToString(); } // We also define ToString() here, since many other string-like // interfaces name the routine that converts to a C++ string // "ToString", and it's confusing to have the method that does that // for a StringPiece be called "as_string()". We also leave the // "as_string()" method defined here for existing code. 
- string ToString() const { - if (ptr_ == NULL) return string(); - return string(data(), static_cast(size())); + std::string ToString() const { + if (ptr_ == nullptr) return ""; + return std::string(data(), static_cast(size())); } - operator string() const { - return ToString(); - } + explicit operator std::string() const { return ToString(); } - void CopyToString(string* target) const; - void AppendToString(string* target) const; + void CopyToString(std::string* target) const; + void AppendToString(std::string* target) const; bool starts_with(StringPiece x) const { return (length_ >= x.length_) && @@ -463,6 +460,9 @@ struct StringPiecePod { std::string ToString() const { return std::string(data_, static_cast(size_)); } + + explicit operator std::string() const { return ToString(); } + private: const char* data_; stringpiece_ssize_type size_; @@ -476,7 +476,7 @@ GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_START template<> struct hash { size_t operator()(const StringPiece& s) const { size_t result = 0; - for (const char *str = s.data(), *end = str + s.size(); str < end; str++) { + for (const char *str = s.data(), *end = str + s.size(); str < end; str++) { result = 5 * result + static_cast(*str); } return result; @@ -484,4 +484,6 @@ template<> struct hash { }; GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_END +#include + #endif // STRINGS_STRINGPIECE_H_ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/stringprintf.h b/third_party/protobuf-lite/google/protobuf/stubs/stringprintf.h index 7183ec6a..e3858be1 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/stringprintf.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/stringprintf.h @@ -46,31 +46,40 @@ #include +#include + namespace google { namespace protobuf { // Return a C++ string -LIBPROTOBUF_EXPORT extern string StringPrintf(const char* format, ...); +PROTOBUF_EXPORT extern std::string StringPrintf(const char* format, ...); // Store result into a supplied string and return it 
-LIBPROTOBUF_EXPORT extern const string& SStringPrintf(string* dst, const char* format, ...); +PROTOBUF_EXPORT extern const std::string& SStringPrintf(std::string* dst, + const char* format, + ...); // Append result to a supplied string -LIBPROTOBUF_EXPORT extern void StringAppendF(string* dst, const char* format, ...); +PROTOBUF_EXPORT extern void StringAppendF(std::string* dst, const char* format, + ...); // Lower-level routine that takes a va_list and appends to a specified // string. All other routines are just convenience wrappers around it. -LIBPROTOBUF_EXPORT extern void StringAppendV(string* dst, const char* format, va_list ap); +PROTOBUF_EXPORT extern void StringAppendV(std::string* dst, const char* format, + va_list ap); // The max arguments supported by StringPrintfVector -LIBPROTOBUF_EXPORT extern const int kStringPrintfVectorMaxArgs; +PROTOBUF_EXPORT extern const int kStringPrintfVectorMaxArgs; // You can use this version when all your arguments are strings, but // you don't know how many arguments you'll have at compile time. 
// StringPrintfVector will LOG(FATAL) if v.size() > kStringPrintfVectorMaxArgs -LIBPROTOBUF_EXPORT extern string StringPrintfVector(const char* format, const std::vector& v); +PROTOBUF_EXPORT extern std::string StringPrintfVector( + const char* format, const std::vector& v); } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_STUBS_STRINGPRINTF_H diff --git a/third_party/protobuf-lite/google/protobuf/stubs/strutil.h b/third_party/protobuf-lite/google/protobuf/stubs/strutil.h index a839b8b3..8ce81f28 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/strutil.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/strutil.h @@ -33,15 +33,18 @@ #ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ #define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ -#include -#include #include #include +#include + +#include +#include +#include namespace google { namespace protobuf { -#ifdef _MSC_VER +#if defined(_MSC_VER) && _MSC_VER < 1800 #define strtoll _strtoi64 #define strtoull _strtoui64 #elif defined(__DECCXX) && defined(__osf__) @@ -110,13 +113,13 @@ inline int hex_digit_to_int(char c) { // prefix string if the prefix matches, otherwise the original // string. // ---------------------------------------------------------------------- -inline bool HasPrefixString(const string& str, - const string& prefix) { +inline bool HasPrefixString(StringPiece str, StringPiece prefix) { return str.size() >= prefix.size() && - str.compare(0, prefix.size(), prefix) == 0; + memcmp(str.data(), prefix.data(), prefix.size()) == 0; } -inline string StripPrefixString(const string& str, const string& prefix) { +inline std::string StripPrefixString(const std::string& str, + const std::string& prefix) { if (HasPrefixString(str, prefix)) { return str.substr(prefix.size()); } else { @@ -132,13 +135,14 @@ inline string StripPrefixString(const string& str, const string& prefix) { // suffix string if the suffix matches, otherwise the original // string. 
// ---------------------------------------------------------------------- -inline bool HasSuffixString(const string& str, - const string& suffix) { +inline bool HasSuffixString(StringPiece str, StringPiece suffix) { return str.size() >= suffix.size() && - str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; + memcmp(str.data() + str.size() - suffix.size(), suffix.data(), + suffix.size()) == 0; } -inline string StripSuffixString(const string& str, const string& suffix) { +inline std::string StripSuffixString(const std::string& str, + const std::string& suffix) { if (HasSuffixString(str, suffix)) { return str.substr(0, str.size() - suffix.size()); } else { @@ -155,13 +159,10 @@ inline string StripSuffixString(const string& str, const string& suffix) { // StripWhitespace // Removes whitespaces from both ends of the given string. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT void ReplaceCharacters(string* s, const char* remove, - char replacewith); -LIBPROTOBUF_EXPORT void StripString(string* s, const char* remove, - char replacewith); - -LIBPROTOBUF_EXPORT void StripWhitespace(string* s); +PROTOBUF_EXPORT void ReplaceCharacters(std::string* s, const char* remove, + char replacewith); +PROTOBUF_EXPORT void StripWhitespace(std::string* s); // ---------------------------------------------------------------------- // LowerString() @@ -173,24 +174,26 @@ LIBPROTOBUF_EXPORT void StripWhitespace(string* s); // strings. // ---------------------------------------------------------------------- -inline void LowerString(string * s) { - string::iterator end = s->end(); - for (string::iterator i = s->begin(); i != end; ++i) { +inline void LowerString(std::string* s) { + std::string::iterator end = s->end(); + for (std::string::iterator i = s->begin(); i != end; ++i) { // tolower() changes based on locale. We don't want this! 
if ('A' <= *i && *i <= 'Z') *i += 'a' - 'A'; } } -inline void UpperString(string * s) { - string::iterator end = s->end(); - for (string::iterator i = s->begin(); i != end; ++i) { +inline void UpperString(std::string* s) { + std::string::iterator end = s->end(); + for (std::string::iterator i = s->begin(); i != end; ++i) { // toupper() changes based on locale. We don't want this! if ('a' <= *i && *i <= 'z') *i += 'A' - 'a'; } } -inline string ToUpper(const string& s) { - string out = s; +inline void ToUpper(std::string* s) { UpperString(s); } + +inline std::string ToUpper(const std::string& s) { + std::string out = s; UpperString(&out); return out; } @@ -203,8 +206,10 @@ inline string ToUpper(const string& s) { // happened or not. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub, - const string& newsub, bool replace_all); +PROTOBUF_EXPORT std::string StringReplace(const std::string& s, + const std::string& oldsub, + const std::string& newsub, + bool replace_all); // ---------------------------------------------------------------------- // SplitStringUsing() @@ -212,8 +217,8 @@ LIBPROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub, // to 'result'. If there are consecutive delimiters, this function skips // over all of them. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT void SplitStringUsing(const string& full, const char* delim, - std::vector* res); +PROTOBUF_EXPORT void SplitStringUsing(StringPiece full, const char* delim, + std::vector* res); // Split a string using one or more byte delimiters, presented // as a nul-terminated c string. Append the components to 'result'. @@ -223,17 +228,16 @@ LIBPROTOBUF_EXPORT void SplitStringUsing(const string& full, const char* delim, // // If "full" is the empty string, yields an empty string as the only value. 
// ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT void SplitStringAllowEmpty(const string& full, - const char* delim, - std::vector* result); +PROTOBUF_EXPORT void SplitStringAllowEmpty(StringPiece full, const char* delim, + std::vector* result); // ---------------------------------------------------------------------- // Split() // Split a string using a character delimiter. // ---------------------------------------------------------------------- -inline std::vector Split( - const string& full, const char* delim, bool skip_empty = true) { - std::vector result; +inline std::vector Split(StringPiece full, const char* delim, + bool skip_empty = true) { + std::vector result; if (skip_empty) { SplitStringUsing(full, delim, &result); } else { @@ -250,12 +254,12 @@ inline std::vector Split( // another takes a pointer to the target string. In the latter case the // target string is cleared and overwritten. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT void JoinStrings(const std::vector& components, - const char* delim, string* result); +PROTOBUF_EXPORT void JoinStrings(const std::vector& components, + const char* delim, std::string* result); -inline string JoinStrings(const std::vector& components, - const char* delim) { - string result; +inline std::string JoinStrings(const std::vector& components, + const char* delim) { + std::string result; JoinStrings(components, delim, &result); return result; } @@ -285,15 +289,15 @@ inline string JoinStrings(const std::vector& components, // // Errors: In the first form of the call, errors are reported with // LOG(ERROR). The same is true for the second form of the call if -// the pointer to the string std::vector is NULL; otherwise, error +// the pointer to the string std::vector is nullptr; otherwise, error // messages are stored in the std::vector. 
In either case, the effect on // the dest array is not defined, but rest of the source will be // processed. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest); -LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest, - std::vector *errors); +PROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest); +PROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest, + std::vector* errors); // ---------------------------------------------------------------------- // UnescapeCEscapeString() @@ -304,16 +308,18 @@ LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest, // to be the same. // // The second call stores its errors in a supplied string vector. -// If the string vector pointer is NULL, it reports the errors with LOG(). +// If the string vector pointer is nullptr, it reports the errors with LOG(). // // In the first and second calls, the length of dest is returned. In the // the third call, the new string is returned. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest); -LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest, - std::vector *errors); -LIBPROTOBUF_EXPORT string UnescapeCEscapeString(const string& src); +PROTOBUF_EXPORT int UnescapeCEscapeString(const std::string& src, + std::string* dest); +PROTOBUF_EXPORT int UnescapeCEscapeString(const std::string& src, + std::string* dest, + std::vector* errors); +PROTOBUF_EXPORT std::string UnescapeCEscapeString(const std::string& src); // ---------------------------------------------------------------------- // CEscape() @@ -322,21 +328,21 @@ LIBPROTOBUF_EXPORT string UnescapeCEscapeString(const string& src); // // Escaped chars: \n, \r, \t, ", ', \, and !isprint(). 
// ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT string CEscape(const string& src); +PROTOBUF_EXPORT std::string CEscape(const std::string& src); // ---------------------------------------------------------------------- // CEscapeAndAppend() // Escapes 'src' using C-style escape sequences, and appends the escaped // string to 'dest'. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT void CEscapeAndAppend(StringPiece src, string* dest); +PROTOBUF_EXPORT void CEscapeAndAppend(StringPiece src, std::string* dest); namespace strings { // Like CEscape() but does not escape bytes with the upper bit set. -LIBPROTOBUF_EXPORT string Utf8SafeCEscape(const string& src); +PROTOBUF_EXPORT std::string Utf8SafeCEscape(const std::string& src); // Like CEscape() but uses hex (\x) escapes instead of octals. -LIBPROTOBUF_EXPORT string CHexEscape(const string& src); +PROTOBUF_EXPORT std::string CHexEscape(const std::string& src); } // namespace strings // ---------------------------------------------------------------------- @@ -349,10 +355,10 @@ LIBPROTOBUF_EXPORT string CHexEscape(const string& src); // platforms, so using these is safer, from the point of view of // overflow behavior, than using the standard libc functions. 
// ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT int32 strto32_adaptor(const char *nptr, char **endptr, - int base); -LIBPROTOBUF_EXPORT uint32 strtou32_adaptor(const char *nptr, char **endptr, - int base); +PROTOBUF_EXPORT int32 strto32_adaptor(const char* nptr, char** endptr, + int base); +PROTOBUF_EXPORT uint32 strtou32_adaptor(const char* nptr, char** endptr, + int base); inline int32 strto32(const char *nptr, char **endptr, int base) { if (sizeof(int32) == sizeof(long)) @@ -391,44 +397,44 @@ inline uint64 strtou64(const char *nptr, char **endptr, int base) { // safe_strtof() // safe_strtod() // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT bool safe_strtob(StringPiece str, bool* value); +PROTOBUF_EXPORT bool safe_strtob(StringPiece str, bool* value); -LIBPROTOBUF_EXPORT bool safe_strto32(const string& str, int32* value); -LIBPROTOBUF_EXPORT bool safe_strtou32(const string& str, uint32* value); +PROTOBUF_EXPORT bool safe_strto32(const std::string& str, int32* value); +PROTOBUF_EXPORT bool safe_strtou32(const std::string& str, uint32* value); inline bool safe_strto32(const char* str, int32* value) { - return safe_strto32(string(str), value); + return safe_strto32(std::string(str), value); } inline bool safe_strto32(StringPiece str, int32* value) { return safe_strto32(str.ToString(), value); } inline bool safe_strtou32(const char* str, uint32* value) { - return safe_strtou32(string(str), value); + return safe_strtou32(std::string(str), value); } inline bool safe_strtou32(StringPiece str, uint32* value) { return safe_strtou32(str.ToString(), value); } -LIBPROTOBUF_EXPORT bool safe_strto64(const string& str, int64* value); -LIBPROTOBUF_EXPORT bool safe_strtou64(const string& str, uint64* value); +PROTOBUF_EXPORT bool safe_strto64(const std::string& str, int64* value); +PROTOBUF_EXPORT bool safe_strtou64(const std::string& str, uint64* value); inline bool safe_strto64(const 
char* str, int64* value) { - return safe_strto64(string(str), value); + return safe_strto64(std::string(str), value); } inline bool safe_strto64(StringPiece str, int64* value) { return safe_strto64(str.ToString(), value); } inline bool safe_strtou64(const char* str, uint64* value) { - return safe_strtou64(string(str), value); + return safe_strtou64(std::string(str), value); } inline bool safe_strtou64(StringPiece str, uint64* value) { return safe_strtou64(str.ToString(), value); } -LIBPROTOBUF_EXPORT bool safe_strtof(const char* str, float* value); -LIBPROTOBUF_EXPORT bool safe_strtod(const char* str, double* value); -inline bool safe_strtof(const string& str, float* value) { +PROTOBUF_EXPORT bool safe_strtof(const char* str, float* value); +PROTOBUF_EXPORT bool safe_strtod(const char* str, double* value); +inline bool safe_strtof(const std::string& str, float* value) { return safe_strtof(str.c_str(), value); } -inline bool safe_strtod(const string& str, double* value) { +inline bool safe_strtod(const std::string& str, double* value) { return safe_strtod(str.c_str(), value); } inline bool safe_strtof(StringPiece str, float* value) { @@ -464,13 +470,13 @@ inline bool safe_strtod(StringPiece str, double* value) { // DoubleToBuffer() and FloatToBuffer(). 
static const int kFastToBufferSize = 32; -LIBPROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer); -LIBPROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer); +PROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer); +PROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer); char* FastUInt32ToBuffer(uint32 i, char* buffer); // inline below char* FastUInt64ToBuffer(uint64 i, char* buffer); // inline below -LIBPROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer); -LIBPROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer); -LIBPROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer); +PROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer); +PROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer); +PROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer); // at least 22 bytes long inline char* FastIntToBuffer(int i, char* buffer) { @@ -506,10 +512,10 @@ inline char* FastULongToBuffer(unsigned long i, char* buffer) { // terminating the string). // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer); -LIBPROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer); -LIBPROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer); -LIBPROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer); +PROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer); +PROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer); +PROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer); +PROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer); // Just define these in terms of the above. inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { @@ -521,9 +527,7 @@ inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { return buffer; } -inline string SimpleBtoa(bool value) { - return value ? 
"true" : "false"; -} +inline std::string SimpleBtoa(bool value) { return value ? "true" : "false"; } // ---------------------------------------------------------------------- // SimpleItoa() @@ -531,12 +535,12 @@ inline string SimpleBtoa(bool value) { // // Return value: string // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT string SimpleItoa(int i); -LIBPROTOBUF_EXPORT string SimpleItoa(unsigned int i); -LIBPROTOBUF_EXPORT string SimpleItoa(long i); -LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long i); -LIBPROTOBUF_EXPORT string SimpleItoa(long long i); -LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long long i); +PROTOBUF_EXPORT std::string SimpleItoa(int i); +PROTOBUF_EXPORT std::string SimpleItoa(unsigned int i); +PROTOBUF_EXPORT std::string SimpleItoa(long i); +PROTOBUF_EXPORT std::string SimpleItoa(unsigned long i); +PROTOBUF_EXPORT std::string SimpleItoa(long long i); +PROTOBUF_EXPORT std::string SimpleItoa(unsigned long long i); // ---------------------------------------------------------------------- // SimpleDtoa() @@ -557,11 +561,11 @@ LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long long i); // // Return value: string // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT string SimpleDtoa(double value); -LIBPROTOBUF_EXPORT string SimpleFtoa(float value); +PROTOBUF_EXPORT std::string SimpleDtoa(double value); +PROTOBUF_EXPORT std::string SimpleFtoa(float value); -LIBPROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer); -LIBPROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer); +PROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer); +PROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer); // In practice, doubles should never need more than 24 bytes and floats // should never need more than 14 (including null terminators), but we @@ -610,7 +614,7 @@ struct Hex { } }; -struct LIBPROTOBUF_EXPORT AlphaNum { +struct PROTOBUF_EXPORT AlphaNum { const 
char *piece_data_; // move these to string_ref eventually size_t piece_size_; // move these to string_ref eventually @@ -619,16 +623,26 @@ struct LIBPROTOBUF_EXPORT AlphaNum { // No bool ctor -- bools convert to an integral type. // A bool ctor would also convert incoming pointers (bletch). - AlphaNum(int32 i32) + AlphaNum(int i32) : piece_data_(digits), piece_size_(FastInt32ToBufferLeft(i32, digits) - &digits[0]) {} - AlphaNum(uint32 u32) + AlphaNum(unsigned int u32) : piece_data_(digits), piece_size_(FastUInt32ToBufferLeft(u32, digits) - &digits[0]) {} - AlphaNum(int64 i64) + AlphaNum(long long i64) + : piece_data_(digits), + piece_size_(FastInt64ToBufferLeft(i64, digits) - &digits[0]) {} + AlphaNum(unsigned long long u64) + : piece_data_(digits), + piece_size_(FastUInt64ToBufferLeft(u64, digits) - &digits[0]) {} + + // Note: on some architectures, "long" is only 32 bits, not 64, but the + // performance hit of using FastInt64ToBufferLeft to handle 32-bit values + // is quite minor. + AlphaNum(long i64) : piece_data_(digits), piece_size_(FastInt64ToBufferLeft(i64, digits) - &digits[0]) {} - AlphaNum(uint64 u64) + AlphaNum(unsigned long u64) : piece_data_(digits), piece_size_(FastUInt64ToBufferLeft(u64, digits) - &digits[0]) {} @@ -644,7 +658,7 @@ struct LIBPROTOBUF_EXPORT AlphaNum { // TODO: Add a string_ref constructor, eventually // AlphaNum(const StringPiece &pc) : piece(pc) {} - AlphaNum(const string& str) + AlphaNum(const std::string& str) : piece_data_(str.data()), piece_size_(str.size()) {} AlphaNum(StringPiece str) @@ -692,32 +706,34 @@ using strings::AlphaNum; // be a reference into str. 
// ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b); -LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, - const AlphaNum& c); -LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, - const AlphaNum& c, const AlphaNum& d); -LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, - const AlphaNum& c, const AlphaNum& d, - const AlphaNum& e); -LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, - const AlphaNum& c, const AlphaNum& d, - const AlphaNum& e, const AlphaNum& f); -LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, - const AlphaNum& c, const AlphaNum& d, - const AlphaNum& e, const AlphaNum& f, - const AlphaNum& g); -LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, - const AlphaNum& c, const AlphaNum& d, - const AlphaNum& e, const AlphaNum& f, - const AlphaNum& g, const AlphaNum& h); -LIBPROTOBUF_EXPORT string StrCat(const AlphaNum& a, const AlphaNum& b, - const AlphaNum& c, const AlphaNum& d, - const AlphaNum& e, const AlphaNum& f, - const AlphaNum& g, const AlphaNum& h, - const AlphaNum& i); - -inline string StrCat(const AlphaNum& a) { return string(a.data(), a.size()); } +PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b); +PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c); +PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d); +PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e); +PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f); +PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d, + 
const AlphaNum& e, const AlphaNum& f, + const AlphaNum& g); +PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, + const AlphaNum& g, const AlphaNum& h); +PROTOBUF_EXPORT std::string StrCat(const AlphaNum& a, const AlphaNum& b, + const AlphaNum& c, const AlphaNum& d, + const AlphaNum& e, const AlphaNum& f, + const AlphaNum& g, const AlphaNum& h, + const AlphaNum& i); + +inline std::string StrCat(const AlphaNum& a) { + return std::string(a.data(), a.size()); +} // ---------------------------------------------------------------------- // StrAppend() @@ -740,14 +756,14 @@ inline string StrCat(const AlphaNum& a) { return string(a.data(), a.size()); } // worked around as consecutive calls to StrAppend are quite efficient. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a); -LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a, - const AlphaNum& b); -LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a, - const AlphaNum& b, const AlphaNum& c); -LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a, - const AlphaNum& b, const AlphaNum& c, - const AlphaNum& d); +PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a); +PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a, + const AlphaNum& b); +PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a, + const AlphaNum& b, const AlphaNum& c); +PROTOBUF_EXPORT void StrAppend(std::string* dest, const AlphaNum& a, + const AlphaNum& b, const AlphaNum& c, + const AlphaNum& d); // ---------------------------------------------------------------------- // Join() @@ -755,8 +771,8 @@ LIBPROTOBUF_EXPORT void StrAppend(string* dest, const AlphaNum& a, // the C-string "delim" as a separator between components. 
// ---------------------------------------------------------------------- template -void Join(Iterator start, Iterator end, - const char* delim, string* result) { +void Join(Iterator start, Iterator end, const char* delim, + std::string* result) { for (Iterator it = start; it != end; ++it) { if (it != start) { result->append(delim); @@ -766,9 +782,8 @@ void Join(Iterator start, Iterator end, } template -string Join(const Range& components, - const char* delim) { - string result; +std::string Join(const Range& components, const char* delim) { + std::string result; Join(components.begin(), components.end(), delim, &result); return result; } @@ -777,7 +792,7 @@ string Join(const Range& components, // ToHex() // Return a lower-case hex string representation of the given integer. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT string ToHex(uint64 num); +PROTOBUF_EXPORT std::string ToHex(uint64 num); // ---------------------------------------------------------------------- // GlobalReplaceSubstring() @@ -786,9 +801,9 @@ LIBPROTOBUF_EXPORT string ToHex(uint64 num); // // NOTE: The string pieces must not overlap s. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT int GlobalReplaceSubstring(const string& substring, - const string& replacement, - string* s); +PROTOBUF_EXPORT int GlobalReplaceSubstring(const std::string& substring, + const std::string& replacement, + std::string* s); // ---------------------------------------------------------------------- // Base64Unescape() @@ -796,7 +811,7 @@ LIBPROTOBUF_EXPORT int GlobalReplaceSubstring(const string& substring, // writes it to "dest". If src contains invalid characters, dest is cleared // and the function returns false. Returns true on success. 
// ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT bool Base64Unescape(StringPiece src, string* dest); +PROTOBUF_EXPORT bool Base64Unescape(StringPiece src, std::string* dest); // ---------------------------------------------------------------------- // WebSafeBase64Unescape() @@ -809,18 +824,17 @@ LIBPROTOBUF_EXPORT bool Base64Unescape(StringPiece src, string* dest); // returns false (with dest empty) if src contains invalid chars; for // this version src and dest must be different strings. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT int WebSafeBase64Unescape(const char* src, int slen, - char* dest, int szdest); -LIBPROTOBUF_EXPORT bool WebSafeBase64Unescape(StringPiece src, string* dest); +PROTOBUF_EXPORT int WebSafeBase64Unescape(const char* src, int slen, char* dest, + int szdest); +PROTOBUF_EXPORT bool WebSafeBase64Unescape(StringPiece src, std::string* dest); // Return the length to use for the output buffer given to the base64 escape // routines. Make sure to use the same value for do_padding in both. // This function may return incorrect results if given input_len values that // are extremely high, which should happen rarely. -LIBPROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len, - bool do_padding); +PROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len, bool do_padding); // Use this version when calling Base64Escape without a do_padding arg. -LIBPROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len); +PROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len); // ---------------------------------------------------------------------- // Base64Escape() @@ -834,23 +848,23 @@ LIBPROTOBUF_EXPORT int CalculateBase64EscapedLen(int input_len); // to escape them. It also has an extra parameter "do_padding", // which when set to false will prevent padding with "=". 
// ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT int Base64Escape(const unsigned char* src, int slen, - char* dest, int szdest); -LIBPROTOBUF_EXPORT int WebSafeBase64Escape( - const unsigned char* src, int slen, char* dest, - int szdest, bool do_padding); +PROTOBUF_EXPORT int Base64Escape(const unsigned char* src, int slen, char* dest, + int szdest); +PROTOBUF_EXPORT int WebSafeBase64Escape(const unsigned char* src, int slen, + char* dest, int szdest, + bool do_padding); // Encode src into dest with padding. -LIBPROTOBUF_EXPORT void Base64Escape(StringPiece src, string* dest); +PROTOBUF_EXPORT void Base64Escape(StringPiece src, std::string* dest); // Encode src into dest web-safely without padding. -LIBPROTOBUF_EXPORT void WebSafeBase64Escape(StringPiece src, string* dest); +PROTOBUF_EXPORT void WebSafeBase64Escape(StringPiece src, std::string* dest); // Encode src into dest web-safely with padding. -LIBPROTOBUF_EXPORT void WebSafeBase64EscapeWithPadding(StringPiece src, - string* dest); +PROTOBUF_EXPORT void WebSafeBase64EscapeWithPadding(StringPiece src, + std::string* dest); -LIBPROTOBUF_EXPORT void Base64Escape(const unsigned char* src, int szsrc, - string* dest, bool do_padding); -LIBPROTOBUF_EXPORT void WebSafeBase64Escape(const unsigned char* src, int szsrc, - string* dest, bool do_padding); +PROTOBUF_EXPORT void Base64Escape(const unsigned char* src, int szsrc, + std::string* dest, bool do_padding); +PROTOBUF_EXPORT void WebSafeBase64Escape(const unsigned char* src, int szsrc, + std::string* dest, bool do_padding); inline bool IsValidCodePoint(uint32 code_point) { return code_point < 0xD800 || @@ -864,15 +878,76 @@ static const int UTFmax = 4; // in any external dependencies. The output buffer must be as least 4 bytes // large. 
// ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT int EncodeAsUTF8Char(uint32 code_point, char* output); +PROTOBUF_EXPORT int EncodeAsUTF8Char(uint32 code_point, char* output); // ---------------------------------------------------------------------- // UTF8FirstLetterNumBytes() // Length of the first UTF-8 character. // ---------------------------------------------------------------------- -LIBPROTOBUF_EXPORT int UTF8FirstLetterNumBytes(const char* src, int len); +PROTOBUF_EXPORT int UTF8FirstLetterNumBytes(const char* src, int len); + +// From google3/third_party/absl/strings/escaping.h + +// ---------------------------------------------------------------------- +// CleanStringLineEndings() +// Clean up a multi-line string to conform to Unix line endings. +// Reads from src and appends to dst, so usually dst should be empty. +// +// If there is no line ending at the end of a non-empty string, it can +// be added automatically. +// +// Four different types of input are correctly handled: +// +// - Unix/Linux files: line ending is LF: pass through unchanged +// +// - DOS/Windows files: line ending is CRLF: convert to LF +// +// - Legacy Mac files: line ending is CR: convert to LF +// +// - Garbled files: random line endings: convert gracefully +// lonely CR, lonely LF, CRLF: convert to LF +// +// @param src The multi-line string to convert +// @param dst The converted string is appended to this string +// @param auto_end_last_line Automatically terminate the last line +// +// Limitations: +// +// This does not do the right thing for CRCRLF files created by +// broken programs that do another Unix->DOS conversion on files +// that are already in CRLF format. 
For this, a two-pass approach +// brute-force would be needed that +// +// (1) determines the presence of LF (first one is ok) +// (2) if yes, removes any CR, else convert every CR to LF +PROTOBUF_EXPORT void CleanStringLineEndings(const std::string& src, + std::string* dst, + bool auto_end_last_line); + +// Same as above, but transforms the argument in place. +PROTOBUF_EXPORT void CleanStringLineEndings(std::string* str, + bool auto_end_last_line); + +namespace strings { +inline bool EndsWith(StringPiece text, StringPiece suffix) { + return suffix.empty() || + (text.size() >= suffix.size() && + memcmp(text.data() + (text.size() - suffix.size()), suffix.data(), + suffix.size()) == 0); +} +} // namespace strings + +namespace internal { + +// A locale-independent version of the standard strtod(), which always +// uses a dot as the decimal separator. +double NoLocaleStrtod(const char* str, char** endptr); + +} // namespace internal } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ diff --git a/third_party/protobuf-lite/google/protobuf/stubs/time.h b/third_party/protobuf-lite/google/protobuf/stubs/time.h index 45607ca9..b0611768 100644 --- a/third_party/protobuf-lite/google/protobuf/stubs/time.h +++ b/third_party/protobuf-lite/google/protobuf/stubs/time.h @@ -32,6 +32,8 @@ #include +#include + namespace google { namespace protobuf { namespace internal { @@ -49,27 +51,30 @@ struct DateTime { // negative to represent time before 1970-01-01) to DateTime. Returns false // if the timestamp is not in the range between 0001-01-01T00:00:00 and // 9999-12-31T23:59:59. -bool LIBPROTOBUF_EXPORT SecondsToDateTime(int64 seconds, DateTime* time); +bool PROTOBUF_EXPORT SecondsToDateTime(int64 seconds, DateTime* time); // Converts DateTime to a timestamp (seconds since 1970-01-01T00:00:00). // Returns false if the DateTime is not valid or is not in the valid range. 
-bool LIBPROTOBUF_EXPORT DateTimeToSeconds(const DateTime& time, int64* seconds); +bool PROTOBUF_EXPORT DateTimeToSeconds(const DateTime& time, int64* seconds); -void LIBPROTOBUF_EXPORT GetCurrentTime(int64* seconds, int32* nanos); +void PROTOBUF_EXPORT GetCurrentTime(int64* seconds, int32* nanos); -// Formats a time string in RFC3339 fromat. +// Formats a time string in RFC3339 format. // // For example, "2015-05-20T13:29:35.120Z". For nanos, 0, 3, 6 or 9 fractional // digits will be used depending on how many are required to represent the exact // value. // // Note that "nanos" must in the range of [0, 999999999]. -string LIBPROTOBUF_EXPORT FormatTime(int64 seconds, int32 nanos); +std::string PROTOBUF_EXPORT FormatTime(int64 seconds, int32 nanos); // Parses a time string. This method accepts RFC3339 date/time string with UTC // offset. For example, "2015-05-20T13:29:35.120-08:00". -bool LIBPROTOBUF_EXPORT ParseTime(const string& value, int64* seconds, int32* nanos); +bool PROTOBUF_EXPORT ParseTime(const std::string& value, int64* seconds, + int32* nanos); } // namespace internal } // namespace protobuf } // namespace google +#include + #endif // GOOGLE_PROTOBUF_STUBS_TIME_H_ diff --git a/third_party/protobuf-lite/google/protobuf/unknown_field_set.h b/third_party/protobuf-lite/google/protobuf/unknown_field_set.h new file mode 100644 index 00000000..ab3633da --- /dev/null +++ b/third_party/protobuf-lite/google/protobuf/unknown_field_set.h @@ -0,0 +1,411 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. +// +// Contains classes used to keep track of unrecognized fields seen while +// parsing a protocol message. 
+ +#ifndef GOOGLE_PROTOBUF_UNKNOWN_FIELD_SET_H__ +#define GOOGLE_PROTOBUF_UNKNOWN_FIELD_SET_H__ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#ifdef SWIG +#error "You cannot SWIG proto headers" +#endif + +namespace google { +namespace protobuf { +namespace internal { +class InternalMetadata; // metadata_lite.h +class WireFormat; // wire_format.h +class MessageSetFieldSkipperUsingCord; +// extension_set_heavy.cc +} // namespace internal + +class Message; // message.h +class UnknownField; // below + +// An UnknownFieldSet contains fields that were encountered while parsing a +// message but were not defined by its type. Keeping track of these can be +// useful, especially in that they may be written if the message is serialized +// again without being cleared in between. This means that software which +// simply receives messages and forwards them to other servers does not need +// to be updated every time a new field is added to the message definition. +// +// To get the UnknownFieldSet attached to any message, call +// Reflection::GetUnknownFields(). +// +// This class is necessarily tied to the protocol buffer wire format, unlike +// the Reflection interface which is independent of any serialization scheme. +class PROTOBUF_EXPORT UnknownFieldSet { + public: + UnknownFieldSet(); + ~UnknownFieldSet(); + + // Remove all fields. + inline void Clear(); + + // Remove all fields and deallocate internal data objects + void ClearAndFreeMemory(); + + // Is this set empty? + inline bool empty() const; + + // Merge the contents of some other UnknownFieldSet with this one. + void MergeFrom(const UnknownFieldSet& other); + + // Similar to above, but this function will destroy the contents of other. + void MergeFromAndDestroy(UnknownFieldSet* other); + + // Merge the contents of an UnknownFieldSet with the UnknownFieldSet in + // *metadata, if there is one.
If *metadata doesn't have an UnknownFieldSet + // then add one to it and make it be a copy of the first arg. + static void MergeToInternalMetadata(const UnknownFieldSet& other, + internal::InternalMetadata* metadata); + + // Swaps the contents of some other UnknownFieldSet with this one. + inline void Swap(UnknownFieldSet* x); + + // Computes (an estimate of) the total number of bytes currently used for + // storing the unknown fields in memory. Does NOT include + // sizeof(*this) in the calculation. + size_t SpaceUsedExcludingSelfLong() const; + + int SpaceUsedExcludingSelf() const { + return internal::ToIntSize(SpaceUsedExcludingSelfLong()); + } + + // Version of SpaceUsed() including sizeof(*this). + size_t SpaceUsedLong() const; + + int SpaceUsed() const { return internal::ToIntSize(SpaceUsedLong()); } + + // Returns the number of fields present in the UnknownFieldSet. + inline int field_count() const; + // Get a field in the set, where 0 <= index < field_count(). The fields + // appear in the order in which they were added. + inline const UnknownField& field(int index) const; + // Get a mutable pointer to a field in the set, where + // 0 <= index < field_count(). The fields appear in the order in which + // they were added. + inline UnknownField* mutable_field(int index); + + // Adding fields --------------------------------------------------- + + void AddVarint(int number, uint64 value); + void AddFixed32(int number, uint32 value); + void AddFixed64(int number, uint64 value); + void AddLengthDelimited(int number, const std::string& value); + std::string* AddLengthDelimited(int number); + UnknownFieldSet* AddGroup(int number); + + // Adds an unknown field from another set. + void AddField(const UnknownField& field); + + // Delete fields with indices in the range [start .. start+num-1]. + // Caution: implementation moves all fields with indices [start+num .. ]. + void DeleteSubrange(int start, int num); + + // Delete all fields with a specific field number. 
The order of left fields + // is preserved. + // Caution: implementation moves all fields after the first deleted field. + void DeleteByNumber(int number); + + // Parsing helpers ------------------------------------------------- + // These work exactly like the similarly-named methods of Message. + + bool MergeFromCodedStream(io::CodedInputStream* input); + bool ParseFromCodedStream(io::CodedInputStream* input); + bool ParseFromZeroCopyStream(io::ZeroCopyInputStream* input); + bool ParseFromArray(const void* data, int size); + inline bool ParseFromString(const std::string& data) { + return ParseFromArray(data.data(), static_cast(data.size())); + } + + // Merges this message's unknown field data (if any). This works whether + // the message is a lite or full proto (for legacy reasons, lite and full + // return different types for MessageType::unknown_fields()). + template + bool MergeFromMessage(const MessageType& message); + + static const UnknownFieldSet& default_instance(); + + private: + // For InternalMergeFrom + friend class UnknownField; + // Merges from other UnknownFieldSet. This method assumes, that this object + // is newly created and has no fields. 
+ void InternalMergeFrom(const UnknownFieldSet& other); + void ClearFallback(); + + template ::value, int>::type = 0> + bool InternalMergeFromMessage(const MessageType& message) { + MergeFrom(message.GetReflection()->GetUnknownFields(message)); + return true; + } + + template ::value && + !std::is_base_of::value, + int>::type = 0> + bool InternalMergeFromMessage(const MessageType& message) { + const auto& unknown_fields = message.unknown_fields(); + io::ArrayInputStream array_stream(unknown_fields.data(), + unknown_fields.size()); + io::CodedInputStream coded_stream(&array_stream); + return MergeFromCodedStream(&coded_stream); + } + + std::vector fields_; + GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(UnknownFieldSet); +}; + +namespace internal { + +inline void WriteVarint(uint32 num, uint64 val, UnknownFieldSet* unknown) { + unknown->AddVarint(num, val); +} +inline void WriteLengthDelimited(uint32 num, StringPiece val, + UnknownFieldSet* unknown) { + unknown->AddLengthDelimited(num)->assign(val.data(), val.size()); +} + +PROTOBUF_EXPORT +const char* UnknownGroupParse(UnknownFieldSet* unknown, const char* ptr, + ParseContext* ctx); +PROTOBUF_EXPORT +const char* UnknownFieldParse(uint64 tag, UnknownFieldSet* unknown, + const char* ptr, ParseContext* ctx); + +} // namespace internal + +// Represents one field in an UnknownFieldSet. +class PROTOBUF_EXPORT UnknownField { + public: + enum Type { + TYPE_VARINT, + TYPE_FIXED32, + TYPE_FIXED64, + TYPE_LENGTH_DELIMITED, + TYPE_GROUP + }; + + // The field's field number, as seen on the wire. + inline int number() const; + + // The field type. + inline Type type() const; + + // Accessors ------------------------------------------------------- + // Each method works only for UnknownFields of the corresponding type. 
+ + inline uint64 varint() const; + inline uint32 fixed32() const; + inline uint64 fixed64() const; + inline const std::string& length_delimited() const; + inline const UnknownFieldSet& group() const; + + inline void set_varint(uint64 value); + inline void set_fixed32(uint32 value); + inline void set_fixed64(uint64 value); + inline void set_length_delimited(const std::string& value); + inline std::string* mutable_length_delimited(); + inline UnknownFieldSet* mutable_group(); + + // Serialization API. + // These methods can take advantage of the underlying implementation and may + // archieve a better performance than using getters to retrieve the data and + // do the serialization yourself. + void SerializeLengthDelimitedNoTag(io::CodedOutputStream* output) const { + output->SetCur(InternalSerializeLengthDelimitedNoTag(output->Cur(), + output->EpsCopy())); + } + + inline size_t GetLengthDelimitedSize() const; + uint8* InternalSerializeLengthDelimitedNoTag( + uint8* target, io::EpsCopyOutputStream* stream) const; + + + // If this UnknownField contains a pointer, delete it. + void Delete(); + + // Make a deep copy of any pointers in this UnknownField. + void DeepCopy(const UnknownField& other); + + // Set the wire type of this UnknownField. Should only be used when this + // UnknownField is being created. 
+ inline void SetType(Type type); + + union LengthDelimited { + std::string* string_value; + }; + + uint32 number_; + uint32 type_; + union { + uint64 varint_; + uint32 fixed32_; + uint64 fixed64_; + mutable union LengthDelimited length_delimited_; + UnknownFieldSet* group_; + } data_; +}; + +// =================================================================== +// inline implementations + +inline UnknownFieldSet::UnknownFieldSet() {} + +inline UnknownFieldSet::~UnknownFieldSet() { Clear(); } + +inline void UnknownFieldSet::ClearAndFreeMemory() { Clear(); } + +inline void UnknownFieldSet::Clear() { + if (!fields_.empty()) { + ClearFallback(); + } +} + +inline bool UnknownFieldSet::empty() const { return fields_.empty(); } + +inline void UnknownFieldSet::Swap(UnknownFieldSet* x) { + fields_.swap(x->fields_); +} + +inline int UnknownFieldSet::field_count() const { + return static_cast(fields_.size()); +} +inline const UnknownField& UnknownFieldSet::field(int index) const { + return (fields_)[static_cast(index)]; +} +inline UnknownField* UnknownFieldSet::mutable_field(int index) { + return &(fields_)[static_cast(index)]; +} + +inline void UnknownFieldSet::AddLengthDelimited(int number, + const std::string& value) { + AddLengthDelimited(number)->assign(value); +} + + + + +inline int UnknownField::number() const { return static_cast(number_); } +inline UnknownField::Type UnknownField::type() const { + return static_cast(type_); +} + +inline uint64 UnknownField::varint() const { + assert(type() == TYPE_VARINT); + return data_.varint_; +} +inline uint32 UnknownField::fixed32() const { + assert(type() == TYPE_FIXED32); + return data_.fixed32_; +} +inline uint64 UnknownField::fixed64() const { + assert(type() == TYPE_FIXED64); + return data_.fixed64_; +} +inline const std::string& UnknownField::length_delimited() const { + assert(type() == TYPE_LENGTH_DELIMITED); + return *data_.length_delimited_.string_value; +} +inline const UnknownFieldSet& UnknownField::group() const { 
+ assert(type() == TYPE_GROUP); + return *data_.group_; +} + +inline void UnknownField::set_varint(uint64 value) { + assert(type() == TYPE_VARINT); + data_.varint_ = value; +} +inline void UnknownField::set_fixed32(uint32 value) { + assert(type() == TYPE_FIXED32); + data_.fixed32_ = value; +} +inline void UnknownField::set_fixed64(uint64 value) { + assert(type() == TYPE_FIXED64); + data_.fixed64_ = value; +} +inline void UnknownField::set_length_delimited(const std::string& value) { + assert(type() == TYPE_LENGTH_DELIMITED); + data_.length_delimited_.string_value->assign(value); +} +inline std::string* UnknownField::mutable_length_delimited() { + assert(type() == TYPE_LENGTH_DELIMITED); + return data_.length_delimited_.string_value; +} +inline UnknownFieldSet* UnknownField::mutable_group() { + assert(type() == TYPE_GROUP); + return data_.group_; +} +template +bool UnknownFieldSet::MergeFromMessage(const MessageType& message) { + // SFINAE will route to the right version. + return InternalMergeFromMessage(message); +} + + +inline size_t UnknownField::GetLengthDelimitedSize() const { + GOOGLE_DCHECK_EQ(TYPE_LENGTH_DELIMITED, type()); + return data_.length_delimited_.string_value->size(); +} + +inline void UnknownField::SetType(Type type) { + type_ = type; +} + + +} // namespace protobuf +} // namespace google + +#include +#endif // GOOGLE_PROTOBUF_UNKNOWN_FIELD_SET_H__ diff --git a/third_party/protobuf-lite/google/protobuf/wire_format_lite.h b/third_party/protobuf-lite/google/protobuf/wire_format_lite.h index 77eaa9a6..c742fe86 100644 --- a/third_party/protobuf-lite/google/protobuf/wire_format_lite.h +++ b/third_party/protobuf-lite/google/protobuf/wire_format_lite.h @@ -43,10 +43,13 @@ #include #include +#include #include +#include #include -#include +#include #include +#include // Do UTF-8 validation on string type in Debug build only #ifndef NDEBUG @@ -55,27 +58,23 @@ // Avoid conflict with iOS where #defines TYPE_BOOL. 
// -// If some one needs the macro TYPE_BOOL in a file that includes this header, it's -// possible to bring it back using push/pop_macro as follows. +// If some one needs the macro TYPE_BOOL in a file that includes this header, +// it's possible to bring it back using push/pop_macro as follows. // // #pragma push_macro("TYPE_BOOL") // #include this header and/or all headers that need the macro to be undefined. // #pragma pop_macro("TYPE_BOOL") #undef TYPE_BOOL -namespace google { - -namespace protobuf { - template class RepeatedField; // repeated_field.h -} +namespace google { namespace protobuf { namespace internal { -class StringPieceField; +#include // This class is for internal use by the protocol buffer library and by -// protocol-complier-generated message classes. It must not be called +// protocol-compiler-generated message classes. It must not be called // directly by clients. // // This class contains helpers for implementing the binary protocol buffer @@ -83,9 +82,8 @@ class StringPieceField; // reflection. // // This class is really a namespace that contains only static methods. -class LIBPROTOBUF_EXPORT WireFormatLite { +class PROTOBUF_EXPORT WireFormatLite { public: - // ----------------------------------------------------------------- // Helper constants and functions related to the format. These are // mostly meant for internal and generated code to use. @@ -102,50 +100,50 @@ class LIBPROTOBUF_EXPORT WireFormatLite { // unrecognized fields for forwards compatibility. enum WireType { - WIRETYPE_VARINT = 0, - WIRETYPE_FIXED64 = 1, + WIRETYPE_VARINT = 0, + WIRETYPE_FIXED64 = 1, WIRETYPE_LENGTH_DELIMITED = 2, - WIRETYPE_START_GROUP = 3, - WIRETYPE_END_GROUP = 4, - WIRETYPE_FIXED32 = 5, + WIRETYPE_START_GROUP = 3, + WIRETYPE_END_GROUP = 4, + WIRETYPE_FIXED32 = 5, }; // Lite alternative to FieldDescriptor::Type. Must be kept in sync. 
enum FieldType { - TYPE_DOUBLE = 1, - TYPE_FLOAT = 2, - TYPE_INT64 = 3, - TYPE_UINT64 = 4, - TYPE_INT32 = 5, - TYPE_FIXED64 = 6, - TYPE_FIXED32 = 7, - TYPE_BOOL = 8, - TYPE_STRING = 9, - TYPE_GROUP = 10, - TYPE_MESSAGE = 11, - TYPE_BYTES = 12, - TYPE_UINT32 = 13, - TYPE_ENUM = 14, - TYPE_SFIXED32 = 15, - TYPE_SFIXED64 = 16, - TYPE_SINT32 = 17, - TYPE_SINT64 = 18, - MAX_FIELD_TYPE = 18, + TYPE_DOUBLE = 1, + TYPE_FLOAT = 2, + TYPE_INT64 = 3, + TYPE_UINT64 = 4, + TYPE_INT32 = 5, + TYPE_FIXED64 = 6, + TYPE_FIXED32 = 7, + TYPE_BOOL = 8, + TYPE_STRING = 9, + TYPE_GROUP = 10, + TYPE_MESSAGE = 11, + TYPE_BYTES = 12, + TYPE_UINT32 = 13, + TYPE_ENUM = 14, + TYPE_SFIXED32 = 15, + TYPE_SFIXED64 = 16, + TYPE_SINT32 = 17, + TYPE_SINT64 = 18, + MAX_FIELD_TYPE = 18, }; // Lite alternative to FieldDescriptor::CppType. Must be kept in sync. enum CppType { - CPPTYPE_INT32 = 1, - CPPTYPE_INT64 = 2, - CPPTYPE_UINT32 = 3, - CPPTYPE_UINT64 = 4, - CPPTYPE_DOUBLE = 5, - CPPTYPE_FLOAT = 6, - CPPTYPE_BOOL = 7, - CPPTYPE_ENUM = 8, - CPPTYPE_STRING = 9, - CPPTYPE_MESSAGE = 10, - MAX_CPPTYPE = 10, + CPPTYPE_INT32 = 1, + CPPTYPE_INT64 = 2, + CPPTYPE_UINT32 = 3, + CPPTYPE_UINT64 = 4, + CPPTYPE_DOUBLE = 5, + CPPTYPE_FLOAT = 6, + CPPTYPE_BOOL = 7, + CPPTYPE_ENUM = 8, + CPPTYPE_STRING = 9, + CPPTYPE_MESSAGE = 10, + MAX_CPPTYPE = 10, }; // Helper method to get the CppType for a particular Type. @@ -158,16 +156,16 @@ class LIBPROTOBUF_EXPORT WireFormatLite { } // Number of bits in a tag which identify the wire type. - static const int kTagTypeBits = 3; + static constexpr int kTagTypeBits = 3; // Mask for those bits. - static const uint32 kTagTypeMask = (1 << kTagTypeBits) - 1; + static constexpr uint32 kTagTypeMask = (1 << kTagTypeBits) - 1; // Helper functions for encoding and decoding tags. (Inlined below and in // _inl.h) // - // This is different from MakeTag(field->number(), field->type()) in the case - // of packed repeated fields. 
- static uint32 MakeTag(int field_number, WireType type); + // This is different from MakeTag(field->number(), field->type()) in the + // case of packed repeated fields. + constexpr static uint32 MakeTag(int field_number, WireType type); static WireType GetTagWireType(uint32 tag); static int GetTagFieldNumber(uint32 tag); @@ -177,9 +175,9 @@ class LIBPROTOBUF_EXPORT WireFormatLite { WireFormatLite::FieldType type); // Skips a field value with the given tag. The input should start - // positioned immediately after the tag. Skipped values are simply discarded, - // not recorded anywhere. See WireFormat::SkipField() for a version that - // records to an UnknownFieldSet. + // positioned immediately after the tag. Skipped values are simply + // discarded, not recorded anywhere. See WireFormat::SkipField() for a + // version that records to an UnknownFieldSet. static bool SkipField(io::CodedInputStream* input, uint32 tag); // Skips a field value with the given tag. The input should start @@ -198,14 +196,12 @@ class LIBPROTOBUF_EXPORT WireFormatLite { static bool SkipMessage(io::CodedInputStream* input, io::CodedOutputStream* output); -// This macro does the same thing as WireFormatLite::MakeTag(), but the -// result is usable as a compile-time constant, which makes it usable -// as a switch case or a template input. WireFormatLite::MakeTag() is more -// type-safe, though, so prefer it if possible. -#define GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(FIELD_NUMBER, TYPE) \ - static_cast( \ - (static_cast(FIELD_NUMBER) << ::google::protobuf::internal::WireFormatLite::kTagTypeBits) \ - | (TYPE)) + // This macro does the same thing as WireFormatLite::MakeTag(), but the + // result is usable as a compile-time constant, which makes it usable + // as a switch case or a template input. WireFormatLite::MakeTag() is more + // type-safe, though, so prefer it if possible. 
+#define GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(FIELD_NUMBER, TYPE) \ + static_cast((static_cast(FIELD_NUMBER) << 3) | (TYPE)) // These are the tags for the old MessageSet format, which was defined as: // message MessageSet { @@ -214,21 +210,17 @@ class LIBPROTOBUF_EXPORT WireFormatLite { // required string message = 3; // } // } - static const int kMessageSetItemNumber = 1; - static const int kMessageSetTypeIdNumber = 2; - static const int kMessageSetMessageNumber = 3; - static const int kMessageSetItemStartTag = - GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(kMessageSetItemNumber, - WireFormatLite::WIRETYPE_START_GROUP); - static const int kMessageSetItemEndTag = - GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(kMessageSetItemNumber, - WireFormatLite::WIRETYPE_END_GROUP); - static const int kMessageSetTypeIdTag = - GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(kMessageSetTypeIdNumber, - WireFormatLite::WIRETYPE_VARINT); - static const int kMessageSetMessageTag = - GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(kMessageSetMessageNumber, - WireFormatLite::WIRETYPE_LENGTH_DELIMITED); + static constexpr int kMessageSetItemNumber = 1; + static constexpr int kMessageSetTypeIdNumber = 2; + static constexpr int kMessageSetMessageNumber = 3; + static const int kMessageSetItemStartTag = GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG( + kMessageSetItemNumber, WireFormatLite::WIRETYPE_START_GROUP); + static const int kMessageSetItemEndTag = GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG( + kMessageSetItemNumber, WireFormatLite::WIRETYPE_END_GROUP); + static const int kMessageSetTypeIdTag = GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG( + kMessageSetTypeIdNumber, WireFormatLite::WIRETYPE_VARINT); + static const int kMessageSetMessageTag = GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG( + kMessageSetMessageNumber, WireFormatLite::WIRETYPE_LENGTH_DELIMITED); // Byte size of all tags of a MessageSet::Item combined. static const size_t kMessageSetItemTagsSize; @@ -248,24 +240,12 @@ class LIBPROTOBUF_EXPORT WireFormatLite { // the purpose of varint. 
So, for the "sint32" and "sint64" field types, // we ZigZag-encode the values. static uint32 ZigZagEncode32(int32 n); - static int32 ZigZagDecode32(uint32 n); + static int32 ZigZagDecode32(uint32 n); static uint64 ZigZagEncode64(int64 n); - static int64 ZigZagDecode64(uint64 n); + static int64 ZigZagDecode64(uint64 n); // ================================================================= - // Methods for reading/writing individual field. The implementations - // of these methods are defined in wire_format_lite_inl.h; you must #include - // that file to use these. - -#ifdef NDEBUG -#define INL GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE -#else -// Avoid excessive inlining in non-optimized builds. Without other optimizations -// the inlining is not going to provide benefits anyway and the huge resulting -// functions, especially in the proto-generated serialization functions, produce -// stack frames so large that many tests run into stack overflows (b/32192897). -#define INL -#endif + // Methods for reading/writing individual field. // Read fields, not including tags. The assumption is that you already // read the tag to determine what field to read. @@ -274,15 +254,16 @@ class LIBPROTOBUF_EXPORT WireFormatLite { // the represented type and the FieldType. These are specialized with the // appropriate definition for each declared type. template - INL static bool ReadPrimitive(io::CodedInputStream* input, CType* value); + PROTOBUF_ALWAYS_INLINE static bool ReadPrimitive(io::CodedInputStream* input, + CType* value); // Reads repeated primitive values, with optimizations for repeats. // tag_size and tag should both be compile-time constants provided by the // protocol compiler. 
template - INL static bool ReadRepeatedPrimitive(int tag_size, uint32 tag, - io::CodedInputStream* input, - RepeatedField* value); + PROTOBUF_ALWAYS_INLINE static bool ReadRepeatedPrimitive( + int tag_size, uint32 tag, io::CodedInputStream* input, + RepeatedField* value); // Identical to ReadRepeatedPrimitive, except will not inline the // implementation. @@ -296,15 +277,16 @@ class LIBPROTOBUF_EXPORT WireFormatLite { // // This is only implemented for the types with fixed wire size, e.g. // float, double, and the (s)fixed* types. - template INL - static const uint8* ReadPrimitiveFromArray(const uint8* buffer, CType* value); + template + PROTOBUF_ALWAYS_INLINE static const uint8* ReadPrimitiveFromArray( + const uint8* buffer, CType* value); // Reads a primitive packed field. // // This is only implemented for packable types. template - INL static bool ReadPackedPrimitive(io::CodedInputStream* input, - RepeatedField* value); + PROTOBUF_ALWAYS_INLINE static bool ReadPackedPrimitive( + io::CodedInputStream* input, RepeatedField* value); // Identical to ReadPackedPrimitive, except will not inline the // implementation. @@ -324,17 +306,19 @@ class LIBPROTOBUF_EXPORT WireFormatLite { io::CodedInputStream* input, int field_number, bool (*is_valid)(int), io::CodedOutputStream* unknown_fields_stream, RepeatedField* values); - // Read a string. ReadString(..., string* value) requires an existing string. - static inline bool ReadString(io::CodedInputStream* input, string* value); - // ReadString(..., string** p) is internal-only, and should only be called - // from generated code. It starts by setting *p to "new string" - // if *p == &GetEmptyStringAlreadyInited(). It then invokes + // Read a string. ReadString(..., std::string* value) requires an + // existing std::string. + static inline bool ReadString(io::CodedInputStream* input, + std::string* value); + // ReadString(..., std::string** p) is internal-only, and should only be + // called from generated code. 
It starts by setting *p to "new std::string" if + // *p == &GetEmptyStringAlreadyInited(). It then invokes // ReadString(io::CodedInputStream* input, *p). This is useful for reducing // code size. - static inline bool ReadString(io::CodedInputStream* input, string** p); + static inline bool ReadString(io::CodedInputStream* input, std::string** p); // Analogous to ReadString(). - static bool ReadBytes(io::CodedInputStream* input, string* value); - static bool ReadBytes(io::CodedInputStream* input, string** p); + static bool ReadBytes(io::CodedInputStream* input, std::string* value); + static bool ReadBytes(io::CodedInputStream* input, std::string** p); enum Operation { PARSE = 0, @@ -342,8 +326,7 @@ class LIBPROTOBUF_EXPORT WireFormatLite { }; // Returns true if the data is valid UTF-8. - static bool VerifyUtf8String(const char* data, int size, - Operation op, + static bool VerifyUtf8String(const char* data, int size, Operation op, const char* field_name); template @@ -354,15 +337,7 @@ class LIBPROTOBUF_EXPORT WireFormatLite { static inline bool ReadMessage(io::CodedInputStream* input, MessageType* value); - // Do not use. template - static inline bool ReadGroupNoVirtual(int field_number, - io::CodedInputStream* input, - MessageType* value) { - return ReadGroup(field_number, input, value); - } - - template static inline bool ReadMessageNoVirtual(io::CodedInputStream* input, MessageType* value) { return ReadMessage(input, value); @@ -371,28 +346,38 @@ class LIBPROTOBUF_EXPORT WireFormatLite { // Write a tag. The Write*() functions typically include the tag, so // normally there's no need to call this unless using the Write*NoTag() // variants. - INL static void WriteTag(int field_number, WireType type, - io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteTag(int field_number, WireType type, + io::CodedOutputStream* output); // Write fields, without tags. 
- INL static void WriteInt32NoTag(int32 value, io::CodedOutputStream* output); - INL static void WriteInt64NoTag(int64 value, io::CodedOutputStream* output); - INL static void WriteUInt32NoTag(uint32 value, io::CodedOutputStream* output); - INL static void WriteUInt64NoTag(uint64 value, io::CodedOutputStream* output); - INL static void WriteSInt32NoTag(int32 value, io::CodedOutputStream* output); - INL static void WriteSInt64NoTag(int64 value, io::CodedOutputStream* output); - INL static void WriteFixed32NoTag(uint32 value, - io::CodedOutputStream* output); - INL static void WriteFixed64NoTag(uint64 value, - io::CodedOutputStream* output); - INL static void WriteSFixed32NoTag(int32 value, - io::CodedOutputStream* output); - INL static void WriteSFixed64NoTag(int64 value, - io::CodedOutputStream* output); - INL static void WriteFloatNoTag(float value, io::CodedOutputStream* output); - INL static void WriteDoubleNoTag(double value, io::CodedOutputStream* output); - INL static void WriteBoolNoTag(bool value, io::CodedOutputStream* output); - INL static void WriteEnumNoTag(int value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteInt32NoTag( + int32 value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteInt64NoTag( + int64 value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteUInt32NoTag( + uint32 value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteUInt64NoTag( + uint64 value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteSInt32NoTag( + int32 value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteSInt64NoTag( + int64 value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteFixed32NoTag( + uint32 value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteFixed64NoTag( + uint64 value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void 
WriteSFixed32NoTag( + int32 value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteSFixed64NoTag( + int64 value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteFloatNoTag( + float value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteDoubleNoTag( + double value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteBoolNoTag( + bool value, io::CodedOutputStream* output); + PROTOBUF_ALWAYS_INLINE static void WriteEnumNoTag( + int value, io::CodedOutputStream* output); // Write array of primitive fields, without tags static void WriteFloatArray(const float* a, int n, @@ -440,13 +425,14 @@ class LIBPROTOBUF_EXPORT WireFormatLite { static void WriteEnum(int field_number, int value, io::CodedOutputStream* output); - static void WriteString(int field_number, const string& value, + static void WriteString(int field_number, const std::string& value, io::CodedOutputStream* output); - static void WriteBytes(int field_number, const string& value, + static void WriteBytes(int field_number, const std::string& value, io::CodedOutputStream* output); - static void WriteStringMaybeAliased(int field_number, const string& value, + static void WriteStringMaybeAliased(int field_number, + const std::string& value, io::CodedOutputStream* output); - static void WriteBytesMaybeAliased(int field_number, const string& value, + static void WriteBytesMaybeAliased(int field_number, const std::string& value, io::CodedOutputStream* output); static void WriteGroup(int field_number, const MessageLite& value, @@ -474,234 +460,253 @@ class LIBPROTOBUF_EXPORT WireFormatLite { io::CodedOutputStream* output); // Like above, but use only *ToArray methods of CodedOutputStream. - INL static uint8* WriteTagToArray(int field_number, WireType type, - uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteTagToArray(int field_number, + WireType type, + uint8* target); // Write fields, without tags. 
- INL static uint8* WriteInt32NoTagToArray(int32 value, uint8* target); - INL static uint8* WriteInt64NoTagToArray(int64 value, uint8* target); - INL static uint8* WriteUInt32NoTagToArray(uint32 value, uint8* target); - INL static uint8* WriteUInt64NoTagToArray(uint64 value, uint8* target); - INL static uint8* WriteSInt32NoTagToArray(int32 value, uint8* target); - INL static uint8* WriteSInt64NoTagToArray(int64 value, uint8* target); - INL static uint8* WriteFixed32NoTagToArray(uint32 value, uint8* target); - INL static uint8* WriteFixed64NoTagToArray(uint64 value, uint8* target); - INL static uint8* WriteSFixed32NoTagToArray(int32 value, uint8* target); - INL static uint8* WriteSFixed64NoTagToArray(int64 value, uint8* target); - INL static uint8* WriteFloatNoTagToArray(float value, uint8* target); - INL static uint8* WriteDoubleNoTagToArray(double value, uint8* target); - INL static uint8* WriteBoolNoTagToArray(bool value, uint8* target); - INL static uint8* WriteEnumNoTagToArray(int value, uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteInt32NoTagToArray(int32 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteInt64NoTagToArray(int64 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteUInt32NoTagToArray(uint32 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteUInt64NoTagToArray(uint64 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSInt32NoTagToArray(int32 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSInt64NoTagToArray(int64 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteFixed32NoTagToArray(uint32 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteFixed64NoTagToArray(uint64 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSFixed32NoTagToArray(int32 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSFixed64NoTagToArray(int64 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE 
static uint8* WriteFloatNoTagToArray(float value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteDoubleNoTagToArray(double value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteBoolNoTagToArray(bool value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteEnumNoTagToArray(int value, + uint8* target); // Write fields, without tags. These require that value.size() > 0. - template - INL static uint8* WritePrimitiveNoTagToArray( - const RepeatedField& value, - uint8* (*Writer)(T, uint8*), uint8* target); - template - INL static uint8* WriteFixedNoTagToArray( - const RepeatedField& value, - uint8* (*Writer)(T, uint8*), uint8* target); - - INL static uint8* WriteInt32NoTagToArray( - const RepeatedField< int32>& value, uint8* output); - INL static uint8* WriteInt64NoTagToArray( - const RepeatedField< int64>& value, uint8* output); - INL static uint8* WriteUInt32NoTagToArray( + template + PROTOBUF_ALWAYS_INLINE static uint8* WritePrimitiveNoTagToArray( + const RepeatedField& value, uint8* (*Writer)(T, uint8*), + uint8* target); + template + PROTOBUF_ALWAYS_INLINE static uint8* WriteFixedNoTagToArray( + const RepeatedField& value, uint8* (*Writer)(T, uint8*), + uint8* target); + + PROTOBUF_ALWAYS_INLINE static uint8* WriteInt32NoTagToArray( + const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteInt64NoTagToArray( + const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteUInt32NoTagToArray( const RepeatedField& value, uint8* output); - INL static uint8* WriteUInt64NoTagToArray( + PROTOBUF_ALWAYS_INLINE static uint8* WriteUInt64NoTagToArray( const RepeatedField& value, uint8* output); - INL static uint8* WriteSInt32NoTagToArray( - const RepeatedField< int32>& value, uint8* output); - INL static uint8* WriteSInt64NoTagToArray( - const RepeatedField< int64>& value, uint8* output); - INL static uint8* WriteFixed32NoTagToArray( + PROTOBUF_ALWAYS_INLINE static uint8* 
WriteSInt32NoTagToArray( + const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSInt64NoTagToArray( + const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteFixed32NoTagToArray( const RepeatedField& value, uint8* output); - INL static uint8* WriteFixed64NoTagToArray( + PROTOBUF_ALWAYS_INLINE static uint8* WriteFixed64NoTagToArray( const RepeatedField& value, uint8* output); - INL static uint8* WriteSFixed32NoTagToArray( - const RepeatedField< int32>& value, uint8* output); - INL static uint8* WriteSFixed64NoTagToArray( - const RepeatedField< int64>& value, uint8* output); - INL static uint8* WriteFloatNoTagToArray( - const RepeatedField< float>& value, uint8* output); - INL static uint8* WriteDoubleNoTagToArray( + PROTOBUF_ALWAYS_INLINE static uint8* WriteSFixed32NoTagToArray( + const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSFixed64NoTagToArray( + const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteFloatNoTagToArray( + const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteDoubleNoTagToArray( const RepeatedField& value, uint8* output); - INL static uint8* WriteBoolNoTagToArray( - const RepeatedField< bool>& value, uint8* output); - INL static uint8* WriteEnumNoTagToArray( - const RepeatedField< int>& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteBoolNoTagToArray( + const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteEnumNoTagToArray( + const RepeatedField& value, uint8* output); // Write fields, including tags. 
- INL static uint8* WriteInt32ToArray(int field_number, int32 value, - uint8* target); - INL static uint8* WriteInt64ToArray(int field_number, int64 value, - uint8* target); - INL static uint8* WriteUInt32ToArray(int field_number, uint32 value, - uint8* target); - INL static uint8* WriteUInt64ToArray(int field_number, uint64 value, - uint8* target); - INL static uint8* WriteSInt32ToArray(int field_number, int32 value, - uint8* target); - INL static uint8* WriteSInt64ToArray(int field_number, int64 value, - uint8* target); - INL static uint8* WriteFixed32ToArray(int field_number, uint32 value, - uint8* target); - INL static uint8* WriteFixed64ToArray(int field_number, uint64 value, - uint8* target); - INL static uint8* WriteSFixed32ToArray(int field_number, int32 value, - uint8* target); - INL static uint8* WriteSFixed64ToArray(int field_number, int64 value, - uint8* target); - INL static uint8* WriteFloatToArray(int field_number, float value, - uint8* target); - INL static uint8* WriteDoubleToArray(int field_number, double value, - uint8* target); - INL static uint8* WriteBoolToArray(int field_number, bool value, - uint8* target); - INL static uint8* WriteEnumToArray(int field_number, int value, - uint8* target); - - template - INL static uint8* WritePrimitiveToArray( - int field_number, - const RepeatedField& value, + PROTOBUF_ALWAYS_INLINE static uint8* WriteInt32ToArray(int field_number, + int32 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteInt64ToArray(int field_number, + int64 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteUInt32ToArray(int field_number, + uint32 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteUInt64ToArray(int field_number, + uint64 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSInt32ToArray(int field_number, + int32 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSInt64ToArray(int field_number, + int64 value, + uint8* target); + 
PROTOBUF_ALWAYS_INLINE static uint8* WriteFixed32ToArray(int field_number, + uint32 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteFixed64ToArray(int field_number, + uint64 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSFixed32ToArray(int field_number, + int32 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSFixed64ToArray(int field_number, + int64 value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteFloatToArray(int field_number, + float value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteDoubleToArray(int field_number, + double value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteBoolToArray(int field_number, + bool value, + uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteEnumToArray(int field_number, + int value, + uint8* target); + + template + PROTOBUF_ALWAYS_INLINE static uint8* WritePrimitiveToArray( + int field_number, const RepeatedField& value, uint8* (*Writer)(int, T, uint8*), uint8* target); - INL static uint8* WriteInt32ToArray( - int field_number, const RepeatedField< int32>& value, uint8* output); - INL static uint8* WriteInt64ToArray( - int field_number, const RepeatedField< int64>& value, uint8* output); - INL static uint8* WriteUInt32ToArray( + PROTOBUF_ALWAYS_INLINE static uint8* WriteInt32ToArray( + int field_number, const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteInt64ToArray( + int field_number, const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteUInt32ToArray( int field_number, const RepeatedField& value, uint8* output); - INL static uint8* WriteUInt64ToArray( + PROTOBUF_ALWAYS_INLINE static uint8* WriteUInt64ToArray( int field_number, const RepeatedField& value, uint8* output); - INL static uint8* WriteSInt32ToArray( - int field_number, const RepeatedField< int32>& value, uint8* output); - INL static uint8* WriteSInt64ToArray( - int 
field_number, const RepeatedField< int64>& value, uint8* output); - INL static uint8* WriteFixed32ToArray( + PROTOBUF_ALWAYS_INLINE static uint8* WriteSInt32ToArray( + int field_number, const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSInt64ToArray( + int field_number, const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteFixed32ToArray( int field_number, const RepeatedField& value, uint8* output); - INL static uint8* WriteFixed64ToArray( + PROTOBUF_ALWAYS_INLINE static uint8* WriteFixed64ToArray( int field_number, const RepeatedField& value, uint8* output); - INL static uint8* WriteSFixed32ToArray( - int field_number, const RepeatedField< int32>& value, uint8* output); - INL static uint8* WriteSFixed64ToArray( - int field_number, const RepeatedField< int64>& value, uint8* output); - INL static uint8* WriteFloatToArray( - int field_number, const RepeatedField< float>& value, uint8* output); - INL static uint8* WriteDoubleToArray( + PROTOBUF_ALWAYS_INLINE static uint8* WriteSFixed32ToArray( + int field_number, const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteSFixed64ToArray( + int field_number, const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteFloatToArray( + int field_number, const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteDoubleToArray( int field_number, const RepeatedField& value, uint8* output); - INL static uint8* WriteBoolToArray( - int field_number, const RepeatedField< bool>& value, uint8* output); - INL static uint8* WriteEnumToArray( - int field_number, const RepeatedField< int>& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteBoolToArray( + int field_number, const RepeatedField& value, uint8* output); + PROTOBUF_ALWAYS_INLINE static uint8* WriteEnumToArray( + int field_number, const RepeatedField& value, uint8* output); - INL static uint8* 
WriteStringToArray(int field_number, const string& value, - uint8* target); - INL static uint8* WriteBytesToArray(int field_number, const string& value, - uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteStringToArray( + int field_number, const std::string& value, uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteBytesToArray( + int field_number, const std::string& value, uint8* target); // Whether to serialize deterministically (e.g., map keys are // sorted) is a property of a CodedOutputStream, and in the process // of serialization, the "ToArray" variants may be invoked. But they don't // have a CodedOutputStream available, so they get an additional parameter // telling them whether to serialize deterministically. - template - INL static uint8* InternalWriteGroupToArray(int field_number, - const MessageType& value, - bool deterministic, - uint8* target); - template - INL static uint8* InternalWriteMessageToArray(int field_number, - const MessageType& value, - bool deterministic, - uint8* target); + template + PROTOBUF_ALWAYS_INLINE static uint8* InternalWriteGroup( + int field_number, const MessageType& value, uint8* target, + io::EpsCopyOutputStream* stream); + template + PROTOBUF_ALWAYS_INLINE static uint8* InternalWriteMessage( + int field_number, const MessageType& value, uint8* target, + io::EpsCopyOutputStream* stream); // Like above, but de-virtualize the call to SerializeWithCachedSizes(). The // pointer must point at an instance of MessageType, *not* a subclass (or // the subclass must not override SerializeWithCachedSizes()). 
template - INL static uint8* InternalWriteGroupNoVirtualToArray(int field_number, - const MessageType& value, - bool deterministic, - uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* InternalWriteGroupNoVirtualToArray( + int field_number, const MessageType& value, uint8* target); template - INL static uint8* InternalWriteMessageNoVirtualToArray( - int field_number, const MessageType& value, bool deterministic, - uint8* target); + PROTOBUF_ALWAYS_INLINE static uint8* InternalWriteMessageNoVirtualToArray( + int field_number, const MessageType& value, uint8* target); // For backward-compatibility, the last four methods also have versions // that are non-deterministic always. - INL static uint8* WriteGroupToArray(int field_number, - const MessageLite& value, uint8* target) { - return InternalWriteGroupToArray(field_number, value, false, target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteGroupToArray( + int field_number, const MessageLite& value, uint8* target) { + io::EpsCopyOutputStream stream( + target, + value.GetCachedSize() + + static_cast(2 * io::CodedOutputStream::VarintSize32( + static_cast(field_number) << 3)), + io::CodedOutputStream::IsDefaultSerializationDeterministic()); + return InternalWriteGroup(field_number, value, target, &stream); } - INL static uint8* WriteMessageToArray(int field_number, - const MessageLite& value, - uint8* target) { - return InternalWriteMessageToArray(field_number, value, false, target); - } - template - INL static uint8* WriteGroupNoVirtualToArray(int field_number, - const MessageType& value, - uint8* target) { - return InternalWriteGroupNoVirtualToArray(field_number, value, false, - target); + PROTOBUF_ALWAYS_INLINE static uint8* WriteMessageToArray( + int field_number, const MessageLite& value, uint8* target) { + int size = value.GetCachedSize(); + io::EpsCopyOutputStream stream( + target, + size + static_cast(io::CodedOutputStream::VarintSize32( + static_cast(field_number) << 3) + + 
io::CodedOutputStream::VarintSize32(size)), + io::CodedOutputStream::IsDefaultSerializationDeterministic()); + return InternalWriteMessage(field_number, value, target, &stream); } - template - INL static uint8* WriteMessageNoVirtualToArray(int field_number, - const MessageType& value, - uint8* target) { - return InternalWriteMessageNoVirtualToArray(field_number, value, false, - target); - } - -#undef INL // Compute the byte size of a field. The XxSize() functions do NOT include // the tag, so you must also call TagSize(). (This is because, for repeated // fields, you should only call TagSize() once and multiply it by the element // count, but you may have to call XxSize() for each individual element.) - static inline size_t Int32Size ( int32 value); - static inline size_t Int64Size ( int64 value); - static inline size_t UInt32Size (uint32 value); - static inline size_t UInt64Size (uint64 value); - static inline size_t SInt32Size ( int32 value); - static inline size_t SInt64Size ( int64 value); - static inline size_t EnumSize ( int value); - - static size_t Int32Size (const RepeatedField< int32>& value); - static size_t Int64Size (const RepeatedField< int64>& value); + static inline size_t Int32Size(int32 value); + static inline size_t Int64Size(int64 value); + static inline size_t UInt32Size(uint32 value); + static inline size_t UInt64Size(uint64 value); + static inline size_t SInt32Size(int32 value); + static inline size_t SInt64Size(int64 value); + static inline size_t EnumSize(int value); + + static size_t Int32Size(const RepeatedField& value); + static size_t Int64Size(const RepeatedField& value); static size_t UInt32Size(const RepeatedField& value); static size_t UInt64Size(const RepeatedField& value); - static size_t SInt32Size(const RepeatedField< int32>& value); - static size_t SInt64Size(const RepeatedField< int64>& value); - static size_t EnumSize (const RepeatedField< int>& value); + static size_t SInt32Size(const RepeatedField& value); + static size_t 
SInt64Size(const RepeatedField& value); + static size_t EnumSize(const RepeatedField& value); // These types always have the same size. - static const size_t kFixed32Size = 4; - static const size_t kFixed64Size = 8; - static const size_t kSFixed32Size = 4; - static const size_t kSFixed64Size = 8; - static const size_t kFloatSize = 4; - static const size_t kDoubleSize = 8; - static const size_t kBoolSize = 1; - - static inline size_t StringSize(const string& value); - static inline size_t BytesSize (const string& value); - - template - static inline size_t GroupSize (const MessageType& value); - template + static constexpr size_t kFixed32Size = 4; + static constexpr size_t kFixed64Size = 8; + static constexpr size_t kSFixed32Size = 4; + static constexpr size_t kSFixed64Size = 8; + static constexpr size_t kFloatSize = 4; + static constexpr size_t kDoubleSize = 8; + static constexpr size_t kBoolSize = 1; + + static inline size_t StringSize(const std::string& value); + static inline size_t BytesSize(const std::string& value); + + template + static inline size_t GroupSize(const MessageType& value); + template static inline size_t MessageSize(const MessageType& value); // Like above, but de-virtualize the call to ByteSize(). The // pointer must point at an instance of MessageType, *not* a subclass (or // the subclass must not override ByteSize()). - template - static inline size_t GroupSizeNoVirtual (const MessageType& value); - template + template + static inline size_t GroupSizeNoVirtual(const MessageType& value); + template static inline size_t MessageSizeNoVirtual(const MessageType& value); // Given the length of data, calculate the byte size of the data on the @@ -713,21 +718,19 @@ class LIBPROTOBUF_EXPORT WireFormatLite { // optimizations for primitive types that have fixed size on the wire, and // can be read using potentially faster paths. 
template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE - static bool ReadRepeatedFixedSizePrimitive( - int tag_size, - uint32 tag, - google::protobuf::io::CodedInputStream* input, + PROTOBUF_ALWAYS_INLINE static bool ReadRepeatedFixedSizePrimitive( + int tag_size, uint32 tag, io::CodedInputStream* input, RepeatedField* value); // Like ReadRepeatedFixedSizePrimitive but for packed primitive fields. template - GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE - static bool ReadPackedFixedSizePrimitive( - google::protobuf::io::CodedInputStream* input, RepeatedField* value); + PROTOBUF_ALWAYS_INLINE static bool ReadPackedFixedSizePrimitive( + io::CodedInputStream* input, RepeatedField* value); static const CppType kFieldTypeToCppTypeMap[]; static const WireFormatLite::WireType kWireTypeForFieldType[]; + static void WriteSubMessageMaybeToArray(int size, const MessageLite& value, + io::CodedOutputStream* output); GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(WireFormatLite); }; @@ -736,7 +739,7 @@ class LIBPROTOBUF_EXPORT WireFormatLite { // discards them. WireFormat defines a subclass which writes to an // UnknownFieldSet. This class is used by ExtensionSet::ParseField(), since // ExtensionSet is part of the lite library but UnknownFieldSet is not. -class LIBPROTOBUF_EXPORT FieldSkipper { +class PROTOBUF_EXPORT FieldSkipper { public: FieldSkipper() {} virtual ~FieldSkipper() {} @@ -756,30 +759,30 @@ class LIBPROTOBUF_EXPORT FieldSkipper { // Subclass of FieldSkipper which saves skipped fields to a CodedOutputStream. 
-class LIBPROTOBUF_EXPORT CodedOutputStreamFieldSkipper : public FieldSkipper { +class PROTOBUF_EXPORT CodedOutputStreamFieldSkipper : public FieldSkipper { public: explicit CodedOutputStreamFieldSkipper(io::CodedOutputStream* unknown_fields) : unknown_fields_(unknown_fields) {} - virtual ~CodedOutputStreamFieldSkipper() {} + ~CodedOutputStreamFieldSkipper() override {} // implements FieldSkipper ----------------------------------------- - virtual bool SkipField(io::CodedInputStream* input, uint32 tag); - virtual bool SkipMessage(io::CodedInputStream* input); - virtual void SkipUnknownEnum(int field_number, int value); + bool SkipField(io::CodedInputStream* input, uint32 tag) override; + bool SkipMessage(io::CodedInputStream* input) override; + void SkipUnknownEnum(int field_number, int value) override; protected: io::CodedOutputStream* unknown_fields_; }; - // inline methods ==================================================== -inline WireFormatLite::CppType -WireFormatLite::FieldTypeToCppType(FieldType type) { +inline WireFormatLite::CppType WireFormatLite::FieldTypeToCppType( + FieldType type) { return kFieldTypeToCppTypeMap[type]; } -inline uint32 WireFormatLite::MakeTag(int field_number, WireType type) { +constexpr inline uint32 WireFormatLite::MakeTag(int field_number, + WireType type) { return GOOGLE_PROTOBUF_WIRE_FORMAT_MAKE_TAG(field_number, type); } @@ -794,7 +797,7 @@ inline int WireFormatLite::GetTagFieldNumber(uint32 tag) { inline size_t WireFormatLite::TagSize(int field_number, WireFormatLite::FieldType type) { size_t result = io::CodedOutputStream::VarintSize32( - static_cast(field_number << kTagTypeBits)); + static_cast(field_number << kTagTypeBits)); if (type == TYPE_GROUP) { // Groups have both a start and an end tag. 
return result * 2; @@ -804,27 +807,19 @@ inline size_t WireFormatLite::TagSize(int field_number, } inline uint32 WireFormatLite::EncodeFloat(float value) { - union {float f; uint32 i;}; - f = value; - return i; + return bit_cast(value); } inline float WireFormatLite::DecodeFloat(uint32 value) { - union {float f; uint32 i;}; - i = value; - return f; + return bit_cast(value); } inline uint64 WireFormatLite::EncodeDouble(double value) { - union {double f; uint64 i;}; - f = value; - return i; + return bit_cast(value); } inline double WireFormatLite::DecodeDouble(uint64 value) { - union {double f; uint64 i;}; - i = value; - return f; + return bit_cast(value); } // ZigZag Transform: Encodes signed integers so that they can be @@ -877,17 +872,995 @@ inline int64 WireFormatLite::ZigZagDecode64(uint64 n) { // call ReadBytes(). inline bool WireFormatLite::ReadString(io::CodedInputStream* input, - string* value) { + std::string* value) { return ReadBytes(input, value); } inline bool WireFormatLite::ReadString(io::CodedInputStream* input, - string** p) { + std::string** p) { return ReadBytes(input, p); } +inline uint8* InternalSerializeUnknownMessageSetItemsToArray( + const std::string& unknown_fields, uint8* target, + io::EpsCopyOutputStream* stream) { + return stream->WriteRaw(unknown_fields.data(), + static_cast(unknown_fields.size()), target); +} + +inline size_t ComputeUnknownMessageSetItemsSize( + const std::string& unknown_fields) { + return unknown_fields.size(); +} + +// Implementation details of ReadPrimitive. 
+ +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, int32* value) { + uint32 temp; + if (!input->ReadVarint32(&temp)) return false; + *value = static_cast(temp); + return true; +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, int64* value) { + uint64 temp; + if (!input->ReadVarint64(&temp)) return false; + *value = static_cast(temp); + return true; +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, uint32* value) { + return input->ReadVarint32(value); +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, uint64* value) { + return input->ReadVarint64(value); +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, int32* value) { + uint32 temp; + if (!input->ReadVarint32(&temp)) return false; + *value = ZigZagDecode32(temp); + return true; +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, int64* value) { + uint64 temp; + if (!input->ReadVarint64(&temp)) return false; + *value = ZigZagDecode64(temp); + return true; +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, uint32* value) { + return input->ReadLittleEndian32(value); +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, uint64* value) { + return input->ReadLittleEndian64(value); +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, int32* value) { + uint32 temp; + if (!input->ReadLittleEndian32(&temp)) return false; + *value = static_cast(temp); + return true; +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, int64* value) { + uint64 temp; + if (!input->ReadLittleEndian64(&temp)) return false; + *value = static_cast(temp); + return true; +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, 
float* value) { + uint32 temp; + if (!input->ReadLittleEndian32(&temp)) return false; + *value = DecodeFloat(temp); + return true; +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, double* value) { + uint64 temp; + if (!input->ReadLittleEndian64(&temp)) return false; + *value = DecodeDouble(temp); + return true; +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, bool* value) { + uint64 temp; + if (!input->ReadVarint64(&temp)) return false; + *value = temp != 0; + return true; +} +template <> +inline bool WireFormatLite::ReadPrimitive( + io::CodedInputStream* input, int* value) { + uint32 temp; + if (!input->ReadVarint32(&temp)) return false; + *value = static_cast(temp); + return true; +} + +template <> +inline const uint8* +WireFormatLite::ReadPrimitiveFromArray( + const uint8* buffer, uint32* value) { + return io::CodedInputStream::ReadLittleEndian32FromArray(buffer, value); +} +template <> +inline const uint8* +WireFormatLite::ReadPrimitiveFromArray( + const uint8* buffer, uint64* value) { + return io::CodedInputStream::ReadLittleEndian64FromArray(buffer, value); +} +template <> +inline const uint8* +WireFormatLite::ReadPrimitiveFromArray( + const uint8* buffer, int32* value) { + uint32 temp; + buffer = io::CodedInputStream::ReadLittleEndian32FromArray(buffer, &temp); + *value = static_cast(temp); + return buffer; +} +template <> +inline const uint8* +WireFormatLite::ReadPrimitiveFromArray( + const uint8* buffer, int64* value) { + uint64 temp; + buffer = io::CodedInputStream::ReadLittleEndian64FromArray(buffer, &temp); + *value = static_cast(temp); + return buffer; +} +template <> +inline const uint8* +WireFormatLite::ReadPrimitiveFromArray( + const uint8* buffer, float* value) { + uint32 temp; + buffer = io::CodedInputStream::ReadLittleEndian32FromArray(buffer, &temp); + *value = DecodeFloat(temp); + return buffer; +} +template <> +inline const uint8* 
+WireFormatLite::ReadPrimitiveFromArray( + const uint8* buffer, double* value) { + uint64 temp; + buffer = io::CodedInputStream::ReadLittleEndian64FromArray(buffer, &temp); + *value = DecodeDouble(temp); + return buffer; +} + +template +inline bool WireFormatLite::ReadRepeatedPrimitive( + int, // tag_size, unused. + uint32 tag, io::CodedInputStream* input, RepeatedField* values) { + CType value; + if (!ReadPrimitive(input, &value)) return false; + values->Add(value); + int elements_already_reserved = values->Capacity() - values->size(); + while (elements_already_reserved > 0 && input->ExpectTag(tag)) { + if (!ReadPrimitive(input, &value)) return false; + values->AddAlreadyReserved(value); + elements_already_reserved--; + } + return true; +} + +template +inline bool WireFormatLite::ReadRepeatedFixedSizePrimitive( + int tag_size, uint32 tag, io::CodedInputStream* input, + RepeatedField* values) { + GOOGLE_DCHECK_EQ(UInt32Size(tag), static_cast(tag_size)); + CType value; + if (!ReadPrimitive(input, &value)) return false; + values->Add(value); + + // For fixed size values, repeated values can be read more quickly by + // reading directly from a raw array. + // + // We can get a tight loop by only reading as many elements as can be + // added to the RepeatedField without having to do any resizing. Additionally, + // we only try to read as many elements as are available from the current + // buffer space. Doing so avoids having to perform boundary checks when + // reading the value: the maximum number of elements that can be read is + // known outside of the loop. + const void* void_pointer; + int size; + input->GetDirectBufferPointerInline(&void_pointer, &size); + if (size > 0) { + const uint8* buffer = reinterpret_cast(void_pointer); + // The number of bytes each type occupies on the wire. + const int per_value_size = tag_size + static_cast(sizeof(value)); + + // parentheses around (std::min) prevents macro expansion of min(...) 
+ int elements_available = + (std::min)(values->Capacity() - values->size(), size / per_value_size); + int num_read = 0; + while (num_read < elements_available && + (buffer = io::CodedInputStream::ExpectTagFromArray(buffer, tag)) != + NULL) { + buffer = ReadPrimitiveFromArray(buffer, &value); + values->AddAlreadyReserved(value); + ++num_read; + } + const int read_bytes = num_read * per_value_size; + if (read_bytes > 0) { + input->Skip(read_bytes); + } + } + return true; +} + +// Specializations of ReadRepeatedPrimitive for the fixed size types, which use +// the optimized code path. +#define READ_REPEATED_FIXED_SIZE_PRIMITIVE(CPPTYPE, DECLARED_TYPE) \ + template <> \ + inline bool WireFormatLite::ReadRepeatedPrimitive< \ + CPPTYPE, WireFormatLite::DECLARED_TYPE>( \ + int tag_size, uint32 tag, io::CodedInputStream* input, \ + RepeatedField* values) { \ + return ReadRepeatedFixedSizePrimitive( \ + tag_size, tag, input, values); \ + } + +READ_REPEATED_FIXED_SIZE_PRIMITIVE(uint32, TYPE_FIXED32) +READ_REPEATED_FIXED_SIZE_PRIMITIVE(uint64, TYPE_FIXED64) +READ_REPEATED_FIXED_SIZE_PRIMITIVE(int32, TYPE_SFIXED32) +READ_REPEATED_FIXED_SIZE_PRIMITIVE(int64, TYPE_SFIXED64) +READ_REPEATED_FIXED_SIZE_PRIMITIVE(float, TYPE_FLOAT) +READ_REPEATED_FIXED_SIZE_PRIMITIVE(double, TYPE_DOUBLE) + +#undef READ_REPEATED_FIXED_SIZE_PRIMITIVE + +template +bool WireFormatLite::ReadRepeatedPrimitiveNoInline( + int tag_size, uint32 tag, io::CodedInputStream* input, + RepeatedField* value) { + return ReadRepeatedPrimitive(tag_size, tag, input, + value); +} + +template +inline bool WireFormatLite::ReadPackedPrimitive(io::CodedInputStream* input, + RepeatedField* values) { + int length; + if (!input->ReadVarintSizeAsInt(&length)) return false; + io::CodedInputStream::Limit limit = input->PushLimit(length); + while (input->BytesUntilLimit() > 0) { + CType value; + if (!ReadPrimitive(input, &value)) return false; + values->Add(value); + } + input->PopLimit(limit); + return true; +} + +template 
+inline bool WireFormatLite::ReadPackedFixedSizePrimitive( + io::CodedInputStream* input, RepeatedField* values) { + int length; + if (!input->ReadVarintSizeAsInt(&length)) return false; + const int old_entries = values->size(); + const int new_entries = length / static_cast(sizeof(CType)); + const int new_bytes = new_entries * static_cast(sizeof(CType)); + if (new_bytes != length) return false; + // We would *like* to pre-allocate the buffer to write into (for + // speed), but *must* avoid performing a very large allocation due + // to a malicious user-supplied "length" above. So we have a fast + // path that pre-allocates when the "length" is less than a bound. + // We determine the bound by calling BytesUntilTotalBytesLimit() and + // BytesUntilLimit(). These return -1 to mean "no limit set". + // There are four cases: + // TotalBytesLimit Limit + // -1 -1 Use slow path. + // -1 >= 0 Use fast path if length <= Limit. + // >= 0 -1 Use slow path. + // >= 0 >= 0 Use fast path if length <= min(both limits). + int64 bytes_limit = input->BytesUntilTotalBytesLimit(); + if (bytes_limit == -1) { + bytes_limit = input->BytesUntilLimit(); + } else { + // parentheses around (std::min) prevents macro expansion of min(...) + bytes_limit = + (std::min)(bytes_limit, static_cast(input->BytesUntilLimit())); + } + if (bytes_limit >= new_bytes) { + // Fast-path that pre-allocates *values to the final size. 
+#if defined(PROTOBUF_LITTLE_ENDIAN) + values->Resize(old_entries + new_entries, 0); + // values->mutable_data() may change after Resize(), so do this after: + void* dest = reinterpret_cast(values->mutable_data() + old_entries); + if (!input->ReadRaw(dest, new_bytes)) { + values->Truncate(old_entries); + return false; + } +#else + values->Reserve(old_entries + new_entries); + CType value; + for (int i = 0; i < new_entries; ++i) { + if (!ReadPrimitive(input, &value)) return false; + values->AddAlreadyReserved(value); + } +#endif + } else { + // This is the slow-path case where "length" may be too large to + // safely allocate. We read as much as we can into *values + // without pre-allocating "length" bytes. + CType value; + for (int i = 0; i < new_entries; ++i) { + if (!ReadPrimitive(input, &value)) return false; + values->Add(value); + } + } + return true; +} + +// Specializations of ReadPackedPrimitive for the fixed size types, which use +// an optimized code path. +#define READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(CPPTYPE, DECLARED_TYPE) \ + template <> \ + inline bool \ + WireFormatLite::ReadPackedPrimitive( \ + io::CodedInputStream * input, RepeatedField * values) { \ + return ReadPackedFixedSizePrimitive( \ + input, values); \ + } + +READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(uint32, TYPE_FIXED32) +READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(uint64, TYPE_FIXED64) +READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(int32, TYPE_SFIXED32) +READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(int64, TYPE_SFIXED64) +READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(float, TYPE_FLOAT) +READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(double, TYPE_DOUBLE) + +#undef READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE + +template +bool WireFormatLite::ReadPackedPrimitiveNoInline(io::CodedInputStream* input, + RepeatedField* values) { + return ReadPackedPrimitive(input, values); +} + + +template +inline bool WireFormatLite::ReadGroup(int field_number, + io::CodedInputStream* input, + MessageType* value) { + if 
(!input->IncrementRecursionDepth()) return false; + if (!value->MergePartialFromCodedStream(input)) return false; + input->UnsafeDecrementRecursionDepth(); + // Make sure the last thing read was an end tag for this group. + if (!input->LastTagWas(MakeTag(field_number, WIRETYPE_END_GROUP))) { + return false; + } + return true; +} +template +inline bool WireFormatLite::ReadMessage(io::CodedInputStream* input, + MessageType* value) { + int length; + if (!input->ReadVarintSizeAsInt(&length)) return false; + std::pair p = + input->IncrementRecursionDepthAndPushLimit(length); + if (p.second < 0 || !value->MergePartialFromCodedStream(input)) return false; + // Make sure that parsing stopped when the limit was hit, not at an endgroup + // tag. + return input->DecrementRecursionDepthAndPopLimit(p.first); +} + +// =================================================================== + +inline void WireFormatLite::WriteTag(int field_number, WireType type, + io::CodedOutputStream* output) { + output->WriteTag(MakeTag(field_number, type)); +} + +inline void WireFormatLite::WriteInt32NoTag(int32 value, + io::CodedOutputStream* output) { + output->WriteVarint32SignExtended(value); +} +inline void WireFormatLite::WriteInt64NoTag(int64 value, + io::CodedOutputStream* output) { + output->WriteVarint64(static_cast(value)); +} +inline void WireFormatLite::WriteUInt32NoTag(uint32 value, + io::CodedOutputStream* output) { + output->WriteVarint32(value); +} +inline void WireFormatLite::WriteUInt64NoTag(uint64 value, + io::CodedOutputStream* output) { + output->WriteVarint64(value); +} +inline void WireFormatLite::WriteSInt32NoTag(int32 value, + io::CodedOutputStream* output) { + output->WriteVarint32(ZigZagEncode32(value)); +} +inline void WireFormatLite::WriteSInt64NoTag(int64 value, + io::CodedOutputStream* output) { + output->WriteVarint64(ZigZagEncode64(value)); +} +inline void WireFormatLite::WriteFixed32NoTag(uint32 value, + io::CodedOutputStream* output) { + 
output->WriteLittleEndian32(value); +} +inline void WireFormatLite::WriteFixed64NoTag(uint64 value, + io::CodedOutputStream* output) { + output->WriteLittleEndian64(value); +} +inline void WireFormatLite::WriteSFixed32NoTag(int32 value, + io::CodedOutputStream* output) { + output->WriteLittleEndian32(static_cast(value)); +} +inline void WireFormatLite::WriteSFixed64NoTag(int64 value, + io::CodedOutputStream* output) { + output->WriteLittleEndian64(static_cast(value)); +} +inline void WireFormatLite::WriteFloatNoTag(float value, + io::CodedOutputStream* output) { + output->WriteLittleEndian32(EncodeFloat(value)); +} +inline void WireFormatLite::WriteDoubleNoTag(double value, + io::CodedOutputStream* output) { + output->WriteLittleEndian64(EncodeDouble(value)); +} +inline void WireFormatLite::WriteBoolNoTag(bool value, + io::CodedOutputStream* output) { + output->WriteVarint32(value ? 1 : 0); +} +inline void WireFormatLite::WriteEnumNoTag(int value, + io::CodedOutputStream* output) { + output->WriteVarint32SignExtended(value); +} + +// See comment on ReadGroupNoVirtual to understand the need for this template +// parameter name. 
+template +inline void WireFormatLite::WriteGroupNoVirtual( + int field_number, const MessageType_WorkAroundCppLookupDefect& value, + io::CodedOutputStream* output) { + WriteTag(field_number, WIRETYPE_START_GROUP, output); + value.MessageType_WorkAroundCppLookupDefect::SerializeWithCachedSizes(output); + WriteTag(field_number, WIRETYPE_END_GROUP, output); +} +template +inline void WireFormatLite::WriteMessageNoVirtual( + int field_number, const MessageType_WorkAroundCppLookupDefect& value, + io::CodedOutputStream* output) { + WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output); + output->WriteVarint32( + value.MessageType_WorkAroundCppLookupDefect::GetCachedSize()); + value.MessageType_WorkAroundCppLookupDefect::SerializeWithCachedSizes(output); +} + +// =================================================================== + +inline uint8* WireFormatLite::WriteTagToArray(int field_number, WireType type, + uint8* target) { + return io::CodedOutputStream::WriteTagToArray(MakeTag(field_number, type), + target); +} + +inline uint8* WireFormatLite::WriteInt32NoTagToArray(int32 value, + uint8* target) { + return io::CodedOutputStream::WriteVarint32SignExtendedToArray(value, target); +} +inline uint8* WireFormatLite::WriteInt64NoTagToArray(int64 value, + uint8* target) { + return io::CodedOutputStream::WriteVarint64ToArray(static_cast(value), + target); +} +inline uint8* WireFormatLite::WriteUInt32NoTagToArray(uint32 value, + uint8* target) { + return io::CodedOutputStream::WriteVarint32ToArray(value, target); +} +inline uint8* WireFormatLite::WriteUInt64NoTagToArray(uint64 value, + uint8* target) { + return io::CodedOutputStream::WriteVarint64ToArray(value, target); +} +inline uint8* WireFormatLite::WriteSInt32NoTagToArray(int32 value, + uint8* target) { + return io::CodedOutputStream::WriteVarint32ToArray(ZigZagEncode32(value), + target); +} +inline uint8* WireFormatLite::WriteSInt64NoTagToArray(int64 value, + uint8* target) { + return 
io::CodedOutputStream::WriteVarint64ToArray(ZigZagEncode64(value), + target); +} +inline uint8* WireFormatLite::WriteFixed32NoTagToArray(uint32 value, + uint8* target) { + return io::CodedOutputStream::WriteLittleEndian32ToArray(value, target); +} +inline uint8* WireFormatLite::WriteFixed64NoTagToArray(uint64 value, + uint8* target) { + return io::CodedOutputStream::WriteLittleEndian64ToArray(value, target); +} +inline uint8* WireFormatLite::WriteSFixed32NoTagToArray(int32 value, + uint8* target) { + return io::CodedOutputStream::WriteLittleEndian32ToArray( + static_cast(value), target); +} +inline uint8* WireFormatLite::WriteSFixed64NoTagToArray(int64 value, + uint8* target) { + return io::CodedOutputStream::WriteLittleEndian64ToArray( + static_cast(value), target); +} +inline uint8* WireFormatLite::WriteFloatNoTagToArray(float value, + uint8* target) { + return io::CodedOutputStream::WriteLittleEndian32ToArray(EncodeFloat(value), + target); +} +inline uint8* WireFormatLite::WriteDoubleNoTagToArray(double value, + uint8* target) { + return io::CodedOutputStream::WriteLittleEndian64ToArray(EncodeDouble(value), + target); +} +inline uint8* WireFormatLite::WriteBoolNoTagToArray(bool value, uint8* target) { + return io::CodedOutputStream::WriteVarint32ToArray(value ? 
1 : 0, target); +} +inline uint8* WireFormatLite::WriteEnumNoTagToArray(int value, uint8* target) { + return io::CodedOutputStream::WriteVarint32SignExtendedToArray(value, target); +} + +template +inline uint8* WireFormatLite::WritePrimitiveNoTagToArray( + const RepeatedField& value, uint8* (*Writer)(T, uint8*), uint8* target) { + const int n = value.size(); + GOOGLE_DCHECK_GT(n, 0); + + const T* ii = value.data(); + int i = 0; + do { + target = Writer(ii[i], target); + } while (++i < n); + + return target; +} + +template +inline uint8* WireFormatLite::WriteFixedNoTagToArray( + const RepeatedField& value, uint8* (*Writer)(T, uint8*), uint8* target) { +#if defined(PROTOBUF_LITTLE_ENDIAN) + (void)Writer; + + const int n = value.size(); + GOOGLE_DCHECK_GT(n, 0); + + const T* ii = value.data(); + const int bytes = n * static_cast(sizeof(ii[0])); + memcpy(target, ii, static_cast(bytes)); + return target + bytes; +#else + return WritePrimitiveNoTagToArray(value, Writer, target); +#endif +} + +inline uint8* WireFormatLite::WriteInt32NoTagToArray( + const RepeatedField& value, uint8* target) { + return WritePrimitiveNoTagToArray(value, WriteInt32NoTagToArray, target); +} +inline uint8* WireFormatLite::WriteInt64NoTagToArray( + const RepeatedField& value, uint8* target) { + return WritePrimitiveNoTagToArray(value, WriteInt64NoTagToArray, target); +} +inline uint8* WireFormatLite::WriteUInt32NoTagToArray( + const RepeatedField& value, uint8* target) { + return WritePrimitiveNoTagToArray(value, WriteUInt32NoTagToArray, target); +} +inline uint8* WireFormatLite::WriteUInt64NoTagToArray( + const RepeatedField& value, uint8* target) { + return WritePrimitiveNoTagToArray(value, WriteUInt64NoTagToArray, target); +} +inline uint8* WireFormatLite::WriteSInt32NoTagToArray( + const RepeatedField& value, uint8* target) { + return WritePrimitiveNoTagToArray(value, WriteSInt32NoTagToArray, target); +} +inline uint8* WireFormatLite::WriteSInt64NoTagToArray( + const RepeatedField& value, 
uint8* target) { + return WritePrimitiveNoTagToArray(value, WriteSInt64NoTagToArray, target); +} +inline uint8* WireFormatLite::WriteFixed32NoTagToArray( + const RepeatedField& value, uint8* target) { + return WriteFixedNoTagToArray(value, WriteFixed32NoTagToArray, target); +} +inline uint8* WireFormatLite::WriteFixed64NoTagToArray( + const RepeatedField& value, uint8* target) { + return WriteFixedNoTagToArray(value, WriteFixed64NoTagToArray, target); +} +inline uint8* WireFormatLite::WriteSFixed32NoTagToArray( + const RepeatedField& value, uint8* target) { + return WriteFixedNoTagToArray(value, WriteSFixed32NoTagToArray, target); +} +inline uint8* WireFormatLite::WriteSFixed64NoTagToArray( + const RepeatedField& value, uint8* target) { + return WriteFixedNoTagToArray(value, WriteSFixed64NoTagToArray, target); +} +inline uint8* WireFormatLite::WriteFloatNoTagToArray( + const RepeatedField& value, uint8* target) { + return WriteFixedNoTagToArray(value, WriteFloatNoTagToArray, target); +} +inline uint8* WireFormatLite::WriteDoubleNoTagToArray( + const RepeatedField& value, uint8* target) { + return WriteFixedNoTagToArray(value, WriteDoubleNoTagToArray, target); +} +inline uint8* WireFormatLite::WriteBoolNoTagToArray( + const RepeatedField& value, uint8* target) { + return WritePrimitiveNoTagToArray(value, WriteBoolNoTagToArray, target); +} +inline uint8* WireFormatLite::WriteEnumNoTagToArray( + const RepeatedField& value, uint8* target) { + return WritePrimitiveNoTagToArray(value, WriteEnumNoTagToArray, target); +} + +inline uint8* WireFormatLite::WriteInt32ToArray(int field_number, int32 value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); + return WriteInt32NoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteInt64ToArray(int field_number, int64 value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); + return WriteInt64NoTagToArray(value, target); +} +inline uint8* 
WireFormatLite::WriteUInt32ToArray(int field_number, uint32 value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); + return WriteUInt32NoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteUInt64ToArray(int field_number, uint64 value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); + return WriteUInt64NoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteSInt32ToArray(int field_number, int32 value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); + return WriteSInt32NoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteSInt64ToArray(int field_number, int64 value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); + return WriteSInt64NoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteFixed32ToArray(int field_number, + uint32 value, uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_FIXED32, target); + return WriteFixed32NoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteFixed64ToArray(int field_number, + uint64 value, uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_FIXED64, target); + return WriteFixed64NoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteSFixed32ToArray(int field_number, + int32 value, uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_FIXED32, target); + return WriteSFixed32NoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteSFixed64ToArray(int field_number, + int64 value, uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_FIXED64, target); + return WriteSFixed64NoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteFloatToArray(int field_number, float value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_FIXED32, target); + return WriteFloatNoTagToArray(value, target); +} +inline uint8* 
WireFormatLite::WriteDoubleToArray(int field_number, double value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_FIXED64, target); + return WriteDoubleNoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteBoolToArray(int field_number, bool value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); + return WriteBoolNoTagToArray(value, target); +} +inline uint8* WireFormatLite::WriteEnumToArray(int field_number, int value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); + return WriteEnumNoTagToArray(value, target); +} + +template +inline uint8* WireFormatLite::WritePrimitiveToArray( + int field_number, const RepeatedField& value, + uint8* (*Writer)(int, T, uint8*), uint8* target) { + const int n = value.size(); + if (n == 0) { + return target; + } + + const T* ii = value.data(); + int i = 0; + do { + target = Writer(field_number, ii[i], target); + } while (++i < n); + + return target; +} + +inline uint8* WireFormatLite::WriteInt32ToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteInt32ToArray, target); +} +inline uint8* WireFormatLite::WriteInt64ToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteInt64ToArray, target); +} +inline uint8* WireFormatLite::WriteUInt32ToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteUInt32ToArray, target); +} +inline uint8* WireFormatLite::WriteUInt64ToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteUInt64ToArray, target); +} +inline uint8* WireFormatLite::WriteSInt32ToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteSInt32ToArray, 
target); +} +inline uint8* WireFormatLite::WriteSInt64ToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteSInt64ToArray, target); +} +inline uint8* WireFormatLite::WriteFixed32ToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteFixed32ToArray, + target); +} +inline uint8* WireFormatLite::WriteFixed64ToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteFixed64ToArray, + target); +} +inline uint8* WireFormatLite::WriteSFixed32ToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteSFixed32ToArray, + target); +} +inline uint8* WireFormatLite::WriteSFixed64ToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteSFixed64ToArray, + target); +} +inline uint8* WireFormatLite::WriteFloatToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteFloatToArray, target); +} +inline uint8* WireFormatLite::WriteDoubleToArray( + int field_number, const RepeatedField& value, uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteDoubleToArray, target); +} +inline uint8* WireFormatLite::WriteBoolToArray(int field_number, + const RepeatedField& value, + uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteBoolToArray, target); +} +inline uint8* WireFormatLite::WriteEnumToArray(int field_number, + const RepeatedField& value, + uint8* target) { + return WritePrimitiveToArray(field_number, value, WriteEnumToArray, target); +} +inline uint8* WireFormatLite::WriteStringToArray(int field_number, + const std::string& value, + uint8* target) { + // String is for UTF-8 text only + // WARNING: In 
wire_format.cc, both strings and bytes are handled by + // WriteString() to avoid code duplication. If the implementations become + // different, you will need to update that usage. + target = WriteTagToArray(field_number, WIRETYPE_LENGTH_DELIMITED, target); + return io::CodedOutputStream::WriteStringWithSizeToArray(value, target); +} +inline uint8* WireFormatLite::WriteBytesToArray(int field_number, + const std::string& value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_LENGTH_DELIMITED, target); + return io::CodedOutputStream::WriteStringWithSizeToArray(value, target); +} + + +template +inline uint8* WireFormatLite::InternalWriteGroup( + int field_number, const MessageType& value, uint8* target, + io::EpsCopyOutputStream* stream) { + target = WriteTagToArray(field_number, WIRETYPE_START_GROUP, target); + target = value._InternalSerialize(target, stream); + target = stream->EnsureSpace(target); + return WriteTagToArray(field_number, WIRETYPE_END_GROUP, target); +} +template +inline uint8* WireFormatLite::InternalWriteMessage( + int field_number, const MessageType& value, uint8* target, + io::EpsCopyOutputStream* stream) { + target = WriteTagToArray(field_number, WIRETYPE_LENGTH_DELIMITED, target); + target = io::CodedOutputStream::WriteVarint32ToArray( + static_cast(value.GetCachedSize()), target); + return value._InternalSerialize(target, stream); +} + +// See comment on ReadGroupNoVirtual to understand the need for this template +// parameter name. 
+template +inline uint8* WireFormatLite::InternalWriteGroupNoVirtualToArray( + int field_number, const MessageType_WorkAroundCppLookupDefect& value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_START_GROUP, target); + target = value.MessageType_WorkAroundCppLookupDefect:: + SerializeWithCachedSizesToArray(target); + return WriteTagToArray(field_number, WIRETYPE_END_GROUP, target); +} +template +inline uint8* WireFormatLite::InternalWriteMessageNoVirtualToArray( + int field_number, const MessageType_WorkAroundCppLookupDefect& value, + uint8* target) { + target = WriteTagToArray(field_number, WIRETYPE_LENGTH_DELIMITED, target); + target = io::CodedOutputStream::WriteVarint32ToArray( + static_cast( + value.MessageType_WorkAroundCppLookupDefect::GetCachedSize()), + target); + return value + .MessageType_WorkAroundCppLookupDefect::SerializeWithCachedSizesToArray( + target); +} + +// =================================================================== + +inline size_t WireFormatLite::Int32Size(int32 value) { + return io::CodedOutputStream::VarintSize32SignExtended(value); +} +inline size_t WireFormatLite::Int64Size(int64 value) { + return io::CodedOutputStream::VarintSize64(static_cast(value)); +} +inline size_t WireFormatLite::UInt32Size(uint32 value) { + return io::CodedOutputStream::VarintSize32(value); +} +inline size_t WireFormatLite::UInt64Size(uint64 value) { + return io::CodedOutputStream::VarintSize64(value); +} +inline size_t WireFormatLite::SInt32Size(int32 value) { + return io::CodedOutputStream::VarintSize32(ZigZagEncode32(value)); +} +inline size_t WireFormatLite::SInt64Size(int64 value) { + return io::CodedOutputStream::VarintSize64(ZigZagEncode64(value)); +} +inline size_t WireFormatLite::EnumSize(int value) { + return io::CodedOutputStream::VarintSize32SignExtended(value); +} + +inline size_t WireFormatLite::StringSize(const std::string& value) { + return LengthDelimitedSize(value.size()); +} +inline size_t 
WireFormatLite::BytesSize(const std::string& value) { + return LengthDelimitedSize(value.size()); +} + + +template +inline size_t WireFormatLite::GroupSize(const MessageType& value) { + return value.ByteSizeLong(); +} +template +inline size_t WireFormatLite::MessageSize(const MessageType& value) { + return LengthDelimitedSize(value.ByteSizeLong()); +} + +// See comment on ReadGroupNoVirtual to understand the need for this template +// parameter name. +template +inline size_t WireFormatLite::GroupSizeNoVirtual( + const MessageType_WorkAroundCppLookupDefect& value) { + return value.MessageType_WorkAroundCppLookupDefect::ByteSizeLong(); +} +template +inline size_t WireFormatLite::MessageSizeNoVirtual( + const MessageType_WorkAroundCppLookupDefect& value) { + return LengthDelimitedSize( + value.MessageType_WorkAroundCppLookupDefect::ByteSizeLong()); +} + +inline size_t WireFormatLite::LengthDelimitedSize(size_t length) { + // The static_cast here prevents an error in certain compiler configurations + // but is not technically correct--if length is too large to fit in a uint32 + // then it will be silently truncated. We will need to fix this if we ever + // decide to start supporting serialized messages greater than 2 GiB in size. + return length + + io::CodedOutputStream::VarintSize32(static_cast(length)); +} + +template +bool ParseMessageSetItemImpl(io::CodedInputStream* input, MS ms) { + // This method parses a group which should contain two fields: + // required int32 type_id = 2; + // required data message = 3; + + uint32 last_type_id = 0; + + // If we see message data before the type_id, we'll append it to this so + // we can parse it later. 
+ std::string message_data; + + while (true) { + const uint32 tag = input->ReadTagNoLastTag(); + if (tag == 0) return false; + + switch (tag) { + case WireFormatLite::kMessageSetTypeIdTag: { + uint32 type_id; + if (!input->ReadVarint32(&type_id)) return false; + last_type_id = type_id; + + if (!message_data.empty()) { + // We saw some message data before the type_id. Have to parse it + // now. + io::CodedInputStream sub_input( + reinterpret_cast(message_data.data()), + static_cast(message_data.size())); + sub_input.SetRecursionLimit(input->RecursionBudget()); + if (!ms.ParseField(last_type_id, &sub_input)) { + return false; + } + message_data.clear(); + } + + break; + } + + case WireFormatLite::kMessageSetMessageTag: { + if (last_type_id == 0) { + // We haven't seen a type_id yet. Append this data to message_data. + uint32 length; + if (!input->ReadVarint32(&length)) return false; + if (static_cast(length) < 0) return false; + uint32 size = static_cast( + length + io::CodedOutputStream::VarintSize32(length)); + message_data.resize(size); + auto ptr = reinterpret_cast(&message_data[0]); + ptr = io::CodedOutputStream::WriteVarint32ToArray(length, ptr); + if (!input->ReadRaw(ptr, length)) return false; + } else { + // Already saw type_id, so we can parse this directly. 
+ if (!ms.ParseField(last_type_id, input)) { + return false; + } + } + + break; + } + + case WireFormatLite::kMessageSetItemEndTag: { + return true; + } + + default: { + if (!ms.SkipField(tag, input)) return false; + } + } + } +} + } // namespace internal } // namespace protobuf - } // namespace google + +#include + #endif // GOOGLE_PROTOBUF_WIRE_FORMAT_LITE_H__ diff --git a/third_party/protobuf-lite/google/protobuf/wire_format_lite_inl.h b/third_party/protobuf-lite/google/protobuf/wire_format_lite_inl.h deleted file mode 100644 index 6cd2c2fb..00000000 --- a/third_party/protobuf-lite/google/protobuf/wire_format_lite_inl.h +++ /dev/null @@ -1,996 +0,0 @@ -// Protocol Buffers - Google's data interchange format -// Copyright 2008 Google Inc. All rights reserved. -// https://developers.google.com/protocol-buffers/ -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Author: kenton@google.com (Kenton Varda) -// wink@google.com (Wink Saville) (refactored from wire_format.h) -// Based on original Protocol Buffers design by -// Sanjay Ghemawat, Jeff Dean, and others. - -#ifndef GOOGLE_PROTOBUF_WIRE_FORMAT_LITE_INL_H__ -#define GOOGLE_PROTOBUF_WIRE_FORMAT_LITE_INL_H__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -namespace google { -namespace protobuf { -namespace internal { - -// Implementation details of ReadPrimitive. 
- -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - int32* value) { - uint32 temp; - if (!input->ReadVarint32(&temp)) return false; - *value = static_cast(temp); - return true; -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - int64* value) { - uint64 temp; - if (!input->ReadVarint64(&temp)) return false; - *value = static_cast(temp); - return true; -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - uint32* value) { - return input->ReadVarint32(value); -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - uint64* value) { - return input->ReadVarint64(value); -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - int32* value) { - uint32 temp; - if (!input->ReadVarint32(&temp)) return false; - *value = ZigZagDecode32(temp); - return true; -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - int64* value) { - uint64 temp; - if (!input->ReadVarint64(&temp)) return false; - *value = ZigZagDecode64(temp); - return true; -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - uint32* value) { - return input->ReadLittleEndian32(value); -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - uint64* value) { - return input->ReadLittleEndian64(value); -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - int32* value) { - uint32 temp; - if (!input->ReadLittleEndian32(&temp)) return false; - *value = static_cast(temp); - return true; -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - int64* value) { - uint64 temp; - if (!input->ReadLittleEndian64(&temp)) return false; - *value = static_cast(temp); - return true; -} -template <> -inline bool WireFormatLite::ReadPrimitive( - 
io::CodedInputStream* input, - float* value) { - uint32 temp; - if (!input->ReadLittleEndian32(&temp)) return false; - *value = DecodeFloat(temp); - return true; -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - double* value) { - uint64 temp; - if (!input->ReadLittleEndian64(&temp)) return false; - *value = DecodeDouble(temp); - return true; -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - bool* value) { - uint64 temp; - if (!input->ReadVarint64(&temp)) return false; - *value = temp != 0; - return true; -} -template <> -inline bool WireFormatLite::ReadPrimitive( - io::CodedInputStream* input, - int* value) { - uint32 temp; - if (!input->ReadVarint32(&temp)) return false; - *value = static_cast(temp); - return true; -} - -template <> -inline const uint8* WireFormatLite::ReadPrimitiveFromArray< - uint32, WireFormatLite::TYPE_FIXED32>( - const uint8* buffer, - uint32* value) { - return io::CodedInputStream::ReadLittleEndian32FromArray(buffer, value); -} -template <> -inline const uint8* WireFormatLite::ReadPrimitiveFromArray< - uint64, WireFormatLite::TYPE_FIXED64>( - const uint8* buffer, - uint64* value) { - return io::CodedInputStream::ReadLittleEndian64FromArray(buffer, value); -} -template <> -inline const uint8* WireFormatLite::ReadPrimitiveFromArray< - int32, WireFormatLite::TYPE_SFIXED32>( - const uint8* buffer, - int32* value) { - uint32 temp; - buffer = io::CodedInputStream::ReadLittleEndian32FromArray(buffer, &temp); - *value = static_cast(temp); - return buffer; -} -template <> -inline const uint8* WireFormatLite::ReadPrimitiveFromArray< - int64, WireFormatLite::TYPE_SFIXED64>( - const uint8* buffer, - int64* value) { - uint64 temp; - buffer = io::CodedInputStream::ReadLittleEndian64FromArray(buffer, &temp); - *value = static_cast(temp); - return buffer; -} -template <> -inline const uint8* WireFormatLite::ReadPrimitiveFromArray< - float, WireFormatLite::TYPE_FLOAT>( - 
const uint8* buffer, - float* value) { - uint32 temp; - buffer = io::CodedInputStream::ReadLittleEndian32FromArray(buffer, &temp); - *value = DecodeFloat(temp); - return buffer; -} -template <> -inline const uint8* WireFormatLite::ReadPrimitiveFromArray< - double, WireFormatLite::TYPE_DOUBLE>( - const uint8* buffer, - double* value) { - uint64 temp; - buffer = io::CodedInputStream::ReadLittleEndian64FromArray(buffer, &temp); - *value = DecodeDouble(temp); - return buffer; -} - -template -inline bool WireFormatLite::ReadRepeatedPrimitive( - int, // tag_size, unused. - uint32 tag, - io::CodedInputStream* input, - RepeatedField* values) { - CType value; - if (!ReadPrimitive(input, &value)) return false; - values->Add(value); - int elements_already_reserved = values->Capacity() - values->size(); - while (elements_already_reserved > 0 && input->ExpectTag(tag)) { - if (!ReadPrimitive(input, &value)) return false; - values->AddAlreadyReserved(value); - elements_already_reserved--; - } - return true; -} - -template -inline bool WireFormatLite::ReadRepeatedFixedSizePrimitive( - int tag_size, - uint32 tag, - io::CodedInputStream* input, - RepeatedField* values) { - GOOGLE_DCHECK_EQ(UInt32Size(tag), static_cast(tag_size)); - CType value; - if (!ReadPrimitive(input, &value)) - return false; - values->Add(value); - - // For fixed size values, repeated values can be read more quickly by - // reading directly from a raw array. - // - // We can get a tight loop by only reading as many elements as can be - // added to the RepeatedField without having to do any resizing. Additionally, - // we only try to read as many elements as are available from the current - // buffer space. Doing so avoids having to perform boundary checks when - // reading the value: the maximum number of elements that can be read is - // known outside of the loop. 
- const void* void_pointer; - int size; - input->GetDirectBufferPointerInline(&void_pointer, &size); - if (size > 0) { - const uint8* buffer = reinterpret_cast(void_pointer); - // The number of bytes each type occupies on the wire. - const int per_value_size = tag_size + static_cast(sizeof(value)); - - // parentheses around (std::min) prevents macro expansion of min(...) - int elements_available = - (std::min)(values->Capacity() - values->size(), size / per_value_size); - int num_read = 0; - while (num_read < elements_available && - (buffer = io::CodedInputStream::ExpectTagFromArray( - buffer, tag)) != NULL) { - buffer = ReadPrimitiveFromArray(buffer, &value); - values->AddAlreadyReserved(value); - ++num_read; - } - const int read_bytes = num_read * per_value_size; - if (read_bytes > 0) { - input->Skip(read_bytes); - } - } - return true; -} - -// Specializations of ReadRepeatedPrimitive for the fixed size types, which use -// the optimized code path. -#define READ_REPEATED_FIXED_SIZE_PRIMITIVE(CPPTYPE, DECLARED_TYPE) \ -template <> \ -inline bool WireFormatLite::ReadRepeatedPrimitive< \ - CPPTYPE, WireFormatLite::DECLARED_TYPE>( \ - int tag_size, \ - uint32 tag, \ - io::CodedInputStream* input, \ - RepeatedField* values) { \ - return ReadRepeatedFixedSizePrimitive< \ - CPPTYPE, WireFormatLite::DECLARED_TYPE>( \ - tag_size, tag, input, values); \ -} - -READ_REPEATED_FIXED_SIZE_PRIMITIVE(uint32, TYPE_FIXED32) -READ_REPEATED_FIXED_SIZE_PRIMITIVE(uint64, TYPE_FIXED64) -READ_REPEATED_FIXED_SIZE_PRIMITIVE(int32, TYPE_SFIXED32) -READ_REPEATED_FIXED_SIZE_PRIMITIVE(int64, TYPE_SFIXED64) -READ_REPEATED_FIXED_SIZE_PRIMITIVE(float, TYPE_FLOAT) -READ_REPEATED_FIXED_SIZE_PRIMITIVE(double, TYPE_DOUBLE) - -#undef READ_REPEATED_FIXED_SIZE_PRIMITIVE - -template -bool WireFormatLite::ReadRepeatedPrimitiveNoInline( - int tag_size, - uint32 tag, - io::CodedInputStream* input, - RepeatedField* value) { - return ReadRepeatedPrimitive( - tag_size, tag, input, value); -} - -template 
-inline bool WireFormatLite::ReadPackedPrimitive(io::CodedInputStream* input, - RepeatedField* values) { - int length; - if (!input->ReadVarintSizeAsInt(&length)) return false; - io::CodedInputStream::Limit limit = input->PushLimit(length); - while (input->BytesUntilLimit() > 0) { - CType value; - if (!ReadPrimitive(input, &value)) return false; - values->Add(value); - } - input->PopLimit(limit); - return true; -} - -template -inline bool WireFormatLite::ReadPackedFixedSizePrimitive( - io::CodedInputStream* input, RepeatedField* values) { - int length; - if (!input->ReadVarintSizeAsInt(&length)) return false; - const int old_entries = values->size(); - const int new_entries = length / static_cast(sizeof(CType)); - const int new_bytes = new_entries * static_cast(sizeof(CType)); - if (new_bytes != length) return false; - // We would *like* to pre-allocate the buffer to write into (for - // speed), but *must* avoid performing a very large allocation due - // to a malicious user-supplied "length" above. So we have a fast - // path that pre-allocates when the "length" is less than a bound. - // We determine the bound by calling BytesUntilTotalBytesLimit() and - // BytesUntilLimit(). These return -1 to mean "no limit set". - // There are four cases: - // TotalBytesLimit Limit - // -1 -1 Use slow path. - // -1 >= 0 Use fast path if length <= Limit. - // >= 0 -1 Use slow path. - // >= 0 >= 0 Use fast path if length <= min(both limits). - int64 bytes_limit = input->BytesUntilTotalBytesLimit(); - if (bytes_limit == -1) { - bytes_limit = input->BytesUntilLimit(); - } else { - // parentheses around (std::min) prevents macro expansion of min(...) - bytes_limit = - (std::min)(bytes_limit, static_cast(input->BytesUntilLimit())); - } - if (bytes_limit >= new_bytes) { - // Fast-path that pre-allocates *values to the final size. 
-#if defined(PROTOBUF_LITTLE_ENDIAN) - values->Resize(old_entries + new_entries, 0); - // values->mutable_data() may change after Resize(), so do this after: - void* dest = reinterpret_cast(values->mutable_data() + old_entries); - if (!input->ReadRaw(dest, new_bytes)) { - values->Truncate(old_entries); - return false; - } -#else - values->Reserve(old_entries + new_entries); - CType value; - for (int i = 0; i < new_entries; ++i) { - if (!ReadPrimitive(input, &value)) return false; - values->AddAlreadyReserved(value); - } -#endif - } else { - // This is the slow-path case where "length" may be too large to - // safely allocate. We read as much as we can into *values - // without pre-allocating "length" bytes. - CType value; - for (int i = 0; i < new_entries; ++i) { - if (!ReadPrimitive(input, &value)) return false; - values->Add(value); - } - } - return true; -} - -// Specializations of ReadPackedPrimitive for the fixed size types, which use -// an optimized code path. -#define READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(CPPTYPE, DECLARED_TYPE) \ -template <> \ -inline bool WireFormatLite::ReadPackedPrimitive< \ - CPPTYPE, WireFormatLite::DECLARED_TYPE>( \ - io::CodedInputStream* input, \ - RepeatedField* values) { \ - return ReadPackedFixedSizePrimitive< \ - CPPTYPE, WireFormatLite::DECLARED_TYPE>(input, values); \ -} - -READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(uint32, TYPE_FIXED32) -READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(uint64, TYPE_FIXED64) -READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(int32, TYPE_SFIXED32) -READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(int64, TYPE_SFIXED64) -READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(float, TYPE_FLOAT) -READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE(double, TYPE_DOUBLE) - -#undef READ_REPEATED_PACKED_FIXED_SIZE_PRIMITIVE - -template -bool WireFormatLite::ReadPackedPrimitiveNoInline(io::CodedInputStream* input, - RepeatedField* values) { - return ReadPackedPrimitive(input, values); -} - - -template -inline bool WireFormatLite::ReadGroup( - 
int field_number, io::CodedInputStream* input, - MessageType* value) { - if (!input->IncrementRecursionDepth()) return false; - if (!value->MergePartialFromCodedStream(input)) return false; - input->UnsafeDecrementRecursionDepth(); - // Make sure the last thing read was an end tag for this group. - if (!input->LastTagWas(MakeTag(field_number, WIRETYPE_END_GROUP))) { - return false; - } - return true; -} -template -inline bool WireFormatLite::ReadMessage( - io::CodedInputStream* input, MessageType* value) { - int length; - if (!input->ReadVarintSizeAsInt(&length)) return false; - std::pair p = - input->IncrementRecursionDepthAndPushLimit(length); - if (p.second < 0 || !value->MergePartialFromCodedStream(input)) return false; - // Make sure that parsing stopped when the limit was hit, not at an endgroup - // tag. - return input->DecrementRecursionDepthAndPopLimit(p.first); -} - -// =================================================================== - -inline void WireFormatLite::WriteTag(int field_number, WireType type, - io::CodedOutputStream* output) { - output->WriteTag(MakeTag(field_number, type)); -} - -inline void WireFormatLite::WriteInt32NoTag(int32 value, - io::CodedOutputStream* output) { - output->WriteVarint32SignExtended(value); -} -inline void WireFormatLite::WriteInt64NoTag(int64 value, - io::CodedOutputStream* output) { - output->WriteVarint64(static_cast(value)); -} -inline void WireFormatLite::WriteUInt32NoTag(uint32 value, - io::CodedOutputStream* output) { - output->WriteVarint32(value); -} -inline void WireFormatLite::WriteUInt64NoTag(uint64 value, - io::CodedOutputStream* output) { - output->WriteVarint64(value); -} -inline void WireFormatLite::WriteSInt32NoTag(int32 value, - io::CodedOutputStream* output) { - output->WriteVarint32(ZigZagEncode32(value)); -} -inline void WireFormatLite::WriteSInt64NoTag(int64 value, - io::CodedOutputStream* output) { - output->WriteVarint64(ZigZagEncode64(value)); -} -inline void 
WireFormatLite::WriteFixed32NoTag(uint32 value, - io::CodedOutputStream* output) { - output->WriteLittleEndian32(value); -} -inline void WireFormatLite::WriteFixed64NoTag(uint64 value, - io::CodedOutputStream* output) { - output->WriteLittleEndian64(value); -} -inline void WireFormatLite::WriteSFixed32NoTag(int32 value, - io::CodedOutputStream* output) { - output->WriteLittleEndian32(static_cast(value)); -} -inline void WireFormatLite::WriteSFixed64NoTag(int64 value, - io::CodedOutputStream* output) { - output->WriteLittleEndian64(static_cast(value)); -} -inline void WireFormatLite::WriteFloatNoTag(float value, - io::CodedOutputStream* output) { - output->WriteLittleEndian32(EncodeFloat(value)); -} -inline void WireFormatLite::WriteDoubleNoTag(double value, - io::CodedOutputStream* output) { - output->WriteLittleEndian64(EncodeDouble(value)); -} -inline void WireFormatLite::WriteBoolNoTag(bool value, - io::CodedOutputStream* output) { - output->WriteVarint32(value ? 1 : 0); -} -inline void WireFormatLite::WriteEnumNoTag(int value, - io::CodedOutputStream* output) { - output->WriteVarint32SignExtended(value); -} - -// See comment on ReadGroupNoVirtual to understand the need for this template -// parameter name. 
-template -inline void WireFormatLite::WriteGroupNoVirtual( - int field_number, const MessageType_WorkAroundCppLookupDefect& value, - io::CodedOutputStream* output) { - WriteTag(field_number, WIRETYPE_START_GROUP, output); - value.MessageType_WorkAroundCppLookupDefect::SerializeWithCachedSizes(output); - WriteTag(field_number, WIRETYPE_END_GROUP, output); -} -template -inline void WireFormatLite::WriteMessageNoVirtual( - int field_number, const MessageType_WorkAroundCppLookupDefect& value, - io::CodedOutputStream* output) { - WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output); - output->WriteVarint32( - value.MessageType_WorkAroundCppLookupDefect::GetCachedSize()); - value.MessageType_WorkAroundCppLookupDefect::SerializeWithCachedSizes(output); -} - -// =================================================================== - -inline uint8* WireFormatLite::WriteTagToArray(int field_number, - WireType type, - uint8* target) { - return io::CodedOutputStream::WriteTagToArray(MakeTag(field_number, type), - target); -} - -inline uint8* WireFormatLite::WriteInt32NoTagToArray(int32 value, - uint8* target) { - return io::CodedOutputStream::WriteVarint32SignExtendedToArray(value, target); -} -inline uint8* WireFormatLite::WriteInt64NoTagToArray(int64 value, - uint8* target) { - return io::CodedOutputStream::WriteVarint64ToArray( - static_cast(value), target); -} -inline uint8* WireFormatLite::WriteUInt32NoTagToArray(uint32 value, - uint8* target) { - return io::CodedOutputStream::WriteVarint32ToArray(value, target); -} -inline uint8* WireFormatLite::WriteUInt64NoTagToArray(uint64 value, - uint8* target) { - return io::CodedOutputStream::WriteVarint64ToArray(value, target); -} -inline uint8* WireFormatLite::WriteSInt32NoTagToArray(int32 value, - uint8* target) { - return io::CodedOutputStream::WriteVarint32ToArray(ZigZagEncode32(value), - target); -} -inline uint8* WireFormatLite::WriteSInt64NoTagToArray(int64 value, - uint8* target) { - return 
io::CodedOutputStream::WriteVarint64ToArray(ZigZagEncode64(value), - target); -} -inline uint8* WireFormatLite::WriteFixed32NoTagToArray(uint32 value, - uint8* target) { - return io::CodedOutputStream::WriteLittleEndian32ToArray(value, target); -} -inline uint8* WireFormatLite::WriteFixed64NoTagToArray(uint64 value, - uint8* target) { - return io::CodedOutputStream::WriteLittleEndian64ToArray(value, target); -} -inline uint8* WireFormatLite::WriteSFixed32NoTagToArray(int32 value, - uint8* target) { - return io::CodedOutputStream::WriteLittleEndian32ToArray( - static_cast(value), target); -} -inline uint8* WireFormatLite::WriteSFixed64NoTagToArray(int64 value, - uint8* target) { - return io::CodedOutputStream::WriteLittleEndian64ToArray( - static_cast(value), target); -} -inline uint8* WireFormatLite::WriteFloatNoTagToArray(float value, - uint8* target) { - return io::CodedOutputStream::WriteLittleEndian32ToArray(EncodeFloat(value), - target); -} -inline uint8* WireFormatLite::WriteDoubleNoTagToArray(double value, - uint8* target) { - return io::CodedOutputStream::WriteLittleEndian64ToArray(EncodeDouble(value), - target); -} -inline uint8* WireFormatLite::WriteBoolNoTagToArray(bool value, - uint8* target) { - return io::CodedOutputStream::WriteVarint32ToArray(value ? 
1 : 0, target); -} -inline uint8* WireFormatLite::WriteEnumNoTagToArray(int value, - uint8* target) { - return io::CodedOutputStream::WriteVarint32SignExtendedToArray(value, target); -} - -template -inline uint8* WireFormatLite::WritePrimitiveNoTagToArray( - const RepeatedField& value, - uint8* (*Writer)(T, uint8*), uint8* target) { - const int n = value.size(); - GOOGLE_DCHECK_GT(n, 0); - - const T* ii = value.unsafe_data(); - int i = 0; - do { - target = Writer(ii[i], target); - } while (++i < n); - - return target; -} - -template -inline uint8* WireFormatLite::WriteFixedNoTagToArray( - const RepeatedField& value, - uint8* (*Writer)(T, uint8*), uint8* target) { -#if defined(PROTOBUF_LITTLE_ENDIAN) - (void) Writer; - - const int n = value.size(); - GOOGLE_DCHECK_GT(n, 0); - - const T* ii = value.unsafe_data(); - const int bytes = n * static_cast(sizeof(ii[0])); - memcpy(target, ii, static_cast(bytes)); - return target + bytes; -#else - return WritePrimitiveNoTagToArray(value, Writer, target); -#endif -} - -inline uint8* WireFormatLite::WriteInt32NoTagToArray( - const RepeatedField< int32>& value, uint8* target) { - return WritePrimitiveNoTagToArray(value, WriteInt32NoTagToArray, target); -} -inline uint8* WireFormatLite::WriteInt64NoTagToArray( - const RepeatedField< int64>& value, uint8* target) { - return WritePrimitiveNoTagToArray(value, WriteInt64NoTagToArray, target); -} -inline uint8* WireFormatLite::WriteUInt32NoTagToArray( - const RepeatedField& value, uint8* target) { - return WritePrimitiveNoTagToArray(value, WriteUInt32NoTagToArray, target); -} -inline uint8* WireFormatLite::WriteUInt64NoTagToArray( - const RepeatedField& value, uint8* target) { - return WritePrimitiveNoTagToArray(value, WriteUInt64NoTagToArray, target); -} -inline uint8* WireFormatLite::WriteSInt32NoTagToArray( - const RepeatedField< int32>& value, uint8* target) { - return WritePrimitiveNoTagToArray(value, WriteSInt32NoTagToArray, target); -} -inline uint8* 
WireFormatLite::WriteSInt64NoTagToArray( - const RepeatedField< int64>& value, uint8* target) { - return WritePrimitiveNoTagToArray(value, WriteSInt64NoTagToArray, target); -} -inline uint8* WireFormatLite::WriteFixed32NoTagToArray( - const RepeatedField& value, uint8* target) { - return WriteFixedNoTagToArray(value, WriteFixed32NoTagToArray, target); -} -inline uint8* WireFormatLite::WriteFixed64NoTagToArray( - const RepeatedField& value, uint8* target) { - return WriteFixedNoTagToArray(value, WriteFixed64NoTagToArray, target); -} -inline uint8* WireFormatLite::WriteSFixed32NoTagToArray( - const RepeatedField< int32>& value, uint8* target) { - return WriteFixedNoTagToArray(value, WriteSFixed32NoTagToArray, target); -} -inline uint8* WireFormatLite::WriteSFixed64NoTagToArray( - const RepeatedField< int64>& value, uint8* target) { - return WriteFixedNoTagToArray(value, WriteSFixed64NoTagToArray, target); -} -inline uint8* WireFormatLite::WriteFloatNoTagToArray( - const RepeatedField< float>& value, uint8* target) { - return WriteFixedNoTagToArray(value, WriteFloatNoTagToArray, target); -} -inline uint8* WireFormatLite::WriteDoubleNoTagToArray( - const RepeatedField& value, uint8* target) { - return WriteFixedNoTagToArray(value, WriteDoubleNoTagToArray, target); -} -inline uint8* WireFormatLite::WriteBoolNoTagToArray( - const RepeatedField< bool>& value, uint8* target) { - return WritePrimitiveNoTagToArray(value, WriteBoolNoTagToArray, target); -} -inline uint8* WireFormatLite::WriteEnumNoTagToArray( - const RepeatedField< int>& value, uint8* target) { - return WritePrimitiveNoTagToArray(value, WriteEnumNoTagToArray, target); -} - -inline uint8* WireFormatLite::WriteInt32ToArray(int field_number, - int32 value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); - return WriteInt32NoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteInt64ToArray(int field_number, - int64 value, - uint8* target) { - target = 
WriteTagToArray(field_number, WIRETYPE_VARINT, target); - return WriteInt64NoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteUInt32ToArray(int field_number, - uint32 value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); - return WriteUInt32NoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteUInt64ToArray(int field_number, - uint64 value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); - return WriteUInt64NoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteSInt32ToArray(int field_number, - int32 value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); - return WriteSInt32NoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteSInt64ToArray(int field_number, - int64 value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); - return WriteSInt64NoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteFixed32ToArray(int field_number, - uint32 value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_FIXED32, target); - return WriteFixed32NoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteFixed64ToArray(int field_number, - uint64 value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_FIXED64, target); - return WriteFixed64NoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteSFixed32ToArray(int field_number, - int32 value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_FIXED32, target); - return WriteSFixed32NoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteSFixed64ToArray(int field_number, - int64 value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_FIXED64, target); - return WriteSFixed64NoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteFloatToArray(int field_number, - float value, - uint8* target) { - 
target = WriteTagToArray(field_number, WIRETYPE_FIXED32, target); - return WriteFloatNoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteDoubleToArray(int field_number, - double value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_FIXED64, target); - return WriteDoubleNoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteBoolToArray(int field_number, - bool value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); - return WriteBoolNoTagToArray(value, target); -} -inline uint8* WireFormatLite::WriteEnumToArray(int field_number, - int value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_VARINT, target); - return WriteEnumNoTagToArray(value, target); -} - -template -inline uint8* WireFormatLite::WritePrimitiveToArray( - int field_number, - const RepeatedField& value, - uint8* (*Writer)(int, T, uint8*), uint8* target) { - const int n = value.size(); - if (n == 0) { - return target; - } - - const T* ii = value.unsafe_data(); - int i = 0; - do { - target = Writer(field_number, ii[i], target); - } while (++i < n); - - return target; -} - -inline uint8* WireFormatLite::WriteInt32ToArray( - int field_number, const RepeatedField< int32>& value, uint8* target) { - return WritePrimitiveToArray(field_number, value, WriteInt32ToArray, target); -} -inline uint8* WireFormatLite::WriteInt64ToArray( - int field_number, const RepeatedField< int64>& value, uint8* target) { - return WritePrimitiveToArray(field_number, value, WriteInt64ToArray, target); -} -inline uint8* WireFormatLite::WriteUInt32ToArray( - int field_number, const RepeatedField& value, uint8* target) { - return WritePrimitiveToArray(field_number, value, WriteUInt32ToArray, target); -} -inline uint8* WireFormatLite::WriteUInt64ToArray( - int field_number, const RepeatedField& value, uint8* target) { - return WritePrimitiveToArray(field_number, value, WriteUInt64ToArray, target); -} -inline uint8* 
WireFormatLite::WriteSInt32ToArray( - int field_number, const RepeatedField< int32>& value, uint8* target) { - return WritePrimitiveToArray(field_number, value, WriteSInt32ToArray, target); -} -inline uint8* WireFormatLite::WriteSInt64ToArray( - int field_number, const RepeatedField< int64>& value, uint8* target) { - return WritePrimitiveToArray(field_number, value, WriteSInt64ToArray, target); -} -inline uint8* WireFormatLite::WriteFixed32ToArray( - int field_number, const RepeatedField& value, uint8* target) { - return WritePrimitiveToArray( - field_number, value, WriteFixed32ToArray, target); -} -inline uint8* WireFormatLite::WriteFixed64ToArray( - int field_number, const RepeatedField& value, uint8* target) { - return WritePrimitiveToArray( - field_number, value, WriteFixed64ToArray, target); -} -inline uint8* WireFormatLite::WriteSFixed32ToArray( - int field_number, const RepeatedField< int32>& value, uint8* target) { - return WritePrimitiveToArray( - field_number, value, WriteSFixed32ToArray, target); -} -inline uint8* WireFormatLite::WriteSFixed64ToArray( - int field_number, const RepeatedField< int64>& value, uint8* target) { - return WritePrimitiveToArray( - field_number, value, WriteSFixed64ToArray, target); -} -inline uint8* WireFormatLite::WriteFloatToArray( - int field_number, const RepeatedField< float>& value, uint8* target) { - return WritePrimitiveToArray(field_number, value, WriteFloatToArray, target); -} -inline uint8* WireFormatLite::WriteDoubleToArray( - int field_number, const RepeatedField& value, uint8* target) { - return WritePrimitiveToArray(field_number, value, WriteDoubleToArray, target); -} -inline uint8* WireFormatLite::WriteBoolToArray( - int field_number, const RepeatedField< bool>& value, uint8* target) { - return WritePrimitiveToArray(field_number, value, WriteBoolToArray, target); -} -inline uint8* WireFormatLite::WriteEnumToArray( - int field_number, const RepeatedField< int>& value, uint8* target) { - return 
WritePrimitiveToArray(field_number, value, WriteEnumToArray, target); -} -inline uint8* WireFormatLite::WriteStringToArray(int field_number, - const string& value, - uint8* target) { - // String is for UTF-8 text only - // WARNING: In wire_format.cc, both strings and bytes are handled by - // WriteString() to avoid code duplication. If the implementations become - // different, you will need to update that usage. - target = WriteTagToArray(field_number, WIRETYPE_LENGTH_DELIMITED, target); - return io::CodedOutputStream::WriteStringWithSizeToArray(value, target); -} -inline uint8* WireFormatLite::WriteBytesToArray(int field_number, - const string& value, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_LENGTH_DELIMITED, target); - return io::CodedOutputStream::WriteStringWithSizeToArray(value, target); -} - - -template -inline uint8* WireFormatLite::InternalWriteGroupToArray( - int field_number, const MessageType& value, bool deterministic, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_START_GROUP, target); - target = value.InternalSerializeWithCachedSizesToArray(deterministic, target); - return WriteTagToArray(field_number, WIRETYPE_END_GROUP, target); -} -template -inline uint8* WireFormatLite::InternalWriteMessageToArray( - int field_number, const MessageType& value, bool deterministic, - uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_LENGTH_DELIMITED, target); - target = io::CodedOutputStream::WriteVarint32ToArray( - static_cast(value.GetCachedSize()), target); - return value.InternalSerializeWithCachedSizesToArray(deterministic, target); -} - -// See comment on ReadGroupNoVirtual to understand the need for this template -// parameter name. 
-template -inline uint8* WireFormatLite::InternalWriteGroupNoVirtualToArray( - int field_number, const MessageType_WorkAroundCppLookupDefect& value, - bool deterministic, uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_START_GROUP, target); - target = value.MessageType_WorkAroundCppLookupDefect:: - InternalSerializeWithCachedSizesToArray(deterministic, target); - return WriteTagToArray(field_number, WIRETYPE_END_GROUP, target); -} -template -inline uint8* WireFormatLite::InternalWriteMessageNoVirtualToArray( - int field_number, const MessageType_WorkAroundCppLookupDefect& value, - bool deterministic, uint8* target) { - target = WriteTagToArray(field_number, WIRETYPE_LENGTH_DELIMITED, target); - target = io::CodedOutputStream::WriteVarint32ToArray( - static_cast( - value.MessageType_WorkAroundCppLookupDefect::GetCachedSize()), - target); - return value.MessageType_WorkAroundCppLookupDefect:: - InternalSerializeWithCachedSizesToArray(deterministic, target); -} - -// =================================================================== - -inline size_t WireFormatLite::Int32Size(int32 value) { - return io::CodedOutputStream::VarintSize32SignExtended(value); -} -inline size_t WireFormatLite::Int64Size(int64 value) { - return io::CodedOutputStream::VarintSize64(static_cast(value)); -} -inline size_t WireFormatLite::UInt32Size(uint32 value) { - return io::CodedOutputStream::VarintSize32(value); -} -inline size_t WireFormatLite::UInt64Size(uint64 value) { - return io::CodedOutputStream::VarintSize64(value); -} -inline size_t WireFormatLite::SInt32Size(int32 value) { - return io::CodedOutputStream::VarintSize32(ZigZagEncode32(value)); -} -inline size_t WireFormatLite::SInt64Size(int64 value) { - return io::CodedOutputStream::VarintSize64(ZigZagEncode64(value)); -} -inline size_t WireFormatLite::EnumSize(int value) { - return io::CodedOutputStream::VarintSize32SignExtended(value); -} - -inline size_t WireFormatLite::StringSize(const string& value) { - return 
LengthDelimitedSize(value.size()); -} -inline size_t WireFormatLite::BytesSize(const string& value) { - return LengthDelimitedSize(value.size()); -} - - -template -inline size_t WireFormatLite::GroupSize(const MessageType& value) { - return value.ByteSizeLong(); -} -template -inline size_t WireFormatLite::MessageSize(const MessageType& value) { - return LengthDelimitedSize(value.ByteSizeLong()); -} - -// See comment on ReadGroupNoVirtual to understand the need for this template -// parameter name. -template -inline size_t WireFormatLite::GroupSizeNoVirtual( - const MessageType_WorkAroundCppLookupDefect& value) { - return value.MessageType_WorkAroundCppLookupDefect::ByteSizeLong(); -} -template -inline size_t WireFormatLite::MessageSizeNoVirtual( - const MessageType_WorkAroundCppLookupDefect& value) { - return LengthDelimitedSize( - value.MessageType_WorkAroundCppLookupDefect::ByteSizeLong()); -} - -inline size_t WireFormatLite::LengthDelimitedSize(size_t length) { - // The static_cast here prevents an error in certain compiler configurations - // but is not technically correct--if length is too large to fit in a uint32 - // then it will be silently truncated. We will need to fix this if we ever - // decide to start supporting serialized messages greater than 2 GiB in size. 
- return length + io::CodedOutputStream::VarintSize32( - static_cast(length)); -} - -} // namespace internal -} // namespace protobuf - -} // namespace google -#endif // GOOGLE_PROTOBUF_WIRE_FORMAT_LITE_INL_H__ diff --git a/third_party/protobuf-lite/implicit_weak_message.cc b/third_party/protobuf-lite/implicit_weak_message.cc index 7a1d4446..53906169 100644 --- a/third_party/protobuf-lite/implicit_weak_message.cc +++ b/third_party/protobuf-lite/implicit_weak_message.cc @@ -30,31 +30,33 @@ #include -#include +#include #include +#include #include +#include + namespace google { namespace protobuf { namespace internal { -bool ImplicitWeakMessage::MergePartialFromCodedStream(io::CodedInputStream* input) { - io::StringOutputStream string_stream(&data_); - io::CodedOutputStream coded_stream(&string_stream, false); - return WireFormatLite::SkipMessage(input, &coded_stream); +const char* ImplicitWeakMessage::_InternalParse(const char* ptr, + ParseContext* ctx) { + return ctx->AppendString(ptr, &data_); } -::google::protobuf::internal::ExplicitlyConstructed +ExplicitlyConstructed implicit_weak_message_default_instance; -GOOGLE_PROTOBUF_DECLARE_ONCE(implicit_weak_message_once_init_); +internal::once_flag implicit_weak_message_once_init_; void InitImplicitWeakMessageDefaultInstance() { implicit_weak_message_default_instance.DefaultConstruct(); } const ImplicitWeakMessage* ImplicitWeakMessage::default_instance() { - ::google::protobuf::GoogleOnceInit(&implicit_weak_message_once_init_, - &InitImplicitWeakMessageDefaultInstance); + internal::call_once(implicit_weak_message_once_init_, + InitImplicitWeakMessageDefaultInstance); return &implicit_weak_message_default_instance.get(); } diff --git a/third_party/protobuf-lite/int128.cc b/third_party/protobuf-lite/int128.cc index a5090801..2119e655 100644 --- a/third_party/protobuf-lite/int128.cc +++ b/third_party/protobuf-lite/int128.cc @@ -34,12 +34,16 @@ #include // NOLINT(readability/streams) #include +#include + +#include + 
namespace google { namespace protobuf { const uint128_pod kuint128max = { - static_cast(GOOGLE_LONGLONG(0xFFFFFFFFFFFFFFFF)), - static_cast(GOOGLE_LONGLONG(0xFFFFFFFFFFFFFFFF)) + static_cast(PROTOBUF_LONGLONG(0xFFFFFFFFFFFFFFFF)), + static_cast(PROTOBUF_LONGLONG(0xFFFFFFFFFFFFFFFF)) }; // Returns the 0-based position of the last set bit (i.e., most significant bit) @@ -63,7 +67,7 @@ static inline int Fls64(uint64 n) { STEP(uint32, n32, pos, 0x10); STEP(uint32, n32, pos, 0x08); STEP(uint32, n32, pos, 0x04); - return pos + ((GOOGLE_ULONGLONG(0x3333333322221100) >> (n32 << 2)) & 0x3); + return pos + ((PROTOBUF_ULONGLONG(0x3333333322221100) >> (n32 << 2)) & 0x3); } #undef STEP @@ -76,52 +80,36 @@ static inline int Fls128(uint128 n) { return Fls64(Uint128Low64(n)); } -// Long division/modulo for uint128 implemented using the shift-subtract -// division algorithm adapted from: -// http://stackoverflow.com/questions/5386377/division-without-using void uint128::DivModImpl(uint128 dividend, uint128 divisor, uint128* quotient_ret, uint128* remainder_ret) { if (divisor == 0) { GOOGLE_LOG(FATAL) << "Division or mod by zero: dividend.hi=" << dividend.hi_ << ", lo=" << dividend.lo_; - } - - if (divisor > dividend) { + } else if (dividend < divisor) { *quotient_ret = 0; *remainder_ret = dividend; return; - } - - if (divisor == dividend) { - *quotient_ret = 1; - *remainder_ret = 0; - return; - } - - uint128 denominator = divisor; - uint128 position = 1; - uint128 quotient = 0; - - // Left aligns the MSB of the denominator and the dividend. - int shift = Fls128(dividend) - Fls128(denominator); - denominator <<= shift; - position <<= shift; - - // Uses shift-subtract algorithm to divide dividend by denominator. The - // remainder will be left in dividend. 
- while (position > 0) { - if (dividend >= denominator) { - dividend -= denominator; - quotient |= position; + } else { + int dividend_bit_length = Fls128(dividend); + int divisor_bit_length = Fls128(divisor); + int difference = dividend_bit_length - divisor_bit_length; + uint128 quotient = 0; + while (difference >= 0) { + quotient <<= 1; + uint128 shifted_divisor = divisor << difference; + if (shifted_divisor <= dividend) { + dividend -= shifted_divisor; + quotient += 1; + } + difference -= 1; } - position >>= 1; - denominator >>= 1; + //record the final quotient and remainder + *quotient_ret = quotient; + *remainder_ret = dividend; } - - *quotient_ret = quotient; - *remainder_ret = dividend; } + uint128& uint128::operator/=(const uint128& divisor) { uint128 quotient = 0; uint128 remainder = 0; @@ -145,15 +133,18 @@ std::ostream& operator<<(std::ostream& o, const uint128& b) { std::streamsize div_base_log; switch (flags & std::ios::basefield) { case std::ios::hex: - div = static_cast(GOOGLE_ULONGLONG(0x1000000000000000)); // 16^15 + div = + static_cast(PROTOBUF_ULONGLONG(0x1000000000000000)); // 16^15 div_base_log = 15; break; case std::ios::oct: - div = static_cast(GOOGLE_ULONGLONG(01000000000000000000000)); // 8^21 + div = static_cast( + PROTOBUF_ULONGLONG(01000000000000000000000)); // 8^21 div_base_log = 21; break; default: // std::ios::dec - div = static_cast(GOOGLE_ULONGLONG(10000000000000000000)); // 10^19 + div = static_cast( + PROTOBUF_ULONGLONG(10000000000000000000)); // 10^19 div_base_log = 19; break; } diff --git a/third_party/protobuf-lite/io_win32.cc b/third_party/protobuf-lite/io_win32.cc index 4407facb..d22ceac7 100644 --- a/third_party/protobuf-lite/io_win32.cc +++ b/third_party/protobuf-lite/io_win32.cc @@ -29,7 +29,9 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Author: laszlocsomor@google.com (Laszlo Csomor) -// +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. 
+ // Implementation for long-path-aware open/mkdir/access/etc. on Windows, as well // as for the supporting utility functions. // @@ -40,25 +42,29 @@ // // This file is only used on Windows, it's empty on other platforms. -#if defined(_WIN32) +#if defined(_WIN32) && !defined(_XBOX_ONE) // Comment this out to fall back to using the ANSI versions (open, mkdir, ...) // instead of the Unicode ones (_wopen, _wmkdir, ...). Doing so can be useful to // debug failing tests if that's caused by the long path support. #define SUPPORT_LONGPATHS +#include + #include #include #include #include #include -#include #include #include #include -#include -#include +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif + +#include #include #include @@ -67,7 +73,7 @@ namespace google { namespace protobuf { -namespace internal { +namespace io { namespace win32 { namespace { @@ -91,7 +97,7 @@ struct CharTraits { template bool null_or_empty(const char_type* s) { - return s == NULL || *s == 0; + return s == nullptr || *s == 0; } // Returns true if the path starts with a drive letter, e.g. "c:". 
@@ -225,7 +231,7 @@ bool as_windows_path(const char* path, wstring* result) { if (!is_path_absolute(wpath.c_str())) { - int size = ::GetCurrentDirectoryW(0, NULL); + int size = ::GetCurrentDirectoryW(0, nullptr); if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { return false; } @@ -316,17 +322,17 @@ FILE* fopen(const char* path, const char* mode) { #ifdef SUPPORT_LONGPATHS if (null_or_empty(path)) { errno = EINVAL; - return NULL; + return nullptr; } wstring wpath; if (!as_windows_path(path, &wpath)) { errno = ENOENT; - return NULL; + return nullptr; } wstring wmode; if (!strings::utf8_to_wcs(mode, &wmode)) { errno = EINVAL; - return NULL; + return nullptr; } return ::_wfopen(wpath.c_str(), wmode.c_str()); #else @@ -334,7 +340,7 @@ FILE* fopen(const char* path, const char* mode) { #endif } -int close(int fd) { return ::close(fd); } +int close(int fd) { return ::_close(fd); } int dup(int fd) { return ::_dup(fd); } @@ -355,6 +361,56 @@ wstring testonly_utf8_to_winpath(const char* path) { return as_windows_path(path, &wpath) ? wpath : wstring(); } +ExpandWildcardsResult ExpandWildcards( + const string& path, std::function consume) { + if (path.find_first_of("*?") == string::npos) { + // There are no wildcards in the path, we don't need to expand it. + consume(path); + return ExpandWildcardsResult::kSuccess; + } + + wstring wpath; + if (!as_windows_path(path.c_str(), &wpath)) { + return ExpandWildcardsResult::kErrorInputPathConversion; + } + + static const wstring kDot = L"."; + static const wstring kDotDot = L".."; + WIN32_FIND_DATAW metadata; + HANDLE handle = ::FindFirstFileW(wpath.c_str(), &metadata); + if (handle == INVALID_HANDLE_VALUE) { + // The pattern does not match any files (or directories). 
+ return ExpandWildcardsResult::kErrorNoMatchingFile; + } + + string::size_type pos = path.find_last_of("\\/"); + string dirname; + if (pos != string::npos) { + dirname = path.substr(0, pos + 1); + } + + ExpandWildcardsResult matched = ExpandWildcardsResult::kErrorNoMatchingFile; + do { + // Ignore ".", "..", and directories. + if ((metadata.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0 && + kDot != metadata.cFileName && kDotDot != metadata.cFileName) { + matched = ExpandWildcardsResult::kSuccess; + string filename; + if (!strings::wcs_to_utf8(metadata.cFileName, &filename)) { + return ExpandWildcardsResult::kErrorOutputPathConversion; + } + + if (dirname.empty()) { + consume(filename); + } else { + consume(dirname + filename); + } + } + } while (::FindNextFileW(handle, &metadata)); + FindClose(handle); + return matched; +} + namespace strings { bool wcs_to_mbs(const WCHAR* s, string* out, bool outUtf8) { @@ -365,15 +421,15 @@ bool wcs_to_mbs(const WCHAR* s, string* out, bool outUtf8) { BOOL usedDefaultChar = FALSE; SetLastError(0); int size = WideCharToMultiByte( - outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, NULL, 0, NULL, - outUtf8 ? NULL : &usedDefaultChar); + outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, nullptr, 0, nullptr, + outUtf8 ? nullptr : &usedDefaultChar); if ((size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) || usedDefaultChar) { return false; } std::unique_ptr astr(new CHAR[size]); WideCharToMultiByte( - outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, astr.get(), size, NULL, NULL); + outUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, astr.get(), size, nullptr, nullptr); out->assign(astr.get()); return true; } @@ -386,7 +442,7 @@ bool mbs_to_wcs(const char* s, wstring* out, bool inUtf8) { SetLastError(0); int size = - MultiByteToWideChar(inUtf8 ? CP_UTF8 : CP_ACP, 0, s, -1, NULL, 0); + MultiByteToWideChar(inUtf8 ? 
CP_UTF8 : CP_ACP, 0, s, -1, nullptr, 0); if (size == 0 && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { return false; } @@ -407,7 +463,7 @@ bool wcs_to_utf8(const wchar_t* input, string* out) { } // namespace strings } // namespace win32 -} // namespace internal +} // namespace io } // namespace protobuf } // namespace google diff --git a/third_party/protobuf-lite/message_lite.cc b/third_party/protobuf-lite/message_lite.cc index 123b142d..0e859914 100644 --- a/third_party/protobuf-lite/message_lite.cc +++ b/third_party/protobuf-lite/message_lite.cc @@ -33,26 +33,42 @@ // Based on original Protocol Buffers design by // Sanjay Ghemawat, Jeff Dean, and others. -#include - -#include -#include #include -#include + +#include +#include #include + #include #include +#include +#include #include +#include +#include #include +#include +#include +#include +#include +#include #include +#include + +#include namespace google { namespace protobuf { -string MessageLite::InitializationErrorString() const { +std::string MessageLite::InitializationErrorString() const { return "(cannot determine missing fields for lite message)"; } +std::string MessageLite::DebugString() const { + std::uintptr_t address = reinterpret_cast(this); + return StrCat("MessageLite at 0x", strings::Hex(address)); +} + namespace { // When serializing, we first compute the byte size, then serialize the message. @@ -71,12 +87,13 @@ void ByteSizeConsistencyError(size_t byte_size_before_serialization, GOOGLE_CHECK_EQ(bytes_produced_by_serialization, byte_size_before_serialization) << "Byte size calculation and serialization were inconsistent. 
This " "may indicate a bug in protocol buffers or it may be caused by " - "concurrent modification of " << message.GetTypeName() << "."; + "concurrent modification of " + << message.GetTypeName() << "."; GOOGLE_LOG(FATAL) << "This shouldn't be called if all the sizes are equal."; } -string InitializationErrorMessage(const char* action, - const MessageLite& message) { +std::string InitializationErrorMessage(const char* action, + const MessageLite& message) { // Note: We want to avoid depending on strutil in the lite library, otherwise // we'd use: // @@ -86,7 +103,7 @@ string InitializationErrorMessage(const char* action, // action, message.GetTypeName(), // message.InitializationErrorString()); - string result; + std::string result; result += "Can't "; result += action; result += " message of type \""; @@ -96,64 +113,89 @@ string InitializationErrorMessage(const char* action, return result; } -// Several of the Parse methods below just do one thing and then call another -// method. In a naive implementation, we might have ParseFromString() call -// ParseFromArray() which would call ParseFromZeroCopyStream() which would call -// ParseFromCodedStream() which would call MergeFromCodedStream() which would -// call MergePartialFromCodedStream(). However, when parsing very small -// messages, every function call introduces significant overhead. To avoid -// this without reproducing code, we use these forced-inline helpers. 
-GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE bool InlineMergeFromCodedStream( - io::CodedInputStream* input, MessageLite* message); -GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE bool InlineParseFromCodedStream( - io::CodedInputStream* input, MessageLite* message); -GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE bool InlineParsePartialFromCodedStream( - io::CodedInputStream* input, MessageLite* message); -GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE bool InlineParseFromArray( - const void* data, int size, MessageLite* message); -GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE bool InlineParsePartialFromArray( - const void* data, int size, MessageLite* message); - -inline bool InlineMergeFromCodedStream(io::CodedInputStream* input, - MessageLite* message) { - if (!message->MergePartialFromCodedStream(input)) return false; - if (!message->IsInitialized()) { - GOOGLE_LOG(ERROR) << InitializationErrorMessage("parse", *message); - return false; - } - return true; +inline StringPiece as_string_view(const void* data, int size) { + return StringPiece(static_cast(data), size); } -inline bool InlineParseFromCodedStream(io::CodedInputStream* input, - MessageLite* message) { - message->Clear(); - return InlineMergeFromCodedStream(input, message); +// Returns true of all required fields are present / have values. 
+inline bool CheckFieldPresence(const internal::ParseContext& ctx, + const MessageLite& msg, + MessageLite::ParseFlags parse_flags) { + if (PROTOBUF_PREDICT_FALSE((parse_flags & MessageLite::kMergePartial) != 0)) { + return true; + } + return msg.IsInitializedWithErrors(); } -inline bool InlineParsePartialFromCodedStream(io::CodedInputStream* input, - MessageLite* message) { - message->Clear(); - return message->MergePartialFromCodedStream(input); -} +} // namespace -inline bool InlineParseFromArray( - const void* data, int size, MessageLite* message) { - io::CodedInputStream input(reinterpret_cast(data), size); - return InlineParseFromCodedStream(&input, message) && - input.ConsumedEntireMessage(); +void MessageLite::LogInitializationErrorMessage() const { + GOOGLE_LOG(ERROR) << InitializationErrorMessage("parse", *this); } -inline bool InlineParsePartialFromArray( - const void* data, int size, MessageLite* message) { - io::CodedInputStream input(reinterpret_cast(data), size); - return InlineParsePartialFromCodedStream(&input, message) && - input.ConsumedEntireMessage(); -} +namespace internal { -} // namespace +template +bool MergeFromImpl(StringPiece input, MessageLite* msg, + MessageLite::ParseFlags parse_flags) { + const char* ptr; + internal::ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(), + aliasing, &ptr, input); + ptr = msg->_InternalParse(ptr, &ctx); + // ctx has an explicit limit set (length of string_view). 
+ if (PROTOBUF_PREDICT_TRUE(ptr && ctx.EndedAtLimit())) { + return CheckFieldPresence(ctx, *msg, parse_flags); + } + return false; +} + +template +bool MergeFromImpl(io::ZeroCopyInputStream* input, MessageLite* msg, + MessageLite::ParseFlags parse_flags) { + const char* ptr; + internal::ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(), + aliasing, &ptr, input); + ptr = msg->_InternalParse(ptr, &ctx); + // ctx has no explicit limit (hence we end on end of stream) + if (PROTOBUF_PREDICT_TRUE(ptr && ctx.EndedAtEndOfStream())) { + return CheckFieldPresence(ctx, *msg, parse_flags); + } + return false; +} + +template +bool MergeFromImpl(BoundedZCIS input, MessageLite* msg, + MessageLite::ParseFlags parse_flags) { + const char* ptr; + internal::ParseContext ctx(io::CodedInputStream::GetDefaultRecursionLimit(), + aliasing, &ptr, input.zcis, input.limit); + ptr = msg->_InternalParse(ptr, &ctx); + if (PROTOBUF_PREDICT_FALSE(!ptr)) return false; + ctx.BackUp(ptr); + if (PROTOBUF_PREDICT_TRUE(ctx.EndedAtLimit())) { + return CheckFieldPresence(ctx, *msg, parse_flags); + } + return false; +} + +template bool MergeFromImpl(StringPiece input, MessageLite* msg, + MessageLite::ParseFlags parse_flags); +template bool MergeFromImpl(StringPiece input, MessageLite* msg, + MessageLite::ParseFlags parse_flags); +template bool MergeFromImpl(io::ZeroCopyInputStream* input, + MessageLite* msg, + MessageLite::ParseFlags parse_flags); +template bool MergeFromImpl(io::ZeroCopyInputStream* input, + MessageLite* msg, + MessageLite::ParseFlags parse_flags); +template bool MergeFromImpl(BoundedZCIS input, MessageLite* msg, + MessageLite::ParseFlags parse_flags); +template bool MergeFromImpl(BoundedZCIS input, MessageLite* msg, + MessageLite::ParseFlags parse_flags); +} // namespace internal -MessageLite* MessageLite::New(::google::protobuf::Arena* arena) const { +MessageLite* MessageLite::New(Arena* arena) const { MessageLite* message = New(); if (arena != NULL) { 
arena->Own(message); @@ -161,70 +203,169 @@ MessageLite* MessageLite::New(::google::protobuf::Arena* arena) const { return message; } +class ZeroCopyCodedInputStream : public io::ZeroCopyInputStream { + public: + ZeroCopyCodedInputStream(io::CodedInputStream* cis) : cis_(cis) {} + bool Next(const void** data, int* size) final { + if (!cis_->GetDirectBufferPointer(data, size)) return false; + cis_->Skip(*size); + return true; + } + void BackUp(int count) final { cis_->Advance(-count); } + bool Skip(int count) final { return cis_->Skip(count); } + int64_t ByteCount() const final { return 0; } + + bool aliasing_enabled() { return cis_->aliasing_enabled_; } + + private: + io::CodedInputStream* cis_; +}; + +bool MessageLite::MergeFromImpl(io::CodedInputStream* input, + MessageLite::ParseFlags parse_flags) { + ZeroCopyCodedInputStream zcis(input); + const char* ptr; + internal::ParseContext ctx(input->RecursionBudget(), zcis.aliasing_enabled(), + &ptr, &zcis); + // MergePartialFromCodedStream allows terminating the wireformat by 0 or + // end-group tag. Leaving it up to the caller to verify correct ending by + // calling LastTagWas on input. We need to maintain this behavior. + ctx.TrackCorrectEnding(); + ctx.data().pool = input->GetExtensionPool(); + ctx.data().factory = input->GetExtensionFactory(); + ptr = _InternalParse(ptr, &ctx); + if (PROTOBUF_PREDICT_FALSE(!ptr)) return false; + ctx.BackUp(ptr); + if (!ctx.EndedAtEndOfStream()) { + GOOGLE_DCHECK(ctx.LastTag() != 1); // We can't end on a pushed limit. 
+ if (ctx.IsExceedingLimit(ptr)) return false; + input->SetLastTag(ctx.LastTag()); + } else { + input->SetConsumed(); + } + return CheckFieldPresence(ctx, *this, parse_flags); +} + +bool MessageLite::MergePartialFromCodedStream(io::CodedInputStream* input) { + return MergeFromImpl(input, kMergePartial); +} + bool MessageLite::MergeFromCodedStream(io::CodedInputStream* input) { - return InlineMergeFromCodedStream(input, this); + return MergeFromImpl(input, kMerge); } bool MessageLite::ParseFromCodedStream(io::CodedInputStream* input) { - return InlineParseFromCodedStream(input, this); + Clear(); + return MergeFromImpl(input, kParse); } bool MessageLite::ParsePartialFromCodedStream(io::CodedInputStream* input) { - return InlineParsePartialFromCodedStream(input, this); + Clear(); + return MergeFromImpl(input, kParsePartial); } bool MessageLite::ParseFromZeroCopyStream(io::ZeroCopyInputStream* input) { - io::CodedInputStream decoder(input); - return ParseFromCodedStream(&decoder) && decoder.ConsumedEntireMessage(); + return ParseFrom(input); } bool MessageLite::ParsePartialFromZeroCopyStream( io::ZeroCopyInputStream* input) { - io::CodedInputStream decoder(input); - return ParsePartialFromCodedStream(&decoder) && - decoder.ConsumedEntireMessage(); + return ParseFrom(input); +} + +bool MessageLite::ParseFromFileDescriptor(int file_descriptor) { + io::FileInputStream input(file_descriptor); + return ParseFromZeroCopyStream(&input) && input.GetErrno() == 0; +} + +bool MessageLite::ParsePartialFromFileDescriptor(int file_descriptor) { + io::FileInputStream input(file_descriptor); + return ParsePartialFromZeroCopyStream(&input) && input.GetErrno() == 0; +} + +bool MessageLite::ParseFromIstream(std::istream* input) { + io::IstreamInputStream zero_copy_input(input); + return ParseFromZeroCopyStream(&zero_copy_input) && input->eof(); } -bool MessageLite::ParseFromBoundedZeroCopyStream( +bool MessageLite::ParsePartialFromIstream(std::istream* input) { + io::IstreamInputStream 
zero_copy_input(input); + return ParsePartialFromZeroCopyStream(&zero_copy_input) && input->eof(); +} + +bool MessageLite::MergePartialFromBoundedZeroCopyStream( io::ZeroCopyInputStream* input, int size) { - io::CodedInputStream decoder(input); - decoder.PushLimit(size); - return ParseFromCodedStream(&decoder) && - decoder.ConsumedEntireMessage() && - decoder.BytesUntilLimit() == 0; + return ParseFrom(internal::BoundedZCIS{input, size}); +} + +bool MessageLite::MergeFromBoundedZeroCopyStream(io::ZeroCopyInputStream* input, + int size) { + return ParseFrom(internal::BoundedZCIS{input, size}); +} + +bool MessageLite::ParseFromBoundedZeroCopyStream(io::ZeroCopyInputStream* input, + int size) { + return ParseFrom(internal::BoundedZCIS{input, size}); } bool MessageLite::ParsePartialFromBoundedZeroCopyStream( io::ZeroCopyInputStream* input, int size) { - io::CodedInputStream decoder(input); - decoder.PushLimit(size); - return ParsePartialFromCodedStream(&decoder) && - decoder.ConsumedEntireMessage() && - decoder.BytesUntilLimit() == 0; + return ParseFrom(internal::BoundedZCIS{input, size}); } -bool MessageLite::ParseFromString(const string& data) { - return InlineParseFromArray(data.data(), data.size(), this); +bool MessageLite::ParseFromString(ConstStringParam data) { + return ParseFrom(data); } -bool MessageLite::ParsePartialFromString(const string& data) { - return InlineParsePartialFromArray(data.data(), data.size(), this); +bool MessageLite::ParsePartialFromString(ConstStringParam data) { + return ParseFrom(data); } bool MessageLite::ParseFromArray(const void* data, int size) { - return InlineParseFromArray(data, size, this); + return ParseFrom(as_string_view(data, size)); } bool MessageLite::ParsePartialFromArray(const void* data, int size) { - return InlineParsePartialFromArray(data, size, this); + return ParseFrom(as_string_view(data, size)); +} + +bool MessageLite::MergeFromString(ConstStringParam data) { + return ParseFrom(data); } // 
=================================================================== +inline uint8* SerializeToArrayImpl(const MessageLite& msg, uint8* target, + int size) { + constexpr bool debug = false; + if (debug) { + // Force serialization to a stream with a block size of 1, which forces + // all writes to the stream to cross buffers triggering all fallback paths + // in the unittests when serializing to string / array. + io::ArrayOutputStream stream(target, size, 1); + uint8* ptr; + io::EpsCopyOutputStream out( + &stream, io::CodedOutputStream::IsDefaultSerializationDeterministic(), + &ptr); + ptr = msg._InternalSerialize(ptr, &out); + out.Trim(ptr); + GOOGLE_DCHECK(!out.HadError() && stream.ByteCount() == size); + return target + size; + } else { + io::EpsCopyOutputStream out( + target, size, + io::CodedOutputStream::IsDefaultSerializationDeterministic()); + auto res = msg._InternalSerialize(target, &out); + GOOGLE_DCHECK(target + size == res); + return res; + } +} + uint8* MessageLite::SerializeWithCachedSizesToArray(uint8* target) const { - return InternalSerializeWithCachedSizesToArray( - io::CodedOutputStream::IsDefaultSerializationDeterministic(), target); + // We only optimize this when using optimize_for = SPEED. In other cases + // we just use the CodedOutputStream path. + return SerializeToArrayImpl(*this, target, GetCachedSize()); } bool MessageLite::SerializeToCodedStream(io::CodedOutputStream* output) const { @@ -236,76 +377,101 @@ bool MessageLite::SerializePartialToCodedStream( io::CodedOutputStream* output) const { const size_t size = ByteSizeLong(); // Force size to be cached. 
if (size > INT_MAX) { - GOOGLE_LOG(ERROR) << "Exceeded maximum protobuf size of 2GB: " << size; + GOOGLE_LOG(ERROR) << GetTypeName() + << " exceeded maximum protobuf size of 2GB: " << size; return false; } - uint8* buffer = output->GetDirectBufferForNBytesAndAdvance(size); - if (buffer != NULL) { - uint8* end = InternalSerializeWithCachedSizesToArray( - output->IsSerializationDeterministic(), buffer); - if (end - buffer != size) { - ByteSizeConsistencyError(size, ByteSizeLong(), end - buffer, *this); - } - return true; - } else { - int original_byte_count = output->ByteCount(); - SerializeWithCachedSizes(output); - if (output->HadError()) { - return false; - } - int final_byte_count = output->ByteCount(); - - if (final_byte_count - original_byte_count != size) { - ByteSizeConsistencyError(size, ByteSizeLong(), - final_byte_count - original_byte_count, *this); - } + int original_byte_count = output->ByteCount(); + SerializeWithCachedSizes(output); + if (output->HadError()) { + return false; + } + int final_byte_count = output->ByteCount(); - return true; + if (final_byte_count - original_byte_count != size) { + ByteSizeConsistencyError(size, ByteSizeLong(), + final_byte_count - original_byte_count, *this); } + + return true; } bool MessageLite::SerializeToZeroCopyStream( io::ZeroCopyOutputStream* output) const { - io::CodedOutputStream encoder(output); - return SerializeToCodedStream(&encoder); + GOOGLE_DCHECK(IsInitialized()) << InitializationErrorMessage("serialize", *this); + return SerializePartialToZeroCopyStream(output); } bool MessageLite::SerializePartialToZeroCopyStream( io::ZeroCopyOutputStream* output) const { - io::CodedOutputStream encoder(output); - return SerializePartialToCodedStream(&encoder); + const size_t size = ByteSizeLong(); // Force size to be cached. 
+ if (size > INT_MAX) { + GOOGLE_LOG(ERROR) << GetTypeName() + << " exceeded maximum protobuf size of 2GB: " << size; + return false; + } + + uint8* target; + io::EpsCopyOutputStream stream( + output, io::CodedOutputStream::IsDefaultSerializationDeterministic(), + &target); + target = _InternalSerialize(target, &stream); + stream.Trim(target); + if (stream.HadError()) return false; + return true; } -bool MessageLite::AppendToString(string* output) const { +bool MessageLite::SerializeToFileDescriptor(int file_descriptor) const { + io::FileOutputStream output(file_descriptor); + return SerializeToZeroCopyStream(&output) && output.Flush(); +} + +bool MessageLite::SerializePartialToFileDescriptor(int file_descriptor) const { + io::FileOutputStream output(file_descriptor); + return SerializePartialToZeroCopyStream(&output) && output.Flush(); +} + +bool MessageLite::SerializeToOstream(std::ostream* output) const { + { + io::OstreamOutputStream zero_copy_output(output); + if (!SerializeToZeroCopyStream(&zero_copy_output)) return false; + } + return output->good(); +} + +bool MessageLite::SerializePartialToOstream(std::ostream* output) const { + io::OstreamOutputStream zero_copy_output(output); + return SerializePartialToZeroCopyStream(&zero_copy_output); +} + +bool MessageLite::AppendToString(std::string* output) const { GOOGLE_DCHECK(IsInitialized()) << InitializationErrorMessage("serialize", *this); return AppendPartialToString(output); } -bool MessageLite::AppendPartialToString(string* output) const { +bool MessageLite::AppendPartialToString(std::string* output) const { size_t old_size = output->size(); size_t byte_size = ByteSizeLong(); if (byte_size > INT_MAX) { - GOOGLE_LOG(ERROR) << "Exceeded maximum protobuf size of 2GB: " << byte_size; + GOOGLE_LOG(ERROR) << GetTypeName() + << " exceeded maximum protobuf size of 2GB: " << byte_size; return false; } STLStringResizeUninitialized(output, old_size + byte_size); uint8* start = 
reinterpret_cast(io::mutable_string_data(output) + old_size); - uint8* end = SerializeWithCachedSizesToArray(start); - if (end - start != byte_size) { - ByteSizeConsistencyError(byte_size, ByteSizeLong(), end - start, *this); - } + SerializeToArrayImpl(*this, start, byte_size); return true; } -bool MessageLite::SerializeToString(string* output) const { +bool MessageLite::SerializeToString(std::string* output) const { output->clear(); return AppendToString(output); } -bool MessageLite::SerializePartialToString(string* output) const { +bool MessageLite::SerializePartialToString(std::string* output) const { output->clear(); return AppendPartialToString(output); } @@ -316,71 +482,40 @@ bool MessageLite::SerializeToArray(void* data, int size) const { } bool MessageLite::SerializePartialToArray(void* data, int size) const { - int byte_size = ByteSizeLong(); + const size_t byte_size = ByteSizeLong(); + if (byte_size > INT_MAX) { + GOOGLE_LOG(ERROR) << GetTypeName() + << " exceeded maximum protobuf size of 2GB: " << byte_size; + return false; + } if (size < byte_size) return false; uint8* start = reinterpret_cast(data); - uint8* end = SerializeWithCachedSizesToArray(start); - if (end - start != byte_size) { - ByteSizeConsistencyError(byte_size, ByteSizeLong(), end - start, *this); - } + SerializeToArrayImpl(*this, start, byte_size); return true; } -string MessageLite::SerializeAsString() const { +std::string MessageLite::SerializeAsString() const { // If the compiler implements the (Named) Return Value Optimization, // the local variable 'output' will not actually reside on the stack // of this function, but will be overlaid with the object that the // caller supplied for the return value to be constructed in. 
- string output; - if (!AppendToString(&output)) - output.clear(); + std::string output; + if (!AppendToString(&output)) output.clear(); return output; } -string MessageLite::SerializePartialAsString() const { - string output; - if (!AppendPartialToString(&output)) - output.clear(); +std::string MessageLite::SerializePartialAsString() const { + std::string output; + if (!AppendPartialToString(&output)) output.clear(); return output; } -void MessageLite::SerializeWithCachedSizes( - io::CodedOutputStream* output) const { - GOOGLE_DCHECK(InternalGetTable()); - internal::TableSerialize( - *this, - static_cast(InternalGetTable()), - output); -} - -// The table driven code optimizes the case that the CodedOutputStream buffer -// is large enough to serialize into it directly. -// If the proto is optimized for speed, this method will be overridden by -// generated code for maximum speed. If the proto is optimized for size or -// is lite, then we need to specialize this to avoid infinite recursion. -uint8* MessageLite::InternalSerializeWithCachedSizesToArray( - bool deterministic, uint8* target) const { - const internal::SerializationTable* table = - static_cast(InternalGetTable()); - if (table == NULL) { - // We only optimize this when using optimize_for = SPEED. In other cases - // we just use the CodedOutputStream path. 
- int size = GetCachedSize(); - io::ArrayOutputStream out(target, size); - io::CodedOutputStream coded_out(&out); - coded_out.SetSerializationDeterministic(deterministic); - SerializeWithCachedSizes(&coded_out); - GOOGLE_CHECK(!coded_out.HadError()); - return target + size; - } else { - return internal::TableSerializeToArray(*this, table, deterministic, target); - } -} namespace internal { -template<> + +template <> MessageLite* GenericTypeHandler::NewFromPrototype( - const MessageLite* prototype, google::protobuf::Arena* arena) { + const MessageLite* prototype, Arena* arena) { return prototype->New(arena); } template <> @@ -388,20 +523,61 @@ void GenericTypeHandler::Merge(const MessageLite& from, MessageLite* to) { to->CheckTypeAndMergeFrom(from); } -template<> -void GenericTypeHandler::Merge(const string& from, - string* to) { +template <> +void GenericTypeHandler::Merge(const std::string& from, + std::string* to) { *to = from; } -bool proto3_preserve_unknown_ = true; +} // namespace internal + -void SetProto3PreserveUnknownsDefault(bool preserve) { - proto3_preserve_unknown_ = preserve; +// =================================================================== +// Shutdown support. 
+ +namespace internal { + +struct ShutdownData { + ~ShutdownData() { + std::reverse(functions.begin(), functions.end()); + for (auto pair : functions) pair.first(pair.second); + } + + static ShutdownData* get() { + static auto* data = new ShutdownData; + return data; + } + + std::vector> functions; + Mutex mutex; +}; + +static void RunZeroArgFunc(const void* arg) { + void (*func)() = reinterpret_cast(const_cast(arg)); + func(); } +void OnShutdown(void (*func)()) { + OnShutdownRun(RunZeroArgFunc, reinterpret_cast(func)); +} + +void OnShutdownRun(void (*f)(const void*), const void* arg) { + auto shutdown_data = ShutdownData::get(); + MutexLock lock(&shutdown_data->mutex); + shutdown_data->functions.push_back(std::make_pair(f, arg)); +} } // namespace internal +void ShutdownProtobufLibrary() { + // This function should be called only once, but accepts multiple calls. + static bool is_shutdown = false; + if (!is_shutdown) { + delete internal::ShutdownData::get(); + is_shutdown = true; + } +} + + } // namespace protobuf } // namespace google diff --git a/third_party/protobuf-lite/parse_context.cc b/third_party/protobuf-lite/parse_context.cc new file mode 100644 index 00000000..22cdcbba --- /dev/null +++ b/third_party/protobuf-lite/parse_context.cc @@ -0,0 +1,593 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace google { +namespace protobuf { +namespace internal { + +namespace { + +// Only call if at start of tag. +bool ParseEndsInSlopRegion(const char* begin, int overrun, int depth) { + constexpr int kSlopBytes = EpsCopyInputStream::kSlopBytes; + GOOGLE_DCHECK(overrun >= 0); + GOOGLE_DCHECK(overrun <= kSlopBytes); + auto ptr = begin + overrun; + auto end = begin + kSlopBytes; + while (ptr < end) { + uint32 tag; + ptr = ReadTag(ptr, &tag); + if (ptr == nullptr || ptr > end) return false; + // ending on 0 tag is allowed and is the major reason for the necessity of + // this function. 
+ if (tag == 0) return true; + switch (tag & 7) { + case 0: { // Varint + uint64 val; + ptr = VarintParse(ptr, &val); + if (ptr == nullptr) return false; + break; + } + case 1: { // fixed64 + ptr += 8; + break; + } + case 2: { // len delim + int32 size = ReadSize(&ptr); + if (ptr == nullptr || size > end - ptr) return false; + ptr += size; + break; + } + case 3: { // start group + depth++; + break; + } + case 4: { // end group + if (--depth < 0) return true; // We exit early + break; + } + case 5: { // fixed32 + ptr += 4; + break; + } + default: + return false; // Unknown wireformat + } + } + return false; +} + +} // namespace + +const char* EpsCopyInputStream::NextBuffer(int overrun, int depth) { + if (next_chunk_ == nullptr) return nullptr; // We've reached end of stream. + if (next_chunk_ != buffer_) { + GOOGLE_DCHECK(size_ > kSlopBytes); + // The chunk is large enough to be used directly + buffer_end_ = next_chunk_ + size_ - kSlopBytes; + auto res = next_chunk_; + next_chunk_ = buffer_; + if (aliasing_ == kOnPatch) aliasing_ = kNoDelta; + return res; + } + // Move the slop bytes of previous buffer to start of the patch buffer. + // Note we must use memmove because the previous buffer could be part of + // buffer_. + std::memmove(buffer_, buffer_end_, kSlopBytes); + if (overall_limit_ > 0 && + (depth < 0 || !ParseEndsInSlopRegion(buffer_, overrun, depth))) { + const void* data; + // ZeroCopyInputStream indicates Next may return 0 size buffers. Hence + // we loop. 
+ while (StreamNext(&data)) { + if (size_ > kSlopBytes) { + // We got a large chunk + std::memcpy(buffer_ + kSlopBytes, data, kSlopBytes); + next_chunk_ = static_cast(data); + buffer_end_ = buffer_ + kSlopBytes; + if (aliasing_ >= kNoDelta) aliasing_ = kOnPatch; + return buffer_; + } else if (size_ > 0) { + std::memcpy(buffer_ + kSlopBytes, data, size_); + next_chunk_ = buffer_; + buffer_end_ = buffer_ + size_; + if (aliasing_ >= kNoDelta) aliasing_ = kOnPatch; + return buffer_; + } + GOOGLE_DCHECK(size_ == 0) << size_; + } + overall_limit_ = 0; // Next failed, no more needs for next + } + // End of stream or array + if (aliasing_ == kNoDelta) { + // If there is no more block and aliasing is true, the previous block + // is still valid and we can alias. We have users relying on string_view's + // obtained from protos to outlive the proto, when the parse was from an + // array. This guarantees string_view's are always aliased if parsed from + // an array. + aliasing_ = reinterpret_cast(buffer_end_) - + reinterpret_cast(buffer_); + } + next_chunk_ = nullptr; + buffer_end_ = buffer_ + kSlopBytes; + size_ = 0; + return buffer_; +} + +const char* EpsCopyInputStream::Next() { + GOOGLE_DCHECK(limit_ > kSlopBytes); + auto p = NextBuffer(0 /* immaterial */, -1); + if (p == nullptr) { + limit_end_ = buffer_end_; + // Distinguish ending on a pushed limit or ending on end-of-stream. + SetEndOfStream(); + return nullptr; + } + limit_ -= buffer_end_ - p; // Adjust limit_ relative to new anchor + limit_end_ = buffer_end_ + std::min(0, limit_); + return p; +} + +std::pair EpsCopyInputStream::DoneFallback(int overrun, + int depth) { + // Did we exceeded the limit (parse error). + if (PROTOBUF_PREDICT_FALSE(overrun > limit_)) return {nullptr, true}; + GOOGLE_DCHECK(overrun != limit_); // Guaranteed by caller. 
+ GOOGLE_DCHECK(overrun < limit_); // Follows from above + // TODO(gerbens) Instead of this dcheck we could just assign, and remove + // updating the limit_end from PopLimit, ie. + // limit_end_ = buffer_end_ + (std::min)(0, limit_); + // if (ptr < limit_end_) return {ptr, false}; + GOOGLE_DCHECK(limit_end_ == buffer_end_ + (std::min)(0, limit_)); + // At this point we know the following assertion holds. + GOOGLE_DCHECK(limit_ > 0); + GOOGLE_DCHECK(limit_end_ == buffer_end_); // because limit_ > 0 + const char* p; + do { + // We are past the end of buffer_end_, in the slop region. + GOOGLE_DCHECK(overrun >= 0); + p = NextBuffer(overrun, depth); + if (p == nullptr) { + // We are at the end of the stream + if (PROTOBUF_PREDICT_FALSE(overrun != 0)) return {nullptr, true}; + GOOGLE_DCHECK(limit_ > 0); + limit_end_ = buffer_end_; + // Distinguish ending on a pushed limit or ending on end-of-stream. + SetEndOfStream(); + return {buffer_end_, true}; + } + limit_ -= buffer_end_ - p; // Adjust limit_ relative to new anchor + p += overrun; + overrun = p - buffer_end_; + } while (overrun >= 0); + limit_end_ = buffer_end_ + std::min(0, limit_); + return {p, false}; +} + +const char* EpsCopyInputStream::SkipFallback(const char* ptr, int size) { + return AppendSize(ptr, size, [](const char* p, int s) {}); +} + +const char* EpsCopyInputStream::ReadStringFallback(const char* ptr, int size, + std::string* str) { + str->clear(); + if (PROTOBUF_PREDICT_TRUE(size <= buffer_end_ - ptr + limit_)) { + // Reserve the string up to a static safe size. If strings are bigger than + // this we proceed by growing the string as needed. This protects against + // malicious payloads making protobuf hold on to a lot of memory. 
+ str->reserve(str->size() + std::min(size, kSafeStringSize)); + } + return AppendSize(ptr, size, + [str](const char* p, int s) { str->append(p, s); }); +} + +const char* EpsCopyInputStream::AppendStringFallback(const char* ptr, int size, + std::string* str) { + if (PROTOBUF_PREDICT_TRUE(size <= buffer_end_ - ptr + limit_)) { + // Reserve the string up to a static safe size. If strings are bigger than + // this we proceed by growing the string as needed. This protects against + // malicious payloads making protobuf hold on to a lot of memory. + str->reserve(str->size() + std::min(size, kSafeStringSize)); + } + return AppendSize(ptr, size, + [str](const char* p, int s) { str->append(p, s); }); +} + + +template +const char* EpsCopyInputStream::ReadRepeatedFixed(const char* ptr, + Tag expected_tag, + RepeatedField* out) { + do { + out->Add(UnalignedLoad(ptr)); + ptr += sizeof(T); + if (PROTOBUF_PREDICT_FALSE(ptr >= limit_end_)) return ptr; + } while (UnalignedLoad(ptr) == expected_tag&& ptr += sizeof(Tag)); + return ptr; +} + +template +void byteswap(void* p); +template <> +void byteswap<1>(void* p) {} +template <> +void byteswap<4>(void* p) { + *static_cast(p) = bswap_32(*static_cast(p)); +} +template <> +void byteswap<8>(void* p) { + *static_cast(p) = bswap_64(*static_cast(p)); +} + +template +const char* EpsCopyInputStream::ReadPackedFixed(const char* ptr, int size, + RepeatedField* out) { + int nbytes = buffer_end_ + kSlopBytes - ptr; + while (size > nbytes) { + int num = nbytes / sizeof(T); + int old_entries = out->size(); + out->Reserve(old_entries + num); + int block_size = num * sizeof(T); + auto dst = out->AddNAlreadyReserved(num); +#ifdef PROTOBUF_LITTLE_ENDIAN + std::memcpy(dst, ptr, block_size); +#else + for (int i = 0; i < num; i++) + dst[i] = UnalignedLoad(ptr + i * sizeof(T)); +#endif + size -= block_size; + if (limit_ <= kSlopBytes) return nullptr; + ptr = Next(); + if (ptr == nullptr) return nullptr; + ptr += kSlopBytes - (nbytes - block_size); + 
nbytes = buffer_end_ + kSlopBytes - ptr; + } + int num = size / sizeof(T); + int old_entries = out->size(); + out->Reserve(old_entries + num); + int block_size = num * sizeof(T); + auto dst = out->AddNAlreadyReserved(num); +#ifdef PROTOBUF_LITTLE_ENDIAN + std::memcpy(dst, ptr, block_size); +#else + for (int i = 0; i < num; i++) dst[i] = UnalignedLoad(ptr + i * sizeof(T)); +#endif + ptr += block_size; + if (size != block_size) return nullptr; + return ptr; +} + +const char* EpsCopyInputStream::InitFrom(io::ZeroCopyInputStream* zcis) { + zcis_ = zcis; + const void* data; + int size; + limit_ = INT_MAX; + if (zcis->Next(&data, &size)) { + overall_limit_ -= size; + if (size > kSlopBytes) { + auto ptr = static_cast(data); + limit_ -= size - kSlopBytes; + limit_end_ = buffer_end_ = ptr + size - kSlopBytes; + next_chunk_ = buffer_; + if (aliasing_ == kOnPatch) aliasing_ = kNoDelta; + return ptr; + } else { + limit_end_ = buffer_end_ = buffer_ + kSlopBytes; + next_chunk_ = buffer_; + auto ptr = buffer_ + 2 * kSlopBytes - size; + std::memcpy(ptr, data, size); + return ptr; + } + } + overall_limit_ = 0; + next_chunk_ = nullptr; + size_ = 0; + limit_end_ = buffer_end_ = buffer_; + return buffer_; +} + +const char* ParseContext::ParseMessage(MessageLite* msg, const char* ptr) { + return ParseMessage(msg, ptr); +} +const char* ParseContext::ParseMessage(Message* msg, const char* ptr) { + // Use reinterptret case to prevent inclusion of non lite header + return ParseMessage(reinterpret_cast(msg), ptr); +} + +inline void WriteVarint(uint64 val, std::string* s) { + while (val >= 128) { + uint8 c = val | 0x80; + s->push_back(c); + val >>= 7; + } + s->push_back(val); +} + +void WriteVarint(uint32 num, uint64 val, std::string* s) { + WriteVarint(num << 3, s); + WriteVarint(val, s); +} + +void WriteLengthDelimited(uint32 num, StringPiece val, std::string* s) { + WriteVarint((num << 3) + 2, s); + WriteVarint(val.size(), s); + s->append(val.data(), val.size()); +} + +std::pair 
VarintParseSlow32(const char* p, uint32 res) { + for (std::uint32_t i = 2; i < 5; i++) { + uint32 byte = static_cast(p[i]); + res += (byte - 1) << (7 * i); + if (PROTOBUF_PREDICT_TRUE(byte < 128)) { + return {p + i + 1, res}; + } + } + // Accept >5 bytes + for (std::uint32_t i = 5; i < 10; i++) { + uint32 byte = static_cast(p[i]); + if (PROTOBUF_PREDICT_TRUE(byte < 128)) { + return {p + i + 1, res}; + } + } + return {nullptr, 0}; +} + +std::pair VarintParseSlow64(const char* p, uint32 res32) { + uint64 res = res32; + for (std::uint32_t i = 2; i < 10; i++) { + uint64 byte = static_cast(p[i]); + res += (byte - 1) << (7 * i); + if (PROTOBUF_PREDICT_TRUE(byte < 128)) { + return {p + i + 1, res}; + } + } + return {nullptr, 0}; +} + +std::pair ReadTagFallback(const char* p, uint32 res) { + for (std::uint32_t i = 2; i < 5; i++) { + uint32 byte = static_cast(p[i]); + res += (byte - 1) << (7 * i); + if (PROTOBUF_PREDICT_TRUE(byte < 128)) { + return {p + i + 1, res}; + } + } + return {nullptr, 0}; +} + +std::pair ReadSizeFallback(const char* p, uint32 res) { + for (std::uint32_t i = 1; i < 4; i++) { + uint32 byte = static_cast(p[i]); + res += (byte - 1) << (7 * i); + if (PROTOBUF_PREDICT_TRUE(byte < 128)) { + return {p + i + 1, res}; + } + } + std::uint32_t byte = static_cast(p[4]); + if (PROTOBUF_PREDICT_FALSE(byte >= 8)) return {nullptr, 0}; // size >= 2gb + res += (byte - 1) << 28; + // Protect against sign integer overflow in PushLimit. Limits are relative + // to buffer ends and ptr could potential be kSlopBytes beyond a buffer end. + // To protect against overflow we reject limits absurdly close to INT_MAX. 
+ if (PROTOBUF_PREDICT_FALSE(res > INT_MAX - ParseContext::kSlopBytes)) { + return {nullptr, 0}; + } + return {p + 5, res}; +} + +const char* StringParser(const char* begin, const char* end, void* object, + ParseContext*) { + auto str = static_cast(object); + str->append(begin, end - begin); + return end; +} + +// Defined in wire_format_lite.cc +void PrintUTF8ErrorLog(const char* field_name, const char* operation_str, + bool emit_stacktrace); + +bool VerifyUTF8(StringPiece str, const char* field_name) { + if (!IsStructurallyValidUTF8(str)) { + PrintUTF8ErrorLog(field_name, "parsing", false); + return false; + } + return true; +} + +const char* InlineGreedyStringParser(std::string* s, const char* ptr, + ParseContext* ctx) { + int size = ReadSize(&ptr); + if (!ptr) return nullptr; + return ctx->ReadString(ptr, size, s); +} + + +template +const char* VarintParser(void* object, const char* ptr, ParseContext* ctx) { + return ctx->ReadPackedVarint(ptr, [object](uint64 varint) { + T val; + if (sign) { + if (sizeof(T) == 8) { + val = WireFormatLite::ZigZagDecode64(varint); + } else { + val = WireFormatLite::ZigZagDecode32(varint); + } + } else { + val = varint; + } + static_cast*>(object)->Add(val); + }); +} + +const char* PackedInt32Parser(void* object, const char* ptr, + ParseContext* ctx) { + return VarintParser(object, ptr, ctx); +} +const char* PackedUInt32Parser(void* object, const char* ptr, + ParseContext* ctx) { + return VarintParser(object, ptr, ctx); +} +const char* PackedInt64Parser(void* object, const char* ptr, + ParseContext* ctx) { + return VarintParser(object, ptr, ctx); +} +const char* PackedUInt64Parser(void* object, const char* ptr, + ParseContext* ctx) { + return VarintParser(object, ptr, ctx); +} +const char* PackedSInt32Parser(void* object, const char* ptr, + ParseContext* ctx) { + return VarintParser(object, ptr, ctx); +} +const char* PackedSInt64Parser(void* object, const char* ptr, + ParseContext* ctx) { + return VarintParser(object, ptr, ctx); +} 
+ +const char* PackedEnumParser(void* object, const char* ptr, ParseContext* ctx) { + return VarintParser(object, ptr, ctx); +} + +const char* PackedBoolParser(void* object, const char* ptr, ParseContext* ctx) { + return VarintParser(object, ptr, ctx); +} + +template +const char* FixedParser(void* object, const char* ptr, ParseContext* ctx) { + int size = ReadSize(&ptr); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + return ctx->ReadPackedFixed(ptr, size, + static_cast*>(object)); +} + +const char* PackedFixed32Parser(void* object, const char* ptr, + ParseContext* ctx) { + return FixedParser(object, ptr, ctx); +} +const char* PackedSFixed32Parser(void* object, const char* ptr, + ParseContext* ctx) { + return FixedParser(object, ptr, ctx); +} +const char* PackedFixed64Parser(void* object, const char* ptr, + ParseContext* ctx) { + return FixedParser(object, ptr, ctx); +} +const char* PackedSFixed64Parser(void* object, const char* ptr, + ParseContext* ctx) { + return FixedParser(object, ptr, ctx); +} +const char* PackedFloatParser(void* object, const char* ptr, + ParseContext* ctx) { + return FixedParser(object, ptr, ctx); +} +const char* PackedDoubleParser(void* object, const char* ptr, + ParseContext* ctx) { + return FixedParser(object, ptr, ctx); +} + +class UnknownFieldLiteParserHelper { + public: + explicit UnknownFieldLiteParserHelper(std::string* unknown) + : unknown_(unknown) {} + + void AddVarint(uint32 num, uint64 value) { + if (unknown_ == nullptr) return; + WriteVarint(num * 8, unknown_); + WriteVarint(value, unknown_); + } + void AddFixed64(uint32 num, uint64 value) { + if (unknown_ == nullptr) return; + WriteVarint(num * 8 + 1, unknown_); + char buffer[8]; + io::CodedOutputStream::WriteLittleEndian64ToArray( + value, reinterpret_cast(buffer)); + unknown_->append(buffer, 8); + } + const char* ParseLengthDelimited(uint32 num, const char* ptr, + ParseContext* ctx) { + int size = ReadSize(&ptr); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + if (unknown_ == nullptr) 
return ctx->Skip(ptr, size); + WriteVarint(num * 8 + 2, unknown_); + WriteVarint(size, unknown_); + return ctx->AppendString(ptr, size, unknown_); + } + const char* ParseGroup(uint32 num, const char* ptr, ParseContext* ctx) { + if (unknown_) WriteVarint(num * 8 + 3, unknown_); + ptr = ctx->ParseGroup(this, ptr, num * 8 + 3); + GOOGLE_PROTOBUF_PARSER_ASSERT(ptr); + if (unknown_) WriteVarint(num * 8 + 4, unknown_); + return ptr; + } + void AddFixed32(uint32 num, uint32 value) { + if (unknown_ == nullptr) return; + WriteVarint(num * 8 + 5, unknown_); + char buffer[4]; + io::CodedOutputStream::WriteLittleEndian32ToArray( + value, reinterpret_cast(buffer)); + unknown_->append(buffer, 4); + } + + const char* _InternalParse(const char* ptr, ParseContext* ctx) { + return WireFormatParser(*this, ptr, ctx); + } + + private: + std::string* unknown_; +}; + +const char* UnknownGroupLiteParse(std::string* unknown, const char* ptr, + ParseContext* ctx) { + UnknownFieldLiteParserHelper field_parser(unknown); + return WireFormatParser(field_parser, ptr, ctx); +} + +const char* UnknownFieldParse(uint32 tag, std::string* unknown, const char* ptr, + ParseContext* ctx) { + UnknownFieldLiteParserHelper field_parser(unknown); + return FieldParser(tag, field_parser, ptr, ctx); +} + +} // namespace internal +} // namespace protobuf +} // namespace google diff --git a/third_party/protobuf-lite/repeated_field.cc b/third_party/protobuf-lite/repeated_field.cc index 310000aa..64506791 100644 --- a/third_party/protobuf-lite/repeated_field.cc +++ b/third_party/protobuf-lite/repeated_field.cc @@ -32,11 +32,15 @@ // Based on original Protocol Buffers design by // Sanjay Ghemawat, Jeff Dean, and others. 
+#include + #include -#include #include #include +#include + +#include namespace google { namespace protobuf { @@ -51,19 +55,17 @@ void** RepeatedPtrFieldBase::InternalExtend(int extend_amount) { return &rep_->elements[current_size_]; } Rep* old_rep = rep_; - Arena* arena = GetArenaNoVirtual(); - new_size = std::max(kMinRepeatedFieldAllocationSize, + Arena* arena = GetArena(); + new_size = std::max(internal::kRepeatedFieldLowerClampLimit, std::max(total_size_ * 2, new_size)); - GOOGLE_CHECK_LE(new_size, - (std::numeric_limits::max() - kRepHeaderSize) / - sizeof(old_rep->elements[0])) + GOOGLE_CHECK_LE(new_size, (std::numeric_limits::max() - kRepHeaderSize) / + sizeof(old_rep->elements[0])) << "Requested size is too large to fit into size_t."; size_t bytes = kRepHeaderSize + sizeof(old_rep->elements[0]) * new_size; if (arena == NULL) { rep_ = reinterpret_cast(::operator new(bytes)); } else { - rep_ = reinterpret_cast( - ::google::protobuf::Arena::CreateArray(arena, bytes)); + rep_ = reinterpret_cast(Arena::CreateArray(arena, bytes)); } #if defined(__GXX_DELETE_WITH_SIZE__) || defined(__cpp_sized_deallocation) const int old_total_size = total_size_; @@ -103,18 +105,17 @@ void RepeatedPtrFieldBase::CloseGap(int start, int num) { rep_->allocated_size -= num; } -google::protobuf::MessageLite* RepeatedPtrFieldBase::AddWeak( - const google::protobuf::MessageLite* prototype) { +MessageLite* RepeatedPtrFieldBase::AddWeak(const MessageLite* prototype) { if (rep_ != NULL && current_size_ < rep_->allocated_size) { - return reinterpret_cast( - rep_->elements[current_size_++]); + return reinterpret_cast(rep_->elements[current_size_++]); } if (!rep_ || rep_->allocated_size == total_size_) { Reserve(total_size_ + 1); } ++rep_->allocated_size; - google::protobuf::MessageLite* result = prototype ? prototype->New(arena_) : - Arena::CreateMessage(arena_); + MessageLite* result = prototype + ? 
prototype->New(arena_) + : Arena::CreateMessage(arena_); rep_->elements[current_size_++] = result; return result; } @@ -122,5 +123,14 @@ google::protobuf::MessageLite* RepeatedPtrFieldBase::AddWeak( } // namespace internal +template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField; +template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField; +template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField; +template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField; +template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField; +template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField; +template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedField; +template class PROTOBUF_EXPORT_TEMPLATE_DEFINE RepeatedPtrField; + } // namespace protobuf } // namespace google diff --git a/third_party/protobuf-lite/status.cc b/third_party/protobuf-lite/status.cc index 2bfbe0b4..03b37c36 100644 --- a/third_party/protobuf-lite/status.cc +++ b/third_party/protobuf-lite/status.cc @@ -38,7 +38,7 @@ namespace google { namespace protobuf { namespace util { namespace error { -inline string CodeEnumToString(error::Code code) { +inline std::string CodeEnumToString(error::Code code) { switch (code) { case OK: return "OK"; @@ -111,7 +111,7 @@ bool Status::operator==(const Status& x) const { error_message_ == x.error_message_; } -string Status::ToString() const { +std::string Status::ToString() const { if (error_code_ == error::OK) { return "OK"; } else { diff --git a/third_party/protobuf-lite/statusor.cc b/third_party/protobuf-lite/statusor.cc index 48d1402a..c744b8d2 100644 --- a/third_party/protobuf-lite/statusor.cc +++ b/third_party/protobuf-lite/statusor.cc @@ -30,6 +30,8 @@ #include +#include + namespace google { namespace protobuf { namespace util { diff --git a/third_party/protobuf-lite/stringpiece.cc b/third_party/protobuf-lite/stringpiece.cc index 989474b7..353c78ca 100644 --- a/third_party/protobuf-lite/stringpiece.cc +++ b/third_party/protobuf-lite/stringpiece.cc @@ -35,6 
+35,8 @@ #include #include +#include + namespace google { namespace protobuf { std::ostream& operator<<(std::ostream& o, StringPiece piece) { @@ -62,11 +64,11 @@ StringPiece::StringPiece(StringPiece x, GOOGLE_DCHECK_GE(len, 0); } -void StringPiece::CopyToString(string* target) const { +void StringPiece::CopyToString(std::string* target) const { target->assign(ptr_, length_); } -void StringPiece::AppendToString(string* target) const { +void StringPiece::AppendToString(std::string* target) const { target->append(ptr_, length_); } @@ -115,7 +117,7 @@ stringpiece_ssize_type StringPiece::find(char c, size_type pos) const { } const char* result = static_cast( memchr(ptr_ + pos, c, length_ - pos)); - return result != NULL ? result - ptr_ : npos; + return result != nullptr ? result - ptr_ : npos; } stringpiece_ssize_type StringPiece::rfind(StringPiece s, size_type pos) const { diff --git a/third_party/protobuf-lite/stringprintf.cc b/third_party/protobuf-lite/stringprintf.cc index d98b9b87..26031649 100644 --- a/third_party/protobuf-lite/stringprintf.cc +++ b/third_party/protobuf-lite/stringprintf.cc @@ -36,7 +36,9 @@ #include // For va_list and related operations #include // MSVC requires this for _vsnprintf #include + #include +#include namespace google { namespace protobuf { @@ -52,7 +54,7 @@ enum { IS_COMPILER_MSVC = 1 }; enum { IS_COMPILER_MSVC = 0 }; #endif -void StringAppendV(string* dst, const char* format, va_list ap) { +void StringAppendV(std::string* dst, const char* format, va_list ap) { // First try with a small fixed size buffer static const int kSpaceLength = 1024; char space[kSpaceLength]; @@ -76,7 +78,7 @@ void StringAppendV(string* dst, const char* format, va_list ap) { // Error or MSVC running out of space. 
MSVC 8.0 and higher // can be asked about space needed with the special idiom below: va_copy(backup_ap, ap); - result = vsnprintf(NULL, 0, format, backup_ap); + result = vsnprintf(nullptr, 0, format, backup_ap); va_end(backup_ap); } @@ -103,17 +105,16 @@ void StringAppendV(string* dst, const char* format, va_list ap) { delete[] buf; } - -string StringPrintf(const char* format, ...) { +std::string StringPrintf(const char* format, ...) { va_list ap; va_start(ap, format); - string result; + std::string result; StringAppendV(&result, format, ap); va_end(ap); return result; } -const string& SStringPrintf(string* dst, const char* format, ...) { +const std::string& SStringPrintf(std::string* dst, const char* format, ...) { va_list ap; va_start(ap, format); dst->clear(); @@ -122,7 +123,7 @@ const string& SStringPrintf(string* dst, const char* format, ...) { return *dst; } -void StringAppendF(string* dst, const char* format, ...) { +void StringAppendF(std::string* dst, const char* format, ...) { va_list ap; va_start(ap, format); StringAppendV(dst, format, ap); @@ -137,7 +138,8 @@ const int kStringPrintfVectorMaxArgs = 32; // and we can fix the problem or protect against an attack. static const char string_printf_empty_block[256] = { '\0' }; -string StringPrintfVector(const char* format, const std::vector& v) { +std::string StringPrintfVector(const char* format, + const std::vector& v) { GOOGLE_CHECK_LE(v.size(), kStringPrintfVectorMaxArgs) << "StringPrintfVector currently only supports up to " << kStringPrintfVectorMaxArgs << " arguments. 
" diff --git a/third_party/protobuf-lite/structurally_valid.cc b/third_party/protobuf-lite/structurally_valid.cc index b2239682..9a476c3b 100644 --- a/third_party/protobuf-lite/structurally_valid.cc +++ b/third_party/protobuf-lite/structurally_valid.cc @@ -395,7 +395,7 @@ int UTF8GenericScan(const UTF8ScanObj* st, const uint8* isrc = reinterpret_cast(str); const uint8* src = isrc; const uint8* srclimit = isrc + str_length; - const uint8* srclimit8 = srclimit - 7; + const uint8* srclimit8 = str_length < 7 ? isrc : srclimit - 7; const uint8* Tbl_0 = &st->state_table[st->state0]; DoAgain: @@ -456,8 +456,7 @@ int UTF8GenericScan(const UTF8ScanObj* st, } //---------------------------- - - // Exit posibilities: + // Exit possibilities: // Some exit code, !state0, back up over last char // Some exit code, state0, back up one byte exactly // source consumed, !state0, back up over partial char @@ -504,7 +503,7 @@ int UTF8GenericScanFastAscii(const UTF8ScanObj* st, const uint8* isrc = reinterpret_cast(str); const uint8* src = isrc; const uint8* srclimit = isrc + str_length; - const uint8* srclimit8 = srclimit - 7; + const uint8* srclimit8 = str_length < 7 ? isrc : srclimit - 7; int n; int rest_consumed; int exit_reason; @@ -555,14 +554,14 @@ InitDetector init_detector; bool IsStructurallyValidUTF8(const char* buf, int len) { if (!module_initialized_) return true; - + int bytes_consumed = 0; UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj, buf, len, &bytes_consumed); return (bytes_consumed == len); } -int UTF8SpnStructurallyValid(const StringPiece& str) { +int UTF8SpnStructurallyValid(StringPiece str) { if (!module_initialized_) return str.size(); int bytes_consumed = 0; @@ -583,8 +582,7 @@ int UTF8SpnStructurallyValid(const StringPiece& str) { // // Fast case: all is structurally valid and no byte copying is done. 
// -char* UTF8CoerceToStructurallyValid(const StringPiece& src_str, - char* idst, +char* UTF8CoerceToStructurallyValid(StringPiece src_str, char* idst, const char replace_char) { const char* isrc = src_str.data(); const int len = src_str.length(); diff --git a/third_party/protobuf-lite/strutil.cc b/third_party/protobuf-lite/strutil.cc index 552d416f..2ecdb2bf 100644 --- a/third_party/protobuf-lite/strutil.cc +++ b/third_party/protobuf-lite/strutil.cc @@ -31,15 +31,16 @@ // from google3/strings/strutil.cc #include -#include #include #include // FLT_DIG and DBL_DIG -#include #include #include +#include #include +#include +#include #include #ifdef _WIN32 @@ -78,37 +79,22 @@ inline bool isprint(char c) { return c >= 0x20 && c <= 0x7E; } -// ---------------------------------------------------------------------- -// StripString -// Replaces any occurrence of the character 'remove' (or the characters -// in 'remove') with the character 'replacewith'. -// ---------------------------------------------------------------------- -void StripString(string* s, const char* remove, char replacewith) { - const char * str_start = s->c_str(); - const char * str = str_start; - for (str = strpbrk(str, remove); - str != NULL; - str = strpbrk(str + 1, remove)) { - (*s)[str - str_start] = replacewith; - } -} - // ---------------------------------------------------------------------- // ReplaceCharacters // Replaces any occurrence of the character 'remove' (or the characters // in 'remove') with the character 'replacewith'. 
// ---------------------------------------------------------------------- -void ReplaceCharacters(string *s, const char *remove, char replacewith) { +void ReplaceCharacters(std::string *s, const char *remove, char replacewith) { const char *str_start = s->c_str(); const char *str = str_start; for (str = strpbrk(str, remove); - str != NULL; + str != nullptr; str = strpbrk(str + 1, remove)) { (*s)[str - str_start] = replacewith; } } -void StripWhitespace(string* str) { +void StripWhitespace(std::string *str) { int str_length = str->length(); // Strip off leading whitespace. @@ -132,7 +118,7 @@ void StripWhitespace(string* str) { --last; } if (last != (str_length - 1) && last >= 0) { - str->erase(last + 1, string::npos); + str->erase(last + 1, std::string::npos); } } @@ -143,19 +129,19 @@ void StripWhitespace(string* str) { // it only replaces the first instance of "old." // ---------------------------------------------------------------------- -void StringReplace(const string& s, const string& oldsub, - const string& newsub, bool replace_all, - string* res) { +void StringReplace(const std::string &s, const std::string &oldsub, + const std::string &newsub, bool replace_all, + std::string *res) { if (oldsub.empty()) { res->append(s); // if empty, append the given string. return; } - string::size_type start_pos = 0; - string::size_type pos; + std::string::size_type start_pos = 0; + std::string::size_type pos; do { pos = s.find(oldsub, start_pos); - if (pos == string::npos) { + if (pos == std::string::npos) { break; } res->append(s, start_pos, pos - start_pos); @@ -174,9 +160,9 @@ void StringReplace(const string& s, const string& oldsub, // happened or not. 
// ---------------------------------------------------------------------- -string StringReplace(const string& s, const string& oldsub, - const string& newsub, bool replace_all) { - string ret; +std::string StringReplace(const std::string &s, const std::string &oldsub, + const std::string &newsub, bool replace_all) { + std::string ret; StringReplace(s, oldsub, newsub, replace_all, &ret); return ret; } @@ -190,10 +176,8 @@ string StringReplace(const string& s, const string& oldsub, // the characters in the string, not the entire string as a single delimiter. // ---------------------------------------------------------------------- template -static inline -void SplitStringToIteratorUsing(const string& full, - const char* delim, - ITR& result) { +static inline void SplitStringToIteratorUsing(StringPiece full, + const char *delim, ITR &result) { // Optimize the common case where delim is a single character. if (delim[0] != '\0' && delim[1] == '\0') { char c = delim[0]; @@ -205,29 +189,29 @@ void SplitStringToIteratorUsing(const string& full, } else { const char* start = p; while (++p != end && *p != c); - *result++ = string(start, p - start); + *result++ = std::string(start, p - start); } } return; } - string::size_type begin_index, end_index; + std::string::size_type begin_index, end_index; begin_index = full.find_first_not_of(delim); - while (begin_index != string::npos) { + while (begin_index != std::string::npos) { end_index = full.find_first_of(delim, begin_index); - if (end_index == string::npos) { - *result++ = full.substr(begin_index); + if (end_index == std::string::npos) { + *result++ = std::string(full.substr(begin_index)); return; } - *result++ = full.substr(begin_index, (end_index - begin_index)); + *result++ = + std::string(full.substr(begin_index, (end_index - begin_index))); begin_index = full.find_first_not_of(delim, end_index); } } -void SplitStringUsing(const string& full, - const char* delim, - std::vector* result) { - std::back_insert_iterator< 
std::vector > it(*result); +void SplitStringUsing(StringPiece full, const char *delim, + std::vector *result) { + std::back_insert_iterator > it(*result); SplitStringToIteratorUsing(full, delim, it); } @@ -242,30 +226,29 @@ void SplitStringUsing(const string& full, // // If "pieces" is negative for some reason, it returns the whole string // ---------------------------------------------------------------------- -template -static inline -void SplitStringToIteratorAllowEmpty(const StringType& full, - const char* delim, - int pieces, - ITR& result) { - string::size_type begin_index, end_index; +template +static inline void SplitStringToIteratorAllowEmpty(StringPiece full, + const char *delim, + int pieces, ITR &result) { + std::string::size_type begin_index, end_index; begin_index = 0; for (int i = 0; (i < pieces-1) || (pieces == 0); i++) { end_index = full.find_first_of(delim, begin_index); - if (end_index == string::npos) { - *result++ = full.substr(begin_index); + if (end_index == std::string::npos) { + *result++ = std::string(full.substr(begin_index)); return; } - *result++ = full.substr(begin_index, (end_index - begin_index)); + *result++ = + std::string(full.substr(begin_index, (end_index - begin_index))); begin_index = end_index + 1; } - *result++ = full.substr(begin_index); + *result++ = std::string(full.substr(begin_index)); } -void SplitStringAllowEmpty(const string& full, const char* delim, - std::vector* result) { - std::back_insert_iterator > it(*result); +void SplitStringAllowEmpty(StringPiece full, const char *delim, + std::vector *result) { + std::back_insert_iterator > it(*result); SplitStringToIteratorAllowEmpty(full, delim, 0, it); } @@ -276,11 +259,9 @@ void SplitStringAllowEmpty(const string& full, const char* delim, // // ---------------------------------------------------------------------- template -static void JoinStringsIterator(const ITERATOR& start, - const ITERATOR& end, - const char* delim, - string* result) { - GOOGLE_CHECK(result != 
NULL); +static void JoinStringsIterator(const ITERATOR &start, const ITERATOR &end, + const char *delim, std::string *result) { + GOOGLE_CHECK(result != nullptr); result->clear(); int delim_length = strlen(delim); @@ -303,9 +284,8 @@ static void JoinStringsIterator(const ITERATOR& start, } } -void JoinStrings(const std::vector& components, - const char* delim, - string * result) { +void JoinStrings(const std::vector &components, const char *delim, + std::string *result) { JoinStringsIterator(components.begin(), components.end(), delim, result); } @@ -318,7 +298,7 @@ void JoinStrings(const std::vector& components, // result is truncated to 8 bits. // // The second call stores its errors in a supplied string vector. -// If the string vector pointer is NULL, it reports the errors with LOG(). +// If the string vector pointer is nullptr, it reports the errors with LOG(). // ---------------------------------------------------------------------- #define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7')) @@ -328,12 +308,12 @@ void JoinStrings(const std::vector& components, #define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false) int UnescapeCEscapeSequences(const char* source, char* dest) { - return UnescapeCEscapeSequences(source, dest, NULL); + return UnescapeCEscapeSequences(source, dest, nullptr); } -int UnescapeCEscapeSequences(const char* source, char* dest, - std::vector *errors) { - GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented."; +int UnescapeCEscapeSequences(const char *source, char *dest, + std::vector *errors) { + GOOGLE_DCHECK(errors == nullptr) << "Error reporting not implemented."; char* d = dest; const char* p = source; @@ -387,8 +367,10 @@ int UnescapeCEscapeSequences(const char* source, char* dest, while (isxdigit(p[1])) // arbitrarily many hex digits ch = (ch << 4) + hex_digit_to_int(*++p); if (ch > 0xFF) - LOG_STRING(ERROR, errors) << "Value of " << - "\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits"; + 
LOG_STRING(ERROR, errors) + << "Value of " + << "\\" << std::string(hex_start, p + 1 - hex_start) + << " exceeds 8 bits"; *d++ = ch; break; } @@ -403,7 +385,7 @@ int UnescapeCEscapeSequences(const char* source, char* dest, } else { LOG_STRING(ERROR, errors) << "\\u must be followed by 4 hex digits: \\" - << string(hex_start, p+1-hex_start); + << std::string(hex_start, p+1-hex_start); break; } } @@ -422,7 +404,7 @@ int UnescapeCEscapeSequences(const char* source, char* dest, if (newrune > 0x10FFFF) { LOG_STRING(ERROR, errors) << "Value of \\" - << string(hex_start, p + 1 - hex_start) + << std::string(hex_start, p + 1 - hex_start) << " exceeds Unicode limit (0x10FFFF)"; break; } else { @@ -431,7 +413,7 @@ int UnescapeCEscapeSequences(const char* source, char* dest, } else { LOG_STRING(ERROR, errors) << "\\U must be followed by 8 hex digits: \\" - << string(hex_start, p+1-hex_start); + << std::string(hex_start, p+1-hex_start); break; } } @@ -458,17 +440,17 @@ int UnescapeCEscapeSequences(const char* source, char* dest, // to be the same. // // The second call stores its errors in a supplied string vector. -// If the string vector pointer is NULL, it reports the errors with LOG(). +// If the string vector pointer is nullptr, it reports the errors with LOG(). // // In the first and second calls, the length of dest is returned. In the // the third call, the new string is returned. 
// ---------------------------------------------------------------------- -int UnescapeCEscapeString(const string& src, string* dest) { - return UnescapeCEscapeString(src, dest, NULL); +int UnescapeCEscapeString(const std::string &src, std::string *dest) { + return UnescapeCEscapeString(src, dest, nullptr); } -int UnescapeCEscapeString(const string& src, string* dest, - std::vector *errors) { +int UnescapeCEscapeString(const std::string &src, std::string *dest, + std::vector *errors) { std::unique_ptr unescaped(new char[src.size() + 1]); int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors); GOOGLE_CHECK(dest); @@ -476,10 +458,10 @@ int UnescapeCEscapeString(const string& src, string* dest, return len; } -string UnescapeCEscapeString(const string& src) { +std::string UnescapeCEscapeString(const std::string &src) { std::unique_ptr unescaped(new char[src.size() + 1]); - int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL); - return string(unescaped.get(), len); + int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), nullptr); + return std::string(unescaped.get(), len); } // ---------------------------------------------------------------------- @@ -576,7 +558,7 @@ static inline size_t CEscapedLength(StringPiece src) { // the required space using a lookup table, and also does not do any special // handling for Hex or UTF-8 characters. 
// ---------------------------------------------------------------------- -void CEscapeAndAppend(StringPiece src, string* dest) { +void CEscapeAndAppend(StringPiece src, std::string *dest) { size_t escaped_len = CEscapedLength(src); if (escaped_len == src.size()) { dest->append(src.data(), src.size()); @@ -610,30 +592,30 @@ void CEscapeAndAppend(StringPiece src, string* dest) { } } -string CEscape(const string& src) { - string dest; +std::string CEscape(const std::string &src) { + std::string dest; CEscapeAndAppend(src, &dest); return dest; } namespace strings { -string Utf8SafeCEscape(const string& src) { +std::string Utf8SafeCEscape(const std::string &src) { const int dest_length = src.size() * 4 + 1; // Maximum possible expansion std::unique_ptr dest(new char[dest_length]); const int len = CEscapeInternal(src.data(), src.size(), dest.get(), dest_length, false, true); GOOGLE_DCHECK_GE(len, 0); - return string(dest.get(), len); + return std::string(dest.get(), len); } -string CHexEscape(const string& src) { +std::string CHexEscape(const std::string &src) { const int dest_length = src.size() * 4 + 1; // Maximum possible expansion std::unique_ptr dest(new char[dest_length]); const int len = CEscapeInternal(src.data(), src.size(), dest.get(), dest_length, true, false); GOOGLE_DCHECK_GE(len, 0); - return string(dest.get(), len); + return std::string(dest.get(), len); } } // namespace strings @@ -681,8 +663,8 @@ uint32 strtou32_adaptor(const char *nptr, char **endptr, int base) { return static_cast(result); } -inline bool safe_parse_sign(string* text /*inout*/, - bool* negative_ptr /*output*/) { +inline bool safe_parse_sign(std::string *text /*inout*/, + bool *negative_ptr /*output*/) { const char* start = text->data(); const char* end = start + text->size(); @@ -709,9 +691,8 @@ inline bool safe_parse_sign(string* text /*inout*/, return true; } -template -bool safe_parse_positive_int( - string text, IntType* value_p) { +template +bool 
safe_parse_positive_int(std::string text, IntType *value_p) { int base = 10; IntType value = 0; const IntType vmax = std::numeric_limits::max(); @@ -743,9 +724,8 @@ bool safe_parse_positive_int( return true; } -template -bool safe_parse_negative_int( - const string& text, IntType* value_p) { +template +bool safe_parse_negative_int(const std::string &text, IntType *value_p) { int base = 10; IntType value = 0; const IntType vmin = std::numeric_limits::min(); @@ -784,8 +764,8 @@ bool safe_parse_negative_int( return true; } -template -bool safe_int_internal(string text, IntType* value_p) { +template +bool safe_int_internal(std::string text, IntType *value_p) { *value_p = 0; bool negative; if (!safe_parse_sign(&text, &negative)) { @@ -798,8 +778,8 @@ bool safe_int_internal(string text, IntType* value_p) { } } -template -bool safe_uint_internal(string text, IntType* value_p) { +template +bool safe_uint_internal(std::string text, IntType *value_p) { *value_p = 0; bool negative; if (!safe_parse_sign(&text, &negative) || negative) { @@ -982,7 +962,7 @@ static const char two_ASCII_digits[100][2] = { char* FastUInt32ToBufferLeft(uint32 u, char* buffer) { uint32 digits; - const char *ASCII_digits = NULL; + const char *ASCII_digits = nullptr; // The idea of this implementation is to trim the number of divides to as few // as possible by using multiplication and subtraction rather than mod (%), // and by outputting two digits at a time rather than one. 
@@ -1063,17 +1043,19 @@ char* FastUInt32ToBufferLeft(uint32 u, char* buffer) { } char* FastInt32ToBufferLeft(int32 i, char* buffer) { - uint32 u = i; + uint32 u = 0; if (i < 0) { *buffer++ = '-'; - u = -i; + u -= i; + } else { + u = i; } return FastUInt32ToBufferLeft(u, buffer); } char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { int digits; - const char *ASCII_digits = NULL; + const char *ASCII_digits = nullptr; uint32 u = static_cast(u64); if (u == u64) return FastUInt32ToBufferLeft(u, buffer); @@ -1114,10 +1096,12 @@ char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { } char* FastInt64ToBufferLeft(int64 i, char* buffer) { - uint64 u = i; + uint64 u = 0; if (i < 0) { *buffer++ = '-'; - u = -i; + u -= i; + } else { + u = i; } return FastUInt64ToBufferLeft(u, buffer); } @@ -1129,46 +1113,46 @@ char* FastInt64ToBufferLeft(int64 i, char* buffer) { // Return value: string // ---------------------------------------------------------------------- -string SimpleItoa(int i) { +std::string SimpleItoa(int i) { char buffer[kFastToBufferSize]; return (sizeof(i) == 4) ? FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer); } -string SimpleItoa(unsigned int i) { +std::string SimpleItoa(unsigned int i) { char buffer[kFastToBufferSize]; - return string(buffer, (sizeof(i) == 4) ? - FastUInt32ToBufferLeft(i, buffer) : - FastUInt64ToBufferLeft(i, buffer)); + return std::string(buffer, (sizeof(i) == 4) + ? FastUInt32ToBufferLeft(i, buffer) + : FastUInt64ToBufferLeft(i, buffer)); } -string SimpleItoa(long i) { +std::string SimpleItoa(long i) { char buffer[kFastToBufferSize]; return (sizeof(i) == 4) ? FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer); } -string SimpleItoa(unsigned long i) { +std::string SimpleItoa(unsigned long i) { char buffer[kFastToBufferSize]; - return string(buffer, (sizeof(i) == 4) ? - FastUInt32ToBufferLeft(i, buffer) : - FastUInt64ToBufferLeft(i, buffer)); + return std::string(buffer, (sizeof(i) == 4) + ? 
FastUInt32ToBufferLeft(i, buffer) + : FastUInt64ToBufferLeft(i, buffer)); } -string SimpleItoa(long long i) { +std::string SimpleItoa(long long i) { char buffer[kFastToBufferSize]; return (sizeof(i) == 4) ? FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer); } -string SimpleItoa(unsigned long long i) { +std::string SimpleItoa(unsigned long long i) { char buffer[kFastToBufferSize]; - return string(buffer, (sizeof(i) == 4) ? - FastUInt32ToBufferLeft(i, buffer) : - FastUInt64ToBufferLeft(i, buffer)); + return std::string(buffer, (sizeof(i) == 4) + ? FastUInt32ToBufferLeft(i, buffer) + : FastUInt64ToBufferLeft(i, buffer)); } // ---------------------------------------------------------------------- @@ -1212,12 +1196,12 @@ string SimpleItoa(unsigned long long i) { // implementation. // ---------------------------------------------------------------------- -string SimpleDtoa(double value) { +std::string SimpleDtoa(double value) { char buffer[kDoubleToBufferSize]; return DoubleToBuffer(value, buffer); } -string SimpleFtoa(float value) { +std::string SimpleFtoa(float value) { char buffer[kFloatToBufferSize]; return FloatToBuffer(value, buffer); } @@ -1231,7 +1215,7 @@ static inline bool IsValidFloatChar(char c) { void DelocalizeRadix(char* buffer) { // Fast check: if the buffer has a normal decimal point, assume no // translation is needed. - if (strchr(buffer, '.') != NULL) return; + if (strchr(buffer, '.') != nullptr) return; // Find the first unknown character. while (IsValidFloatChar(*buffer)) ++buffer; @@ -1268,7 +1252,7 @@ char* DoubleToBuffer(double value, char* buffer) { } else if (value == -std::numeric_limits::infinity()) { strcpy(buffer, "-inf"); return buffer; - } else if (MathLimits::IsNaN(value)) { + } else if (std::isnan(value)) { strcpy(buffer, "nan"); return buffer; } @@ -1286,7 +1270,7 @@ char* DoubleToBuffer(double value, char* buffer) { // of a double. 
This long double may have extra bits that make it compare // unequal to "value" even though it would be exactly equal if it were // truncated to a double. - volatile double parsed_value = strtod(buffer, NULL); + volatile double parsed_value = internal::NoLocaleStrtod(buffer, nullptr); if (parsed_value != value) { int snprintf_result = snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value); @@ -1318,7 +1302,7 @@ inline bool CaseEqual(StringPiece s1, StringPiece s2) { } bool safe_strtob(StringPiece str, bool* value) { - GOOGLE_CHECK(value != NULL) << "NULL output boolean given."; + GOOGLE_CHECK(value != nullptr) << "nullptr output boolean given."; if (CaseEqual(str, "true") || CaseEqual(str, "t") || CaseEqual(str, "yes") || CaseEqual(str, "y") || CaseEqual(str, "1")) { @@ -1338,7 +1322,7 @@ bool safe_strtof(const char* str, float* value) { char* endptr; errno = 0; // errno only gets set on errors #if defined(_WIN32) || defined (__hpux) // has no strtof() - *value = strtod(str, &endptr); + *value = internal::NoLocaleStrtod(str, &endptr); #else *value = strtof(str, &endptr); #endif @@ -1347,7 +1331,7 @@ bool safe_strtof(const char* str, float* value) { bool safe_strtod(const char* str, double* value) { char* endptr; - *value = strtod(str, &endptr); + *value = internal::NoLocaleStrtod(str, &endptr); if (endptr != str) { while (ascii_isspace(*endptr)) ++endptr; } @@ -1357,19 +1341,19 @@ bool safe_strtod(const char* str, double* value) { return *str != '\0' && *endptr == '\0'; } -bool safe_strto32(const string& str, int32* value) { +bool safe_strto32(const std::string &str, int32 *value) { return safe_int_internal(str, value); } -bool safe_strtou32(const string& str, uint32* value) { +bool safe_strtou32(const std::string &str, uint32 *value) { return safe_uint_internal(str, value); } -bool safe_strto64(const string& str, int64* value) { +bool safe_strto64(const std::string &str, int64 *value) { return safe_int_internal(str, value); } -bool safe_strtou64(const 
string& str, uint64* value) { +bool safe_strtou64(const std::string &str, uint64 *value) { return safe_uint_internal(str, value); } @@ -1386,7 +1370,7 @@ char* FloatToBuffer(float value, char* buffer) { } else if (value == -std::numeric_limits::infinity()) { strcpy(buffer, "-inf"); return buffer; - } else if (MathLimits::IsNaN(value)) { + } else if (std::isnan(value)) { strcpy(buffer, "nan"); return buffer; } @@ -1445,36 +1429,48 @@ AlphaNum::AlphaNum(strings::Hex hex) { // after the area just overwritten. It comes in multiple flavors to minimize // call overhead. static char *Append1(char *out, const AlphaNum &x) { - memcpy(out, x.data(), x.size()); - return out + x.size(); + if (x.size() > 0) { + memcpy(out, x.data(), x.size()); + out += x.size(); + } + return out; } static char *Append2(char *out, const AlphaNum &x1, const AlphaNum &x2) { - memcpy(out, x1.data(), x1.size()); - out += x1.size(); - - memcpy(out, x2.data(), x2.size()); - return out + x2.size(); + if (x1.size() > 0) { + memcpy(out, x1.data(), x1.size()); + out += x1.size(); + } + if (x2.size() > 0) { + memcpy(out, x2.data(), x2.size()); + out += x2.size(); + } + return out; } -static char *Append4(char *out, - const AlphaNum &x1, const AlphaNum &x2, +static char *Append4(char *out, const AlphaNum &x1, const AlphaNum &x2, const AlphaNum &x3, const AlphaNum &x4) { - memcpy(out, x1.data(), x1.size()); - out += x1.size(); - - memcpy(out, x2.data(), x2.size()); - out += x2.size(); - - memcpy(out, x3.data(), x3.size()); - out += x3.size(); - - memcpy(out, x4.data(), x4.size()); - return out + x4.size(); + if (x1.size() > 0) { + memcpy(out, x1.data(), x1.size()); + out += x1.size(); + } + if (x2.size() > 0) { + memcpy(out, x2.data(), x2.size()); + out += x2.size(); + } + if (x3.size() > 0) { + memcpy(out, x3.data(), x3.size()); + out += x3.size(); + } + if (x4.size() > 0) { + memcpy(out, x4.data(), x4.size()); + out += x4.size(); + } + return out; } -string StrCat(const AlphaNum &a, const AlphaNum &b) { - 
string result; +std::string StrCat(const AlphaNum &a, const AlphaNum &b) { + std::string result; result.resize(a.size() + b.size()); char *const begin = &*result.begin(); char *out = Append2(begin, a, b); @@ -1482,8 +1478,8 @@ string StrCat(const AlphaNum &a, const AlphaNum &b) { return result; } -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) { - string result; +std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) { + std::string result; result.resize(a.size() + b.size() + c.size()); char *const begin = &*result.begin(); char *out = Append2(begin, a, b); @@ -1492,9 +1488,9 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) { return result; } -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d) { - string result; +std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, + const AlphaNum &d) { + std::string result; result.resize(a.size() + b.size() + c.size() + d.size()); char *const begin = &*result.begin(); char *out = Append4(begin, a, b, c, d); @@ -1502,9 +1498,9 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, return result; } -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e) { - string result; +std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, + const AlphaNum &d, const AlphaNum &e) { + std::string result; result.resize(a.size() + b.size() + c.size() + d.size() + e.size()); char *const begin = &*result.begin(); char *out = Append4(begin, a, b, c, d); @@ -1513,9 +1509,9 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, return result; } -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) { - string result; +std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, + const AlphaNum &d, const AlphaNum &e, 
const AlphaNum &f) { + std::string result; result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + f.size()); char *const begin = &*result.begin(); @@ -1525,10 +1521,10 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, return result; } -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g) { - string result; +std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, + const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, + const AlphaNum &g) { + std::string result; result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + f.size() + g.size()); char *const begin = &*result.begin(); @@ -1539,10 +1535,10 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, return result; } -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h) { - string result; +std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, + const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, + const AlphaNum &g, const AlphaNum &h) { + std::string result; result.resize(a.size() + b.size() + c.size() + d.size() + e.size() + f.size() + g.size() + h.size()); char *const begin = &*result.begin(); @@ -1552,10 +1548,10 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, return result; } -string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, - const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, - const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) { - string result; +std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, + const AlphaNum &d, const AlphaNum &e, const AlphaNum &f, + const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) { + std::string result; result.resize(a.size() + b.size() + c.size() + d.size() + 
e.size() + f.size() + g.size() + h.size() + i.size()); char *const begin = &*result.begin(); @@ -1574,27 +1570,27 @@ string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, GOOGLE_DCHECK_GT(uintptr_t((src).data() - (dest).data()), \ uintptr_t((dest).size())) -void StrAppend(string *result, const AlphaNum &a) { +void StrAppend(std::string *result, const AlphaNum &a) { GOOGLE_DCHECK_NO_OVERLAP(*result, a); result->append(a.data(), a.size()); } -void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) { +void StrAppend(std::string *result, const AlphaNum &a, const AlphaNum &b) { GOOGLE_DCHECK_NO_OVERLAP(*result, a); GOOGLE_DCHECK_NO_OVERLAP(*result, b); - string::size_type old_size = result->size(); + std::string::size_type old_size = result->size(); result->resize(old_size + a.size() + b.size()); char *const begin = &*result->begin(); char *out = Append2(begin + old_size, a, b); GOOGLE_DCHECK_EQ(out, begin + result->size()); } -void StrAppend(string *result, - const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) { +void StrAppend(std::string *result, const AlphaNum &a, const AlphaNum &b, + const AlphaNum &c) { GOOGLE_DCHECK_NO_OVERLAP(*result, a); GOOGLE_DCHECK_NO_OVERLAP(*result, b); GOOGLE_DCHECK_NO_OVERLAP(*result, c); - string::size_type old_size = result->size(); + std::string::size_type old_size = result->size(); result->resize(old_size + a.size() + b.size() + c.size()); char *const begin = &*result->begin(); char *out = Append2(begin + old_size, a, b); @@ -1602,32 +1598,29 @@ void StrAppend(string *result, GOOGLE_DCHECK_EQ(out, begin + result->size()); } -void StrAppend(string *result, - const AlphaNum &a, const AlphaNum &b, +void StrAppend(std::string *result, const AlphaNum &a, const AlphaNum &b, const AlphaNum &c, const AlphaNum &d) { GOOGLE_DCHECK_NO_OVERLAP(*result, a); GOOGLE_DCHECK_NO_OVERLAP(*result, b); GOOGLE_DCHECK_NO_OVERLAP(*result, c); GOOGLE_DCHECK_NO_OVERLAP(*result, d); - string::size_type old_size = 
result->size(); + std::string::size_type old_size = result->size(); result->resize(old_size + a.size() + b.size() + c.size() + d.size()); char *const begin = &*result->begin(); char *out = Append4(begin + old_size, a, b, c, d); GOOGLE_DCHECK_EQ(out, begin + result->size()); } -int GlobalReplaceSubstring(const string& substring, - const string& replacement, - string* s) { - GOOGLE_CHECK(s != NULL); +int GlobalReplaceSubstring(const std::string &substring, + const std::string &replacement, std::string *s) { + GOOGLE_CHECK(s != nullptr); if (s->empty() || substring.empty()) return 0; - string tmp; + std::string tmp; int num_replacements = 0; int pos = 0; for (int match_pos = s->find(substring.data(), pos, substring.length()); - match_pos != string::npos; - pos = match_pos + substring.length(), + match_pos != std::string::npos; pos = match_pos + substring.length(), match_pos = s->find(substring.data(), pos, substring.length())) { ++num_replacements; // Append the original content before the match. 
@@ -1959,24 +1952,25 @@ int Base64UnescapeInternal(const char *src_param, int szsrc, // #include // #include // #include +// #include // main() // { // static const char Base64[] = // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; -// char *pos; +// const char *pos; // int idx, i, j; // printf(" "); // for (i = 0; i < 255; i += 8) { // for (j = i; j < i + 8; j++) { // pos = strchr(Base64, j); -// if ((pos == NULL) || (j == 0)) +// if ((pos == nullptr) || (j == 0)) // idx = -1; // else // idx = pos - Base64; // if (idx == -1) // printf(" %2d, ", idx); // else -// printf(" %2d/*%c*/,", idx, j); +// printf(" %2d/""*%c*""/,", idx, j); // } // printf("\n "); // } @@ -1994,7 +1988,7 @@ static const signed char kUnBase64[] = { 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, - 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, + 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, @@ -2028,7 +2022,7 @@ static const signed char kUnWebSafeBase64[] = { 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, - 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, + 7/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/, -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, @@ -2057,8 +2051,8 @@ int WebSafeBase64Unescape(const char *src, int szsrc, char *dest, int szdest) { return Base64UnescapeInternal(src, szsrc, dest, szdest, 
kUnWebSafeBase64); } -static bool Base64UnescapeInternal(const char* src, int slen, string* dest, - const signed char* unbase64) { +static bool Base64UnescapeInternal(const char *src, int slen, std::string *dest, + const signed char *unbase64) { // Determine the size of the output string. Base64 encodes every 3 bytes into // 4 characters. any leftover chars are added directly for good measure. // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548 @@ -2082,11 +2076,11 @@ static bool Base64UnescapeInternal(const char* src, int slen, string* dest, return true; } -bool Base64Unescape(StringPiece src, string* dest) { +bool Base64Unescape(StringPiece src, std::string *dest) { return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64); } -bool WebSafeBase64Unescape(StringPiece src, string* dest) { +bool WebSafeBase64Unescape(StringPiece src, std::string *dest) { return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64); } @@ -2205,9 +2199,9 @@ int WebSafeBase64Escape(const unsigned char *src, int szsrc, char *dest, kWebSafeBase64Chars, do_padding); } -void Base64EscapeInternal(const unsigned char* src, int szsrc, - string* dest, bool do_padding, - const char* base64_chars) { +void Base64EscapeInternal(const unsigned char *src, int szsrc, + std::string *dest, bool do_padding, + const char *base64_chars) { const int calc_escaped_size = CalculateBase64EscapedLen(szsrc, do_padding); dest->resize(calc_escaped_size); @@ -2220,27 +2214,27 @@ void Base64EscapeInternal(const unsigned char* src, int szsrc, dest->erase(escaped_len); } -void Base64Escape(const unsigned char *src, int szsrc, - string* dest, bool do_padding) { +void Base64Escape(const unsigned char *src, int szsrc, std::string *dest, + bool do_padding) { Base64EscapeInternal(src, szsrc, dest, do_padding, kBase64Chars); } -void WebSafeBase64Escape(const unsigned char *src, int szsrc, - string *dest, bool do_padding) { +void WebSafeBase64Escape(const unsigned char 
*src, int szsrc, std::string *dest, + bool do_padding) { Base64EscapeInternal(src, szsrc, dest, do_padding, kWebSafeBase64Chars); } -void Base64Escape(StringPiece src, string* dest) { +void Base64Escape(StringPiece src, std::string *dest) { Base64Escape(reinterpret_cast(src.data()), src.size(), dest, true); } -void WebSafeBase64Escape(StringPiece src, string* dest) { +void WebSafeBase64Escape(StringPiece src, std::string *dest) { WebSafeBase64Escape(reinterpret_cast(src.data()), src.size(), dest, false); } -void WebSafeBase64EscapeWithPadding(StringPiece src, string* dest) { +void WebSafeBase64EscapeWithPadding(StringPiece src, std::string *dest) { WebSafeBase64Escape(reinterpret_cast(src.data()), src.size(), dest, true); } @@ -2281,16 +2275,19 @@ int EncodeAsUTF8Char(uint32 code_point, char* output) { // Table of UTF-8 character lengths, based on first byte static const unsigned char kUTF8LenTbl[256] = { - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4 -}; + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}; // Return length of a single UTF-8 source character int UTF8FirstLetterNumBytes(const char* src, int len) { @@ -2300,5 +2297,181 @@ int UTF8FirstLetterNumBytes(const char* src, int len) { return kUTF8LenTbl[*reinterpret_cast(src)]; } +// ---------------------------------------------------------------------- +// CleanStringLineEndings() +// Clean up a multi-line string to conform to Unix line endings. +// Reads from src and appends to dst, so usually dst should be empty. +// +// If there is no line ending at the end of a non-empty string, it can +// be added automatically. +// +// Four different types of input are correctly handled: +// +// - Unix/Linux files: line ending is LF: pass through unchanged +// +// - DOS/Windows files: line ending is CRLF: convert to LF +// +// - Legacy Mac files: line ending is CR: convert to LF +// +// - Garbled files: random line endings: convert gracefully +// lonely CR, lonely LF, CRLF: convert to LF +// +// @param src The multi-line string to convert +// @param dst The converted string is appended to this string +// @param auto_end_last_line Automatically terminate the last line +// +// Limitations: +// +// This does not do the right thing for CRCRLF files created by +// broken programs that do another Unix->DOS conversion on files +// that are already in CRLF format. 
For this, a two-pass approach +// brute-force would be needed that +// +// (1) determines the presence of LF (first one is ok) +// (2) if yes, removes any CR, else convert every CR to LF + +void CleanStringLineEndings(const std::string &src, std::string *dst, + bool auto_end_last_line) { + if (dst->empty()) { + dst->append(src); + CleanStringLineEndings(dst, auto_end_last_line); + } else { + std::string tmp = src; + CleanStringLineEndings(&tmp, auto_end_last_line); + dst->append(tmp); + } +} + +void CleanStringLineEndings(std::string *str, bool auto_end_last_line) { + ptrdiff_t output_pos = 0; + bool r_seen = false; + ptrdiff_t len = str->size(); + + char *p = &(*str)[0]; + + for (ptrdiff_t input_pos = 0; input_pos < len;) { + if (!r_seen && input_pos + 8 < len) { + uint64_t v = GOOGLE_UNALIGNED_LOAD64(p + input_pos); + // Loop over groups of 8 bytes at a time until we come across + // a word that has a byte whose value is less than or equal to + // '\r' (i.e. could contain a \n (0x0a) or a \r (0x0d) ). + // + // We use a has_less macro that quickly tests a whole 64-bit + // word to see if any of the bytes has a value < N. 
+ // + // For more details, see: + // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord +#define has_less(x, n) (((x) - ~0ULL / 255 * (n)) & ~(x) & ~0ULL / 255 * 128) + if (!has_less(v, '\r' + 1)) { +#undef has_less + // No byte in this word has a value that could be a \r or a \n + if (output_pos != input_pos) { + GOOGLE_UNALIGNED_STORE64(p + output_pos, v); + } + input_pos += 8; + output_pos += 8; + continue; + } + } + std::string::const_reference in = p[input_pos]; + if (in == '\r') { + if (r_seen) p[output_pos++] = '\n'; + r_seen = true; + } else if (in == '\n') { + if (input_pos != output_pos) + p[output_pos++] = '\n'; + else + output_pos++; + r_seen = false; + } else { + if (r_seen) p[output_pos++] = '\n'; + r_seen = false; + if (input_pos != output_pos) + p[output_pos++] = in; + else + output_pos++; + } + input_pos++; + } + if (r_seen || + (auto_end_last_line && output_pos > 0 && p[output_pos - 1] != '\n')) { + str->resize(output_pos + 1); + str->operator[](output_pos) = '\n'; + } else if (output_pos < len) { + str->resize(output_pos); + } +} + +namespace internal { + +// ---------------------------------------------------------------------- +// NoLocaleStrtod() +// This code will make you cry. +// ---------------------------------------------------------------------- + +namespace { + +// Returns a string identical to *input except that the character pointed to +// by radix_pos (which should be '.') is replaced with the locale-specific +// radix character. +std::string LocalizeRadix(const char *input, const char *radix_pos) { + // Determine the locale-specific radix character by calling sprintf() to + // print the number 1.5, then stripping off the digits. As far as I can + // tell, this is the only portable, thread-safe way to get the C library + // to divuldge the locale's radix character. No, localeconv() is NOT + // thread-safe. 
+ char temp[16]; + int size = snprintf(temp, sizeof(temp), "%.1f", 1.5); + GOOGLE_CHECK_EQ(temp[0], '1'); + GOOGLE_CHECK_EQ(temp[size - 1], '5'); + GOOGLE_CHECK_LE(size, 6); + + // Now replace the '.' in the input with it. + std::string result; + result.reserve(strlen(input) + size - 3); + result.append(input, radix_pos); + result.append(temp + 1, size - 2); + result.append(radix_pos + 1); + return result; +} + +} // namespace + +double NoLocaleStrtod(const char *str, char **endptr) { + // We cannot simply set the locale to "C" temporarily with setlocale() + // as this is not thread-safe. Instead, we try to parse in the current + // locale first. If parsing stops at a '.' character, then this is a + // pretty good hint that we're actually in some other locale in which + // '.' is not the radix character. + + char *temp_endptr; + double result = strtod(str, &temp_endptr); + if (endptr != NULL) *endptr = temp_endptr; + if (*temp_endptr != '.') return result; + + // Parsing halted on a '.'. Perhaps we're in a different locale? Let's + // try to replace the '.' with a locale-specific radix character and + // try again. + std::string localized = LocalizeRadix(str, temp_endptr); + const char *localized_cstr = localized.c_str(); + char *localized_endptr; + result = strtod(localized_cstr, &localized_endptr); + if ((localized_endptr - localized_cstr) > (temp_endptr - str)) { + // This attempt got further, so replacing the decimal must have helped. + // Update endptr to point at the right location. + if (endptr != NULL) { + // size_diff is non-zero if the localized radix has multiple bytes. + int size_diff = localized.size() - strlen(str); + // const_cast is necessary to match the strtod() interface. 
+ *endptr = const_cast( + str + (localized_endptr - localized_cstr - size_diff)); + } + } + + return result; +} + +} // namespace internal + } // namespace protobuf } // namespace google diff --git a/third_party/protobuf-lite/time.cc b/third_party/protobuf-lite/time.cc index 6def637e..922be76d 100644 --- a/third_party/protobuf-lite/time.cc +++ b/third_party/protobuf-lite/time.cc @@ -130,7 +130,7 @@ int64 SecondsSinceCommonEra(const DateTime& time) { // Format nanoseconds with either 3, 6, or 9 digits depending on the required // precision to represent the exact value. -string FormatNanos(int32 nanos) { +std::string FormatNanos(int32 nanos) { if (nanos % kNanosPerMillisecond == 0) { return StringPrintf("%03d", nanos / kNanosPerMillisecond); } else if (nanos % kNanosPerMicrosecond == 0) { @@ -142,12 +142,12 @@ string FormatNanos(int32 nanos) { // Parses an integer from a null-terminated char sequence. The method // consumes at most "width" chars. Returns a pointer after the consumed -// integer, or NULL if the data does not start with an integer or the +// integer, or nullptr if the data does not start with an integer or the // integer value does not fall in the range of [min_value, max_value]. const char* ParseInt(const char* data, int width, int min_value, int max_value, int* result) { if (!ascii_isdigit(*data)) { - return NULL; + return nullptr; } int value = 0; for (int i = 0; i < width; ++i, ++data) { @@ -161,7 +161,7 @@ const char* ParseInt(const char* data, int width, int min_value, *result = value; return data; } else { - return NULL; + return nullptr; } } @@ -169,7 +169,7 @@ const char* ParseInt(const char* data, int width, int min_value, // "010" will be parsed to 10000000 nanos. 
const char* ParseNanos(const char* data, int32* nanos) { if (!ascii_isdigit(*data)) { - return NULL; + return nullptr; } int value = 0; int len = 0; @@ -193,15 +193,15 @@ const char* ParseNanos(const char* data, int32* nanos) { const char* ParseTimezoneOffset(const char* data, int64* offset) { // Accept format "HH:MM". E.g., "08:00" int hour; - if ((data = ParseInt(data, 2, 0, 23, &hour)) == NULL) { - return NULL; + if ((data = ParseInt(data, 2, 0, 23, &hour)) == nullptr) { + return nullptr; } if (*data++ != ':') { - return NULL; + return nullptr; } int minute; - if ((data = ParseInt(data, 2, 0, 59, &minute)) == NULL) { - return NULL; + if ((data = ParseInt(data, 2, 0, 59, &minute)) == nullptr) { + return nullptr; } *offset = (hour * 60 + minute) * 60; return data; @@ -212,7 +212,7 @@ bool SecondsToDateTime(int64 seconds, DateTime* time) { if (seconds < kMinTime || seconds > kMaxTime) { return false; } - // It's easier to calcuate the DateTime starting from 0001-01-01T00:00:00 + // It's easier to calculate the DateTime starting from 0001-01-01T00:00:00 seconds = seconds + kSecondsFromEraToEpoch; int year = 1; if (seconds >= kSecondsPer400Years) { @@ -264,25 +264,25 @@ bool DateTimeToSeconds(const DateTime& time, int64* seconds) { void GetCurrentTime(int64* seconds, int32* nanos) { // TODO(xiaofeng): Improve the accuracy of this implementation (or just // remove this method from protobuf). 
- *seconds = time(NULL); + *seconds = time(nullptr); *nanos = 0; } -string FormatTime(int64 seconds, int32 nanos) { +std::string FormatTime(int64 seconds, int32 nanos) { DateTime time; if (nanos < 0 || nanos > 999999999 || !SecondsToDateTime(seconds, &time)) { return "InvalidTime"; } - string result = StringPrintf("%04d-%02d-%02dT%02d:%02d:%02d", - time.year, time.month, time.day, - time.hour, time.minute, time.second); + std::string result = + StringPrintf("%04d-%02d-%02dT%02d:%02d:%02d", time.year, time.month, + time.day, time.hour, time.minute, time.second); if (nanos != 0) { result += "." + FormatNanos(nanos); } return result + "Z"; } -bool ParseTime(const string& value, int64* seconds, int32* nanos) { +bool ParseTime(const std::string& value, int64* seconds, int32* nanos) { DateTime time; const char* data = value.c_str(); // We only accept: @@ -290,37 +290,37 @@ bool ParseTime(const string& value, int64* seconds, int32* nanos) { // With UTC offset: 2015-05-20T13:29:35.120-08:00 // Parse year - if ((data = ParseInt(data, 4, 1, 9999, &time.year)) == NULL) { + if ((data = ParseInt(data, 4, 1, 9999, &time.year)) == nullptr) { return false; } // Expect '-' if (*data++ != '-') return false; // Parse month - if ((data = ParseInt(data, 2, 1, 12, &time.month)) == NULL) { + if ((data = ParseInt(data, 2, 1, 12, &time.month)) == nullptr) { return false; } // Expect '-' if (*data++ != '-') return false; // Parse day - if ((data = ParseInt(data, 2, 1, 31, &time.day)) == NULL) { + if ((data = ParseInt(data, 2, 1, 31, &time.day)) == nullptr) { return false; } // Expect 'T' if (*data++ != 'T') return false; // Parse hour - if ((data = ParseInt(data, 2, 0, 23, &time.hour)) == NULL) { + if ((data = ParseInt(data, 2, 0, 23, &time.hour)) == nullptr) { return false; } // Expect ':' if (*data++ != ':') return false; // Parse minute - if ((data = ParseInt(data, 2, 0, 59, &time.minute)) == NULL) { + if ((data = ParseInt(data, 2, 0, 59, &time.minute)) == nullptr) { return false; } // 
Expect ':' if (*data++ != ':') return false; // Parse second - if ((data = ParseInt(data, 2, 0, 59, &time.second)) == NULL) { + if ((data = ParseInt(data, 2, 0, 59, &time.second)) == nullptr) { return false; } if (!DateTimeToSeconds(time, seconds)) { @@ -330,7 +330,7 @@ bool ParseTime(const string& value, int64* seconds, int32* nanos) { if (*data == '.') { ++data; // Parse nanoseconds. - if ((data = ParseNanos(data, nanos)) == NULL) { + if ((data = ParseNanos(data, nanos)) == nullptr) { return false; } } else { @@ -342,14 +342,14 @@ bool ParseTime(const string& value, int64* seconds, int32* nanos) { } else if (*data == '+') { ++data; int64 offset; - if ((data = ParseTimezoneOffset(data, &offset)) == NULL) { + if ((data = ParseTimezoneOffset(data, &offset)) == nullptr) { return false; } *seconds -= offset; } else if (*data == '-') { ++data; int64 offset; - if ((data = ParseTimezoneOffset(data, &offset)) == NULL) { + if ((data = ParseTimezoneOffset(data, &offset)) == nullptr) { return false; } *seconds += offset; diff --git a/third_party/protobuf-lite/wire_format_lite.cc b/third_party/protobuf-lite/wire_format_lite.cc index 1d8cda5a..dc256082 100644 --- a/third_party/protobuf-lite/wire_format_lite.cc +++ b/third_party/protobuf-lite/wire_format_lite.cc @@ -32,24 +32,26 @@ // Based on original Protocol Buffers design by // Sanjay Ghemawat, Jeff Dean, and others. -#include +#include #include #include #include + #include #include #include -#include +#include #include #include +#include + namespace google { namespace protobuf { namespace internal { - #if !defined(_MSC_VER) || _MSC_VER >= 1900 // Old version of MSVC doesn't like definitions of inline constants, GCC // requires them. 
@@ -62,64 +64,63 @@ const int WireFormatLite::kMessageSetMessageTag; // IBM xlC requires prefixing constants with WireFormatLite:: const size_t WireFormatLite::kMessageSetItemTagsSize = - io::CodedOutputStream::StaticVarintSize32< - WireFormatLite::kMessageSetItemStartTag>::value + - io::CodedOutputStream::StaticVarintSize32< - WireFormatLite::kMessageSetItemEndTag>::value + - io::CodedOutputStream::StaticVarintSize32< - WireFormatLite::kMessageSetTypeIdTag>::value + - io::CodedOutputStream::StaticVarintSize32< - WireFormatLite::kMessageSetMessageTag>::value; + io::CodedOutputStream::StaticVarintSize32< + WireFormatLite::kMessageSetItemStartTag>::value + + io::CodedOutputStream::StaticVarintSize32< + WireFormatLite::kMessageSetItemEndTag>::value + + io::CodedOutputStream::StaticVarintSize32< + WireFormatLite::kMessageSetTypeIdTag>::value + + io::CodedOutputStream::StaticVarintSize32< + WireFormatLite::kMessageSetMessageTag>::value; const WireFormatLite::CppType -WireFormatLite::kFieldTypeToCppTypeMap[MAX_FIELD_TYPE + 1] = { - static_cast(0), // 0 is reserved for errors - - CPPTYPE_DOUBLE, // TYPE_DOUBLE - CPPTYPE_FLOAT, // TYPE_FLOAT - CPPTYPE_INT64, // TYPE_INT64 - CPPTYPE_UINT64, // TYPE_UINT64 - CPPTYPE_INT32, // TYPE_INT32 - CPPTYPE_UINT64, // TYPE_FIXED64 - CPPTYPE_UINT32, // TYPE_FIXED32 - CPPTYPE_BOOL, // TYPE_BOOL - CPPTYPE_STRING, // TYPE_STRING - CPPTYPE_MESSAGE, // TYPE_GROUP - CPPTYPE_MESSAGE, // TYPE_MESSAGE - CPPTYPE_STRING, // TYPE_BYTES - CPPTYPE_UINT32, // TYPE_UINT32 - CPPTYPE_ENUM, // TYPE_ENUM - CPPTYPE_INT32, // TYPE_SFIXED32 - CPPTYPE_INT64, // TYPE_SFIXED64 - CPPTYPE_INT32, // TYPE_SINT32 - CPPTYPE_INT64, // TYPE_SINT64 + WireFormatLite::kFieldTypeToCppTypeMap[MAX_FIELD_TYPE + 1] = { + static_cast(0), // 0 is reserved for errors + + CPPTYPE_DOUBLE, // TYPE_DOUBLE + CPPTYPE_FLOAT, // TYPE_FLOAT + CPPTYPE_INT64, // TYPE_INT64 + CPPTYPE_UINT64, // TYPE_UINT64 + CPPTYPE_INT32, // TYPE_INT32 + CPPTYPE_UINT64, // TYPE_FIXED64 + CPPTYPE_UINT32, // 
TYPE_FIXED32 + CPPTYPE_BOOL, // TYPE_BOOL + CPPTYPE_STRING, // TYPE_STRING + CPPTYPE_MESSAGE, // TYPE_GROUP + CPPTYPE_MESSAGE, // TYPE_MESSAGE + CPPTYPE_STRING, // TYPE_BYTES + CPPTYPE_UINT32, // TYPE_UINT32 + CPPTYPE_ENUM, // TYPE_ENUM + CPPTYPE_INT32, // TYPE_SFIXED32 + CPPTYPE_INT64, // TYPE_SFIXED64 + CPPTYPE_INT32, // TYPE_SINT32 + CPPTYPE_INT64, // TYPE_SINT64 }; const WireFormatLite::WireType -WireFormatLite::kWireTypeForFieldType[MAX_FIELD_TYPE + 1] = { - static_cast(-1), // invalid - WireFormatLite::WIRETYPE_FIXED64, // TYPE_DOUBLE - WireFormatLite::WIRETYPE_FIXED32, // TYPE_FLOAT - WireFormatLite::WIRETYPE_VARINT, // TYPE_INT64 - WireFormatLite::WIRETYPE_VARINT, // TYPE_UINT64 - WireFormatLite::WIRETYPE_VARINT, // TYPE_INT32 - WireFormatLite::WIRETYPE_FIXED64, // TYPE_FIXED64 - WireFormatLite::WIRETYPE_FIXED32, // TYPE_FIXED32 - WireFormatLite::WIRETYPE_VARINT, // TYPE_BOOL - WireFormatLite::WIRETYPE_LENGTH_DELIMITED, // TYPE_STRING - WireFormatLite::WIRETYPE_START_GROUP, // TYPE_GROUP - WireFormatLite::WIRETYPE_LENGTH_DELIMITED, // TYPE_MESSAGE - WireFormatLite::WIRETYPE_LENGTH_DELIMITED, // TYPE_BYTES - WireFormatLite::WIRETYPE_VARINT, // TYPE_UINT32 - WireFormatLite::WIRETYPE_VARINT, // TYPE_ENUM - WireFormatLite::WIRETYPE_FIXED32, // TYPE_SFIXED32 - WireFormatLite::WIRETYPE_FIXED64, // TYPE_SFIXED64 - WireFormatLite::WIRETYPE_VARINT, // TYPE_SINT32 - WireFormatLite::WIRETYPE_VARINT, // TYPE_SINT64 + WireFormatLite::kWireTypeForFieldType[MAX_FIELD_TYPE + 1] = { + static_cast(-1), // invalid + WireFormatLite::WIRETYPE_FIXED64, // TYPE_DOUBLE + WireFormatLite::WIRETYPE_FIXED32, // TYPE_FLOAT + WireFormatLite::WIRETYPE_VARINT, // TYPE_INT64 + WireFormatLite::WIRETYPE_VARINT, // TYPE_UINT64 + WireFormatLite::WIRETYPE_VARINT, // TYPE_INT32 + WireFormatLite::WIRETYPE_FIXED64, // TYPE_FIXED64 + WireFormatLite::WIRETYPE_FIXED32, // TYPE_FIXED32 + WireFormatLite::WIRETYPE_VARINT, // TYPE_BOOL + WireFormatLite::WIRETYPE_LENGTH_DELIMITED, // TYPE_STRING + 
WireFormatLite::WIRETYPE_START_GROUP, // TYPE_GROUP + WireFormatLite::WIRETYPE_LENGTH_DELIMITED, // TYPE_MESSAGE + WireFormatLite::WIRETYPE_LENGTH_DELIMITED, // TYPE_BYTES + WireFormatLite::WIRETYPE_VARINT, // TYPE_UINT32 + WireFormatLite::WIRETYPE_VARINT, // TYPE_ENUM + WireFormatLite::WIRETYPE_FIXED32, // TYPE_SFIXED32 + WireFormatLite::WIRETYPE_FIXED64, // TYPE_SFIXED64 + WireFormatLite::WIRETYPE_VARINT, // TYPE_SINT32 + WireFormatLite::WIRETYPE_VARINT, // TYPE_SINT64 }; -bool WireFormatLite::SkipField( - io::CodedInputStream* input, uint32 tag) { +bool WireFormatLite::SkipField(io::CodedInputStream* input, uint32 tag) { // Field number 0 is illegal. if (WireFormatLite::GetTagFieldNumber(tag) == 0) return false; switch (WireFormatLite::GetTagWireType(tag)) { @@ -144,9 +145,9 @@ bool WireFormatLite::SkipField( if (!SkipMessage(input)) return false; input->DecrementRecursionDepth(); // Check that the ending tag matched the starting tag. - if (!input->LastTagWas(WireFormatLite::MakeTag( - WireFormatLite::GetTagFieldNumber(tag), - WireFormatLite::WIRETYPE_END_GROUP))) { + if (!input->LastTagWas( + WireFormatLite::MakeTag(WireFormatLite::GetTagFieldNumber(tag), + WireFormatLite::WIRETYPE_END_GROUP))) { return false; } return true; @@ -165,8 +166,8 @@ bool WireFormatLite::SkipField( } } -bool WireFormatLite::SkipField( - io::CodedInputStream* input, uint32 tag, io::CodedOutputStream* output) { +bool WireFormatLite::SkipField(io::CodedInputStream* input, uint32 tag, + io::CodedOutputStream* output) { // Field number 0 is illegal. if (WireFormatLite::GetTagFieldNumber(tag) == 0) return false; switch (WireFormatLite::GetTagWireType(tag)) { @@ -190,7 +191,7 @@ bool WireFormatLite::SkipField( output->WriteVarint32(tag); output->WriteVarint32(length); // TODO(mkilavuz): Provide API to prevent extra string copying. 
- string temp; + std::string temp; if (!input->ReadString(&temp, length)) return false; output->WriteString(temp); return true; @@ -201,9 +202,9 @@ bool WireFormatLite::SkipField( if (!SkipMessage(input, output)) return false; input->DecrementRecursionDepth(); // Check that the ending tag matched the starting tag. - if (!input->LastTagWas(WireFormatLite::MakeTag( - WireFormatLite::GetTagFieldNumber(tag), - WireFormatLite::WIRETYPE_END_GROUP))) { + if (!input->LastTagWas( + WireFormatLite::MakeTag(WireFormatLite::GetTagFieldNumber(tag), + WireFormatLite::WIRETYPE_END_GROUP))) { return false; } return true; @@ -244,7 +245,7 @@ bool WireFormatLite::SkipMessage(io::CodedInputStream* input) { } bool WireFormatLite::SkipMessage(io::CodedInputStream* input, - io::CodedOutputStream* output) { + io::CodedOutputStream* output) { while (true) { uint32 tag = input->ReadTag(); if (tag == 0) { @@ -264,8 +265,7 @@ bool WireFormatLite::SkipMessage(io::CodedInputStream* input, } } -bool FieldSkipper::SkipField( - io::CodedInputStream* input, uint32 tag) { +bool FieldSkipper::SkipField(io::CodedInputStream* input, uint32 tag) { return WireFormatLite::SkipField(input, tag); } @@ -273,13 +273,12 @@ bool FieldSkipper::SkipMessage(io::CodedInputStream* input) { return WireFormatLite::SkipMessage(input); } -void FieldSkipper::SkipUnknownEnum( - int /* field_number */, int /* value */) { +void FieldSkipper::SkipUnknownEnum(int /* field_number */, int /* value */) { // Nothing. 
} -bool CodedOutputStreamFieldSkipper::SkipField( - io::CodedInputStream* input, uint32 tag) { +bool CodedOutputStreamFieldSkipper::SkipField(io::CodedInputStream* input, + uint32 tag) { return WireFormatLite::SkipField(input, tag, unknown_fields_); } @@ -287,45 +286,21 @@ bool CodedOutputStreamFieldSkipper::SkipMessage(io::CodedInputStream* input) { return WireFormatLite::SkipMessage(input, unknown_fields_); } -void CodedOutputStreamFieldSkipper::SkipUnknownEnum( - int field_number, int value) { +void CodedOutputStreamFieldSkipper::SkipUnknownEnum(int field_number, + int value) { unknown_fields_->WriteVarint32(field_number); unknown_fields_->WriteVarint64(value); } -bool WireFormatLite::ReadPackedEnumNoInline(io::CodedInputStream* input, - bool (*is_valid)(int), - RepeatedField* values) { - uint32 length; - if (!input->ReadVarint32(&length)) return false; - io::CodedInputStream::Limit limit = input->PushLimit(length); - while (input->BytesUntilLimit() > 0) { - int value; - if (!google::protobuf::internal::WireFormatLite::ReadPrimitive< - int, WireFormatLite::TYPE_ENUM>(input, &value)) { - return false; - } - if (is_valid == NULL || is_valid(value)) { - values->Add(value); - } - } - input->PopLimit(limit); - return true; -} - bool WireFormatLite::ReadPackedEnumPreserveUnknowns( - io::CodedInputStream* input, - int field_number, - bool (*is_valid)(int), - io::CodedOutputStream* unknown_fields_stream, - RepeatedField* values) { + io::CodedInputStream* input, int field_number, bool (*is_valid)(int), + io::CodedOutputStream* unknown_fields_stream, RepeatedField* values) { uint32 length; if (!input->ReadVarint32(&length)) return false; io::CodedInputStream::Limit limit = input->PushLimit(length); while (input->BytesUntilLimit() > 0) { int value; - if (!google::protobuf::internal::WireFormatLite::ReadPrimitive< - int, WireFormatLite::TYPE_ENUM>(input, &value)) { + if (!ReadPrimitive(input, &value)) { return false; } if (is_valid == NULL || is_valid(value)) { @@ -386,7 
+361,7 @@ static void WriteArray(const CType* a, int n, io::CodedOutputStream* output) { int to_do = std::min(kAtATime, n - i); uint8* ptr = buf; for (int j = 0; j < to_do; j++) { - EncodeFixedSizeValue(a[i+j], ptr); + EncodeFixedSizeValue(a[i + j], ptr); ptr += sizeof(a[0]); } output->WriteRaw(buf, to_do * sizeof(a[0])); @@ -400,12 +375,12 @@ void WireFormatLite::WriteFloatArray(const float* a, int n, } void WireFormatLite::WriteDoubleArray(const double* a, int n, - io::CodedOutputStream* output) { + io::CodedOutputStream* output) { WriteArray(a, n, output); } void WireFormatLite::WriteFixed32Array(const uint32* a, int n, - io::CodedOutputStream* output) { + io::CodedOutputStream* output) { WriteArray(a, n, output); } @@ -415,12 +390,12 @@ void WireFormatLite::WriteFixed64Array(const uint64* a, int n, } void WireFormatLite::WriteSFixed32Array(const int32* a, int n, - io::CodedOutputStream* output) { + io::CodedOutputStream* output) { WriteArray(a, n, output); } void WireFormatLite::WriteSFixed64Array(const int64* a, int n, - io::CodedOutputStream* output) { + io::CodedOutputStream* output) { WriteArray(a, n, output); } @@ -500,7 +475,7 @@ void WireFormatLite::WriteEnum(int field_number, int value, WriteEnumNoTag(value, output); } -void WireFormatLite::WriteString(int field_number, const string& value, +void WireFormatLite::WriteString(int field_number, const std::string& value, io::CodedOutputStream* output) { // String is for UTF-8 text only WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output); @@ -508,25 +483,25 @@ void WireFormatLite::WriteString(int field_number, const string& value, output->WriteVarint32(value.size()); output->WriteString(value); } -void WireFormatLite::WriteStringMaybeAliased( - int field_number, const string& value, - io::CodedOutputStream* output) { +void WireFormatLite::WriteStringMaybeAliased(int field_number, + const std::string& value, + io::CodedOutputStream* output) { // String is for UTF-8 text only WriteTag(field_number, 
WIRETYPE_LENGTH_DELIMITED, output); GOOGLE_CHECK_LE(value.size(), kint32max); output->WriteVarint32(value.size()); output->WriteRawMaybeAliased(value.data(), value.size()); } -void WireFormatLite::WriteBytes(int field_number, const string& value, +void WireFormatLite::WriteBytes(int field_number, const std::string& value, io::CodedOutputStream* output) { WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output); GOOGLE_CHECK_LE(value.size(), kint32max); output->WriteVarint32(value.size()); output->WriteString(value); } -void WireFormatLite::WriteBytesMaybeAliased( - int field_number, const string& value, - io::CodedOutputStream* output) { +void WireFormatLite::WriteBytesMaybeAliased(int field_number, + const std::string& value, + io::CodedOutputStream* output) { WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output); GOOGLE_CHECK_LE(value.size(), kint32max); output->WriteVarint32(value.size()); @@ -534,16 +509,14 @@ void WireFormatLite::WriteBytesMaybeAliased( } -void WireFormatLite::WriteGroup(int field_number, - const MessageLite& value, +void WireFormatLite::WriteGroup(int field_number, const MessageLite& value, io::CodedOutputStream* output) { WriteTag(field_number, WIRETYPE_START_GROUP, output); value.SerializeWithCachedSizes(output); WriteTag(field_number, WIRETYPE_END_GROUP, output); } -void WireFormatLite::WriteMessage(int field_number, - const MessageLite& value, +void WireFormatLite::WriteMessage(int field_number, const MessageLite& value, io::CodedOutputStream* output) { WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output); const int size = value.GetCachedSize(); @@ -551,19 +524,17 @@ void WireFormatLite::WriteMessage(int field_number, value.SerializeWithCachedSizes(output); } +void WireFormatLite::WriteSubMessageMaybeToArray( + int size, const MessageLite& value, io::CodedOutputStream* output) { + output->SetCur(value._InternalSerialize(output->Cur(), output->EpsCopy())); +} + void WireFormatLite::WriteGroupMaybeToArray(int field_number, const 
MessageLite& value, io::CodedOutputStream* output) { WriteTag(field_number, WIRETYPE_START_GROUP, output); const int size = value.GetCachedSize(); - uint8* target = output->GetDirectBufferForNBytesAndAdvance(size); - if (target != NULL) { - uint8* end = value.InternalSerializeWithCachedSizesToArray( - output->IsSerializationDeterministic(), target); - GOOGLE_DCHECK_EQ(end - target, size); - } else { - value.SerializeWithCachedSizes(output); - } + WriteSubMessageMaybeToArray(size, value, output); WriteTag(field_number, WIRETYPE_END_GROUP, output); } @@ -573,39 +544,43 @@ void WireFormatLite::WriteMessageMaybeToArray(int field_number, WriteTag(field_number, WIRETYPE_LENGTH_DELIMITED, output); const int size = value.GetCachedSize(); output->WriteVarint32(size); - uint8* target = output->GetDirectBufferForNBytesAndAdvance(size); - if (target != NULL) { - uint8* end = value.InternalSerializeWithCachedSizesToArray( - output->IsSerializationDeterministic(), target); - GOOGLE_DCHECK_EQ(end - target, size); - } else { - value.SerializeWithCachedSizes(output); - } + WriteSubMessageMaybeToArray(size, value, output); } -GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE static bool ReadBytesToString( - io::CodedInputStream* input, string* value); +PROTOBUF_ALWAYS_INLINE static bool ReadBytesToString( + io::CodedInputStream* input, std::string* value); inline static bool ReadBytesToString(io::CodedInputStream* input, - string* value) { + std::string* value) { uint32 length; - return input->ReadVarint32(&length) && - input->InternalReadStringInline(value, length); + return input->ReadVarint32(&length) && input->ReadString(value, length); } -bool WireFormatLite::ReadBytes(io::CodedInputStream* input, string* value) { +bool WireFormatLite::ReadBytes(io::CodedInputStream* input, + std::string* value) { return ReadBytesToString(input, value); } -bool WireFormatLite::ReadBytes(io::CodedInputStream* input, string** p) { - if (*p == &::google::protobuf::internal::GetEmptyStringAlreadyInited()) { 
- *p = new ::std::string(); +bool WireFormatLite::ReadBytes(io::CodedInputStream* input, std::string** p) { + if (*p == &GetEmptyStringAlreadyInited()) { + *p = new std::string(); } return ReadBytesToString(input, *p); } -bool WireFormatLite::VerifyUtf8String(const char* data, - int size, - Operation op, +void PrintUTF8ErrorLog(const char* field_name, const char* operation_str, + bool emit_stacktrace) { + std::string stacktrace; + std::string quoted_field_name = ""; + if (field_name != nullptr) { + quoted_field_name = StringPrintf(" '%s'", field_name); + } + GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid " + << "UTF-8 data when " << operation_str << " a protocol " + << "buffer. Use the 'bytes' type if you intend to send raw " + << "bytes. " << stacktrace; +} + +bool WireFormatLite::VerifyUtf8String(const char* data, int size, Operation op, const char* field_name) { if (!IsStructurallyValidUTF8(data, size)) { const char* operation_str = NULL; @@ -616,17 +591,9 @@ bool WireFormatLite::VerifyUtf8String(const char* data, case SERIALIZE: operation_str = "serializing"; break; - // no default case: have the compiler warn if a case is not covered. + // no default case: have the compiler warn if a case is not covered. } - string quoted_field_name = ""; - if (field_name != NULL) { - quoted_field_name = StringPrintf(" '%s'", field_name); - } - // no space below to avoid double space when the field name is missing. - GOOGLE_LOG(ERROR) << "String field" << quoted_field_name << " contains invalid " - << "UTF-8 data when " << operation_str << " a protocol " - << "buffer. Use the 'bytes' type if you intend to send raw " - << "bytes. "; + PrintUTF8ErrorLog(field_name, operation_str, false); return false; } return true; @@ -634,21 +601,19 @@ bool WireFormatLite::VerifyUtf8String(const char* data, // this code is deliberately written such that clang makes it into really // efficient SSE code. 
-template +template static size_t VarintSize(const T* data, const int n) { -#if __cplusplus >= 201103L static_assert(sizeof(T) == 4, "This routine only works for 32 bit integers"); // is_unsigned => !ZigZag - static_assert((std::is_unsigned::value ^ ZigZag) || - std::is_signed::value, - "Cannot ZigZag encode unsigned types"); + static_assert( + (std::is_unsigned::value ^ ZigZag) || std::is_signed::value, + "Cannot ZigZag encode unsigned types"); // is_unsigned => !SignExtended - static_assert((std::is_unsigned::value ^ SignExtended) || - std::is_signed::value, - "Cannot SignExtended unsigned types"); + static_assert( + (std::is_unsigned::value ^ SignExtended) || std::is_signed::value, + "Cannot SignExtended unsigned types"); static_assert(!(SignExtended && ZigZag), "Cannot SignExtended and ZigZag on the same type"); -#endif uint32 sum = n; uint32 msb_sum = 0; for (int i = 0; i < n; i++) { @@ -671,14 +636,12 @@ static size_t VarintSize(const T* data, const int n) { return sum; } -template +template static size_t VarintSize64(const T* data, const int n) { -#if __cplusplus >= 201103L static_assert(sizeof(T) == 8, "This routine only works for 64 bit integers"); // is_unsigned => !ZigZag static_assert(!ZigZag || !std::is_unsigned::value, "Cannot ZigZag encode unsigned types"); -#endif uint64 sum = n; for (int i = 0; i < n; i++) { uint64 x = data[i]; @@ -767,7 +730,7 @@ size_t WireFormatLite::EnumSize(const RepeatedField& value) { // enable this. 
#define USE_SSE_FOR_64_BIT_INTEGER_ARRAYS 0 #if USE_SSE_FOR_64_BIT_INTEGER_ARRAYS -size_t WireFormatLite::Int64Size (const RepeatedField< int64>& value) { +size_t WireFormatLite::Int64Size(const RepeatedField& value) { return VarintSize64(value.data(), value.size()); } @@ -775,13 +738,13 @@ size_t WireFormatLite::UInt64Size(const RepeatedField& value) { return VarintSize64(value.data(), value.size()); } -size_t WireFormatLite::SInt64Size(const RepeatedField< int64>& value) { +size_t WireFormatLite::SInt64Size(const RepeatedField& value) { return VarintSize64(value.data(), value.size()); } #else -size_t WireFormatLite::Int64Size (const RepeatedField< int64>& value) { +size_t WireFormatLite::Int64Size(const RepeatedField& value) { size_t out = 0; const int n = value.size(); for (int i = 0; i < n; i++) { @@ -799,7 +762,7 @@ size_t WireFormatLite::UInt64Size(const RepeatedField& value) { return out; } -size_t WireFormatLite::SInt64Size(const RepeatedField< int64>& value) { +size_t WireFormatLite::SInt64Size(const RepeatedField& value) { size_t out = 0; const int n = value.size(); for (int i = 0; i < n; i++) { diff --git a/third_party/protobuf-lite/zero_copy_stream_impl.cc b/third_party/protobuf-lite/zero_copy_stream_impl.cc new file mode 100644 index 00000000..52617e9e --- /dev/null +++ b/third_party/protobuf-lite/zero_copy_stream_impl.cc @@ -0,0 +1,366 @@ +// Protocol Buffers - Google's data interchange format +// Copyright 2008 Google Inc. All rights reserved. +// https://developers.google.com/protocol-buffers/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Author: kenton@google.com (Kenton Varda) +// Based on original Protocol Buffers design by +// Sanjay Ghemawat, Jeff Dean, and others. + +#ifndef _MSC_VER +#include +#include +#include +#include +#endif +#include + +#include +#include + +#include +#include +#include +#include +#include + + +namespace google { +namespace protobuf { +namespace io { + +#ifdef _WIN32 +// Win32 lseek is broken: If invoked on a non-seekable file descriptor, its +// return value is undefined. We re-define it to always produce an error. +#define lseek(fd, offset, origin) ((off_t)-1) +// DO NOT include , instead create functions in io_win32.{h,cc} and import +// them like we do below. 
+using google::protobuf::io::win32::access; +using google::protobuf::io::win32::close; +using google::protobuf::io::win32::open; +using google::protobuf::io::win32::read; +using google::protobuf::io::win32::write; +#endif + +namespace { + +// EINTR sucks. +int close_no_eintr(int fd) { + int result; + do { + result = close(fd); + } while (result < 0 && errno == EINTR); + return result; +} + +} // namespace + +// =================================================================== + +FileInputStream::FileInputStream(int file_descriptor, int block_size) + : copying_input_(file_descriptor), impl_(©ing_input_, block_size) {} + +bool FileInputStream::Close() { return copying_input_.Close(); } + +bool FileInputStream::Next(const void** data, int* size) { + return impl_.Next(data, size); +} + +void FileInputStream::BackUp(int count) { impl_.BackUp(count); } + +bool FileInputStream::Skip(int count) { return impl_.Skip(count); } + +int64_t FileInputStream::ByteCount() const { return impl_.ByteCount(); } + +FileInputStream::CopyingFileInputStream::CopyingFileInputStream( + int file_descriptor) + : file_(file_descriptor), + close_on_delete_(false), + is_closed_(false), + errno_(0), + previous_seek_failed_(false) {} + +FileInputStream::CopyingFileInputStream::~CopyingFileInputStream() { + if (close_on_delete_) { + if (!Close()) { + GOOGLE_LOG(ERROR) << "close() failed: " << strerror(errno_); + } + } +} + +bool FileInputStream::CopyingFileInputStream::Close() { + GOOGLE_CHECK(!is_closed_); + + is_closed_ = true; + if (close_no_eintr(file_) != 0) { + // The docs on close() do not specify whether a file descriptor is still + // open after close() fails with EIO. However, the glibc source code + // seems to indicate that it is not. 
+ errno_ = errno; + return false; + } + + return true; +} + +int FileInputStream::CopyingFileInputStream::Read(void* buffer, int size) { + GOOGLE_CHECK(!is_closed_); + + int result; + do { + result = read(file_, buffer, size); + } while (result < 0 && errno == EINTR); + + if (result < 0) { + // Read error (not EOF). + errno_ = errno; + } + + return result; +} + +int FileInputStream::CopyingFileInputStream::Skip(int count) { + GOOGLE_CHECK(!is_closed_); + + if (!previous_seek_failed_ && lseek(file_, count, SEEK_CUR) != (off_t)-1) { + // Seek succeeded. + return count; + } else { + // Failed to seek. + + // Note to self: Don't seek again. This file descriptor doesn't + // support it. + previous_seek_failed_ = true; + + // Use the default implementation. + return CopyingInputStream::Skip(count); + } +} + +// =================================================================== + +FileOutputStream::FileOutputStream(int file_descriptor, int block_size) + : CopyingOutputStreamAdaptor(©ing_output_), + copying_output_(file_descriptor) {} + +bool FileOutputStream::Close() { + bool flush_succeeded = Flush(); + return copying_output_.Close() && flush_succeeded; +} + +FileOutputStream::CopyingFileOutputStream::CopyingFileOutputStream( + int file_descriptor) + : file_(file_descriptor), + close_on_delete_(false), + is_closed_(false), + errno_(0) {} + +FileOutputStream::~FileOutputStream() { Flush(); } + +FileOutputStream::CopyingFileOutputStream::~CopyingFileOutputStream() { + if (close_on_delete_) { + if (!Close()) { + GOOGLE_LOG(ERROR) << "close() failed: " << strerror(errno_); + } + } +} + +bool FileOutputStream::CopyingFileOutputStream::Close() { + GOOGLE_CHECK(!is_closed_); + + is_closed_ = true; + if (close_no_eintr(file_) != 0) { + // The docs on close() do not specify whether a file descriptor is still + // open after close() fails with EIO. However, the glibc source code + // seems to indicate that it is not. 
+ errno_ = errno; + return false; + } + + return true; +} + +bool FileOutputStream::CopyingFileOutputStream::Write(const void* buffer, + int size) { + GOOGLE_CHECK(!is_closed_); + int total_written = 0; + + const uint8* buffer_base = reinterpret_cast(buffer); + + while (total_written < size) { + int bytes; + do { + bytes = write(file_, buffer_base + total_written, size - total_written); + } while (bytes < 0 && errno == EINTR); + + if (bytes <= 0) { + // Write error. + + // FIXME(kenton): According to the man page, if write() returns zero, + // there was no error; write() simply did not write anything. It's + // unclear under what circumstances this might happen, but presumably + // errno won't be set in this case. I am confused as to how such an + // event should be handled. For now I'm treating it as an error, since + // retrying seems like it could lead to an infinite loop. I suspect + // this never actually happens anyway. + + if (bytes < 0) { + errno_ = errno; + } + return false; + } + total_written += bytes; + } + + return true; +} + +// =================================================================== + +IstreamInputStream::IstreamInputStream(std::istream* input, int block_size) + : copying_input_(input), impl_(©ing_input_, block_size) {} + +bool IstreamInputStream::Next(const void** data, int* size) { + return impl_.Next(data, size); +} + +void IstreamInputStream::BackUp(int count) { impl_.BackUp(count); } + +bool IstreamInputStream::Skip(int count) { return impl_.Skip(count); } + +int64_t IstreamInputStream::ByteCount() const { return impl_.ByteCount(); } + +IstreamInputStream::CopyingIstreamInputStream::CopyingIstreamInputStream( + std::istream* input) + : input_(input) {} + +IstreamInputStream::CopyingIstreamInputStream::~CopyingIstreamInputStream() {} + +int IstreamInputStream::CopyingIstreamInputStream::Read(void* buffer, + int size) { + input_->read(reinterpret_cast(buffer), size); + int result = input_->gcount(); + if (result == 0 && input_->fail() 
&& !input_->eof()) { + return -1; + } + return result; +} + +// =================================================================== + +OstreamOutputStream::OstreamOutputStream(std::ostream* output, int block_size) + : copying_output_(output), impl_(©ing_output_, block_size) {} + +OstreamOutputStream::~OstreamOutputStream() { impl_.Flush(); } + +bool OstreamOutputStream::Next(void** data, int* size) { + return impl_.Next(data, size); +} + +void OstreamOutputStream::BackUp(int count) { impl_.BackUp(count); } + +int64_t OstreamOutputStream::ByteCount() const { return impl_.ByteCount(); } + +OstreamOutputStream::CopyingOstreamOutputStream::CopyingOstreamOutputStream( + std::ostream* output) + : output_(output) {} + +OstreamOutputStream::CopyingOstreamOutputStream::~CopyingOstreamOutputStream() { +} + +bool OstreamOutputStream::CopyingOstreamOutputStream::Write(const void* buffer, + int size) { + output_->write(reinterpret_cast(buffer), size); + return output_->good(); +} + +// =================================================================== + +ConcatenatingInputStream::ConcatenatingInputStream( + ZeroCopyInputStream* const streams[], int count) + : streams_(streams), stream_count_(count), bytes_retired_(0) { +} + +bool ConcatenatingInputStream::Next(const void** data, int* size) { + while (stream_count_ > 0) { + if (streams_[0]->Next(data, size)) return true; + + // That stream is done. Advance to the next one. + bytes_retired_ += streams_[0]->ByteCount(); + ++streams_; + --stream_count_; + } + + // No more streams. + return false; +} + +void ConcatenatingInputStream::BackUp(int count) { + if (stream_count_ > 0) { + streams_[0]->BackUp(count); + } else { + GOOGLE_LOG(DFATAL) << "Can't BackUp() after failed Next()."; + } +} + +bool ConcatenatingInputStream::Skip(int count) { + while (stream_count_ > 0) { + // Assume that ByteCount() can be used to find out how much we actually + // skipped when Skip() fails. 
+ int64 target_byte_count = streams_[0]->ByteCount() + count; + if (streams_[0]->Skip(count)) return true; + + // Hit the end of the stream. Figure out how many more bytes we still have + // to skip. + int64 final_byte_count = streams_[0]->ByteCount(); + GOOGLE_DCHECK_LT(final_byte_count, target_byte_count); + count = target_byte_count - final_byte_count; + + // That stream is done. Advance to the next one. + bytes_retired_ += final_byte_count; + ++streams_; + --stream_count_; + } + + return false; +} + +int64_t ConcatenatingInputStream::ByteCount() const { + if (stream_count_ == 0) { + return bytes_retired_; + } else { + return bytes_retired_ + streams_[0]->ByteCount(); + } +} + + +// =================================================================== + +} // namespace io +} // namespace protobuf +} // namespace google diff --git a/third_party/protobuf-lite/zero_copy_stream_impl_lite.cc b/third_party/protobuf-lite/zero_copy_stream_impl_lite.cc index 66ad49bc..54c5db94 100644 --- a/third_party/protobuf-lite/zero_copy_stream_impl_lite.cc +++ b/third_party/protobuf-lite/zero_copy_stream_impl_lite.cc @@ -37,9 +37,9 @@ #include #include -#include #include #include +#include #include namespace google { @@ -55,14 +55,12 @@ static const int kDefaultBlockSize = 8192; // =================================================================== -ArrayInputStream::ArrayInputStream(const void* data, int size, - int block_size) - : data_(reinterpret_cast(data)), - size_(size), - block_size_(block_size > 0 ? block_size : size), - position_(0), - last_returned_size_(0) { -} +ArrayInputStream::ArrayInputStream(const void* data, int size, int block_size) + : data_(reinterpret_cast(data)), + size_(size), + block_size_(block_size > 0 ? 
block_size : size), + position_(0), + last_returned_size_(0) {} bool ArrayInputStream::Next(const void** data, int* size) { if (position_ < size_) { @@ -73,7 +71,7 @@ bool ArrayInputStream::Next(const void** data, int* size) { return true; } else { // We're at the end of the array. - last_returned_size_ = 0; // Don't let caller back up. + last_returned_size_ = 0; // Don't let caller back up. return false; } } @@ -89,7 +87,7 @@ void ArrayInputStream::BackUp(int count) { bool ArrayInputStream::Skip(int count) { GOOGLE_CHECK_GE(count, 0); - last_returned_size_ = 0; // Don't let caller back up. + last_returned_size_ = 0; // Don't let caller back up. if (count > size_ - position_) { position_ = size_; return false; @@ -99,20 +97,17 @@ bool ArrayInputStream::Skip(int count) { } } -int64 ArrayInputStream::ByteCount() const { - return position_; -} +int64_t ArrayInputStream::ByteCount() const { return position_; } // =================================================================== ArrayOutputStream::ArrayOutputStream(void* data, int size, int block_size) - : data_(reinterpret_cast(data)), - size_(size), - block_size_(block_size > 0 ? block_size : size), - position_(0), - last_returned_size_(0) { -} + : data_(reinterpret_cast(data)), + size_(size), + block_size_(block_size > 0 ? block_size : size), + position_(0), + last_returned_size_(0) {} bool ArrayOutputStream::Next(void** data, int* size) { if (position_ < size_) { @@ -123,7 +118,7 @@ bool ArrayOutputStream::Next(void** data, int* size) { return true; } else { // We're at the end of the array. - last_returned_size_ = 0; // Don't let caller back up. + last_returned_size_ = 0; // Don't let caller back up. return false; } } @@ -137,41 +132,33 @@ void ArrayOutputStream::BackUp(int count) { last_returned_size_ = 0; // Don't let caller back up further. 
} -int64 ArrayOutputStream::ByteCount() const { - return position_; -} +int64_t ArrayOutputStream::ByteCount() const { return position_; } // =================================================================== -StringOutputStream::StringOutputStream(string* target) - : target_(target) { -} +StringOutputStream::StringOutputStream(std::string* target) : target_(target) {} bool StringOutputStream::Next(void** data, int* size) { GOOGLE_CHECK(target_ != NULL); - int old_size = target_->size(); + size_t old_size = target_->size(); // Grow the string. + size_t new_size; if (old_size < target_->capacity()) { // Resize the string to match its capacity, since we can get away // without a memory allocation this way. - STLStringResizeUninitialized(target_, target_->capacity()); + new_size = target_->capacity(); } else { - // Size has reached capacity, try to double the size. - if (old_size > std::numeric_limits::max() / 2) { - // Can not double the size otherwise it is going to cause integer - // overflow in the expression below: old_size * 2 "; - GOOGLE_LOG(ERROR) << "Cannot allocate buffer larger than kint32max for " - << "StringOutputStream."; - return false; - } - // Double the size, also make sure that the new size is at least - // kMinimumSize. - STLStringResizeUninitialized( - target_, - std::max(old_size * 2, - kMinimumSize + 0)); // "+ 0" works around GCC4 weirdness. + // Size has reached capacity, try to double it. + new_size = old_size * 2; } + // Avoid integer overflow in returned '*size'. + new_size = std::min(new_size, old_size + std::numeric_limits::max()); + // Increase the size, also make sure that it is at least kMinimumSize. + STLStringResizeUninitialized( + target_, + std::max(new_size, + kMinimumSize + 0)); // "+ 0" works around GCC4 weirdness. 
*data = mutable_string_data(target_) + old_size; *size = target_->size() - old_size; @@ -185,23 +172,19 @@ void StringOutputStream::BackUp(int count) { target_->resize(target_->size() - count); } -int64 StringOutputStream::ByteCount() const { +int64_t StringOutputStream::ByteCount() const { GOOGLE_CHECK(target_ != NULL); return target_->size(); } -void StringOutputStream::SetString(string* target) { - target_ = target; -} - // =================================================================== int CopyingInputStream::Skip(int count) { char junk[4096]; int skipped = 0; while (skipped < count) { - int bytes = - Read(junk, std::min(count - skipped, implicit_cast(sizeof(junk)))); + int bytes = Read(junk, std::min(count - skipped, + implicit_cast(sizeof(junk)))); if (bytes <= 0) { // EOF or read error. return skipped; @@ -213,14 +196,13 @@ int CopyingInputStream::Skip(int count) { CopyingInputStreamAdaptor::CopyingInputStreamAdaptor( CopyingInputStream* copying_stream, int block_size) - : copying_stream_(copying_stream), - owns_copying_stream_(false), - failed_(false), - position_(0), - buffer_size_(block_size > 0 ? block_size : kDefaultBlockSize), - buffer_used_(0), - backup_bytes_(0) { -} + : copying_stream_(copying_stream), + owns_copying_stream_(false), + failed_(false), + position_(0), + buffer_size_(block_size > 0 ? 
block_size : kDefaultBlockSize), + buffer_used_(0), + backup_bytes_(0) {} CopyingInputStreamAdaptor::~CopyingInputStreamAdaptor() { if (owns_copying_stream_) { @@ -264,12 +246,11 @@ bool CopyingInputStreamAdaptor::Next(const void** data, int* size) { void CopyingInputStreamAdaptor::BackUp(int count) { GOOGLE_CHECK(backup_bytes_ == 0 && buffer_.get() != NULL) - << " BackUp() can only be called after Next()."; + << " BackUp() can only be called after Next()."; GOOGLE_CHECK_LE(count, buffer_used_) - << " Can't back up over more bytes than were returned by the last call" - " to Next()."; - GOOGLE_CHECK_GE(count, 0) - << " Parameter to BackUp() can't be negative."; + << " Can't back up over more bytes than were returned by the last call" + " to Next()."; + GOOGLE_CHECK_GE(count, 0) << " Parameter to BackUp() can't be negative."; backup_bytes_ = count; } @@ -297,7 +278,7 @@ bool CopyingInputStreamAdaptor::Skip(int count) { return skipped == count; } -int64 CopyingInputStreamAdaptor::ByteCount() const { +int64_t CopyingInputStreamAdaptor::ByteCount() const { return position_ - backup_bytes_; } @@ -317,13 +298,12 @@ void CopyingInputStreamAdaptor::FreeBuffer() { CopyingOutputStreamAdaptor::CopyingOutputStreamAdaptor( CopyingOutputStream* copying_stream, int block_size) - : copying_stream_(copying_stream), - owns_copying_stream_(false), - failed_(false), - position_(0), - buffer_size_(block_size > 0 ? block_size : kDefaultBlockSize), - buffer_used_(0) { -} + : copying_stream_(copying_stream), + owns_copying_stream_(false), + failed_(false), + position_(0), + buffer_size_(block_size > 0 ? 
block_size : kDefaultBlockSize), + buffer_used_(0) {} CopyingOutputStreamAdaptor::~CopyingOutputStreamAdaptor() { WriteBuffer(); @@ -332,9 +312,7 @@ CopyingOutputStreamAdaptor::~CopyingOutputStreamAdaptor() { } } -bool CopyingOutputStreamAdaptor::Flush() { - return WriteBuffer(); -} +bool CopyingOutputStreamAdaptor::Flush() { return WriteBuffer(); } bool CopyingOutputStreamAdaptor::Next(void** data, int* size) { if (buffer_used_ == buffer_size_) { @@ -352,18 +330,49 @@ bool CopyingOutputStreamAdaptor::Next(void** data, int* size) { void CopyingOutputStreamAdaptor::BackUp(int count) { GOOGLE_CHECK_GE(count, 0); GOOGLE_CHECK_EQ(buffer_used_, buffer_size_) - << " BackUp() can only be called after Next()."; + << " BackUp() can only be called after Next()."; GOOGLE_CHECK_LE(count, buffer_used_) - << " Can't back up over more bytes than were returned by the last call" - " to Next()."; + << " Can't back up over more bytes than were returned by the last call" + " to Next()."; buffer_used_ -= count; } -int64 CopyingOutputStreamAdaptor::ByteCount() const { +int64_t CopyingOutputStreamAdaptor::ByteCount() const { return position_ + buffer_used_; } +bool CopyingOutputStreamAdaptor::WriteAliasedRaw(const void* data, int size) { + if (size >= buffer_size_) { + if (!Flush() || !copying_stream_->Write(data, size)) { + return false; + } + GOOGLE_DCHECK_EQ(buffer_used_, 0); + position_ += size; + return true; + } + + void* out; + int out_size; + while (true) { + if (!Next(&out, &out_size)) { + return false; + } + + if (size <= out_size) { + std::memcpy(out, data, size); + BackUp(out_size - size); + return true; + } + + std::memcpy(out, data, out_size); + data = static_cast(data) + out_size; + size -= out_size; + } + return true; +} + + bool CopyingOutputStreamAdaptor::WriteBuffer() { if (failed_) { // Already failed on a previous write. 
@@ -394,6 +403,63 @@ void CopyingOutputStreamAdaptor::FreeBuffer() { buffer_.reset(); } +// =================================================================== + +LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input, + int64 limit) + : input_(input), limit_(limit) { + prior_bytes_read_ = input_->ByteCount(); +} + +LimitingInputStream::~LimitingInputStream() { + // If we overshot the limit, back up. + if (limit_ < 0) input_->BackUp(-limit_); +} + +bool LimitingInputStream::Next(const void** data, int* size) { + if (limit_ <= 0) return false; + if (!input_->Next(data, size)) return false; + + limit_ -= *size; + if (limit_ < 0) { + // We overshot the limit. Reduce *size to hide the rest of the buffer. + *size += limit_; + } + return true; +} + +void LimitingInputStream::BackUp(int count) { + if (limit_ < 0) { + input_->BackUp(count - limit_); + limit_ = count; + } else { + input_->BackUp(count); + limit_ += count; + } +} + +bool LimitingInputStream::Skip(int count) { + if (count > limit_) { + if (limit_ < 0) return false; + input_->Skip(limit_); + limit_ = 0; + return false; + } else { + if (!input_->Skip(count)) return false; + limit_ -= count; + return true; + } +} + +int64_t LimitingInputStream::ByteCount() const { + if (limit_ < 0) { + return input_->ByteCount() + limit_ - prior_bytes_read_; + } else { + return input_->ByteCount() - prior_bytes_read_; + } +} + + // =================================================================== } // namespace io