From e43bf93eb6c4b7969debb9f7e5cccdd5112bb547 Mon Sep 17 00:00:00 2001 From: zhoushunjie Date: Thu, 27 Jan 2022 10:12:47 +0800 Subject: [PATCH 01/48] add string tensor and case convert kernels --- cmake/pten.cmake | 23 +- paddle/pten/CMakeLists.txt | 2 +- paddle/pten/api/lib/utils/CMakeLists.txt | 2 +- paddle/pten/api/lib/utils/allocator.h | 24 + paddle/pten/common/cpstring_internal.h | 553 ++ paddle/pten/common/data_type.h | 43 +- paddle/pten/common/pstring.h | 476 ++ paddle/pten/core/CMakeLists.txt | 1 + paddle/pten/core/kernel_utils.h | 8 +- paddle/pten/core/string_tensor.cc | 95 + paddle/pten/core/string_tensor.h | 155 + paddle/pten/core/tensor_meta.cc | 14 + paddle/pten/core/tensor_meta.h | 14 + paddle/pten/infermeta/unary.cc | 4 + paddle/pten/infermeta/unary.h | 3 + paddle/pten/kernels/CMakeLists.txt | 3 + paddle/pten/kernels/strings/CMakeLists.txt | 16 + .../kernels/strings/case_convert_kernel.h | 84 + paddle/pten/kernels/strings/case_utils.h | 148 + paddle/pten/kernels/strings/charcases_flag.h | 5981 +++++++++++++++++ .../pten/kernels/strings/cpu/CMakeLists.txt | 0 .../strings/cpu/case_convert_kernel.cc | 63 + .../pten/kernels/strings/gpu/CMakeLists.txt | 0 .../strings/gpu/case_convert_kernel.cu | 100 + paddle/pten/kernels/strings/unicode.cc | 96 + paddle/pten/kernels/strings/unicode.h | 201 + paddle/pten/kernels/strings/unicode_flag.h | 3473 ++++++++++ paddle/pten/tests/core/CMakeLists.txt | 1 + paddle/pten/tests/core/test_string_tensor.cc | 60 + paddle/pten/tests/kernels/CMakeLists.txt | 8 + .../test_strings_case_convert_dev_api.cc | 127 + 31 files changed, 11750 insertions(+), 28 deletions(-) create mode 100644 paddle/pten/common/cpstring_internal.h create mode 100644 paddle/pten/common/pstring.h create mode 100644 paddle/pten/core/string_tensor.cc create mode 100644 paddle/pten/core/string_tensor.h create mode 100644 paddle/pten/kernels/strings/CMakeLists.txt create mode 100644 paddle/pten/kernels/strings/case_convert_kernel.h create mode 100644 paddle/pten/kernels/strings/case_utils.h create mode 100644 paddle/pten/kernels/strings/charcases_flag.h create mode 100644 paddle/pten/kernels/strings/cpu/CMakeLists.txt create mode 100644 paddle/pten/kernels/strings/cpu/case_convert_kernel.cc create mode 100644 paddle/pten/kernels/strings/gpu/CMakeLists.txt create mode 100644 paddle/pten/kernels/strings/gpu/case_convert_kernel.cu create mode 100644 paddle/pten/kernels/strings/unicode.cc create mode 100644 paddle/pten/kernels/strings/unicode.h create mode 100644 paddle/pten/kernels/strings/unicode_flag.h create mode 100644 paddle/pten/tests/core/test_string_tensor.cc create mode 100644 paddle/pten/tests/kernels/test_strings_case_convert_dev_api.cc diff --git a/cmake/pten.cmake b/cmake/pten.cmake index 8e1d233986209b..45e85ac699aaa0 100644 --- a/cmake/pten.cmake +++ b/cmake/pten.cmake @@ -92,7 +92,7 @@ function(kernel_library TARGET) set(all_srcs) set(kernel_deps) - set(oneValueArgs "") + set(oneValueArgs SUB_DIR) set(multiValueArgs SRCS DEPS) cmake_parse_arguments(kernel_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -130,9 +130,17 @@ function(kernel_library TARGET) list(APPEND all_srcs ${xpu_srcs}) foreach(src ${all_srcs}) file(READ ${src} target_content) - string(REGEX MATCHALL "#include \"paddle\/pten\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) + if ("${kernel_library_SUB_DIR}" STREQUAL "") + string(REGEX MATCHALL "#include \"paddle\/pten\/kernels\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) + else() + string(REGEX MATCHALL "#include \"paddle\/pten\/kernels\/${kernel_library_SUB_DIR}\/[a-z0-9_]+_kernel.h\"" include_kernels ${target_content}) + endif() foreach(include_kernel ${include_kernels}) - string(REGEX REPLACE "#include \"paddle\/pten\/kernels\/" "" kernel_name ${include_kernel}) + if ("${kernel_library_SUB_DIR}" STREQUAL "") + string(REGEX REPLACE "#include \"paddle\/pten\/kernels\/" "" kernel_name ${include_kernel}) + else() + string(REGEX REPLACE "#include \"paddle\/pten\/kernels\/${kernel_library_SUB_DIR}\/" "" kernel_name ${include_kernel}) + endif() string(REGEX REPLACE ".h\"" "" kernel_name ${kernel_name}) list(APPEND kernel_deps ${kernel_name}) endforeach() @@ -195,7 +203,6 @@ function(kernel_library TARGET) endif() endif() endif() - if (${common_srcs_len} GREATER 0 OR ${cpu_srcs_len} GREATER 0 OR ${gpu_srcs_len} GREATER 0 OR ${xpu_srcs_len} GREATER 0) # append target into PTEN_KERNELS property @@ -203,7 +210,6 @@ function(kernel_library TARGET) set(pten_kernels ${pten_kernels} ${TARGET}) set_property(GLOBAL PROPERTY PTEN_KERNELS ${pten_kernels}) endif() - # parse kernel name and auto generate kernel declaration # here, we don't need to check WITH_XXX, because if not WITH_XXX, the # xxx_srcs_len will be equal to 0 @@ -223,7 +229,7 @@ endfunction() function(register_kernels) set(options "") - set(oneValueArgs "") + set(oneValueArgs SUB_DIR) set(multiValueArgs EXCLUDES DEPS) cmake_parse_arguments(register_kernels "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -231,14 +237,13 @@ function(register_kernels) file(GLOB KERNELS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*_kernel.h") string(REPLACE ".h" "" KERNELS "${KERNELS}") list(LENGTH register_kernels_DEPS register_kernels_DEPS_len) - foreach(target ${KERNELS}) list(FIND register_kernels_EXCLUDES ${target} _index) if (${_index} EQUAL -1) if (${register_kernels_DEPS_len} GREATER 0) - kernel_library(${target} DEPS ${register_kernels_DEPS}) + kernel_library(${target} DEPS ${register_kernels_DEPS} SUB_DIR ${register_kernels_SUB_DIR}) else() - kernel_library(${target}) + kernel_library(${target} SUB_DIR ${register_kernels_SUB_DIR}) endif() endif() endforeach() diff --git a/paddle/pten/CMakeLists.txt b/paddle/pten/CMakeLists.txt index 78e86c12cb4bbb..c89ab3762aa67c 100644 --- a/paddle/pten/CMakeLists.txt +++ b/paddle/pten/CMakeLists.txt @@ -21,7 +21,7 @@ add_subdirectory(ops) add_subdirectory(tests) # make an unity target for compile deps -set(PTEN_DEPS convert_utils dense_tensor pten_context kernel_factory kernel_context arg_map_context infermeta lod_utils op_compat_infos) +set(PTEN_DEPS convert_utils dense_tensor string_tensor pten_context kernel_factory kernel_context arg_map_context infermeta lod_utils op_compat_infos) get_property(pten_kernels GLOBAL PROPERTY PTEN_KERNELS) # keep this message for debug, remove it later if needless message(STATUS "All standard pten kernels: ${pten_kernels}") diff --git a/paddle/pten/api/lib/utils/CMakeLists.txt b/paddle/pten/api/lib/utils/CMakeLists.txt index 74ecb3cd65262c..d5b2b93028e52c 100644 --- a/paddle/pten/api/lib/utils/CMakeLists.txt +++ b/paddle/pten/api/lib/utils/CMakeLists.txt @@ -1,2 +1,2 @@ cc_library(pten_api_utils SRCS storage.cc tensor_utils.cc DEPS -tensor_base convert_utils dense_tensor lod_tensor selected_rows_utils place var_type_traits) +tensor_base convert_utils dense_tensor lod_tensor selected_rows_utils place var_type_traits string_tensor) diff --git a/paddle/pten/api/lib/utils/allocator.h b/paddle/pten/api/lib/utils/allocator.h index acdba822ac4bb1..839dc092510dcb 100644 --- a/paddle/pten/api/lib/utils/allocator.h +++ b/paddle/pten/api/lib/utils/allocator.h @@ -35,5 +35,29 @@ class DefaultAllocator : public pten::Allocator { paddle::platform::Place place_; }; +class StringAllocator : public pten::Allocator { + public: + explicit StringAllocator(const paddle::platform::Place& place) + : place_(place) {} + + AllocationPtr Allocate(size_t bytes_size) override { + paddle::memory::AllocationPtr a = memory::Alloc(place_, bytes_size); + void* ptr = a->ptr(); + if (paddle::platform::is_cpu_place(place_)) { + std::memset(ptr, 0, bytes_size); + } else if (paddle::platform::is_gpu_place(place_)) { +#ifdef PADDLE_WITH_HIP + hipMemset(ptr, 0, bytes_size); +#else + cudaMemset(ptr, 0, bytes_size); +#endif + } + return a; + } + + private: + paddle::platform::Place place_; +}; + } // namespace experimental } // namespace paddle diff --git a/paddle/pten/common/cpstring_internal.h b/paddle/pten/common/cpstring_internal.h new file mode 100644 index 00000000000000..f6d900a4bafc81 --- /dev/null +++ b/paddle/pten/common/cpstring_internal.h @@ -0,0 +1,553 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include + +#if (defined(__NVCC__) || defined(__HIPCC__)) +#define HOSTDEVICE __host__ __device__ +#define DEVICE __device__ +#define HOST __host__ +#else +#define HOSTDEVICE +#define DEVICE +#define HOST +#endif + +#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) || \ + defined(_WIN32) +#define PD_PSTRING_LITTLE_ENDIAN 1 +#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define PD_PSTRING_LITTLE_ENDIAN 0 +#else +#error "Unable to detect endianness." +#endif + +#if defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ >= 5)) +HOSTDEVICE static inline uint32_t swap32(uint32_t host_int) { + return __builtin_bswap32(host_int); +} + +#elif defined(_MSC_VER) +HOSTDEVICE static inline uint32_t swap32(uint32_t host_int) { + return _byteswap_ulong(host_int); +} + +#elif defined(__APPLE__) +HOSTDEVICE static inline uint32_t swap32(uint32_t host_int) { + return OSSwapInt32(host_int); +} + +#else +HOSTDEVICE static inline uint32_t swap32(uint32_t host_int) { +#if defined(__GLIBC__) + return bswap_32(host_int); +#else // defined(__GLIBC__) + return (((host_int & uint32_t{0xFF}) << 24) | + ((host_int & uint32_t{0xFF00}) << 8) | + ((host_int & uint32_t{0xFF0000}) >> 8) | + ((host_int & uint32_t{0xFF000000}) >> 24)); +#endif // defined(__GLIBC__) +} +#endif + +#if PD_PSTRING_LITTLE_ENDIAN || (defined(__NVCC__) || defined(__HIPCC__)) +#define PD_le32toh(x) x +#else // PD_PSTRING_LITTLE_ENDIAN +#define PD_le32toh(x) swap32(x) +#endif // PD_PSTRING_LARGE_ENDIAN + +HOSTDEVICE static inline size_t PD_align16(size_t i) { + return (i + 0xF) & ~0xF; +} + +HOSTDEVICE static inline size_t PD_max(size_t a, size_t b) { + return a > b ? a : b; +} +HOSTDEVICE static inline size_t PD_min(size_t a, size_t b) { + return a < b ? a : b; +} + +typedef enum PD_PString_Type { // NOLINT + PD_PSTR_SMALL = 0x00, + PD_PSTR_LARGE = 0x01, + PD_PSTR_OFFSET = 0x02, + PD_PSTR_VIEW = 0x03, + PD_PSTR_TYPE_MASK = 0x03 +} PD_PString_Type; + +typedef struct PD_PString_Large { // NOLINT + size_t size; + size_t cap; + char *ptr; +} PD_PString_Large; + +typedef struct PD_PString_Offset { // NOLINT + uint32_t size; + uint32_t offset; + uint32_t count; +} PD_PString_Offset; + +typedef struct PD_PString_View { // NOLINT + size_t size; + const char *ptr; +} PD_PString_View; + +typedef struct PD_PString_Raw { // NOLINT + uint8_t raw[24]; +} PD_PString_Raw; + +typedef union PD_PString_Union { // NOLINT + PD_PString_Large large; + PD_PString_Offset offset; + PD_PString_View view; + PD_PString_Raw raw; +} PD_PString_Union; + +enum { + PD_PString_SmallCapacity = + (sizeof(PD_PString_Union) - sizeof(/* null delim */ char) - + sizeof(/* uint8_t size */ uint8_t)), +}; + +typedef struct PD_PString_Small { // NOLINT + uint8_t size; + char str[PD_PString_SmallCapacity + sizeof(/* null delim */ char)]; +} PD_PString_Small; + +typedef struct PD_PString { // NOLINT + union { + // small conflicts with '#define small char' in RpcNdr.h for MSVC, so we use + // smll instead. + PD_PString_Small smll; + PD_PString_Large large; + PD_PString_Offset offset; + PD_PString_View view; + PD_PString_Raw raw; + } u; +} PD_PString; + +// TODO(dero): Fix for OSS, and add C only build test. +// _Static_assert(CHAR_BIT == 8); +// _Static_assert(sizeof(PD_PString) == 24); + +HOSTDEVICE static inline PD_PString_Type PD_PString_GetType( + const PD_PString *str) { + return (PD_PString_Type)(str->u.raw.raw[0] & PD_PSTR_TYPE_MASK); // NOLINT +} + +// XXX(dero): For the big-endian case, this function could potentially be more +// performant and readable by always storing the string size as little-endian +// and always byte-swapping on big endian, resulting in a simple 'bswap'+'shr' +// (for architectures that have a bswap op). +HOSTDEVICE static inline size_t PD_PString_ToActualSizeT(size_t size) { +#if PD_PSTRING_LITTLE_ENDIAN + return size >> 2; +#else // PD_PSTRING_LITTLE_ENDIAN + // 0xFF000000 or 0xFF00000000000000 depending on platform + static const size_t mask = ~((~(size_t)0) >> 8); // NOLINT + + return (((mask << 2) & size) >> 2) | (~mask & size); +#endif // PD_PSTRING_LITTLE_ENDIAN +} + +HOSTDEVICE static inline size_t PD_PString_ToInternalSizeT( + size_t size, PD_PString_Type type) { +#if PD_PSTRING_LITTLE_ENDIAN + return (size << 2) | type; +#else // PD_PSTRING_LITTLE_ENDIAN + // 0xFF000000 or 0xFF00000000000000 depending on platform + static const size_t mask = ~((~(size_t)0) >> 8); // NOLINT + + return (mask & (size << 2)) | (~mask & size) | + ((size_t)type << ((sizeof(size_t) - 1) * 8)); // NOLINT +#endif // PD_PSTRING_LITTLE_ENDIAN +} + +/* + * Need to implement in other source file. + */ +HOSTDEVICE static inline void PD_Free(void *ptr, size_t size) { free(ptr); } + +HOSTDEVICE static inline void *PD_Malloc(size_t size) { return malloc(size); } + +HOSTDEVICE static inline void *PD_Realloc(void *ptr, + size_t old_size, + size_t new_size) { +#if (defined(__NVCC__) || defined(__HIPCC__)) + if (old_size >= new_size) { + return ptr; + } + void *new_ptr = malloc(new_size); + memcpy(new_ptr, ptr, old_size); + free(ptr); + return new_ptr; +#else + return realloc(ptr, new_size); +#endif +} +HOSTDEVICE static inline void *PD_Memset(void *src, int ch, size_t size) { + return memset(src, ch, size); +} +HOSTDEVICE static inline void *PD_Memcpy(void *__restrict dest, + const void *__restrict src, + size_t size) { + return memcpy(dest, src, size); +} + +HOSTDEVICE static inline int PD_Memcmp(const void *s1, + const void *s2, + size_t size) { + const uint8_t *lstr = (const uint8_t *)(s1); // NOLINT + const uint8_t *rstr = (const uint8_t *)(s2); // NOLINT + for (size_t i = 0; i < size; ++i) { + if (lstr[i] != rstr[i]) { + return (lstr[i] - rstr[i]); + } + } + return 0; +} + +HOSTDEVICE static inline void *PD_Memmove(void *dest, + const void *src, + size_t size) { + const uint8_t *from = (const uint8_t *)(src); // NOLINT + uint8_t *to = (uint8_t *)(dest); // NOLINT + if (from == to || size == 0) { + return dest; + } + + if (to > from && (to - from < static_cast(size))) { + for (int i = size - 1; i >= 0; i--) { + to[i] = from[i]; + } + return dest; + } + if (from > to && (from - to < static_cast(size))) { + for (size_t i = 0; i < size; i++) { + to[i] = from[i]; + } + return dest; + } + PD_Memcpy(dest, src, size); + return dest; +} + +HOSTDEVICE static inline void PD_PString_Init(PD_PString *str) { + PD_Memset(str->u.raw.raw, 0, sizeof(PD_PString_Raw)); +} + +HOSTDEVICE static inline void PD_PString_Dealloc(PD_PString *str) { + if (PD_PString_GetType(str) == PD_PSTR_LARGE && + str->u.large.ptr != NULL) { // NOLINT + PD_Free(str->u.large.ptr, str->u.large.cap + 1); + PD_PString_Init(str); + } +} + +HOSTDEVICE static inline size_t PD_PString_GetSize(const PD_PString *str) { + switch (PD_PString_GetType(str)) { + case PD_PSTR_SMALL: + return str->u.smll.size >> 2; + case PD_PSTR_LARGE: + return PD_PString_ToActualSizeT(str->u.large.size); + case PD_PSTR_OFFSET: + return PD_le32toh(str->u.offset.size) >> 2; + case PD_PSTR_VIEW: + return PD_PString_ToActualSizeT(str->u.view.size); + default: + return 0; // Unreachable. + } +} + +HOSTDEVICE static inline size_t PD_PString_GetCapacity(const PD_PString *str) { + switch (PD_PString_GetType(str)) { + case PD_PSTR_SMALL: + return PD_PString_SmallCapacity; + case PD_PSTR_LARGE: + return str->u.large.cap; + case PD_PSTR_OFFSET: + case PD_PSTR_VIEW: + default: + return 0; + } +} + +HOSTDEVICE static inline const char *PD_PString_GetDataPointer( + const PD_PString *str) { + switch (PD_PString_GetType(str)) { + case PD_PSTR_SMALL: + return str->u.smll.str; + case PD_PSTR_LARGE: + return str->u.large.ptr; + case PD_PSTR_OFFSET: + return (const char *)str + str->u.offset.offset; // NOLINT + case PD_PSTR_VIEW: + return str->u.view.ptr; + default: + // Unreachable. + return NULL; // NOLINT + } +} + +HOSTDEVICE static inline char *PD_PString_ResizeUninitialized(PD_PString *str, + size_t new_size) { + size_t curr_size = PD_PString_GetSize(str); + size_t copy_size = PD_min(new_size, curr_size); + + PD_PString_Type curr_type = PD_PString_GetType(str); + const char *curr_ptr = PD_PString_GetDataPointer(str); + + // Case: SMALL/LARGE/VIEW/OFFSET -> SMALL + if (new_size <= PD_PString_SmallCapacity) { + str->u.smll.size = (uint8_t)((new_size << 2) | PD_PSTR_SMALL); // NOLINT + str->u.smll.str[new_size] = '\0'; + + if (curr_type != PD_PSTR_SMALL && copy_size) { + PD_Memcpy(str->u.smll.str, curr_ptr, copy_size); + } + + if (curr_type == PD_PSTR_LARGE) { + PD_Free((void *)curr_ptr, str->u.large.cap + 1); // NOLINT + } + + // We do not clear out the newly excluded region. + + return str->u.smll.str; + } + + // Case: SMALL/LARGE/VIEW/OFFSET -> LARGE + size_t new_cap; + size_t curr_cap = PD_PString_GetCapacity(str); + + if (new_size < curr_size && new_size < curr_cap / 2) { + // TODO(dero): Replace with shrink_to_fit flag. + new_cap = PD_align16(curr_cap / 2 + 1) - 1; + } else if (new_size > curr_cap) { + new_cap = PD_align16(new_size + 1) - 1; + } else { + new_cap = curr_cap; + } + + char *new_ptr; + if (new_cap == curr_cap) { + new_ptr = str->u.large.ptr; + } else if (curr_type == PD_PSTR_LARGE) { + new_ptr = (char *)PD_Realloc( // NOLINT + str->u.large.ptr, + curr_cap + 1, + new_cap + 1); + } else { + new_ptr = (char *)PD_Malloc(new_cap + 1); // NOLINT + if (copy_size) { + PD_Memcpy(new_ptr, curr_ptr, copy_size); + } + } + + str->u.large.size = PD_PString_ToInternalSizeT(new_size, PD_PSTR_LARGE); + str->u.large.ptr = new_ptr; + str->u.large.ptr[new_size] = '\0'; + str->u.large.cap = new_cap; + + return str->u.large.ptr; +} + +HOSTDEVICE static inline char *PD_PString_GetMutableDataPointer( + PD_PString *str) { + switch (PD_PString_GetType(str)) { + case PD_PSTR_SMALL: + return str->u.smll.str; + case PD_PSTR_OFFSET: + case PD_PSTR_VIEW: + // Convert OFFSET/VIEW to SMALL/LARGE + PD_PString_ResizeUninitialized(str, PD_PString_GetSize(str)); + return (PD_PString_GetType(str) == PD_PSTR_SMALL) ? str->u.smll.str + : str->u.large.ptr; + case PD_PSTR_LARGE: + return str->u.large.ptr; + default: + // Unreachable. + return NULL; // NOLINT + } +} + +HOSTDEVICE static inline void PD_PString_Reserve(PD_PString *str, + size_t new_cap) { + PD_PString_Type curr_type = PD_PString_GetType(str); + + if (new_cap <= PD_PString_SmallCapacity) { + // We do nothing, we let Resize/GetMutableDataPointer handle the + // conversion to SMALL from VIEW/OFFSET when the need arises. + // In the degenerate case, where new_cap <= PD_PString_SmallCapacity, + // curr_size > PD_PString_SmallCapacity, and the type is VIEW/OFFSET, we + // defer the malloc to Resize/GetMutableDataPointer. + return; + } + + if (curr_type == PD_PSTR_LARGE && new_cap <= str->u.large.cap) { + // We handle reduced cap in resize. + return; + } + + // Case: VIEW/OFFSET -> LARGE or grow an existing LARGE type + size_t curr_size = PD_PString_GetSize(str); + const char *curr_ptr = PD_PString_GetDataPointer(str); + + // Since VIEW and OFFSET types are read-only, their capacity is effectively 0. + // So we make sure we have enough room in the VIEW and OFFSET cases. + new_cap = PD_align16(PD_max(new_cap, curr_size) + 1) - 1; + size_t curr_cap = PD_PString_GetCapacity(str); + + if (curr_type == PD_PSTR_LARGE) { + str->u.large.ptr = (char *)PD_Realloc( // NOLINT + str->u.large.ptr, + curr_cap + 1, + new_cap + 1); + } else { + // Convert to Large + char *new_ptr = (char *)PD_Malloc(new_cap + 1); // NOLINT + PD_Memcpy(new_ptr, curr_ptr, curr_size); + + str->u.large.size = PD_PString_ToInternalSizeT(curr_size, PD_PSTR_LARGE); + str->u.large.ptr = new_ptr; + str->u.large.ptr[curr_size] = '\0'; + } + + str->u.large.cap = new_cap; +} + +HOSTDEVICE static inline void PD_PString_ReserveAmortized(PD_PString *str, + size_t new_cap) { + const size_t curr_cap = PD_PString_GetCapacity(str); + if (new_cap > curr_cap) { + PD_PString_Reserve(str, new_cap > 2 * curr_cap ? new_cap : 2 * curr_cap); + } +} + +HOSTDEVICE static inline char *PD_PString_Resize(PD_PString *str, + size_t new_size, + char c) { + size_t curr_size = PD_PString_GetSize(str); + char *cstr = PD_PString_ResizeUninitialized(str, new_size); + + if (new_size > curr_size) { + PD_Memset(cstr + curr_size, c, new_size - curr_size); + } + + return cstr; +} + +HOSTDEVICE static inline void PD_PString_AssignView(PD_PString *dst, + const char *src, + size_t size) { + PD_PString_Dealloc(dst); + + dst->u.view.size = PD_PString_ToInternalSizeT(size, PD_PSTR_VIEW); + dst->u.view.ptr = src; +} + +HOSTDEVICE static inline void PD_PString_AppendN(PD_PString *dst, + const char *src, + size_t src_size) { + if (!src_size) return; + + size_t dst_size = PD_PString_GetSize(dst); + + // For append use cases, we want to ensure amortized growth. + PD_PString_ReserveAmortized(dst, dst_size + src_size); + char *dst_c = PD_PString_ResizeUninitialized(dst, dst_size + src_size); + + PD_Memcpy(dst_c + dst_size, src, src_size); +} + +HOSTDEVICE static inline void PD_PString_Append(PD_PString *dst, + const PD_PString *src) { + const char *src_c = PD_PString_GetDataPointer(src); + size_t size = PD_PString_GetSize(src); + + PD_PString_AppendN(dst, src_c, size); +} + +HOSTDEVICE static inline void PD_PString_Copy(PD_PString *dst, + const char *src, + size_t size) { + char *dst_c = PD_PString_ResizeUninitialized(dst, size); + + if (size) PD_Memcpy(dst_c, src, size); +} + +HOSTDEVICE static inline void PD_PString_Assign(PD_PString *dst, + const PD_PString *src) { + if (dst == src) return; + + PD_PString_Dealloc(dst); + + switch (PD_PString_GetType(src)) { + case PD_PSTR_SMALL: + case PD_PSTR_VIEW: + *dst = *src; + return; + case PD_PSTR_LARGE: { + const char *src_c = PD_PString_GetDataPointer(src); + size_t size = PD_PString_GetSize(src); + + PD_PString_Copy(dst, src_c, size); + } + return; + case PD_PSTR_OFFSET: { + const char *src_c = PD_PString_GetDataPointer(src); + size_t size = PD_PString_GetSize(src); + + PD_PString_AssignView(dst, src_c, size); + } + return; + default: + return; // Unreachable. + } +} + +HOSTDEVICE static inline void PD_PString_Move(PD_PString *dst, + PD_PString *src) { + if (dst == src) return; + + PD_PString_Dealloc(dst); + + switch (PD_PString_GetType(src)) { + case PD_PSTR_SMALL: + case PD_PSTR_VIEW: + *dst = *src; + return; + case PD_PSTR_LARGE: + *dst = *src; + PD_PString_Init(src); + return; + case PD_PSTR_OFFSET: { + const char *src_c = PD_PString_GetDataPointer(src); + size_t size = PD_PString_GetSize(src); + + PD_PString_AssignView(dst, src_c, size); + } + return; + default: + return; // Unreachable. + } +} diff --git a/paddle/pten/common/data_type.h b/paddle/pten/common/data_type.h index 3e31d8ba69f2fd..0fa511f432b8e9 100644 --- a/paddle/pten/common/data_type.h +++ b/paddle/pten/common/data_type.h @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/pten/common/float16.h" #include "paddle/pten/api/ext/exception.h" +#include "paddle/pten/common/pstring.h" namespace paddle { namespace experimental { @@ -27,6 +28,7 @@ using complex64 = ::pten::dtype::complex; using complex128 = ::pten::dtype::complex; using float16 = ::pten::dtype::float16; using bfloat16 = ::pten::dtype::bfloat16; +using pstring = ::pten::dtype::pstring; enum class DataType { UNDEFINED = 0, @@ -45,6 +47,7 @@ enum class DataType { FLOAT64, COMPLEX64, COMPLEX128, + STRING, NUM_DATA_TYPES, // See Note [ Why we need ALL in baisc kernel key member? ] ALL_DTYPE = UNDEFINED, @@ -72,6 +75,8 @@ inline size_t SizeOf(DataType data_type) { return 8; case DataType::COMPLEX128: return 16; + case DataType::STRING: + return 24; case DataType::UNDEFINED: return 0; case DataType::NUM_DATA_TYPES: @@ -82,22 +87,23 @@ inline size_t SizeOf(DataType data_type) { return 0; } -#define PT_FOR_EACH_DATA_TYPE(_) \ - _(bool, DataType::BOOL) \ - _(int8_t, DataType::INT8) \ - _(uint8_t, DataType::UINT8) \ - _(int16_t, DataType::INT16) \ - _(uint16_t, DataType::UINT16) \ - _(int32_t, DataType::INT32) \ - _(uint32_t, DataType::UINT32) \ - _(int64_t, DataType::INT64) \ - _(uint64_t, DataType::UINT64) \ - _(bfloat16, DataType::BFLOAT16) \ - _(float16, DataType::FLOAT16) \ - _(float, DataType::FLOAT32) \ - _(double, DataType::FLOAT64) \ - _(complex64, DataType::COMPLEX64) \ - _(complex128, DataType::COMPLEX128) +#define PT_FOR_EACH_DATA_TYPE(_) \ + _(bool, DataType::BOOL) \ + _(int8_t, DataType::INT8) \ + _(uint8_t, DataType::UINT8) \ + _(int16_t, DataType::INT16) \ + _(uint16_t, DataType::UINT16) \ + _(int32_t, DataType::INT32) \ + _(uint32_t, DataType::UINT32) \ + _(int64_t, DataType::INT64) \ + _(uint64_t, DataType::UINT64) \ + _(bfloat16, DataType::BFLOAT16) \ + _(float16, DataType::FLOAT16) \ + _(float, DataType::FLOAT32) \ + _(double, DataType::FLOAT64) \ + _(complex64, DataType::COMPLEX64) \ + _(complex128, DataType::COMPLEX128) \ + _(pstring, DataType::STRING) template struct DataTypeToCppType; @@ -175,6 +181,9 @@ inline std::ostream& operator<<(std::ostream& os, DataType dtype) { case DataType::COMPLEX128: os << "complex128"; break; + case DataType::STRING: + os << "string"; + break; default: PD_THROW("Invalid enum data type `", static_cast(dtype), "`."); } @@ -195,4 +204,6 @@ using bfloat16 = paddle::experimental::bfloat16; using complex64 = paddle::experimental::complex64; using complex128 = paddle::experimental::complex128; using float16 = paddle::experimental::float16; +using pstring = paddle::experimental::pstring; + } // namespace paddle diff --git a/paddle/pten/common/pstring.h b/paddle/pten/common/pstring.h new file mode 100644 index 00000000000000..af952e429d23e2 --- /dev/null +++ b/paddle/pten/common/pstring.h @@ -0,0 +1,476 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include + +#include +#include + +#include "paddle/pten/common/cpstring_internal.h" + +namespace pten { +namespace dtype { + +class pstring { + PD_PString pstr_; + + public: + enum Type { + // See cstring.h + SMALL = PD_PSTR_SMALL, + LARGE = PD_PSTR_LARGE, + OFFSET = PD_PSTR_OFFSET, + VIEW = PD_PSTR_VIEW, + }; + + typedef const char* const_iterator; + + // Ctor + HOSTDEVICE pstring(); + HOSTDEVICE pstring(const std::string& str); // NOLINT + HOSTDEVICE pstring(const char* str, size_t len); + HOSTDEVICE pstring(const char* str); // NOLINT + HOSTDEVICE pstring(size_t n, char c); + + // Copy + HOSTDEVICE pstring(const pstring& str); + + // Move + HOSTDEVICE pstring(pstring&& str) noexcept; + + // Dtor + HOSTDEVICE ~pstring(); + + // Copy Assignment + HOSTDEVICE pstring& operator=(const pstring& str); + HOSTDEVICE pstring& operator=(const std::string& str); + HOSTDEVICE pstring& operator=(const char* str); + HOSTDEVICE pstring& operator=(char ch); + + // Move Assignment + HOSTDEVICE pstring& operator=(pstring&& str); + + // Comparison + HOSTDEVICE int compare(const char* str, size_t len) const; + HOSTDEVICE bool operator<(const pstring& o) const; + HOSTDEVICE bool operator>(const pstring& o) const; + HOSTDEVICE bool operator==(const char* str) const; + HOSTDEVICE bool operator==(const pstring& o) const; + HOSTDEVICE bool operator!=(const char* str) const; + HOSTDEVICE bool operator!=(const pstring& o) const; + + // Conversion Operators + // TODO(b/147740521): Make explicit. + HOSTDEVICE operator std::string() const; // NOLINT + + // Attributes + HOSTDEVICE size_t size() const; + HOSTDEVICE size_t length() const; + HOSTDEVICE size_t capacity() const; + HOSTDEVICE bool empty() const; + HOSTDEVICE Type type() const; + + // Allocation + HOSTDEVICE void resize(size_t new_size, char c = 0); + // Similar to resize, but will leave the newly grown region uninitialized. + HOSTDEVICE void resize_uninitialized(size_t new_size); + HOSTDEVICE void clear() noexcept; + HOSTDEVICE void reserve(size_t n); + + // Iterators + HOSTDEVICE const_iterator begin() const; + HOSTDEVICE const_iterator end() const; + + // Const Element Access + HOSTDEVICE const char* c_str() const; + HOSTDEVICE const char* data() const; + HOSTDEVICE const char& operator[](size_t i) const; + HOSTDEVICE const char& back() const; + + // Mutable Element Access + // NOTE: For VIEW/OFFSET types, calling these methods will result in the + // conversion to a SMALL or heap allocated LARGE type. As a result, + // previously obtained pointers, references, or iterators to the underlying + // buffer will point to the original VIEW/OFFSET and not the new allocation. + HOSTDEVICE char* mdata(); + HOSTDEVICE char& operator[](size_t i); + + // Assignment + HOSTDEVICE pstring& assign(const char* str, size_t len); + HOSTDEVICE pstring& assign(const char* str); + + // View Assignment + HOSTDEVICE pstring& assign_as_view(const pstring& str); + HOSTDEVICE pstring& assign_as_view(const std::string& str); + HOSTDEVICE pstring& assign_as_view(const char* str, size_t len); + HOSTDEVICE pstring& assign_as_view(const char* str); + + // Modifiers + // NOTE: Invalid input will result in undefined behavior. + HOSTDEVICE pstring& append(const pstring& str); + HOSTDEVICE pstring& append(const char* str, size_t len); + HOSTDEVICE pstring& append(const char* str); + HOSTDEVICE pstring& append(size_t n, char c); + + HOSTDEVICE pstring& erase(size_t pos, size_t len); + + HOSTDEVICE pstring& insert(size_t pos, + const pstring& str, + size_t subpos, + size_t sublen); + HOSTDEVICE pstring& insert(size_t pos, size_t n, char c); + HOSTDEVICE void swap(pstring& str); + HOSTDEVICE void push_back(char ch); + + // Friends + HOSTDEVICE friend bool operator==(const char* a, const pstring& b); + HOSTDEVICE friend bool operator==(const std::string& a, const pstring& b); + HOSTDEVICE friend pstring operator+(const pstring& a, const pstring& b); + HOSTDEVICE friend std::ostream& operator<<(std::ostream& o, + const pstring& str); + HOSTDEVICE friend std::hash; +}; + +// Non-member function overloads + +HOSTDEVICE bool operator==(const char* a, const pstring& b); +HOSTDEVICE bool operator==(const std::string& a, const pstring& b); +HOSTDEVICE pstring operator+(const pstring& a, const pstring& b); +HOSTDEVICE std::ostream& operator<<(std::ostream& o, const pstring& str); +HOSTDEVICE size_t strlen(const char* start); + +// Implementations + +// Ctor + +HOSTDEVICE inline pstring::pstring() { PD_PString_Init(&pstr_); } + +HOSTDEVICE inline pstring::pstring(const char* str, size_t len) { + PD_PString_Init(&pstr_); + PD_PString_Copy(&pstr_, str, len); +} + +HOSTDEVICE inline pstring::pstring(const char* str) + : pstring(str, strlen(str)) {} + +HOSTDEVICE inline pstring::pstring(size_t n, char c) { + PD_PString_Init(&pstr_); + PD_PString_Resize(&pstr_, n, c); +} + +HOSTDEVICE inline pstring::pstring(const std::string& str) + : pstring(str.data(), str.size()) {} + +HOSTDEVICE inline pstring::pstring(const pstring& str) { + PD_PString_Init(&pstr_); + PD_PString_Assign(&pstr_, &str.pstr_); +} + +// Move + +HOSTDEVICE inline pstring::pstring(pstring&& str) noexcept { + PD_PString_Init(&pstr_); + PD_PString_Move(&pstr_, &str.pstr_); +} + +// Dtor + +HOSTDEVICE inline pstring::~pstring() { PD_PString_Dealloc(&pstr_); } + +// Copy Assignment + +HOSTDEVICE inline pstring& pstring::operator=(const pstring& str) { + PD_PString_Assign(&pstr_, &str.pstr_); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::operator=(const std::string& str) { + PD_PString_Copy(&pstr_, str.data(), str.size()); + return *this; +} + +HOSTDEVICE inline pstring& pstring::operator=(const char* str) { + PD_PString_Copy(&pstr_, str, strlen(str)); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::operator=(char c) { + resize_uninitialized(1); + (*this)[0] = c; + + return *this; +} + +// Move Assignment + +HOSTDEVICE inline pstring& pstring::operator=(pstring&& str) { + PD_PString_Move(&pstr_, &str.pstr_); + + return *this; +} + +// Comparison + +HOSTDEVICE inline int pstring::compare(const char* str, size_t len) const { + int ret = PD_Memcmp(data(), str, std::min(len, size())); + + if (ret < 0) return -1; + if (ret > 0) return +1; + + if (size() < len) return -1; + if (size() > len) return +1; + + return 0; +} + +HOSTDEVICE inline bool pstring::operator<(const pstring& o) const { + return compare(o.data(), o.size()) < 0; +} + +HOSTDEVICE inline bool pstring::operator>(const pstring& o) const { + return compare(o.data(), o.size()) > 0; +} + +HOSTDEVICE inline bool pstring::operator==(const char* str) const { + return strlen(str) == size() && PD_Memcmp(data(), str, size()) == 0; +} + +HOSTDEVICE inline bool pstring::operator==(const pstring& o) const { + return o.size() == size() && PD_Memcmp(data(), o.data(), size()) == 0; +} + +HOSTDEVICE inline bool pstring::operator!=(const char* str) const { + return !(*this == str); +} + +HOSTDEVICE inline bool pstring::operator!=(const pstring& o) const { + return !(*this == o); +} + +// Conversion Operators + +HOSTDEVICE inline pstring::operator std::string() const { + return std::string(data(), size()); +} + +// Attributes + +HOSTDEVICE inline size_t pstring::size() const { + return PD_PString_GetSize(&pstr_); +} + +HOSTDEVICE inline size_t pstring::length() const { return size(); } + +HOSTDEVICE inline size_t pstring::capacity() const { + return PD_PString_GetCapacity(&pstr_); +} + +HOSTDEVICE inline bool pstring::empty() const { return size() == 0; } + +HOSTDEVICE inline pstring::Type pstring::type() const { + return static_cast(PD_PString_GetType(&pstr_)); +} + +// Allocation + +HOSTDEVICE inline void pstring::resize(size_t new_size, char c) { + PD_PString_Resize(&pstr_, new_size, c); +} + +HOSTDEVICE inline void pstring::resize_uninitialized(size_t new_size) { + PD_PString_ResizeUninitialized(&pstr_, new_size); +} + +HOSTDEVICE inline void pstring::clear() noexcept { + PD_PString_ResizeUninitialized(&pstr_, 0); +} + +HOSTDEVICE inline void pstring::reserve(size_t n) { + PD_PString_Reserve(&pstr_, n); +} + +// Iterators + +HOSTDEVICE inline pstring::const_iterator pstring::begin() const { + return &(*this)[0]; +} +HOSTDEVICE inline pstring::const_iterator pstring::end() const { + return &(*this)[size()]; +} + +// Element Access + +HOSTDEVICE inline const char* pstring::c_str() const { return data(); } + +HOSTDEVICE inline const char* pstring::data() const { + return PD_PString_GetDataPointer(&pstr_); +} + +HOSTDEVICE inline const char& pstring::operator[](size_t i) const { + return data()[i]; +} + +HOSTDEVICE inline const char& pstring::back() const { + return (*this)[size() - 1]; +} + +HOSTDEVICE inline char* pstring::mdata() { + return PD_PString_GetMutableDataPointer(&pstr_); +} + +HOSTDEVICE inline char& pstring::operator[](size_t i) { return mdata()[i]; } + +// Assignment + +HOSTDEVICE inline pstring& pstring::assign(const char* str, size_t len) { + PD_PString_Copy(&pstr_, str, len); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::assign(const char* str) { + assign(str, strlen(str)); + + return *this; +} + +// View Assignment + +HOSTDEVICE inline pstring& pstring::assign_as_view(const pstring& str) { + assign_as_view(str.data(), str.size()); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::assign_as_view(const std::string& str) { + assign_as_view(str.data(), str.size()); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::assign_as_view(const char* str, + size_t len) { + PD_PString_AssignView(&pstr_, str, len); + return *this; +} + +HOSTDEVICE inline pstring& pstring::assign_as_view(const char* str) { + assign_as_view(str, strlen(str)); + + return *this; +} + +// Modifiers + +HOSTDEVICE inline pstring& pstring::append(const pstring& str) { + PD_PString_Append(&pstr_, &str.pstr_); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::append(const char* str, size_t len) { + PD_PString_AppendN(&pstr_, str, len); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::append(const char* str) { + append(str, strlen(str)); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::append(size_t n, char c) { + // For append use cases, we want to ensure amortized growth. + const size_t new_size = size() + n; + PD_PString_ReserveAmortized(&pstr_, new_size); + resize(new_size, c); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::erase(size_t pos, size_t len) { + PD_Memmove(mdata() + pos, data() + pos + len, size() - len - pos); + + resize(size() - len); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::insert(size_t pos, + const pstring& str, + size_t subpos, + size_t sublen) { + size_t orig_size = size(); + PD_PString_ResizeUninitialized(&pstr_, orig_size + sublen); + + PD_Memmove(mdata() + pos + sublen, data() + pos, orig_size - pos); + PD_Memmove(mdata() + pos, str.data() + subpos, sublen); + + return *this; +} + +HOSTDEVICE inline pstring& pstring::insert(size_t pos, size_t n, char c) { + size_t size_ = size(); + PD_PString_ResizeUninitialized(&pstr_, size_ + n); + + PD_Memmove(mdata() + pos + n, data() + pos, size_ - pos); + PD_Memset(mdata() + pos, c, n); + + return *this; +} + +HOSTDEVICE inline void pstring::swap(pstring& str) { + std::swap(pstr_, str.pstr_); +} + +HOSTDEVICE inline void pstring::push_back(char ch) { append(1, ch); } + +// Friends + +HOSTDEVICE inline bool operator==(const char* a, const pstring& b) { + return strlen(a) == b.size() && PD_Memcmp(a, b.data(), b.size()) == 0; +} + +HOSTDEVICE inline bool operator==(const std::string& a, const pstring& b) { + return a.size() == b.size() && PD_Memcmp(a.data(), b.data(), b.size()) == 0; +} + +HOSTDEVICE inline pstring operator+(const pstring& a, const pstring& b) { + pstring r; + r.reserve(a.size() + b.size()); + r.append(a); + r.append(b); + + return r; +} + +HOSTDEVICE inline std::ostream& operator<<(std::ostream& o, + const pstring& str) { + return o.write(str.data(), str.size()); +} + +HOSTDEVICE inline size_t strlen(const char* start) { + const char* end = start; + for (; *end != '\0'; ++end) { + } + return end - start; +} + +} // namespace dtype +} // namespace pten diff --git a/paddle/pten/core/CMakeLists.txt b/paddle/pten/core/CMakeLists.txt index e89d2cd3b3c387..177868cc9bbef8 100644 --- a/paddle/pten/core/CMakeLists.txt +++ b/paddle/pten/core/CMakeLists.txt @@ -20,6 +20,7 @@ cc_library(tensor_base SRCS tensor_base.cc allocator.cc storage.cc DEPS pten_enf cc_library(tensor_meta SRCS tensor_meta.cc DEPS pten_enforce mixed_vector) cc_library(lod_utils SRCS lod_utils.cc DEPS pten_enforce mixed_vector) cc_library(dense_tensor SRCS dense_tensor.cc DEPS convert_utils tensor_meta tensor_base) +cc_library(string_tensor SRCS string_tensor.cc DEPS convert_utils tensor_meta tensor_base) cc_library(pten_device_context SRCS device_context.cc DEPS tensor_base ) cc_library(meta_tensor SRCS meta_tensor.cc DEPS tensor_base tensor_meta dense_tensor) diff --git a/paddle/pten/core/kernel_utils.h b/paddle/pten/core/kernel_utils.h index 85fe2f22836e61..92780da3745850 100644 --- a/paddle/pten/core/kernel_utils.h +++ b/paddle/pten/core/kernel_utils.h @@ -20,6 +20,7 @@ #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_context.h" #include "paddle/pten/core/kernel_def.h" +#include "paddle/pten/core/string_tensor.h" // See Note [ Why still include the fluid headers? ] #include "paddle/pten/core/enforce.h" @@ -213,6 +214,9 @@ struct KernelImpl { PT_SPECIALIZE_KernelCallHelper_FOR_INPUT(DenseTensor); PT_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(DenseTensor); PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(DenseTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_INPUT(StringTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_OPTIONAL_INPUT(StringTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_INPUT(StringTensor); // TODO(chenweihang): adapt SelectedRows // PT_SPECIALIZE_KernelCallHelper_FOR_INPUT(SelectedRowsTensor); @@ -229,11 +233,13 @@ struct KernelImpl { PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::vector&); PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const ScalarArray&); PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::vector&); - + PT_SPECIALIZE_KernelCallHelper_FOR_ATTRIBUTE(const std::string&); /* Output Helpers */ PT_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(DenseTensor); PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_OUTPUT(DenseTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(StringTensor); + PT_SPECIALIZE_KernelCallHelper_FOR_MULTI_OUTPUT(StringTensor); // TODO(chenweihang): adapt SelectedRows // PT_SPECIALIZE_KernelCallHelper_FOR_OUTPUT(SelectedRowsTensor); diff --git a/paddle/pten/core/string_tensor.cc b/paddle/pten/core/string_tensor.cc new file mode 100644 index 00000000000000..a4d058533fc9d4 --- /dev/null +++ b/paddle/pten/core/string_tensor.cc @@ -0,0 +1,95 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/core/string_tensor.h" + +namespace pten { + +StringTensor::StringTensor(Allocator* a, const StringTensorMeta& meta) + : meta_(meta), + storage_(make_intrusive(a, SizeOf(dtype()) * numel())) {} + +StringTensor::StringTensor(Allocator* a, StringTensorMeta&& meta) + : meta_(std::move(meta)), + storage_(make_intrusive(a, SizeOf(dtype()) * numel())) {} + +StringTensor::StringTensor(intrusive_ptr storage, + const StringTensorMeta& meta) + : meta_(meta), storage_(std::move(storage)) {} + +StringTensor::StringTensor(intrusive_ptr storage, + StringTensorMeta&& meta) + : meta_(std::move(meta)), storage_(std::move(storage)) {} + +int64_t StringTensor::numel() const { + if (meta_.is_scalar) { + return 1; + } + return product(meta_.dims); +} + +bool StringTensor::IsSharedWith(const StringTensor& b) const { + return storage_.get() == b.storage_.get() && storage_.get() != nullptr; +} + +dtype::pstring* StringTensor::mutable_data(size_t request_bytes /* = 0 */) { + PADDLE_ENFORCE( + valid(), + paddle::platform::errors::PreconditionNotMet( + "The meta data must be valid when call the mutable data function.")); + PADDLE_ENFORCE_NOT_NULL( + storage_, + paddle::platform::errors::PreconditionNotMet( + "The storage must be valid when call the mutable data function.")); + size_t bytes = numel() * SizeOf(dtype()); + if (request_bytes) { + PADDLE_ENFORCE_GE(request_bytes, + bytes, + paddle::platform::errors::InvalidArgument( + "The reserved size %d should be enough to meet the " + "volume required by metadata %d.", + request_bytes, + bytes)); + bytes = request_bytes; + } + if (storage_->size() < bytes || storage_->size() == 0) { + VLOG(10) << "mutbale data realloc, original size: " << storage_->size() + << ", new size: " << bytes; + storage_->Realloc(bytes); + } + return reinterpret_cast(storage_->data()); +} + +const dtype::pstring* StringTensor::data() const { + PADDLE_ENFORCE_NOT_NULL( + storage_, + paddle::platform::errors::PreconditionNotMet( + "The storage must be valid when call the mutable data function.")); + return reinterpret_cast(storage_->data()); +} + +void StringTensor::set_meta(StringTensorMeta&& meta) { + PADDLE_ENFORCE(!meta_.valid(), + paddle::platform::errors::InvalidArgument( + "Only when the original attribute of Tensor is " + "incomplete, can it be reset.")); + meta_ = std::move(meta); +} + +void StringTensor::Resize(const DDim& dims) { + meta_.dims = dims; + mutable_data(); +} + +} // namespace pten diff --git a/paddle/pten/core/string_tensor.h b/paddle/pten/core/string_tensor.h new file mode 100644 index 00000000000000..cf67d653fc3be1 --- /dev/null +++ b/paddle/pten/core/string_tensor.h @@ -0,0 +1,155 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/common/pstring.h" +#include "paddle/pten/core/allocator.h" +#include "paddle/pten/core/storage.h" +#include "paddle/pten/core/tensor_base.h" +#include "paddle/pten/core/tensor_meta.h" + +namespace pten { + +/// \brief The Dense tensor store values in a contiguous sequential block +/// of memory where all values are represented. Tensors or multi-dimensional +/// arrays are used in math operators. +/// During the entire life cycle of a StringTensor, its device type and key +/// metadata are set unchanged. +class StringTensor : public TensorBase, + public TypeInfoTraits { + public: + /// \brief Construct a dense tensor and allocate space. + /// \param a The allocator used to allocate space. + /// \param meta The meta data of dense tensor. + StringTensor(Allocator* a, const StringTensorMeta& meta); + + /// \brief Construct a dense tensor and allocate space. + /// \param a The allocator used to allocate space. + /// \param meta The meta data of dense tensor. + StringTensor(Allocator* a, StringTensorMeta&& meta); + + /// \brief Use existing storage space to create dense tensor. This interface + /// can be used to deliberately create an uninitialized dense tensor. + /// \param storage The existing storage. + /// \param meta The meta data of dense tensor. + StringTensor(intrusive_ptr storage, const StringTensorMeta& meta); + + /// \brief Use existing storage space to create dense tensor. This interface + /// can be used to deliberately create an uninitialized dense tensor. + /// \param storage The existing storage. + /// \param meta The meta data of dense tensor. + StringTensor(intrusive_ptr storage, StringTensorMeta&& meta); + + /// \brief Because dense tensor is a kind of container, we give a default + /// constructor to use for stl container. But the dense tensor created with + /// the default constructor is not practical. + StringTensor() = default; + + /// \brief Because dense tensor is a resource handle, we provide a default + /// move constructor to support move semantics. + StringTensor(StringTensor&& other) = default; + + /// \brief We do not recommend deep copy of dense tensor because of its + /// efficiency and complexity across devices. The operation is disabled here. + StringTensor(const StringTensor& other) = delete; + + /// \brief Destroy the tensor object and release exclusive resources. + virtual ~StringTensor() = default; + + public: + /// \brief Returns the name of the class for type traits. + /// \return The name of the class. + static const char* name() { return "StringTensor"; } + + /// \brief Returns the number of elements contained in tensor. + /// \return The number of elements contained in tensor. + int64_t numel() const override; + + /// \brief Returns the dims of the tensor. + /// \return The dims of the tensor. + const DDim& dims() const noexcept override { return meta_.dims; } + + /// \brief Returns the data place of the tensor. + /// \return The data place of the tensor. + const Place& place() const override { return storage_->place(); } + + /// \brief Returns the meta information of the tensor. + /// \return The meta information of the tensor. + const StringTensorMeta& meta() const noexcept { return meta_; } + + /// \brief Returns the data type of the tensor. + /// \return The data type of the tensor. + DataType dtype() const noexcept override { return DataType::STRING; } + + /// \brief Returns the data layout of the tensor. + /// \return The data layout of the tensor. + DataLayout layout() const noexcept override { return DataLayout::ALL_LAYOUT; } + + /// \brief Sets the meta information of the tensor. Only when the original + /// attribute of Tensor is incomplete, can it be reset. + /// \param meta The meta information of the tensor. + void set_meta(StringTensorMeta&& meta); + + /// \brief Test whether the metadata is valid. + /// \return Whether the metadata is valid. + bool valid() const noexcept override { return meta_.valid(); } + + /// \brief Test whether the storage is allocated. + /// return Whether the storage is allocated. + bool initialized() const override { + return storage_ != nullptr && storage_->data() != nullptr; + } + + /// \brief Check if storage is shared with other objects. + /// \return Whether the storage is shared with other objects. + bool IsSharedWith(const StringTensor& b) const; + + /// \brief Change the shape information in the metadata. If the new size is + /// larger than the original value, the storage area will be reallocated. + /// \param dims The new dims of the dense tensor. + /// \param lod The new lod of the dense tensor. + void Resize(const DDim& dims); + + /// \brief Returns the actual storage size occupied by tensor, may be larger + /// than its shape dims. + /// \return The actual storage size occupied by tensor. + size_t capacity() const { return storage_->size(); } + + /// \brief Release the storage area for other purposes. Because of the + /// destruction of encapsulation, we do not support two dense tensors directly + /// sharing the same intrusive pointer. + /// \return The rvalue of instrusize pointer releated to the released storage. + intrusive_ptr release() { return std::move(storage_); } + + /// \brief Get the mutable data pointer value of pstring type. + /// Memory allocation may occur when calling this interface: + /// 1. When the storage size is not enough to meet the current shape of the + /// data. + /// 2. When more request_bytes parameters are used to reserve the data + /// storage. + /// param request_bytes The bytes to reserve the data storage. + /// \return The mutable data pointer value of type T. + dtype::pstring* mutable_data(size_t request_bytes = 0); + + /// \brief Get the const data pointer value of pstring type. + /// \return The const data pointer value of pstring type. + const dtype::pstring* data() const; + + private: + StringTensorMeta meta_; + intrusive_ptr storage_; +}; + +} // namespace pten diff --git a/paddle/pten/core/tensor_meta.cc b/paddle/pten/core/tensor_meta.cc index 844387bec5c58b..e81088eacf1836 100644 --- a/paddle/pten/core/tensor_meta.cc +++ b/paddle/pten/core/tensor_meta.cc @@ -40,4 +40,18 @@ bool DenseTensorMeta::valid() const noexcept { return valid; } +StringTensorMeta::StringTensorMeta(const DDim& dims) : dims(dims) {} + +bool StringTensorMeta::valid() const noexcept { + bool valid{true}; + valid = valid && (is_scalar || product(dims) >= 0); + return valid; +} + +bool operator==(const StringTensorMeta& lhs, const StringTensorMeta& rhs) { + bool ret = true; + return ret && (lhs.is_scalar == rhs.is_scalar) && (lhs.dims == rhs.dims) && + (lhs.offset == rhs.offset); +} + } // namespace pten diff --git a/paddle/pten/core/tensor_meta.h b/paddle/pten/core/tensor_meta.h index ac3f17267c4f94..b4d14fa7156c86 100644 --- a/paddle/pten/core/tensor_meta.h +++ b/paddle/pten/core/tensor_meta.h @@ -69,4 +69,18 @@ inline bool operator==(const DenseTensorMeta& lhs, const DenseTensorMeta& rhs) { (lhs.lod == rhs.lod) && (lhs.offset == rhs.offset); } +struct StringTensorMeta { + StringTensorMeta() = default; + explicit StringTensorMeta(const DDim& dims); + /// \brief Test whether the metadata is valid. Does not throw exceptions. + /// \return Whether the metadata is valid. + bool valid() const noexcept; + + /// During the entire life cycle of a DenseTensor, the following attributes + /// marked with `const` are expected to remain unchanged. + bool is_scalar{false}; + DDim dims; + size_t offset{0}; +}; + } // namespace pten diff --git a/paddle/pten/infermeta/unary.cc b/paddle/pten/infermeta/unary.cc index fec50d528dfc42..73b1dc6a96b46b 100644 --- a/paddle/pten/infermeta/unary.cc +++ b/paddle/pten/infermeta/unary.cc @@ -304,4 +304,8 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta, return return_meta; } +StringTensorMeta UnchangedInferMeta(const StringTensorMeta& x_meta) { + return x_meta; +} + } // namespace pten diff --git a/paddle/pten/infermeta/unary.h b/paddle/pten/infermeta/unary.h index 670c70de84ccfd..eee9f275e7bfd4 100644 --- a/paddle/pten/infermeta/unary.h +++ b/paddle/pten/infermeta/unary.h @@ -67,4 +67,7 @@ DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta, const std::vector& axis, DataType dtype, bool keep_dim); + +// Common InferMeta Functions of StringTensor for unary operators: +StringTensorMeta UnchangedInferMeta(const StringTensorMeta& x_meta); } // namespace pten diff --git a/paddle/pten/kernels/CMakeLists.txt b/paddle/pten/kernels/CMakeLists.txt index 615b80be592a08..ea153263b437ba 100644 --- a/paddle/pten/kernels/CMakeLists.txt +++ b/paddle/pten/kernels/CMakeLists.txt @@ -24,3 +24,6 @@ register_kernels(EXCLUDES math_kernel DEPS ${COMMON_KERNEL_DEPS}) kernel_library(math_kernel DEPS ${MATH_KERNEL_DEPS}) copy_if_different(${kernel_declare_file} ${kernel_declare_file_final}) + +# For strings kernels +add_subdirectory(strings) diff --git a/paddle/pten/kernels/strings/CMakeLists.txt b/paddle/pten/kernels/strings/CMakeLists.txt new file mode 100644 index 00000000000000..0ab63ca0f72185 --- /dev/null +++ b/paddle/pten/kernels/strings/CMakeLists.txt @@ -0,0 +1,16 @@ +add_subdirectory(cpu) +if(WITH_GPU OR WITH_ROCM) + add_subdirectory(gpu) +endif() + +cc_library(unicode SRCS unicode.cc) +set_property(GLOBAL PROPERTY STRING_KERNELS "") + +set(STRING_KERNEL_DEPS string_tensor kernel_context kernel_factory arg_map_context convert_utils lod_utils) +set(STRING_KERNEL_DEPS ${STRING_KERNEL_DEPS} eigen_function blas math_function) +# remove this dep after removing fluid deps on tensor creation +set(STRING_KERNEL_DEPS ${STRING_KERNEL_DEPS} pten_api_utils) +set(STRING_KERNEL_DEPS ${STRING_KERNEL_DEPS} infermeta) +set(STRING_KERNEL_DEPS ${STRING_KERNEL_DEPS} unicode) + +register_kernels(DEPS ${STRING_KERNEL_DEPS} SUB_DIR "strings") diff --git a/paddle/pten/kernels/strings/case_convert_kernel.h b/paddle/pten/kernels/strings/case_convert_kernel.h new file mode 100644 index 00000000000000..87cefefeea65b9 --- /dev/null +++ b/paddle/pten/kernels/strings/case_convert_kernel.h @@ -0,0 +1,84 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include "paddle/pten/api/lib/utils/allocator.h" +#include "paddle/pten/api/lib/utils/storage.h" +#include "paddle/pten/core/string_tensor.h" +#include "paddle/pten/infermeta/unary.h" + +using pstring = ::pten::dtype::pstring; + +namespace pten { +namespace strings { +template +struct StringCaseConvertKernel { + void operator()(const ContextT& dev_ctx, + const StringTensor& x, + const std::string& encoding, + StringTensor* out) { + AsciiCoverter ascii_converter; + UTF8Converter utf8_converter; + const pstring* in_ptr = x.data(); + pstring* out_ptr = out->mutable_data(); + auto num = x.numel(); + if (encoding.empty()) { + for (int64_t i = 0; i < num; ++i) { + ascii_converter(dev_ctx, in_ptr[i], out_ptr + i); + } + } else { + for (int64_t i = 0; i < num; ++i) { + utf8_converter(dev_ctx, in_ptr[i], out_ptr + i); + } + } + } +}; + +template +void StringLowerKernel(const ContextT& dev_ctx, + const StringTensor& x, + const std::string& encoding, + StringTensor* out); + +template +void StringUpperKernel(const ContextT& dev_ctx, + const StringTensor& x, + const std::string& encoding, + StringTensor* out); + +template +StringTensor StringLower(const ContextT& dev_ctx, + const std::string& encoding, + const StringTensor& x) { + auto out_meta = UnchangedInferMeta(x.meta()); + pten::StringTensor string_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); + StringLowerKernel(dev_ctx, x, encoding, &string_out); + return string_out; +} + +template +StringTensor StringUpper(const ContextT& dev_ctx, + const std::string& encoding, + const StringTensor& x) { + auto out_meta = UnchangedInferMeta(x.meta()); + pten::StringTensor string_out( + pten::make_intrusive( + dev_ctx.GetPlace()), + std::move(out_meta)); + StringUpperKernel(dev_ctx, x, encoding, &string_out); + return string_out; +} + +} // namespace strings +} // namespace pten diff --git a/paddle/pten/kernels/strings/case_utils.h b/paddle/pten/kernels/strings/case_utils.h new file mode 100644 index 00000000000000..117978fbe9bcfa --- /dev/null +++ b/paddle/pten/kernels/strings/case_utils.h @@ -0,0 +1,148 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include + +#include "paddle/fluid/platform/transform.h" +#include "paddle/pten/common/pstring.h" +#include "paddle/pten/kernels/strings/unicode.h" +#if defined(__NVCC__) || defined(__HIPCC__) +#include +#include +#include "paddle/pten/backends/gpu/gpu_context.h" +#endif +namespace pten { +namespace strings { + +using pstring = dtype::pstring; +struct AsciiToLower { + HOSTDEVICE char operator()(char in) const { + return ('A' <= in && in <= 'Z') ? in - ('Z' - 'z') : in; + } +}; + +struct AsciiToUpper { + HOSTDEVICE char operator()(char in) const { + return ('a' <= in && in <= 'z') ? in ^ 0x20 : in; + } +}; + +template +struct AsciiCaseConverter { + void operator()(const DeviceContext& dev_ctx, + const pstring& in, + pstring* out) const { + paddle::platform::Transform trans; + trans(dev_ctx, in.begin(), in.end(), out->mdata(), CharConverter()); + } +}; + +template +struct UTF8ToLower { + HOSTDEVICE UTF8ToLower(uint8_t* unicode_flag_map, uint16_t* cases_map) + : unicode_flag_map_(unicode_flag_map), cases_map_(cases_map) {} + + HOSTDEVICE uint32_t operator()(uint32_t in) const { + uint32_t flg = (in <= 0x00FFFF ? unicode_flag_map_[in] : 0); + return (strings::isupper(flg) ? cases_map_[in] : in); + } + + uint8_t* unicode_flag_map_; + uint16_t* cases_map_; +}; + +template +struct UTF8ToUpper { + HOSTDEVICE UTF8ToUpper(uint8_t* unicode_flag_map, uint16_t* cases_map) + : unicode_flag_map_(unicode_flag_map), cases_map_(cases_map) {} + + HOSTDEVICE uint32_t operator()(uint32_t in) const { + uint32_t flg = (in <= 0x00FFFF ? unicode_flag_map_[in] : 0); + return (strings::islower(flg) ? cases_map_[in] : in); + } + + uint8_t* unicode_flag_map_; + uint16_t* cases_map_; +}; + +template typename CharConverter> +struct UTF8CaseConverter { + void operator()(const DeviceContext& dev_ctx, + const pstring& in, + pstring* out) const { + paddle::platform::Transform trans; + uint32_t unicode_len = + pten::strings::get_unicode_str_len(in.data(), in.size()); + std::vector unicode_in(unicode_len, 0); + pten::strings::get_unicode_str(in.data(), unicode_in.data(), unicode_len); + auto unicode_flag_map = + strings::UnicodeFlagMap::Instance()->data(); + auto cases_map = + strings::UnicodeFlagMap::Instance()->data(); + trans(dev_ctx, + unicode_in.begin(), + unicode_in.end(), + unicode_in.begin(), + CharConverter(unicode_flag_map, cases_map)); + uint32_t utf8_len = + pten::strings::get_utf8_str_len(unicode_in.data(), unicode_len); + std::vector result(utf8_len, 0); + pten::strings::get_utf8_str(unicode_in.data(), result.data(), unicode_len); + *out = result.data(); + } +}; + +#if defined(__NVCC__) || defined(__HIPCC__) + +template +struct AsciiCaseConverter { + void operator()(const GPUContext& dev_ctx, + const pstring& in, + pstring* out) const { + paddle::platform::Transform trans; + trans(dev_ctx, in.begin(), in.end(), out->mdata(), CharConverter()); + } +}; + +template